#The MNIST Loss function

In [2]:
import torch
from fastai.vision.all import *
import matplotlib.pyplot as plt

In [3]:
path = untar_data(URLs.MNIST_SAMPLE)


In [4]:
threes = (path/'train'/'3').ls().sorted()
sevens = (path/'train'/'7').ls().sorted()

In [5]:
threes_tensors = [tensor(Image.open(o)) for o in threes]
sevens_tensors = [tensor(Image.open(o)) for o in sevens]


In [6]:
stacked_sevens = torch.stack(sevens_tensors).float()/255
stacked_threes = torch.stack(threes_tensors).float()/255


In [7]:
valid_3_tens = torch.stack([tensor(Image.open(o)) for o in (path/'valid'/'3').ls()])
valid_7_tens = torch.stack([tensor(Image.open(o)) for o in (path/'valid'/'7').ls()])
valid_7_tens = valid_7_tens.float()/255
valid_3_tens = valid_3_tens.float()/255

In [8]:

train_x = torch.cat([stacked_threes, stacked_sevens]).view(-1,28*28)


In [9]:
train_y = tensor([1]*len(threes) + [0]*len(sevens)).unsqueeze(1)
train_x.shape, train_y.shape

(torch.Size([12396, 784]), torch.Size([12396, 1]))

In [10]:
dset = list(zip(train_x, train_y))
x,y = dset[0]
x.shape, y

(torch.Size([784]), tensor([1]))

In [11]:
valid_x = torch.cat([valid_3_tens, valid_7_tens]).view(-1,28*28)
valid_y = tensor([1]*len(valid_3_tens) + [0]*len(valid_7_tens)).unsqueeze(1)
valid_dset = list(zip(valid_x, valid_y))

In [12]:
def init_params(size, std=1.0): return (torch.randn(size)*std).requires_grad_()

weights = init_params((28*28,1))
bias = init_params(1)

In [13]:
(train_x[0]*weights).sum() + bias

tensor([4387.3281], grad_fn=<AddBackward0>)

In [14]:
def linear1(xb): return xb@weights + bias
preds = linear1(train_x)
preds

tensor([[-2.2997],
        [-7.0570],
        [ 1.0263],
        ...,
        [ 7.5738],
        [-1.1479],
        [ 9.5748]], grad_fn=<AddBackward0>)

In [15]:
corrects = (preds>0.0).float() == train_y
corrects

tensor([[False],
        [False],
        [ True],
        ...,
        [False],
        [ True],
        [False]])

In [16]:
corrects.float().mean()

tensor(0.3525)

In [17]:
weights[0].data *= 1.0001
preds = linear1(train_x)
((preds>0.0).float() == train_y).float().mean()

tensor(0.3525)

In [18]:
trgts = tensor([1,0,1])
prds = tensor([0.9,0.4,0.2])

In [19]:
def mnist_loss(preds, targs):
    return torch.where(targs==1, 1-preds, preds).mean()

In [20]:
torch.where(trgts==1, 1-prds, prds)

tensor([0.1000, 0.4000, 0.8000])

In [21]:
mnist_loss(tensor([0.9,0.4,0.8]), trgts)

tensor(0.2333)

#Sigmoid

In [22]:
def sigmoid(x): return 1/(1+torch.exp(-x))


In [23]:
#plot_function(torch.sigmoid, title='Sigmoid function',min=-4,max=4)

In [24]:
def mnist_loss(predictions,targets):
    predictions = predictions.sigmoid()
    return torch.where(targets==1, 1-predictions, predictions).mean()



In [25]:
coll = range(15)
dl = DataLoader(coll, batch_size=5,shuffle=True)
list(dl)

[tensor([10,  0,  9,  4,  8]),
 tensor([14,  6, 11,  5,  7]),
 tensor([12,  2, 13,  1,  3])]

In [26]:
ds = L(enumerate(string.ascii_lowercase))
ds

[(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (4, 'e'), (5, 'f'), (6, 'g'), (7, 'h'), (8, 'i'), (9, 'j'), (10, 'k'), (11, 'l'), (12, 'm'), (13, 'n'), (14, 'o'), (15, 'p'), (16, 'q'), (17, 'r'), (18, 's'), (19, 't'), (20, 'u'), (21, 'v'), (22, 'w'), (23, 'x'), (24, 'y'), (25, 'z')]

In [27]:
dl = DataLoader(ds,batch_size=6, shuffle=True)
list(dl)


[(tensor([ 1, 14, 23,  0,  8, 13]), ('b', 'o', 'x', 'a', 'i', 'n')),
 (tensor([ 2,  9, 22, 19, 18,  7]), ('c', 'j', 'w', 't', 's', 'h')),
 (tensor([25,  4, 20, 17,  6, 12]), ('z', 'e', 'u', 'r', 'g', 'm')),
 (tensor([24,  3, 15,  5, 11, 21]), ('y', 'd', 'p', 'f', 'l', 'v')),
 (tensor([10, 16]), ('k', 'q'))]

for x,y in dl:
    pred = model(x)
    loss = loss_func(pred, y)
    loss.backward()
    parameters -= parameters.grad * lr



In [28]:
weights = init_params((28*28,1))
bias = init_params(1)

In [29]:
dl = DataLoader(dset,batch_size=256)
xb,yb = first(dl)
xb.shape, yb.shape

(torch.Size([256, 784]), torch.Size([256, 1]))

In [30]:
valid_dl = DataLoader(valid_dset,batch_size=256)

In [31]:
batch =train_x[:4]
batch.shape

torch.Size([4, 784])

In [32]:
preds =linear1(batch)
preds

tensor([[-14.7835],
        [-24.3078],
        [-25.0730],
        [-20.9898]], grad_fn=<AddBackward0>)

In [33]:
loss = mnist_loss(preds,train_y[:4])
loss

tensor(1.0000, grad_fn=<MeanBackward0>)

In [34]:
loss.backward()
weights.grad.shape,weights.grad.mean(),bias.grad

(torch.Size([784, 1]), tensor(-1.3595e-08), tensor([-9.5161e-08]))

In [35]:
def calc_grad(xb, yb, model):
    preds = model(xb)
    loss = mnist_loss(preds, yb)
    loss.backward()

In [36]:
calc_grad(batch, train_y[:4], linear1)
weights.grad.mean(),bias.grad

(tensor(-2.7190e-08), tensor([-1.9032e-07]))

In [37]:
weights.grad.zero_()
bias.grad.zero_()

tensor([0.])

In [38]:
def train_epoch(model,lr,params):
    for xb,yb in dl:
        calc_grad(xb,yb,model)
        for p in params:
            p.data -= p.grad * lr
            p.grad.zero_()



In [39]:
(preds > 0.0).float() == train_y[:4]


tensor([[False],
        [False],
        [False],
        [False]])

In [40]:
def batch_accuracy(xb,yb):
    preds =xb.sigmoid()
    correct =(preds>0.5) == yb
    return correct.float().mean()

In [41]:
batch_accuracy(linear1(batch),train_y[:4])

tensor(0.)

In [42]:
def validate_epoch(model):
    accs = [batch_accuracy(model(xb), yb) for xb,yb in valid_dl]
    return round(torch.stack(accs).mean().item(), 4)

validate_epoch(linear1)

0.4143

In [43]:
lr = 1.
params = weights,bias
train_epoch(linear1, lr, params)
validate_epoch(linear1)

0.5686

In [44]:
for i in range(20):
    train_epoch(linear1, lr, params)
    print(validate_epoch(linear1), end=' ')

0.7557 0.888 0.9232 0.9339 0.9506 0.954 0.9594 0.9618 

0.9637 0.9652 0.9657 0.9672 0.9681 0.9691 0.9691 0.9691 0.9711 0.9716 0.972 0.974 

# Creating an Optimizer

In [45]:
linear_model = nn.Linear(28*28,1)

In [46]:
w,b = linear_model.parameters()
w.shape, b.shape

(torch.Size([1, 784]), torch.Size([1]))

In [47]:
class BasicOptim:
    def __init__(self,params,lr):
        self.params,self.lr = list(params),lr

    def step(self,*args,**kwargs):
        for p in self.params:
            p.data -= p.grad.data*self.lr
    
    def zero_grad(self):
        for p in self.params:
            p.grad = None

In [48]:
opt = BasicOptim(linear_model.parameters(),lr)

In [49]:
def train_epoch(model):
    for xb,yb in dl:
        calc_grad(xb,yb,model)
        opt.step()
        opt.zero_grad()

In [50]:
validate_epoch(linear_model)

0.7506

In [51]:
def train_model(model, epochs):
    for i in range(epochs):
        train_epoch(model)
        print(validate_epoch(model), end=' ')

In [52]:
train_model(linear_model,20)

0.4932 0.7866 0.8594 0.9179 0.9346 0.9502 0.957 0.9638 0.9658 0.9672 0.9702 0.9721 0.9736 0.9751 0.9761 0.977 0.9775 0.9775 0.978 0.979 

In [53]:
linear_model = nn.Linear(28*28,1)
opt = SGD(linear_model.parameters(),lr)
train_model(linear_model,20)

0.4932 0.7617 0.8574 0.917 0.935 0.9492 0.956 0.9633 0.9653 0.9677 0.9702 0.9716 0.9736 0.9746 0.9761 0.977 0.9775 0.978 0.9785 0.979 

In [54]:
dls = DataLoaders(dl,valid_dl)

In [57]:
learn = Learner(dls, nn.Linear(28*28,1), opt_func=SGD,loss_func=mnist_loss, metrics=batch_accuracy)

In [58]:
learn.fit(10, lr=lr)

AttributeError: Exception occured in `ProgressCallback` when calling event `before_fit`:
	'NBMasterBar' object has no attribute 'out'