#The MNIST Loss function

In [37]:
import torch
from fastai.vision.all import *
import matplotlib.pyplot as plt

In [38]:
path = untar_data(URLs.MNIST_SAMPLE)


In [39]:
threes = (path/'train'/'3').ls().sorted()
sevens = (path/'train'/'7').ls().sorted()

In [40]:
threes_tensors = [tensor(Image.open(o)) for o in threes]
sevens_tensors = [tensor(Image.open(o)) for o in sevens]


In [41]:
stacked_sevens = torch.stack(sevens_tensors).float()/255
stacked_threes = torch.stack(threes_tensors).float()/255


In [42]:
valid_3_tens = torch.stack([tensor(Image.open(o)) for o in (path/'valid'/'3').ls()])
valid_7_tens = torch.stack([tensor(Image.open(o)) for o in (path/'valid'/'7').ls()])
valid_7_tens = valid_7_tens.float()/255
valid_3_tens = valid_3_tens.float()/255

In [43]:

train_x = torch.cat([stacked_threes, stacked_sevens]).view(-1,28*28)


In [44]:
train_y = tensor([1]*len(threes) + [0]*len(sevens)).unsqueeze(1)
train_x.shape, train_y.shape

(torch.Size([12396, 784]), torch.Size([12396, 1]))

In [45]:
dset = list(zip(train_x, train_y))
x,y = dset[0]
x.shape, y

(torch.Size([784]), tensor([1]))

In [46]:
valid_x = torch.cat([valid_3_tens, valid_7_tens]).view(-1,28*28)
valid_y = tensor([1]*len(valid_3_tens) + [0]*len(valid_7_tens)).unsqueeze(1)
valid_dset = list(zip(valid_x, valid_y))

In [47]:
def init_params(size, std=1.0): return (torch.randn(size)*std).requires_grad_()

weights = init_params((28*28,1))
bias = init_params(1)

In [48]:
(train_x[0]*weights).sum() + bias

tensor([-4227.6357], grad_fn=<AddBackward0>)

In [49]:
def linear1(xb): return xb@weights + bias
preds = linear1(train_x)
preds

tensor([[  9.5694],
        [  8.2718],
        [  5.2086],
        ...,
        [-17.6700],
        [ -7.0875],
        [-13.6705]], grad_fn=<AddBackward0>)

In [50]:
corrects = (preds>0.0).float() == train_y
corrects

tensor([[True],
        [True],
        [True],
        ...,
        [True],
        [True],
        [True]])

In [51]:
corrects.float().mean()

tensor(0.7085)

In [52]:
weights[0].data *= 1.0001
preds = linear1(train_x)
((preds>0.0).float() == train_y).float().mean()

tensor(0.7085)

In [53]:
trgts = tensor([1,0,1])
prds = tensor([0.9,0.4,0.2])

In [54]:
def mnist_loss(preds, targs):
    return torch.where(targs==1, 1-preds, preds).mean()

In [55]:
torch.where(trgts==1, 1-prds, prds)

tensor([0.1000, 0.4000, 0.8000])

In [56]:
mnist_loss(tensor([0.9,0.4,0.8]), trgts)

tensor(0.2333)

#Sigmoid

In [57]:
def sigmoid(x): return 1/(1+torch.exp(-x))


In [58]:
#plot_function(torch.sigmoid, title='Sigmoid function',min=-4,max=4)

In [59]:
def mnist_loss(predictions,targets):
    predictions = predictions.sigmoid()
    return torch.where(targets==1, 1-predictions, predictions).mean()



In [60]:
coll = range(15)
dl = DataLoader(coll, batch_size=5,shuffle=True)
list(dl)

[tensor([ 1,  8, 14,  9, 10]),
 tensor([12, 13, 11,  7,  0]),
 tensor([4, 2, 5, 3, 6])]

In [61]:
ds = L(enumerate(string.ascii_lowercase))
ds

[(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (4, 'e'), (5, 'f'), (6, 'g'), (7, 'h'), (8, 'i'), (9, 'j'), (10, 'k'), (11, 'l'), (12, 'm'), (13, 'n'), (14, 'o'), (15, 'p'), (16, 'q'), (17, 'r'), (18, 's'), (19, 't'), (20, 'u'), (21, 'v'), (22, 'w'), (23, 'x'), (24, 'y'), (25, 'z')]

In [62]:
dl = DataLoader(ds,batch_size=6, shuffle=True)
list(dl)


[(tensor([20, 16,  4,  9, 15, 19]), ('u', 'q', 'e', 'j', 'p', 't')),
 (tensor([ 3, 24, 13,  7, 12, 17]), ('d', 'y', 'n', 'h', 'm', 'r')),
 (tensor([ 5,  0, 22, 14,  6,  8]), ('f', 'a', 'w', 'o', 'g', 'i')),
 (tensor([25,  1,  2, 10, 11, 21]), ('z', 'b', 'c', 'k', 'l', 'v')),
 (tensor([18, 23]), ('s', 'x'))]

for x,y in dl:
    pred = model(x)
    loss = loss_func(pred, y)
    loss.backward()
    parameters -= parameters.grad * lr



In [63]:
weights = init_params((28*28,1))
bias = init_params(1)

In [64]:
dl = DataLoader(dset,batch_size=256)
xb,yb = first(dl)
xb.shape, yb.shape

(torch.Size([256, 784]), torch.Size([256, 1]))

In [65]:
valid_dl = DataLoader(valid_dset,batch_size=256)

In [66]:
batch =train_x[:4]
batch.shape

torch.Size([4, 784])

In [67]:
preds =linear1(batch)
preds

tensor([[-13.2894],
        [-12.1200],
        [-20.4647],
        [-26.5647]], grad_fn=<AddBackward0>)

In [68]:
loss = mnist_loss(preds,train_y[:4])
loss

tensor(1.0000, grad_fn=<MeanBackward0>)

In [69]:
loss.backward()
weights.grad.shape,weights.grad.mean(),bias.grad

(torch.Size([784, 1]), tensor(-2.5443e-07), tensor([-1.7857e-06]))