In [2]:
import torch
import torchvision
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
import torch.nn.functional as f



In [3]:
# Fetch MNIST into data/ and load it. No transform is passed here; the
# ToTensor version of the dataset is created in a later cell.

dataset = MNIST(root='data/', download=True)

In [4]:
# Number of samples in the dataset loaded above
len(dataset)

60000

In [5]:
# Load the non-training split (train=False); no transform is passed here
test_dataset = MNIST(root='data/', train=False)

In [6]:
# Number of samples in the test split
len(test_dataset)

10000

In [7]:
# Rebind `dataset` to the training split with ToTensor applied to each sample
dataset = MNIST(root='data/',train=True,transform=transforms.ToTensor())

In [8]:
def split_indices(n, pct, seed=None):
    """Randomly partition the indices 0..n-1 into training and validation sets.

    Args:
        n: Total number of samples to split.
        pct: Fraction of the samples (between 0 and 1 inclusive) reserved for
            validation. The validation size is ``int(pct * n)``.
        seed: Optional seed for a reproducible split. When ``None`` (the
            default) the global NumPy random state is used, exactly as before.

    Returns:
        A ``(train_indices, val_indices)`` tuple of disjoint index arrays that
        together cover every index in ``range(n)``.

    Raises:
        ValueError: If ``pct`` lies outside the interval [0, 1].
    """
    # An out-of-range fraction would otherwise silently produce a nonsensical
    # split (e.g. a negative slice bound), so reject it up front.
    if not 0 <= pct <= 1:
        raise ValueError("pct must be between 0 and 1, got {!r}".format(pct))

    n_val = int(pct * n)

    if seed is None:
        idxs = np.random.permutation(n)
    else:
        # A dedicated generator keeps the split reproducible without touching
        # the global random state.
        idxs = np.random.default_rng(seed).permutation(n)

    # The first n_val shuffled indices become validation; the rest are training.
    return idxs[n_val:], idxs[:n_val]
    

In [9]:
# Hold out 20% of the sample indices for validation; the rest are for training
train_indices, val_indices = split_indices(len(dataset), pct=0.2)

In [10]:
# Sanity check: sizes of the training and validation index sets
print(len(train_indices), len(val_indices))

48000 12000


In [11]:
# Number of samples per mini-batch, shared by both loaders
batch_size=100

# Training batches are drawn at random from the training indices only
train_sampler = SubsetRandomSampler(train_indices)
train_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    sampler=train_sampler,
)

# Validation batches are drawn at random from the held-out indices only
val_sampler = SubsetRandomSampler(val_indices)
val_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    sampler=val_sampler,
)

In [12]:
# Length of one flattened input and the number of output classes
input_size = 28*28
num_classes = 10

# A bare linear layer mapping input_size values to num_classes outputs
model = nn.Linear(input_size,num_classes)

In [13]:
class MnistModel(nn.Module):
    """Single linear layer applied to flattened input batches.

    Args:
        in_features: Length of each flattened input. Defaults to the
            notebook-level ``input_size`` when omitted.
        out_features: Number of output scores per input. Defaults to the
            notebook-level ``num_classes`` when omitted.
    """

    def __init__(self, in_features=None, out_features=None):
        super().__init__()
        # Resolve the defaults lazily so `MnistModel()` keeps using the
        # notebook's globals, while explicit sizes need no globals at all.
        if in_features is None:
            in_features = input_size
        if out_features is None:
            out_features = num_classes
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, xb):
        """Flatten each sample in ``xb`` and return the linear layer's output."""
        # Flatten to the layer's actual input width instead of a hard-coded
        # 784, so the reshape can never disagree with the layer it feeds.
        xb = xb.reshape(-1, self.linear.in_features)
        out = self.linear(xb)
        return out
# Rebind `model` to the wrapper class, replacing the bare nn.Linear created earlier
model = MnistModel()

In [14]:
# Run the model on the first training batch only; `images`, `labels` and
# `outputs` stay in scope for the cells that follow.
for images,labels in train_loader:
    outputs = model(images)
    break
    
    

In [15]:
# Apply softmax across dim 1 of the batch outputs
probs = f.softmax(outputs, dim=1)


In [16]:
# Per row: the largest probability and the index at which it occurs
max_probs, preds = torch.max(probs,dim=1)
print(preds)
print(max_probs)

tensor([0, 5, 7, 5, 5, 5, 5, 5, 1, 6, 6, 7, 1, 5, 5, 1, 2, 5, 6, 1, 9, 1, 5, 6,
        0, 1, 0, 5, 2, 5, 9, 4, 6, 2, 0, 5, 4, 5, 1, 7, 2, 5, 7, 5, 5, 9, 2, 5,
        5, 6, 5, 5, 5, 1, 5, 7, 5, 7, 7, 9, 5, 5, 5, 5, 0, 5, 5, 2, 1, 9, 1, 2,
        5, 6, 5, 5, 5, 1, 7, 5, 1, 5, 7, 2, 5, 1, 0, 9, 7, 6, 7, 1, 5, 5, 1, 5,
        5, 9, 5, 5])
tensor([0.1184, 0.1229, 0.1507, 0.1273, 0.1324, 0.1167, 0.1374, 0.1157, 0.1361,
        0.1450, 0.1234, 0.1251, 0.1188, 0.1251, 0.1151, 0.1365, 0.1481, 0.1282,
        0.1441, 0.1592, 0.1336, 0.1266, 0.1327, 0.1331, 0.1464, 0.1630, 0.1402,
        0.1466, 0.1478, 0.1364, 0.1358, 0.1123, 0.1262, 0.1312, 0.1268, 0.1279,
        0.1238, 0.1223, 0.1489, 0.1244, 0.1235, 0.1242, 0.1514, 0.1282, 0.1440,
        0.1198, 0.1217, 0.1105, 0.1117, 0.1365, 0.1578, 0.1443, 0.1381, 0.1384,
        0.1552, 0.1216, 0.1203, 0.1166, 0.1462, 0.1212, 0.1251, 0.1220, 0.1478,
        0.1264, 0.1443, 0.1424, 0.1219, 0.1325, 0.1165, 0.1414, 0.1301, 0.1276,
        0.1297, 0.1

In [17]:
def accuracy(ll,l2):
    """Return the fraction of positions at which tensors `ll` and `l2` are equal."""
    n_matching = (ll == l2).sum().item()
    n_total = len(ll)
    return n_matching / n_total

In [18]:
# Confirm both tensors have the same length, then score the sampled batch
print(len(preds))
print(len(labels))
accuracy(preds,labels)

100
100


0.07

In [53]:
# Alias cross-entropy as the loss function
loss_fn = f.cross_entropy

In [54]:
# Loss of the model outputs on the batch sampled earlier
loss = loss_fn(outputs,labels)
print(loss)

tensor(2.3072, grad_fn=<NllLossBackward0>)


In [55]:
# Plain SGD over all of the model's parameters
learning_rate = 0.001
optimiser = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [56]:
def loss_batch(model,loss_func,xb,yb,opt=None,metric=None):
    """Compute the loss for one batch and optionally take an optimisation step.

    Args:
        model: Callable that maps the batch inputs to predictions.
        loss_func: Callable ``loss_func(preds, yb)`` returning a scalar tensor.
        xb: Batch of inputs.
        yb: Batch of targets.
        opt: Optional optimiser. When given, gradients are computed and one
            update step is applied; when ``None`` the batch is only scored.
        metric: Optional callable ``metric(preds, yb)`` evaluated on the batch.

    Returns:
        A ``(loss_value, batch_size, metric_result)`` tuple, where
        ``loss_value`` is a Python float, ``batch_size`` is ``len(xb)`` and
        ``metric_result`` is ``None`` when no metric was supplied.
    """
    if opt is not None:
        # Clear gradients *before* the backward pass so that anything already
        # sitting on the parameters (e.g. from an earlier manual backward())
        # cannot leak into this update.
        opt.zero_grad()

    preds = model(xb)
    loss = loss_func(preds,yb)

    if opt is not None:
        loss.backward()
        opt.step()

    metric_result = None
    if metric is not None:
        # The metric is computed on the predictions made before the update.
        metric_result = metric(preds,yb)

    return loss.item(),len(xb),metric_result
        

In [57]:
def evaluate(model,loss_fn,valid_dl,metric=None):
    """Score a model over every batch of a loader without tracking gradients.

    Args:
        model: Model to evaluate; passed through to ``loss_batch``.
        loss_fn: Loss callable passed through to ``loss_batch``.
        valid_dl: Iterable yielding ``(xb, yb)`` batches.
        metric: Optional metric callable passed through to ``loss_batch``.

    Returns:
        A ``(avg_loss, total, avg_metric)`` tuple: the batch-size-weighted mean
        loss, the total number of samples seen, and the batch-size-weighted
        mean metric (``None`` when no metric was supplied).

    Raises:
        ValueError: If ``valid_dl`` yields no batches.
    """
    # Evaluate in eval mode so train-only layers do not perturb the scores.
    # Each module's own flag is remembered and restored afterwards, so the
    # caller's training setup is left exactly as it was.
    saved_modes = []
    if isinstance(model, torch.nn.Module):
        saved_modes = [(module, module.training) for module in model.modules()]
        model.eval()

    try:
        with torch.no_grad():
            results = [loss_batch(model,loss_fn,xb,yb,metric=metric) for xb,yb in valid_dl]
    finally:
        for module, was_training in saved_modes:
            module.training = was_training

    if not results:
        # Fail with a clear message instead of an opaque unpacking error.
        raise ValueError("valid_dl produced no batches; nothing to evaluate")

    losses,nums,metrics = zip(*results)
    total = np.sum(nums)

    # Weight each batch by its size so a smaller final batch is not over-counted.
    avg_loss = np.sum(np.multiply(losses,nums)) / total
    avg_metric = None
    if metric is not None:
        avg_metric = np.sum(np.multiply(metrics,nums)) / total

    return avg_loss,total,avg_metric
            

In [58]:
def accuracy(outputs,labels):
    """Return the fraction of rows of `outputs` whose largest entry (along dim 1) sits at the index given in `labels`."""
    preds = torch.max(outputs, dim=1).indices
    n_correct = (preds == labels).sum().item()
    return n_correct / len(preds)

In [59]:
# Loss and accuracy of the current model over the validation loader
val_loss,total,val_acc = evaluate(model,loss_fn,val_loader,metric=accuracy)
print('Loss: {:.4f}, Accuracy: {:.4f}'.format(val_loss,val_acc))

Loss: 2.3035, Accuracy: 0.1009


In [60]:
def fit(epochs,model,loss_fn,opt,train_dl,valid_dl,metric=None):
    """Train `model` for `epochs` passes, printing validation results after each.

    Every epoch runs one optimisation step per batch of `train_dl`, then
    evaluates on `valid_dl` and prints the validation loss (plus the metric,
    labelled with its function name, when `metric` is given).
    """
    for epoch_idx in range(epochs):
        # Training pass: one optimiser step per batch.
        for batch_x, batch_y in train_dl:
            loss_batch(model, loss_fn, batch_x, batch_y, opt)

        # Validation pass over the held-out loader.
        val_loss, _, val_metric = evaluate(model, loss_fn, valid_dl, metric)

        if metric is not None:
            print('Epoch[{}/{}],Loss: {:.4f}, {}: {:.4f}'
                .format(epoch_idx+1,epochs,val_loss,metric.__name__,val_metric))
            continue

        print('Epoch [{}/{}], Loss: {:.4f}'
            .format(epoch_idx+1,epochs,val_loss))

In [61]:
# Start from a fresh model and build an optimiser over its parameters
model = MnistModel()
optimiser = torch.optim.SGD(model.parameters(), lr=learning_rate)


In [62]:
# Train for 10 epochs, reporting validation loss and accuracy after each one
fit(10,model,f.cross_entropy,optimiser,train_loader,val_loader,accuracy)

Epoch[1/10],Loss: 1.8652, accuracy: 0.6469
Epoch[2/10],Loss: 1.5702, accuracy: 0.7342
Epoch[3/10],Loss: 1.3628, accuracy: 0.7674
Epoch[4/10],Loss: 1.2138, accuracy: 0.7887
Epoch[5/10],Loss: 1.1031, accuracy: 0.8031
Epoch[6/10],Loss: 1.0183, accuracy: 0.8139
Epoch[7/10],Loss: 0.9513, accuracy: 0.8213
Epoch[8/10],Loss: 0.8971, accuracy: 0.8259
Epoch[9/10],Loss: 0.8524, accuracy: 0.8310
Epoch[10/10],Loss: 0.8148, accuracy: 0.8354
