In [3]:
import torch
import torchvision 
from torchvision.datasets import MNIST
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import torchvision.transforms as transforms
import torch.nn.functional as F

In [7]:
input_size = 28*28
hidden_size = 100
num_classes = 10
batch_size = 200
learn_rate = 0.003

In [8]:
train_ds = torchvision.datasets.MNIST(root="data/JupyterProjects/",train=True,transform=transforms.ToTensor(),download=True)
test_ds = torchvision.datasets.MNIST(root="data/JupyterProjects/",train=False,transform=transforms.ToTensor(),download=True)

In [9]:
from torch.utils.data import DataLoader


In [10]:
train_dl = DataLoader(train_ds,shuffle=True,batch_size=batch_size)
test_dl = DataLoader(test_ds,shuffle=True,batch_size=batch_size)

In [11]:
for xb,yb in train_dl:
    print(xb.shape)
    break

torch.Size([200, 1, 28, 28])


In [13]:
class SimpleNN(nn.Module):
    def __init__(self,input_size,hidden_size,output_size):
        super().__init__()
        self.lin1 = nn.Linear(input_size,hidden_size)
        self.relu = nn.ReLU()
        self.lin2 = nn.Linear(hidden_size,output_size)
    
    def forward(self,dl):
        out=self.lin1(dl)
        out=self.relu(out)
        out=self.lin2(out)
        return out

In [14]:
model1 = SimpleNN(input_size,hidden_size,num_classes)


In [15]:
opt = torch.optim.Adam(model1.parameters(),learn_rate)
opt

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.003
    weight_decay: 0
)

In [16]:
loss_func = nn.CrossEntropyLoss() 

In [21]:

def train(num_epochs,model):
    for epoch in range(num_epochs):
        for xb,yb in train_dl: #xb, yb are taken batchwise from dataloader
            #reshapes the xb(input from [100,1,28,28] to [100,784] to match the model parameters)
            if model.__class__.__name__ == "SimpleNN":
                xb=xb.reshape(-1,28*28)
            
            #predicts the output for the given batch xb
            pred = model(xb)

            #calc the loss and find its gradient in order to calc change in weight needed
            loss=loss_func(pred,yb)
            loss.backward()

            #optimizer adjusts weights to reduce loss
            opt.step()

            #makes grad=0 to ensure theres no overlapping and summing of gradients
            opt.zero_grad()

        print(f"Loss in epoch {epoch+1}/{num_epochs} : {loss}")

In [13]:
train(10,model1)

Loss in epoch 1/10 : 0.2013801783323288
Loss in epoch 2/10 : 0.11297214776277542
Loss in epoch 3/10 : 0.19428320229053497
Loss in epoch 4/10 : 0.09385834634304047
Loss in epoch 5/10 : 0.04026123508810997
Loss in epoch 6/10 : 0.05915455520153046
Loss in epoch 7/10 : 0.022008370608091354
Loss in epoch 8/10 : 0.02458163909614086
Loss in epoch 9/10 : 0.03563602268695831
Loss in epoch 10/10 : 0.020175039768218994


In [25]:
def test(model):
    with torch.no_grad():
        correct=0
        total=0
        for xb,yb in test_dl:
            if model.__class__.__name__ == "SimpleNN":
                xb=xb.reshape(-1,28*28)
            pred=model(xb)
            _,pred_label=torch.max(pred,dim=1)
            correct += (pred_label==yb).sum().item()
            total += yb.shape[0]  #yb.shape[0] gives us the number of labels in a batch
        accuracy = 100*correct/total
        print(f"Accuracy = {accuracy}%")

In [15]:
test(model1)


Accuracy = 97.5%


In [17]:
class convModel(nn.Module): #output size formula = (input_width - filter + 2*padding)/stride  + 1
    def __init__(self):
        
        super().__init__()
        #1x28x28
        self.conv1=nn.Conv2d(1,8,kernel_size=3,padding=1,stride=1) 
        #8x28x28
        self.max1=nn.MaxPool2d(2,2)
        #8x14x14
        self.conv2=nn.Conv2d(8,16,kernel_size=3,padding=1,stride=1)
        #16x14x14
        self.max2=nn.MaxPool2d(2,2)
        #16x7x7
        self.fc1=nn.Linear(16*7*7,256)
        self.fc2=nn.Linear(256,10)
    
    def forward(self,xb):
        out=self.conv1(xb)
        out=F.relu(out)
        out=self.max1(out)
        out=self.conv2(out)
        out=F.relu(out)
        out=self.max2(out)
        out=out.view(-1,16*7*7)
        out=self.fc1(out)
        out=self.fc2(out)
        
        return out
        

In [18]:
model2 = convModel()

In [27]:
learn_rate=0.0001
opt = torch.optim.Adam(model2.parameters(),learn_rate)
opt

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0
)

In [28]:
train(10,model2) 

Loss in epoch 1/10 : 0.47581589221954346
Loss in epoch 2/10 : 0.2447930872440338
Loss in epoch 3/10 : 0.25057968497276306
Loss in epoch 4/10 : 0.1454818844795227
Loss in epoch 5/10 : 0.2774015963077545
Loss in epoch 6/10 : 0.23115921020507812
Loss in epoch 7/10 : 0.09506908059120178
Loss in epoch 8/10 : 0.06403619050979614
Loss in epoch 9/10 : 0.03712291643023491
Loss in epoch 10/10 : 0.11274515837430954


In [29]:
test(model2)

Accuracy = 97.3%
