In [1]:
# Importing all the libraries in
import torch # Import pytorch
import torch.nn as nn # For creating Neural Network Models
import torch.nn.functional as F #
import torch.optim as optim # Importing optimizer from pytorch library
from torchvision import datasets, transforms # Importing datasets and transform function to run over the dataset
!pip install torchsummary # Installing Torch summary to view the model summary
from torchsummary import summary # Importing the summary function from the installed torchsummary library

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
  # checking if Cuda is available, it is available it returns True, else returns False
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu") # Assign Device to Cuda or CPU based on avaiablily
device

device(type='cuda')

In [3]:
# current Batch size is set to 128, meaning the dataset will be split in different batches,
# each batch will contain 128 datapoints or here images. 
batch_size = 128

# Calling a Dataloader function that takes in the dataset, batchsize and shuffle.
# Here the dataset is mnist and is loaded from the datasets function loaded from 
# torch vision library
train_loader = torch.utils.data.DataLoader(
    # loading MNIST dataset to data directory, train is true therefore loading the
    # dataset from the training directory, download is true indicating the data needs
    # to be downloaded. Performing a Transformation Operation, here there are two operations
    # ToTensor and Normalize and each is performed one after the other. ToTensor operation is 
    # used to convert it tensor and Normalize takes in mean and std of the dataset to normalize
    # in the input image. shuffle indicates the data will be loaded randomly
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomApply([transforms.CenterCrop(22), ], p=0.1),
                        transforms.Resize((28, 28)),
                        transforms.RandomRotation((-15., 15.), fill=0),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,)
                        ) # these are the MNIST dataset mean and std values of dataset set
                    ])),
                    
    batch_size=batch_size, shuffle=True)
  # Performing the same operation to create the test data, therefore train is set to false and
  # is loaded to test_loader
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,)) # these are the MNIST dataset mean and std values of training set
                    ])),
    batch_size=batch_size, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 163837419.49it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 118993805.33it/s]


Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 25600000.73it/s]


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 12394618.59it/s]


Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



# Some Notes on our naive model

We are going to write a network based on what we have learnt so far. 

The size of the input image is 28x28x1. We are going to add as many layers as required to reach RF = 32 "atleast". 

In [8]:
class Net(nn.Module):
    #This defines the structure of the NN.
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3,bias = False)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3,bias = False)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3,bias = False)
        self.conv4 = nn.Conv2d(32, 16, kernel_size=1,bias = False)
        self.conv5 = nn.Conv2d(16, 16, kernel_size=3,bias = False)
        self.conv6 = nn.Conv2d(16, 16, kernel_size=3,bias = False)
        self.conv7 = nn.Conv2d(16, 16, padding = 1, kernel_size=3,bias = False)
        self.conv8 = nn.Conv2d(16, 16, kernel_size=3,bias = False)
        self.conv9 = nn.Conv2d(16, 10, kernel_size=1,bias = False)
        self.batch1 = nn.BatchNorm2d(16)
        self.batch2 = nn.BatchNorm2d(16)
        self.batch3 = nn.BatchNorm2d(32)
        self.batch4 = nn.BatchNorm2d(16)
        self.batch5 = nn.BatchNorm2d(16)
        self.batch6 = nn.BatchNorm2d(16)
        self.batch7 = nn.BatchNorm2d(16)
        self.batch8 = nn.BatchNorm2d(16)
        self.pool = nn.MaxPool2d(2,2)
        self.dropout1 = nn.Dropout2d(0.1)
        self.dropout2 = nn.Dropout2d(0.1)
        self.dropout3 = nn.Dropout2d(0.1)
        self.dropout4 = nn.Dropout2d(0.1)
        self.dropout5 = nn.Dropout2d(0.1)
        self.dropout6 = nn.Dropout2d(0.1)
        self.dropout7 = nn.Dropout2d(0.1)
        self.avgpool = nn.AvgPool2d(5)

    def forward(self, x):
        x = self.dropout1(self.batch1(F.relu(self.conv1(x))))
        x = self.dropout2(self.batch2(F.relu(self.conv2(x))))
        x = self.dropout3(self.batch3(F.relu(self.conv3(x))))
        x = self.pool(F.relu(self.conv4(x)))
        x = self.dropout4(self.batch4(F.relu(self.conv5(x))))
        x = self.dropout5(self.batch5(F.relu(self.conv6(x))))
        x = self.dropout6(self.batch6(F.relu(self.conv7(x))))
        x = self.dropout7(self.batch7(F.relu(self.conv8(x))))
        x = self.avgpool(self.conv9(x))
        x = x.view(-1,10)

        return F.log_softmax(x, dim=1)

# loading Neural Network to device
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
       BatchNorm2d-2           [-1, 16, 26, 26]              32
         Dropout2d-3           [-1, 16, 26, 26]               0
            Conv2d-4           [-1, 16, 24, 24]           2,304
       BatchNorm2d-5           [-1, 16, 24, 24]              32
         Dropout2d-6           [-1, 16, 24, 24]               0
            Conv2d-7           [-1, 32, 22, 22]           4,608
       BatchNorm2d-8           [-1, 32, 22, 22]              64
         Dropout2d-9           [-1, 32, 22, 22]               0
           Conv2d-10           [-1, 16, 22, 22]             512
        MaxPool2d-11           [-1, 16, 11, 11]               0
           Conv2d-12             [-1, 16, 9, 9]           2,304
      BatchNorm2d-13             [-1, 16, 9, 9]              32
        Dropout2d-14             [-1, 1

In [11]:
from tqdm import tqdm
def train(model:nn.Module, device:torch.device, train_loader, optimizer, epoch):
    # print(type(model))
    # print(type(device))
    # print(type(train_loader))
    # print(type(optimizer))
    # print(type(epoch))
    # Putting the model to train mode
    model.train()
    
    # Loading train dtaloader to Tqdm to produce output in bar for 
    # visual interpretation.
    correct = 0
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        # adding data and target label to cuda
        data, target = data.to(device), target.to(device)
        # print("\nData Shape:",data.shape)
        # making all the gradients zero before forward propogation
        optimizer.zero_grad()
        # print("Target Shape:",target.shape)
        # loading data to model
        output = model(data)
        # print("output Shape:",output.shape)
        
        # calculating loss with output and target using negative log likelyhood loss
        loss = F.nll_loss(output, target)
        
        # calcualting back propogation
        loss.backward()
        
        # Revaulating the model and updating the gradient
        optimizer.step()
        
        pred = output.argmax(dim=1,keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx} Accuracy = {correct/len(train_loader.dataset)}')
        # break
    # print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    #     test_loss, correct, len(test_loader.dataset),
    #     100. * correct / len(test_loader.dataset)))


def test(model, device, test_loader):
    # Putting the model to eval mode
    model.eval()

    # Test_loss is kept to 0
    test_loss = 0
    
    # correct value 
    correct = 0
    # Loading model without gradient
    with torch.no_grad():
        # Load test model
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            # Running model to testing data
            output = model(data)
            # calculating testing call
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            # calculating prediction
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            # checking all the correct predictions
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [12]:
# Loading Stochastic gradient descent optimzer with momentum with learning as 0.01 and momentum as 0.9.

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1, verbose=True)
#running the model for one epoch 
for epoch in range(1, 21):
    # calling the train the function that takes in model, device, 
    # train_loader, optimzer and epoch as input
    print("Epoch:",epoch)
    train(model, device, train_loader, optimizer, epoch)
    # calling the test the function that takes in model, device, 
    # test_loader as input
    test(model, device, test_loader)

Epoch: 1


loss=0.12354777008295059 batch_id=468 Accuracy = 0.8275166666666667: 100%|██████████| 469/469 [00:33<00:00, 14.05it/s]



Test set: Average loss: 0.0692, Accuracy: 9803/10000 (98.03%)

Epoch: 2


loss=0.13961510360240936 batch_id=468 Accuracy = 0.9527166666666667: 100%|██████████| 469/469 [00:31<00:00, 15.06it/s]



Test set: Average loss: 0.0479, Accuracy: 9858/10000 (98.58%)

Epoch: 3


loss=0.12682782113552094 batch_id=468 Accuracy = 0.96255: 100%|██████████| 469/469 [00:30<00:00, 15.16it/s]



Test set: Average loss: 0.0335, Accuracy: 9906/10000 (99.06%)

Epoch: 4


loss=0.06745871156454086 batch_id=468 Accuracy = 0.9664166666666667: 100%|██████████| 469/469 [00:30<00:00, 15.33it/s]



Test set: Average loss: 0.0301, Accuracy: 9919/10000 (99.19%)

Epoch: 5


loss=0.2324662208557129 batch_id=468 Accuracy = 0.96935: 100%|██████████| 469/469 [00:30<00:00, 15.36it/s]



Test set: Average loss: 0.0273, Accuracy: 9917/10000 (99.17%)

Epoch: 6


loss=0.07950099557638168 batch_id=468 Accuracy = 0.9716166666666667: 100%|██████████| 469/469 [00:30<00:00, 15.29it/s]



Test set: Average loss: 0.0289, Accuracy: 9904/10000 (99.04%)

Epoch: 7


loss=0.09743116050958633 batch_id=468 Accuracy = 0.97285: 100%|██████████| 469/469 [00:29<00:00, 15.67it/s]



Test set: Average loss: 0.0244, Accuracy: 9922/10000 (99.22%)

Epoch: 8


loss=0.0546792633831501 batch_id=468 Accuracy = 0.9755: 100%|██████████| 469/469 [00:30<00:00, 15.52it/s]



Test set: Average loss: 0.0253, Accuracy: 9917/10000 (99.17%)

Epoch: 9


loss=0.12470967322587967 batch_id=468 Accuracy = 0.9755666666666667: 100%|██████████| 469/469 [00:30<00:00, 15.39it/s]



Test set: Average loss: 0.0230, Accuracy: 9929/10000 (99.29%)

Epoch: 10


loss=0.0580698698759079 batch_id=468 Accuracy = 0.9755833333333334: 100%|██████████| 469/469 [00:29<00:00, 15.71it/s]



Test set: Average loss: 0.0225, Accuracy: 9930/10000 (99.30%)

Epoch: 11


loss=0.07679096609354019 batch_id=468 Accuracy = 0.9776833333333333: 100%|██████████| 469/469 [00:30<00:00, 15.45it/s]



Test set: Average loss: 0.0211, Accuracy: 9937/10000 (99.37%)

Epoch: 12


loss=0.22441083192825317 batch_id=468 Accuracy = 0.9773333333333334: 100%|██████████| 469/469 [00:30<00:00, 15.51it/s]



Test set: Average loss: 0.0213, Accuracy: 9934/10000 (99.34%)

Epoch: 13


loss=0.15725980699062347 batch_id=468 Accuracy = 0.9785666666666667: 100%|██████████| 469/469 [00:29<00:00, 15.75it/s]



Test set: Average loss: 0.0221, Accuracy: 9921/10000 (99.21%)

Epoch: 14


loss=0.15376102924346924 batch_id=468 Accuracy = 0.9785333333333334: 100%|██████████| 469/469 [00:31<00:00, 14.82it/s]



Test set: Average loss: 0.0192, Accuracy: 9937/10000 (99.37%)

Epoch: 15


loss=0.16552381217479706 batch_id=468 Accuracy = 0.9795833333333334: 100%|██████████| 469/469 [00:31<00:00, 15.09it/s]



Test set: Average loss: 0.0186, Accuracy: 9937/10000 (99.37%)

Epoch: 16


loss=0.02699831686913967 batch_id=468 Accuracy = 0.9792833333333333: 100%|██████████| 469/469 [00:30<00:00, 15.34it/s]



Test set: Average loss: 0.0202, Accuracy: 9938/10000 (99.38%)

Epoch: 17


loss=0.06324764341115952 batch_id=468 Accuracy = 0.9800166666666666: 100%|██████████| 469/469 [00:30<00:00, 15.25it/s]



Test set: Average loss: 0.0199, Accuracy: 9944/10000 (99.44%)

Epoch: 18


loss=0.08698845654726028 batch_id=468 Accuracy = 0.9807833333333333: 100%|██████████| 469/469 [00:30<00:00, 15.29it/s]



Test set: Average loss: 0.0201, Accuracy: 9933/10000 (99.33%)

Epoch: 19


loss=0.15648655593395233 batch_id=468 Accuracy = 0.9797: 100%|██████████| 469/469 [00:30<00:00, 15.26it/s]



Test set: Average loss: 0.0184, Accuracy: 9943/10000 (99.43%)

Epoch: 20


loss=0.014032346196472645 batch_id=468 Accuracy = 0.9799166666666667: 100%|██████████| 469/469 [00:30<00:00, 15.60it/s]



Test set: Average loss: 0.0183, Accuracy: 9943/10000 (99.43%)



## With Scheduler


In [14]:
# Loading Stochastic gradient descent optimzer with momentum with learning as 0.01 and momentum as 0.9.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1, verbose=True)
#running the model for one epoch 
for epoch in range(1, 21):
    # calling the train the function that takes in model, device, 
    # train_loader, optimzer and epoch as input
    print("Epoch:",epoch)
    train(model, device, train_loader, optimizer, epoch)
    # calling the test the function that takes in model, device, 
    # test_loader as input
    test(model, device, test_loader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
       BatchNorm2d-2           [-1, 16, 26, 26]              32
         Dropout2d-3           [-1, 16, 26, 26]               0
            Conv2d-4           [-1, 16, 24, 24]           2,304
       BatchNorm2d-5           [-1, 16, 24, 24]              32
         Dropout2d-6           [-1, 16, 24, 24]               0
            Conv2d-7           [-1, 32, 22, 22]           4,608
       BatchNorm2d-8           [-1, 32, 22, 22]              64
         Dropout2d-9           [-1, 32, 22, 22]               0
           Conv2d-10           [-1, 16, 22, 22]             512
        MaxPool2d-11           [-1, 16, 11, 11]               0
           Conv2d-12             [-1, 16, 9, 9]           2,304
      BatchNorm2d-13             [-1, 16, 9, 9]              32
        Dropout2d-14             [-1, 1

loss=0.14270387589931488 batch_id=468 Accuracy = 0.8207666666666666: 100%|██████████| 469/469 [00:31<00:00, 14.83it/s]



Test set: Average loss: 0.0706, Accuracy: 9809/10000 (98.09%)

Epoch: 2


loss=0.13561177253723145 batch_id=468 Accuracy = 0.9539166666666666: 100%|██████████| 469/469 [00:30<00:00, 15.25it/s]



Test set: Average loss: 0.0429, Accuracy: 9872/10000 (98.72%)

Epoch: 3


loss=0.11224984377622604 batch_id=468 Accuracy = 0.9630833333333333: 100%|██████████| 469/469 [00:30<00:00, 15.54it/s]



Test set: Average loss: 0.0334, Accuracy: 9898/10000 (98.98%)

Epoch: 4


loss=0.11910528689622879 batch_id=468 Accuracy = 0.9687666666666667: 100%|██████████| 469/469 [00:30<00:00, 15.40it/s]



Test set: Average loss: 0.0280, Accuracy: 9910/10000 (99.10%)

Epoch: 5


loss=0.08742427825927734 batch_id=468 Accuracy = 0.9704666666666667: 100%|██████████| 469/469 [00:30<00:00, 15.17it/s]



Test set: Average loss: 0.0311, Accuracy: 9905/10000 (99.05%)

Epoch: 6


loss=0.14327527582645416 batch_id=468 Accuracy = 0.97045: 100%|██████████| 469/469 [00:30<00:00, 15.52it/s]



Test set: Average loss: 0.0278, Accuracy: 9914/10000 (99.14%)

Epoch: 7


loss=0.1552935540676117 batch_id=468 Accuracy = 0.9731833333333333: 100%|██████████| 469/469 [00:30<00:00, 15.18it/s]



Test set: Average loss: 0.0284, Accuracy: 9910/10000 (99.10%)

Epoch: 8


loss=0.09453827142715454 batch_id=468 Accuracy = 0.9744166666666667: 100%|██████████| 469/469 [00:31<00:00, 15.06it/s]



Test set: Average loss: 0.0246, Accuracy: 9925/10000 (99.25%)

Epoch: 9


loss=0.041011519730091095 batch_id=468 Accuracy = 0.9742: 100%|██████████| 469/469 [00:30<00:00, 15.40it/s]



Test set: Average loss: 0.0256, Accuracy: 9912/10000 (99.12%)

Epoch: 10


loss=0.14996129274368286 batch_id=468 Accuracy = 0.9759166666666667: 100%|██████████| 469/469 [00:30<00:00, 15.47it/s]



Test set: Average loss: 0.0248, Accuracy: 9927/10000 (99.27%)

Epoch: 11


loss=0.02681819163262844 batch_id=468 Accuracy = 0.9765: 100%|██████████| 469/469 [00:31<00:00, 14.98it/s]



Test set: Average loss: 0.0227, Accuracy: 9926/10000 (99.26%)

Epoch: 12


loss=0.08092562854290009 batch_id=468 Accuracy = 0.977: 100%|██████████| 469/469 [00:31<00:00, 14.97it/s]



Test set: Average loss: 0.0212, Accuracy: 9933/10000 (99.33%)

Epoch: 13


loss=0.05091549828648567 batch_id=468 Accuracy = 0.9779666666666667: 100%|██████████| 469/469 [00:30<00:00, 15.48it/s]



Test set: Average loss: 0.0201, Accuracy: 9940/10000 (99.40%)

Epoch: 14


loss=0.08318787813186646 batch_id=468 Accuracy = 0.9781833333333333: 100%|██████████| 469/469 [00:30<00:00, 15.16it/s]



Test set: Average loss: 0.0201, Accuracy: 9938/10000 (99.38%)

Epoch: 15


loss=0.014820307493209839 batch_id=468 Accuracy = 0.97895: 100%|██████████| 469/469 [00:31<00:00, 15.03it/s]



Test set: Average loss: 0.0205, Accuracy: 9933/10000 (99.33%)

Epoch: 16


loss=0.08080179244279861 batch_id=468 Accuracy = 0.97935: 100%|██████████| 469/469 [00:30<00:00, 15.45it/s]



Test set: Average loss: 0.0201, Accuracy: 9936/10000 (99.36%)

Epoch: 17


loss=0.13949298858642578 batch_id=468 Accuracy = 0.9783166666666666: 100%|██████████| 469/469 [00:30<00:00, 15.33it/s]



Test set: Average loss: 0.0204, Accuracy: 9939/10000 (99.39%)

Epoch: 18


loss=0.02707289718091488 batch_id=468 Accuracy = 0.9795: 100%|██████████| 469/469 [00:30<00:00, 15.15it/s]



Test set: Average loss: 0.0198, Accuracy: 9937/10000 (99.37%)

Epoch: 19


loss=0.11449801176786423 batch_id=468 Accuracy = 0.9804166666666667: 100%|██████████| 469/469 [00:30<00:00, 15.59it/s]



Test set: Average loss: 0.0192, Accuracy: 9939/10000 (99.39%)

Epoch: 20


loss=0.09861847013235092 batch_id=468 Accuracy = 0.9805833333333334: 100%|██████████| 469/469 [00:30<00:00, 15.39it/s]



Test set: Average loss: 0.0204, Accuracy: 9934/10000 (99.34%)

