In [0]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import pandas as pd

In [0]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=20, kernel_size=(3,3), padding=0, stride=1), 
            nn.ReLU()
        )#Output=26 RF=3X3 [RFin + (Ksize-1 * JMPin) => 1+(3-1)*1 =3]  :JMPin=1, Jout= JMPin X s = 1
        
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=20, out_channels=16, kernel_size=(3,3), padding=0, stride=1),
            nn.ReLU()    
        )#Output=24 RF=5X5  [RFin + (Ksize-1 * JMPin) => 3+(3-1)*1 =5] :JMPin=1, Jout =JMPin X s =1

        self.pool1 = nn.MaxPool2d(2, 2)#Output=12 RF=6X6 [RFin + (Ksize-1 * JMPin) => 5+(2-1)*1 =6] :JMPin=1, Jout=  JMPin X s =2

        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3,3), padding=0, stride=1),
            nn.ReLU()
        )#Output=10 RF=10X10 [RFin + (Ksize-1 * JMPin) => 6+(3-1)*2 =10] : Jout= JMPin X s = 2X1 :JMPin=2, Jout= JMPin X s = 2X1=2

        self.pool2 = nn.MaxPool2d(2, 2) #Output=5 RF=12[RFin + (Ksize-1 * JMPin) => 10+(2-1)*2 =12]  :JMPin=2, Jout =JMPin X s = 2X2 =4

        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3,3), padding=0, stride=1),
            nn.ReLU()            
        )#Output=3 RF= 20[RFin + (Ksize-1 * JMPin) => 12+(3-1)*4 =20] :JMPin=4, Jout =JMPin X s = 4X1=4

        
        self.conv5 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(3,3), padding=0, stride=1),
            #nn.ReLU()
        )#Output=1 RF=28 [RFin + (Ksize-1 * JMPin) => 20+(3-1)*4 =28]  :JMPin=4, Jout=JMPin X s = 4X1=4
     

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool1(x)
        x = self.conv3(x)
        x = self.pool2(x)
        x = self.conv4(x)
        x = self.conv5(x)        
        x = x.view(-1, 10)
        return F.log_softmax(x)

In [5]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 20, 26, 26]             200
              ReLU-2           [-1, 20, 26, 26]               0
            Conv2d-3           [-1, 16, 24, 24]           2,896
              ReLU-4           [-1, 16, 24, 24]               0
         MaxPool2d-5           [-1, 16, 12, 12]               0
            Conv2d-6           [-1, 16, 10, 10]           2,320
              ReLU-7           [-1, 16, 10, 10]               0
         MaxPool2d-8             [-1, 16, 5, 5]               0
            Conv2d-9             [-1, 16, 3, 3]           2,320
             ReLU-10             [-1, 16, 3, 3]               0
           Conv2d-11             [-1, 10, 1, 1]           1,450
Total params: 9,186
Trainable params: 9,186
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/back



In [6]:


torch.manual_seed(1)
batch_size = 128

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw
Processing...
Done!


In [7]:
from tqdm import tqdm

Train_Loss=[]
Train_Accuracy=[]
Test_Loss=[]
Test_Accuracy=[]

def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    correct_train=0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        Train_Loss.append(loss.item())
        pred = output.argmax(dim=1, keepdim=True)
        correct_train += pred.eq(target.view_as(pred)).sum().item()
        Train_Accuracy.append(100.00 *correct_train/len(train_loader.dataset))
        loss.backward()
        optimizer.step()
        #pbar.set_description(desc= f'TRAINING Loss={loss.item()} batch_id={batch_idx}')
        
        ##Added detailed percentage:
        pbar.set_description(desc= f'TRAIN Loss={loss.item()} batch_id={batch_idx} Correct={correct_train} / {len(train_loader.dataset)} TRAIN ACCURACY={100.00 *correct_train/len(train_loader.dataset)}')
     


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss=F.nll_loss(output, target)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            
            correct += pred.eq(target.view_as(pred)).sum().item()
            
    test_loss /= len(test_loader.dataset)
    Test_Loss.append(test_loss)
    Test_Accuracy.append(100. * correct / len(test_loader.dataset))
    print('\nTEST: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    






In [8]:
import pandas as pd
import numpy as np

model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
df_Test_Train_Analysis=pd.DataFrame(columns=['EPOCH','Test_Acc','Train_Acc','Acc_Diff'])
print(df_Test_Train_Analysis.shape)
for epoch in range(1, 20):
    print('EPOCH #',epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    print('----------------------------------------------------------------------')

  0%|          | 0/469 [00:00<?, ?it/s]

(0, 4)
EPOCH # 1


TRAIN Loss=0.13264529407024384 batch_id=468 Correct=50105 / 60000 TRAIN ACCURACY=83.50833333333334: 100%|██████████| 469/469 [00:10<00:00, 43.03it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.1138, Accuracy: 9645/10000 (96.45%)

----------------------------------------------------------------------
EPOCH # 2


TRAIN Loss=0.09003958106040955 batch_id=468 Correct=58042 / 60000 TRAIN ACCURACY=96.73666666666666: 100%|██████████| 469/469 [00:10<00:00, 44.27it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0770, Accuracy: 9737/10000 (97.37%)

----------------------------------------------------------------------
EPOCH # 3


TRAIN Loss=0.12336793541908264 batch_id=468 Correct=58582 / 60000 TRAIN ACCURACY=97.63666666666667: 100%|██████████| 469/469 [00:10<00:00, 44.64it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0499, Accuracy: 9840/10000 (98.40%)

----------------------------------------------------------------------
EPOCH # 4


TRAIN Loss=0.02165309153497219 batch_id=468 Correct=58878 / 60000 TRAIN ACCURACY=98.13: 100%|██████████| 469/469 [00:10<00:00, 44.38it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0498, Accuracy: 9837/10000 (98.37%)

----------------------------------------------------------------------
EPOCH # 5


TRAIN Loss=0.0799863189458847 batch_id=468 Correct=59014 / 60000 TRAIN ACCURACY=98.35666666666667: 100%|██████████| 469/469 [00:10<00:00, 44.75it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0490, Accuracy: 9836/10000 (98.36%)

----------------------------------------------------------------------
EPOCH # 6


TRAIN Loss=0.055754512548446655 batch_id=468 Correct=59127 / 60000 TRAIN ACCURACY=98.545: 100%|██████████| 469/469 [00:10<00:00, 44.47it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0388, Accuracy: 9880/10000 (98.80%)

----------------------------------------------------------------------
EPOCH # 7


TRAIN Loss=0.0051189265213906765 batch_id=468 Correct=59216 / 60000 TRAIN ACCURACY=98.69333333333333: 100%|██████████| 469/469 [00:10<00:00, 44.32it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0498, Accuracy: 9838/10000 (98.38%)

----------------------------------------------------------------------
EPOCH # 8


TRAIN Loss=0.12742696702480316 batch_id=468 Correct=59239 / 60000 TRAIN ACCURACY=98.73166666666667: 100%|██████████| 469/469 [00:10<00:00, 44.64it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0383, Accuracy: 9876/10000 (98.76%)

----------------------------------------------------------------------
EPOCH # 9


TRAIN Loss=0.03983405604958534 batch_id=468 Correct=59313 / 60000 TRAIN ACCURACY=98.855: 100%|██████████| 469/469 [00:10<00:00, 44.79it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0347, Accuracy: 9885/10000 (98.85%)

----------------------------------------------------------------------
EPOCH # 10


TRAIN Loss=0.05129906162619591 batch_id=468 Correct=59378 / 60000 TRAIN ACCURACY=98.96333333333334: 100%|██████████| 469/469 [00:10<00:00, 44.75it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0347, Accuracy: 9889/10000 (98.89%)

----------------------------------------------------------------------
EPOCH # 11


TRAIN Loss=0.014971266500651836 batch_id=468 Correct=59417 / 60000 TRAIN ACCURACY=99.02833333333334: 100%|██████████| 469/469 [00:10<00:00, 44.43it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0394, Accuracy: 9881/10000 (98.81%)

----------------------------------------------------------------------
EPOCH # 12


TRAIN Loss=0.0712423026561737 batch_id=468 Correct=59437 / 60000 TRAIN ACCURACY=99.06166666666667: 100%|██████████| 469/469 [00:10<00:00, 43.84it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0411, Accuracy: 9879/10000 (98.79%)

----------------------------------------------------------------------
EPOCH # 13


TRAIN Loss=0.01284786593168974 batch_id=468 Correct=59459 / 60000 TRAIN ACCURACY=99.09833333333333: 100%|██████████| 469/469 [00:10<00:00, 44.81it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0438, Accuracy: 9877/10000 (98.77%)

----------------------------------------------------------------------
EPOCH # 14


TRAIN Loss=0.04641224816441536 batch_id=468 Correct=59472 / 60000 TRAIN ACCURACY=99.12: 100%|██████████| 469/469 [00:10<00:00, 44.48it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0315, Accuracy: 9907/10000 (99.07%)

----------------------------------------------------------------------
EPOCH # 15


TRAIN Loss=0.003996739629656076 batch_id=468 Correct=59507 / 60000 TRAIN ACCURACY=99.17833333333333: 100%|██████████| 469/469 [00:10<00:00, 44.80it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0358, Accuracy: 9886/10000 (98.86%)

----------------------------------------------------------------------
EPOCH # 16


TRAIN Loss=0.005070199724286795 batch_id=468 Correct=59581 / 60000 TRAIN ACCURACY=99.30166666666666: 100%|██████████| 469/469 [00:10<00:00, 44.81it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0375, Accuracy: 9884/10000 (98.84%)

----------------------------------------------------------------------
EPOCH # 17


TRAIN Loss=0.005464931484311819 batch_id=468 Correct=59587 / 60000 TRAIN ACCURACY=99.31166666666667: 100%|██████████| 469/469 [00:10<00:00, 44.40it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0324, Accuracy: 9906/10000 (99.06%)

----------------------------------------------------------------------
EPOCH # 18


TRAIN Loss=0.003985658288002014 batch_id=468 Correct=59575 / 60000 TRAIN ACCURACY=99.29166666666667: 100%|██████████| 469/469 [00:10<00:00, 44.63it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0331, Accuracy: 9895/10000 (98.95%)

----------------------------------------------------------------------
EPOCH # 19


TRAIN Loss=0.01127992570400238 batch_id=468 Correct=59609 / 60000 TRAIN ACCURACY=99.34833333333333: 100%|██████████| 469/469 [00:10<00:00, 44.46it/s]



TEST: Average loss: 0.0358, Accuracy: 9885/10000 (98.85%)

----------------------------------------------------------------------


#Goal	
Now we have model skeleton ready with 'not so' overfitting performance. The next focus is on reducing the number of Kernels as we are currently using way too many resources than needed for the problem in hand.

It makes sense to optimize the resources before we go ahead and optimize the model.

#Params	
9,186
#Best Train Accuracy	
99.34%

#Best Test Accuracy	
99.07%
#Observation/ Analysis/Conclusion	

The Model is not great because it's unable to reach close to the Target of 99.4%.
But I would continue to further optimize this model because -
The Model clearly is overfitting on all the epochs.
But the Accuracy difference is consistent.

A consistent model not meeting Target is better than an incosistent model meeting target.

Such a model can be trained further using different methods to overcome the overfitting Problem.

Hence in next step we shall try applying Batch Normalization to try how that impacts the model accuracy and consistancy across differnet Epochs.


#Comment	
We will continue to optimize the model further.