In [0]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import pandas as pd

In [0]:
class Net(nn.Module):
    """Small fully-convolutional classifier for 1x28x28 inputs with 10 output classes.

    Five unpadded 3x3 convolutions and two 2x2 max-pools shrink the spatial
    size 28 -> 26 -> 24 -> 12 -> 10 -> 5 -> 3 -> 1, so the last convolution
    directly yields one value per class. Blocks 1-4 are
    Conv -> BatchNorm -> Dropout(0.02) -> ReLU; the final block is a bare
    convolution so the logits are not normalised or clipped.

    Receptive-field notes below use RF_out = RF_in + (k - 1) * jump_in and
    jump_out = jump_in * stride.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Output=26, RF=3  [1 + (3-1)*1 = 3], jump_in=1, jump_out=1
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=20, kernel_size=(3,3), padding=0, stride=1),
            nn.BatchNorm2d(20),
            nn.Dropout(0.02),
            nn.ReLU()
        )

        # Output=24, RF=5  [3 + (3-1)*1 = 5], jump_in=1, jump_out=1
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=20, out_channels=16, kernel_size=(3,3), padding=0, stride=1),
            nn.BatchNorm2d(16),
            nn.Dropout(0.02),
            nn.ReLU()
        )

        # Output=12, RF=6  [5 + (2-1)*1 = 6], jump_in=1, jump_out=2
        self.pool1 = nn.MaxPool2d(2, 2)

        # Output=10, RF=10  [6 + (3-1)*2 = 10], jump_in=2, jump_out=2
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3,3), padding=0, stride=1),
            nn.BatchNorm2d(16),
            nn.Dropout(0.02),
            nn.ReLU()
        )

        # Output=5, RF=12  [10 + (2-1)*2 = 12], jump_in=2, jump_out=4
        self.pool2 = nn.MaxPool2d(2, 2)

        # Output=3, RF=20  [12 + (3-1)*4 = 20], jump_in=4, jump_out=4
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3,3), padding=0, stride=1),
            nn.BatchNorm2d(16),
            nn.Dropout(0.02),
            nn.ReLU()
        )

        # Output=1, RF=28  [20 + (3-1)*4 = 28], jump_in=4, jump_out=4
        # No BatchNorm/Dropout/ReLU here: this layer produces the class logits.
        self.conv5 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(3,3), padding=0, stride=1),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape (N, 10) for a (N, 1, 28, 28) batch."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool1(x)
        x = self.conv3(x)
        x = self.pool2(x)
        x = self.conv4(x)
        x = self.conv5(x)
        # conv5 leaves a (N, 10, 1, 1) tensor for 28x28 inputs; flatten to (N, 10).
        x = x.view(-1, 10)
        # Normalise over the class dimension explicitly; omitting `dim` relies on
        # a deprecated implicit choice and emits a warning on every call.
        return F.log_softmax(x, dim=1)

In [13]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 20, 26, 26]             200
       BatchNorm2d-2           [-1, 20, 26, 26]              40
           Dropout-3           [-1, 20, 26, 26]               0
              ReLU-4           [-1, 20, 26, 26]               0
            Conv2d-5           [-1, 16, 24, 24]           2,896
       BatchNorm2d-6           [-1, 16, 24, 24]              32
           Dropout-7           [-1, 16, 24, 24]               0
              ReLU-8           [-1, 16, 24, 24]               0
         MaxPool2d-9           [-1, 16, 12, 12]               0
           Conv2d-10           [-1, 16, 10, 10]           2,320
      BatchNorm2d-11           [-1, 16, 10, 10]              32
          Dropout-12           [-1, 16, 10, 10]               0
             ReLU-13           [-1, 16, 10, 10]               0
        MaxPool2d-14             [-1, 1



In [0]:


# Fix the global torch RNG seed before the shuffling loaders are used.
torch.manual_seed(1)
batch_size = 128

# Worker / pinned-memory options are only passed when running on CUDA.
kwargs = {}
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}

# Both splits share the same preprocessing: convert to tensor, then normalise
# with mean 0.1307 and std 0.3081 (presumably the MNIST statistics -- confirm).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Only the training split requests a download; the test split reads the same root.
train_dataset = datasets.MNIST('../data', train=True, download=True, transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, **kwargs)

test_dataset = datasets.MNIST('../data', train=False, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=True, **kwargs)


In [0]:
from tqdm import tqdm

# Module-level metric histories appended to by train() and test().
# Four separate list objects, one per metric.
Train_Loss, Train_Accuracy, Test_Loss, Test_Accuracy = [], [], [], []

def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    After every batch the batch loss is appended to the module-level
    ``Train_Loss`` list and the running accuracy (in percent) to
    ``Train_Accuracy``. The running accuracy is computed over the samples
    processed so far in this call, so it is meaningful at every step; after
    the final batch it equals the accuracy over everything the loader yielded.

    Args:
        model: network to optimise; switched to training mode.
        device: device that each batch is moved to.
        train_loader: iterable of ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch number (accepted for the caller's convenience;
            not used inside the function).
    """
    model.train()
    pbar = tqdm(train_loader)
    correct_train = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        Train_Loss.append(batch_loss)

        # Predicted class = index of the largest log-probability.
        pred = output.argmax(dim=1, keepdim=True)
        correct_train += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)

        # Divide by the samples seen so far, not by the full dataset size;
        # otherwise the logged accuracy starts near 0% on every epoch.
        running_accuracy = 100.00 * correct_train / processed
        Train_Accuracy.append(running_accuracy)

        pbar.set_description(desc= f'TRAIN Loss={batch_loss} batch_id={batch_idx} Correct={correct_train} / {processed} TRAIN ACCURACY={running_accuracy}')
     


def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and record the results.

    Appends the mean per-sample NLL loss to the module-level ``Test_Loss``
    list and the accuracy (in percent) to ``Test_Accuracy``, then prints a
    one-line summary.

    Args:
        model: network to evaluate; switched to eval mode.
        device: device that each batch is moved to.
        test_loader: iterable of ``(data, target)`` batches; its ``dataset``
            length is used as the denominator for loss and accuracy.
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Sum (not mean) per batch so the final division gives a true per-sample average.
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # Predicted class = index of the largest log-probability.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    num_samples = len(test_loader.dataset)
    test_loss /= num_samples
    accuracy = 100. * correct / num_samples
    Test_Loss.append(test_loss)
    Test_Accuracy.append(accuracy)
    print('\nTEST: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, num_samples, accuracy))
    


In [17]:
import pandas as pd
import numpy as np

NUM_EPOCHS = 14  # epochs are numbered 1..NUM_EPOCHS

# Start from a freshly initialised model so this cell can be re-run on its own.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

analysis_columns = ['EPOCH','Test_Acc','Train_Acc','Acc_Diff']
df_Test_Train_Analysis=pd.DataFrame(columns=analysis_columns)
print(df_Test_Train_Analysis.shape)

epoch_records = []
for epoch in range(1, NUM_EPOCHS + 1):
    print('EPOCH #',epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    # train() appends once per batch and test() once per call, so the last
    # entry of each history is this epoch's final value.
    train_acc = Train_Accuracy[-1]
    test_acc = Test_Accuracy[-1]
    epoch_records.append({
        'EPOCH': epoch,
        'Test_Acc': test_acc,
        'Train_Acc': train_acc,
        # Sign convention chosen here: positive means train accuracy exceeds test accuracy.
        'Acc_Diff': train_acc - test_acc,
    })
    print('----------------------------------------------------------------------')

# Build the frame once from the collected records instead of growing it row by row.
df_Test_Train_Analysis = pd.DataFrame(epoch_records, columns=analysis_columns)

  0%|          | 0/469 [00:00<?, ?it/s]

(0, 4)
EPOCH # 1


TRAIN Loss=0.06455826759338379 batch_id=468 Correct=56765 / 60000 TRAIN ACCURACY=94.60833333333333: 100%|██████████| 469/469 [00:10<00:00, 43.74it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0633, Accuracy: 9819/10000 (98.19%)

----------------------------------------------------------------------
EPOCH # 2


TRAIN Loss=0.09931989759206772 batch_id=468 Correct=59050 / 60000 TRAIN ACCURACY=98.41666666666667: 100%|██████████| 469/469 [00:11<00:00, 42.25it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0434, Accuracy: 9873/10000 (98.73%)

----------------------------------------------------------------------
EPOCH # 3


TRAIN Loss=0.0717339962720871 batch_id=468 Correct=59244 / 60000 TRAIN ACCURACY=98.74: 100%|██████████| 469/469 [00:10<00:00, 43.05it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0386, Accuracy: 9881/10000 (98.81%)

----------------------------------------------------------------------
EPOCH # 4


TRAIN Loss=0.0131152318790555 batch_id=468 Correct=59369 / 60000 TRAIN ACCURACY=98.94833333333334: 100%|██████████| 469/469 [00:10<00:00, 44.32it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0310, Accuracy: 9911/10000 (99.11%)

----------------------------------------------------------------------
EPOCH # 5


TRAIN Loss=0.1045006588101387 batch_id=468 Correct=59434 / 60000 TRAIN ACCURACY=99.05666666666667: 100%|██████████| 469/469 [00:10<00:00, 43.78it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0314, Accuracy: 9897/10000 (98.97%)

----------------------------------------------------------------------
EPOCH # 6


TRAIN Loss=0.022844230756163597 batch_id=468 Correct=59509 / 60000 TRAIN ACCURACY=99.18166666666667: 100%|██████████| 469/469 [00:11<00:00, 42.00it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0313, Accuracy: 9905/10000 (99.05%)

----------------------------------------------------------------------
EPOCH # 7


TRAIN Loss=0.020272187888622284 batch_id=468 Correct=59553 / 60000 TRAIN ACCURACY=99.255: 100%|██████████| 469/469 [00:10<00:00, 44.78it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0307, Accuracy: 9901/10000 (99.01%)

----------------------------------------------------------------------
EPOCH # 8


TRAIN Loss=0.056018222123384476 batch_id=468 Correct=59578 / 60000 TRAIN ACCURACY=99.29666666666667: 100%|██████████| 469/469 [00:10<00:00, 44.06it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0288, Accuracy: 9902/10000 (99.02%)

----------------------------------------------------------------------
EPOCH # 9


TRAIN Loss=0.027747867628932 batch_id=468 Correct=59636 / 60000 TRAIN ACCURACY=99.39333333333333: 100%|██████████| 469/469 [00:10<00:00, 43.07it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0261, Accuracy: 9920/10000 (99.20%)

----------------------------------------------------------------------
EPOCH # 10


TRAIN Loss=0.0373755544424057 batch_id=468 Correct=59643 / 60000 TRAIN ACCURACY=99.405: 100%|██████████| 469/469 [00:10<00:00, 43.00it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0282, Accuracy: 9899/10000 (98.99%)

----------------------------------------------------------------------
EPOCH # 11


TRAIN Loss=0.004404167179018259 batch_id=468 Correct=59682 / 60000 TRAIN ACCURACY=99.47: 100%|██████████| 469/469 [00:10<00:00, 44.00it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0261, Accuracy: 9911/10000 (99.11%)

----------------------------------------------------------------------
EPOCH # 12


TRAIN Loss=0.011189390905201435 batch_id=468 Correct=59701 / 60000 TRAIN ACCURACY=99.50166666666667: 100%|██████████| 469/469 [00:10<00:00, 43.70it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0251, Accuracy: 9917/10000 (99.17%)

----------------------------------------------------------------------
EPOCH # 13


TRAIN Loss=0.0014507770538330078 batch_id=468 Correct=59724 / 60000 TRAIN ACCURACY=99.54: 100%|██████████| 469/469 [00:10<00:00, 43.32it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


TEST: Average loss: 0.0294, Accuracy: 9908/10000 (99.08%)

----------------------------------------------------------------------
EPOCH # 14


TRAIN Loss=0.007728129625320435 batch_id=468 Correct=59704 / 60000 TRAIN ACCURACY=99.50666666666666: 100%|██████████| 469/469 [00:10<00:00, 44.33it/s]



TEST: Average loss: 0.0238, Accuracy: 9920/10000 (99.20%)

----------------------------------------------------------------------


#Goal	
In the last attempt the final model was very good because of the CONSISTENCY in the difference between training and test accuracy, but it was nowhere close to the target accuracy.
The goal is still to achieve the 99.4% target accuracy. The intent of this model is to extend the previous model and try using Dropout.
DropOut has the effect of reducing the capacity or thinning the network during training. When we use Dropout in a network, randomly selected neurons are ignored during training. They are “dropped-out” randomly. 

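It is a regularization method that reduces over-fitting by preventing the network from relying on any single neuron; unlike L1/L2 regularization, it does not add a penalty term to the loss function.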
#Params	
9322

#WITH 14 EPOCHS

#DropOut value
2 %
#Best Train Accuracy	
99.54%

#Best Test Accuracy	
99.2%
#Observation/ Analysis/Conclusion	
This is a very good model, with consistent train and test accuracy.
The model does not have an overfitting issue, but the target accuracy is still not met.
For the model to learn better, we could try increasing the capacity of the model.

Why do we want to increase the model capacity at this stage? Because the currently trained model is very consistent in its learning and does not suffer from overfitting. We have managed to train a stable model.

#Comment	
