Model 1:
Target 
1. Get the setup right - Transforms, Data Loader and Basic Training and testing loop
2. Get the model skeleton right with fewer parameters

Result:
1. Parameters : 12.9k
2. Best Train Accuracy : 98.87
3. Best Test Accuracy : 98.64

Analysis:
1. Good light model
2. Initially the gap between train and test accuracy is small; over the later epochs it settles to a roughly constant value.
3. Initial accuracy is low and the model takes time to start learning; batch normalization can help.

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
# Preprocessing shared by both splits: convert the image to a tensor, then
# normalise the single channel with the constants below.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST pixel mean / std -- confirm.
_NORM_MEAN = (0.1307,)
_NORM_STD = (0.3081,)

# Two separate Compose objects are kept so that one split can later get its own
# steps without touching the other.
train_transforms = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(_NORM_MEAN, _NORM_STD)]
)
test_transforms = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(_NORM_MEAN, _NORM_STD)]
)

In [3]:
# MNIST train / test splits, stored under ./data (downloaded when requested via
# download=True) and preprocessed with the matching transform pipeline.
_mnist_kwargs = dict(root='./data', download=True)
train = datasets.MNIST(train=True, transform=train_transforms, **_mnist_kwargs)
test = datasets.MNIST(train=False, transform=test_transforms, **_mnist_kwargs)

In [4]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# Larger batches, worker processes and pinned memory when a GPU is available;
# a plain single-process loader with smaller batches otherwise.
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=4, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

# NOTE(review): `train` and `test` are the dataset objects here, but the same
# names are rebound to the training / evaluation functions further down the
# notebook. Re-running this cell after those definitions would hand functions
# to DataLoader -- run top to bottom from a fresh kernel.

# train_loader: shuffled every epoch.
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test_loader: same settings, but NOT shuffled. Evaluation only aggregates over
# the whole split, so the order is irrelevant, and shuffling would needlessly
# consume random numbers from the global generator.
test_loader = torch.utils.data.DataLoader(test, **{**dataloader_args, "shuffle": False})

CUDA Available? True


In [13]:
class Net(nn.Module):
  """Small all-convolutional classifier producing 10 log-probabilities per image.

  Layout (spatial sizes are for a 1x28x28 input):
    conv1..conv3 : 3x3 convs, 1 -> 8 -> 8 -> 16 channels   (26 -> 24 -> 22)
    pool1        : 2x2 max-pool                            (22 -> 11)
    conv4        : 1x1 conv, 16 -> 8 channels              (11)
    conv5, conv6 : 3x3 convs, 8 -> 16 -> 32 channels       (9 -> 7)
    conv7        : 1x1 conv, 32 -> 10 channels             (7)
    conv8        : 7x7 conv, 10 -> 10 channels, no ReLU    (1)

  Every convolution is unpadded and has no bias term.
  """

  def __init__(self):
    super().__init__()

    def conv(c_in, c_out, k):
      # Unpadded, bias-free k x k convolution.
      return nn.Conv2d(in_channels=c_in, out_channels=c_out,
                       kernel_size=(k, k), padding=0, bias=False)

    def conv_relu(c_in, c_out, k):
      # Convolution followed by ReLU, wrapped in a Sequential.
      return nn.Sequential(conv(c_in, c_out, k), nn.ReLU())

    # Input block
    self.conv1 = conv_relu(1, 8, 3)     # output = 26

    # Convolution block 1
    self.conv2 = conv_relu(8, 8, 3)     # output = 24
    self.conv3 = conv_relu(8, 16, 3)    # output = 22

    # Transition block: halve the resolution, then squeeze channels with a 1x1
    self.pool1 = nn.MaxPool2d(2, 2)
    self.conv4 = conv_relu(16, 8, 1)    # output = 11

    # Convolution block 2
    self.conv5 = conv_relu(8, 16, 3)    # output = 9
    self.conv6 = conv_relu(16, 32, 3)   # output = 7

    # Output block: 1x1 down to 10 channels, then a 7x7 conv collapses the
    # remaining 7x7 map to a single value per channel (no activation after it).
    self.conv7 = conv_relu(32, 10, 1)   # output = 7
    self.conv8 = nn.Sequential(conv(10, 10, 7))  # output = 1

  def forward(self, x):
    """Run the layers in order and return log-softmax scores of shape (N, 10)."""
    pipeline = (
        self.conv1, self.conv2, self.conv3,
        self.pool1, self.conv4,
        self.conv5, self.conv6,
        self.conv7, self.conv8,
    )
    for layer in pipeline:
      x = layer(x)
    logits = x.view(-1, 10)
    return F.log_softmax(logits, dim=-1)

In [14]:
!pip install torchsummary
from torchsummary import summary

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)

model = Net().to(device)
summary(model, input_size=(1,28,28))

cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              72
              ReLU-2            [-1, 8, 26, 26]               0
            Conv2d-3            [-1, 8, 24, 24]             576
              ReLU-4            [-1, 8, 24, 24]               0
            Conv2d-5           [-1, 16, 22, 22]           1,152
              ReLU-6           [-1, 16, 22, 22]               0
         MaxPool2d-7           [-1, 16, 11, 11]               0
            Conv2d-8            [-1, 8, 11, 11]             128
              ReLU-9            [-1, 8, 11, 11]               0
           Conv2d-10             [-1, 16, 9, 9]           1,152
             ReLU-11             [-1, 16, 9, 9]               0
           Conv2d-12             [-1, 32, 7, 7]           4,608
             ReLU-13             [-1, 32, 7, 7]               0
           Conv2d-14             [

In [15]:
from tqdm import tqdm

# Metric histories shared with the training / evaluation functions:
#   train_losses, train_acc -> one entry appended per training batch
#   test_losses, test_acc   -> one entry appended per evaluation pass
train_losses, test_losses = [], []
train_acc, test_acc = [], []

def train(model, device, train_loader, optimizer, epoch):
  """Train ``model`` for one pass over ``train_loader``.

  For every batch: move it to ``device``, compute the NLL loss of the model
  output, back-propagate and take one optimizer step. A tqdm progress bar
  shows the current batch loss and the running accuracy.

  Side effects (module-level lists):
    train_losses -- the batch loss as a Python float, one entry per batch
    train_acc    -- the running accuracy (percent) so far in this pass,
                    one entry per batch

  Args:
    model: network to optimise; its output is passed to ``F.nll_loss``.
    device: device the batches are moved to.
    train_loader: iterable yielding ``(data, target)`` batches.
    optimizer: optimizer whose ``zero_grad`` / ``step`` are called per batch.
    epoch: accepted for the caller's convenience; not used in the body.
  """
  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0

  for batch_idx, (data, target) in enumerate(pbar):
    data, target = data.to(device), target.to(device)

    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()

    y_pred = model(data)
    loss = F.nll_loss(y_pred, target)

    loss.backward()
    optimizer.step()

    # Record a plain float, not the loss tensor: the tensor lives on `device`
    # and is still attached to autograd history, so keeping one per batch
    # wastes memory and cannot be plotted without conversion.
    loss_value = loss.item()
    train_losses.append(loss_value)

    pred = y_pred.argmax(dim=1, keepdim=True)
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    pbar.set_description(desc=f'loss={loss_value} Batch_id ={batch_idx} Accuracy={100*correct/processed:0.2f}')
    train_acc.append(100*correct/processed)

def test(model, device, test_loader):
  """Evaluate ``model`` on every batch of ``test_loader`` without gradients.

  The summed NLL loss and the number of correct arg-max predictions are
  accumulated over all batches, then divided by ``len(test_loader.dataset)``.
  The average loss is appended to the module-level ``test_losses`` list, a
  one-line summary is printed, and the accuracy (percent) is appended to
  ``test_acc``.

  Args:
    model: network to evaluate; its output is passed to ``F.nll_loss``.
    device: device the batches are moved to.
    test_loader: iterable of ``(data, target)`` batches exposing ``.dataset``.
  """
  model.eval()
  summed_loss = 0
  num_correct = 0

  with torch.no_grad():
    for batch, labels in test_loader:
      batch = batch.to(device)
      labels = labels.to(device)
      scores = model(batch)
      # reduction='sum' so that the division below yields a per-sample mean.
      summed_loss += F.nll_loss(scores, labels, reduction='sum').item()
      predicted = scores.argmax(dim=1, keepdim=True)
      num_correct += predicted.eq(labels.view_as(predicted)).sum().item()

  total = len(test_loader.dataset)
  mean_loss = summed_loss / total
  test_losses.append(mean_loss)

  accuracy = 100. * num_correct / total
  summary_line = '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
      mean_loss, num_correct, total, accuracy
  )
  print(summary_line)

  test_acc.append(accuracy)

In [16]:
# Training run: a freshly constructed network optimised with SGD + momentum,
# evaluated on the test loader after every epoch.
EPOCHS = 15
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

for epoch in range(EPOCHS):
  # Epochs are reported 1-based; the 0-based index is what train() receives.
  print("EPOCH: ", epoch + 1)
  train(model, device, train_loader, optimizer, epoch)
  test(model, device, test_loader)

EPOCH:  1


loss=2.3014414310455322 Batch_id =468 Accuracy=11.51: 100%|██████████| 469/469 [00:04<00:00, 103.64it/s]



Test set: Average loss: 2.2999, Accuracy: 1011/10000 (10.11%)

EPOCH:  2


loss=0.27498659491539 Batch_id =468 Accuracy=68.26: 100%|██████████| 469/469 [00:04<00:00, 109.22it/s]   



Test set: Average loss: 0.1916, Accuracy: 9422/10000 (94.22%)

EPOCH:  3


loss=0.14109930396080017 Batch_id =468 Accuracy=95.42: 100%|██████████| 469/469 [00:04<00:00, 109.71it/s] 



Test set: Average loss: 0.0931, Accuracy: 9714/10000 (97.14%)

EPOCH:  4


loss=0.07211481779813766 Batch_id =468 Accuracy=97.11: 100%|██████████| 469/469 [00:04<00:00, 113.59it/s] 



Test set: Average loss: 0.0695, Accuracy: 9776/10000 (97.76%)

EPOCH:  5


loss=0.04495631530880928 Batch_id =468 Accuracy=97.61: 100%|██████████| 469/469 [00:04<00:00, 113.72it/s]  



Test set: Average loss: 0.0621, Accuracy: 9800/10000 (98.00%)

EPOCH:  6


loss=0.03272944688796997 Batch_id =468 Accuracy=97.99: 100%|██████████| 469/469 [00:04<00:00, 113.72it/s]  



Test set: Average loss: 0.0575, Accuracy: 9808/10000 (98.08%)

EPOCH:  7


loss=0.09798584133386612 Batch_id =468 Accuracy=98.24: 100%|██████████| 469/469 [00:04<00:00, 114.77it/s]  



Test set: Average loss: 0.0596, Accuracy: 9809/10000 (98.09%)

EPOCH:  8


loss=0.021586701273918152 Batch_id =468 Accuracy=98.38: 100%|██████████| 469/469 [00:04<00:00, 111.17it/s] 



Test set: Average loss: 0.0524, Accuracy: 9830/10000 (98.30%)

EPOCH:  9


loss=0.005305544938892126 Batch_id =468 Accuracy=98.41: 100%|██████████| 469/469 [00:04<00:00, 110.10it/s] 



Test set: Average loss: 0.0525, Accuracy: 9842/10000 (98.42%)

EPOCH:  10


loss=0.026615120470523834 Batch_id =468 Accuracy=98.55: 100%|██████████| 469/469 [00:04<00:00, 110.04it/s] 



Test set: Average loss: 0.0515, Accuracy: 9838/10000 (98.38%)

EPOCH:  11


loss=0.028326719999313354 Batch_id =468 Accuracy=98.68: 100%|██████████| 469/469 [00:04<00:00, 113.64it/s]



Test set: Average loss: 0.0482, Accuracy: 9849/10000 (98.49%)

EPOCH:  12


loss=0.025617117062211037 Batch_id =468 Accuracy=98.74: 100%|██████████| 469/469 [00:04<00:00, 111.86it/s] 



Test set: Average loss: 0.0504, Accuracy: 9843/10000 (98.43%)

EPOCH:  13


loss=0.0017125156009569764 Batch_id =468 Accuracy=98.73: 100%|██████████| 469/469 [00:04<00:00, 109.58it/s]



Test set: Average loss: 0.0548, Accuracy: 9842/10000 (98.42%)

EPOCH:  14


loss=0.05996864661574364 Batch_id =468 Accuracy=98.87: 100%|██████████| 469/469 [00:04<00:00, 111.24it/s]  



Test set: Average loss: 0.0417, Accuracy: 9864/10000 (98.64%)

EPOCH:  15


loss=0.023782795295119286 Batch_id =468 Accuracy=98.84: 100%|██████████| 469/469 [00:04<00:00, 109.38it/s] 



Test set: Average loss: 0.0507, Accuracy: 9849/10000 (98.49%)

