Model 2:
Target
1. Add batch normalization to improve training efficiency
2. Add GAP to reduce params in last layer

Result:
1. Parameters : 8.2k
2. Best Train Accuracy : 99.12
3. Best Test Accuracy : 99.09

Analysis:
1. Great model, just above 8k parameters
2. The model is underfitting; both train and test accuracy have increased over the epochs
3. The model can be pushed further with regularization or a better learning rate at the start

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
# Per-channel mean / std handed to Normalize (one entry each: single-channel images).
_NORM_MEAN = (0.1307,)
_NORM_STD = (0.3081,)


def _tensor_then_normalize():
    """Return a fresh ToTensor -> Normalize pipeline."""
    steps = [
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]
    return transforms.Compose(steps)


# Train and test use the same preprocessing; each gets its own Compose object.
train_transforms = _tensor_then_normalize()
test_transforms = _tensor_then_normalize()

In [3]:
# Both MNIST splits live under the same root directory and are downloaded on demand.
# NOTE: the names `train` and `test` are rebound to functions by the training-utilities
# cell further down, so the DataLoader cell has to run before that cell does.
DATA_ROOT = './data'

train = datasets.MNIST(root=DATA_ROOT, train=True, download=True, transform=train_transforms)
test = datasets.MNIST(root=DATA_ROOT, train=False, download=True, transform=test_transforms)

In [4]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility: seed the CPU generator, and the CUDA one when a GPU is present.
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# With a GPU: larger batches, worker processes and pinned host memory.
# Without one: smaller batches and DataLoader defaults for everything else.
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=4, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

# train_loader: reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test_loader: same settings, but without shuffling. Evaluation only sums the loss
# and the number of correct predictions, so sample order cannot affect the metrics;
# shuffling would just spend an extra random permutation per epoch.
test_loader = torch.utils.data.DataLoader(test, **{**dataloader_args, 'shuffle': False})

CUDA Available? True


In [9]:
class Net(nn.Module):
  """Small all-convolutional classifier with batch norm and average-pool head.

  Every convolution is unpadded and bias-free and is followed by BatchNorm2d
  and ReLU. The spatial sizes quoted in the comments assume a 28x28 input.
  The forward pass returns log-probabilities over 10 classes.

  NOTE(review): conv7 also ends in ReLU, so the values fed to log_softmax are
  non-negative. Left unchanged here to preserve behavior.
  """

  @staticmethod
  def _conv_bn_relu(in_channels, out_channels, kernel_size):
    """Build Conv2d (no padding, no bias) -> BatchNorm2d -> ReLU."""
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                  kernel_size=kernel_size, padding=0, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
    )

  def __init__(self):
    super().__init__()
    k3, k1 = (3, 3), (1, 1)

    # Input block: 28 -> 26
    self.conv1 = self._conv_bn_relu(1, 8, k3)

    # Convolution block 1: 26 -> 24 -> 22
    self.conv2 = self._conv_bn_relu(8, 8, k3)
    self.conv3 = self._conv_bn_relu(8, 16, k3)

    # Transition block: 2x2 max-pool (22 -> 11), then 1x1 conv squeezing 16 -> 8 channels
    self.pool1 = nn.MaxPool2d(2, 2)
    self.conv4 = self._conv_bn_relu(16, 8, k1)

    # Convolution block 2: 11 -> 9 -> 7
    self.conv5 = self._conv_bn_relu(8, 16, k3)
    self.conv6 = self._conv_bn_relu(16, 32, k3)

    # Output block: 1x1 conv down to 10 channels (still 7x7), then 7x7 average pool -> 1x1
    self.conv7 = self._conv_bn_relu(32, 10, k1)
    self.gap = nn.Sequential(
        nn.AvgPool2d(kernel_size=7),
    )

  def forward(self, x):
    """Apply all stages in order and return log-softmax scores of shape (-1, 10)."""
    pipeline = (
        self.conv1, self.conv2, self.conv3,
        self.pool1, self.conv4,
        self.conv5, self.conv6,
        self.conv7, self.gap,
    )
    for stage in pipeline:
      x = stage(x)
    scores = x.view(-1, 10)  # drop the trailing 1x1 spatial dims
    return F.log_softmax(scores, dim=-1)

In [10]:
!pip install torchsummary
from torchsummary import summary

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)

model = Net().to(device)
summary(model, input_size=(1,28,28))

cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              72
       BatchNorm2d-2            [-1, 8, 26, 26]              16
              ReLU-3            [-1, 8, 26, 26]               0
            Conv2d-4            [-1, 8, 24, 24]             576
       BatchNorm2d-5            [-1, 8, 24, 24]              16
              ReLU-6            [-1, 8, 24, 24]               0
            Conv2d-7           [-1, 16, 22, 22]           1,152
       BatchNorm2d-8           [-1, 16, 22, 22]              32
              ReLU-9           [-1, 16, 22, 22]               0
        MaxPool2d-10           [-1, 16, 11, 11]               0
           Conv2d-11            [-1, 8, 11, 11]             128
      BatchNorm2d-12            [-1, 8, 11, 11]              16
             ReLU-13            [-1, 8, 11, 11]               0
           Conv2d-14             [

In [11]:
from tqdm import tqdm

# Module-level metric histories filled in by train() and test().
# Four distinct list objects are created; they must not alias each other.
train_losses, test_losses = [], []
train_acc, test_acc = [], []

def train(model, device, train_loader, optimizer, epoch):
  """Run one pass over ``train_loader``, updating ``model`` after every batch.

  Args:
    model: network to optimise; its output is passed directly to ``F.nll_loss``.
    device: device each batch is moved to before the forward pass.
    train_loader: iterable yielding ``(data, target)`` batches.
    optimizer: optimizer stepped once per batch.
    epoch: epoch index supplied by the caller; not used inside this function.

  Side effects:
    Appends one entry per batch to the module-level ``train_losses`` (batch
    loss as a Python float) and ``train_acc`` (running accuracy in percent
    over the samples processed so far in this call).
  """
  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0

  for batch_idx, (data, target) in enumerate(pbar):
    data, target = data.to(device), target.to(device)

    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()

    y_pred = model(data)
    loss = F.nll_loss(y_pred, target)

    loss.backward()
    optimizer.step()

    # Record a plain float, not the loss tensor: keeping the tensor would hold
    # on to device memory and its autograd history for every batch ever seen.
    loss_value = loss.item()
    train_losses.append(loss_value)

    pred = y_pred.argmax(dim=1, keepdim=True)
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    pbar.set_description(desc=f'loss={loss_value} Batch_id ={batch_idx} Accuracy={100*correct/processed:0.2f}')
    train_acc.append(100*correct/processed)

def test(model, device, test_loader):
  """Evaluate ``model`` on ``test_loader`` and log the result.

  Args:
    model: network to evaluate; its output is passed directly to ``F.nll_loss``.
    device: device each batch is moved to before the forward pass.
    test_loader: iterable yielding ``(data, target)`` batches and exposing
      ``.dataset`` (its length is used as the sample count).

  Side effects:
    Prints a one-line summary and appends the mean loss to the module-level
    ``test_losses`` and the accuracy in percent to ``test_acc``.
  """
  model.eval()
  n_samples = len(test_loader.dataset)
  loss_sum = 0
  n_correct = 0

  # No gradients are needed for evaluation.
  with torch.no_grad():
    for images, labels in test_loader:
      images = images.to(device)
      labels = labels.to(device)
      log_probs = model(images)
      # Sum (not mean) per batch so that dividing by the dataset size below
      # gives the per-sample average.
      loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
      guesses = log_probs.argmax(dim=1, keepdim=True)
      n_correct += guesses.eq(labels.view_as(guesses)).sum().item()

  mean_loss = loss_sum / n_samples
  test_losses.append(mean_loss)

  accuracy_pct = 100. * n_correct / n_samples
  print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
      mean_loss, n_correct, n_samples, accuracy_pct
  ))

  test_acc.append(accuracy_pct)

In [12]:
# Hyper-parameters for this run.
LEARNING_RATE = 0.01
MOMENTUM = 0.9
EPOCHS = 15

# Start from a freshly initialised network and a plain SGD-with-momentum optimizer.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

# One training pass followed by one evaluation pass per epoch.
for epoch in range(EPOCHS):
  epoch_label = epoch + 1  # shown 1-based in the log
  print("EPOCH: ", epoch_label)
  train(model, device, train_loader, optimizer, epoch)
  test(model, device, test_loader)

EPOCH:  1


loss=0.25584545731544495 Batch_id =468 Accuracy=86.77: 100%|██████████| 469/469 [00:04<00:00, 107.79it/s]



Test set: Average loss: 0.2452, Accuracy: 9616/10000 (96.16%)

EPOCH:  2


loss=0.12266247719526291 Batch_id =468 Accuracy=97.05: 100%|██████████| 469/469 [00:04<00:00, 110.57it/s]



Test set: Average loss: 0.1143, Accuracy: 9802/10000 (98.02%)

EPOCH:  3


loss=0.1803968995809555 Batch_id =468 Accuracy=97.80: 100%|██████████| 469/469 [00:04<00:00, 110.66it/s]  



Test set: Average loss: 0.0967, Accuracy: 9815/10000 (98.15%)

EPOCH:  4


loss=0.07428574562072754 Batch_id =468 Accuracy=98.19: 100%|██████████| 469/469 [00:04<00:00, 108.11it/s] 



Test set: Average loss: 0.0805, Accuracy: 9839/10000 (98.39%)

EPOCH:  5


loss=0.0607139877974987 Batch_id =468 Accuracy=98.41: 100%|██████████| 469/469 [00:04<00:00, 100.45it/s]  



Test set: Average loss: 0.0728, Accuracy: 9833/10000 (98.33%)

EPOCH:  6


loss=0.12083827704191208 Batch_id =468 Accuracy=98.57: 100%|██████████| 469/469 [00:04<00:00, 108.00it/s] 



Test set: Average loss: 0.0543, Accuracy: 9881/10000 (98.81%)

EPOCH:  7


loss=0.035741548985242844 Batch_id =468 Accuracy=98.72: 100%|██████████| 469/469 [00:04<00:00, 106.73it/s]



Test set: Average loss: 0.0553, Accuracy: 9875/10000 (98.75%)

EPOCH:  8


loss=0.08134206384420395 Batch_id =468 Accuracy=98.77: 100%|██████████| 469/469 [00:04<00:00, 105.65it/s] 



Test set: Average loss: 0.0558, Accuracy: 9856/10000 (98.56%)

EPOCH:  9


loss=0.02947012335062027 Batch_id =468 Accuracy=98.81: 100%|██████████| 469/469 [00:04<00:00, 106.86it/s] 



Test set: Average loss: 0.0528, Accuracy: 9878/10000 (98.78%)

EPOCH:  10


loss=0.02343722991645336 Batch_id =468 Accuracy=98.90: 100%|██████████| 469/469 [00:04<00:00, 107.56it/s] 



Test set: Average loss: 0.0441, Accuracy: 9892/10000 (98.92%)

EPOCH:  11


loss=0.02757439576089382 Batch_id =468 Accuracy=98.95: 100%|██████████| 469/469 [00:04<00:00, 106.92it/s] 



Test set: Average loss: 0.0396, Accuracy: 9905/10000 (99.05%)

EPOCH:  12


loss=0.032595787197351456 Batch_id =468 Accuracy=99.00: 100%|██████████| 469/469 [00:04<00:00, 106.61it/s]



Test set: Average loss: 0.0414, Accuracy: 9900/10000 (99.00%)

EPOCH:  13


loss=0.04581087827682495 Batch_id =468 Accuracy=98.98: 100%|██████████| 469/469 [00:04<00:00, 105.38it/s] 



Test set: Average loss: 0.0380, Accuracy: 9909/10000 (99.09%)

EPOCH:  14


loss=0.02119535394012928 Batch_id =468 Accuracy=99.07: 100%|██████████| 469/469 [00:04<00:00, 103.98it/s] 



Test set: Average loss: 0.0369, Accuracy: 9908/10000 (99.08%)

EPOCH:  15


loss=0.054779887199401855 Batch_id =468 Accuracy=99.12: 100%|██████████| 469/469 [00:04<00:00, 106.98it/s]



Test set: Average loss: 0.0401, Accuracy: 9897/10000 (98.97%)

