In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [None]:
# Training-time pipeline: resize, light colour jitter, tensor conversion, normalisation.
train_transforms = transforms.Compose([
    transforms.Resize(size=(28, 28)),
    transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
    transforms.ToTensor(),
    # mean and std have to be sequences, hence the trailing comma:
    # (0.1307,) is a one-element tuple, whereas (0.1307) is just a float.
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Evaluation-time pipeline: no augmentation, only tensor conversion and the
# same normalisation constants as the training pipeline.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])


In [None]:
train = datasets.MNIST('./data', train=True, download=True, transform=train_transforms)
test = datasets.MNIST('./data', train=False, download=True, transform=test_transforms)

In [None]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# dataloader arguments - something you'll fetch these from cmdprmt
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=4, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

# train dataloader
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test dataloader
test_loader = torch.utils.data.DataLoader(test, **dataloader_args)

CUDA Available? True




In [None]:
class Model3(nn.Module):
    """Small CNN classifier producing log-probabilities over 10 classes.

    Architecture: seven 3x3 convolution blocks (Conv2d -> BatchNorm2d -> ReLU,
    no padding), one 2x2 max-pool after the third block, dropout after the
    second and fifth blocks, then global average pooling and log-softmax.
    The model has 6,798 trainable parameters.

    The size / receptive-field (RF) comments below assume a 1x28x28 input.
    Because the final pooling is adaptive, larger inputs are also reduced to
    one value per channel instead of being folded into the batch dimension.
    """

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv2d(3x3, padding=0) -> BatchNorm2d -> ReLU block."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(3, 3), padding=0),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
        )

    def __init__(self):
        super(Model3, self).__init__()

        # Blocks are created in the same order and under the same attribute
        # names as before, so state_dict keys and seeded initialisation match.
        self.convblock1 = self._conv_block(1, 10)   # INPUT 28*28 | OUTPUT 26*26 | RF 3
        self.convblock2 = self._conv_block(10, 10)  # INPUT 26*26 | OUTPUT 24*24 | RF 5
        self.convblock3 = self._conv_block(10, 10)  # INPUT 24*24 | OUTPUT 22*22 | RF 7

        self.pool1 = nn.MaxPool2d(2, 2)             # INPUT 22*22 | OUTPUT 11*11 | RF 8

        self.convblock4 = self._conv_block(10, 10)  # INPUT 11*11 | OUTPUT 9*9 | RF 12
        self.convblock5 = self._conv_block(10, 16)  # INPUT 9*9 | OUTPUT 7*7 | RF 16
        self.convblock6 = self._conv_block(16, 10)  # INPUT 7*7 | OUTPUT 5*5 | RF 20
        self.convblock7 = self._conv_block(10, 10)  # INPUT 5*5 | OUTPUT 3*3 | RF 24

        # Global average pooling: averages whatever spatial size is left down
        # to 1x1 (3*3 -> 1*1 for a 28*28 input, RF 28).
        self.gap = nn.Sequential(
            nn.AdaptiveAvgPool2d(1)
        )

        # A single dropout module, applied at two points in forward().
        self.dropout = nn.Dropout(0.1) #increased dropout

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10) for a batch of 1-channel images."""
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.dropout(x)  #dropout
        x = self.convblock3(x)
        x = self.pool1(x)
        x = self.convblock4(x)
        x = self.convblock5(x)
        x = self.dropout(x) #dropout
        x = self.convblock6(x)
        x = self.convblock7(x)
        x = self.gap(x)
        # Flatten per sample so the batch dimension is always preserved.
        x = x.view(x.size(0), -1)
        return F.log_softmax(x, dim=-1)

In [None]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model = Model3().to(device)
summary(model, input_size=(1, 28, 28))

cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 26, 26]             100
       BatchNorm2d-2           [-1, 10, 26, 26]              20
              ReLU-3           [-1, 10, 26, 26]               0
            Conv2d-4           [-1, 10, 24, 24]             910
       BatchNorm2d-5           [-1, 10, 24, 24]              20
              ReLU-6           [-1, 10, 24, 24]               0
           Dropout-7           [-1, 10, 24, 24]               0
            Conv2d-8           [-1, 10, 22, 22]             910
       BatchNorm2d-9           [-1, 10, 22, 22]              20
             ReLU-10           [-1, 10, 22, 22]               0
        MaxPool2d-11           [-1, 10, 11, 11]               0
           Conv2d-12             [-1, 10, 9, 9]             910
      BatchNorm2d-13             [-1, 10, 9, 9]              20
             ReLU-14             [

In [None]:
from tqdm import tqdm

# Metric histories filled in by train() and test().
train_losses, train_acc = [], []  # one entry appended per training batch
test_losses, test_acc = [], []    # one entry appended per call to test()

def train(model, device, train_loader, optimizer, epoch):
  """Run one training pass over `train_loader`, updating `model` in place.

  For every batch the loss value is appended to the module-level
  `train_losses` list and the running accuracy (in percent, over the samples
  seen so far in this pass) to `train_acc`.

  Args:
    model: network whose output is fed directly to `F.nll_loss`.
    device: device the batches are moved to before the forward pass.
    train_loader: iterable yielding `(data, target)` batches.
    optimizer: optimizer that updates the model's parameters.
    epoch: current epoch index; accepted for the caller's convenience and
      not used inside this function.
  """
  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0
  for batch_idx, (data, target) in enumerate(pbar):
    # get samples
    data, target = data.to(device), target.to(device)

    # PyTorch accumulates gradients across backward() calls, so they have to
    # be cleared before each new backward pass.
    optimizer.zero_grad()

    # Predict
    y_pred = model(data)

    # Calculate loss
    loss = F.nll_loss(y_pred, target)
    # Record a plain Python float rather than the loss tensor, so the history
    # does not keep tensors (and their autograd references) alive.
    loss_value = loss.item()
    train_losses.append(loss_value)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Update running accuracy and the progress bar
    pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    pbar.set_description(desc= f'Loss={loss_value} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
    train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    """Evaluate `model` on `test_loader` and record the results.

    Puts the model in eval mode, sums the negative log-likelihood loss and the
    number of correct predictions over all batches with gradients disabled,
    then appends the per-sample average loss to `test_losses`, prints a
    summary line and appends the accuracy (in percent) to `test_acc`.
    """
    model.eval()
    total_loss = 0
    num_correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            log_probs = model(images)
            # Sum (rather than average) per batch; normalised once at the end.
            batch_loss = F.nll_loss(log_probs, labels, reduction='sum')
            total_loss += batch_loss.item()
            # Predicted class = index of the largest log-probability.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            num_correct += predicted.eq(labels.view_as(predicted)).sum().item()

    num_samples = len(test_loader.dataset)
    mean_loss = total_loss / num_samples
    test_losses.append(mean_loss)

    accuracy = 100. * num_correct / num_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        mean_loss, num_correct, num_samples, accuracy))

    test_acc.append(accuracy)

In [None]:
# Start from a freshly initialised model and train it with SGD plus momentum.
EPOCHS = 15
model = Model3().to(device)
optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

# Each epoch is one training pass followed by one evaluation pass.
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

EPOCH: 0


Loss=0.11774060130119324 Batch_id=468 Accuracy=93.16: 100%|██████████| 469/469 [00:32<00:00, 14.48it/s]



Test set: Average loss: 0.0979, Accuracy: 9820/10000 (98.20%)

EPOCH: 1


Loss=0.04535025358200073 Batch_id=468 Accuracy=97.93: 100%|██████████| 469/469 [00:31<00:00, 15.04it/s]



Test set: Average loss: 0.0611, Accuracy: 9875/10000 (98.75%)

EPOCH: 2


Loss=0.09495209902524948 Batch_id=468 Accuracy=98.35: 100%|██████████| 469/469 [00:32<00:00, 14.57it/s]



Test set: Average loss: 0.0555, Accuracy: 9862/10000 (98.62%)

EPOCH: 3


Loss=0.07882190495729446 Batch_id=468 Accuracy=98.51: 100%|██████████| 469/469 [00:31<00:00, 15.07it/s]



Test set: Average loss: 0.0390, Accuracy: 9909/10000 (99.09%)

EPOCH: 4


Loss=0.06368156522512436 Batch_id=468 Accuracy=98.65: 100%|██████████| 469/469 [00:31<00:00, 15.06it/s]



Test set: Average loss: 0.0344, Accuracy: 9914/10000 (99.14%)

EPOCH: 5


Loss=0.04764308035373688 Batch_id=468 Accuracy=98.70: 100%|██████████| 469/469 [00:31<00:00, 15.11it/s]



Test set: Average loss: 0.0357, Accuracy: 9906/10000 (99.06%)

EPOCH: 6


Loss=0.10236049443483353 Batch_id=468 Accuracy=98.84: 100%|██████████| 469/469 [00:30<00:00, 15.15it/s]



Test set: Average loss: 0.0361, Accuracy: 9898/10000 (98.98%)

EPOCH: 7


Loss=0.03791040554642677 Batch_id=468 Accuracy=98.91: 100%|██████████| 469/469 [00:30<00:00, 15.26it/s]



Test set: Average loss: 0.0311, Accuracy: 9922/10000 (99.22%)

EPOCH: 8


Loss=0.09331950545310974 Batch_id=468 Accuracy=98.93: 100%|██████████| 469/469 [00:31<00:00, 14.66it/s]



Test set: Average loss: 0.0290, Accuracy: 9913/10000 (99.13%)

EPOCH: 9


Loss=0.05725391209125519 Batch_id=468 Accuracy=99.00: 100%|██████████| 469/469 [00:30<00:00, 15.33it/s]



Test set: Average loss: 0.0259, Accuracy: 9932/10000 (99.32%)

EPOCH: 10


Loss=0.02974691428244114 Batch_id=468 Accuracy=99.03: 100%|██████████| 469/469 [00:30<00:00, 15.28it/s]



Test set: Average loss: 0.0274, Accuracy: 9923/10000 (99.23%)

EPOCH: 11


Loss=0.053218577057123184 Batch_id=468 Accuracy=99.04: 100%|██████████| 469/469 [00:31<00:00, 14.81it/s]



Test set: Average loss: 0.0242, Accuracy: 9931/10000 (99.31%)

EPOCH: 12


Loss=0.02607146091759205 Batch_id=468 Accuracy=99.10: 100%|██████████| 469/469 [00:30<00:00, 15.32it/s]



Test set: Average loss: 0.0242, Accuracy: 9930/10000 (99.30%)

EPOCH: 13


Loss=0.023845404386520386 Batch_id=468 Accuracy=99.13: 100%|██████████| 469/469 [00:31<00:00, 14.82it/s]



Test set: Average loss: 0.0230, Accuracy: 9932/10000 (99.32%)

EPOCH: 14


Loss=0.03304101154208183 Batch_id=468 Accuracy=99.15: 100%|██████████| 469/469 [00:30<00:00, 15.38it/s]



Test set: Average loss: 0.0236, Accuracy: 9927/10000 (99.27%)



**TARGET**

Keep the parameters less than 8000

Reduce the overfitting

Reach test accuracy > 99.4

**RESULT**

Parameters :  6,798

Best Training Accuracy : 99.15

Best Test Accuracy : 99.32

**ANALYSIS**

The total parameter count has been kept under 8000

The model is not overfitting any more

Required test accuracy > 99.4% has not been reached yet

**WHAT CAN BE DONE**

Gradually increase the number of feature maps in the intermediate layers while still keeping the parameter count under 8000

Introduce a scheduler such as StepLR to adjust the learning rate during training

Try the Adam optimizer with a lower learning rate