In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
# Train Phase transformations
train_transforms = transforms.Compose([
                                      #  transforms.Resize((28, 28)),
                                      #  transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
                                       transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,)) # The mean and std have to be sequences (e.g., tuples), therefore you should add a comma after the values. 
                                       # Note the difference between (0.1307) and (0.1307,)
                                       ])

# Test Phase transformations
test_transforms = transforms.Compose([
                                      #  transforms.Resize((28, 28)),
                                      #  transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,))
                                       ])


In [3]:
train = datasets.MNIST('./data', train=True, download=True, transform=train_transforms)
test = datasets.MNIST('./data', train=False, download=True, transform=test_transforms)

In [4]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# dataloader arguments - something you'll fetch these from cmdprmt
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=4, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

# train dataloader
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test dataloader
test_loader = torch.utils.data.DataLoader(test, **dataloader_args)

CUDA Available? True


In [5]:

# figure = plt.figure()
# num_of_images = 60
# for index in range(1, num_of_images + 1):
#     plt.subplot(6, 10, index)
#     plt.axis('off')
#     plt.imshow(images[index].numpy().squeeze(), cmap='gray_r')

In [6]:
dropout_value = 0.1
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Input Block
        self.convblock1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(dropout_value)
        ) # output_size = 26

        # CONVOLUTION BLOCK 1
        self.convblock2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(dropout_value)
        ) # output_size = 24

        # TRANSITION BLOCK 1
        self.convblock3 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
        ) # output_size = 24
        self.pool1 = nn.MaxPool2d(2, 2) # output_size = 12

        # CONVOLUTION BLOCK 2
        self.convblock4 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),            
            nn.BatchNorm2d(16),
            nn.Dropout(dropout_value)
        ) # output_size = 10
        self.convblock5 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),            
            nn.BatchNorm2d(16),
            nn.Dropout(dropout_value)
        ) # output_size = 8
        self.convblock6 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=10
                      , kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),            
            nn.BatchNorm2d(10),
            nn.Dropout(dropout_value)
        ) # output_size = 6
        self.convblock7 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(3, 3), padding=1, bias=False),
            nn.ReLU(),            
            nn.BatchNorm2d(10),
            nn.Dropout(dropout_value)
        ) # output_size = 6
        
        # OUTPUT BLOCK
        self.gap = nn.Sequential(
            nn.AvgPool2d(kernel_size=6)
        ) # output_size = 1

        self.convblock8 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
            # nn.BatchNorm2d(10),
            # nn.ReLU(),
            # nn.Dropout(dropout_value)
        ) 


        self.dropout = nn.Dropout(dropout_value)

    def forward(self, x):
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.convblock3(x)
        x = self.pool1(x)
        x = self.convblock4(x)
        x = self.convblock5(x)
        x = self.convblock6(x)
        x = self.convblock7(x)
        x = self.gap(x)        
        x = self.convblock8(x)

        x = x.view(-1, 10)
        return F.log_softmax(x, dim=-1)

In [7]:
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
              ReLU-2           [-1, 16, 26, 26]               0
       BatchNorm2d-3           [-1, 16, 26, 26]              32
           Dropout-4           [-1, 16, 26, 26]               0
            Conv2d-5           [-1, 16, 24, 24]           2,304
              ReLU-6           [-1, 16, 24, 24]               0
       BatchNorm2d-7           [-1, 16, 24, 24]              32
           Dropout-8           [-1, 16, 24, 24]               0
            Conv2d-9           [-1, 10, 24, 24]             160
        MaxPool2d-10           [-1, 10, 12, 12]               0
           Conv2d-11           [-1, 16, 10, 10]           1,440
             ReLU-12           [-1, 16, 10, 10]               0
      BatchNorm2d-13           [-1, 16, 10, 10]              32
          Dropout-14           [-1

In [8]:
from tqdm import tqdm

train_losses = []
test_losses = []
train_acc = []
test_acc = []

def train(model, device, train_loader, optimizer, epoch):
  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0
  for batch_idx, (data, target) in enumerate(pbar):
    # get samples
    data, target = data.to(device), target.to(device)

    # Init
    optimizer.zero_grad()
    # In PyTorch, we need to set the gradients to zero before starting to do backpropragation because PyTorch accumulates the gradients on subsequent backward passes. 
    # Because of this, when you start your training loop, ideally you should zero out the gradients so that you do the parameter update correctly.

    # Predict
    y_pred = model(data)

    # Calculate loss
    loss = F.nll_loss(y_pred, target)
    train_losses.append(loss)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Update pbar-tqdm
    
    pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    pbar.set_description(desc= f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
    train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
    test_acc.append(100. * correct / len(test_loader.dataset))

In [12]:
from torch.optim.lr_scheduler import StepLR, LambdaLR

model =  Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# scheduler = StepLR(optimizer, step_size=4, gamma=0.1)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[6,8,9], gamma=0.1)

EPOCHS = 15
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

EPOCH: 0


Loss=0.08965738862752914 Batch_id=468 Accuracy=86.30: 100%|██████████████████████████| 469/469 [00:06<00:00, 70.75it/s]



Test set: Average loss: 0.0868, Accuracy: 9775/10000 (97.75%)

EPOCH: 1


Loss=0.04424923285841942 Batch_id=468 Accuracy=97.11: 100%|██████████████████████████| 469/469 [00:06<00:00, 73.50it/s]



Test set: Average loss: 0.0470, Accuracy: 9864/10000 (98.64%)

EPOCH: 2


Loss=0.0767342820763588 Batch_id=468 Accuracy=97.81: 100%|███████████████████████████| 469/469 [00:06<00:00, 71.06it/s]



Test set: Average loss: 0.0371, Accuracy: 9890/10000 (98.90%)

EPOCH: 3


Loss=0.09889210015535355 Batch_id=468 Accuracy=98.09: 100%|██████████████████████████| 469/469 [00:06<00:00, 71.88it/s]



Test set: Average loss: 0.0283, Accuracy: 9914/10000 (99.14%)

EPOCH: 4


Loss=0.00900891050696373 Batch_id=468 Accuracy=98.22: 100%|██████████████████████████| 469/469 [00:06<00:00, 72.13it/s]



Test set: Average loss: 0.0309, Accuracy: 9908/10000 (99.08%)

EPOCH: 5


Loss=0.053252458572387695 Batch_id=468 Accuracy=98.36: 100%|█████████████████████████| 469/469 [00:06<00:00, 70.92it/s]



Test set: Average loss: 0.0249, Accuracy: 9928/10000 (99.28%)

EPOCH: 6


Loss=0.08922633528709412 Batch_id=468 Accuracy=98.38: 100%|██████████████████████████| 469/469 [00:06<00:00, 69.75it/s]



Test set: Average loss: 0.0244, Accuracy: 9930/10000 (99.30%)

EPOCH: 7


Loss=0.024826057255268097 Batch_id=468 Accuracy=98.56: 100%|█████████████████████████| 469/469 [00:06<00:00, 71.08it/s]



Test set: Average loss: 0.0268, Accuracy: 9921/10000 (99.21%)

EPOCH: 8


Loss=0.06517934054136276 Batch_id=468 Accuracy=98.56: 100%|██████████████████████████| 469/469 [00:06<00:00, 71.20it/s]



Test set: Average loss: 0.0280, Accuracy: 9920/10000 (99.20%)

EPOCH: 9


Loss=0.047742992639541626 Batch_id=468 Accuracy=98.62: 100%|█████████████████████████| 469/469 [00:06<00:00, 70.11it/s]



Test set: Average loss: 0.0245, Accuracy: 9926/10000 (99.26%)

EPOCH: 10


Loss=0.024365544319152832 Batch_id=468 Accuracy=98.68: 100%|█████████████████████████| 469/469 [00:06<00:00, 72.90it/s]



Test set: Average loss: 0.0246, Accuracy: 9923/10000 (99.23%)

EPOCH: 11


Loss=0.04483857378363609 Batch_id=468 Accuracy=98.74: 100%|██████████████████████████| 469/469 [00:06<00:00, 70.89it/s]



Test set: Average loss: 0.0250, Accuracy: 9929/10000 (99.29%)

EPOCH: 12


Loss=0.04694798216223717 Batch_id=468 Accuracy=98.79: 100%|██████████████████████████| 469/469 [00:06<00:00, 74.60it/s]



Test set: Average loss: 0.0247, Accuracy: 9920/10000 (99.20%)

EPOCH: 13


Loss=0.063986636698246 Batch_id=468 Accuracy=98.67: 100%|████████████████████████████| 469/469 [00:06<00:00, 70.68it/s]



Test set: Average loss: 0.0207, Accuracy: 9939/10000 (99.39%)

EPOCH: 14


Loss=0.016594987362623215 Batch_id=468 Accuracy=98.72: 100%|█████████████████████████| 469/469 [00:06<00:00, 74.96it/s]



Test set: Average loss: 0.0228, Accuracy: 9940/10000 (99.40%)



### Target:
    1.99.4% (this must be consistently shown in your last few epochs, and not a one-time achievement)
    2.Less than or equal to 15 Epochs
    3.Less than 10000 Parameters (additional points for doing this in less than 8000 pts)
    

### Results:
    1. Achieved test accuracy > 99.30(average over a few epochs)
    2. Trained the model for 15 epochs
    3. Best Train accuracy: 98.80
    4. Best Test Accuracy: 99.40
    3. Total number of parameters: 8960


### Analysis:
    1. Model is slightly under-fitting now.
    2. We have used data augmentation i.e., slight random rotation to make the training harder.
    3. We have used Dropout layers and made the training a bit harder.
    4. However, the test accuracy of the model has increased, and reached 99.40. 
    5. We have used GAP layer (the penultimate layer).
    6. We have also used MultistepLR for the Learning rate scheduler, a better learning rate scheduling could help the model acheive above 99.4 accuracy.