In [77]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

##The layers have been modified to limit maximum numbers of channels to 32.

In [130]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1,3, 3,padding = 1) #Img size 28x28
        self.conv2 = nn.Conv2d(3, 6, 3,padding = 1) #Img size 28x28
        self.pool1 = nn.MaxPool2d(2, 2) #Img size 14x14
        
        self.conv3 = nn.Conv2d(6, 12, 3,padding = 1) #Img size 14x14
        self.conv4 = nn.Conv2d(12, 24, 3,padding = 1) #Img size 14x14
        self.pool2 = nn.MaxPool2d(2, 2) #Img size 7x7
        
        self.conv5 = nn.Conv2d(24, 32, 3) #Img size 5x5
        self.conv6 = nn.Conv2d(32, 12, 3) #Img size 3x3
        self.conv7 = nn.Conv2d(12, 10, 3) #Img size 1x1
        

    def forward(self, x):
        
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool1(x)
        
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)
        
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = F.relu(self.conv7(x))
        
        # output layer
        x = x.view(-1, 10)
        return F.log_softmax(x)

In [131]:
#!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model2 = Net().to(device)
summary(model2, input_size=(1, 28, 28))

cpu
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 3, 28, 28]              30
            Conv2d-2            [-1, 6, 28, 28]             168
         MaxPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 12, 14, 14]             660
            Conv2d-5           [-1, 24, 14, 14]           2,616
         MaxPool2d-6             [-1, 24, 7, 7]               0
            Conv2d-7             [-1, 32, 5, 5]           6,944
            Conv2d-8             [-1, 12, 3, 3]           3,468
            Conv2d-9             [-1, 10, 1, 1]           1,090
Total params: 14,976
Trainable params: 14,976
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.13
Params size (MB): 0.06
Estimated Total Size (MB): 0.19
-----------------------------------------



## The batch size reduction from 128 to 64 showed improvement in accuracy. When batch size was further reduced the accuracy decreased. Also, increasing batch size to 512 and beyond, reduced the accuracy.

In [138]:


torch.manual_seed(1)
batch_size = 64 ## Batch size set to 64

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)


In [81]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.1f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return(round(100. * correct / len(test_loader.dataset),1)) ## Function modified to return accuracy for adjustment of learning rate

##By adjusting learning rate we could achieve target upto 99.3% from 9th epoch onwards. The adjustment rules for learning rates are decsribed below along with the code.

In [132]:
# The learning rate has been adjusted with each epoch depending on the accuracy achieved. 
# As we reach towards 99% the learning rate is reduced to minimize divergence.
model = Net().to(device)
learning_rate = .02 # Learning rate initilaized at .02
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

for epoch in range(1, 20):
  print ('epoch - ',epoch)
  print ('learning rate = ', learning_rate)
  train(model, device, train_loader, optimizer, epoch)
  acc = test(model, device, test_loader)
  if (acc <= 90):
    learning_rate = learning_rate*2 # Increase learning rate for faster convergence
    if (learning_rate > .1):
      learning_rate = .1
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    print ('learning_rate increased to ', learning_rate)
  elif (90 < acc < 99):
    learning_rate = .02 # Set learning at a constant to minimize divergence
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    print ('learning_rate set to ', learning_rate) 
  if (acc >= 99):
    learning_rate=learning_rate/2 # Reduce learning as we reach our target
    if (learning_rate < .0001):
      learning_rate = .0001 # Limit learning rate to .0001
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    print ('learning_rate reduced to ', learning_rate)
  if (acc >= 99.4):
    print('Accuracy 99.4 achieved...')
    break

epoch -  1
learning rate =  0.02


loss=0.13866214454174042 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.64it/s]



Test set: Average loss: 0.0815, Accuracy: 9752/10000 (97.5%)

learning_rate set to  0.02
epoch -  2
learning rate =  0.02


loss=0.0427381657063961 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.60it/s]



Test set: Average loss: 0.0626, Accuracy: 9790/10000 (97.9%)

learning_rate set to  0.02
epoch -  3
learning rate =  0.02


loss=0.021159321069717407 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.64it/s]



Test set: Average loss: 0.0509, Accuracy: 9840/10000 (98.4%)

learning_rate set to  0.02
epoch -  4
learning rate =  0.02


loss=0.012019328773021698 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.57it/s]



Test set: Average loss: 0.0376, Accuracy: 9883/10000 (98.8%)

learning_rate set to  0.02
epoch -  5
learning rate =  0.02


loss=0.0020500440150499344 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.70it/s]



Test set: Average loss: 0.0394, Accuracy: 9879/10000 (98.8%)

learning_rate set to  0.02
epoch -  6
learning rate =  0.02


loss=0.0008113918593153358 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.64it/s]



Test set: Average loss: 0.0339, Accuracy: 9902/10000 (99.0%)

learning_rate reduced to  0.01
epoch -  7
learning rate =  0.01


loss=0.000240879540797323 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.65it/s]



Test set: Average loss: 0.0310, Accuracy: 9909/10000 (99.1%)

learning_rate reduced to  0.005
epoch -  8
learning rate =  0.005


loss=0.012170139700174332 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.50it/s]



Test set: Average loss: 0.0258, Accuracy: 9922/10000 (99.2%)

learning_rate reduced to  0.0025
epoch -  9
learning rate =  0.0025


loss=0.0010943172965198755 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.55it/s]



Test set: Average loss: 0.0266, Accuracy: 9926/10000 (99.3%)

learning_rate reduced to  0.00125
epoch -  10
learning rate =  0.00125


loss=1.3134093023836613e-05 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.75it/s]



Test set: Average loss: 0.0269, Accuracy: 9927/10000 (99.3%)

learning_rate reduced to  0.000625
epoch -  11
learning rate =  0.000625


loss=8.515416993759573e-05 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.61it/s]



Test set: Average loss: 0.0273, Accuracy: 9926/10000 (99.3%)

learning_rate reduced to  0.0003125
epoch -  12
learning rate =  0.0003125


loss=0.1923384815454483 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.50it/s]



Test set: Average loss: 0.0275, Accuracy: 9926/10000 (99.3%)

learning_rate reduced to  0.00015625
epoch -  13
learning rate =  0.00015625


loss=0.005193327087908983 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.47it/s]



Test set: Average loss: 0.0276, Accuracy: 9926/10000 (99.3%)

learning_rate reduced to  0.0001
epoch -  14
learning rate =  0.0001


loss=0.0015098382718861103 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.45it/s]



Test set: Average loss: 0.0276, Accuracy: 9925/10000 (99.2%)

learning_rate reduced to  0.0001
epoch -  15
learning rate =  0.0001


loss=0.00013804758782498538 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.54it/s]



Test set: Average loss: 0.0277, Accuracy: 9925/10000 (99.2%)

learning_rate reduced to  0.0001
epoch -  16
learning rate =  0.0001


loss=1.4687428119941615e-05 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.38it/s]



Test set: Average loss: 0.0278, Accuracy: 9926/10000 (99.3%)

learning_rate reduced to  0.0001
epoch -  17
learning rate =  0.0001


loss=0.001200155820697546 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.43it/s]



Test set: Average loss: 0.0278, Accuracy: 9925/10000 (99.2%)

learning_rate reduced to  0.0001
epoch -  18
learning rate =  0.0001


loss=0.0002162163145840168 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.51it/s]



Test set: Average loss: 0.0279, Accuracy: 9925/10000 (99.2%)

learning_rate reduced to  0.0001
epoch -  19
learning rate =  0.0001


loss=0.0006310287280939519 batch_id=937: 100%|██████████| 938/938 [00:41<00:00, 22.35it/s]



Test set: Average loss: 0.0280, Accuracy: 9925/10000 (99.2%)

learning_rate reduced to  0.0001
