<a href="https://colab.research.google.com/github/iamSarang/DS-Projects/blob/master/Untitled17.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR,OneCycleLR

# Let's visualize some of the images
%matplotlib inline
import matplotlib.pyplot as plt

In [6]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()


        #Block 1
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 8, 3,padding=0,bias=False),  # 28x28 output 28x28 RF : 3x3
            nn.ReLU(),
            nn.BatchNorm2d(8),
            nn.Dropout2d(0.1),

            nn.Conv2d(8, 16, 3,padding=0,bias=False), # 28x28 output 28x28 RF : 5x5
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout2d(0.1),
           
        )

        #Transition Block (MaxPool + 1x1)
        self.trans1 = nn.Sequential(

            # 1x1 convolution
            nn.Conv2d(16, 10, 1,bias=False), # 26x26 output - 26x26 RF 14x14
            nn.ReLU(),

            nn.MaxPool2d(2, 2),  # 26x26 output - 13x13 RF 14x14

        )

        #Block 2
        self.conv2 =  nn.Sequential(

            nn.Conv2d(10, 16, 3,padding=0, bias=False), # 13x13 output - 11x11 RF 16x16
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout2d(0.1),

            nn.Conv2d(16, 16, 3,padding=0, bias=False),  # 11x11 output - 9x9 RF 18x18
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout2d(0.1), 

            nn.Conv2d(16, 16, 3,padding=0, bias=False), # 9x9 output - 7x7 RF 20x20
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout2d(0.1), 

            
        )


        #Block 4 and GAP Layer
        self.avg_pool = nn.Sequential(
            nn.AvgPool2d(kernel_size = 6)
        )

        # Fully Connected layer - Used 1x1 
        self.conv_4 = nn.Sequential(

            # 1x1 convolution
            nn.Conv2d(16, 16, 1,bias=False), # 11x11 output - 9x9 RF 18x18
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout2d(0.1), 

            nn.Conv2d(16, 10, 1,bias=False), # 11x11 output - 9x9 RF 18x18
        )

        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.trans1(x)
        x = self.conv2(x)

        x = self.avg_pool(x)
        x = self.conv_4(x)

        x = x.view(-1,10)
        return F.log_softmax(x,dim=1)

In [7]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              72
              ReLU-2            [-1, 8, 26, 26]               0
       BatchNorm2d-3            [-1, 8, 26, 26]              16
         Dropout2d-4            [-1, 8, 26, 26]               0
            Conv2d-5           [-1, 16, 24, 24]           1,152
              ReLU-6           [-1, 16, 24, 24]               0
       BatchNorm2d-7           [-1, 16, 24, 24]              32
         Dropout2d-8           [-1, 16, 24, 24]               0
            Conv2d-9           [-1, 10, 24, 24]             160
             ReLU-10           [-1, 10, 24, 24]               0
        MaxPool2d-11           [-1, 10, 12, 12]               0
           Conv2d-12           [-1, 16, 10, 10]           1,440
             ReLU-13           [-1, 16, 10, 10]               0
      BatchNorm2d-14           [-1, 16,

In [15]:
torch.manual_seed(1)
batch_size = 128

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train = datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        #transforms.RandomRotation(((-7.0, 7.0)),fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))

test = datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, **kwargs)

In [16]:
model.eval()

Net(
  (conv1): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout2d(p=0.1, inplace=False)
    (4): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (5): ReLU()
    (6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Dropout2d(p=0.1, inplace=False)
  )
  (trans1): Sequential(
    (0): Conv2d(16, 10, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(10, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): ReLU()
    (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout2d(p=0.1, inplace=False)
    (4): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (5): ReLU()
    (6): Bat

In [17]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 8,024 trainable parameters


In [18]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    epoch_loss=0
    correct = 0
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()
        scheduler.step()

        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

        pbar.set_description(desc= f'epoch={epoch} Loss={loss.item()} batch_id={batch_idx:05d}')


    train_loss = epoch_loss / len(train_loader.dataset)
    train_acc=100.*correct/len(train_loader.dataset)
    return train_loss,train_acc


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    test_acc=100. * correct / len(test_loader.dataset)
    return test_loss,test_acc

In [20]:

model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

train_loss_values = []
test_loss_values = []
train_accuracy=[]
test_accuracy=[]

scheduler = OneCycleLR(optimizer, max_lr=0.1,epochs=15,steps_per_epoch=len(train_loader))

for epoch in range(1, 15):
    train_loss,train_acc = train(model, device, train_loader, optimizer, epoch)
    test_loss,test_acc = test(model, device, test_loader)

    train_loss_values.append(train_loss)
    test_loss_values.append(test_loss)

    train_accuracy.append(train_acc)
    test_accuracy.append(test_acc)

    # scheduler.step()

epoch=1 Loss=0.19965820014476776 batch_id=00468: 100%|██████████| 469/469 [01:03<00:00,  7.40it/s]
epoch=2 Loss=0.17928145825862885 batch_id=00000:   0%|          | 1/469 [00:00<01:01,  7.58it/s]

Test set: Average loss: 0.0732, Accuracy: 9776/10000 (97.76%)



epoch=2 Loss=0.16851069033145905 batch_id=00468: 100%|██████████| 469/469 [01:03<00:00,  7.37it/s]
epoch=3 Loss=0.12103331089019775 batch_id=00000:   0%|          | 1/469 [00:00<01:02,  7.46it/s]

Test set: Average loss: 0.0623, Accuracy: 9787/10000 (97.87%)



epoch=3 Loss=0.12346119433641434 batch_id=00468: 100%|██████████| 469/469 [01:03<00:00,  7.36it/s]
epoch=4 Loss=0.09175001829862595 batch_id=00000:   0%|          | 1/469 [00:00<01:00,  7.77it/s]

Test set: Average loss: 0.0380, Accuracy: 9874/10000 (98.74%)



epoch=4 Loss=0.03957116976380348 batch_id=00468: 100%|██████████| 469/469 [01:04<00:00,  7.30it/s]
epoch=5 Loss=0.11241202801465988 batch_id=00000:   0%|          | 1/469 [00:00<01:00,  7.75it/s]

Test set: Average loss: 0.0399, Accuracy: 9872/10000 (98.72%)



epoch=5 Loss=0.22286708652973175 batch_id=00468: 100%|██████████| 469/469 [01:03<00:00,  7.37it/s]
epoch=6 Loss=0.08197598904371262 batch_id=00000:   0%|          | 1/469 [00:00<01:07,  6.96it/s]

Test set: Average loss: 0.0340, Accuracy: 9892/10000 (98.92%)



epoch=6 Loss=0.0820297822356224 batch_id=00468: 100%|██████████| 469/469 [01:03<00:00,  7.35it/s]
epoch=7 Loss=0.08779382705688477 batch_id=00000:   0%|          | 1/469 [00:00<01:03,  7.33it/s]

Test set: Average loss: 0.0316, Accuracy: 9902/10000 (99.02%)



epoch=7 Loss=0.03478578105568886 batch_id=00468: 100%|██████████| 469/469 [01:03<00:00,  7.37it/s]
epoch=8 Loss=0.0877101793885231 batch_id=00000:   0%|          | 1/469 [00:00<00:59,  7.86it/s]

Test set: Average loss: 0.0271, Accuracy: 9911/10000 (99.11%)



epoch=8 Loss=0.040903475135564804 batch_id=00468: 100%|██████████| 469/469 [01:04<00:00,  7.31it/s]
epoch=9 Loss=0.0997300073504448 batch_id=00000:   0%|          | 1/469 [00:00<01:01,  7.60it/s]

Test set: Average loss: 0.0263, Accuracy: 9910/10000 (99.10%)



epoch=9 Loss=0.11018317192792892 batch_id=00468: 100%|██████████| 469/469 [01:03<00:00,  7.37it/s]
epoch=10 Loss=0.09990780055522919 batch_id=00000:   0%|          | 1/469 [00:00<01:01,  7.55it/s]

Test set: Average loss: 0.0247, Accuracy: 9914/10000 (99.14%)



epoch=10 Loss=0.1544773429632187 batch_id=00468: 100%|██████████| 469/469 [01:03<00:00,  7.41it/s]
epoch=11 Loss=0.08482768386602402 batch_id=00000:   0%|          | 1/469 [00:00<01:01,  7.60it/s]

Test set: Average loss: 0.0223, Accuracy: 9933/10000 (99.33%)



epoch=11 Loss=0.06489575654268265 batch_id=00468: 100%|██████████| 469/469 [01:04<00:00,  7.31it/s]
epoch=12 Loss=0.07248642295598984 batch_id=00000:   0%|          | 1/469 [00:00<01:01,  7.57it/s]

Test set: Average loss: 0.0240, Accuracy: 9923/10000 (99.23%)



epoch=12 Loss=0.09710937738418579 batch_id=00468: 100%|██████████| 469/469 [01:04<00:00,  7.30it/s]
epoch=13 Loss=0.04276479408144951 batch_id=00000:   0%|          | 1/469 [00:00<01:05,  7.16it/s]

Test set: Average loss: 0.0198, Accuracy: 9940/10000 (99.40%)



epoch=13 Loss=0.011621884070336819 batch_id=00468: 100%|██████████| 469/469 [01:03<00:00,  7.33it/s]
epoch=14 Loss=0.013580091297626495 batch_id=00000:   0%|          | 1/469 [00:00<01:02,  7.49it/s]

Test set: Average loss: 0.0184, Accuracy: 9942/10000 (99.42%)



epoch=14 Loss=0.03839115425944328 batch_id=00468: 100%|██████████| 469/469 [01:03<00:00,  7.38it/s]


Test set: Average loss: 0.0185, Accuracy: 9945/10000 (99.45%)



In [None]:
import matplotlib.pyplot as plt
% matplotlib inline

import numpy as np
import seaborn as sns

# Use plot styling from seaborn.
sns.set(style='darkgrid')

# Increase the plot size and font size.
sns.set(font_scale=1)
plt.rcParams["figure.figsize"] = (25,6)

# Plot the learning curve.
fig, (ax1,ax2) = plt.subplots(1,2)
ax1.plot(np.array(train_loss_values), 'r', label="Training Loss")
ax1.plot(np.array(test_loss_values), 'b', label="Validation Loss")

# Label the plot.
ax1.set_title("Training & Validation Loss")
ax1.set_xlabel("Epoch")
ax1.set_ylabel("Loss")
ax1.legend()

ax2.plot(np.array(train_accuracy), 'r', label="Training Accuracy")
ax2.plot(np.array(test_accuracy), 'b', label="Validation Accuracy")

# Label the plot.
ax2.set_title("Training & Validation Accuracy")
ax2.set_xlabel("Epoch")
ax2.set_ylabel("Loss")
ax2.legend()

plt.show()