<a href="https://colab.research.google.com/github/ShubhamVerma16/TSAI_ERA_Session6/blob/master/ERA_Session_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Block 1: Imports

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchsummary import summary

Block 2: Change the architecure for RF = 30

In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, 3, padding=0)
        self.bn1 = nn.BatchNorm2d(8)
        self.conv2 = nn.Conv2d(8, 16, 3, padding=0)
        self.bn2 = nn.BatchNorm2d(16)
        self.conv8 = nn.Conv2d(16, 16, 3, padding=0)
        self.bn3 = nn.BatchNorm2d(16)
        self.conv3 = nn.Conv2d(16, 32, 3, padding=0)
        self.bn4 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(0.1)

        # Trasition block
        self.conv4 = nn.Conv2d(32, 8, kernel_size=(1, 1), padding=0)


        self.conv5 = nn.Conv2d(8, 16, 3, padding=0)
        self.bn5 = nn.BatchNorm2d(16)
        self.conv9 = nn.Conv2d(16, 16, 3, padding=0)
        self.bn6 = nn.BatchNorm2d(16)
        self.conv6 = nn.Conv2d(16, 16, 3, padding=0)
        self.bn7 = nn.BatchNorm2d(16)
        self.conv7 = nn.Conv2d(16, 32, 3, padding=0)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.dropout2 = nn.Dropout(0.1)

        self.fc1 = nn.Linear(32, 10)

    def forward(self, x):
        x = F.relu(self.bn3(self.conv8(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(x)))))))))
        x = self.pool1(F.relu(self.bn4(self.conv3(x))))
        x = self.dropout1(x)
        x = F.relu(self.conv4(x))
        x = F.relu(self.bn6(self.conv9(F.relu(self.bn5(self.conv5(x))))))
        x = F.relu(self.conv7(F.relu(self.bn7(self.conv6(x)))))
        x = self.pool2(x)
        x = self.dropout2(x)
        x = x.view(-1, 32)
        x = self.fc1(x)
        return F.log_softmax(x)

Block 3: Check number of paramters to be under 20k

In [3]:
# !pip install torchsummary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28), batch_size=-1)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              80
       BatchNorm2d-2            [-1, 8, 26, 26]              16
            Conv2d-3           [-1, 16, 24, 24]           1,168
       BatchNorm2d-4           [-1, 16, 24, 24]              32
            Conv2d-5           [-1, 16, 22, 22]           2,320
       BatchNorm2d-6           [-1, 16, 22, 22]              32
            Conv2d-7           [-1, 32, 20, 20]           4,640
       BatchNorm2d-8           [-1, 32, 20, 20]              64
         MaxPool2d-9           [-1, 32, 10, 10]               0
          Dropout-10           [-1, 32, 10, 10]               0
           Conv2d-11            [-1, 8, 10, 10]             264
           Conv2d-12             [-1, 16, 8, 8]           1,168
      BatchNorm2d-13             [-1, 16, 8, 8]              32
           Conv2d-14             [-1, 1

  return F.log_softmax(x)


Block 4: Add augmentation to trainer

In [4]:
torch.manual_seed(1)
batch_size = 128

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.RandomCrop(28),
                        transforms.RandomErasing(p=0.5),
                        # transforms.ColorJitter(brightness=0.2, contrast=0.2),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 271044006.78it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 57711145.22it/s]


Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 43887572.16it/s]


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 6769910.72it/s]


Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



Block 5: Define train and test

In [5]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

Block 6: Train with Multi step scheduler and Adam optimizer

In [6]:
model = Net().to(device)
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[6,12,16], gamma=0.5)

for epoch in range(18):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    scheduler.step()

  return F.log_softmax(x)
loss=0.17570102214813232 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.81it/s]



Test set: Average loss: 0.0677, Accuracy: 9790/10000 (98%)



loss=0.1652653068304062 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 16.84it/s]



Test set: Average loss: 0.0526, Accuracy: 9848/10000 (98%)



loss=0.10555830597877502 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.62it/s]



Test set: Average loss: 0.0569, Accuracy: 9819/10000 (98%)



loss=0.09144250303506851 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.63it/s]



Test set: Average loss: 0.0493, Accuracy: 9845/10000 (98%)



loss=0.22551293671131134 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.18it/s]



Test set: Average loss: 0.0370, Accuracy: 9876/10000 (99%)



loss=0.15330958366394043 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.55it/s]



Test set: Average loss: 0.0329, Accuracy: 9898/10000 (99%)



loss=0.08934164047241211 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.64it/s]



Test set: Average loss: 0.0272, Accuracy: 9911/10000 (99%)



loss=0.08328797668218613 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.39it/s]



Test set: Average loss: 0.0247, Accuracy: 9914/10000 (99%)



loss=0.13086916506290436 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.52it/s]



Test set: Average loss: 0.0285, Accuracy: 9906/10000 (99%)



loss=0.1479838639497757 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 16.84it/s]



Test set: Average loss: 0.0270, Accuracy: 9926/10000 (99%)



loss=0.2233399599790573 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.18it/s]



Test set: Average loss: 0.0304, Accuracy: 9915/10000 (99%)



loss=0.25294965505599976 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.23it/s]



Test set: Average loss: 0.0251, Accuracy: 9918/10000 (99%)



loss=0.10004772990942001 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.52it/s]



Test set: Average loss: 0.0222, Accuracy: 9930/10000 (99%)



loss=0.10571103543043137 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.45it/s]



Test set: Average loss: 0.0215, Accuracy: 9932/10000 (99%)



loss=0.19031083583831787 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.17it/s]



Test set: Average loss: 0.0215, Accuracy: 9940/10000 (99%)



loss=0.058495666831731796 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.32it/s]



Test set: Average loss: 0.0239, Accuracy: 9921/10000 (99%)



loss=0.13189150393009186 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.36it/s]



Test set: Average loss: 0.0191, Accuracy: 9936/10000 (99%)



loss=0.1703077107667923 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.45it/s]



Test set: Average loss: 0.0201, Accuracy: 9935/10000 (99%)



Block 7: Train with 1 step scheduler and Adam optimizer

In [7]:
model2 = Net().to(device)
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam(model2.parameters(), lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.8)

for epoch in range(18):
    train(model2, device, train_loader, optimizer, epoch)
    test(model2, device, test_loader)
    scheduler.step()


  return F.log_softmax(x)
loss=0.17611147463321686 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.40it/s]



Test set: Average loss: 0.0646, Accuracy: 9803/10000 (98%)



loss=0.13816742599010468 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.11it/s]



Test set: Average loss: 0.0541, Accuracy: 9821/10000 (98%)



loss=0.2776428163051605 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.47it/s]



Test set: Average loss: 0.0418, Accuracy: 9866/10000 (99%)



loss=0.09645143896341324 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 16.98it/s]



Test set: Average loss: 0.0325, Accuracy: 9899/10000 (99%)



loss=0.23496700823307037 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.23it/s]



Test set: Average loss: 0.0284, Accuracy: 9910/10000 (99%)



loss=0.17620187997817993 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.49it/s]



Test set: Average loss: 0.0260, Accuracy: 9921/10000 (99%)



loss=0.10646042227745056 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.27it/s]



Test set: Average loss: 0.0270, Accuracy: 9908/10000 (99%)



loss=0.10736765712499619 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.39it/s]



Test set: Average loss: 0.0261, Accuracy: 9911/10000 (99%)



loss=0.10464320331811905 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 16.90it/s]



Test set: Average loss: 0.0223, Accuracy: 9929/10000 (99%)



loss=0.19142498075962067 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.42it/s]



Test set: Average loss: 0.0228, Accuracy: 9923/10000 (99%)



loss=0.15132619440555573 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.44it/s]



Test set: Average loss: 0.0232, Accuracy: 9930/10000 (99%)



loss=0.08544107526540756 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.29it/s]



Test set: Average loss: 0.0218, Accuracy: 9930/10000 (99%)



loss=0.13052237033843994 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.32it/s]



Test set: Average loss: 0.0210, Accuracy: 9927/10000 (99%)



loss=0.10236921161413193 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 16.86it/s]



Test set: Average loss: 0.0216, Accuracy: 9934/10000 (99%)



loss=0.08195343613624573 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.43it/s]



Test set: Average loss: 0.0228, Accuracy: 9920/10000 (99%)



loss=0.04794327914714813 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.28it/s]



Test set: Average loss: 0.0220, Accuracy: 9926/10000 (99%)



loss=0.08465364575386047 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.52it/s]



Test set: Average loss: 0.0206, Accuracy: 9935/10000 (99%)



loss=0.05821748450398445 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.36it/s]



Test set: Average loss: 0.0207, Accuracy: 9933/10000 (99%)



Block 8: Train with 1 step scheduler and SGD optimizer

In [8]:
model_3 = Net().to(device)
optimizer = optim.SGD(model_3.parameters(), lr=0.01, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.8)

for epoch in range(18):
    train(model_3, device, train_loader, optimizer, epoch)
    test(model_3, device, test_loader)
    scheduler.step()


  return F.log_softmax(x)
loss=0.28209736943244934 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.28it/s]



Test set: Average loss: 0.0822, Accuracy: 9739/10000 (97%)



loss=0.2302754521369934 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.48it/s]



Test set: Average loss: 0.0582, Accuracy: 9815/10000 (98%)



loss=0.03260466828942299 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.42it/s]



Test set: Average loss: 0.0411, Accuracy: 9871/10000 (99%)



loss=0.17702990770339966 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.31it/s]



Test set: Average loss: 0.0380, Accuracy: 9889/10000 (99%)



loss=0.11967682838439941 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.52it/s]



Test set: Average loss: 0.0317, Accuracy: 9895/10000 (99%)



loss=0.1361275315284729 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.28it/s]



Test set: Average loss: 0.0303, Accuracy: 9902/10000 (99%)



loss=0.23267145454883575 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.48it/s]



Test set: Average loss: 0.0286, Accuracy: 9912/10000 (99%)



loss=0.18616116046905518 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.41it/s]



Test set: Average loss: 0.0266, Accuracy: 9919/10000 (99%)



loss=0.07040543109178543 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.70it/s]



Test set: Average loss: 0.0269, Accuracy: 9912/10000 (99%)



loss=0.07913694530725479 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.67it/s]



Test set: Average loss: 0.0265, Accuracy: 9912/10000 (99%)



loss=0.13770173490047455 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.17it/s]



Test set: Average loss: 0.0254, Accuracy: 9915/10000 (99%)



loss=0.19049493968486786 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.50it/s]



Test set: Average loss: 0.0245, Accuracy: 9913/10000 (99%)



loss=0.046915698796510696 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.46it/s]



Test set: Average loss: 0.0237, Accuracy: 9919/10000 (99%)



loss=0.18478961288928986 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.53it/s]



Test set: Average loss: 0.0236, Accuracy: 9921/10000 (99%)



loss=0.06623183190822601 batch_id=468: 100%|██████████| 469/469 [00:28<00:00, 16.45it/s]



Test set: Average loss: 0.0240, Accuracy: 9917/10000 (99%)



loss=0.11968610435724258 batch_id=468: 100%|██████████| 469/469 [00:29<00:00, 16.08it/s]



Test set: Average loss: 0.0237, Accuracy: 9915/10000 (99%)



loss=0.13381271064281464 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.48it/s]



Test set: Average loss: 0.0231, Accuracy: 9918/10000 (99%)



loss=0.08739315718412399 batch_id=468: 100%|██████████| 469/469 [00:29<00:00, 16.06it/s]



Test set: Average loss: 0.0231, Accuracy: 9921/10000 (99%)



Block 9: Train with 2 step scheduler and SGD optimizer

In [9]:
model4 = Net().to(device)
optimizer = optim.SGD(model4.parameters(), lr=0.01, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)

for epoch in range(18):
    train(model4, device, train_loader, optimizer, epoch)
    test(model4, device, test_loader)
    scheduler.step()


  return F.log_softmax(x)
loss=0.16129286587238312 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.79it/s]



Test set: Average loss: 0.0927, Accuracy: 9698/10000 (97%)



loss=0.3369559049606323 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.75it/s]



Test set: Average loss: 0.0513, Accuracy: 9836/10000 (98%)



loss=0.19403080642223358 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.50it/s]



Test set: Average loss: 0.0371, Accuracy: 9879/10000 (99%)



loss=0.13854096829891205 batch_id=468: 100%|██████████| 469/469 [00:28<00:00, 16.46it/s]



Test set: Average loss: 0.0362, Accuracy: 9888/10000 (99%)



loss=0.2132611721754074 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.63it/s]



Test set: Average loss: 0.0343, Accuracy: 9892/10000 (99%)



loss=0.2180536985397339 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.36it/s]



Test set: Average loss: 0.0287, Accuracy: 9904/10000 (99%)



loss=0.10847339779138565 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.29it/s]



Test set: Average loss: 0.0279, Accuracy: 9905/10000 (99%)



loss=0.14636994898319244 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.60it/s]



Test set: Average loss: 0.0286, Accuracy: 9911/10000 (99%)



loss=0.17633746564388275 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.45it/s]



Test set: Average loss: 0.0274, Accuracy: 9909/10000 (99%)



loss=0.20505039393901825 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 17.32it/s]



Test set: Average loss: 0.0267, Accuracy: 9913/10000 (99%)



loss=0.043216902762651443 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.46it/s]



Test set: Average loss: 0.0265, Accuracy: 9916/10000 (99%)



loss=0.12759342789649963 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.73it/s]



Test set: Average loss: 0.0251, Accuracy: 9921/10000 (99%)



loss=0.08608575910329819 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.47it/s]



Test set: Average loss: 0.0254, Accuracy: 9918/10000 (99%)



loss=0.09045607596635818 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.69it/s]



Test set: Average loss: 0.0228, Accuracy: 9931/10000 (99%)



loss=0.1602618247270584 batch_id=468: 100%|██████████| 469/469 [00:27<00:00, 16.94it/s]



Test set: Average loss: 0.0244, Accuracy: 9917/10000 (99%)



loss=0.06405670195817947 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.75it/s]



Test set: Average loss: 0.0233, Accuracy: 9922/10000 (99%)



loss=0.07001777738332748 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.74it/s]



Test set: Average loss: 0.0230, Accuracy: 9926/10000 (99%)



loss=0.06691309064626694 batch_id=468: 100%|██████████| 469/469 [00:26<00:00, 17.78it/s]



Test set: Average loss: 0.0231, Accuracy: 9916/10000 (99%)

