In [5]:
from __future__ import print_function
import torch
import torch.optim as optim
from torchvision import datasets, transforms

from model import Net
from utils import train, test
import torch
import torch.nn as nn
import torch.nn.functional as F


In [14]:

# Prepare for GPU-based training
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Prepare the dataset and dataloaders
# Training data
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomApply([transforms.CenterCrop(22), ], p=0.1),
                        transforms.Resize((28, 28)),
                        transforms.RandomRotation((-11., 11.), fill=0),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

# Testing data
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 205874369.24it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 114820562.87it/s]


Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 68032299.09it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 7021942.04it/s]


Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [15]:
from tqdm import tqdm

train_losses = []
test_losses = []
train_acc = []
test_acc = []


**Light Model**:

***Target***: Building a simple model without any batch normalization, dropout, and global average pooling.

***Results***: Parameters = 8.6K Train Accuracy = 97.01% Test Accuracy = 98.02%

***Analysis***: Model might underperform compared to other versions due to the absence of BN, dropout and GAP also the numbers of paramters have dropped.

In [13]:

# Check if CUDA is available and set PyTorch to use GPU or CPU accordingly
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Initialize the model and send it to GPU if available
model = Net(version=1).to(device)

# Print the model summary
from torchsummary import summary
summary(model, input_size=(1, 28, 28))

torch.manual_seed(1)
batch_size = 512


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              80
              ReLU-2            [-1, 8, 26, 26]               0
            Conv2d-3           [-1, 10, 24, 24]             730
              ReLU-4           [-1, 10, 24, 24]               0
            Conv2d-5           [-1, 10, 22, 22]             910
              ReLU-6           [-1, 10, 22, 22]               0
         MaxPool2d-7           [-1, 10, 11, 11]               0
            Conv2d-8           [-1, 10, 11, 11]             110
              ReLU-9           [-1, 10, 11, 11]               0
           Conv2d-10             [-1, 10, 9, 9]             910
             ReLU-11             [-1, 10, 9, 9]               0
           Conv2d-12             [-1, 10, 7, 7]             910
             ReLU-13             [-1, 10, 7, 7]               0
           Conv2d-14             [-1, 1

In [16]:
# Define the optimizer
from torch.optim.lr_scheduler import StepLR
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = StepLR(optimizer, step_size=6, gamma=0.1)

# Train and test the model
for epoch in range(1, 16):
    print("Epoch: ", epoch)
    print("Training...")
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


Epoch:  1
Training...


Loss=2.3029563426971436 Batch_id=117 Accuracy=11.24: 100%|██████████| 118/118 [00:27<00:00,  4.23it/s]



Test set: Average loss: 2.3011, Accuracy: 1135/10000 (11.35%)

Epoch:  2
Training...


Loss=2.2906429767608643 Batch_id=117 Accuracy=11.24: 100%|██████████| 118/118 [00:28<00:00,  4.16it/s]



Test set: Average loss: 2.3003, Accuracy: 1135/10000 (11.35%)

Epoch:  3
Training...


Loss=2.2844417095184326 Batch_id=117 Accuracy=11.24: 100%|██████████| 118/118 [00:28<00:00,  4.18it/s]



Test set: Average loss: 2.2857, Accuracy: 1135/10000 (11.35%)

Epoch:  4
Training...


Loss=0.33023032546043396 Batch_id=117 Accuracy=59.80: 100%|██████████| 118/118 [00:29<00:00,  4.04it/s]



Test set: Average loss: 0.3174, Accuracy: 9060/10000 (90.60%)

Epoch:  5
Training...


Loss=0.2856599986553192 Batch_id=117 Accuracy=90.80: 100%|██████████| 118/118 [00:28<00:00,  4.12it/s]



Test set: Average loss: 0.1738, Accuracy: 9464/10000 (94.64%)

Epoch:  6
Training...


Loss=0.31251901388168335 Batch_id=117 Accuracy=93.02: 100%|██████████| 118/118 [00:28<00:00,  4.09it/s]



Test set: Average loss: 0.1316, Accuracy: 9583/10000 (95.83%)

Epoch:  7
Training...


Loss=0.13773009181022644 Batch_id=117 Accuracy=94.65: 100%|██████████| 118/118 [00:28<00:00,  4.12it/s]



Test set: Average loss: 0.1121, Accuracy: 9650/10000 (96.50%)

Epoch:  8
Training...


Loss=0.09066584706306458 Batch_id=117 Accuracy=95.52: 100%|██████████| 118/118 [00:29<00:00,  4.04it/s]



Test set: Average loss: 0.1015, Accuracy: 9675/10000 (96.75%)

Epoch:  9
Training...


Loss=0.06053946539759636 Batch_id=117 Accuracy=95.95: 100%|██████████| 118/118 [00:29<00:00,  4.05it/s]



Test set: Average loss: 0.0796, Accuracy: 9740/10000 (97.40%)

Epoch:  10
Training...


Loss=0.1009930670261383 Batch_id=117 Accuracy=96.17: 100%|██████████| 118/118 [00:28<00:00,  4.12it/s]



Test set: Average loss: 0.0735, Accuracy: 9765/10000 (97.65%)

Epoch:  11
Training...


Loss=0.12827371060848236 Batch_id=117 Accuracy=96.38: 100%|██████████| 118/118 [00:28<00:00,  4.16it/s]



Test set: Average loss: 0.0791, Accuracy: 9745/10000 (97.45%)

Epoch:  12
Training...


Loss=0.1139541044831276 Batch_id=117 Accuracy=96.73: 100%|██████████| 118/118 [00:28<00:00,  4.18it/s]



Test set: Average loss: 0.0667, Accuracy: 9789/10000 (97.89%)

Epoch:  13
Training...


Loss=0.02323564887046814 Batch_id=117 Accuracy=96.86: 100%|██████████| 118/118 [00:28<00:00,  4.20it/s]



Test set: Average loss: 0.0570, Accuracy: 9822/10000 (98.22%)

Epoch:  14
Training...


Loss=0.10261467099189758 Batch_id=117 Accuracy=97.03: 100%|██████████| 118/118 [00:28<00:00,  4.20it/s]



Test set: Average loss: 0.0595, Accuracy: 9816/10000 (98.16%)

Epoch:  15
Training...


Loss=0.1254449337720871 Batch_id=117 Accuracy=97.07: 100%|██████████| 118/118 [00:28<00:00,  4.13it/s]



Test set: Average loss: 0.0561, Accuracy: 9826/10000 (98.26%)



**With Batch Normalization (BN)**:

***Target***: Building a model with BN layers , added after each convolution layer to standardize feature maps. As Batch normalization is also to speed up learning and improve generalization

***Results***: Parameters = 8.6K Train Accuracy = 98.76% Test Accuracy = 99.11%

***Analysis***: Batch normalization helped the model train faster and generalize better also the accuracy was improved.

In [17]:

# Check if CUDA is available and set PyTorch to use GPU or CPU accordingly
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Initialize the model and send it to GPU if available
model = Net(version=2).to(device)

# Print the model summary
from torchsummary import summary
summary(model, input_size=(1, 28, 28))

torch.manual_seed(1)
batch_size = 512


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              80
       BatchNorm2d-2            [-1, 8, 26, 26]              16
              ReLU-3            [-1, 8, 26, 26]               0
            Conv2d-4           [-1, 10, 24, 24]             730
       BatchNorm2d-5           [-1, 10, 24, 24]              20
              ReLU-6           [-1, 10, 24, 24]               0
            Conv2d-7           [-1, 10, 22, 22]             910
       BatchNorm2d-8           [-1, 10, 22, 22]              20
              ReLU-9           [-1, 10, 22, 22]               0
        MaxPool2d-10           [-1, 10, 11, 11]               0
           Conv2d-11           [-1, 10, 11, 11]             110
      BatchNorm2d-12           [-1, 10, 11, 11]              20
             ReLU-13           [-1, 10, 11, 11]               0
           Conv2d-14             [-1, 1

In [18]:
# Define the optimizer
from torch.optim.lr_scheduler import StepLR
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = StepLR(optimizer, step_size=6, gamma=0.1)

# Train and test the model
for epoch in range(1, 16):
    print("Epoch: ", epoch)
    print("Training...")
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


Epoch:  1
Training...


Loss=0.13477399945259094 Batch_id=117 Accuracy=83.88: 100%|██████████| 118/118 [00:28<00:00,  4.19it/s]



Test set: Average loss: 0.1201, Accuracy: 9678/10000 (96.78%)

Epoch:  2
Training...


Loss=0.05594182386994362 Batch_id=117 Accuracy=96.10: 100%|██████████| 118/118 [00:28<00:00,  4.18it/s]



Test set: Average loss: 0.0750, Accuracy: 9780/10000 (97.80%)

Epoch:  3
Training...


Loss=0.054112523794174194 Batch_id=117 Accuracy=97.19: 100%|██████████| 118/118 [00:28<00:00,  4.11it/s]



Test set: Average loss: 0.0563, Accuracy: 9818/10000 (98.18%)

Epoch:  4
Training...


Loss=0.014166404493153095 Batch_id=117 Accuracy=97.64: 100%|██████████| 118/118 [00:28<00:00,  4.15it/s]



Test set: Average loss: 0.0509, Accuracy: 9850/10000 (98.50%)

Epoch:  5
Training...


Loss=0.1187276616692543 Batch_id=117 Accuracy=97.99: 100%|██████████| 118/118 [00:28<00:00,  4.10it/s]



Test set: Average loss: 0.0409, Accuracy: 9872/10000 (98.72%)

Epoch:  6
Training...


Loss=0.17414553463459015 Batch_id=117 Accuracy=98.11: 100%|██████████| 118/118 [00:28<00:00,  4.16it/s]



Test set: Average loss: 0.0461, Accuracy: 9842/10000 (98.42%)

Epoch:  7
Training...


Loss=0.09146955609321594 Batch_id=117 Accuracy=98.27: 100%|██████████| 118/118 [00:28<00:00,  4.16it/s]



Test set: Average loss: 0.0373, Accuracy: 9875/10000 (98.75%)

Epoch:  8
Training...


Loss=0.02889256924390793 Batch_id=117 Accuracy=98.34: 100%|██████████| 118/118 [00:28<00:00,  4.12it/s]



Test set: Average loss: 0.0382, Accuracy: 9875/10000 (98.75%)

Epoch:  9
Training...


Loss=0.029868030920624733 Batch_id=117 Accuracy=98.47: 100%|██████████| 118/118 [00:28<00:00,  4.20it/s]



Test set: Average loss: 0.0347, Accuracy: 9885/10000 (98.85%)

Epoch:  10
Training...


Loss=0.06977448612451553 Batch_id=117 Accuracy=98.53: 100%|██████████| 118/118 [00:28<00:00,  4.08it/s]



Test set: Average loss: 0.0306, Accuracy: 9902/10000 (99.02%)

Epoch:  11
Training...


Loss=0.09258631616830826 Batch_id=117 Accuracy=98.58: 100%|██████████| 118/118 [00:28<00:00,  4.16it/s]



Test set: Average loss: 0.0298, Accuracy: 9903/10000 (99.03%)

Epoch:  12
Training...


Loss=0.06256609410047531 Batch_id=117 Accuracy=98.67: 100%|██████████| 118/118 [00:29<00:00,  4.04it/s]



Test set: Average loss: 0.0289, Accuracy: 9908/10000 (99.08%)

Epoch:  13
Training...


Loss=0.0064978767186403275 Batch_id=117 Accuracy=98.64: 100%|██████████| 118/118 [00:28<00:00,  4.16it/s]



Test set: Average loss: 0.0340, Accuracy: 9878/10000 (98.78%)

Epoch:  14
Training...


Loss=0.007993071340024471 Batch_id=117 Accuracy=98.79: 100%|██████████| 118/118 [00:28<00:00,  4.09it/s]



Test set: Average loss: 0.0299, Accuracy: 9903/10000 (99.03%)

Epoch:  15
Training...


Loss=0.02556598000228405 Batch_id=117 Accuracy=98.76: 100%|██████████| 118/118 [00:28<00:00,  4.17it/s]



Test set: Average loss: 0.0278, Accuracy: 9911/10000 (99.11%)



**With Batch Normalization (BN) and Dropout**:

***Target***: Building a model with BN layers and DP layer , added after each convolution layer. Applying dropout for regularization and preventing overfitting.

***Results***: Parameters = 8.6K Train Accuracy = 97.85% Test Accuracy = 99.10%

***Analysis***: Dropout can help in managing overfitting by providing a form of regularization , the Training Accucracy dipped as expected and The test accuracy remained constant as per the prev model.

In [19]:

# Check if CUDA is available and set PyTorch to use GPU or CPU accordingly
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Initialize the model and send it to GPU if available
model = Net(version=3).to(device)

# Print the model summary
from torchsummary import summary
summary(model, input_size=(1, 28, 28))

torch.manual_seed(1)
batch_size = 512


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              80
       BatchNorm2d-2            [-1, 8, 26, 26]              16
              ReLU-3            [-1, 8, 26, 26]               0
           Dropout-4            [-1, 8, 26, 26]               0
            Conv2d-5           [-1, 10, 24, 24]             730
       BatchNorm2d-6           [-1, 10, 24, 24]              20
              ReLU-7           [-1, 10, 24, 24]               0
           Dropout-8           [-1, 10, 24, 24]               0
            Conv2d-9           [-1, 10, 22, 22]             910
      BatchNorm2d-10           [-1, 10, 22, 22]              20
             ReLU-11           [-1, 10, 22, 22]               0
          Dropout-12           [-1, 10, 22, 22]               0
        MaxPool2d-13           [-1, 10, 11, 11]               0
           Conv2d-14           [-1, 10,

In [20]:
# Define the optimizer
from torch.optim.lr_scheduler import StepLR
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = StepLR(optimizer, step_size=6, gamma=0.1)

# Train and test the model
for epoch in range(1, 16):
    print("Epoch: ", epoch)
    print("Training...")
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


Epoch:  1
Training...


Loss=0.19920028746128082 Batch_id=117 Accuracy=76.77: 100%|██████████| 118/118 [00:29<00:00,  4.01it/s]



Test set: Average loss: 0.1647, Accuracy: 9524/10000 (95.24%)

Epoch:  2
Training...


Loss=0.09923535585403442 Batch_id=117 Accuracy=93.72: 100%|██████████| 118/118 [00:28<00:00,  4.17it/s]



Test set: Average loss: 0.0870, Accuracy: 9754/10000 (97.54%)

Epoch:  3
Training...


Loss=0.06630723923444748 Batch_id=117 Accuracy=95.47: 100%|██████████| 118/118 [00:29<00:00,  4.05it/s]



Test set: Average loss: 0.0686, Accuracy: 9803/10000 (98.03%)

Epoch:  4
Training...


Loss=0.04893307387828827 Batch_id=117 Accuracy=96.23: 100%|██████████| 118/118 [00:28<00:00,  4.19it/s]



Test set: Average loss: 0.0561, Accuracy: 9831/10000 (98.31%)

Epoch:  5
Training...


Loss=0.188697949051857 Batch_id=117 Accuracy=96.56: 100%|██████████| 118/118 [00:28<00:00,  4.19it/s]



Test set: Average loss: 0.0496, Accuracy: 9849/10000 (98.49%)

Epoch:  6
Training...


Loss=0.16227638721466064 Batch_id=117 Accuracy=96.82: 100%|██████████| 118/118 [00:28<00:00,  4.15it/s]



Test set: Average loss: 0.0471, Accuracy: 9853/10000 (98.53%)

Epoch:  7
Training...


Loss=0.09810412675142288 Batch_id=117 Accuracy=97.09: 100%|██████████| 118/118 [00:27<00:00,  4.24it/s]



Test set: Average loss: 0.0465, Accuracy: 9846/10000 (98.46%)

Epoch:  8
Training...


Loss=0.10161080211400986 Batch_id=117 Accuracy=97.14: 100%|██████████| 118/118 [00:28<00:00,  4.16it/s]



Test set: Average loss: 0.0443, Accuracy: 9856/10000 (98.56%)

Epoch:  9
Training...


Loss=0.051330506801605225 Batch_id=117 Accuracy=97.36: 100%|██████████| 118/118 [00:28<00:00,  4.21it/s]



Test set: Average loss: 0.0376, Accuracy: 9894/10000 (98.94%)

Epoch:  10
Training...


Loss=0.11690912395715714 Batch_id=117 Accuracy=97.35: 100%|██████████| 118/118 [00:28<00:00,  4.21it/s]



Test set: Average loss: 0.0370, Accuracy: 9891/10000 (98.91%)

Epoch:  11
Training...


Loss=0.0884355828166008 Batch_id=117 Accuracy=97.59: 100%|██████████| 118/118 [00:28<00:00,  4.17it/s]



Test set: Average loss: 0.0335, Accuracy: 9898/10000 (98.98%)

Epoch:  12
Training...


Loss=0.07842995971441269 Batch_id=117 Accuracy=97.67: 100%|██████████| 118/118 [00:28<00:00,  4.09it/s]



Test set: Average loss: 0.0356, Accuracy: 9892/10000 (98.92%)

Epoch:  13
Training...


Loss=0.0208504106849432 Batch_id=117 Accuracy=97.73: 100%|██████████| 118/118 [00:28<00:00,  4.08it/s]



Test set: Average loss: 0.0307, Accuracy: 9908/10000 (99.08%)

Epoch:  14
Training...


Loss=0.04329134523868561 Batch_id=117 Accuracy=97.70: 100%|██████████| 118/118 [00:31<00:00,  3.78it/s]



Test set: Average loss: 0.0355, Accuracy: 9889/10000 (98.89%)

Epoch:  15
Training...


Loss=0.056093569844961166 Batch_id=117 Accuracy=97.85: 100%|██████████| 118/118 [00:28<00:00,  4.14it/s]



Test set: Average loss: 0.0304, Accuracy: 9910/10000 (99.10%)



**With Batch Normalization (BN) and Dropout and GAP**:

***Target***: Create a model with batch normalization, dropout for regularization and global average pooling for better performance.

***Results***: Parameters = 6.7K Train Accuracy = 98.10% Test Accuracy = 98.16%

***Analysis***:This model was expected to perform best amongst all due to the combination of BN, dropout, and GAP. However, it might also be prone to overfitting due to its complexity, the results were not improved so lets try different LR values.

In [21]:

# Check if CUDA is available and set PyTorch to use GPU or CPU accordingly
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Initialize the model and send it to GPU if available
model = Net(version=4).to(device)

# Print the model summary
from torchsummary import summary
summary(model, input_size=(1, 28, 28))

torch.manual_seed(1)
batch_size = 512


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              72
              ReLU-2            [-1, 8, 26, 26]               0
       BatchNorm2d-3            [-1, 8, 26, 26]              16
           Dropout-4            [-1, 8, 26, 26]               0
            Conv2d-5           [-1, 10, 24, 24]             720
              ReLU-6           [-1, 10, 24, 24]               0
       BatchNorm2d-7           [-1, 10, 24, 24]              20
           Dropout-8           [-1, 10, 24, 24]               0
            Conv2d-9           [-1, 12, 24, 24]             120
        MaxPool2d-10           [-1, 12, 12, 12]               0
           Conv2d-11           [-1, 14, 10, 10]           1,512
             ReLU-12           [-1, 14, 10, 10]               0
      BatchNorm2d-13           [-1, 14, 10, 10]              28
          Dropout-14           [-1, 14,

In [22]:
# Define the optimizer
from torch.optim.lr_scheduler import StepLR
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = StepLR(optimizer, step_size=6, gamma=0.1)

# Train and test the model
for epoch in range(1, 16):
    print("Epoch: ", epoch)
    print("Training...")
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


Epoch:  1
Training...


Loss=0.9134268760681152 Batch_id=117 Accuracy=50.09: 100%|██████████| 118/118 [00:29<00:00,  3.99it/s]



Test set: Average loss: 0.9530, Accuracy: 7044/10000 (70.44%)

Epoch:  2
Training...


Loss=0.3257836103439331 Batch_id=117 Accuracy=87.23: 100%|██████████| 118/118 [00:29<00:00,  4.04it/s]



Test set: Average loss: 0.2414, Accuracy: 9488/10000 (94.88%)

Epoch:  3
Training...


Loss=0.18621253967285156 Batch_id=117 Accuracy=94.61: 100%|██████████| 118/118 [00:28<00:00,  4.17it/s]



Test set: Average loss: 0.1007, Accuracy: 9781/10000 (97.81%)

Epoch:  4
Training...


Loss=0.09734547883272171 Batch_id=117 Accuracy=95.99: 100%|██████████| 118/118 [00:28<00:00,  4.11it/s]



Test set: Average loss: 0.0802, Accuracy: 9785/10000 (97.85%)

Epoch:  5
Training...


Loss=0.12387046962976456 Batch_id=117 Accuracy=96.79: 100%|██████████| 118/118 [00:28<00:00,  4.16it/s]



Test set: Average loss: 0.0597, Accuracy: 9851/10000 (98.51%)

Epoch:  6
Training...


Loss=0.14526410400867462 Batch_id=117 Accuracy=97.10: 100%|██████████| 118/118 [00:28<00:00,  4.13it/s]



Test set: Average loss: 0.0509, Accuracy: 9851/10000 (98.51%)

Epoch:  7
Training...


Loss=0.09005102515220642 Batch_id=117 Accuracy=97.28: 100%|██████████| 118/118 [00:28<00:00,  4.16it/s]



Test set: Average loss: 0.0482, Accuracy: 9857/10000 (98.57%)

Epoch:  8
Training...


Loss=0.03712007403373718 Batch_id=117 Accuracy=97.57: 100%|██████████| 118/118 [00:28<00:00,  4.19it/s]



Test set: Average loss: 0.0422, Accuracy: 9875/10000 (98.75%)

Epoch:  9
Training...


Loss=0.08167010545730591 Batch_id=117 Accuracy=97.64: 100%|██████████| 118/118 [00:28<00:00,  4.12it/s]



Test set: Average loss: 0.0394, Accuracy: 9874/10000 (98.74%)

Epoch:  10
Training...


Loss=0.061838310211896896 Batch_id=117 Accuracy=97.76: 100%|██████████| 118/118 [00:28<00:00,  4.18it/s]



Test set: Average loss: 0.0376, Accuracy: 9886/10000 (98.86%)

Epoch:  11
Training...


Loss=0.06540429592132568 Batch_id=117 Accuracy=97.81: 100%|██████████| 118/118 [00:28<00:00,  4.11it/s]



Test set: Average loss: 0.0351, Accuracy: 9889/10000 (98.89%)

Epoch:  12
Training...


Loss=0.09948169440031052 Batch_id=117 Accuracy=98.01: 100%|██████████| 118/118 [00:28<00:00,  4.09it/s]



Test set: Average loss: 0.0316, Accuracy: 9900/10000 (99.00%)

Epoch:  13
Training...


Loss=0.016656411811709404 Batch_id=117 Accuracy=98.02: 100%|██████████| 118/118 [00:28<00:00,  4.10it/s]



Test set: Average loss: 0.0341, Accuracy: 9894/10000 (98.94%)

Epoch:  14
Training...


Loss=0.06390424817800522 Batch_id=117 Accuracy=98.02: 100%|██████████| 118/118 [00:29<00:00,  4.07it/s]



Test set: Average loss: 0.0327, Accuracy: 9892/10000 (98.92%)

Epoch:  15
Training...


Loss=0.15066666901111603 Batch_id=117 Accuracy=98.10: 100%|██████████| 118/118 [00:28<00:00,  4.08it/s]



Test set: Average loss: 0.0321, Accuracy: 9896/10000 (98.96%)



## LR Experimentation for Final Model 4

In [37]:
# Define the optimizer
from torch.optim.lr_scheduler import StepLR
optimizer = optim.SGD(model.parameters(), lr=0.025, momentum=0.9)
scheduler = StepLR(optimizer, step_size=6, gamma=0.1)

# Train and test the model
for epoch in range(1, 16):
    print("Epoch: ", epoch)
    print("Training...")
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


Epoch:  1
Training...


Loss=0.044084664434194565 Batch_id=117 Accuracy=98.72: 100%|██████████| 118/118 [00:31<00:00,  3.74it/s]



Test set: Average loss: 0.0232, Accuracy: 9930/10000 (99.30%)

Epoch:  2
Training...


Loss=0.012225211597979069 Batch_id=117 Accuracy=98.71: 100%|██████████| 118/118 [00:31<00:00,  3.73it/s]



Test set: Average loss: 0.0214, Accuracy: 9938/10000 (99.38%)

Epoch:  3
Training...


Loss=0.05009213089942932 Batch_id=117 Accuracy=98.82: 100%|██████████| 118/118 [00:30<00:00,  3.84it/s]



Test set: Average loss: 0.0244, Accuracy: 9928/10000 (99.28%)

Epoch:  4
Training...


Loss=0.025886205956339836 Batch_id=117 Accuracy=98.75: 100%|██████████| 118/118 [00:31<00:00,  3.76it/s]



Test set: Average loss: 0.0205, Accuracy: 9936/10000 (99.36%)

Epoch:  5
Training...


Loss=0.018497753888368607 Batch_id=117 Accuracy=98.78: 100%|██████████| 118/118 [00:31<00:00,  3.70it/s]



Test set: Average loss: 0.0224, Accuracy: 9933/10000 (99.33%)

Epoch:  6
Training...


Loss=0.01872161403298378 Batch_id=117 Accuracy=98.73: 100%|██████████| 118/118 [00:31<00:00,  3.75it/s]



Test set: Average loss: 0.0198, Accuracy: 9944/10000 (99.44%)

Epoch:  7
Training...


Loss=0.04662315547466278 Batch_id=117 Accuracy=98.84: 100%|██████████| 118/118 [00:30<00:00,  3.81it/s]



Test set: Average loss: 0.0214, Accuracy: 9934/10000 (99.34%)

Epoch:  8
Training...


Loss=0.022603964433073997 Batch_id=117 Accuracy=98.78: 100%|██████████| 118/118 [00:32<00:00,  3.67it/s]



Test set: Average loss: 0.0207, Accuracy: 9935/10000 (99.35%)

Epoch:  9
Training...


Loss=0.12752671539783478 Batch_id=117 Accuracy=98.77: 100%|██████████| 118/118 [00:31<00:00,  3.75it/s]



Test set: Average loss: 0.0197, Accuracy: 9938/10000 (99.38%)

Epoch:  10
Training...


Loss=0.05450078472495079 Batch_id=117 Accuracy=98.75: 100%|██████████| 118/118 [00:31<00:00,  3.78it/s]



Test set: Average loss: 0.0210, Accuracy: 9934/10000 (99.34%)

Epoch:  11
Training...


Loss=0.07473001629114151 Batch_id=117 Accuracy=98.78: 100%|██████████| 118/118 [00:31<00:00,  3.70it/s]



Test set: Average loss: 0.0197, Accuracy: 9930/10000 (99.30%)

Epoch:  12
Training...


Loss=0.0934823751449585 Batch_id=117 Accuracy=98.80: 100%|██████████| 118/118 [00:32<00:00,  3.66it/s]



Test set: Average loss: 0.0199, Accuracy: 9938/10000 (99.38%)

Epoch:  13
Training...


Loss=0.03905721753835678 Batch_id=117 Accuracy=98.86: 100%|██████████| 118/118 [00:31<00:00,  3.78it/s]



Test set: Average loss: 0.0187, Accuracy: 9937/10000 (99.37%)

Epoch:  14
Training...


Loss=0.008282456547021866 Batch_id=117 Accuracy=98.83: 100%|██████████| 118/118 [00:31<00:00,  3.75it/s]



Test set: Average loss: 0.0204, Accuracy: 9934/10000 (99.34%)

Epoch:  15
Training...


Loss=0.11240572482347488 Batch_id=117 Accuracy=98.76: 100%|██████████| 118/118 [00:32<00:00,  3.69it/s]



Test set: Average loss: 0.0207, Accuracy: 9935/10000 (99.35%)



In [38]:
# Define the optimizer
from torch.optim.lr_scheduler import StepLR
optimizer = optim.SGD(model.parameters(), lr=0.023, momentum=0.9)
scheduler = StepLR(optimizer, step_size=6, gamma=0.1)

# Train and test the model
for epoch in range(1, 16):
    print("Epoch: ", epoch)
    print("Training...")
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


Epoch:  1
Training...


Loss=0.0869583860039711 Batch_id=117 Accuracy=98.81: 100%|██████████| 118/118 [00:32<00:00,  3.65it/s]



Test set: Average loss: 0.0207, Accuracy: 9935/10000 (99.35%)

Epoch:  2
Training...


Loss=0.022402873262763023 Batch_id=117 Accuracy=98.82: 100%|██████████| 118/118 [00:31<00:00,  3.78it/s]



Test set: Average loss: 0.0209, Accuracy: 9943/10000 (99.43%)

Epoch:  3
Training...


Loss=0.0716337189078331 Batch_id=117 Accuracy=98.87: 100%|██████████| 118/118 [00:30<00:00,  3.87it/s]



Test set: Average loss: 0.0219, Accuracy: 9932/10000 (99.32%)

Epoch:  4
Training...


Loss=0.06164575740695 Batch_id=117 Accuracy=98.85: 100%|██████████| 118/118 [00:31<00:00,  3.69it/s]



Test set: Average loss: 0.0198, Accuracy: 9933/10000 (99.33%)

Epoch:  5
Training...


Loss=0.04524430260062218 Batch_id=117 Accuracy=98.87: 100%|██████████| 118/118 [00:30<00:00,  3.84it/s]



Test set: Average loss: 0.0206, Accuracy: 9938/10000 (99.38%)

Epoch:  6
Training...


Loss=0.03254687413573265 Batch_id=117 Accuracy=98.88: 100%|██████████| 118/118 [00:30<00:00,  3.82it/s]



Test set: Average loss: 0.0185, Accuracy: 9942/10000 (99.42%)

Epoch:  7
Training...


Loss=0.02545345574617386 Batch_id=117 Accuracy=98.77: 100%|██████████| 118/118 [00:31<00:00,  3.71it/s]



Test set: Average loss: 0.0206, Accuracy: 9934/10000 (99.34%)

Epoch:  8
Training...


Loss=0.04577462747693062 Batch_id=117 Accuracy=98.86: 100%|██████████| 118/118 [00:30<00:00,  3.84it/s]



Test set: Average loss: 0.0166, Accuracy: 9945/10000 (99.45%)

Epoch:  9
Training...


Loss=0.022862860932946205 Batch_id=117 Accuracy=98.89: 100%|██████████| 118/118 [00:31<00:00,  3.76it/s]



Test set: Average loss: 0.0188, Accuracy: 9940/10000 (99.40%)

Epoch:  10
Training...


Loss=0.035000674426555634 Batch_id=117 Accuracy=98.83: 100%|██████████| 118/118 [00:31<00:00,  3.80it/s]



Test set: Average loss: 0.0195, Accuracy: 9943/10000 (99.43%)

Epoch:  11
Training...


Loss=0.09974264353513718 Batch_id=117 Accuracy=98.88: 100%|██████████| 118/118 [00:30<00:00,  3.83it/s]



Test set: Average loss: 0.0193, Accuracy: 9934/10000 (99.34%)

Epoch:  12
Training...


Loss=0.07049078494310379 Batch_id=117 Accuracy=98.85: 100%|██████████| 118/118 [00:31<00:00,  3.76it/s]



Test set: Average loss: 0.0196, Accuracy: 9938/10000 (99.38%)

Epoch:  13
Training...


Loss=0.03594519570469856 Batch_id=117 Accuracy=98.88: 100%|██████████| 118/118 [00:30<00:00,  3.82it/s]



Test set: Average loss: 0.0206, Accuracy: 9938/10000 (99.38%)

Epoch:  14
Training...


Loss=0.01686198078095913 Batch_id=117 Accuracy=98.87: 100%|██████████| 118/118 [00:30<00:00,  3.87it/s]



Test set: Average loss: 0.0220, Accuracy: 9933/10000 (99.33%)

Epoch:  15
Training...


Loss=0.07558518648147583 Batch_id=117 Accuracy=98.95: 100%|██████████| 118/118 [00:31<00:00,  3.73it/s]



Test set: Average loss: 0.0194, Accuracy: 9937/10000 (99.37%)



In [39]:
# Define the optimizer
from torch.optim.lr_scheduler import StepLR
optimizer = optim.SGD(model.parameters(), lr=0.022, momentum=0.9)
scheduler = StepLR(optimizer, step_size=6, gamma=0.1)

# Train and test the model
for epoch in range(1, 16):
    print("Epoch: ", epoch)
    print("Training...")
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


Epoch:  1
Training...


Loss=0.03125995397567749 Batch_id=117 Accuracy=98.92: 100%|██████████| 118/118 [00:31<00:00,  3.79it/s]



Test set: Average loss: 0.0222, Accuracy: 9933/10000 (99.33%)

Epoch:  2
Training...


Loss=0.01921103335916996 Batch_id=117 Accuracy=98.89: 100%|██████████| 118/118 [00:30<00:00,  3.85it/s]



Test set: Average loss: 0.0199, Accuracy: 9938/10000 (99.38%)

Epoch:  3
Training...


Loss=0.01084043737500906 Batch_id=117 Accuracy=98.88: 100%|██████████| 118/118 [00:31<00:00,  3.78it/s]



Test set: Average loss: 0.0198, Accuracy: 9936/10000 (99.36%)

Epoch:  4
Training...


Loss=0.0069467960856854916 Batch_id=117 Accuracy=98.91: 100%|██████████| 118/118 [00:30<00:00,  3.81it/s]



Test set: Average loss: 0.0192, Accuracy: 9936/10000 (99.36%)

Epoch:  5
Training...


Loss=0.09262903779745102 Batch_id=117 Accuracy=98.84: 100%|██████████| 118/118 [00:30<00:00,  3.88it/s]



Test set: Average loss: 0.0229, Accuracy: 9925/10000 (99.25%)

Epoch:  6
Training...


Loss=0.002912788884714246 Batch_id=117 Accuracy=98.92: 100%|██████████| 118/118 [00:31<00:00,  3.73it/s]



Test set: Average loss: 0.0213, Accuracy: 9932/10000 (99.32%)

Epoch:  7
Training...


Loss=0.0031171950977295637 Batch_id=117 Accuracy=98.92: 100%|██████████| 118/118 [00:30<00:00,  3.84it/s]



Test set: Average loss: 0.0190, Accuracy: 9942/10000 (99.42%)

Epoch:  8
Training...


Loss=0.10635564476251602 Batch_id=117 Accuracy=98.93: 100%|██████████| 118/118 [00:30<00:00,  3.84it/s]



Test set: Average loss: 0.0210, Accuracy: 9939/10000 (99.39%)

Epoch:  9
Training...


Loss=0.018912525847554207 Batch_id=117 Accuracy=98.92: 100%|██████████| 118/118 [00:31<00:00,  3.76it/s]



Test set: Average loss: 0.0201, Accuracy: 9932/10000 (99.32%)

Epoch:  10
Training...


Loss=0.013150022365152836 Batch_id=117 Accuracy=98.98: 100%|██████████| 118/118 [00:30<00:00,  3.83it/s]



Test set: Average loss: 0.0209, Accuracy: 9938/10000 (99.38%)

Epoch:  11
Training...


Loss=0.08608231693506241 Batch_id=117 Accuracy=98.91: 100%|██████████| 118/118 [00:30<00:00,  3.87it/s]



Test set: Average loss: 0.0201, Accuracy: 9931/10000 (99.31%)

Epoch:  12
Training...


Loss=0.09888803958892822 Batch_id=117 Accuracy=98.90: 100%|██████████| 118/118 [00:31<00:00,  3.76it/s]



Test set: Average loss: 0.0196, Accuracy: 9934/10000 (99.34%)

Epoch:  13
Training...


Loss=0.04463501647114754 Batch_id=117 Accuracy=98.97: 100%|██████████| 118/118 [00:30<00:00,  3.87it/s]



Test set: Average loss: 0.0185, Accuracy: 9940/10000 (99.40%)

Epoch:  14
Training...


Loss=0.0371144562959671 Batch_id=117 Accuracy=98.91: 100%|██████████| 118/118 [00:31<00:00,  3.73it/s]



Test set: Average loss: 0.0191, Accuracy: 9938/10000 (99.38%)

Epoch:  15
Training...


Loss=0.00762234628200531 Batch_id=117 Accuracy=98.99: 100%|██████████| 118/118 [00:30<00:00,  3.85it/s]



Test set: Average loss: 0.0203, Accuracy: 9938/10000 (99.38%)



## Final Model getting consistent 99.4% accuracy in last few epochs. With LR = 0.021

In [40]:
# Define the optimizer
from torch.optim.lr_scheduler import StepLR
optimizer = optim.SGD(model.parameters(), lr=0.021, momentum=0.9)
scheduler = StepLR(optimizer, step_size=6, gamma=0.1)

# Train and test the model
for epoch in range(1, 16):
    print("Epoch: ", epoch)
    print("Training...")
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


Epoch:  1
Training...


Loss=0.04310225322842598 Batch_id=117 Accuracy=98.95: 100%|██████████| 118/118 [00:30<00:00,  3.92it/s]



Test set: Average loss: 0.0210, Accuracy: 9937/10000 (99.37%)

Epoch:  2
Training...


Loss=0.008080837316811085 Batch_id=117 Accuracy=98.94: 100%|██████████| 118/118 [00:31<00:00,  3.75it/s]



Test set: Average loss: 0.0194, Accuracy: 9943/10000 (99.43%)

Epoch:  3
Training...


Loss=0.06493533402681351 Batch_id=117 Accuracy=98.97: 100%|██████████| 118/118 [00:31<00:00,  3.73it/s]



Test set: Average loss: 0.0203, Accuracy: 9933/10000 (99.33%)

Epoch:  4
Training...


Loss=0.062225714325904846 Batch_id=117 Accuracy=98.99: 100%|██████████| 118/118 [00:30<00:00,  3.83it/s]



Test set: Average loss: 0.0193, Accuracy: 9935/10000 (99.35%)

Epoch:  5
Training...


Loss=0.00924408994615078 Batch_id=117 Accuracy=98.94: 100%|██████████| 118/118 [00:30<00:00,  3.82it/s]



Test set: Average loss: 0.0184, Accuracy: 9937/10000 (99.37%)

Epoch:  6
Training...


Loss=0.05589870736002922 Batch_id=117 Accuracy=98.98: 100%|██████████| 118/118 [00:31<00:00,  3.73it/s]



Test set: Average loss: 0.0195, Accuracy: 9936/10000 (99.36%)

Epoch:  7
Training...


Loss=0.1336507946252823 Batch_id=117 Accuracy=98.95: 100%|██████████| 118/118 [00:30<00:00,  3.83it/s]



Test set: Average loss: 0.0202, Accuracy: 9939/10000 (99.39%)

Epoch:  8
Training...


Loss=0.033292870968580246 Batch_id=117 Accuracy=98.94: 100%|██████████| 118/118 [00:30<00:00,  3.84it/s]



Test set: Average loss: 0.0193, Accuracy: 9934/10000 (99.34%)

Epoch:  9
Training...


Loss=0.013349073939025402 Batch_id=117 Accuracy=99.01: 100%|██████████| 118/118 [00:31<00:00,  3.78it/s]



Test set: Average loss: 0.0186, Accuracy: 9943/10000 (99.43%)

Epoch:  10
Training...


Loss=0.044661056250333786 Batch_id=117 Accuracy=98.97: 100%|██████████| 118/118 [00:30<00:00,  3.85it/s]



Test set: Average loss: 0.0193, Accuracy: 9939/10000 (99.39%)

Epoch:  11
Training...


Loss=0.029256120324134827 Batch_id=117 Accuracy=98.97: 100%|██████████| 118/118 [00:30<00:00,  3.82it/s]



Test set: Average loss: 0.0193, Accuracy: 9937/10000 (99.37%)

Epoch:  12
Training...


Loss=0.05465199425816536 Batch_id=117 Accuracy=98.95: 100%|██████████| 118/118 [00:30<00:00,  3.83it/s]



Test set: Average loss: 0.0175, Accuracy: 9941/10000 (99.41%)

Epoch:  13
Training...


Loss=0.0323687382042408 Batch_id=117 Accuracy=98.98: 100%|██████████| 118/118 [00:29<00:00,  3.94it/s]



Test set: Average loss: 0.0162, Accuracy: 9943/10000 (99.43%)

Epoch:  14
Training...


Loss=0.012581090442836285 Batch_id=117 Accuracy=98.97: 100%|██████████| 118/118 [00:31<00:00,  3.74it/s]



Test set: Average loss: 0.0190, Accuracy: 9941/10000 (99.41%)

Epoch:  15
Training...


Loss=0.02066531777381897 Batch_id=117 Accuracy=98.98: 100%|██████████| 118/118 [00:30<00:00,  3.90it/s]



Test set: Average loss: 0.0173, Accuracy: 9944/10000 (99.44%)

