<a href="https://colab.research.google.com/github/KillerStrike17/EVA-5/blob/master/4.%20Architecture%20Basics/Architecture_Basics_in_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installing the Libraries

In [1]:
!pip install torchsummary
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

from torchsummary import summary



## Network 1

Network 1: Here I tried to squeeze the network under 10k parameters and still reach 99.4% accuracy. Without any augmentation or LR scheduling that is tough, and I couldn't do it. I got close, though, and that guided me in building my second network.

In [7]:
# Dropout probability passed to every nn.Dropout layer of the Network 1 model.
DROPOUT_VALUE = 0.05

In [13]:
class Net(nn.Module):
    """Small CNN mapping a 1x28x28 image to 10 class log-probabilities.

    Layout:
        conv1  - three 3x3 conv stages (Conv -> ReLU -> BatchNorm -> Dropout)
        trans1 - 1x1 conv (12 -> 10 channels) followed by 2x2 max-pooling
        conv2  - two 3x3 conv stages
        gap    - 7x7 average pooling, then a 1x1 conv down to 10 channels

    The dropout probability is read from the notebook-global DROPOUT_VALUE
    when the model is constructed.

    Receptive-field (RF) notes below use r_out = r_in + (k - 1) * jump, where
    jump is the product of the strides of all preceding layers.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Convolution block: 28x28x1 -> 22x22x12
        self.conv1 = nn.Sequential(
            # 28 -> 26, RF 3x3
            nn.Conv2d(1, 12, 3),
            nn.ReLU(),
            nn.BatchNorm2d(12),
            nn.Dropout(DROPOUT_VALUE),

            # 26 -> 24, RF 5x5
            nn.Conv2d(12, 12, 3),
            nn.ReLU(),
            nn.BatchNorm2d(12),
            nn.Dropout(DROPOUT_VALUE),

            # 24 -> 22, RF 7x7
            nn.Conv2d(12, 12, 3),
            nn.ReLU(),
            nn.BatchNorm2d(12),
            nn.Dropout(DROPOUT_VALUE),
        )

        # Transition block: 22x22x12 -> 11x11x10
        self.trans1 = nn.Sequential(
            # 1x1 conv only mixes channels: still 22x22, RF 7x7
            nn.Conv2d(12, 10, 1),

            # 22 -> 11, RF 8x8; every later pixel step now spans 2 input pixels
            nn.MaxPool2d(2, 2),
        )

        # Convolution block: 11x11x10 -> 7x7x20
        self.conv2 = nn.Sequential(
            # 11 -> 9, RF 12x12
            nn.Conv2d(10, 20, 3),
            nn.ReLU(),
            nn.BatchNorm2d(20),
            nn.Dropout(DROPOUT_VALUE),

            # 9 -> 7, RF 16x16 (no dropout after the last conv stage)
            nn.Conv2d(20, 20, 3),
            nn.ReLU(),
            nn.BatchNorm2d(20),
        )

        # Global average pooling head: 7x7x20 -> 1x1x10
        self.gap = nn.Sequential(
            # Averages the whole 7x7 map, so the RF covers the full 28x28 input
            nn.AvgPool2d(7, 7),
            # 1x1 conv maps the 20 pooled features to the 10 class scores
            nn.Conv2d(20, 10, 1),
        )

    def forward(self, x):
        """Perform a forward pass; invoked when model(x) is called.

        Params
            x: the input data, a batch of 1x28x28 images (the flatten below
               relies on the head producing a 1x1 spatial map)

        Returns
            y: the output of the model, log-probabilities of shape (batch, 10)
        """
        x = self.conv1(x)
        x = self.trans1(x)
        x = self.conv2(x)
        x = self.gap(x)
        # (batch, 10, 1, 1) -> (batch, 10)
        x = x.view(-1, 10)
        # dim is given explicitly: the implicit-dim form is deprecated and warns.
        return F.log_softmax(x, dim=1)

In [14]:

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
# Build the network on the chosen device and print torchsummary's per-layer
# table (output shapes and parameter counts) for a single 1x28x28 input.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 12, 26, 26]             120
              ReLU-2           [-1, 12, 26, 26]               0
       BatchNorm2d-3           [-1, 12, 26, 26]              24
           Dropout-4           [-1, 12, 26, 26]               0
            Conv2d-5           [-1, 12, 24, 24]           1,308
              ReLU-6           [-1, 12, 24, 24]               0
       BatchNorm2d-7           [-1, 12, 24, 24]              24
           Dropout-8           [-1, 12, 24, 24]               0
            Conv2d-9           [-1, 12, 22, 22]           1,308
             ReLU-10           [-1, 12, 22, 22]               0
      BatchNorm2d-11           [-1, 12, 22, 22]              24
          Dropout-12           [-1, 12, 22, 22]               0
           Conv2d-13           [-1, 10, 22, 22]             130
        MaxPool2d-14           [-1, 10,



In [15]:
# Fix the global torch RNG so weight initialisation and shuffling are repeatable.
torch.manual_seed(1)
BATCH_SIZE = 128

# Extra DataLoader options are only passed when a CUDA device is in use.
kwargs = dict(num_workers=1, pin_memory=True) if use_cuda else dict()

# Training split: downloaded into ../data on first use, reshuffled every epoch.
# The Normalize constants are presumably the MNIST pixel mean/std - confirm.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
    **kwargs
)

# Test split: same preprocessing as the training split.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
    **kwargs
)


In [16]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Train `model` for one pass over `train_loader`.

    For every batch: move it to `device`, run a forward pass, compute the
    negative log-likelihood loss (the model is expected to return
    log-probabilities), back-propagate and take one optimizer step. The tqdm
    progress bar shows the latest batch loss, the batch index and the running
    training accuracy.

    Params
        model: the network to train (switched to training mode here)
        device: device the batches are moved to
        train_loader: iterable of (data, target) batches
        optimizer: optimizer updating the parameters of `model`
        epoch: current epoch number; not used inside the function, kept so
               existing callers keep working

    Returns
        None
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        # Index of the largest log-probability is the predicted class.
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        # Divide by the samples seen so far rather than by the full dataset
        # size, so the displayed accuracy is meaningful mid-epoch too. After a
        # full pass over the dataset both denominators coincide.
        processed += len(data)
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx} Accuracy = {correct/processed}')
    


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [17]:
# Re-create the model so training starts from freshly initialised weights.
model = Net().to(device)
# SGD with momentum at a fixed learning rate (no scheduler is attached).
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 20) yields epochs 1..19, i.e. 19 training epochs; the test set is
# evaluated after every epoch.
for epoch in range(1, 20):
    print("\nEpoch:",epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 1


loss=0.1654239445924759 batch_id=468 Accuracy = 0.8420833333333333: 100%|██████████| 469/469 [00:23<00:00, 19.84it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1595, Accuracy: 9595/10000 (95.95%)


Epoch: 2


loss=0.08769126981496811 batch_id=468 Accuracy = 0.9726166666666667: 100%|██████████| 469/469 [00:23<00:00, 19.96it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0859, Accuracy: 9777/10000 (97.77%)


Epoch: 3


loss=0.07154809683561325 batch_id=468 Accuracy = 0.98025: 100%|██████████| 469/469 [00:23<00:00, 20.10it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0564, Accuracy: 9838/10000 (98.38%)


Epoch: 4


loss=0.05420919135212898 batch_id=468 Accuracy = 0.9840166666666667: 100%|██████████| 469/469 [00:23<00:00, 20.13it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0697, Accuracy: 9798/10000 (97.98%)


Epoch: 5


loss=0.08666180819272995 batch_id=468 Accuracy = 0.9851666666666666: 100%|██████████| 469/469 [00:23<00:00, 20.00it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0573, Accuracy: 9828/10000 (98.28%)


Epoch: 6


loss=0.01915805973112583 batch_id=468 Accuracy = 0.9863333333333333: 100%|██████████| 469/469 [00:23<00:00, 20.07it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0435, Accuracy: 9865/10000 (98.65%)


Epoch: 7


loss=0.015277516096830368 batch_id=468 Accuracy = 0.9878833333333333: 100%|██████████| 469/469 [00:23<00:00, 19.69it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0388, Accuracy: 9890/10000 (98.90%)


Epoch: 8


loss=0.03919351473450661 batch_id=468 Accuracy = 0.9880833333333333: 100%|██████████| 469/469 [00:23<00:00, 20.17it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0366, Accuracy: 9890/10000 (98.90%)


Epoch: 9


loss=0.019227156415581703 batch_id=468 Accuracy = 0.9889833333333333: 100%|██████████| 469/469 [00:23<00:00, 19.94it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0366, Accuracy: 9894/10000 (98.94%)


Epoch: 10


loss=0.05307893455028534 batch_id=468 Accuracy = 0.9890833333333333: 100%|██████████| 469/469 [00:23<00:00, 20.16it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0398, Accuracy: 9875/10000 (98.75%)


Epoch: 11


loss=0.0068485611118376255 batch_id=468 Accuracy = 0.98995: 100%|██████████| 469/469 [00:23<00:00, 20.17it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0425, Accuracy: 9863/10000 (98.63%)


Epoch: 12


loss=0.009854767471551895 batch_id=468 Accuracy = 0.9903833333333333: 100%|██████████| 469/469 [00:23<00:00, 19.92it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0327, Accuracy: 9905/10000 (99.05%)


Epoch: 13


loss=0.03904423117637634 batch_id=468 Accuracy = 0.99105: 100%|██████████| 469/469 [00:23<00:00, 19.98it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0437, Accuracy: 9855/10000 (98.55%)


Epoch: 14


loss=0.028298599645495415 batch_id=468 Accuracy = 0.9909333333333333: 100%|██████████| 469/469 [00:23<00:00, 19.98it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0268, Accuracy: 9917/10000 (99.17%)


Epoch: 15


loss=0.06844929605722427 batch_id=468 Accuracy = 0.99175: 100%|██████████| 469/469 [00:23<00:00, 19.99it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0300, Accuracy: 9902/10000 (99.02%)


Epoch: 16


loss=0.005265051499009132 batch_id=468 Accuracy = 0.9917833333333334: 100%|██████████| 469/469 [00:23<00:00, 19.98it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0321, Accuracy: 9900/10000 (99.00%)


Epoch: 17


loss=0.04047529026865959 batch_id=468 Accuracy = 0.9918: 100%|██████████| 469/469 [00:23<00:00, 20.07it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0300, Accuracy: 9903/10000 (99.03%)


Epoch: 18


loss=0.019702846184372902 batch_id=468 Accuracy = 0.9924333333333333: 100%|██████████| 469/469 [00:23<00:00, 19.92it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0263, Accuracy: 9910/10000 (99.10%)


Epoch: 19


loss=0.026455067098140717 batch_id=468 Accuracy = 0.99265: 100%|██████████| 469/469 [00:23<00:00, 19.92it/s]



Test set: Average loss: 0.0581, Accuracy: 9792/10000 (97.92%)



## Network 2

Network 2:

Updated Model: The previous model was not able to cross 99.4%. If we look at the training accuracy itself, it was stuck around 99.3%, which means the model was not learning any further.
Hence we need an architecture with some more parameters (we needed more receptive field), or we need to use LR scheduling or data augmentation.


In [18]:
"""Net: Our Base Model for MNIST Classification
Attributes
    
"""
# Dropout probability for every nn.Dropout2d layer of the Network 2 model.
# This rebinds the notebook-global used by the Network 1 cell (0.05 there).
DROPOUT_VALUE = 0.1


class Net(nn.Module):
    """CNN for MNIST classification: 1x28x28 image -> 10 class log-probabilities.

    Layout:
        conv1    - three 3x3 conv stages (Conv -> ReLU -> BatchNorm -> Dropout2d)
        trans1   - 1x1 conv (32 -> 16 channels), ReLU, 2x2 max-pooling
        conv2    - four 3x3 conv stages, the third one padded to keep 7x7
        avg_pool - 1x1 conv down to 10 channels, then 5x5 average pooling

    All convolutions are bias-free. The dropout probability is read from the
    notebook-global DROPOUT_VALUE when the model is constructed.

    Receptive-field (RF) notes below use r_out = r_in + (k - 1) * jump, where
    jump is the product of the strides of all preceding layers.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Convolution block: 28x28x1 -> 22x22x32
        self.conv1 = nn.Sequential(
            # 28 -> 26, RF 3x3
            nn.Conv2d(1, 16, 3, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout2d(DROPOUT_VALUE),

            # 26 -> 24, RF 5x5
            nn.Conv2d(16, 16, 3, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout2d(DROPOUT_VALUE),

            # 24 -> 22, RF 7x7
            nn.Conv2d(16, 32, 3, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Dropout2d(DROPOUT_VALUE),
        )

        # Transition block: 22x22x32 -> 11x11x16
        self.trans1 = nn.Sequential(
            # 1x1 conv only mixes channels: still 22x22, RF 7x7
            nn.Conv2d(32, 16, 1, bias=False),
            nn.ReLU(),

            # 22 -> 11, RF 8x8; every later pixel step now spans 2 input pixels
            nn.MaxPool2d(2, 2),
        )

        # Convolution block: 11x11x16 -> 5x5x16
        self.conv2 = nn.Sequential(
            # 11 -> 9, RF 12x12
            nn.Conv2d(16, 16, 3, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout2d(DROPOUT_VALUE),

            # 9 -> 7, RF 16x16
            nn.Conv2d(16, 16, 3, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout2d(DROPOUT_VALUE),

            # padding=1 keeps the map at 7x7, RF 20x20
            nn.Conv2d(16, 16, 3, padding=1, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout2d(DROPOUT_VALUE),

            # 7 -> 5, RF 24x24
            nn.Conv2d(16, 16, 3, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout2d(DROPOUT_VALUE),
        )

        # Global average pooling head: 5x5x16 -> 1x1x10
        self.avg_pool = nn.Sequential(
            # 1x1 conv maps the 16 features to the 10 class scores per pixel
            nn.Conv2d(16, 10, 1, bias=False),
            # Averages the whole 5x5 map, so the RF covers the full 28x28 input
            nn.AvgPool2d(5)
        )

    def forward(self, x):
        """Perform a forward pass; invoked when model(x) is called.

        Params
            x: the input data, a batch of 1x28x28 images (the flatten below
               relies on the head producing a 1x1 spatial map)

        Returns
            y: the output of the model, log-probabilities of shape (batch, 10)
        """
        x = self.conv1(x)
        x = self.trans1(x)
        x = self.conv2(x)
        x = self.avg_pool(x)

        # (batch, 10, 1, 1) -> (batch, 10)
        x = x.view(-1, 10)
        # dim is given explicitly: the implicit-dim form is deprecated and warns.
        return F.log_softmax(x, dim=1)

In [22]:

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
# Build the network on the chosen device and print torchsummary's per-layer
# table (output shapes and parameter counts) for a single 1x28x28 input.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
              ReLU-2           [-1, 16, 26, 26]               0
       BatchNorm2d-3           [-1, 16, 26, 26]              32
         Dropout2d-4           [-1, 16, 26, 26]               0
            Conv2d-5           [-1, 16, 24, 24]           2,304
              ReLU-6           [-1, 16, 24, 24]               0
       BatchNorm2d-7           [-1, 16, 24, 24]              32
         Dropout2d-8           [-1, 16, 24, 24]               0
            Conv2d-9           [-1, 32, 22, 22]           4,608
             ReLU-10           [-1, 32, 22, 22]               0
      BatchNorm2d-11           [-1, 32, 22, 22]              64
        Dropout2d-12           [-1, 32, 22, 22]               0
           Conv2d-13           [-1, 16, 22, 22]             512
             ReLU-14           [-1, 16,



In [19]:


# Fix the global torch RNG so weight initialisation and shuffling are repeatable.
torch.manual_seed(1)
BATCH_SIZE = 128

# Extra DataLoader options are only passed when a CUDA device is in use.
kwargs = dict(num_workers=1, pin_memory=True) if use_cuda else dict()

# Training split: downloaded into ../data on first use, reshuffled every epoch.
# The Normalize constants are presumably the MNIST pixel mean/std - confirm.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
    **kwargs
)

# Test split: same preprocessing as the training split.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
    **kwargs
)


In [20]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Train `model` for one pass over `train_loader`.

    For every batch: move it to `device`, run a forward pass, compute the
    negative log-likelihood loss (the model is expected to return
    log-probabilities), back-propagate and take one optimizer step. The tqdm
    progress bar shows the latest batch loss, the batch index and the running
    training accuracy.

    Params
        model: the network to train (switched to training mode here)
        device: device the batches are moved to
        train_loader: iterable of (data, target) batches
        optimizer: optimizer updating the parameters of `model`
        epoch: current epoch number; not used inside the function, kept so
               existing callers keep working

    Returns
        None
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        # Index of the largest log-probability is the predicted class.
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        # Divide by the samples seen so far rather than by the full dataset
        # size, so the displayed accuracy is meaningful mid-epoch too. After a
        # full pass over the dataset both denominators coincide.
        processed += len(data)
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx} Accuracy = {correct/processed}')
    


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [24]:
# Re-create the model so training starts from freshly initialised weights.
model = Net().to(device)
# SGD with momentum at a fixed learning rate (no scheduler is attached).
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 20) yields epochs 1..19, i.e. 19 training epochs; the test set is
# evaluated after every epoch.
for epoch in range(1, 20):
    print("\nEpoch:",epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 1


loss=0.28821325302124023 batch_id=468 Accuracy = 0.8154833333333333: 100%|██████████| 469/469 [00:14<00:00, 33.47it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0843, Accuracy: 9755/10000 (97.55%)


Epoch: 2


loss=0.14335118234157562 batch_id=468 Accuracy = 0.9583: 100%|██████████| 469/469 [00:13<00:00, 33.73it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0451, Accuracy: 9855/10000 (98.55%)


Epoch: 3


loss=0.12362729758024216 batch_id=468 Accuracy = 0.9689666666666666: 100%|██████████| 469/469 [00:13<00:00, 34.15it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0336, Accuracy: 9896/10000 (98.96%)


Epoch: 4


loss=0.13165722787380219 batch_id=468 Accuracy = 0.9741666666666666: 100%|██████████| 469/469 [00:13<00:00, 33.97it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0327, Accuracy: 9894/10000 (98.94%)


Epoch: 5


loss=0.02326430194079876 batch_id=468 Accuracy = 0.97685: 100%|██████████| 469/469 [00:14<00:00, 33.38it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0309, Accuracy: 9896/10000 (98.96%)


Epoch: 6


loss=0.07458562403917313 batch_id=468 Accuracy = 0.9791666666666666: 100%|██████████| 469/469 [00:14<00:00, 33.00it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0288, Accuracy: 9914/10000 (99.14%)


Epoch: 7


loss=0.17497344315052032 batch_id=468 Accuracy = 0.9807166666666667: 100%|██████████| 469/469 [00:13<00:00, 34.02it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0231, Accuracy: 9927/10000 (99.27%)


Epoch: 8


loss=0.028976092115044594 batch_id=468 Accuracy = 0.98165: 100%|██████████| 469/469 [00:13<00:00, 33.58it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0254, Accuracy: 9922/10000 (99.22%)


Epoch: 9


loss=0.0962783470749855 batch_id=468 Accuracy = 0.9833166666666666: 100%|██████████| 469/469 [00:13<00:00, 33.83it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0222, Accuracy: 9925/10000 (99.25%)


Epoch: 10


loss=0.02901284582912922 batch_id=468 Accuracy = 0.9828333333333333: 100%|██████████| 469/469 [00:13<00:00, 33.68it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0222, Accuracy: 9930/10000 (99.30%)


Epoch: 11


loss=0.02577374130487442 batch_id=468 Accuracy = 0.9840666666666666: 100%|██████████| 469/469 [00:14<00:00, 33.04it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0205, Accuracy: 9921/10000 (99.21%)


Epoch: 12


loss=0.024927696213126183 batch_id=468 Accuracy = 0.9852166666666666: 100%|██████████| 469/469 [00:13<00:00, 34.02it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0223, Accuracy: 9930/10000 (99.30%)


Epoch: 13


loss=0.06254154443740845 batch_id=468 Accuracy = 0.98475: 100%|██████████| 469/469 [00:13<00:00, 34.33it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0210, Accuracy: 9942/10000 (99.42%)


Epoch: 14


loss=0.05956919863820076 batch_id=468 Accuracy = 0.9845833333333334: 100%|██████████| 469/469 [00:13<00:00, 35.10it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0190, Accuracy: 9944/10000 (99.44%)


Epoch: 15


loss=0.055184122174978256 batch_id=468 Accuracy = 0.9856333333333334: 100%|██████████| 469/469 [00:14<00:00, 33.23it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0199, Accuracy: 9945/10000 (99.45%)


Epoch: 16


loss=0.037638209760189056 batch_id=468 Accuracy = 0.9856833333333334: 100%|██████████| 469/469 [00:13<00:00, 34.34it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0180, Accuracy: 9950/10000 (99.50%)


Epoch: 17


loss=0.07343341410160065 batch_id=468 Accuracy = 0.9867333333333334: 100%|██████████| 469/469 [00:13<00:00, 33.86it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0159, Accuracy: 9949/10000 (99.49%)


Epoch: 18


loss=0.04972821846604347 batch_id=468 Accuracy = 0.9868833333333333: 100%|██████████| 469/469 [00:13<00:00, 34.31it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0183, Accuracy: 9946/10000 (99.46%)


Epoch: 19


loss=0.0613422691822052 batch_id=468 Accuracy = 0.9872666666666666: 100%|██████████| 469/469 [00:14<00:00, 33.47it/s]



Test set: Average loss: 0.0168, Accuracy: 9949/10000 (99.49%)

