<a href="https://colab.research.google.com/github/OptimumCoder/EVA4/blob/master/S4_Testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [0]:
class Net(nn.Module):
    """Small all-convolutional classifier for single-channel 28x28 images.

    The network is three convolutional stages followed by a 2x2 convolution
    that reduces the remaining 2x2 feature map to one 10-way score vector per
    image. ``forward`` returns log-probabilities (``log_softmax`` over dim 1).

    Spatial sizes noted in the comments assume a 28x28 input.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Stage 1: 28x28 -> 13x13, finishing on 16 channels.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, 3),   # 28x28 -> 26x26
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2, 2),    # 26x26 -> 13x13
            nn.Conv2d(32, 16, 1),  # 1x1 conv: 32 -> 16 channels, size unchanged
            nn.Dropout(0.25)
        )

        # Stage 2: 13x13 -> 4x4, finishing on 16 channels.
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 3),  # 13x13 -> 11x11
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, 3),  # 11x11 -> 9x9
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2, 2),    # 9x9 -> 4x4
            nn.Conv2d(32, 16, 1),  # 1x1 conv: 32 -> 16 channels, size unchanged
            nn.Dropout(0.25)
        )

        # Stage 3: 4x4 -> 2x2.
        self.conv3 = nn.Sequential(
            nn.Conv2d(16, 16, 3),  # 4x4 -> 2x2
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(0.25)
        )

        # Classifier head: a 2x2 kernel collapses the 2x2 map to 1x1 with one
        # channel per class.
        self.conv4 = nn.Conv2d(16, 10, 2)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 10)``.

        Args:
            x: image batch of shape ``(batch, 1, 28, 28)``.

        Raises:
            ValueError: if the final feature map is not 1x1 (i.e. the input
                was not 28x28). Without this check ``view(-1, 10)`` would fold
                spatial positions into the batch axis and return scrambled
                rows instead of failing.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        if tuple(x.shape[-2:]) != (1, 1):
            raise ValueError(
                "Net expects 28x28 inputs; final feature map has shape "
                "{} instead of (..., 10, 1, 1)".format(tuple(x.shape))
            )
        x = x.view(-1, 10)
        return F.log_softmax(x, dim=1)


In [64]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 26, 26]             320
              ReLU-2           [-1, 32, 26, 26]               0
       BatchNorm2d-3           [-1, 32, 26, 26]              64
         MaxPool2d-4           [-1, 32, 13, 13]               0
            Conv2d-5           [-1, 16, 13, 13]             528
           Dropout-6           [-1, 16, 13, 13]               0
            Conv2d-7           [-1, 32, 11, 11]           4,640
              ReLU-8           [-1, 32, 11, 11]               0
       BatchNorm2d-9           [-1, 32, 11, 11]              64
           Conv2d-10             [-1, 32, 9, 9]           9,248
             ReLU-11             [-1, 32, 9, 9]               0
      BatchNorm2d-12             [-1, 32, 9, 9]              64
        MaxPool2d-13             [-1, 32, 4, 4]               0
           Conv2d-14             [-1, 1

In [0]:


# Seed PyTorch's RNG so shuffling and weight initialisation are repeatable.
torch.manual_seed(1)
batch_size = 128

# Extra worker / pinned-memory options are only passed when CUDA is in use.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# One preprocessing pipeline shared by both splits so they cannot drift apart.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST training-set
# mean / std -- confirm before reusing with another dataset.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation metrics are sums over the whole split, so sample order is
# irrelevant: keep the test loader unshuffled so it is deterministic and does
# not draw from the global RNG. download=True makes this split usable even if
# the training split above was not created first.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)


In [0]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Each batch is moved to ``device``, passed through ``model``, scored with
    ``F.nll_loss`` (so the model is expected to emit log-probabilities), and
    followed by one optimizer step. The current batch loss is shown in the
    progress-bar description.

    Args:
        model: module to train; switched to training mode here.
        device: device each ``(data, target)`` batch is moved to.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer whose gradients are zeroed and which is stepped
            once per batch.
        epoch: accepted for the caller's bookkeeping; not used here.

    Returns:
        float: mean per-sample loss over the pass, computed from the losses
        observed before each optimizer step (``nan`` if the loader yielded no
        samples).
    """
    model.train()
    pbar = tqdm(train_loader)
    loss_sum = 0.0      # sum of per-sample losses seen so far
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        # nll_loss averages over the batch; weight by batch size so a smaller
        # final batch does not skew the epoch mean.
        batch_loss = loss.item()
        batch_n = data.size(0)
        loss_sum += batch_loss * batch_n
        samples_seen += batch_n
        pbar.set_description(desc= f'loss={batch_loss} batch_id={batch_idx}')

    return loss_sum / samples_seen if samples_seen else float('nan')


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [67]:

# Hyperparameters for this run, gathered in one place.
LEARNING_RATE = 0.01
MOMENTUM = 0.9
LAST_EPOCH = 19  # epochs are numbered 1..19 inclusive

# Start from a freshly initialised network on the selected device.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

# One training pass followed by one evaluation on the test split per epoch.
for epoch in range(1, LAST_EPOCH + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)



  0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=2.446976900100708 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=2.446976900100708 batch_id=0:   0%|          | 1/469 [00:00<00:51,  9.06it/s][A[A

loss=2.4708352088928223 batch_id=1:   0%|          | 1/469 [00:00<00:51,  9.06it/s][A[A

loss=2.3800981044769287 batch_id=2:   0%|          | 1/469 [00:00<00:51,  9.06it/s][A[A

loss=2.4617691040039062 batch_id=3:   0%|          | 1/469 [00:00<00:51,  9.06it/s][A[A

loss=2.4617691040039062 batch_id=3:   1%|          | 4/469 [00:00<00:41, 11.19it/s][A[A

loss=2.3327560424804688 batch_id=4:   1%|          | 4/469 [00:00<00:41, 11.19it/s][A[A

loss=2.258636474609375 batch_id=5:   1%|          | 4/469 [00:00<00:41, 11.19it/s] [A[A

loss=2.222956418991089 batch_id=6:   1%|          | 4/469 [00:00<00:41, 11.19it/s][A[A

loss=2.222956418991089 batch_id=6:   1%|▏         | 7/469 [00:00<00:34, 13.32it/s][A[A

loss=2.1633968353271484 batch_id=7:   1%|▏         | 


Test set: Average loss: 0.0608, Accuracy: 9803/10000 (98%)





loss=0.0940384566783905 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.06367778778076172 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.06367778778076172 batch_id=1:   0%|          | 2/469 [00:00<00:28, 16.37it/s][A[A

loss=0.15165328979492188 batch_id=2:   0%|          | 2/469 [00:00<00:28, 16.37it/s][A[A

loss=0.07880081236362457 batch_id=3:   0%|          | 2/469 [00:00<00:28, 16.37it/s][A[A

loss=0.12716610729694366 batch_id=4:   0%|          | 2/469 [00:00<00:28, 16.37it/s][A[A

loss=0.11547873914241791 batch_id=5:   0%|          | 2/469 [00:00<00:28, 16.37it/s][A[A

loss=0.11547873914241791 batch_id=5:   1%|▏         | 6/469 [00:00<00:24, 19.06it/s][A[A

loss=0.1373225897550583 batch_id=6:   1%|▏         | 6/469 [00:00<00:24, 19.06it/s] [A[A

loss=0.15574708580970764 batch_id=7:   1%|▏         | 6/469 [00:00<00:24, 19.06it/s][A[A

loss=0.08127596974372864 batch_id=8:   1%|▏         | 6/469 [00:00<00:24, 19.06it/s][A[A

los


Test set: Average loss: 0.0382, Accuracy: 9879/10000 (99%)





loss=0.10552877187728882 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.10552877187728882 batch_id=0:   0%|          | 1/469 [00:00<00:50,  9.21it/s][A[A

loss=0.060139529407024384 batch_id=1:   0%|          | 1/469 [00:00<00:50,  9.21it/s][A[A

loss=0.08794812858104706 batch_id=2:   0%|          | 1/469 [00:00<00:50,  9.21it/s] [A[A

loss=0.06455960869789124 batch_id=3:   0%|          | 1/469 [00:00<00:50,  9.21it/s][A[A

loss=0.046046238392591476 batch_id=4:   0%|          | 1/469 [00:00<00:50,  9.21it/s][A[A

loss=0.046046238392591476 batch_id=4:   1%|          | 5/469 [00:00<00:39, 11.86it/s][A[A

loss=0.05068032816052437 batch_id=5:   1%|          | 5/469 [00:00<00:39, 11.86it/s] [A[A

loss=0.06452222168445587 batch_id=6:   1%|          | 5/469 [00:00<00:39, 11.86it/s][A[A

loss=0.08569920808076859 batch_id=7:   1%|          | 5/469 [00:00<00:39, 11.86it/s][A[A

loss=0.08569920808076859 batch_id=7:   2%|▏         | 8/469 [00:00<00:31, 14.47it


Test set: Average loss: 0.0355, Accuracy: 9887/10000 (99%)





loss=0.06391535699367523 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.06391535699367523 batch_id=0:   0%|          | 1/469 [00:00<00:55,  8.36it/s][A[A

loss=0.07605715095996857 batch_id=1:   0%|          | 1/469 [00:00<00:55,  8.36it/s][A[A

loss=0.08116478472948074 batch_id=2:   0%|          | 1/469 [00:00<00:55,  8.36it/s][A[A

loss=0.0722004622220993 batch_id=3:   0%|          | 1/469 [00:00<00:55,  8.36it/s] [A[A

loss=0.0722004622220993 batch_id=3:   1%|          | 4/469 [00:00<00:44, 10.45it/s][A[A

loss=0.01934945583343506 batch_id=4:   1%|          | 4/469 [00:00<00:44, 10.45it/s][A[A

loss=0.04932079091668129 batch_id=5:   1%|          | 4/469 [00:00<00:44, 10.45it/s][A[A

loss=0.05881889536976814 batch_id=6:   1%|          | 4/469 [00:00<00:44, 10.45it/s][A[A

loss=0.05881889536976814 batch_id=6:   1%|▏         | 7/469 [00:00<00:36, 12.77it/s][A[A

loss=0.021280527114868164 batch_id=7:   1%|▏         | 7/469 [00:00<00:36, 12.77it/s][


Test set: Average loss: 0.0313, Accuracy: 9895/10000 (99%)





loss=0.07391064614057541 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.07391064614057541 batch_id=0:   0%|          | 1/469 [00:00<00:49,  9.51it/s][A[A

loss=0.05108310282230377 batch_id=1:   0%|          | 1/469 [00:00<00:49,  9.51it/s][A[A

loss=0.05021043121814728 batch_id=2:   0%|          | 1/469 [00:00<00:49,  9.51it/s][A[A

loss=0.034500524401664734 batch_id=3:   0%|          | 1/469 [00:00<00:49,  9.51it/s][A[A

loss=0.034500524401664734 batch_id=3:   1%|          | 4/469 [00:00<00:39, 11.88it/s][A[A

loss=0.013453098013997078 batch_id=4:   1%|          | 4/469 [00:00<00:39, 11.88it/s][A[A

loss=0.08371583372354507 batch_id=5:   1%|          | 4/469 [00:00<00:39, 11.88it/s] [A[A

loss=0.06233981251716614 batch_id=6:   1%|          | 4/469 [00:00<00:39, 11.88it/s][A[A

loss=0.159724161028862 batch_id=7:   1%|          | 4/469 [00:00<00:39, 11.88it/s]  [A[A

loss=0.159724161028862 batch_id=7:   2%|▏         | 8/469 [00:00<00:31, 14.74it/s]


Test set: Average loss: 0.0287, Accuracy: 9901/10000 (99%)





loss=0.06668028235435486 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.165338397026062 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s]  [A[A

loss=0.165338397026062 batch_id=1:   0%|          | 2/469 [00:00<00:28, 16.64it/s][A[A

loss=0.09570876508951187 batch_id=2:   0%|          | 2/469 [00:00<00:28, 16.64it/s][A[A

loss=0.013691535219550133 batch_id=3:   0%|          | 2/469 [00:00<00:28, 16.64it/s][A[A

loss=0.025327179580926895 batch_id=4:   0%|          | 2/469 [00:00<00:28, 16.64it/s][A[A

loss=0.021202031522989273 batch_id=5:   0%|          | 2/469 [00:00<00:28, 16.64it/s][A[A

loss=0.021202031522989273 batch_id=5:   1%|▏         | 6/469 [00:00<00:23, 19.57it/s][A[A

loss=0.04132113233208656 batch_id=6:   1%|▏         | 6/469 [00:00<00:23, 19.57it/s] [A[A

loss=0.02166372537612915 batch_id=7:   1%|▏         | 6/469 [00:00<00:23, 19.57it/s][A[A

loss=0.06589475274085999 batch_id=8:   1%|▏         | 6/469 [00:00<00:23, 19.57it/s][A[A



Test set: Average loss: 0.0232, Accuracy: 9923/10000 (99%)





loss=0.02550681307911873 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.08596627414226532 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.08596627414226532 batch_id=1:   0%|          | 2/469 [00:00<00:27, 16.91it/s][A[A

loss=0.04026138409972191 batch_id=2:   0%|          | 2/469 [00:00<00:27, 16.91it/s][A[A

loss=0.07832533121109009 batch_id=3:   0%|          | 2/469 [00:00<00:27, 16.91it/s][A[A

loss=0.04093017429113388 batch_id=4:   0%|          | 2/469 [00:00<00:27, 16.91it/s][A[A

loss=0.04566200077533722 batch_id=5:   0%|          | 2/469 [00:00<00:27, 16.91it/s][A[A

loss=0.04566200077533722 batch_id=5:   1%|▏         | 6/469 [00:00<00:23, 19.83it/s][A[A

loss=0.09402602165937424 batch_id=6:   1%|▏         | 6/469 [00:00<00:23, 19.83it/s][A[A

loss=0.03184482827782631 batch_id=7:   1%|▏         | 6/469 [00:00<00:23, 19.83it/s][A[A

loss=0.055315978825092316 batch_id=8:   1%|▏         | 6/469 [00:00<00:23, 19.83it/s][A[A

l


Test set: Average loss: 0.0239, Accuracy: 9927/10000 (99%)





loss=0.06253869831562042 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.06253869831562042 batch_id=0:   0%|          | 1/469 [00:00<00:56,  8.31it/s][A[A

loss=0.03823836147785187 batch_id=1:   0%|          | 1/469 [00:00<00:56,  8.31it/s][A[A

loss=0.05484120920300484 batch_id=2:   0%|          | 1/469 [00:00<00:56,  8.31it/s][A[A

loss=0.020846404135227203 batch_id=3:   0%|          | 1/469 [00:00<00:56,  8.31it/s][A[A

loss=0.020846404135227203 batch_id=3:   1%|          | 4/469 [00:00<00:44, 10.56it/s][A[A

loss=0.009173665195703506 batch_id=4:   1%|          | 4/469 [00:00<00:44, 10.56it/s][A[A

loss=0.0218423493206501 batch_id=5:   1%|          | 4/469 [00:00<00:44, 10.56it/s]  [A[A

loss=0.01203470304608345 batch_id=6:   1%|          | 4/469 [00:00<00:44, 10.56it/s][A[A

loss=0.01203470304608345 batch_id=6:   1%|▏         | 7/469 [00:00<00:36, 12.63it/s][A[A

loss=0.07508033514022827 batch_id=7:   1%|▏         | 7/469 [00:00<00:36, 12.63it/


Test set: Average loss: 0.0241, Accuracy: 9929/10000 (99%)





loss=0.01464567705988884 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.009701158851385117 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.009701158851385117 batch_id=1:   0%|          | 2/469 [00:00<00:29, 15.69it/s][A[A

loss=0.018681474030017853 batch_id=2:   0%|          | 2/469 [00:00<00:29, 15.69it/s][A[A

loss=0.06729871034622192 batch_id=3:   0%|          | 2/469 [00:00<00:29, 15.69it/s] [A[A

loss=0.02936488762497902 batch_id=4:   0%|          | 2/469 [00:00<00:29, 15.69it/s][A[A

loss=0.02936488762497902 batch_id=4:   1%|          | 5/469 [00:00<00:25, 17.96it/s][A[A

loss=0.06512778997421265 batch_id=5:   1%|          | 5/469 [00:00<00:25, 17.96it/s][A[A

loss=0.0468161441385746 batch_id=6:   1%|          | 5/469 [00:00<00:25, 17.96it/s] [A[A

loss=0.027743415907025337 batch_id=7:   1%|          | 5/469 [00:00<00:25, 17.96it/s][A[A

loss=0.014145836234092712 batch_id=8:   1%|          | 5/469 [00:00<00:25, 17.96it/s][A


Test set: Average loss: 0.0207, Accuracy: 9927/10000 (99%)





loss=0.0035080835223197937 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.10170409083366394 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s]  [A[A

loss=0.10170409083366394 batch_id=1:   0%|          | 2/469 [00:00<00:26, 17.92it/s][A[A

loss=0.033493783324956894 batch_id=2:   0%|          | 2/469 [00:00<00:26, 17.92it/s][A[A

loss=0.07293659448623657 batch_id=3:   0%|          | 2/469 [00:00<00:26, 17.92it/s] [A[A

loss=0.05053691565990448 batch_id=4:   0%|          | 2/469 [00:00<00:26, 17.92it/s][A[A

loss=0.086497001349926 batch_id=5:   0%|          | 2/469 [00:00<00:26, 17.92it/s]  [A[A

loss=0.086497001349926 batch_id=5:   1%|▏         | 6/469 [00:00<00:22, 20.58it/s][A[A

loss=0.032076288014650345 batch_id=6:   1%|▏         | 6/469 [00:00<00:22, 20.58it/s][A[A

loss=0.02211841195821762 batch_id=7:   1%|▏         | 6/469 [00:00<00:22, 20.58it/s] [A[A

loss=0.011930897831916809 batch_id=8:   1%|▏         | 6/469 [00:00<00:22, 20.58it/s][A


Test set: Average loss: 0.0214, Accuracy: 9929/10000 (99%)





loss=0.025876514613628387 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.025138434022665024 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.025138434022665024 batch_id=1:   0%|          | 2/469 [00:00<00:26, 17.38it/s][A[A

loss=0.027856288477778435 batch_id=2:   0%|          | 2/469 [00:00<00:26, 17.38it/s][A[A

loss=0.007901951670646667 batch_id=3:   0%|          | 2/469 [00:00<00:26, 17.38it/s][A[A

loss=0.015954822301864624 batch_id=4:   0%|          | 2/469 [00:00<00:26, 17.38it/s][A[A

loss=0.05516122281551361 batch_id=5:   0%|          | 2/469 [00:00<00:26, 17.38it/s] [A[A

loss=0.05516122281551361 batch_id=5:   1%|▏         | 6/469 [00:00<00:22, 20.43it/s][A[A

loss=0.027421608567237854 batch_id=6:   1%|▏         | 6/469 [00:00<00:22, 20.43it/s][A[A

loss=0.057444993406534195 batch_id=7:   1%|▏         | 6/469 [00:00<00:22, 20.43it/s][A[A

loss=0.05296393111348152 batch_id=8:   1%|▏         | 6/469 [00:00<00:22, 20.43it/s] 


Test set: Average loss: 0.0216, Accuracy: 9924/10000 (99%)





loss=0.025491338223218918 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.025491338223218918 batch_id=0:   0%|          | 1/469 [00:00<00:47,  9.78it/s][A[A

loss=0.009310781955718994 batch_id=1:   0%|          | 1/469 [00:00<00:47,  9.78it/s][A[A

loss=0.09048625826835632 batch_id=2:   0%|          | 1/469 [00:00<00:47,  9.78it/s] [A[A

loss=0.012534968554973602 batch_id=3:   0%|          | 1/469 [00:00<00:47,  9.78it/s][A[A

loss=0.04860950633883476 batch_id=4:   0%|          | 1/469 [00:00<00:47,  9.78it/s] [A[A

loss=0.04860950633883476 batch_id=4:   1%|          | 5/469 [00:00<00:37, 12.34it/s][A[A

loss=0.022301489487290382 batch_id=5:   1%|          | 5/469 [00:00<00:37, 12.34it/s][A[A

loss=0.0765315443277359 batch_id=6:   1%|          | 5/469 [00:00<00:37, 12.34it/s]  [A[A

loss=0.049279410392045975 batch_id=7:   1%|          | 5/469 [00:00<00:37, 12.34it/s][A[A

loss=0.049279410392045975 batch_id=7:   2%|▏         | 8/469 [00:00<00:31, 14


Test set: Average loss: 0.0204, Accuracy: 9934/10000 (99%)





loss=0.022721827030181885 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.022721827030181885 batch_id=0:   0%|          | 1/469 [00:00<00:48,  9.59it/s][A[A

loss=0.04726666584610939 batch_id=1:   0%|          | 1/469 [00:00<00:48,  9.59it/s] [A[A

loss=0.009596139192581177 batch_id=2:   0%|          | 1/469 [00:00<00:48,  9.59it/s][A[A

loss=0.01693926379084587 batch_id=3:   0%|          | 1/469 [00:00<00:48,  9.59it/s] [A[A

loss=0.021503431722521782 batch_id=4:   0%|          | 1/469 [00:00<00:48,  9.59it/s][A[A

loss=0.026613298803567886 batch_id=5:   0%|          | 1/469 [00:00<00:48,  9.59it/s][A[A

loss=0.026613298803567886 batch_id=5:   1%|▏         | 6/469 [00:00<00:37, 12.48it/s][A[A

loss=0.038861583918333054 batch_id=6:   1%|▏         | 6/469 [00:00<00:37, 12.48it/s][A[A

loss=0.009386859834194183 batch_id=7:   1%|▏         | 6/469 [00:00<00:37, 12.48it/s][A[A

loss=0.06576454639434814 batch_id=8:   1%|▏         | 6/469 [00:00<00:37, 12


Test set: Average loss: 0.0189, Accuracy: 9940/10000 (99%)





loss=0.011881127953529358 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.009682238101959229 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.009682238101959229 batch_id=1:   0%|          | 2/469 [00:00<00:25, 18.27it/s][A[A

loss=0.03922176733613014 batch_id=2:   0%|          | 2/469 [00:00<00:25, 18.27it/s] [A[A

loss=0.06842311471700668 batch_id=3:   0%|          | 2/469 [00:00<00:25, 18.27it/s][A[A

loss=0.04554656893014908 batch_id=4:   0%|          | 2/469 [00:00<00:25, 18.27it/s][A[A

loss=0.02684687450528145 batch_id=5:   0%|          | 2/469 [00:00<00:25, 18.27it/s][A[A

loss=0.02684687450528145 batch_id=5:   1%|▏         | 6/469 [00:00<00:21, 21.15it/s][A[A

loss=0.0027589984238147736 batch_id=6:   1%|▏         | 6/469 [00:00<00:21, 21.15it/s][A[A

loss=0.03478533402085304 batch_id=7:   1%|▏         | 6/469 [00:00<00:21, 21.15it/s]  [A[A

loss=0.05161412060260773 batch_id=8:   1%|▏         | 6/469 [00:00<00:21, 21.15it/s][


Test set: Average loss: 0.0186, Accuracy: 9936/10000 (99%)





loss=0.14063459634780884 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.037996046245098114 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.037996046245098114 batch_id=1:   0%|          | 2/469 [00:00<00:26, 17.86it/s][A[A

loss=0.005354039371013641 batch_id=2:   0%|          | 2/469 [00:00<00:26, 17.86it/s][A[A

loss=0.02731289342045784 batch_id=3:   0%|          | 2/469 [00:00<00:26, 17.86it/s] [A[A

loss=0.008690983057022095 batch_id=4:   0%|          | 2/469 [00:00<00:26, 17.86it/s][A[A

loss=0.022640295326709747 batch_id=5:   0%|          | 2/469 [00:00<00:26, 17.86it/s][A[A

loss=0.022640295326709747 batch_id=5:   1%|▏         | 6/469 [00:00<00:22, 20.46it/s][A[A

loss=0.04107058420777321 batch_id=6:   1%|▏         | 6/469 [00:00<00:22, 20.46it/s] [A[A

loss=0.05903422832489014 batch_id=7:   1%|▏         | 6/469 [00:00<00:22, 20.46it/s][A[A

loss=0.05318424478173256 batch_id=8:   1%|▏         | 6/469 [00:00<00:22, 20.46it/s][


Test set: Average loss: 0.0195, Accuracy: 9936/10000 (99%)





loss=0.02210964262485504 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.023834455758333206 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.023834455758333206 batch_id=1:   0%|          | 2/469 [00:00<00:26, 17.35it/s][A[A

loss=0.022426098585128784 batch_id=2:   0%|          | 2/469 [00:00<00:26, 17.35it/s][A[A

loss=0.07329849898815155 batch_id=3:   0%|          | 2/469 [00:00<00:26, 17.35it/s] [A[A

loss=0.033644694834947586 batch_id=4:   0%|          | 2/469 [00:00<00:26, 17.35it/s][A[A

loss=0.01703866943717003 batch_id=5:   0%|          | 2/469 [00:00<00:26, 17.35it/s] [A[A

loss=0.01703866943717003 batch_id=5:   1%|▏         | 6/469 [00:00<00:22, 20.65it/s][A[A

loss=0.01553904265165329 batch_id=6:   1%|▏         | 6/469 [00:00<00:22, 20.65it/s][A[A

loss=0.026566214859485626 batch_id=7:   1%|▏         | 6/469 [00:00<00:22, 20.65it/s][A[A

loss=0.03262466937303543 batch_id=8:   1%|▏         | 6/469 [00:00<00:22, 20.65it/s] [


Test set: Average loss: 0.0199, Accuracy: 9941/10000 (99%)





loss=0.029423926025629044 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.08508867025375366 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s] [A[A

loss=0.08508867025375366 batch_id=1:   0%|          | 2/469 [00:00<00:26, 17.72it/s][A[A

loss=0.07197222113609314 batch_id=2:   0%|          | 2/469 [00:00<00:26, 17.72it/s][A[A

loss=0.037878695875406265 batch_id=3:   0%|          | 2/469 [00:00<00:26, 17.72it/s][A[A

loss=0.011881623417139053 batch_id=4:   0%|          | 2/469 [00:00<00:26, 17.72it/s][A[A

loss=0.011881623417139053 batch_id=4:   1%|          | 5/469 [00:00<00:23, 19.74it/s][A[A

loss=0.007771007716655731 batch_id=5:   1%|          | 5/469 [00:00<00:23, 19.74it/s][A[A

loss=0.039414048194885254 batch_id=6:   1%|          | 5/469 [00:00<00:23, 19.74it/s][A[A

loss=0.035927869379520416 batch_id=7:   1%|          | 5/469 [00:00<00:23, 19.74it/s][A[A

loss=0.035927869379520416 batch_id=7:   2%|▏         | 8/469 [00:00<00:21, 21.53it/s]


Test set: Average loss: 0.0189, Accuracy: 9947/10000 (99%)





loss=0.01214367151260376 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.01872938498854637 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.01872938498854637 batch_id=1:   0%|          | 2/469 [00:00<00:26, 17.80it/s][A[A

loss=0.015365798026323318 batch_id=2:   0%|          | 2/469 [00:00<00:26, 17.80it/s][A[A

loss=0.008968759328126907 batch_id=3:   0%|          | 2/469 [00:00<00:26, 17.80it/s][A[A

loss=0.011443573981523514 batch_id=4:   0%|          | 2/469 [00:00<00:26, 17.80it/s][A[A

loss=0.00971372053027153 batch_id=5:   0%|          | 2/469 [00:00<00:26, 17.80it/s] [A[A

loss=0.00971372053027153 batch_id=5:   1%|▏         | 6/469 [00:00<00:22, 20.44it/s][A[A

loss=0.035549458116292953 batch_id=6:   1%|▏         | 6/469 [00:00<00:22, 20.44it/s][A[A

loss=0.03574904054403305 batch_id=7:   1%|▏         | 6/469 [00:00<00:22, 20.44it/s] [A[A

loss=0.08207444846630096 batch_id=8:   1%|▏         | 6/469 [00:00<00:22, 20.44it/s][A


Test set: Average loss: 0.0192, Accuracy: 9946/10000 (99%)





loss=0.02726486325263977 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.02726486325263977 batch_id=0:   0%|          | 1/469 [00:00<00:55,  8.48it/s][A[A

loss=0.00947154313325882 batch_id=1:   0%|          | 1/469 [00:00<00:55,  8.48it/s][A[A

loss=0.008493315428495407 batch_id=2:   0%|          | 1/469 [00:00<00:55,  8.48it/s][A[A

loss=0.008753113448619843 batch_id=3:   0%|          | 1/469 [00:00<00:55,  8.48it/s][A[A

loss=0.008753113448619843 batch_id=3:   1%|          | 4/469 [00:00<00:44, 10.44it/s][A[A

loss=0.035445909947156906 batch_id=4:   1%|          | 4/469 [00:00<00:44, 10.44it/s][A[A

loss=0.00647328794002533 batch_id=5:   1%|          | 4/469 [00:00<00:44, 10.44it/s] [A[A

loss=0.002908002585172653 batch_id=6:   1%|          | 4/469 [00:00<00:44, 10.44it/s][A[A

loss=0.002908002585172653 batch_id=6:   1%|▏         | 7/469 [00:00<00:36, 12.69it/s][A[A

loss=0.010074831545352936 batch_id=7:   1%|▏         | 7/469 [00:00<00:36, 12.6


Test set: Average loss: 0.0181, Accuracy: 9943/10000 (99%)

