In [0]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [0]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(1,8,3, padding = 1), nn.BatchNorm2d(8), nn.Dropout(0.1))

        self.conv2 = nn.Sequential(nn.Conv2d(8,16,3, padding = 1), nn.BatchNorm2d(16), nn.Dropout(0.1))
        self.conv3 = nn.Sequential(nn.Conv2d(16,32,3, padding = 1), nn.BatchNorm2d(32), nn.Dropout(0.1))
        #self.conv4 = nn.Sequential(nn.Conv2d(32,64,3, padding = 1), nn.BatchNorm2d(64), nn.Dropout(0.1))

        self.pool1 = nn.MaxPool2d(2,2)
        self.conv5 = nn.Sequential(nn.Conv2d(32,8,1), nn.BatchNorm2d(8), nn.Dropout(0.1))

        self.conv6 = nn.Sequential(nn.Conv2d(8,16,3, padding = 1), nn.BatchNorm2d(16), nn.Dropout(0.1))
        self.conv7 = nn.Sequential(nn.Conv2d(16,16,3, padding = 1), nn.BatchNorm2d(16), nn.Dropout(0.1))
        #self.conv8 = nn.Sequential(nn.Conv2d(32,64,3, padding = 1), nn.BatchNorm2d(64), nn.Dropout(0.1))

        #self.pool2 = nn.MaxPool2d(2,2)
        #self.conv9 = nn.Sequential(nn.Conv2d(64,8,1), nn.BatchNorm2d(8), nn.Dropout(0.1))

        self.conv10 = nn.Sequential(nn.Conv2d(16,16,3), nn.BatchNorm2d(16), nn.Dropout(0.1))
        self.conv11 = nn.Sequential(nn.Conv2d(16,16,3), nn.BatchNorm2d(16), nn.Dropout(0.1))
        self.conv12 = nn.Sequential(nn.Conv2d(16,16,3), nn.BatchNorm2d(16), nn.Dropout(0.1))
        #self.conv13 = nn.Sequential(nn.Conv2d(32,32,3), nn.BatchNorm2d(32), nn.Dropout(0.1))

        #self.conv14 = nn.Conv2d(,16,3)
        #self.conv12 = nn.Conv2d(16,10,3)

        self.conv15 = nn.Conv2d(16,10,1)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv3(F.relu(self.conv2(F.relu(self.conv1(x)))))))
        x = F.relu(self.conv7(F.relu(self.conv6(F.relu(self.conv5(x))))))
        x = F.relu(self.conv12(F.relu(self.conv11(F.relu(self.conv10(x))))))
        #x = F.relu(self.conv10(F.relu(self.conv9(x))))
        #x = F.relu(self.conv12(F.relu(self.conv11(F.relu(self.conv10(x))))))

        x = F.avg_pool2d(x,(6,6))
        
        x = self.conv15(x)
        x = x.view(-1, 10)

        return F.log_softmax(x)

In [66]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))





[A[A

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
       BatchNorm2d-2            [-1, 8, 28, 28]              16
           Dropout-3            [-1, 8, 28, 28]               0
            Conv2d-4           [-1, 16, 28, 28]           1,168
       BatchNorm2d-5           [-1, 16, 28, 28]              32
           Dropout-6           [-1, 16, 28, 28]               0
            Conv2d-7           [-1, 32, 28, 28]           4,640
       BatchNorm2d-8           [-1, 32, 28, 28]              64
           Dropout-9           [-1, 32, 28, 28]               0
        MaxPool2d-10           [-1, 32, 14, 14]               0
           Conv2d-11            [-1, 8, 14, 14]             264
      BatchNorm2d-12            [-1, 8, 14, 14]              16
          Dropout-13            [-1, 8, 14, 14]               0
           Conv2d-14           [-1, 16,



In [0]:


torch.manual_seed(1)
batch_size = 128

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)


In [0]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
          test_loss, correct, len(test_loader.dataset),
          100. * correct / len(test_loader.dataset)))

In [69]:

model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.015, momentum=0.9)

for epoch in range(1, 20):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)





loss=2.347456932067871 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=2.3584787845611572 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=2.3584787845611572 batch_id=1:   0%|          | 2/469 [00:00<00:28, 16.30it/s][A[A

loss=2.2942662239074707 batch_id=2:   0%|          | 2/469 [00:00<00:28, 16.30it/s][A[A

loss=2.3553242683410645 batch_id=3:   0%|          | 2/469 [00:00<00:28, 16.30it/s][A[A

loss=2.2826056480407715 batch_id=4:   0%|          | 2/469 [00:00<00:28, 16.30it/s][A[A

loss=2.3245959281921387 batch_id=5:   0%|          | 2/469 [00:00<00:28, 16.30it/s][A[A

loss=2.3245959281921387 batch_id=5:   1%|▏         | 6/469 [00:00<00:24, 18.84it/s][A[A

loss=2.3320205211639404 batch_id=6:   1%|▏         | 6/469 [00:00<00:24, 18.84it/s][A[A

loss=2.3084468841552734 batch_id=7:   1%|▏         | 6/469 [00:00<00:24, 18.84it/s][A[A

loss=2.3078575134277344 batch_id=8:   1%|▏         | 6/469 [00:00<00:24, 18.84it/s][A[A

loss=2.28828


Test set: Average loss: 0.1320, Accuracy: 9617/10000 (96%)





loss=0.04770373925566673 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.1718059480190277 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s] [A[A

loss=0.1718059480190277 batch_id=1:   0%|          | 2/469 [00:00<00:30, 15.56it/s][A[A

loss=0.15362781286239624 batch_id=2:   0%|          | 2/469 [00:00<00:30, 15.56it/s][A[A

loss=0.1000732034444809 batch_id=3:   0%|          | 2/469 [00:00<00:30, 15.56it/s] [A[A

loss=0.13526952266693115 batch_id=4:   0%|          | 2/469 [00:00<00:30, 15.56it/s][A[A

loss=0.12885314226150513 batch_id=5:   0%|          | 2/469 [00:00<00:30, 15.56it/s][A[A

loss=0.12885314226150513 batch_id=5:   1%|▏         | 6/469 [00:00<00:25, 18.13it/s][A[A

loss=0.10035805404186249 batch_id=6:   1%|▏         | 6/469 [00:00<00:25, 18.13it/s][A[A

loss=0.07620961219072342 batch_id=7:   1%|▏         | 6/469 [00:00<00:25, 18.13it/s][A[A

loss=0.11699260771274567 batch_id=8:   1%|▏         | 6/469 [00:00<00:25, 18.13it/s][A[A

los


Test set: Average loss: 0.0805, Accuracy: 9742/10000 (97%)





loss=0.029772575944662094 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.03915834426879883 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s] [A[A

loss=0.03915834426879883 batch_id=1:   0%|          | 2/469 [00:00<00:27, 16.82it/s][A[A

loss=0.07685043662786484 batch_id=2:   0%|          | 2/469 [00:00<00:27, 16.82it/s][A[A

loss=0.05393798276782036 batch_id=3:   0%|          | 2/469 [00:00<00:27, 16.82it/s][A[A

loss=0.09172150492668152 batch_id=4:   0%|          | 2/469 [00:00<00:27, 16.82it/s][A[A

loss=0.1187397912144661 batch_id=5:   0%|          | 2/469 [00:00<00:27, 16.82it/s] [A[A

loss=0.1187397912144661 batch_id=5:   1%|▏         | 6/469 [00:00<00:23, 19.53it/s][A[A

loss=0.032305873930454254 batch_id=6:   1%|▏         | 6/469 [00:00<00:23, 19.53it/s][A[A

loss=0.047430191189050674 batch_id=7:   1%|▏         | 6/469 [00:00<00:23, 19.53it/s][A[A

loss=0.07224981486797333 batch_id=8:   1%|▏         | 6/469 [00:00<00:23, 19.53it/s] [A[A


Test set: Average loss: 0.0368, Accuracy: 9888/10000 (99%)





loss=0.07979457080364227 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.07648886740207672 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.07648886740207672 batch_id=1:   0%|          | 2/469 [00:00<00:27, 16.96it/s][A[A

loss=0.024242021143436432 batch_id=2:   0%|          | 2/469 [00:00<00:27, 16.96it/s][A[A

loss=0.10231006890535355 batch_id=3:   0%|          | 2/469 [00:00<00:27, 16.96it/s] [A[A

loss=0.04106587916612625 batch_id=4:   0%|          | 2/469 [00:00<00:27, 16.96it/s][A[A

loss=0.03302236646413803 batch_id=5:   0%|          | 2/469 [00:00<00:27, 16.96it/s][A[A

loss=0.03302236646413803 batch_id=5:   1%|▏         | 6/469 [00:00<00:23, 20.04it/s][A[A

loss=0.05735703557729721 batch_id=6:   1%|▏         | 6/469 [00:00<00:23, 20.04it/s][A[A

loss=0.1292348951101303 batch_id=7:   1%|▏         | 6/469 [00:00<00:23, 20.04it/s] [A[A

loss=0.06354762613773346 batch_id=8:   1%|▏         | 6/469 [00:00<00:23, 20.04it/s][A[A




Test set: Average loss: 0.0295, Accuracy: 9910/10000 (99%)





loss=0.05966230854392052 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.062487535178661346 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.062487535178661346 batch_id=1:   0%|          | 2/469 [00:00<00:29, 15.78it/s][A[A

loss=0.027492957189679146 batch_id=2:   0%|          | 2/469 [00:00<00:29, 15.78it/s][A[A

loss=0.018926754593849182 batch_id=3:   0%|          | 2/469 [00:00<00:29, 15.78it/s][A[A

loss=0.03893023729324341 batch_id=4:   0%|          | 2/469 [00:00<00:29, 15.78it/s] [A[A

loss=0.0922263115644455 batch_id=5:   0%|          | 2/469 [00:00<00:29, 15.78it/s] [A[A

loss=0.0922263115644455 batch_id=5:   1%|▏         | 6/469 [00:00<00:24, 18.56it/s][A[A

loss=0.09493691474199295 batch_id=6:   1%|▏         | 6/469 [00:00<00:24, 18.56it/s][A[A

loss=0.023465711623430252 batch_id=7:   1%|▏         | 6/469 [00:00<00:24, 18.56it/s][A[A

loss=0.0764862671494484 batch_id=8:   1%|▏         | 6/469 [00:00<00:24, 18.56it/s]  [A


Test set: Average loss: 0.0334, Accuracy: 9900/10000 (99%)





loss=0.03742718696594238 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.07483163475990295 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.07483163475990295 batch_id=1:   0%|          | 2/469 [00:00<00:29, 15.94it/s][A[A

loss=0.062426138669252396 batch_id=2:   0%|          | 2/469 [00:00<00:29, 15.94it/s][A[A

loss=0.0624568834900856 batch_id=3:   0%|          | 2/469 [00:00<00:29, 15.94it/s]  [A[A

loss=0.025793248787522316 batch_id=4:   0%|          | 2/469 [00:00<00:29, 15.94it/s][A[A

loss=0.025793248787522316 batch_id=4:   1%|          | 5/469 [00:00<00:25, 18.45it/s][A[A

loss=0.0733393132686615 batch_id=5:   1%|          | 5/469 [00:00<00:25, 18.45it/s]  [A[A

loss=0.019009152427315712 batch_id=6:   1%|          | 5/469 [00:00<00:25, 18.45it/s][A[A

loss=0.06445109844207764 batch_id=7:   1%|          | 5/469 [00:00<00:25, 18.45it/s] [A[A

loss=0.06445109844207764 batch_id=7:   2%|▏         | 8/469 [00:00<00:22, 20.28it/s][A


Test set: Average loss: 0.0267, Accuracy: 9921/10000 (99%)





loss=0.07209166884422302 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.07209166884422302 batch_id=0:   0%|          | 1/469 [00:00<00:46,  9.96it/s][A[A

loss=0.08602629601955414 batch_id=1:   0%|          | 1/469 [00:00<00:46,  9.96it/s][A[A

loss=0.03854462876915932 batch_id=2:   0%|          | 1/469 [00:00<00:46,  9.96it/s][A[A

loss=0.02796131744980812 batch_id=3:   0%|          | 1/469 [00:00<00:46,  9.96it/s][A[A

loss=0.02189680002629757 batch_id=4:   0%|          | 1/469 [00:00<00:46,  9.96it/s][A[A

loss=0.02189680002629757 batch_id=4:   1%|          | 5/469 [00:00<00:36, 12.65it/s][A[A

loss=0.014166884124279022 batch_id=5:   1%|          | 5/469 [00:00<00:36, 12.65it/s][A[A

loss=0.09727688133716583 batch_id=6:   1%|          | 5/469 [00:00<00:36, 12.65it/s] [A[A

loss=0.03340008482336998 batch_id=7:   1%|          | 5/469 [00:00<00:36, 12.65it/s][A[A

loss=0.027301356196403503 batch_id=8:   1%|          | 5/469 [00:00<00:36, 12.65it/s


Test set: Average loss: 0.0281, Accuracy: 9910/10000 (99%)





loss=0.024843720719218254 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.024843720719218254 batch_id=0:   0%|          | 1/469 [00:00<00:47,  9.78it/s][A[A

loss=0.007304154336452484 batch_id=1:   0%|          | 1/469 [00:00<00:47,  9.78it/s][A[A

loss=0.029528118669986725 batch_id=2:   0%|          | 1/469 [00:00<00:47,  9.78it/s][A[A

loss=0.012662556022405624 batch_id=3:   0%|          | 1/469 [00:00<00:47,  9.78it/s][A[A

loss=0.08912432938814163 batch_id=4:   0%|          | 1/469 [00:00<00:47,  9.78it/s] [A[A

loss=0.08912432938814163 batch_id=4:   1%|          | 5/469 [00:00<00:37, 12.47it/s][A[A

loss=0.06294766068458557 batch_id=5:   1%|          | 5/469 [00:00<00:37, 12.47it/s][A[A

loss=0.013558968901634216 batch_id=6:   1%|          | 5/469 [00:00<00:37, 12.47it/s][A[A

loss=0.0019673891365528107 batch_id=7:   1%|          | 5/469 [00:00<00:37, 12.47it/s][A[A

loss=0.0019673891365528107 batch_id=7:   2%|▏         | 8/469 [00:00<00:30, 1


Test set: Average loss: 0.0290, Accuracy: 9912/10000 (99%)





loss=0.02564706653356552 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.02564706653356552 batch_id=0:   0%|          | 1/469 [00:00<00:49,  9.53it/s][A[A

loss=0.02766784280538559 batch_id=1:   0%|          | 1/469 [00:00<00:49,  9.53it/s][A[A

loss=0.04114913567900658 batch_id=2:   0%|          | 1/469 [00:00<00:49,  9.53it/s][A[A

loss=0.10369973629713058 batch_id=3:   0%|          | 1/469 [00:00<00:49,  9.53it/s][A[A

loss=0.04297255724668503 batch_id=4:   0%|          | 1/469 [00:00<00:49,  9.53it/s][A[A

loss=0.04297255724668503 batch_id=4:   1%|          | 5/469 [00:00<00:38, 12.15it/s][A[A

loss=0.059390921145677567 batch_id=5:   1%|          | 5/469 [00:00<00:38, 12.15it/s][A[A

loss=0.03625919669866562 batch_id=6:   1%|          | 5/469 [00:00<00:38, 12.15it/s] [A[A

loss=0.023630229756236076 batch_id=7:   1%|          | 5/469 [00:00<00:38, 12.15it/s][A[A

loss=0.011310044676065445 batch_id=8:   1%|          | 5/469 [00:00<00:38, 12.15it/


Test set: Average loss: 0.0282, Accuracy: 9918/10000 (99%)





loss=0.03788769245147705 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.03341188654303551 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.03341188654303551 batch_id=1:   0%|          | 2/469 [00:00<00:27, 17.13it/s][A[A

loss=0.026531711220741272 batch_id=2:   0%|          | 2/469 [00:00<00:27, 17.13it/s][A[A

loss=0.004352353513240814 batch_id=3:   0%|          | 2/469 [00:00<00:27, 17.13it/s][A[A

loss=0.013982564210891724 batch_id=4:   0%|          | 2/469 [00:00<00:27, 17.13it/s][A[A

loss=0.01444745808839798 batch_id=5:   0%|          | 2/469 [00:00<00:27, 17.13it/s] [A[A

loss=0.01444745808839798 batch_id=5:   1%|▏         | 6/469 [00:00<00:23, 19.61it/s][A[A

loss=0.009133987128734589 batch_id=6:   1%|▏         | 6/469 [00:00<00:23, 19.61it/s][A[A

loss=0.046481430530548096 batch_id=7:   1%|▏         | 6/469 [00:00<00:23, 19.61it/s][A[A

loss=0.05953829362988472 batch_id=8:   1%|▏         | 6/469 [00:00<00:23, 19.61it/s] [A


Test set: Average loss: 0.0208, Accuracy: 9937/10000 (99%)





loss=0.05828849598765373 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.011778993532061577 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.011778993532061577 batch_id=1:   0%|          | 2/469 [00:00<00:27, 16.98it/s][A[A

loss=0.025386888533830643 batch_id=2:   0%|          | 2/469 [00:00<00:27, 16.98it/s][A[A

loss=0.021052438765764236 batch_id=3:   0%|          | 2/469 [00:00<00:27, 16.98it/s][A[A

loss=0.07032756507396698 batch_id=4:   0%|          | 2/469 [00:00<00:27, 16.98it/s] [A[A

loss=0.0044854916632175446 batch_id=5:   0%|          | 2/469 [00:00<00:27, 16.98it/s][A[A

loss=0.0044854916632175446 batch_id=5:   1%|▏         | 6/469 [00:00<00:23, 19.68it/s][A[A

loss=0.0041955262422561646 batch_id=6:   1%|▏         | 6/469 [00:00<00:23, 19.68it/s][A[A

loss=0.009822648018598557 batch_id=7:   1%|▏         | 6/469 [00:00<00:23, 19.68it/s] [A[A

loss=0.018804937601089478 batch_id=8:   1%|▏         | 6/469 [00:00<00:23, 19.68i


Test set: Average loss: 0.0220, Accuracy: 9938/10000 (99%)





loss=0.02275136299431324 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.018410347402095795 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.018410347402095795 batch_id=1:   0%|          | 2/469 [00:00<00:26, 17.68it/s][A[A

loss=0.07199439406394958 batch_id=2:   0%|          | 2/469 [00:00<00:26, 17.68it/s] [A[A

loss=0.017758142203092575 batch_id=3:   0%|          | 2/469 [00:00<00:26, 17.68it/s][A[A

loss=0.011885061860084534 batch_id=4:   0%|          | 2/469 [00:00<00:26, 17.68it/s][A[A

loss=0.048126284033060074 batch_id=5:   0%|          | 2/469 [00:00<00:26, 17.68it/s][A[A

loss=0.048126284033060074 batch_id=5:   1%|▏         | 6/469 [00:00<00:22, 20.35it/s][A[A

loss=0.012092486023902893 batch_id=6:   1%|▏         | 6/469 [00:00<00:22, 20.35it/s][A[A

loss=0.06349246203899384 batch_id=7:   1%|▏         | 6/469 [00:00<00:22, 20.35it/s] [A[A

loss=0.01875597983598709 batch_id=8:   1%|▏         | 6/469 [00:00<00:22, 20.35it/s]


Test set: Average loss: 0.0203, Accuracy: 9937/10000 (99%)





loss=0.013052891939878464 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.013052891939878464 batch_id=0:   0%|          | 1/469 [00:00<00:53,  8.72it/s][A[A

loss=0.07404951006174088 batch_id=1:   0%|          | 1/469 [00:00<00:53,  8.72it/s] [A[A

loss=0.07334834337234497 batch_id=2:   0%|          | 1/469 [00:00<00:53,  8.72it/s][A[A

loss=0.011458944529294968 batch_id=3:   0%|          | 1/469 [00:00<00:53,  8.72it/s][A[A

loss=0.011458944529294968 batch_id=3:   1%|          | 4/469 [00:00<00:42, 11.04it/s][A[A

loss=0.023017067462205887 batch_id=4:   1%|          | 4/469 [00:00<00:42, 11.04it/s][A[A

loss=0.016870297491550446 batch_id=5:   1%|          | 4/469 [00:00<00:42, 11.04it/s][A[A

loss=0.01383250206708908 batch_id=6:   1%|          | 4/469 [00:00<00:42, 11.04it/s] [A[A

loss=0.025315657258033752 batch_id=7:   1%|          | 4/469 [00:00<00:42, 11.04it/s][A[A

loss=0.025315657258033752 batch_id=7:   2%|▏         | 8/469 [00:00<00:33, 13


Test set: Average loss: 0.0223, Accuracy: 9932/10000 (99%)





loss=0.045950740575790405 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.012735016644001007 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.012735016644001007 batch_id=1:   0%|          | 2/469 [00:00<00:28, 16.27it/s][A[A

loss=0.01758977770805359 batch_id=2:   0%|          | 2/469 [00:00<00:28, 16.27it/s] [A[A

loss=0.04207976162433624 batch_id=3:   0%|          | 2/469 [00:00<00:28, 16.27it/s][A[A

loss=0.030546944588422775 batch_id=4:   0%|          | 2/469 [00:00<00:28, 16.27it/s][A[A

loss=0.005280639976263046 batch_id=5:   0%|          | 2/469 [00:00<00:28, 16.27it/s][A[A

loss=0.005280639976263046 batch_id=5:   1%|▏         | 6/469 [00:00<00:24, 18.83it/s][A[A

loss=0.0016673989593982697 batch_id=6:   1%|▏         | 6/469 [00:00<00:24, 18.83it/s][A[A

loss=0.07940984517335892 batch_id=7:   1%|▏         | 6/469 [00:00<00:24, 18.83it/s]  [A[A

loss=0.045028604567050934 batch_id=8:   1%|▏         | 6/469 [00:00<00:24, 18.83it/


Test set: Average loss: 0.0189, Accuracy: 9940/10000 (99%)





loss=0.03903555870056152 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.013343892991542816 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.013343892991542816 batch_id=1:   0%|          | 2/469 [00:00<00:26, 17.72it/s][A[A

loss=0.021428603678941727 batch_id=2:   0%|          | 2/469 [00:00<00:26, 17.72it/s][A[A

loss=0.013063568621873856 batch_id=3:   0%|          | 2/469 [00:00<00:26, 17.72it/s][A[A

loss=0.019084002822637558 batch_id=4:   0%|          | 2/469 [00:00<00:26, 17.72it/s][A[A

loss=0.008994340896606445 batch_id=5:   0%|          | 2/469 [00:00<00:26, 17.72it/s][A[A

loss=0.008994340896606445 batch_id=5:   1%|▏         | 6/469 [00:00<00:22, 20.75it/s][A[A

loss=0.015046894550323486 batch_id=6:   1%|▏         | 6/469 [00:00<00:22, 20.75it/s][A[A

loss=0.012015087530016899 batch_id=7:   1%|▏         | 6/469 [00:00<00:22, 20.75it/s][A[A

loss=0.028874587267637253 batch_id=8:   1%|▏         | 6/469 [00:00<00:22, 20.75it/s]


Test set: Average loss: 0.0177, Accuracy: 9951/10000 (100%)





loss=0.021973852068185806 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.02538498491048813 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s] [A[A

loss=0.02538498491048813 batch_id=1:   0%|          | 2/469 [00:00<00:26, 17.70it/s][A[A

loss=0.056620705872774124 batch_id=2:   0%|          | 2/469 [00:00<00:26, 17.70it/s][A[A

loss=0.016114432364702225 batch_id=3:   0%|          | 2/469 [00:00<00:26, 17.70it/s][A[A

loss=0.014107149094343185 batch_id=4:   0%|          | 2/469 [00:00<00:26, 17.70it/s][A[A

loss=0.014107149094343185 batch_id=4:   1%|          | 5/469 [00:00<00:23, 20.04it/s][A[A

loss=0.09673891961574554 batch_id=5:   1%|          | 5/469 [00:00<00:23, 20.04it/s] [A[A

loss=0.006294373422861099 batch_id=6:   1%|          | 5/469 [00:00<00:23, 20.04it/s][A[A

loss=0.028165923431515694 batch_id=7:   1%|          | 5/469 [00:00<00:23, 20.04it/s][A[A

loss=0.028165923431515694 batch_id=7:   2%|▏         | 8/469 [00:00<00:20, 22.17it/s]


Test set: Average loss: 0.0172, Accuracy: 9944/10000 (99%)





loss=0.007159091532230377 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.030307143926620483 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.030307143926620483 batch_id=1:   0%|          | 2/469 [00:00<00:28, 16.45it/s][A[A

loss=0.02205006033182144 batch_id=2:   0%|          | 2/469 [00:00<00:28, 16.45it/s] [A[A

loss=0.026629162952303886 batch_id=3:   0%|          | 2/469 [00:00<00:28, 16.45it/s][A[A

loss=0.013868827372789383 batch_id=4:   0%|          | 2/469 [00:00<00:28, 16.45it/s][A[A

loss=0.009763915091753006 batch_id=5:   0%|          | 2/469 [00:00<00:28, 16.45it/s][A[A

loss=0.009763915091753006 batch_id=5:   1%|▏         | 6/469 [00:00<00:23, 19.73it/s][A[A

loss=0.012078655883669853 batch_id=6:   1%|▏         | 6/469 [00:00<00:23, 19.73it/s][A[A

loss=0.04009861871600151 batch_id=7:   1%|▏         | 6/469 [00:00<00:23, 19.73it/s] [A[A

loss=0.051532477140426636 batch_id=8:   1%|▏         | 6/469 [00:00<00:23, 19.73it/s


Test set: Average loss: 0.0189, Accuracy: 9937/10000 (99%)





loss=0.006584659218788147 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.014147790148854256 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.014147790148854256 batch_id=1:   0%|          | 2/469 [00:00<00:28, 16.54it/s][A[A

loss=0.04569646343588829 batch_id=2:   0%|          | 2/469 [00:00<00:28, 16.54it/s] [A[A

loss=0.010433878749608994 batch_id=3:   0%|          | 2/469 [00:00<00:28, 16.54it/s][A[A

loss=0.007689617574214935 batch_id=4:   0%|          | 2/469 [00:00<00:28, 16.54it/s][A[A

loss=0.013771981000900269 batch_id=5:   0%|          | 2/469 [00:00<00:28, 16.54it/s][A[A

loss=0.013771981000900269 batch_id=5:   1%|▏         | 6/469 [00:00<00:23, 19.31it/s][A[A

loss=0.03014378994703293 batch_id=6:   1%|▏         | 6/469 [00:00<00:23, 19.31it/s] [A[A

loss=0.016575388610363007 batch_id=7:   1%|▏         | 6/469 [00:00<00:23, 19.31it/s][A[A

loss=0.04997226595878601 batch_id=8:   1%|▏         | 6/469 [00:00<00:23, 19.31it/s]


Test set: Average loss: 0.0153, Accuracy: 9944/10000 (99%)





loss=0.003911182284355164 batch_id=0:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.027939118444919586 batch_id=1:   0%|          | 0/469 [00:00<?, ?it/s][A[A

loss=0.027939118444919586 batch_id=1:   0%|          | 2/469 [00:00<00:26, 17.62it/s][A[A

loss=0.02203402668237686 batch_id=2:   0%|          | 2/469 [00:00<00:26, 17.62it/s] [A[A

loss=0.00812067836523056 batch_id=3:   0%|          | 2/469 [00:00<00:26, 17.62it/s][A[A

loss=0.0692121610045433 batch_id=4:   0%|          | 2/469 [00:00<00:26, 17.62it/s] [A[A

loss=0.04596433416008949 batch_id=5:   0%|          | 2/469 [00:00<00:26, 17.62it/s][A[A

loss=0.04596433416008949 batch_id=5:   1%|▏         | 6/469 [00:00<00:22, 20.50it/s][A[A

loss=0.002781011164188385 batch_id=6:   1%|▏         | 6/469 [00:00<00:22, 20.50it/s][A[A

loss=0.04307011142373085 batch_id=7:   1%|▏         | 6/469 [00:00<00:22, 20.50it/s] [A[A

loss=0.01059998944401741 batch_id=8:   1%|▏         | 6/469 [00:00<00:22, 20.50it/s][A


Test set: Average loss: 0.0159, Accuracy: 9952/10000 (100%)

