In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchinfo import summary
from tqdm import tqdm
from model import MnistNet

# Run on the GPU whenever PyTorch can see one; otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cpu" if not use_cuda else "cuda")
# Bare expression so the notebook displays the selected device.
device

device(type='cuda')

In [2]:
# Build a throwaway instance just to inspect the architecture.
model = MnistNet()
# Trace a single dummy input of shape (1, 1, 28, 28) through the network and
# display the per-layer output shapes and parameter counts.
summary(
    model,
    input_size=(1, 1, 28, 28),
)

Layer (type:depth-idx)                   Output Shape              Param #
MnistNet                                 [1, 10]                   --
├─Sequential: 1-1                        [1, 8, 12, 12]            --
│    └─Conv2d: 2-1                       [1, 8, 26, 26]            72
│    └─BatchNorm2d: 2-2                  [1, 8, 26, 26]            16
│    └─Dropout: 2-3                      [1, 8, 26, 26]            --
│    └─ReLU: 2-4                         [1, 8, 26, 26]            --
│    └─Conv2d: 2-5                       [1, 16, 24, 24]           1,152
│    └─BatchNorm2d: 2-6                  [1, 16, 24, 24]           32
│    └─Dropout: 2-7                      [1, 16, 24, 24]           --
│    └─ReLU: 2-8                         [1, 16, 24, 24]           --
│    └─Conv2d: 2-9                       [1, 8, 24, 24]            128
│    └─MaxPool2d: 2-10                   [1, 8, 12, 12]            --
├─Sequential: 1-2                        [1, 16, 6, 6]             --
│    └─Conv

In [3]:
# Fix the global torch seed so shuffling and random augmentations are repeatable.
torch.manual_seed(1)
batch_size = 128

# Worker / pinned-memory options are only passed when CUDA is in use.
kwargs = {}
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}

# Normalisation shared by both splits.
# NOTE(review): (0.1307, 0.3081) are presumably the MNIST mean/std — confirm.
normalize = transforms.Normalize((0.1307,), (0.3081,))

# Training pipeline: light random geometric augmentation, then tensor + normalise.
train_transform = transforms.Compose([
    transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
    transforms.RandomAffine(degrees=10,  scale=(0.95, 1.05)),
    # transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
    transforms.ToTensor(),
    normalize,
])
train_dataset = datasets.MNIST('data', train=True, download=True,
                               transform=train_transform)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation pipeline: no augmentation, only tensor conversion + normalise.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])
test_dataset = datasets.MNIST('data', train=False, transform=test_transform)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=True, **kwargs)


In [4]:
def train(model, device, train_loader, optimizer, criterion, epoch):
    """Run one training epoch and report progress on a tqdm bar.

    Args:
        model: Network to optimise; switched to training mode here.
        device: Device the batches are moved to before the forward pass.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimiser whose gradients are zeroed and stepped per batch.
        criterion: Callable ``criterion(output, target)`` returning the loss
            tensor that is back-propagated.
        epoch: Epoch index, used only in the progress-bar text.

    Returns:
        None. Progress (last batch loss and running accuracy) is shown in the
        progress-bar description.
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0  # samples seen so far in this epoch
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest score along dim 1.
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += pred.size(0)

        # Running accuracy is measured against the samples processed so far,
        # not the full dataset size; dividing by the dataset size makes the
        # figure meaninglessly low until the very last batch.
        pbar.set_description(desc= f'Epoch={epoch} Batch={batch_idx} loss={loss.item():.7f} Accuracy={100. * correct / processed:.2f}%')


def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.7f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [5]:
# Fresh model on the selected device, plus optimiser, loss and LR schedule.
model = MnistNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.015)
# optimizer = optim.SGD(model.parameters(), lr=0.015, momentum=0.9)
criterion = nn.CrossEntropyLoss()
# Multiply the learning rate by 0.4 every 7 scheduler steps. The `verbose`
# argument is deliberately not used: it is deprecated in recent PyTorch
# releases, so the learning rate is reported explicitly below instead.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.4)


def _report_lr(scheduler):
    """Print the most recently computed learning rate of every parameter group."""
    for group, lr in enumerate(scheduler.get_last_lr()):
        print('Adjusting learning rate of group {} to {:.4e}.'.format(group, lr))


# Report the initial learning rate, then once more after every epoch.
_report_lr(scheduler)
for epoch in range(20):
    train(model, device, train_loader, optimizer, criterion, epoch)
    test(model, device, test_loader, criterion)
    scheduler.step()
    _report_lr(scheduler)

Adjusting learning rate of group 0 to 1.5000e-02.


Epoch=0 Batch=468 loss=0.1062707 Accuracy=90.91%: 100%|██████████| 469/469 [00:10<00:00, 44.82it/s]



Test set: Average loss: 0.0004583, Accuracy: 9828/10000 (98.28%)

Adjusting learning rate of group 0 to 1.5000e-02.


Epoch=1 Batch=468 loss=0.0760556 Accuracy=97.00%: 100%|██████████| 469/469 [00:10<00:00, 42.83it/s]



Test set: Average loss: 0.0003724, Accuracy: 9850/10000 (98.50%)

Adjusting learning rate of group 0 to 1.5000e-02.


Epoch=2 Batch=468 loss=0.1601883 Accuracy=97.48%: 100%|██████████| 469/469 [00:10<00:00, 43.56it/s]



Test set: Average loss: 0.0003454, Accuracy: 9861/10000 (98.61%)

Adjusting learning rate of group 0 to 1.5000e-02.


Epoch=3 Batch=468 loss=0.0473514 Accuracy=97.66%: 100%|██████████| 469/469 [00:10<00:00, 45.30it/s]



Test set: Average loss: 0.0003399, Accuracy: 9871/10000 (98.71%)

Adjusting learning rate of group 0 to 1.5000e-02.


Epoch=4 Batch=468 loss=0.1223510 Accuracy=97.88%: 100%|██████████| 469/469 [00:10<00:00, 42.83it/s]



Test set: Average loss: 0.0002233, Accuracy: 9913/10000 (99.13%)

Adjusting learning rate of group 0 to 1.5000e-02.


Epoch=5 Batch=468 loss=0.0516343 Accuracy=97.95%: 100%|██████████| 469/469 [00:10<00:00, 43.90it/s]



Test set: Average loss: 0.0002884, Accuracy: 9890/10000 (98.90%)

Adjusting learning rate of group 0 to 1.5000e-02.


Epoch=6 Batch=468 loss=0.0829413 Accuracy=98.00%: 100%|██████████| 469/469 [00:11<00:00, 42.23it/s]



Test set: Average loss: 0.0002314, Accuracy: 9903/10000 (99.03%)

Adjusting learning rate of group 0 to 6.0000e-03.


Epoch=7 Batch=468 loss=0.0218383 Accuracy=98.44%: 100%|██████████| 469/469 [00:11<00:00, 41.24it/s]



Test set: Average loss: 0.0001749, Accuracy: 9933/10000 (99.33%)

Adjusting learning rate of group 0 to 6.0000e-03.


Epoch=8 Batch=468 loss=0.0519804 Accuracy=98.53%: 100%|██████████| 469/469 [00:10<00:00, 45.39it/s]



Test set: Average loss: 0.0001585, Accuracy: 9939/10000 (99.39%)

Adjusting learning rate of group 0 to 6.0000e-03.


Epoch=9 Batch=468 loss=0.0046923 Accuracy=98.50%: 100%|██████████| 469/469 [00:10<00:00, 44.72it/s]



Test set: Average loss: 0.0001731, Accuracy: 9932/10000 (99.32%)

Adjusting learning rate of group 0 to 6.0000e-03.


Epoch=10 Batch=468 loss=0.0186869 Accuracy=98.50%: 100%|██████████| 469/469 [00:11<00:00, 41.76it/s]



Test set: Average loss: 0.0001478, Accuracy: 9944/10000 (99.44%)

Adjusting learning rate of group 0 to 6.0000e-03.


Epoch=11 Batch=468 loss=0.0358143 Accuracy=98.50%: 100%|██████████| 469/469 [00:10<00:00, 43.57it/s]



Test set: Average loss: 0.0001564, Accuracy: 9944/10000 (99.44%)

Adjusting learning rate of group 0 to 6.0000e-03.


Epoch=12 Batch=468 loss=0.0221422 Accuracy=98.57%: 100%|██████████| 469/469 [00:10<00:00, 42.97it/s]



Test set: Average loss: 0.0002138, Accuracy: 9917/10000 (99.17%)

Adjusting learning rate of group 0 to 6.0000e-03.


Epoch=13 Batch=468 loss=0.0318980 Accuracy=98.53%: 100%|██████████| 469/469 [00:11<00:00, 41.52it/s]



Test set: Average loss: 0.0001625, Accuracy: 9938/10000 (99.38%)

Adjusting learning rate of group 0 to 2.4000e-03.


Epoch=14 Batch=468 loss=0.0072799 Accuracy=98.78%: 100%|██████████| 469/469 [00:11<00:00, 42.47it/s]



Test set: Average loss: 0.0001497, Accuracy: 9941/10000 (99.41%)

Adjusting learning rate of group 0 to 2.4000e-03.


Epoch=15 Batch=468 loss=0.0469378 Accuracy=98.81%: 100%|██████████| 469/469 [00:11<00:00, 42.37it/s]



Test set: Average loss: 0.0001564, Accuracy: 9940/10000 (99.40%)

Adjusting learning rate of group 0 to 2.4000e-03.


Epoch=16 Batch=468 loss=0.0120237 Accuracy=98.75%: 100%|██████████| 469/469 [00:11<00:00, 41.50it/s]



Test set: Average loss: 0.0001616, Accuracy: 9940/10000 (99.40%)

Adjusting learning rate of group 0 to 2.4000e-03.


Epoch=17 Batch=468 loss=0.0100550 Accuracy=98.74%: 100%|██████████| 469/469 [00:10<00:00, 43.33it/s]



Test set: Average loss: 0.0001547, Accuracy: 9938/10000 (99.38%)

Adjusting learning rate of group 0 to 2.4000e-03.


Epoch=18 Batch=468 loss=0.0358378 Accuracy=98.79%: 100%|██████████| 469/469 [00:10<00:00, 42.89it/s]



Test set: Average loss: 0.0001385, Accuracy: 9944/10000 (99.44%)

Adjusting learning rate of group 0 to 2.4000e-03.


Epoch=19 Batch=468 loss=0.0402809 Accuracy=98.85%: 100%|██████████| 469/469 [00:11<00:00, 41.43it/s]



Test set: Average loss: 0.0001502, Accuracy: 9936/10000 (99.36%)

Adjusting learning rate of group 0 to 2.4000e-03.
