In [1]:
from torchvision import datasets, transforms
import torch
import torch.nn.functional as F


# Hyperparams
batch_size = 50
loss_func = F.cross_entropy
epochs = 50

# GPU/CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)
use_cuda = torch.cuda.is_available()

# Datasets
train_kwargs = {'batch_size': batch_size, 'shuffle': True}
val_kwargs = {'batch_size': batch_size}
if use_cuda:
    cuda_kwargs = {'num_workers': 1,
                   'pin_memory': True}
    train_kwargs.update(cuda_kwargs)
    val_kwargs.update(cuda_kwargs)

transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
dataset1 = datasets.MNIST('data', train=True, download=True, transform=transform)
dataset2 = datasets.MNIST('data', train=False, transform=transform)

train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
val_loader = torch.utils.data.DataLoader(dataset2, **val_kwargs)

cuda


In [2]:
import torch.nn as nn

# model
    
model = nn.Sequential(
    nn.Conv2d(1, 32, 5, 1, padding=(2, 2)),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(32, 64, 5, 1, padding=(2, 2)),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Flatten(),
    nn.Linear(7 * 7 * 64, 1024),
    nn.ReLU(),
    nn.Linear(1024, 10)
)

model.to(device)

Sequential(
  (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=3136, out_features=1024, bias=True)
  (8): ReLU()
  (9): Linear(in_features=1024, out_features=10, bias=True)
)

In [3]:
# optimizer
import torch.optim as optim

learning_rate = 1e-4
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [4]:
def accuracy(out, yb):
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()

# Training
best_val_acc = 0

for epoch in range(epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_func(output, target)
        loss.backward()
        optimizer.step()
    with torch.no_grad():
        model.eval()
        val_acc = 0
        for batch_idx, (data, target) in enumerate(val_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            val_acc += accuracy(output, target)
    
        val_acc = val_acc / len(val_loader)

        print(f'Val Loss acc {epoch} epoch(s): {val_acc}')

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model, 'cnn_mnist')
        

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Val Loss acc 0 epoch(s): 0.9810001850128174
Val Loss acc 1 epoch(s): 0.9854000806808472
Val Loss acc 2 epoch(s): 0.9899000525474548
Val Loss acc 3 epoch(s): 0.9885002970695496
Val Loss acc 4 epoch(s): 0.9905002117156982
Val Loss acc 5 epoch(s): 0.9910000562667847
Val Loss acc 6 epoch(s): 0.9912000894546509
Val Loss acc 7 epoch(s): 0.992900013923645
Val Loss acc 8 epoch(s): 0.9912999272346497
Val Loss acc 9 epoch(s): 0.989500105381012
Val Loss acc 10 epoch(s): 0.9906999468803406
Val Loss acc 11 epoch(s): 0.9913001656532288
Val Loss acc 12 epoch(s): 0.9900001883506775
Val Loss acc 13 epoch(s): 0.9911001920700073
Val Loss acc 14 epoch(s): 0.9922000169754028
Val Loss acc 15 epoch(s): 0.9910001158714294
Val Loss acc 16 epoch(s): 0.9911000728607178
Val Loss acc 17 epoch(s): 0.991300106048584
Val Loss acc 18 epoch(s): 0.9922000765800476
Val Loss acc 19 epoch(s): 0.9920001029968262
Val Loss acc 20 epoch(s): 0.9912000298500061
Val Loss acc 21 epoch(s): 0.9923000931739807
Val Loss acc 22 epoch(s