In [1]:
import torch
import torch.nn as nn
import torchvision

device = torch.device("cuda")

# Load observations from the mnist dataset. The observations are divided into a training set and a test set
mnist_train = torchvision.datasets.MNIST('./data', train=True, download=True)
train_x = mnist_train.data.reshape(-1, 1, 28, 28).float()  # torch.functional.nn.conv2d argument must include channels (1)
train_y = torch.zeros((mnist_train.targets.shape[0], 10))  # Create output tensor
train_y[torch.arange(mnist_train.targets.shape[0]), mnist_train.targets] = 1  # Populate output

mnist_test = torchvision.datasets.MNIST('./data', train=False, download=True)
test_x = mnist_test.data.reshape(-1, 1, 28, 28).float()  # torch.functional.nn.conv2d argument must include channels (1)
test_y = torch.zeros((mnist_test.targets.shape[0], 10))  # Create output tensor
test_y[torch.arange(mnist_test.targets.shape[0]), mnist_test.targets] = 1  # Populate output

# Normalization of inputs
mean = train_x.mean()
std = train_x.std()
train_x = (train_x - mean) / std
test_x = (test_x - mean) / std

# Divide training data into batches to speed up optimization
batches = 600
x_train_batches = torch.split(train_x, batches)
y_train_batches = torch.split(train_y, batches)

In [2]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()

        self.logits = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, padding=2), 
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2), 
            nn.Conv2d(32, 64, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2), 
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 1024),
            nn.Flatten(),
            nn.Linear(1024, 10)).to(device)

    # Predictor
    def f(self, x):
        return torch.softmax(self.logits(x), dim=1)

    # Cross Entropy loss
    def loss(self, x, y):
        return nn.functional.cross_entropy(self.logits(x), y.argmax(1)).to(device)

    # Accuracy
    def accuracy(self, x, y):
        return torch.mean(torch.eq(self.f(x).argmax(1), y.argmax(1)).float()).to(device)

In [3]:
model = CNNModel()

# Optimize: adjust W and b to minimize loss using stochastic gradient descent
optimizer = torch.optim.Adam(model.parameters(), 0.001)
for epoch in range(20):
    for batch in range(len(x_train_batches)):
        model.loss(x_train_batches[batch].to(device), y_train_batches[batch].to(device)).backward()  # Compute loss gradients
        optimizer.step()  # Perform optimization by adjusting W and b,
        optimizer.zero_grad()  # Clear gradients for next step

    print("accuracy = %s" % model.accuracy(test_x.to(device), test_y.to(device))) 

accuracy = tensor(0.9806, device='cuda:0')
accuracy = tensor(0.9861, device='cuda:0')
accuracy = tensor(0.9874, device='cuda:0')
accuracy = tensor(0.9883, device='cuda:0')
accuracy = tensor(0.9862, device='cuda:0')
accuracy = tensor(0.9840, device='cuda:0')
accuracy = tensor(0.9879, device='cuda:0')
accuracy = tensor(0.9878, device='cuda:0')
accuracy = tensor(0.9887, device='cuda:0')
accuracy = tensor(0.9832, device='cuda:0')
accuracy = tensor(0.9863, device='cuda:0')
accuracy = tensor(0.9866, device='cuda:0')
accuracy = tensor(0.9848, device='cuda:0')
accuracy = tensor(0.9866, device='cuda:0')
accuracy = tensor(0.9836, device='cuda:0')
accuracy = tensor(0.9873, device='cuda:0')
accuracy = tensor(0.9892, device='cuda:0')
accuracy = tensor(0.9899, device='cuda:0')
accuracy = tensor(0.9918, device='cuda:0')
accuracy = tensor(0.9905, device='cuda:0')
