In [12]:
import torch
import torch.nn as nn
import torchvision

# Load the MNIST observations, which come pre-divided into a training set and a test set.
# Inputs are reshaped to (N, 1, 28, 28) because nn.Conv2d expects an explicit channel
# dimension (1 channel here). Targets are one-hot encoded into (N, 10) tensors by
# picking, for every label, the matching row of a 10x10 identity matrix.
mnist_train = torchvision.datasets.MNIST('./data', train=True, download=True)
x_train = mnist_train.data.reshape(-1, 1, 28, 28).float()
y_train = torch.eye(10)[mnist_train.targets]

mnist_test = torchvision.datasets.MNIST('./data', train=False, download=True)
x_test = mnist_test.data.reshape(-1, 1, 28, 28).float()
y_test = torch.eye(10)[mnist_test.targets]



In [13]:
# Standardize the inputs. The mean and standard deviation are taken from the
# training set only and reused for the test set, so both are scaled identically.
mean, std = x_train.mean(), x_train.std()
x_train = x_train.sub(mean).div(std)
x_test = x_test.sub(mean).div(std)

# Divide the training data into mini-batches to speed up optimization.
# Note: `batches` is the number of observations per batch (the chunk size handed
# to torch.split), not the number of batches.
batches = 600
x_train_batches, y_train_batches = (
    torch.split(tensor, batches) for tensor in (x_train, y_train)
)

In [22]:

class ConvolutionalNeuralNetworkModel(nn.Module):
    """Convolutional classifier that maps 1x28x28 images to 10 class scores.

    The network is two 5x5 convolutions (padding 2, so the spatial size is kept),
    each followed by 2x2 max pooling, then a single dense layer producing logits.
    """

    def __init__(self):
        super().__init__()

        # Model layers (includes initialized model variables).
        # Trailing comments give each layer's output shape for a 1@28x28 input.
        # NOTE(review): there are no activation functions between the layers;
        # max pooling is the only non-linearity. Confirm this is intended.
        self.logits = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, padding=2),   # 32@28x28
            nn.MaxPool2d(kernel_size=2),                  # 32@14x14
            nn.Conv2d(32, 64, kernel_size=5, padding=2),  # 64@14x14
            nn.MaxPool2d(kernel_size=2),                  # 64@7x7
            nn.Flatten(),                                 # 3136 (= 64 * 7 * 7)
            nn.Linear(64 * 7 * 7, 10),                    # 10 logits, one per class
        )

    # Predictor
    def f(self, x):
        """Return class probabilities: softmax over the logits along dim 1.

        Args:
            x: input batch accepted by ``self.logits`` (shape (N, 1, 28, 28)).

        Returns:
            Tensor of shape (N, 10) whose rows sum to 1.
        """
        return torch.softmax(self.logits(x), dim=1)

    # Cross Entropy loss
    def loss(self, x, y):
        """Return the mean cross-entropy between the logits for ``x`` and ``y``.

        Args:
            x: input batch accepted by ``self.logits``.
            y: one-hot encoded targets of shape (N, 10); they are converted to
                class indices with ``argmax(1)`` before the loss is computed.

        Returns:
            Scalar tensor that can be back-propagated through.
        """
        return nn.functional.cross_entropy(self.logits(x), y.argmax(1))

    # Accuracy
    def accuracy(self, x, y):
        """Return the fraction of rows in ``x`` whose predicted class matches ``y``.

        Args:
            x: input batch accepted by ``self.logits``.
            y: one-hot encoded targets of shape (N, 10).

        Returns:
            Scalar float tensor in [0, 1]; it carries no gradient information.
        """
        # Accuracy is never differentiated, so run the forward pass without
        # autograd bookkeeping. Otherwise a graph over the whole evaluation set
        # is built and kept alive only to be thrown away.
        with torch.no_grad():
            # Softmax is monotonic, so the argmax of the logits is the predicted class.
            predicted = self.logits(x).argmax(1)
            return torch.mean(torch.eq(predicted, y.argmax(1)).float())

In [23]:
model = ConvolutionalNeuralNetworkModel()

# Optimize: adjust the model parameters to minimize the loss with the Adam
# optimizer, taking one optimization step per mini-batch.
NUM_EPOCHS = 20
LEARNING_RATE = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
for epoch in range(NUM_EPOCHS):
    for x_batch, y_batch in zip(x_train_batches, y_train_batches):
        loss = model.loss(x_batch, y_batch)  # Forward pass: compute the batch loss
        loss.backward()  # Compute loss gradients
        optimizer.step()  # Perform optimization by adjusting the model parameters
        optimizer.zero_grad()  # Clear gradients for next step

    # Report test accuracy once per epoch. Evaluation needs no gradients, so it
    # runs under no_grad() to avoid building an autograd graph over the test set.
    with torch.no_grad():
        test_accuracy = model.accuracy(x_test, y_test)
    print("accuracy = %s" % test_accuracy)

# Output from original code, lr=1e-03, epochs=20: accuracy = tensor(0.9806)
# With added convolution and max-pool layers, lr=1e-03, epochs=20: accuracy = tensor(0.9828)

[1,    10] loss: 1.210
[1,    20] loss: 0.406
[1,    30] loss: 0.369
[1,    40] loss: 0.234
[1,    50] loss: 0.219
[1,    60] loss: 0.173
[1,    70] loss: 0.153
[1,    80] loss: 0.137
[1,    90] loss: 0.122
[1,   100] loss: 0.082
accuracy = tensor(0.9745)
[2,    10] loss: 0.088
[2,    20] loss: 0.091
[2,    30] loss: 0.096
[2,    40] loss: 0.065
[2,    50] loss: 0.079
[2,    60] loss: 0.076
[2,    70] loss: 0.072
[2,    80] loss: 0.074
[2,    90] loss: 0.066
[2,   100] loss: 0.049
accuracy = tensor(0.9816)
[3,    10] loss: 0.056
[3,    20] loss: 0.056
[3,    30] loss: 0.052
[3,    40] loss: 0.044
[3,    50] loss: 0.058
[3,    60] loss: 0.054
[3,    70] loss: 0.055
[3,    80] loss: 0.060
[3,    90] loss: 0.054
[3,   100] loss: 0.042
accuracy = tensor(0.9819)
[4,    10] loss: 0.045
[4,    20] loss: 0.044
[4,    30] loss: 0.040
[4,    40] loss: 0.035
[4,    50] loss: 0.047
[4,    60] loss: 0.044
[4,    70] loss: 0.047
[4,    80] loss: 0.048
[4,    90] loss: 0.045
[4,   100] loss: 0.035
ac