# Toy Neural Net (Multilayer Perceptron with Autograd Backpropagation)

Goal: Train a multilayer perceptron to recognize numbers from images using the MNIST database.

Made with help from:
- https://nextjournal.com/gkoehler/pytorch-mnist
- https://github.com/WatChMaL/ExampleNotebooks/blob/master/MNIST%20MLP.ipynb

# Loading the MNIST database

Initializing a trainLoader and testLoader for training and evaluation:

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torchvision

# Mini-batch sizes used by the two loaders below
trainBatchSize = 64
testBatchSize = 1000


def _mnistLoader(train, batchSize):
    """Return a shuffled DataLoader over one MNIST split rooted at '/files/'.

    train selects the training split (True) or the test split (False);
    the data is downloaded if it is not already present.
    """
    dataset = torchvision.datasets.MNIST(
        '/files/', train=train, download=True,
        transform=torchvision.transforms.Compose([
            torchvision.transforms.ToTensor()]))
    return torch.utils.data.DataLoader(
        dataset, batch_size=batchSize, shuffle=True)


# Fetch datasets (28x28 pixels)
trainLoader = _mnistLoader(train=True, batchSize=trainBatchSize)
testLoader = _mnistLoader(train=False, batchSize=testBatchSize)

Plot some of the test data for fun:

In [2]:
import matplotlib.pyplot as plt

# Pull a single batch of (images, labels) out of the test loader
examples = enumerate(testLoader)
idx, (exampleData, exampleTargets) = next(examples)

# Show the first nine images of that batch on a 3x3 grid
fig = plt.figure()
for i, (image, target) in enumerate(zip(exampleData[:9], exampleTargets[:9])):
    plt.subplot(3, 3, i + 1)
    plt.tight_layout()
    # image[0] is the first channel of the sample
    plt.imshow(image[0], cmap='gray', interpolation='none')
    plt.title(f"Ground Truth: {target}")
    # Hide the axis ticks
    plt.xticks([])
    plt.yticks([])

# MLP Implementation

Time to implement the neural net. Somewhat arbitrarily, I decided to use an MLP with 2 hidden layers of 256, then 128 neurons, with an input layer of 28x28 = 784 neurons and an output layer of 10 neurons corresponding to the digits.

In [3]:
class MLP(nn.Module):
    """Multilayer perceptron with two ReLU hidden layers.

    Layout: input -(W1)-> L1 -(W2)-> L2 -(W3)-> output

    Args:
        L1: number of neurons in the first hidden layer.
        L2: number of neurons in the second hidden layer.
        in_features: number of values per sample after flattening
            (defaults to 28*28, one per pixel of a 28x28 image).
        n_classes: number of output scores per sample (defaults to 10).
    """

    def __init__(self, L1, L2, in_features=28*28, n_classes=10):
        super().__init__()
        self._classifier = nn.Sequential(
            nn.Linear(in_features, L1), nn.ReLU(),
            nn.Linear(L1, L2), nn.ReLU(),
            nn.Linear(L2, n_classes)
        )

    def forward(self, x):
        """Return the raw output scores, shape (batch, n_classes), for batch x.

        Every dimension after the first is collapsed into one, so x may be
        (batch, 28, 28), (batch, 1, 28, 28), or already (batch, in_features).
        """
        # torch.flatten copies when needed, so non-contiguous inputs work too
        # (Tensor.view raises on those).
        x_flat = torch.flatten(x, start_dim=1)
        # Run through classifier
        return self._classifier(x_flat)

Next, define a BLOB class to organize training objects:

In [4]:
class BLOB:
    """Bare attribute holder; every field is attached after construction."""


# One shared container for everything the training/evaluation code needs
blob = BLOB()
# Per-batch state, filled in by the training and evaluation loops
blob.iter = 0        # training iteration number
blob.data = None     # data for training/analysis
blob.expect = None   # correct values
# The model: an MLP with hidden layers of 256 and 128 neurons
blob.net = MLP(256, 128)
# Loss: cross-entropy (log-softmax followed by negative log-likelihood)
blob.errFn = nn.CrossEntropyLoss()
# Adam optimizer over all of the model's parameters, default settings
blob.optim = torch.optim.Adam(blob.net.parameters())

# Forward evolution function
# REQUIRES: blob must have net, errFn and data set; expect may be None
def forward(blob, train=True):
    """Run blob.data through blob.net and score the result.

    Args:
        blob: container providing ``net``, ``errFn``, ``data`` and
            ``expect``. ``expect`` may be None when no labels are available.
        train: when True, autograd records the computation so that
            ``blob.loss.backward()`` can be called afterwards; when False
            no gradients are tracked.

    Returns:
        dict with
            'prediction': numpy array holding the arg-max class per sample,
            'loss': the loss as a Python float, or -1 without labels,
            'accuracy': fraction of correct predictions, or -1 without labels.

    Side effect: stores the loss on ``blob.loss`` (a tensor, or -1 when
    ``blob.expect`` is None).
    """
    with torch.set_grad_enabled(train):
        # Raw per-class scores from the MLP
        scores = blob.net(blob.data)
        haveLabels = blob.expect is not None

        # -1 marks "nothing to compare against"
        loss = -1
        if haveLabels:
            loss = blob.errFn(scores, blob.expect)
        # Another dynamically assigned attribute
        blob.loss = loss

        # Predicted class = index of the largest score in each row
        pred = torch.argmax(scores, dim=1)

        acc = -1
        if haveLabels:
            # Tensor of booleans corresponding to correct predictions
            correctTensor = (pred == blob.expect)
            acc = correctTensor.sum().item() / float(pred.nelement())

        # Without labels, loss is still the plain -1 sentinel, not a tensor
        lossValue = loss.detach().item() if haveLabels else loss

        # .cpu() keeps the numpy conversion valid for tensors on any device
        return {'prediction' : pred.detach().cpu().numpy(),
                'loss'       : lossValue,
                'accuracy'   : acc}
    
# Backprop step
# REQUIRES: forward(blob) must have run first so that blob.loss is set
def backward(blob):
    """Apply one optimizer update using the loss stored on blob.loss."""
    optimizer = blob.optim
    # Clear gradients left over from the previous step
    optimizer.zero_grad()
    # Compute fresh gradients of the loss
    blob.loss.backward()
    # Let the optimizer update the parameters
    optimizer.step()

# Training

Now for the training loop. This first part is just to figure out what the heck trainLoader actually contains:

In [5]:
# Print the index and contents of the first batch only, then stop
for i, data in enumerate(trainLoader):
    if i != 0:
        break
    print("i:", i, "data:", data, sep="\n")

i:
0
data:
[tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        ...,


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 

And now for the real training:

In [6]:
# Set MLP to training mode
blob.net.train()
# Set number of training iterations (one optimizer update per iteration)
maxIter = 500
# Set reporting interval
repIter = 10
# Training loop: zip with range() stops after exactly maxIter batches (or
# earlier if trainLoader runs out), so every forward pass that is computed
# is also followed by its update.
for i, data in zip(range(maxIter), trainLoader):
    blob.iter = i
    # data consists of a list of two tensors [img data, values]
    # unpack data into appropriate attributes in blob
    blob.data, blob.expect = data
    # Forward pass with gradients enabled; also stores the loss on blob
    res = forward(blob)
    # Report if current iteration is a multiple of the reporting interval
    if (blob.iter % repIter) == 0:
        print('Iteration:', blob.iter,'\t| Loss:', res['loss'],'\t| Accuracy:',res['accuracy'])
    # Backprop
    backward(blob)

Iteration: 0 	| Loss: 2.3064396381378174 	| Accuracy: 0.09375
Iteration: 10 	| Loss: 1.9630744457244873 	| Accuracy: 0.515625
Iteration: 20 	| Loss: 1.278629183769226 	| Accuracy: 0.671875
Iteration: 30 	| Loss: 0.7999889850616455 	| Accuracy: 0.765625
Iteration: 40 	| Loss: 0.8534802794456482 	| Accuracy: 0.734375
Iteration: 50 	| Loss: 0.4231760501861572 	| Accuracy: 0.890625
Iteration: 60 	| Loss: 0.4366120994091034 	| Accuracy: 0.859375
Iteration: 70 	| Loss: 0.42479944229125977 	| Accuracy: 0.890625
Iteration: 80 	| Loss: 0.4140319228172302 	| Accuracy: 0.84375
Iteration: 90 	| Loss: 0.3230949938297272 	| Accuracy: 0.921875
Iteration: 100 	| Loss: 0.2714061439037323 	| Accuracy: 0.953125
Iteration: 110 	| Loss: 0.41483134031295776 	| Accuracy: 0.859375
Iteration: 120 	| Loss: 0.45617344975471497 	| Accuracy: 0.890625
Iteration: 130 	| Loss: 0.5029744505882263 	| Accuracy: 0.890625
Iteration: 140 	| Loss: 0.5219317674636841 	| Accuracy: 0.90625
Iteration: 150 	| Loss: 0.18307410180

# Evaluation

Now to test the MLP on data it hasn't seen before:

In [8]:
def evaluate(blob, loader):
    """Score blob.net on every batch produced by loader.

    Args:
        blob: container passed to forward(); its ``data`` and ``expect``
            attributes are overwritten with each batch.
        loader: iterable yielding (data, expected) pairs.

    Returns:
        (accuracy, expected, prediction) as numpy arrays: one accuracy
        value per batch, and the labels / predictions of all batches
        joined end to end. All three are empty if loader yields nothing.

    Side effect: leaves blob.net in evaluation mode.
    """
    # Set MLP to evaluation mode
    blob.net.eval()
    # Initialize result containers
    accuracy, expected, prediction = [], [], []
    for data in loader:
        blob.data, blob.expect = data
        # No backward pass follows, so skip gradient tracking
        res = forward(blob, train=False)
        accuracy.append(res['accuracy'])
        prediction.append(res['prediction'])
        # Store labels as numpy, matching the predictions
        expected.append(blob.expect.detach().cpu().numpy())
    # np.hstack raises on an empty list, so handle "no batches" explicitly
    if not accuracy:
        return np.empty(0), np.empty(0, dtype=np.int64), np.empty(0, dtype=np.int64)
    # Organize result arrays
    accuracy = np.hstack(accuracy)
    expected = np.hstack(expected)
    prediction = np.hstack(prediction)
    return accuracy, expected, prediction

# Evaluate on the training set first, then on the testing set, and print
# the mean and standard deviation of the per-batch accuracies for each.
for tag, loader in (("TRAIN", trainLoader), ("TEST", testLoader)):
    accuracy, label, prediction = evaluate(blob, loader)
    print(tag + "\taccuracy mean", accuracy.mean(), "std", accuracy.std())

TRAIN	accuracy mean 0.9481609808102346 std 0.028033261387632912
TEST	accuracy mean 0.9452999999999999 std 0.005060632371551982


YAY! My first real neural net!