# Pipeline for Training and Collection of MNIST Misclassifications

## Imports and Hyperparameters

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data import Subset

from src.mnist.models import FC, CNN
from src.util import split_train_val, test, train, save_model

import matplotlib.pyplot as plt

import numpy as np

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# this should print 'cuda' if you are assigned a GPU
print(device)

# --- Data loading / optimisation hyperparameters ---
train_batch_size = 100
test_batch_size = 100
n_epochs = 5
learning_rate = 1e-2
seed = 100
input_dim = 28 * 28  # flattened 28x28 MNIST image
out_dim = 10         # one output per digit class
num_hidden_layers = 2
layer_size = 100
momentum = 0.9

# `seed` used to be declared but never applied, so weight initialisation and
# DataLoader shuffling changed on every run. Seed the RNGs of the libraries
# this notebook imports so that a Restart & Run All is repeatable.
# NOTE(review): split_train_val lives in src.util; confirm it draws from one of
# these global RNGs if the train/validation split must be reproducible too.
torch.manual_seed(seed)
np.random.seed(seed)

# (num_hidden_layers, layer_size) for every fully-connected model to train.
fc_model_params = [
    (2, 100),
    (2, 200),
    (3, 100),
    (3, 200),
    (4, 100),
    (4, 200),
]

# Depth argument passed to CNN(...) for every convolutional model to train.
cnn_model_params = [
    0,
    1,
    2,
    3,
    4
]

cuda


## Load Data

In [2]:
# Only conversion to tensors is applied to the images; no other transform is composed.
transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])

# Official MNIST train / test splits, downloaded into ./datasets/ if missing.
train_dataset = torchvision.datasets.MNIST(
    './datasets/', train=True, download=True, transform=transforms
)
test_dataset = torchvision.datasets.MNIST(
    './datasets/', train=False, download=True, transform=transforms
)

# sanity check: report how many samples each split contains
for size_template, split in (
    ('training data size:{}', train_dataset),
    ('test data size:{}', test_dataset),
):
    print(size_template.format(len(split)))

training data size:60000
test data size:10000


In [3]:
# Hold out one sixth of the training data for validation.
# Note: `train_dataset` is rebound here, so re-running this cell without
# re-running the loading cell splits the already-reduced training set again.
train_dataset, val_dataset = split_train_val(
    train_dataset,
    valid_ratio=1/6,
)
for size_template, split in (
    ('training data size:{}', train_dataset),
    ('validation data size:{}', val_dataset),
):
    print(size_template.format(len(split)))

training data size:50000
validation data size:10000


## Loaders

In [4]:
# Only the training loader shuffles. The validation and test loaders keep
# dataset order, which the misclassification tracking below relies on to map
# (batch, position) pairs back to dataset indices.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=train_batch_size, shuffle=True
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=train_batch_size, shuffle=False
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=test_batch_size, shuffle=False
)

# sanity check: each loader should wrap the expected number of samples
for size_template, loader in (
    ('training data size:{}', train_loader),
    ('validation data size:{}', val_loader),
    ('test data size:{}', test_loader),
):
    print(size_template.format(len(loader.dataset)))

training data size:50000
validation data size:10000
test data size:10000


## Create Networks

In [5]:
# One fully-connected network per (depth, width) pair in fc_model_params.
fc_models = [
    FC(input_dim, out_dim, depth, width)
    for depth, width in fc_model_params
]
# One convolutional network per depth value in cnn_model_params.
cnn_models = [
    CNN(input_dim, out_dim, depth)
    for depth in cnn_model_params
]

## Train Networks

In [6]:
# Train each fully-connected model for n_epochs, evaluate it on the validation
# loader, and save it under the name "<depth>x<width>".
for model, (depth, width) in zip(fc_models, fc_model_params):
    model.to(device)

    # A fresh SGD optimizer per model, so no momentum state is shared.
    optimizer = optim.SGD(
        model.parameters(),
        lr=learning_rate,
        momentum=momentum,
    )

    print("Training FC model: {}".format(model))

    for epoch in range(1, n_epochs + 1):
        train(model, train_loader, optimizer, epoch, device)

    test(model, val_loader, device)

    print("Saving FC model: {}".format(model))
    checkpoint_name = str(depth) + "x" + str(width)
    save_model(model, dataset="MNIST", filename=checkpoint_name)

Training FC model: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)





	Accuracy: 78.07%





	Accuracy: 92.13%





	Accuracy: 94.09%





	Accuracy: 95.23%





	Accuracy: 95.96%
Test set: Avg. loss: -963.7257, Accuracy: 9587/10000 (95.87%)
Saving FC model: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)
Training FC model: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=784, out_features=200, bias=True)
    (1): Linear(in_features=200, out_features=200, bias=True)
    (2): Linear(in_features=200, out_features=10, bias=True)
  )
)





	Accuracy: 80.67%





	Accuracy: 92.40%





	Accuracy: 94.38%





	Accuracy: 95.65%





	A

In [7]:
# Train each CNN for n_epochs, evaluate it on the validation loader, and save
# it under the name "CNN_<depth>".
for model, depth in zip(cnn_models, cnn_model_params):
    model.to(device)

    print("Training CNN model: {}".format(model))

    # A fresh SGD optimizer per model, so no momentum state is shared.
    optimizer = optim.SGD(
        model.parameters(),
        lr=learning_rate,
        momentum=momentum,
    )

    for epoch in range(1, n_epochs + 1):
        train(model, train_loader, optimizer, epoch, device)

    test(model, val_loader, device)

    print("Saving CNN model: {}".format(model))
    checkpoint_name = "CNN_" + str(depth)
    save_model(model, dataset="MNIST", filename=checkpoint_name)
    

Training CNN model: CNN(
  (layer_list): ModuleList(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=1568, out_features=10, bias=True)
  )
)





	Accuracy: 92.43%





	Accuracy: 96.35%





	Accuracy: 96.88%





	Accuracy: 97.21%





	Accuracy: 97.31%
Test set: Avg. loss: -1347.2754, Accuracy: 9712/10000 (97.12%)
Saving CNN model: CNN(
  (layer_list): ModuleList(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=1568, out_features=10, bias=True)
  )
)
Training CNN m

## Misclassification Tracking

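Misclassified images are tracked as `(batch_idx, image_idx)` tuples, i.e. their position within the unshuffled `test_loader`. The corresponding sample is `test_dataset[batch_idx * test_batch_size + image_idx]`. The value stored for each tuple is the number of models that misclassified that image.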

In [8]:
# misses maps (batch index, position within batch) -> number of models that
# predicted the wrong label for that test image.
misses = dict()
for model in fc_models + cnn_models:
    print("Starting model test: {}".format(model))
    model.eval()

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)

            # Predicted class = index of the largest output per sample.
            pred = model(data).argmax(dim=1, keepdim=True)

            # Flat boolean array: True where the prediction differs from the label.
            missed = pred.not_equal(target.view_as(pred)).view(-1).cpu().numpy()

            for image_idx, miss in enumerate(missed):
                if not miss:
                    continue
                key = (batch_idx, image_idx)
                misses[key] = misses.get(key, 0) + 1

    print("Finished")

Starting model test: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)
Finished
Starting model test: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=784, out_features=200, bias=True)
    (1): Linear(in_features=200, out_features=200, bias=True)
    (2): Linear(in_features=200, out_features=10, bias=True)
  )
)
Finished
Starting model test: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Linear(in_features=100, out_features=10, bias=True)
  )
)
Finished
Starting model test: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=784, out_features=200, bias=True)
    (1): Linear(in_features=200, out_features=200, bias=True)
  

In [9]:
# Keep only the images that more than five models got wrong.
significant = dict(
    (sample, count) for sample, count in misses.items() if count > 5
)
print(len(significant))

224


In [10]:
# Histogram of how many models misclassified each "significant" test image.
y = list(significant.values())

# Every model can miss an image at most once, so counts range over 0..n_models.
# The previous fixed edges (-0.5 .. 9.5) silently dropped images missed by 10
# or 11 of the 11 models, because values outside the bin range are ignored.
# Build one unit-wide bin centred on each possible count instead.
n_models = len(fc_models) + len(cnn_models)
count_bin_edges = [x - 0.5 for x in range(n_models + 2)]

fig = plt.figure(figsize=(12, 8), facecolor="w")
plt.hist(y, count_bin_edges, edgecolor="k")
plt.xlabel("Number of Missclassifications")
plt.ylabel("Frequency")
plt.savefig("plots/MNIST/im_freq.png")
plt.close()

In [11]:
# For every frequently-misclassified test image, save
#   1. a histogram of the labels predicted by all trained models, and
#   2. the image itself, titled with its true label.
all_models = fc_models + cnn_models

# One unit-wide bin centred on each class 0..out_dim-1. The previous edges
# (-0.5 .. 8.5) had no bin for digit 9, so predictions of 9 were never plotted.
label_bin_edges = [x - 0.5 for x in range(out_dim + 1)]

for sample in significant:

    # sample is (batch index, position in batch) within the unshuffled
    # test_loader; convert it back to a flat dataset index.
    idx = sample[0]*test_batch_size + sample[1]
    im, label = test_loader.dataset[idx]
    im = im.view(1, 28, 28)

    # Inference only: skip autograd bookkeeping and move the image to the
    # device once instead of once per model.
    with torch.no_grad():
        batch = im.unsqueeze(0).to(device)
        preds = np.array([model(batch).argmax(dim=1, keepdim=True).cpu().numpy() for model in all_models]).flatten()

    fig = plt.figure(figsize=(12, 8), facecolor="w")
    plt.hist(preds, label_bin_edges, edgecolor="k")
    plt.xlabel("Predicted Label")
    plt.ylabel("Frequency")
    plt.title("True Label: {}".format(label))
    plt.savefig("plots/MNIST/images/im_" + str(idx) + "_freq.png")
    plt.close()

    fig = plt.figure()
    plt.imshow(im.squeeze().cpu().numpy(), cmap="gray")
    plt.title("Actual: {}".format(test_loader.dataset.targets[idx]))
    plt.savefig("plots/MNIST/images/im_" + str(idx) + ".png")
    plt.close()