# Pipeline for Training and Collection of CIFAR-10 Misclassifications

## Imports and Hyperparameters

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data import Subset

from src.cifar.models import *
from src.util import split_train_val, test, train, save_model

import matplotlib.pyplot as plt

import numpy as np

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# this should print 'cuda' if you are assigned a GPU
print(device)

# --- Hyperparameters -------------------------------------------------------
train_batch_size = 100
test_batch_size = 100
n_epochs = 25
learning_rate = 1e-2
seed = 100
input_dim = 32*32*3   # flattened 32x32 RGB image
out_dim = 10          # number of output classes
num_hidden_layers = 2
layer_size = 100
momentum = 0.9

# Seed the random number generators so that weight initialisation and
# training-batch shuffling are repeatable across "Restart & Run All".
torch.manual_seed(seed)
np.random.seed(seed)

# (number of hidden layers, hidden layer width) for each fully connected
# model that is trained from scratch.
fc_model_params = [
    (2, 100),
    (2, 200),
    (3, 100),
    (3, 200),
    (4, 100),
    (4, 200),
]

# Names of the pretrained models to load. NOTE: this name is rebound to the
# loaded models by the "Create Networks" cell further down.
complex_models = [
    "cifar10_resnet20",
    "cifar10_vgg11_bn",
    "cifar10_mobilenetv2_x0_5",
    "cifar10_shufflenetv2_x0_5",
    "cifar10_repvgg_a0"
]

cuda


## Load Data

In [2]:
# Preprocessing for the networks: convert to a tensor, then normalise each
# channel (constants are presumably the CIFAR-10 per-channel mean / std -
# TODO confirm against the pretrained models' expected statistics).
to_tensor = torchvision.transforms.ToTensor()
normalize = torchvision.transforms.Normalize(
    (0.4914, 0.4822, 0.4465),
    (0.247, 0.243, 0.261),
)
transforms = torchvision.transforms.Compose([to_tensor, normalize])

train_dataset = torchvision.datasets.CIFAR10(
    './datasets/', train=True, download=True, transform=transforms
)
test_dataset = torchvision.datasets.CIFAR10(
    './datasets/', train=False, download=True, transform=transforms
)

# Same test split without normalisation, used later to display the images.
raw_test_data = torchvision.datasets.CIFAR10(
    './datasets/',
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

# sanity check
n_train_samples = len(train_dataset)
n_test_samples = len(test_dataset)
print('training data size:{}'.format(n_train_samples))
print('test data size:{}'.format(n_test_samples))

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
training data size:50000
test data size:10000


In [3]:
# Hold out one sixth of the training data for validation.
# NOTE: this rebinds `train_dataset` to the reduced training portion, so
# re-running this cell without re-running the loading cell splits again.
split_parts = split_train_val(train_dataset, valid_ratio=1/6)
train_dataset, val_dataset = split_parts

n_train_samples = len(train_dataset)
n_val_samples = len(val_dataset)
print('training data size:{}'.format(n_train_samples))
print('validation data size:{}'.format(n_val_samples))

training data size:41667
validation data size:8333


## Loaders

In [4]:
make_loader = torch.utils.data.DataLoader

# Only the training loader shuffles. The evaluation loaders keep dataset
# order because later cells identify test images by
# (batch index, position in batch) and map that back to a dataset index.
train_loader = make_loader(train_dataset, shuffle=True, batch_size=train_batch_size)
val_loader = make_loader(val_dataset, shuffle=False, batch_size=train_batch_size)
test_loader = make_loader(test_dataset, shuffle=False, batch_size=test_batch_size)
raw_test_loader = make_loader(raw_test_data, shuffle=False, batch_size=test_batch_size)

# sanity check
n_train, n_val, n_test = (
    len(loader.dataset) for loader in (train_loader, val_loader, test_loader)
)
print('training data size:{}'.format(n_train))
print('validation data size:{}'.format(n_val))
print('test data size:{}'.format(n_test))

training data size:41667
validation data size:8333
test data size:10000


## Create Networks

In [5]:
# One fully connected network per (hidden layer count, layer width) pair.
fc_models = []
for depth, width in fc_model_params:
    fc_models.append(FC(input_dim, out_dim, depth, width))

# NOTE: rebinds `complex_models` from the list of model names to whatever
# load_pretrained_models returns (a later cell calls `.values()` on it), so
# this cell should not be re-run without re-running the config cell first.
complex_models = load_pretrained_models(complex_models)

Using cache found in C:\Users\willi/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master
Using cache found in C:\Users\willi/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master
Using cache found in C:\Users\willi/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master
Using cache found in C:\Users\willi/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master
Using cache found in C:\Users\willi/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master


## Train Networks

In [6]:
# Train every FC model from scratch, report validation performance once at
# the end, and save the model under a "<layers>x<width>" file name.
for model, (depth, width) in zip(fc_models, fc_model_params):
    model.to(device)

    # Fresh optimizer per model so momentum state is never shared.
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    print("Training FC model: {}".format(model))

    for epoch in range(1, n_epochs + 1):
        train(model, train_loader, optimizer, epoch, device)

    test(model, val_loader, device)

    print("Saving FC model: {}".format(model))
    model_filename = str(depth) + "x" + str(width)
    save_model(model, dataset="CIFAR10", filename=model_filename)

Training FC model: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=3072, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)





	Accuracy: 38.47%





	Accuracy: 47.07%





	Accuracy: 50.37%





	Accuracy: 52.99%





	Accuracy: 55.04%





	Accuracy: 56.76%





	Accuracy: 58.47%





	Accuracy: 60.00%





	Accuracy: 61.17%





	Accuracy: 62.23%





	Accuracy: 63.45%





	Accuracy: 64.44%





	Accuracy: 65.46%





	Accuracy: 66.28%





	Accuracy: 67.23%





	Accuracy: 68.17%





	Accuracy: 68.62%





	Accuracy: 69.39%





	Accuracy: 70.68%





	Accuracy: 71.06%





	Accuracy: 71.85%





	Accuracy: 72.41%





	Accuracy: 72.78%





	Accuracy: 73.54%





	Accuracy: 73.69%
Test set: Avg. loss: -467.9237, Accuracy: 4140/8333 (49.68%)
Saving FC model: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=3072, out_features=100, bias=True)
    (1): 

## Misclassification Tracking

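Images are tracked as tuples `(batch_idx, image_idx)`, where `image_idx` is the position of the image within its test batch. Because the test loader is not shuffled, the image can be recovered from the dataset as `test_dataset[batch_idx * test_batch_size + image_idx]`.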

In [7]:
# misses maps (batch index, position in batch) -> number of models that
# misclassified that test image.
misses = dict()
complex_list = list(complex_models.values())
all_models = fc_models + complex_list

for model in all_models:
    print("Starting model test: {}".format(model))

    model.to(device)
    model.eval()

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)

            # Predicted class is the index of the largest output.
            pred = model(data).argmax(dim=1, keepdim=True)

            # Boolean vector, True where the prediction differs from the label.
            missed = pred.not_equal(target.data.view_as(pred)).view(-1).cpu().numpy()

            # Count one miss for every misclassified position in this batch.
            for image_idx in np.flatnonzero(missed).tolist():
                key = (batch_idx, image_idx)
                misses[key] = misses.get(key, 0) + 1

    print("Finished")

Starting model test: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=3072, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)
Finished
Starting model test: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=3072, out_features=200, bias=True)
    (1): Linear(in_features=200, out_features=200, bias=True)
    (2): Linear(in_features=200, out_features=10, bias=True)
  )
)
Finished
Starting model test: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=3072, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Linear(in_features=100, out_features=10, bias=True)
  )
)
Finished
Starting model test: FC(
  (layer_list): ModuleList(
    (0): Linear(in_features=3072, out_features=200, bias=True)
    (1): Linear(in_features=200, out_features=200, bias=True

In [8]:
# all_miss: every image missed by at least one model.
# significant: images missed by more than five models.
all_miss = {}
significant = {}
for sample_key, miss_count in misses.items():
    if miss_count > 0:
        all_miss[sample_key] = miss_count
    if miss_count > 5:
        significant[sample_key] = miss_count

print(len(significant))
print(len(all_miss))

2477
7978


In [9]:
# Histogram of how many models misclassified each tracked test image.
y = list(all_miss.values())

# An image can be missed by up to every evaluated model, so the bin edges
# must extend to n_models + 0.5. Values outside the outermost edges are
# ignored by the histogram, which would hide the hardest images.
n_models = len(fc_models) + len(complex_list)
count_bins = [x - 0.5 for x in range(n_models + 2)]  # unit bins centred on 0..n_models

fig = plt.figure(figsize=(12, 8), facecolor="w")
plt.hist(y, count_bins, edgecolor="k")
plt.xlabel("Number of Missclassifications")
plt.ylabel("Frequency")
plt.savefig("plots/CIFAR/im_freq.png")
plt.close(fig)

In [10]:
# For every frequently misclassified image, save (1) a histogram of the
# labels predicted by all models and (2) the un-normalised image itself.
all_models = fc_models + complex_list

# Unit-wide bins centred on each class index 0..out_dim-1. The upper edge
# must be out_dim - 0.5, otherwise predictions of the last class are dropped.
class_bins = [x - 0.5 for x in range(out_dim + 1)]

for sample in significant:

    # Map (batch index, position in batch) back to a flat dataset index;
    # valid because the test loader does not shuffle.
    idx = sample[0]*test_batch_size + sample[1]
    im_net, label = test_loader.dataset[idx]
    im_net = im_net.view(3, 32, 32)
    im_raw = raw_test_loader.dataset[idx][0].view(3, 32, 32)

    # Inference only: skip autograd bookkeeping and move the image once.
    with torch.no_grad():
        batch = im_net.unsqueeze(0).to(device)
        preds = np.array(
            [model(batch).argmax(dim=1, keepdim=True).cpu().numpy() for model in all_models]
        ).flatten()

    fig = plt.figure(figsize=(12, 8), facecolor="w")
    plt.hist(preds, class_bins, edgecolor="k")
    plt.xlabel("Predicted Label")
    plt.ylabel("Frequency")
    plt.title("True Label: {}".format(label))
    plt.savefig("plots/CIFAR/images/im_" + str(idx) + "_freq.png")
    plt.close(fig)

    fig = plt.figure()
    # Channels-first tensor -> height x width x channels for imshow.
    plt.imshow(im_raw.squeeze().cpu().numpy().transpose((1,2,0)))
    plt.title("Actual: {}".format(label))
    plt.savefig("plots/CIFAR/images/im_" + str(idx) + ".png")
    plt.close(fig)

In [12]:
import os

# The plotting cell writes two PNG files per tracked image (the prediction
# histogram and the image itself), so halve the file count to get the
# number of images covered.
saved_files = os.listdir("plots/CIFAR/images")
print(len(saved_files) / 2)

2477.0
