In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from matplotlib import pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [6]:
#Load the Data
class MnistData(Dataset):
    def __init__(self, digits_data):
        self.labels = np.array(OneHotEncoder().fit_transform(digits_data.values[:,0][:,np.newaxis]).todense()).astype(np.single)
        
        self.imdata = (digits_data.values[:,1:]/255.0).astype(np.single)
    
    def __len__(self):
        return self.labels.shape[0]
    
    def __getitem__(self, index):
        return self.labels[index], self.imdata[index]

    def visualize(self, index):
        plt.imshow(self.imdata[index].reshape(28,28), cmap = plt.cm.bone)

        
digits_data = pd.read_csv("data/mnist_digits/train.csv")
train_df, val_df = train_test_split(digits_data, test_size = 0.3)

train_digits = MnistData(train_df)
val_digits = MnistData(val_df)

In [7]:
#Define the neural network
class SingleLayerDigitNet(nn.Module):
    def __init__(self):
        super(SingleLayerDigitNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 10)
    
    def forward(self, x):
        x = self.fc1(x)
        return x

In [8]:
#Try making a test prediction with the untrained network
imnum = 11
singlelayer_digitnet = SingleLayerDigitNet()

label, imdata = train_digits[imnum]

pred_label = np.argmax(digitnet(torch.Tensor(imdata)).detach().numpy())
print("True label is {}, nn prediction is {}".format(np.argmax(label), pred_label))
train_digits.visualize(imnum)

NameError: name 'digitnet' is not defined

In [None]:
#Helper class for Neural Network training
class TrainingStats(object):
    def __init__(self):
        self.losses = []
        self.accs = []
        self.reset_runningdata()
        
    def reset_runningdata(self):
        self.loss_running = 0
        self.predictions_running = []
        self.labels_running = []
    
    def add(self, predictions, labels, loss):
        self.loss_running += loss
        self.predictions_running.append(predictions)
        self.labels_running.append(labels)
        
    def new_epoch(self):
        self.losses.append(self.loss_running)
        preds = np.hstack(self.predictions_running)
        labels = np.hstack(self.labels_running)
        
        self.accs.append(100*(preds == labels).sum()/len(labels))
        self.reset_runningdata()

#Network training function
def train_network(train_dataloader, val_dataloader, model, loss_func, optimizer, train_params):
    trainstats = TrainingStats()
    valstats = TrainingStats()
    
    print("Epoch, train loss, train acc, val loss, val acc")
    for epochnum in range(train_params["epochs"]):
        #Get validation stats
        for val_data in val_dataloader:
            val_labels = val_data[0]
            val_images = val_data[1]
            
            val_prediction = model(val_images)
            val_loss     = loss_func(val_prediction, val_labels).item()
        
        valstats.add(np.argmax(val_prediction.detach().numpy(), axis = 1), 
                     np.argmax(val_labels.detach().numpy(), axis = 1),
                     val_loss)
        valstats.new_epoch()
            
        #Train the model
        for batch_idx, data_batch in enumerate(train_dataloader):
            optimizer.zero_grad()
    
            label_batch = data_batch[0]
            image_batch = data_batch[1]
    
            train_prediction = model(image_batch)
            train_loss       = loss_func(train_prediction, label_batch)

            train_loss.backward()
            optimizer.step()
            
            trainstats.add(np.argmax(train_prediction.detach().numpy(), axis = 1),
                           np.argmax(label_batch.detach().numpy(), axis = 1),
                           train_loss.item())
    
        trainstats.new_epoch()
            
        print("{} {:.2f} {:.2f}% {:.2f} {:.2f}%".format(epochnum, 
                                                        trainstats.losses[-1]/len(training_dataloader.dataset),
                                                        trainstats.accs[-1],
                                                        valstats.losses[-1]/len(val_dataloader.dataset),
                                                        valstats.accs[-1]))
    return trainstats, valstats

In [None]:
train_params = {"epochs": 25,
                "batch_size": 5,
                "learningRate": 0.01}

singlelayer_digitnet = SingleLayerDigitNet() #retrain from start

training_dataloader = torch.utils.data.DataLoader(train_digits,
                                                  batch_size = train_params["batch_size"])

val_dataloader = torch.utils.data.DataLoader(val_digits,
                                             batch_size = len(val_digits))

loss_func = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(singlelayer_digitnet.parameters(),
                            lr = train_params['learningRate'])

trainstats, valstats = train_network(training_dataloader,
                                     val_dataloader,
                                     singlelayer_digitnet,
                                     loss_func,
                                     optimizer,
                                     train_params)


In [None]:
def visualize_firstlayer_weights(network):    
    for m in network.modules():
        if isinstance(m, nn.Linear):
            weights = m.weight        
            break
    
    fig, axs = plt.subplots(nrows=2, ncols=5, figsize=(20, 8),
                           subplot_kw={'xticks': [], 'yticks': []})
    for i, ax in enumerate(axs.flat):    
        ax.imshow(weights[i].detach().numpy().reshape(28, 28), 
                  plt.cm.bone)

#The weights look like the numbers, the model has learned useful and generalizable features!
visualize_firstlayer_weights(singlelayer_digitnet)

In [None]:
#Train a more complicated network with less data

train_params = {"epochs": 100,
                "batch_size": 1,
                "learningRate": 0.01}

#Define the neural network
class MultiLayerDigitNet(nn.Module):
    def __init__(self):
        super(MultiLayerDigitNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 14*14)
        self.fc2 = nn.Linear(14*14, 100)
        self.fc3 = nn.Linear(100, 10)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

multilayer_digitnet = MultiLayerDigitNet() #retrain from start

training_dataloader = torch.utils.data.DataLoader(MnistData(train_df.groupby("label").first().reset_index()),
                                                  batch_size = train_params["batch_size"])

val_dataloader = torch.utils.data.DataLoader(val_digits,
                                             batch_size = len(val_digits))

loss_func = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(multilayer_digitnet.parameters(),
                            lr = train_params['learningRate'])

trainstats, valstats = train_network(training_dataloader,
                                     val_dataloader,
                                     multilayer_digitnet,
                                     loss_func,
                                     optimizer,
                                     train_params)

#The accuracy is very high for the training data but poor for the validation data.
#The model is overfitting

In [None]:
#The weights are no longer recognizable
visualize_firstlayer_weights(multilayer_digitnet)