In [None]:
# %% Deep learning - Section 19.175
#    CNN on shifted MNIST

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [1]:
# %% Libraries and modules
import numpy                  as np
import matplotlib.pyplot      as plt
import torch
import torch.nn               as nn
import seaborn                as sns
import copy
import torch.nn.functional    as F
import pandas                 as pd
import scipy.stats            as stats
import sklearn.metrics        as skm
import time
import sys
import imageio.v2             as imageio
import torchvision
import torchvision.transforms as T

from torch.utils.data                 import DataLoader,TensorDataset,Dataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from scipy.stats                      import zscore
from sklearn.decomposition            import PCA
from scipy.signal                     import convolve2d
from torchsummary                     import summary
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
# Render inline matplotlib figures in SVG format
set_matplotlib_formats('svg')
# Use matplotlib's 'default' style sheet for every figure in the notebook
plt.style.use('default')


In [59]:
# %% Data

# Load data; the path is handed straight to NumPy so that it opens AND closes
# the file itself (wrapping it in a bare open() left the handle dangling)
data = np.loadtxt('sample_data/mnist_train_small.csv',delimiter=',')

# Split labels from data (first column = digit label, remaining 784 = pixels)
labels = data[:,0]
data   = data[:,1:]

# Normalise data to (0,1) by dividing by the largest pixel value (original
# range is (0,255))
data_norm = data / np.max(data)

# Reshape each flat row into an actual 2D image with a leading channel
# dimension: (samples, 1, 28, 28)
data_norm = data_norm.reshape(data_norm.shape[0],1,28,28)


In [69]:
# %% Create train and test datasets

# Tensors: images as float, labels as integers (long)
data_tensor   = torch.tensor(data_norm).float()
labels_tensor = torch.tensor(labels).long()

# Hold out 10% of the samples as test data (scikit-learn split)
train_data,test_data,train_labels,test_labels = train_test_split(
    data_tensor,
    labels_tensor,
    test_size=0.1,
)

# Pair images and labels into PyTorch datasets
train_data = TensorDataset(train_data,train_labels)
test_data  = TensorDataset(test_data,test_labels)

# DataLoader objects: shuffled mini-batches for training (incomplete last batch
# dropped), and one single batch holding the whole test set
batch_size   = 32
train_loader = DataLoader(train_data,shuffle=True,drop_last=True,batch_size=batch_size)
test_loader  = DataLoader(test_data,batch_size=len(test_data))


In [None]:
# %% How to shift an image

# Take the first test image and drop the channel dimension to get a 2D matrix
img_original = test_loader.dataset.tensors[0][0,:,:].squeeze()

# Circular shift ("rolling" in pytorch) by 8 pixels; dims=1 rolls along the
# columns, whereas dims=0 would give a vertical shift
img_shifted = torch.roll(img_original,8,dims=1)

# Plot the two versions side by side
phi = ( 1 + np.sqrt(5) ) / 2
fig,ax = plt.subplots(1,2,figsize=(1.5*5*phi,5))

panels = ( (img_original,'Original'),(img_shifted,'Shifted (rolled)') )
for axis,(image,title) in zip(ax,panels):
    axis.imshow(image, cmap='gray')
    axis.set_title(title)

# Save to disk, show inline, then trigger the Colab download
out_name = 'figure4_cnn_shifted_mnist.png'
plt.savefig(out_name)
plt.show()
files.download(out_name)


In [48]:
# %% Shift all images in the train set

# Train set: the images are modified in place inside the dataset tensor, so
# running this cell again shifts the already-shifted images once more
train_images = train_loader.dataset.tensors[0]

for img_idx in range(train_images.shape[0]):

    # Random circular shift of at most 10 pixels in either direction
    rand_roll = np.random.randint(-10,11)

    # Roll the single-channel image along dim 1 and write it back in place
    train_images[img_idx,0,:,:] = torch.roll( train_images[img_idx,0,:,:],rand_roll,dims=1 )


In [70]:
# %% Shift all images in the test set
# Note: you can re-run the previous cell to confirm the shifting

# Test set: the images are modified in place inside the dataset tensor, so
# running this cell again shifts the already-shifted images once more
test_images = test_loader.dataset.tensors[0]

for img_idx in range(test_images.shape[0]):

    # Random circular shift of at most 10 pixels in either direction
    rand_roll = np.random.randint(-10,11)

    # Roll the single-channel image along dim 1 and write it back in place
    test_images[img_idx,0,:,:] = torch.roll( test_images[img_idx,0,:,:],rand_roll,dims=1 )


In [51]:
# %% Function to generate the model

def gen_model(printing_toggle=False):
    """
    Build a small CNN for 1x28x28 images together with its loss and optimizer.

    Parameters
    ----------
    printing_toggle : bool, optional
        If True, the model prints the tensor size after every stage of the
        forward pass (default False).

    Returns
    -------
    CNN : mnist_CNN
        Freshly initialised model (2 conv layers + 1 linear layer + output).
    loss_fun : nn.CrossEntropyLoss
        Loss function.
    optimizer : torch.optim.Adam
        Adam optimizer over the model parameters (lr=0.001).
    """

    class mnist_CNN(nn.Module):
        """Two conv+maxpool stages followed by two linear layers (10 outputs)."""

        def __init__(self,printing_toggle):
            super().__init__()

            # Convolution layer 1: 1 -> 10 channels
            # side = floor( (28+2*1-5)/1 )+1 = 26, halved to 13 by the
            # 2x2 maxpool applied in forward()
            self.conv1 = nn.Conv2d(1,10,kernel_size=5,stride=1,padding=1)

            # Convolution layer 2: 10 -> 20 channels
            # side = floor( (13+2*1-5)/1 )+1 = 11, halved (and floored) to 5
            # by the 2x2 maxpool applied in forward()
            self.conv2 = nn.Conv2d(10,20,kernel_size=5,stride=1,padding=1)

            # Number of inputs to the fully connected layer: same size formula
            # with padding 0 and kernel 1 (a linear layer has neither), then
            # squared because the maps are square, times the 20 channels
            map_side    = int( np.floor( 5+2*0-1 ) + 1 )
            n_flattened = 20 * map_side**2

            # Fully connected layer
            self.fc1 = nn.Linear(n_flattened,50)

            # Output layer (one unit per digit)
            self.output = nn.Linear(50,10)

            # Whether forward() should print tensor sizes
            self.print = printing_toggle

        def forward(self,x):

            if self.print:
                print(f'Input size: {x.shape}')

            # Stage 1: convolution -> maxpool -> ReLU
            x = self.conv1(x)
            x = F.max_pool2d(x,2)
            x = F.relu(x)
            if self.print:
                print(f'Conv. layer 1 size: {x.shape}')

            # Stage 2: convolution -> maxpool -> ReLU
            x = self.conv2(x)
            x = F.max_pool2d(x,2)
            x = F.relu(x)
            if self.print:
                print(f'Conv. layer 2 size: {x.shape}')

            # Flatten everything but the batch dimension for the linear layer
            units_per_sample = x.shape.numel() // x.shape[0]
            x = x.view(-1,units_per_sample)
            if self.print:
                print(f'Vectorised conv. 2 layer size: {x.shape}')

            # Hidden linear layer with ReLU
            x = F.relu(self.fc1(x))
            if self.print:
                print(f'Linear layer size: {x.shape}')

            # Output layer (raw scores, no activation)
            x = self.output(x)
            if self.print:
                print(f'Output layer size: {x.shape}')

            return x

    # Model instance, loss function and optimizer
    CNN       = mnist_CNN(printing_toggle)
    loss_fun  = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(CNN.parameters(),lr=0.001)

    return CNN,loss_fun,optimizer


In [52]:
# %% Function to train the model

def train_model(num_epochs=30):
    """
    Train a fresh CNN from `gen_model()` on the notebook-level `train_loader`
    and evaluate it on `test_loader` after every epoch.

    Parameters
    ----------
    num_epochs : int, optional
        Number of passes over the training set (default 30, the value that was
        previously hard-coded).

    Returns
    -------
    train_acc : list of float
        Mean batch accuracy (%) on the training set, one value per epoch.
    test_acc : list of float
        Accuracy (%) on the first batch of `test_loader`, one value per epoch.
    losses : list of float
        Mean batch loss, one value per epoch.
    CNN : nn.Module
        The trained model (left in training mode).
    """

    # Model instance, initialise vars
    CNN,loss_fun,optimizer = gen_model()

    losses    = []
    train_acc = []
    test_acc  = []

    # Loop over epochs
    for epoch_i in range(num_epochs):

        # Loop over training batches
        batch_acc  = []
        batch_loss = []

        for X,y in train_loader:

            # Forward propagation and loss
            yHat = CNN(X)
            loss = loss_fun(yHat,y)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss and accuracy from this batch; .item() stores plain Python
            # floats, so np.mean below never has to convert a list of tensors
            batch_loss.append(loss.item())

            matches  = ( torch.argmax(yHat,dim=1) == y ).float()
            accuracy = 100 * torch.mean(matches).item()
            batch_acc.append(accuracy)

        losses.append( np.mean(batch_loss) )
        train_acc.append( np.mean(batch_acc) )

        # Test accuracy (evaluation mode, no gradient tracking)
        CNN.eval()

        with torch.no_grad():
            X,y = next(iter(test_loader))
            yHat = CNN(X)
            test_matches = ( torch.argmax(yHat,dim=1) == y ).float()
            test_acc.append( 100 * torch.mean(test_matches).item() )

        # Back to training mode for the next epoch
        CNN.train()

    return train_acc,test_acc,losses,CNN


In [72]:
# %% Run the model

# Train the CNN and keep the per-epoch metrics together with the trained
# network (takes ~5 mins)
(train_acc,
 test_acc,
 losses,
 CNN) = train_model()


In [None]:
# Plotting

# Two panels side by side: loss on the left, accuracies on the right
phi = (1 + np.sqrt(5)) / 2
fig,(ax_loss,ax_acc) = plt.subplots(1,2,figsize=(1.5*phi*5,5))

# Training loss per epoch
ax_loss.plot(losses,'s-')
ax_loss.set(xlabel='Epochs',ylabel='Loss',title='Model loss')

# Train and test accuracy per epoch; the title reports the last test value
for curve,marker,name in ( (train_acc,'s-','Train'),(test_acc,'o-','Test') ):
    ax_acc.plot(curve,marker,label=name)

ax_acc.set(
    xlabel='Epochs',
    ylabel='Accuracy (%)',
    title=f'Final model test accuracy: {test_acc[-1]:.2f}%',
)
ax_acc.legend()

# Save to disk, show inline, then trigger the Colab download
out_name = 'figure5_cnn_shifted_mnist.png'
plt.savefig(out_name)
plt.show()
files.download(out_name)


In [None]:
# %% Exercise 1
#    Don't translate the train images; only the test images. How does the model do now? What does this tell you about
#    what the model learned during training? (Tip: compare the test performance here to a similar performance in the ANN
#    model.)

# Well, I was not expecting such a drop in performance, but the test
# accuracy went down to 50%; now, this is still better than the FFN version,
# where I had a catastrophic collapse to 30%
# Also tried to add only some shifted images to the train dataset, and still
# test on only shifted images, in this case the performance jumps back
# immediately, so injecting even a small-ish amount of shifted data helps a lot


In [71]:
# %% Exercise 1
#    Continue ...

# Train set (shift ~20% images); the images are modified in place inside the
# dataset tensor
train_images = train_loader.dataset.tensors[0]

for img_idx in range(train_images.shape[0]):

    # Leave the image untouched with approx. 80% probability
    if np.random.rand() >= 0.20:
        continue

    # Random circular shift of at most 10 pixels in either direction
    rand_roll = np.random.randint(-10,11)

    # Roll the single-channel image along dim 1 and write it back in place
    train_images[img_idx,0,:,:] = torch.roll( train_images[img_idx,0,:,:],rand_roll,dims=1 )
