In [None]:
# %% Deep learning - Section 11.109
#    Code challenge 15: data normalisation

#    1) Start from the code of video 11.108
#    2) Use the same model architecture and metaparameters
#    3) Try the following three types of data normalisation (min-max scaling
#       between 0 and 1, and confirm by printing data range):
#       I)   Normalise train and test data
#       II)  Normalise train but not test data
#       III) Normalise test but not train data
#    4) Plot accuracy and loss for each of them

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F
import pandas              as pd
import scipy.stats         as stats
import time

from torch.utils.data                 import DataLoader,TensorDataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% Data

# Load data; the path is handed straight to NumPy so that it opens and closes
# the file itself (a bare open() here would leave the handle unclosed)
data = np.loadtxt('sample_data/mnist_train_small.csv',delimiter=',')

# Split labels (first column) from data (all remaining columns)
labels = data[:,0]
data   = data[:,1:]


In [None]:
# %% Create train and test datasets

# Tensors: features as float, labels as long (integer class indices)
data_tensor   = torch.tensor(data).float()
labels_tensor = torch.tensor(labels).long()

# Hold out 10% of the samples as test data (scikit-learn split)
train_data,test_data,train_labels,test_labels = train_test_split(
    data_tensor,labels_tensor,test_size=0.1 )

# Normalise train and/or test data separately: each partition is divided by its
# own maximum (original range is (0,255))
train_data = train_data.div( train_data.max() )
test_data  = test_data.div( test_data.max() )

# Confirm the resulting ranges
print( f'Range of train data: [ {train_data.min():.1f} , {train_data.max():.1f} ]' )
print( f'Range of test data:  [ {test_data.min():.1f} , {test_data.max():.1f} ]' )

# Pair features with labels as PyTorch datasets
train_data = TensorDataset(train_data,train_labels)
test_data  = TensorDataset(test_data,test_labels)

# DataLoaders: shuffled mini-batches for training (last incomplete batch
# dropped), and one single batch holding the whole test set
batch_size   = 32
train_loader = DataLoader(train_data,batch_size=batch_size,shuffle=True,drop_last=True)
test_loader  = DataLoader(test_data,batch_size=len(test_data))


In [None]:
# %% Function to generate the model

def gen_model():
    """Build a fresh feed-forward classifier with its loss and optimizer.

    Returns
    -------
    tuple
        (model, loss function, optimizer): a new `mnist_FFN` instance, an
        `nn.NLLLoss`, and an SGD optimizer (lr=0.01) over the model parameters.
    """

    class mnist_FFN(nn.Module):
        """Fully connected network: 784 -> 64 -> 32 -> 32 -> 10."""

        def __init__(self):
            super().__init__()

            # Layers, listed from input to output
            self.input  = nn.Linear(784,64)
            self.fc1    = nn.Linear( 64,32)
            self.fc2    = nn.Linear( 32,32)
            self.output = nn.Linear( 32,10)

        def forward(self,x):
            """Return log-softmax scores over dimension 1 (paired with NLLLoss)."""

            # ReLU after each of the first three linear layers
            for layer in (self.input,self.fc1,self.fc2):
                x = F.relu(layer(x))

            # Log-softmax because NLLLoss is used instead of CrossEntropyLoss
            return F.log_softmax( self.output(x),dim=1 )

    net = mnist_FFN()

    # SGD (rather than a faster optimizer) to slow down learning for
    # illustration purposes
    sgd = torch.optim.SGD(net.parameters(),lr=0.01)

    return net,nn.NLLLoss(),sgd


In [None]:
# Test the model on one batch

ANN,loss_fun,optimizer = gen_model()

# Single forward pass on the first batch drawn from the training loader
X,y  = next(iter(train_loader))
yHat = ANN(X)

# Log-softmax output and its size (should be batch_size by output nodes),
# followed by a blank line
print(yHat,yHat.shape,'',sep='\n')

# Probabilities (exponentiated log-softmax), followed by a blank line
print(yHat.exp(),'',sep='\n')

# Loss on this batch
loss = loss_fun(yHat,y)
print('Loss: ',loss,sep='\n')


In [None]:
# %% Function to train the model

def train_model(num_epochs=60):
    """Train a fresh model from `gen_model()` and record per-epoch metrics.

    Uses the module-level `train_loader` and `test_loader`.

    Parameters
    ----------
    num_epochs : int, optional
        Number of passes over `train_loader` (default 60).

    Returns
    -------
    train_acc : list of float
        Per-epoch mean of the batch accuracies (%) on the training batches.
    test_acc : list of float
        Per-epoch accuracy (%) on the first batch yielded by `test_loader`.
    losses_trn : list of float
        Per-epoch mean of the training batch losses.
    losses_tst : list of float
        Per-epoch loss on the first batch yielded by `test_loader`.
    ANN : nn.Module
        The trained model.
    """

    # Model instance, initialise vars
    ANN,loss_fun,optimizer = gen_model()

    losses_trn = []
    losses_tst = []
    train_acc  = []
    test_acc   = []

    # Loop over epochs
    for _ in range(num_epochs):

        # Loop over training batches
        batch_acc  = []
        batch_loss = []

        for X,y in train_loader:

            # Forward propagation and loss
            yHat = ANN(X)
            loss = loss_fun(yHat,y)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss and accuracy from this batch, stored as plain Python floats
            # (.item()) so the lists hold numbers rather than 0-d tensors
            batch_loss.append(loss.item())

            matches = ( torch.argmax(yHat,dim=1) == y ).float()
            batch_acc.append( 100*matches.mean().item() )

        losses_trn.append( np.mean(batch_loss) )
        train_acc.append( np.mean(batch_acc) )

        # Test accuracy and loss; both are computed with gradients disabled
        ANN.eval()

        with torch.no_grad():
            X,y  = next(iter(test_loader))
            yHat = ANN(X)

            test_matches = ( torch.argmax(yHat,dim=1) == y ).float()
            test_acc.append( 100*test_matches.mean().item() )
            losses_tst.append( loss_fun(yHat,y).item() )

        ANN.train()

    return train_acc,test_acc,losses_trn,losses_tst,ANN


In [None]:
# %% Run the training

# Train a freshly generated model; returns the per-epoch train/test accuracy and
# loss histories together with the trained network
train_acc,test_acc,losses_trn,losses_tst,ANN = train_model()


In [None]:
# %% Plotting

# Two side-by-side panels (loss on the left, accuracy on the right); the figure
# width is scaled by the golden ratio
phi = ( 1 + np.sqrt(5) ) / 2
fig,ax = plt.subplots(1,2,figsize=(1.5*6*phi,6))

# Loss over epochs
ax[0].plot(losses_trn,label='Train loss')
ax[0].plot(losses_tst,label='Test loss')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Loss')
ax[0].set_ylim([0,3])
ax[0].set_title('Model loss')
# The line labels above are only displayed once the legend is drawn
ax[0].legend()

# Accuracy over epochs
ax[1].plot(train_acc,label='Train accuracy')
ax[1].plot(test_acc,label='Test accuracy')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Accuracy (%)')
ax[1].set_ylim([10,100])
ax[1].set_title(f'Final model test accuracy: {test_acc[-1]:.2f}%\nNormalised train and test data')
ax[1].legend()

# Save before showing, then download the saved file through Colab
plt.savefig('figure23_code_challenge_15.png')

plt.show()

files.download('figure23_code_challenge_15.png')


In [None]:
# %% Exercise 1
#    Try different normalizations, e.g., [-1 0] or [10 15]. How do these affect learning?

# The hypothesis is that as long as the train and test data are normalised, the
# results shouldn't change based on the normalisation range (i.e., [0,1] is just
# a practical range). The output seems to support this hypothesis for the range
# [-1,0], but for the range [10,15] the performance seems to be a bit affected
# since the accuracy drops slightly; I admit I'm a bit puzzled by this.

# Modify range of normalisation (normalise between 0 and 1 and then scale)
data_tensor   = torch.tensor(data).float()
labels_tensor = torch.tensor(labels).long()

# Fresh 90/10 train/test split
train_data,test_data,train_labels,test_labels = train_test_split(
    data_tensor,labels_tensor,test_size=0.1 )

# Target range [a,b]: divide each partition by its own maximum, then stretch by
# (b-a) and shift by a
a,b = -1,0
train_data = ( train_data / train_data.max() ) * (b-a) + a
test_data  = ( test_data  / test_data.max()  ) * (b-a) + a

# Confirm the resulting ranges
print( f'Range of train data: [ {train_data.min():.1f} , {train_data.max():.1f} ]' )
print( f'Range of test data:  [ {test_data.min():.1f} , {test_data.max():.1f} ]' )

# Datasets and loaders (the test loader yields the whole test set as one batch)
train_data = TensorDataset(train_data,train_labels)
test_data  = TensorDataset(test_data,test_labels)

batch_size   = 32
train_loader = DataLoader(train_data,batch_size=batch_size,shuffle=True,drop_last=True)
test_loader  = DataLoader(test_data,batch_size=len(test_data))


In [None]:
# %% Exercise 2
#    Correlate loss and accuracy over epochs, and visualize in a scatterplot. Do the loss and
#    accuracy functions really provide different information, or do the two variables reflect
#    the same underlying performance? (Note that your conclusion here is based exclusively on this
#    dataset and this architecture!)

# In this specific case, the correlation is nearly perfect, in the sense that as
# losses decrease, accuracy increases according to a linear map; so one could
# say that here the two variables provide basically the same information, even
# though conceptually they are very different (loss tells you whether the
# model is learning, accuracy how good is the overall performance); also note
# that this graph shows only data for a min-max normalisation [0,1] for both
# train and test data

# Pearson correlation between loss and accuracy over epochs (off-diagonal entry
# of the 2x2 correlation matrix)
corr_train = np.corrcoef(losses_trn,train_acc)[0,1]
corr_test  = np.corrcoef(losses_tst,test_acc)[0,1]
print(f"Correlation (Train loss vs. train accuracy): {corr_train:.3f}")
print(f"Correlation (Test loss vs. test accuracy):  {corr_test:.3f}")

# Plotting: one scatter panel per partition, each with a first-degree
# least-squares fit drawn as a dashed line
phi = ( 1 + np.sqrt(5) ) / 2
fig,ax = plt.subplots(1,2,figsize=(1.5*6*phi,6))

# Train panel
train_fit = np.polyfit(losses_trn,train_acc,1)
x_vals    = np.linspace(min(losses_trn),max(losses_trn),100)
y_vals    = np.polyval(train_fit,x_vals)
ax[0].scatter(losses_trn,train_acc,color='royalblue',alpha=0.7)
ax[0].plot(x_vals,y_vals,linestyle='--',color='blue',linewidth=2,alpha=0.6)
ax[0].set_title(f'Train loss vs. train accuracy\n(r = {corr_train:.3f})')
ax[0].set_xlabel('Train loss')
ax[0].set_ylabel('Train Accuracy (%)')

# Test panel
test_fit = np.polyfit(losses_tst,test_acc,1)
x_vals   = np.linspace(min(losses_tst),max(losses_tst),100)
y_vals   = np.polyval(test_fit,x_vals)
ax[1].scatter(losses_tst,test_acc,color='crimson',alpha=0.7)
ax[1].plot(x_vals,y_vals,linestyle='--',color='red',linewidth=2)
ax[1].set_title(f'Test loss vs. test accuracy\n(r = {corr_test:.3f})')
ax[1].set_xlabel('Test loss')
ax[1].set_ylabel('Test Accuracy (%)')

plt.tight_layout()

# Save before showing, then download the saved file through Colab
plt.savefig('figure28_code_challenge_15_extra2.png')

plt.show()

files.download('figure28_code_challenge_15_extra2.png')
