In [None]:
# %% Deep learning - Section 14.136
#    FFN project 1: a gratuitously complex adding machine
#    1) Build an FFN that can add 2 integers between -10 and 10
#    2) Input is 2 integers, output their sum
#    3) You are free to select the architecture and the metaparameters
#    4) Once the model is built, run it 10 times and report performance
#    5) Visualise true and predicted sums (round to integer)

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F
import pandas              as pd
import scipy.stats         as stats
import sklearn.metrics     as skm
import time

from torch.utils.data                 import DataLoader,TensorDataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% Function to generate the data

def gen_dataset(sample_number=20,batch_size=16):
    """
    Build train/dev/test DataLoaders of random integer pairs and their sums.

    Parameters
    ----------
    sample_number : int
        Total number of integer pairs drawn before splitting.
    batch_size : int
        Mini-batch size of the training loader (defaults to 16, the value
        that used to be hard-coded).

    Returns
    -------
    train_loader, dev_loader, test_loader : DataLoader
        The data are split roughly 80% / 10% / 10%. The training loader is
        shuffled and mini-batched; the dev and test loaders each deliver all
        of their samples in one single batch.
    """

    # Draw the integer pairs in [-10, 10] (`high` is exclusive) and their true sums
    pairs = np.random.randint(low=-10,high=11,size=(sample_number,2))
    sums  = np.sum(pairs,axis=1)

    # Convert to float tensors (labels reshaped into a column vector)
    pairs_T = torch.tensor(pairs).float()
    sums_T  = torch.tensor(sums).float().view(-1,1)

    # Split data with scikit-learn: 20% is held out, then halved into dev and test
    train_X,held_X, train_y,held_y = train_test_split(pairs_T,sums_T,test_size=0.2)
    dev_X,test_X, dev_y,test_y     = train_test_split(held_X,held_y,test_size=0.5)

    # PyTorch datasets
    train_set = TensorDataset(train_X,train_y)
    dev_set   = TensorDataset(dev_X,dev_y)
    test_set  = TensorDataset(test_X,test_y)

    # DataLoader objects (dev/test batch size = full set, i.e. one batch each)
    train_loader = DataLoader(train_set,batch_size=batch_size,shuffle=True)
    dev_loader   = DataLoader(dev_set,batch_size=dev_set.tensors[0].shape[0])
    test_loader  = DataLoader(test_set,batch_size=test_set.tensors[0].shape[0])

    return train_loader,dev_loader,test_loader


In [None]:
# %% Test data function

# Only the training loader is needed for this quick look at the data
train_loader = gen_dataset(sample_number=50)[0]

# The underlying TensorDataset holds (integer pairs, true sums)
sample_pairs,sample_sums = train_loader.dataset.tensors

print(f"Int pairs:\n{sample_pairs}")
print()
print(f"True sums:\n{sample_sums}")


In [None]:
# %% Model class

# Optional parametrised metaparameters:
#  > number of layers and of units per layer
#  > starting learning rate
#  > optimizer (e.g. 'SGD', 'RMSprop', or 'Adam')
#  > L2 regularisation
#  > activation function (e.g., 'ReLU', 'LeakyReLU', 'ReLU6', or 'GELU')

def gen_model(n_units=16,n_layers=2,lr=0.01,optim='SGD',L2_lambda=0,act_fun='ReLU'):
    """
    Build a fresh feed-forward regression model with its loss and optimizer.

    Parameters
    ----------
    n_units : int
        Number of units in the input layer output and in every hidden layer.
    n_layers : int
        Number of hidden (n_units -> n_units) layers.
    lr : float
        Learning rate handed to the optimizer.
    optim : str
        Name of an optimizer class in `torch.optim` (e.g. 'SGD', 'Adam').
    L2_lambda : float
        Passed to the optimizer as `weight_decay`.
    act_fun : str
        Name of an activation class in `torch.nn` (e.g. 'ReLU', 'GELU').

    Returns
    -------
    ANN, loss_fun, optimizer
        The model instance, an `nn.MSELoss` instance, and the optimizer bound
        to the model parameters.

    Raises
    ------
    AttributeError
        If `act_fun` is not a name in `torch.nn` or `optim` is not a name in
        `torch.optim`.
    """

    class model(nn.Module):
        def __init__(self,n_units,n_layers,act_fun):
            super().__init__()

            # Dictionary to store the layers, plus the architecture settings
            self.layers  = nn.ModuleDict()
            self.nLayers = n_layers
            self.act_fun = act_fun

            # Architecture (input, hidden, output)
            # Input layer
            self.layers['input'] = nn.Linear(2,n_units)

            # Hidden layers
            for i in range(n_layers):
                self.layers[f'hidden{i}'] = nn.Linear(n_units,n_units)

            # Output layer
            self.layers['output'] = nn.Linear(n_units,1)

            # Instantiate the activation once, as a registered submodule: an
            # unknown name fails here instead of at the first forward pass,
            # and activations with learnable parameters (e.g. 'PReLU') are
            # exposed through `parameters()` so the optimizer can train them
            self.activation = getattr(nn,act_fun)()

        def forward(self,x):

            # Input layer
            # NOTE: no activation is applied here, so the input layer and the
            # first hidden layer compose into a single affine map before the
            # first nonlinearity; kept as-is to preserve the model behaviour
            x = self.layers['input'](x)

            # Hidden layers, each followed by the selected activation
            for i in range(self.nLayers):
                x = self.activation(self.layers[f'hidden{i}'](x))

            # Output layer (linear: this is a regression model)
            x = self.layers['output'](x)

            return x

    # Model instance, loss function, and optimizer
    ANN       = model(n_units,n_layers,act_fun)
    loss_fun  = nn.MSELoss()
    opti_fun  = getattr( torch.optim,optim )
    optimizer = opti_fun(ANN.parameters(),lr=lr,weight_decay=L2_lambda)

    return ANN,loss_fun,optimizer


In [None]:
# %% Test model function

# Example metaparameters for a quick smoke test of gen_model
n_units   = 16
n_layers  = 2
lr        = 0.01
optim_alg = 'Adam'
L2_decay  = 0.01
act_fun   = 'ReLU'

# Build the model, loss function, and optimizer
ANN,loss_fun,optimizer = gen_model( n_units   = n_units,
                                    n_layers  = n_layers,
                                    lr        = lr,
                                    optim     = optim_alg,
                                    L2_lambda = L2_decay,
                                    act_fun   = act_fun )

# Show what was built
for built_obj in (ANN,loss_fun,optimizer):
    print(built_obj)


In [None]:
# %% Function to train the model

# Optional parametrised metaparameters:
#  > number of epochs
#  > tol, the tolerance within which a prediction is considered correct

def train_model(num_epochs=50,tol=0.1):
    """
    Train a fresh model and track loss and pseudo-accuracy per epoch.

    The model is built inside this function with `gen_model`, using the
    module-level names `n_units`, `n_layers`, `lr`, `optim`, `L2_lambda`, and
    `act_fun`. Data come from the module-level `train_loader` and
    `dev_loader`. All of these must be defined before calling.

    Parameters
    ----------
    num_epochs : int
        Number of passes over the training loader.
    tol : float
        A prediction counts as correct when |prediction - target| < tol
        (defaults to 0.1, the value that used to be hard-coded).

    Returns
    -------
    train_loss, train_psacc, dev_loss, dev_psacc : torch.Tensor
        One value per epoch. Training values are averages over the batches of
        that epoch; dev values come from the first batch of `dev_loader`.
    ANN : nn.Module
        The trained model (left in training mode).
    """

    # Fresh model instance (built from the module-level metaparameters)
    ANN,loss_fun,optimizer = gen_model(n_units,n_layers,lr,optim,L2_lambda,act_fun)

    # Preallocate vars
    train_loss  = torch.zeros(num_epochs)
    train_psacc = torch.zeros(num_epochs)
    dev_loss    = torch.zeros(num_epochs)
    dev_psacc   = torch.zeros(num_epochs)

    # Loop over epochs
    for epoch_i in range(num_epochs):

        # Loop over training data batches
        batch_loss = []
        batch_acc  = []

        for X,y in train_loader:

            # Forward pass, backpropagation, and optimizer step
            yHat = ANN(X)
            loss = loss_fun(yHat,y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss and pseudo-accuracy from this batch, stored as plain
            # Python floats so that np.mean does not have to convert tensors
            batch_loss.append(loss.item())
            within_tol = (torch.abs(yHat-y)<tol).float()
            batch_acc.append( 100*torch.mean(within_tol).item() )

        train_loss[epoch_i]  = float(np.mean(batch_loss))
        train_psacc[epoch_i] = float(np.mean(batch_acc))

        # Dev loss and pseudo-accuracy (no gradient tracking needed)
        ANN.eval()

        with torch.no_grad():
            X,y  = next(iter(dev_loader))
            yHat = ANN(X)
            dev_loss[epoch_i]  = loss_fun(yHat,y)
            dev_psacc[epoch_i] = 100*torch.mean((torch.abs(yHat-y)<tol).float())

        # Back to training mode for the next epoch
        ANN.train()

    return train_loss,train_psacc,dev_loss,dev_psacc,ANN


In [None]:
# %% Test the whole setting

# Generate data
train_loader,dev_loader,test_loader = gen_dataset(sample_number=3000)

# Set parameters (train_model reads these module-level names when it builds
# its own fresh model instance)
n_units    = 32
n_layers   = 2
lr         = 0.0001
optim      = 'SGD'
L2_lambda  = 0
act_fun    = 'ReLU'
num_epochs = 100

# Train model
# No separate gen_model call here: train_model creates the model, loss, and
# optimizer internally and returns the trained model, so a model built in
# this cell would be discarded without ever being used
train_loss,train_psacc,dev_loss,dev_psacc,ANN = train_model(num_epochs=num_epochs)


In [None]:
# %% Plotting

# Golden-ratio based figure proportions
phi = (1 + np.sqrt(5)) / 2
fig,axs = plt.subplots(1,2,figsize=(1.5*phi*6,6))

# One entry per panel:
# (loss curve, pseudo-accuracy curve, title, left y-label, right y-label)
# A label set to None is simply not drawn
panel_specs = [
    (train_loss,train_psacc,"Training set loss and pseudo-accuracy","MSE loss (log-scaled)",None),
    (dev_loss,dev_psacc,"Development set loss and pseudo-accuracy",None,"Pseudoaccuracy"),
]

for ax_loss,(loss_curve,acc_curve,panel_title,left_label,right_label) in zip(axs,panel_specs):

    # Loss on the left axis (log scale, fixed limits)
    loss_line, = ax_loss.plot(loss_curve.numpy(),label="Loss")
    ax_loss.set_yscale("log")
    ax_loss.set_ylim(1e-3,1e-1)
    ax_loss.set_title(panel_title)
    ax_loss.set_xlabel("Epoch")
    if left_label is not None:
        ax_loss.set_ylabel(left_label)

    # Pseudo-accuracy on a twin right axis
    ax_acc = ax_loss.twinx()
    acc_line, = ax_acc.plot(acc_curve.numpy(),label="Pseudo-acc",color='tab:orange')
    ax_acc.set_ylim(0,102)
    if right_label is not None:
        ax_acc.set_ylabel(right_label)

    # Single legend covering both lines
    ax_loss.legend(handles=[loss_line,acc_line],loc='center right')

plt.tight_layout()

plt.savefig('figure1_ffn_project_1.png')

plt.show()

# Download the saved figure (Colab helper)
files.download('figure1_ffn_project_1.png')


In [None]:
# %% Test the model on the proper test set

# Get predictions from test set (single batch holding the whole set)
ANN.eval()
with torch.no_grad():
    X,y   = next(iter(test_loader))
    preds = ANN(X)


# Flatten dimensions, flag predictions within tolerance, and get the accuracy
tolerance = 0.1
y_true    = y.numpy().ravel()
y_pred    = preds.numpy().ravel()
abs_error = np.abs(y_pred-y_true)
correct   = abs_error<=tolerance

pseudo_acc = 100*torch.mean((torch.abs(preds-y)<=tolerance).float())

# Sample indices of the predictions inside / outside the tolerance
hit_idx  = np.flatnonzero(correct)
miss_idx = np.flatnonzero(~correct)

# Plotting (true values, plus predictions inside and outside the tolerance)
phi = (1 + np.sqrt(5)) / 2
fig_test,ax_test = plt.subplots(figsize=(1.5*phi*6,6))

ax_test.plot(y_true,label="True Sum",marker='o',markerfacecolor='None',linestyle='',color='tab:blue',zorder=1)

ax_test.scatter(hit_idx,y_pred[hit_idx],color='green',marker='x',label=f"Predicted (|error| ≤ {tolerance})")
ax_test.scatter(miss_idx,y_pred[miss_idx],color='red',marker='x',label=f"Predicted (|error| > {tolerance})")

ax_test.set_title(f"True vs. predicted sums in test set (pseudo-accuracy = {pseudo_acc:.2f}%)")
ax_test.set_xlabel("Sample index")
ax_test.set_ylabel("Sum")
ax_test.legend()
ax_test.grid(alpha=0.3)

plt.savefig('figure2_ffn_project_1.png')

plt.show()

# Download the saved figure (Colab helper)
files.download('figure2_ffn_project_1.png')


In [None]:
# %% Check reproducibility on a few runs

# Generate data and set parameters (train_model reads these module-level
# names when it builds its own fresh model instance)
train_loader,dev_loader,test_loader = gen_dataset(sample_number=3000)

n_units    = 32
n_layers   = 2
lr         = 0.0001
optim      = 'SGD'
L2_lambda  = 0
act_fun    = 'ReLU'
num_epochs = 100

# Loop over independent runs; each train_model call creates and trains a new
# model, so no extra gen_model call is needed (it would only build a model
# that is thrown away immediately)
for i in range(10):

    train_loss,train_psacc,dev_loss,dev_psacc,ANN = train_model(num_epochs=num_epochs)

    # Report the pseudo-accuracy averaged over the last 5 epochs
    print(f"Model instance {i+1}, final train pseudo-accuracy = {torch.mean(train_psacc[-5:]):.1f}%, final dev pseudo-accuracy = {torch.mean(dev_psacc[-5:]):.1f}%")
