In [None]:
# %% Deep learning - Section 7.55
#    Depth vs. breadth - II

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F

from google.colab                     import files
from torchsummary                     import summary
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% Import Iris dataset

iris = sns.load_dataset('iris')

# Take the first four columns (the features) and cast them to a float tensor
data = torch.tensor(iris.iloc[:,0:4].values).float()

# Encode each species name as an integer class label (0, 1, 2)
labels = torch.zeros(len(data),dtype=torch.long)
for species_code,species_name in enumerate(['setosa','versicolor','virginica']):
    labels[iris.species==species_name] = species_code


In [None]:
# %% Class for the model
#    Flexibly loop over model depth/breadth

class ANNiris(nn.Module):
    """Fully connected classifier with configurable breadth and depth.

    Architecture:
        Linear(4,nUnits) -> ReLU
        -> nLayers x [ Linear(nUnits,nUnits) -> ReLU ]
        -> Linear(nUnits,3)

    Args:
        nUnits:  number of units in the input layer and in every hidden layer.
        nLayers: number of hidden layers (0 gives input -> output only).

    The forward pass returns the raw output of the last linear layer (no
    softmax is applied), with one column per class.
    """
    def __init__(self,nUnits,nLayers):
        super().__init__()

        # Dictionary to store the layers
        self.layers  = nn.ModuleDict()
        self.nLayers = nLayers

        # Input layer
        self.layers['input'] = nn.Linear(4,nUnits)

        # Hidden layers
        for i in range(nLayers):
            self.layers[f'hidden{i}'] = nn.Linear(nUnits,nUnits)

        # Output layer
        self.layers['output'] = nn.Linear(nUnits,3)

    # Forward propagation
    def forward(self,x):
        """Map a (samples x 4) input tensor to (samples x 3) class scores."""

        # Input layer; the ReLU is required here, otherwise 'input' and
        # 'hidden0' are two stacked linear maps that collapse into a single
        # affine transform and the first layer adds no representational power
        x = F.relu(self.layers['input'](x))

        # Hidden layers
        for i in range(self.nLayers):
            x = F.relu(self.layers[f'hidden{i}'](x))

        # Output layer (left linear: raw scores for the loss function)
        x = self.layers['output'](x)

        return x


In [None]:
# %% Generate an instance of the model and check it

nUnitsPerLayer,nLayers = 12,4

model = ANNiris(nUnits=nUnitsPerLayer,nLayers=nLayers)

# Bare expression so the notebook displays the module structure
model


In [None]:
# %% Run the model to check its internal consistency

# Random input batch: 10 samples by 4 features
tmpx = torch.randn(10,4)

# Forward pass through the model
y = model(tmpx)

# Print the output shape, an empty line, then the output itself
print(y.shape,y,sep='\n\n')


In [None]:
# %% Function to train the model

def train_model(theModel,epochs=None,inputs=None,targets=None,lr=0.01):
    """Train a model with full-batch SGD and report its final accuracy.

    Args:
        theModel: the nn.Module to train (updated in place).
        epochs:   number of training epochs; when None, the notebook-level
                  variable `num_epochs` is used.
        inputs:   input tensor; when None, the notebook-level `data` is used.
        targets:  class-index tensor; when None, the notebook-level `labels`
                  is used.
        lr:       learning rate passed to the SGD optimizer.

    Returns:
        (tot_acc, n_params): accuracy in percent on `inputs` (0-d tensor) and
        the number of trainable parameters in the model (int).
    """

    # Fall back to the notebook globals so that train_model(model) keeps working
    if epochs  is None: epochs  = num_epochs
    if inputs  is None: inputs  = data
    if targets is None: targets = labels

    # Loss function and optimizer
    loss_fun  = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(theModel.parameters(),lr=lr)

    # Loop over epochs (the whole dataset is one batch)
    for epoch_i in range(epochs):

        # Forward pass
        yHat = theModel(inputs)

        # Loss computation
        loss = loss_fun(yHat,targets)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Final pass and accuracy; no gradients are needed for evaluation
    with torch.no_grad():
        predictions = theModel(inputs)
    pred_labels = torch.argmax(predictions,dim=1)
    tot_acc     = 100*torch.mean((pred_labels==targets).float())

    # Total numbers of trainable parameters in the model
    n_params = sum( p.numel() for p in theModel.parameters() if p.requires_grad )

    # Function output
    return tot_acc,n_params


In [None]:
# %% Test the function to train the model

# train_model reads num_epochs from the notebook namespace, so set it first
num_epochs = 2500
output     = train_model(theModel=model)

# Show the returned (accuracy, number of parameters) pair
print(output)


In [None]:
# %% Parametric experiment on model depth and breadth
#    This cell takes ~ 3 mins

# Grid of architectures: number of hidden layers and units per hidden layer
num_layers = range(1,6)
num_units  = np.arange(4,101,3)

# Result matrices: one row per breadth value, one column per depth value
accuracies = np.zeros(( len(num_units),len(num_layers) ))
tot_params = np.zeros_like(accuracies)

# Number of training epochs (picked up by train_model from the notebook namespace)
num_epochs = 500

# Visit every (breadth, depth) combination in row-major order
for unit_i,layer_i in np.ndindex(accuracies.shape):

    # Fresh model instance for this combination
    model = ANNiris(num_units[unit_i],num_layers[layer_i])

    # Train it, then record accuracy and parameter count
    tot_acc,n_params = train_model(model)

    accuracies[unit_i,layer_i] = tot_acc
    tot_params[unit_i,layer_i] = n_params


In [None]:
# %% Plotting
#    Accuracy as function of model depth

fig,ax = plt.subplots(1,figsize=(12,6))

# One curve per depth (columns of accuracies) against the number of units
ax.plot(num_units,accuracies,'o-',markerfacecolor='w',markersize=9)

# Dashed grey reference lines at 33% and 67% accuracy
for ref_level in (33,67):
    ax.plot(num_units[[0,-1]],[ref_level,ref_level],'--',color=[.75,.75,.75])

ax.legend([f'{n} hidden layers' for n in num_layers],loc='lower right')
ax.set(ylabel='Accuracy (%)',
       xlabel='Number of hidden units',
       title='Accuracy over model depth and breadth')

# Save the figure, display it, then download the saved file through Colab
plt.savefig('figure81_number_hidden_units.png')

plt.show()

files.download('figure81_number_hidden_units.png')


In [None]:
# %% What about the number of parameters ?
#    Notice how the number of trainable parameters does not correlate with the performance

# Flatten both result matrices into 1D vectors
x = tot_params.flatten()
y = accuracies.flatten()

# Correlation coefficient (off-diagonal entry of the 2x2 correlation matrix)
r = np.corrcoef(x,y)[0,1]

# Scatter plot of accuracy against parameter count, titled with the correlation
plt.plot(x,y,'o')
plt.title(f'Correlation = {r:.2f}')
plt.ylabel('Accuracy (%)')
plt.xlabel('Number of parameters')

# Save the figure, display it, then download the saved file through Colab
plt.savefig('figure82_number_parameters.png')

plt.show()

files.download('figure82_number_parameters.png')


In [None]:
# %% Exercise 1
#    Try it again with 1000 training epochs. Do the deeper models eventually learn?

# The deeper models do improve, but the general pattern is preserved and the deeper
# models keep underperforming


In [None]:
# %% Exercise 2
#    The categories are coded as "0", "1", and "2". Is there something special about those numbers?
#    Recode the labels to be, e.g., 5, 10, and 17. Or perhaps -2, 0, and 2. Is the model still able to learn?

# Nothing special, these are just dummy variables to code for different groups; however, the loss
# function (cross-entropy) expects non-negative integers to label the classes, and for better code
# readability it's probably better to stick to a range of the type: [0, num_classes-1]
