In [None]:
# %% Deep learning - Section 7.54
#    Comparing the number of hidden units

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy             as np
import matplotlib.pyplot as plt
import torch
import torch.nn          as nn
import seaborn           as sns
import copy

from google.colab                     import files
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% Import Iris dataset and plot

# Fetch the Iris table through seaborn's dataset loader
iris = sns.load_dataset('iris')
iris.head()

# One scatter panel per pair of columns, points coloured by the 'species' column
sns.pairplot(data=iris,hue='species')
plt.show()


In [None]:
# %% Organise data

# The first four dataframe columns become the float feature tensor
feature_cols = iris.columns[0:4]
data = torch.tensor( iris[feature_cols].values ).float()

# Encode every species name as an integer class index (0, 1, 2)
labels = torch.zeros(len(data),dtype=torch.long)
for class_idx,species_name in enumerate(['setosa','versicolor','virginica']):
    labels[iris.species==species_name] = class_idx

labels


In [None]:
# %% Functions


In [None]:
# %% Function to build the model

def gen_iris_model(nHidden,lr=0.01):
    """Build a fresh classifier together with its loss function and optimizer.

    The network maps 4 input features to 3 output scores through two
    linear layers of `nHidden` units, each followed by a ReLU.

    Parameters
    ----------
    nHidden : int
        Number of units in each of the two hidden-width layers. NumPy
        integers (e.g. elements of `np.arange`) are accepted.
    lr : float, optional
        Learning rate handed to the SGD optimizer (default 0.01).

    Returns
    -------
    ANNiris : nn.Sequential
        The newly created, untrained model.
    loss_fun : nn.CrossEntropyLoss
        Loss function to apply to the model's raw outputs.
    optimizer : torch.optim.SGD
        Optimizer bound to the parameters of `ANNiris`.
    """

    # Normalise NumPy integer types to a plain Python int
    nHidden = int(nHidden)

    # Architecture (softcoded hidden units)
    ANNiris = nn.Sequential(
                 nn.Linear(4,nHidden),        # input layer
                 nn.ReLU(),                   # a.f.
                 nn.Linear(nHidden,nHidden),  # hidden layer
                 nn.ReLU(),                   # a.f.
                 nn.Linear(nHidden,3),        # output layer
                 #nn.Softmax(dim=1)           # final a.f. (illustration purpose)
                 )

    # Loss function (applied to the raw outputs, no softmax layer in the model)
    loss_fun = nn.CrossEntropyLoss()

    # Optimizer
    optimizer = torch.optim.SGD(ANNiris.parameters(),lr=lr)

    return ANNiris, loss_fun, optimizer


In [None]:
# %% Function to train the model

def train_iris_model(ANNiris):
    """Train `ANNiris` on the whole dataset and return its final accuracy.

    Besides its argument, this function reads the module-level names
    `data`, `labels`, `loss_fun`, `optimizer` and `num_epochs`; they must be
    defined before the call, and `optimizer` must have been created for the
    parameters of this same `ANNiris`, otherwise the model is not updated.

    Parameters
    ----------
    ANNiris : torch.nn.Module
        Model to train in place.

    Returns
    -------
    torch.Tensor
        0-dimensional tensor with the percentage (0-100) of samples in
        `data` whose predicted class equals the entry in `labels`.
    """

    for _ in range(num_epochs):

        # Forward propagation
        yHat = ANNiris(data)

        # Compute loss
        loss = loss_fun(yHat,labels)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Final forward pass; evaluation only, so skip building the autograd graph
    with torch.no_grad():
        predictions = ANNiris(data)
    pred_labels = torch.argmax(predictions,dim=1)

    return 100*torch.mean( (pred_labels==labels).float() )


In [None]:
# Run the parametric experiment

num_epochs = 150
num_hidden = np.arange(1,128+1)  # 1, 2, ..., 128 hidden units
accuracies = []

# NOTE: loss_fun and optimizer have to be assigned at module level here,
# because train_iris_model reads them as globals rather than as arguments.
# NOTE(review): no random seed is set in this cell, so results vary per run.
for n_units in num_hidden:

    # Brand-new model, loss function and optimizer for this width
    ANNiris,loss_fun,optimizer = gen_iris_model(n_units)

    # Train it and keep the final accuracy
    accuracies.append( train_iris_model(ANNiris) )


In [None]:
# Plotting

fig,ax = plt.subplots(1,figsize=(12,6))

# Plot against the actual unit counts (1..128) rather than the implicit list
# index (0..127), so the curve lines up with the x-axis label and with the
# dashed reference lines, which are drawn in num_hidden coordinates.
ax.plot(num_hidden,accuracies,'o-',markerfacecolor='w',markersize=8)
ax.plot(num_hidden[[0,-1]],[33,33],'--',color=[.75,.75,.75])
ax.plot(num_hidden[[0,-1]],[67,67],'--',color=[.75,.75,.75])
ax.set_xlabel('Number of hidden units')
ax.set_ylabel('Accuracy')
ax.set_title(f'Number of epochs: {num_epochs} - Learning rate: 0.01')
fig.suptitle('Accuracy for different numbers of units in hidden layer')

plt.savefig('figure72_number_hidden_units.png')

plt.show()

files.download('figure72_number_hidden_units.png')


In [None]:
# %% Exercise 1
#    The results here show that models with fewer than ~50 hidden units have lackluster performance. Would these models
#    eventually learn if they were given more training epochs? Try this by re-running the experiment using 500 epochs.
#    Tip: Copy/paste the plotting code into a new cell to keep both plots. Or, take screenshots of the plots.

# The model performance increases dramatically with more epochs, and even 10-15 units in the hidden layer seem
# enough to reach ceiling performance. Not sure which method is more computationally expensive


In [None]:
# %% Exercise 2
#    Going back to 150 epochs, explore the effect of changing the learning rate. This doesn't need to be a full parametric
#    experiment; you can simply try it again using learning rates of .1, .01 (what we used in the video), and .001.

# A learning rate of 0.001 is too small and keeps the model from converging to some optimal minimum, a learning
# rate of 0.1 is a bit coarse, it produces relatively high accuracy values, but the model remains highly variable
# also for a high number of units in the hidden layer


In [None]:
# %% Exercise 3
#    With simple models and small datasets, it's possible to test many different parameter settings. However, larger
#    models take longer to train, and so running 128 tests is not always feasible. Modify the code to have the number of
#    hidden units range from 1 to 128 in steps of 14. Plot the results on top of the results using steps of 1 (that is,
#    show both results in the same graph). Does your interpretation change with fewer experiment runs?

# There is no reason to expect radically different results. Some differences might occur
# due to stochasticity in the algorithm, but the conclusions are basically the same

# Run the parametric experiment with steps of 1 and 14
num_epochs    = 150
num_hidden    = np.arange(1,129)    # Exclusive upper bound
num_hidden_14 = np.arange(1,129,14)

accuracies_1 = []

# loss_fun and optimizer are assigned at module level on purpose:
# train_iris_model reads them as globals.
for unit_i in num_hidden:

    # Create fresh model instance and run model
    ANNiris,loss_fun,optimizer = gen_iris_model(unit_i)
    acc = train_iris_model(ANNiris)

    # Get all accuracies
    accuracies_1.append(acc)

# Get accuracies for hidden layer in steps of 14 (no need to recompute):
# select the entries of the step-1 results whose unit count is in num_hidden_14
accuracies_all = np.array([float(a) for a in accuracies_1],dtype=np.float32)
accuracies_14  = accuracies_all[np.isin(num_hidden,num_hidden_14)]


# Plotting
fig,ax = plt.subplots(1,figsize=(12,6))

ax.plot(num_hidden,   accuracies_1, 'o-',markerfacecolor='w',markersize=8)
ax.plot(num_hidden_14,accuracies_14,'s-',markerfacecolor='w',markersize=8)
ax.plot(num_hidden[[0,-1]],[33,33],'--',color=[.75,.75,.75])
ax.plot(num_hidden[[0,-1]],[67,67],'--',color=[.75,.75,.75])
ax.set_xlabel('Number of hidden units')
ax.set_ylabel('Accuracy')
ax.set_title(f'Number of epochs: {num_epochs} - Learning rate: 0.01')
fig.suptitle('Accuracy for different numbers of units in hidden layer')

plt.savefig('figure76_number_hidden_units_extra3.png')

plt.show()

files.download('figure76_number_hidden_units_extra3.png')


In [None]:
# %% Exercise 3 (variant): train the step-14 models separately instead of
#    reusing the accuracies already obtained with steps of 1

# Unit counts for the two sweeps (steps of 1 and steps of 14)
num_epochs    = 150
num_hidden    = np.arange(1,129)    # 1, 2, ..., 128
num_hidden_14 = np.arange(1,129,14)

accuracies_1  = []
accuracies_14 = []

# Run both sweeps one after the other, each filling its own result list.
# The loop stays at module level because train_iris_model reads loss_fun and
# optimizer as globals.
for unit_counts,results in [(num_hidden,accuracies_1),(num_hidden_14,accuracies_14)]:
    for n_units in unit_counts:

        # Fresh model, loss function and optimizer for this width
        ANNiris,loss_fun,optimizer = gen_iris_model(n_units)

        # Train and store the final accuracy
        results.append( train_iris_model(ANNiris) )


# Plotting
fig,ax = plt.subplots(1,figsize=(12,6))

# Both sweeps on the same axes
ax.plot(num_hidden,   accuracies_1, 'o-',markerfacecolor='w',markersize=8)
ax.plot(num_hidden_14,accuracies_14,'s-',markerfacecolor='w',markersize=8)

# Dashed grey reference lines at 33 and 67
for ref_acc in (33,67):
    ax.plot(num_hidden[[0,-1]],[ref_acc,ref_acc],'--',color=[.75,.75,.75])

ax.set(xlabel='Number of hidden units',
       ylabel='Accuracy',
       title=f'Number of epochs: {num_epochs} - Learning rate: 0.01')
fig.suptitle('Accuracy for different numbers of units in hidden layer')

fig.savefig('figure77_number_hidden_units_extra3.png')

plt.show()

files.download('figure77_number_hidden_units_extra3.png')
