In [None]:
# %% Deep learning - Section 7.53
#    Code challenge 5: more qwerties

#    1) Integrate code from binary and multioutput classification
#    2) Make 3 groups of data and classify them with an ANN
#    3) Try with a 2-4-3 architecture
#    4) Plot losses and accuracy over epochs, and classification probabilities

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import copy

from google.colab                     import files
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% Data

# Cluster settings: number of points per group and scale of the Gaussian spread
nClust = 100
blur   = 1

# Cluster centres, each given as [x1, x2]
A = [1, 1]
B = [5, 1]
C = [3,-2]

# Each group is a pair [x1 samples, x2 samples]: centre plus scaled standard-normal noise
a = [ A[0] + blur*np.random.randn(nClust), A[1] + blur*np.random.randn(nClust) ]
b = [ B[0] + blur*np.random.randn(nClust), B[1] + blur*np.random.randn(nClust) ]
c = [ C[0] + blur*np.random.randn(nClust), C[1] + blur*np.random.randn(nClust) ]

# True labels: column vector with nClust zeros, then nClust ones, then nClust twos
labels_np = np.repeat([0., 1., 2.], nClust).reshape(-1, 1)

# Join the groups along the sample axis (2 x 3*nClust), then transpose to samples x features
data_np = np.concatenate((a, b, c), axis=1).T

# Torch versions: float32 features and a 1D vector of integer class labels
data   = torch.tensor(data_np, dtype=torch.float32)
labels = torch.tensor(labels_np, dtype=torch.long).squeeze()


In [None]:
# Plotting

fig = plt.figure(figsize=(7,7))

# One series of square markers per class, drawn in label order 0, 1, 2
for class_i in range(3):
    in_class = np.where(labels==class_i)[0]   # row indices belonging to this class
    plt.plot( data[in_class,0], data[in_class,1], 's' )

plt.xlabel('x1')
plt.ylabel('x2')
plt.title('Some data')

# Write the figure to disk before showing it
plt.savefig('figure67_code_challenge_5.png')

plt.show()

# Hand the saved image to the Colab file-download helper
files.download('figure67_code_challenge_5.png')


In [None]:
# %% Build the model

# Architecture (2-4-3).
# The network outputs raw class scores (logits). There is deliberately no
# Softmax layer at the end: nn.CrossEntropyLoss applies log-softmax internally
# and expects unnormalized scores, so an explicit Softmax here would squash the
# outputs twice and flatten the gradients. Apply a softmax to the model output
# wherever actual probabilities are needed; argmax-based class predictions are
# the same with or without it.
ANNclassify = nn.Sequential(
                 nn.Linear(2,4),    # input layer
                 nn.ReLU(),         # a.f.
                 nn.Linear(4,3),    # output layer (one logit per class)
                 )

# Loss function (includes [Log]Softmax)
loss_fun = nn.CrossEntropyLoss()

# Optimizer
optimizer = torch.optim.SGD(ANNclassify.parameters(),lr=0.01)


In [None]:
# %% Train the model

num_epochs  = 10000
losses      = torch.zeros(num_epochs)   # loss value per epoch
ongoing_acc = []                        # accuracy (percent) per epoch

for epoch_i in range(num_epochs):

    # Forward propagation
    yHat = ANNclassify(data)

    # Loss
    loss = loss_fun(yHat,labels)
    # Store a plain number: assigning the graph-attached tensor itself would
    # pull `losses` into the autograd graph and keep a growing chain of graph
    # nodes alive for the whole training run.
    losses[epoch_i] = loss.item()

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Accuracy over iteration (argmax takes the index with the highest val in yHat rows)
    matches     = torch.argmax(yHat,dim=1) == labels   # booleans
    matches_num = matches.float()                      # booleans2numeric
    acc_perc    = 100*torch.mean(matches_num)          # percent
    ongoing_acc.append(acc_perc.item())                # append plain float to list

# Final forward pass (evaluation only, so no gradient tracking is needed)
with torch.no_grad():
    predictions = ANNclassify(data)

pred_labels = torch.argmax(predictions,dim=1)
tot_acc     = 100*torch.mean((pred_labels == labels).float())

print(f'Final accuracy = {tot_acc.item():.4f}%')


In [None]:
# %% Plotting

# Two side-by-side panels: loss on the left, accuracy on the right
fig,ax = plt.subplots(1,2,figsize=(12,4))

# Left panel: loss per epoch (detached so matplotlib gets a tensor free of autograd history)
ax[0].plot(losses.detach())
ax[0].set_title('Losses over epochs')
ax[0].set_xlabel('Epoch')
ax[0].set_ylabel('Loss')

# Right panel: accuracy per epoch
ax[1].plot(ongoing_acc)
ax[1].set_title('Accuracy over epochs')
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('Accuracy')

# Write the figure to disk before showing it
plt.savefig('figure68_code_challenge_5.png')

plt.show()

# Hand the saved image to the Colab file-download helper
files.download('figure68_code_challenge_5.png')


In [None]:
# %% Plotting

# Row-wise softmax (dim 1 runs across the output units of each sample)
sm = nn.Softmax(dim=1)

fig = plt.figure(figsize=(15,5))

# yHat is the model output from the last training iteration; detach it from
# autograd, normalize each row, and draw one marker series per output unit.
# NOTE(review): if the model already ends in a Softmax layer, this normalizes
# the output a second time - confirm which of the two is intended.
class_probs = sm(yHat.detach())
plt.plot(class_probs,'s',markerfacecolor='none')
plt.title('Classification probabilities')
plt.xlabel('Stimulus number')
plt.ylabel('Probability')
plt.legend(['Group 1','Group 2','Group 3'],framealpha=0.75)

# Write the figure to disk before showing it
plt.savefig('figure69_code_challenge_5.png')

plt.show()

# Hand the saved image to the Colab file-download helper
files.download('figure69_code_challenge_5.png')


In [None]:
# %% Exercise 1
#    Does the model always do well? Re-run the entire notebook multiple times and see if it always reaches high accuracy
#    (e.g., >90%). What do you think would be ways to improve the performance stability of the model?

# Not super sure about this point: the model I set up tends to run reliably between 92-94%, and 10000 epochs (as I
# noticed from the graphs at the beginning) seem way too many (e.g., see the accuracy evolution plot). However,
# I then realised that at first I had run the model without the additional explicit Softmax pass at the end; see the
# next exploration for that.


In [None]:
# %% Exercise 2
#    You'll learn in the section "Metaparameters" that CrossEntropyLoss computes log-softmax internally. Does that mean
#    that the Softmax() layer in the model needs to be there? Does it hurt or help? If you remove that final layer, what
#    would change and what would be the same in the rest of the notebook?
#    (Note about this problem: If it feels too advanced, then revisit this problem after the "Metaparameters" section.)

# As mentioned above, running the model without the extra Softmax actually makes the learning a bit faster; adding the extra
# explicit activation function makes the accuracy increase more slowly. Also, the loss does not decrease as much as in
# the above example. I'm not sure why though... maybe in the latter case we are actually applying the softmax twice to the
# output layer?
# As for the rest of the code, the only modification needed without the extra softmax is that you need to apply it
# when plotting the classification probabilities.
