In [None]:
# %% Deep learning - Section 7.46
#    Code challenge 4: manipulate regression slopes

#    1) Write a function that builds and trains the model, and outputs the final
#       prediction and loss
#    2) Write a function that creates data x and output y; x same as previous
#       video, y = m*x + randn/2, n = 50
#    3) In a parametric experiment, vary the slope from -2 to +2 in 21 steps,
#       repeat the experiment 50 times and average over them.
#    4) Plot both loss and accuracy as a function of the slopes

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import copy

from google.colab                     import files
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% Functions

# Model function
def build_model(x,y,learning_rate=0.05,num_epochs=500):
    """
    Build a small regression ANN, train it on (x,y), and evaluate it on the
    same data.

    The network is Linear(1,1) -> ReLU -> Linear(1,1), trained with full-batch
    SGD on the mean squared error.

    Parameters
    ----------
    x : torch.Tensor
        Inputs. They are fed to nn.Linear(1,1), so the last dimension must
        be 1; the correlation below additionally expects a 2-D (n,1) tensor.
    y : torch.Tensor
        Targets, same shape as the model output.
    learning_rate : float, optional
        SGD step size (default 0.05).
    num_epochs : int, optional
        Number of full-batch training steps (default 500).

    Returns
    -------
    accuracy : numpy scalar
        Pearson correlation between y and the final predictions. It can be
        nan, e.g. when the predictions are constant.
    testloss : torch.Tensor
        0-dim tensor holding the final MSE on (x,y). It is computed without
        autograd tracking, so it carries no graph and needs no .detach().
    """

    # Build the model
    ANNreg = nn.Sequential(
                nn.Linear(1,1),   # input layer (num inputs, num outputs)
                nn.ReLU(),        # activation function
                nn.Linear(1,1)    # output layer (num inputs, num outputs)
                )

    # Training setup
    loss_fun  = nn.MSELoss()
    optimizer = torch.optim.SGD(ANNreg.parameters(),lr=learning_rate)

    # Train the model (per-epoch losses are not needed by the caller, so they
    # are not stored; storing the raw loss tensor would also keep every
    # epoch's autograd graph alive)
    for epoch_i in range(num_epochs):

        yHat = ANNreg(x)
        loss = loss_fun(yHat,y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Final forward pass and loss; evaluation only, so no gradient tracking
    with torch.no_grad():
        predictions = ANNreg(x)
        testloss    = (predictions-y).pow(2).mean()

    # Compute accuracy as correlation between prediction and data
    accuracy = np.corrcoef(y.T,predictions.T)[0,1]

    return accuracy,testloss

# Data function
def gen_data(m,n=50):
    """
    Generate noisy linear data y = m*x + noise.

    Parameters
    ----------
    m : float
        Slope of the linear relationship between x and y.
    n : int, optional
        Number of data points (default 50).

    Returns
    -------
    x : torch.Tensor
        Shape (n,1), drawn from a standard normal distribution.
    y : torch.Tensor
        Shape (n,1), equal to m*x plus standard normal noise divided by 2.
    """

    # x is drawn before the noise; keep this order so seeded runs are unchanged
    x = torch.randn(n,1)
    y = m*x + torch.randn(n,1)/2

    return x,y


In [None]:
# %% Parametric experiment on slopes

# Around 4 min run
iterations = 50
slopes     = np.linspace(-2,2,21)

# Fix the torch RNG (data generation and weight initialisation) so that a
# Restart & Run All reproduces the same curves
torch.manual_seed(0)

accuracies = torch.zeros(len(slopes))
losses     = torch.zeros(len(slopes))

for slop_idx,slope_val in enumerate(slopes):

    temp_accuracies = torch.zeros(iterations)
    temp_losses     = torch.zeros(iterations)

    # Loop variable is not called 'iter', to avoid shadowing the builtin iter()
    for run_i in range(iterations):

        # Generate fresh data and train a fresh model on it
        x,y     = gen_data(slope_val)
        acc,los = build_model(x,y)

        # Store plain floats so the result buffers never join an autograd graph
        temp_accuracies[run_i] = float(acc)
        temp_losses[run_i]     = float(los)

    # Average over repetitions; .nanmean() excludes nan datapoints that might occur
    accuracies[slop_idx] = temp_accuracies.nanmean()
    losses[slop_idx]     = temp_losses.nanmean()


In [None]:
# %% Plotting

# Single source for the output filename, used by both savefig and download
fig_name = 'figure14_code_challenge_4.png'

fig,ax = plt.subplots(1,2,figsize=(10,4))

# One entry per panel: (values, line format, label, title)
panels = [
    (losses,    'b-','Loss',    'Slopes against model losses'),
    (accuracies,'r-','Accuracy','Slopes against model performance'),
    ]

for panel_ax,(values,fmt,name,title) in zip(ax,panels):
    panel_ax.plot(slopes,values,fmt,label=name)
    panel_ax.set_xlabel('Slope')
    panel_ax.set_ylabel(name)
    panel_ax.set_title(title)
    panel_ax.legend()
    panel_ax.grid(True)

plt.tight_layout()

# Save before plt.show(), as in the original cell order
plt.savefig(fig_name)

plt.show()

# Colab-only: download the saved figure through the browser
files.download(fig_name)

# Nota Bene:
# The graphs show nicely how the losses decrease for slopes close to zero (i.e., low x-y
# correlations), but also that the performance becomes quite poor for those same slopes. The losses
# are larger for steeper slopes simply because the variance of y is larger there; in other words,
# losses cannot be compared across different data unless appropriately normalised. Performance, on
# the other hand, drops for low correlations because x is not very informative about y, and the model
# thus does not have much information about y. This leads to an important point: ANNs do not predict
# data values, they only learn relationships across variables.
