In [None]:
# %% Deep learning - Section 16.154
#    Code challenge 24: how many units?
#
#    1) Start from code from video 16.153 (mnist dataset)
#    2) Vary parametrically the number of encoding units (10-500 in 12 steps),
#       and the number of bottleneck units (5-100 in 8 steps); store the
#       average loss over the last 3 epochs
#    3) Train in minibatches using all the data, but without using dataloaders
#    4) Optional: print a single line progress report as the parametric
#       experiment goes on (only one line)
#    5) Plot the losses in a matrix (encoder vs. bottleneck)

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [3]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F
import pandas              as pd
import scipy.stats         as stats
import sklearn.metrics     as skm
import time
import sys

from torch.utils.data                 import DataLoader,TensorDataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from scipy.stats                      import zscore
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')
plt.style.use('default')


In [5]:
# %% Data

# Load data; the path is handed straight to numpy so that it opens AND closes
# the file itself (wrapping it in a bare open() left the handle unclosed)
data = np.loadtxt('sample_data/mnist_train_small.csv',delimiter=',')

# Split labels from data (first column holds the labels, the remaining columns
# the pixel values)
labels = data[:,0]
data   = data[:,1:]

# Normalise data (original range is (0,255))
data_norm = data / np.max(data)

# Convert to tensor (float32, the dtype of the model weights)
data_tensor = torch.tensor(data_norm).float()


In [69]:
# %% Model class

def gen_model(n_d,n_l,lr=0.001):
    """
    Build a fully connected autoencoder together with its loss function and
    optimizer.

    The network maps 784 inputs -> n_d -> n_l -> n_d -> 784 outputs, with ReLU
    on the three hidden layers and a sigmoid on the output.

    Parameters
    ----------
    n_d : int
        Number of units in the encoding layer and in the decoding layer.
    n_l : int
        Number of units in the bottleneck layer.
    lr : float, optional
        Learning rate passed to the Adam optimizer (default 0.001).

    Returns
    -------
    tuple
        (model instance, nn.MSELoss instance, Adam optimizer over the model
        parameters), in this order.
    """

    class mnist_AE(nn.Module):
        def __init__(self):
            super().__init__()

            # Layers, listed from input to output
            self.input  = nn.Linear(784,n_d)
            self.encode = nn.Linear(n_d,n_l)
            self.mid    = nn.Linear(n_l,n_d)
            self.decode = nn.Linear(n_d,784)

        def forward(self,x):

            # Encoder half
            hidden = F.relu(self.input(x))
            latent = F.relu(self.encode(hidden))

            # Decoder half; the sigmoid keeps the output within (0,1)
            expanded = F.relu(self.mid(latent))
            return torch.sigmoid(self.decode(expanded))

    # Model first, then loss function, then optimizer
    net = mnist_AE()

    return net,nn.MSELoss(),torch.optim.Adam(net.parameters(),lr=lr)


In [70]:
# %% Function to train the model

def train_model(ANN,loss_fun,optimizer,num_epochs=3,batch_size=32,data=None):
    """
    Train an autoencoder in shuffled minibatches, without a DataLoader.

    Every epoch visits all the samples once, in a fresh random order. The
    input batch is also used as the target of the loss function.

    Parameters
    ----------
    ANN : nn.Module
        Model to train; it is updated in place.
    loss_fun : callable
        Called as loss_fun(prediction, input_batch); must return a scalar
        tensor that supports .backward().
    optimizer : torch.optim.Optimizer
        Optimizer already bound to the parameters of ANN.
    num_epochs : int, optional
        Number of passes over the data (default 3).
    batch_size : int, optional
        Samples per minibatch (default 32); the last batch of an epoch may be
        smaller.
    data : torch.Tensor, optional
        Samples along the first dimension. When omitted, the module-level
        `data_tensor` is used.

    Returns
    -------
    losses : list of float
        One loss per epoch, averaged over batches and weighted by batch size.
    loss_avg : float
        Mean of the last three entries of `losses` (of all of them when fewer
        than three epochs were run).
    ANN : nn.Module
        The same model object that was passed in, after training.
    """

    # Fall back on the dataset loaded at notebook level
    if data is None:
        data = data_tensor

    # Initialise vars
    n_samples = data.shape[0]
    losses    = []

    # Loop over epochs
    for epoch_i in range(num_epochs):

        batch_losses = []
        batch_sizes  = []

        # Shuffle the sample indices so that batches differ between epochs
        rand_idx = np.random.permutation(n_samples)

        # Loop over minibatches
        for i in range(0,n_samples,batch_size):

            # Pick the samples of the current batch
            sample = rand_idx[i:i+batch_size]
            X      = data[sample,:]

            # Forward propagation and loss (pass data themselves to loss_fun)
            yHat = ANN(X)
            loss = loss_fun(yHat,X)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Batch mean loss and actual batch size (last batch might be
            # smaller than batch_size)
            batch_losses.append(loss.item())
            batch_sizes.append(X.shape[0])

        # Current epoch loss; weighting by batch size keeps a short last batch
        # from counting as much as a full one
        losses.append(np.average(batch_losses,weights=batch_sizes))

    # Average of the last three losses
    loss_avg = np.mean(losses[-3:])

    return losses,loss_avg,ANN


In [90]:
# %% Parametric experiment

# Parameters (number of encoding/decoding units and of bottleneck units)
n_decoder  = np.linspace(10,500,12).astype(int)
n_latent   = np.linspace(5,100,8).astype(int)
total_exps = len(n_decoder)*len(n_latent)

# Fix the random seeds so that the experiment gives the same results every time
# the cell is run (torch draws the initial weights, numpy shuffles the samples)
torch.manual_seed(0)
np.random.seed(0)

# One cell per (encoding units, bottleneck units) combination
results = np.zeros((len(n_decoder),len(n_latent)))
count   = 1

# Loop over parameters (~15 secs per loop; ~24 mins full nodes experiment)
for i,d in enumerate(n_decoder):
    for j,l in enumerate(n_latent):

        # Build a fresh model and train it; keep only the average final loss
        ANN,loss_fun,optimizer = gen_model(d,l)
        _,results[i,j],_       = train_model(ANN,loss_fun,optimizer)

        # Update message ('\r' rewinds the cursor so that the report stays on
        # a single line)
        msg = "Finished experiment {}/{}".format(count,total_exps)
        sys.stdout.write('\r'+msg)
        sys.stdout.flush()
        count += 1


In [None]:
# %% Plotting

# Golden ratio, used for the figure proportions
phi = (1 + np.sqrt(5)) / 2
fig,ax = plt.subplots(figsize=(phi*5,5))

# Loss matrix: rows follow n_decoder (first row on top), columns follow
# n_latent. With the default extent the cell centres sit on the integer
# row/column indices, so ticks can be placed on the indices directly
im = ax.imshow(results,aspect='auto',cmap='jet')

# Label every cell centre with the corresponding number of units
ax.set_xticks(np.arange(len(n_latent)))
ax.set_xticklabels(n_latent)
ax.set_yticks(np.arange(len(n_decoder)))
ax.set_yticklabels(n_decoder)

ax.set_xlabel('Number of bottleneck units')
ax.set_ylabel('Number of encoding and decoding units')
ax.set_title('Losses over nodes parametric experiment')
fig.colorbar(im,ax=ax)

# Save before showing, then trigger the download of the saved file
fig.savefig('figure26_code_challenge_24.png')
plt.show()
files.download('figure26_code_challenge_24.png')


In [None]:
# %% Exercise 1
#    Because the full experiment takes a long time, it's not practical to add another factor. Fix the number of encoder
#    units to 100 and instead parametrically explore the learning rate. You don't need so many learning rates, just use
#    [.0001, .001, .01]. The results can be shown in a line plot, with one line per lr and bottleneck units on the x-axis.

# Parametric experiment (takes ~4 mins): encoding/decoding units fixed,
# bottleneck units and learning rate varied
n_decoder  = 100
n_latent   = np.linspace(5,100,8).astype(int)
l_rates    = [.0001, .001, .01]
total_exps = len(l_rates)*len(n_latent)

# Fix the random seeds so that the experiment gives the same results every time
# the cell is run (torch draws the initial weights, numpy shuffles the samples)
torch.manual_seed(0)
np.random.seed(0)

# One row per number of bottleneck units, one column per learning rate
results_ex1 = np.zeros((len(n_latent),len(l_rates)))
count       = 1

for i,l in enumerate(n_latent):
    for j,lr in enumerate(l_rates):

        # Build a fresh model and train it; keep only the average final loss
        ANN,loss_fun,optimizer = gen_model(n_decoder,l,lr)
        _,results_ex1[i,j],_   = train_model(ANN,loss_fun,optimizer)

        # Single line progress report ('\r' rewinds the cursor)
        msg = "Finished experiment {}/{}".format(count,total_exps)
        sys.stdout.write('\r'+msg)
        sys.stdout.flush()
        count += 1


In [None]:
# %% Exercise 1
#    Continue ...

# Plotting (golden-ratio figure proportions)
phi = (1 + np.sqrt(5)) / 2
fig,ax = plt.subplots(figsize=(phi*5,5))

# One line per column of results_ex1, i.e. one line per learning rate
ax.plot(n_latent,results_ex1)

# Axes decoration
ax.set_xticks(n_latent)
ax.set_xlabel('Number of bottleneck units')
ax.set_ylabel('Loss (average over last 3 epochs)')
ax.set_title('Losses over nodes parametric experiment\n(learning rates)')
ax.legend(l_rates)

# Save before showing, then trigger the download of the saved file
fig.savefig('figure27_code_challenge_24.png')
plt.show()
files.download('figure27_code_challenge_24.png')


In [None]:
# %% Exercise 2
#    Smooth transitions across parameters are easy to interpret. But the image plot shows a sharp transition for small
#    numbers of bottleneck units. This rings alarm bells for any experimental scientist! It means that something is
#    happening at that region of parameter space and you should investigate. Thus, re-run the experiment but change the
#    parameters to focus specifically on the region of the parameter space where there are large changes in the results.

# Parametric experiment narrowing around the sharp transition (takes ~7 mins)
n_decoder  = np.linspace(10,54,12).astype(int)
n_latent   = np.linspace(5,19,8).astype(int)
total_exps = len(n_decoder)*len(n_latent)

# Fix the random seeds so that the experiment gives the same results every time
# the cell is run (torch draws the initial weights, numpy shuffles the samples)
torch.manual_seed(0)
np.random.seed(0)

# One cell per (encoding units, bottleneck units) combination
results_ex2 = np.zeros((len(n_decoder),len(n_latent)))
count       = 1

for i,d in enumerate(n_decoder):
    for j,l in enumerate(n_latent):

        # Build a fresh model and train it; keep only the average final loss
        ANN,loss_fun,optimizer = gen_model(d,l)
        _,results_ex2[i,j],_   = train_model(ANN,loss_fun,optimizer)

        # Single line progress report ('\r' rewinds the cursor)
        msg = "Finished experiment {}/{}".format(count,total_exps)
        sys.stdout.write('\r'+msg)
        sys.stdout.flush()
        count += 1


In [None]:
# %% Exercise 2
#    Continue ...

# Plotting (golden-ratio figure proportions)
phi = (1 + np.sqrt(5)) / 2
fig,ax = plt.subplots(figsize=(phi*5,5))

# Loss matrix: rows follow n_decoder (first row on top), columns follow
# n_latent. With the default extent the cell centres sit on the integer
# row/column indices, so ticks can be placed on the indices directly
im = ax.imshow(results_ex2,aspect='auto',cmap='jet')

# Label every cell centre with the corresponding number of units
ax.set_xticks(np.arange(len(n_latent)))
ax.set_xticklabels(n_latent)
ax.set_yticks(np.arange(len(n_decoder)))
ax.set_yticklabels(n_decoder)

ax.set_xlabel('Number of bottleneck units')
ax.set_ylabel('Number of encoding and decoding units')
ax.set_title('Losses over nodes parametric experiment')
fig.colorbar(im,ax=ax)

# Save before showing, then trigger the download of the saved file
fig.savefig('figure28_code_challenge_24_extra2.png')
plt.show()
files.download('figure28_code_challenge_24_extra2.png')
