In [None]:
# %% Deep learning - Section 16.155
#    AEs for occlusion

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [3]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F
import pandas              as pd
import scipy.stats         as stats
import sklearn.metrics     as skm
import time
import sys

from torch.utils.data                 import DataLoader,TensorDataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from scipy.stats                      import zscore
from sklearn.decomposition            import PCA
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')
plt.style.use('default')


In [2]:
# %% Data

# Load data (hand NumPy the path so it opens and closes the file itself; no
# file handle is left dangling)
mnist_csv = np.loadtxt('sample_data/mnist_train_small.csv',delimiter=',')

# Split labels (first column) from pixel data (remaining columns)
labels = mnist_csv[:,0]
data   = mnist_csv[:,1:]

# Normalise data (original range is (0,255))
data_norm = data / np.max(data)

# Convert to tensor
data_tensor   = torch.tensor(data_norm).float()
labels_tensor = torch.tensor(labels).long()


In [4]:
# %% Model class

def gen_model():
    """Build the MNIST autoencoder together with its loss and optimizer.

    Returns
    -------
    tuple
        (model, loss function, optimizer): a fresh `mnist_AE` instance, an
        `nn.MSELoss`, and an Adam optimizer (lr=0.001) over the model
        parameters.
    """

    class mnist_AE(nn.Module):
        """Fully connected autoencoder, 784 -> 150 -> 15 -> 150 -> 784."""

        def __init__(self):
            super().__init__()

            # Encoding half
            self.input  = nn.Linear(784,150)
            self.encode = nn.Linear(150, 15)

            # Decoding half
            self.mid    = nn.Linear( 15,150)
            self.decode = nn.Linear(150,784)

        def forward(self,x):
            """Return (reconstruction, latent activations) for input x."""

            hidden_in      = F.relu(self.input(x))
            latent_code    = F.relu(self.encode(hidden_in))
            hidden_out     = F.relu(self.mid(latent_code))
            reconstruction = torch.sigmoid(self.decode(hidden_out))

            return reconstruction,latent_code

    # Model first, then the loss and an optimizer bound to its parameters
    net = mnist_AE()

    return net,nn.MSELoss(),torch.optim.Adam(net.parameters(),lr=0.001)


In [None]:
# %% Test on some data

# Untrained model plus the first five images as a quick smoke test
ANN,loss_fun,optimizer = gen_model()
X    = data_tensor[:5,:]
yHat = ANN(X)

# Each report is a heading line followed by the value; all but the last are
# followed by an empty line
print('Input shape:',X.shape,sep='\n',end='\n\n')
print('yHat is now a tuple:',f'{type(yHat)} {len(yHat)}',sep='\n',end='\n\n')
print('Shape of model output:',yHat[0].shape,sep='\n',end='\n\n')
print('Shape of encoding layer output:',yHat[1].shape,sep='\n')


In [7]:
# %% Function to train the model

def train_model(ANN,loss_fun,optimizer,num_epochs=10000,batch_size=32,data=None):
    """Train an autoencoder on randomly drawn batches of images.

    Each iteration draws `batch_size` row indices with `np.random.choice`
    (default settings, i.e. with replacement), runs one forward/backward pass
    and one optimizer step, using the input itself as the reconstruction
    target.

    Parameters
    ----------
    ANN : nn.Module
        Model whose forward pass returns a tuple; element 0 is the
        reconstruction compared against the input.
    loss_fun : callable
        Loss applied as loss_fun(reconstruction, input).
    optimizer : torch.optim.Optimizer
        Optimizer already bound to the parameters of `ANN`.
    num_epochs : int, optional
        Number of training iterations (default 10000).
    batch_size : int, optional
        Number of images drawn per iteration (default 32).
    data : torch.Tensor, optional
        Images to train on, one per row. When omitted, the module-level
        `data_tensor` is used.

    Returns
    -------
    losses : list of float
        Loss value of every iteration.
    ANN : nn.Module
        The same model object, updated in place.
    """

    # Fall back on the notebook-wide dataset when none is passed explicitly
    if data is None:
        data = data_tensor

    # Initialise vars
    losses = []

    # Loop over epochs (no minibatch loop)
    for epoch_i in range(num_epochs):

        # Select only a random subset of images
        random_i = np.random.choice(data.shape[0],size=batch_size)
        X        = data[random_i,:]

        # Forward propagation and loss (for training we only need the final
        # output, so select only the first element of the tuple yHat)
        yHat = ANN(X)[0]
        loss = loss_fun(yHat,X)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Loss in this epoch
        losses.append(loss.item())

    return losses,ANN


In [8]:
# %% Train and fit

# Build a fresh model (with its loss and optimizer) and train it; train_model
# returns the list of per-iteration losses together with the model it trained
ANN,loss_fun,optimizer = gen_model()
losses,ANN             = train_model(ANN,loss_fun,optimizer)


In [None]:
# %% Plotting

# Training loss curve on a golden-ratio sized figure
phi = (1 + np.sqrt(5)) / 2
fig,ax = plt.subplots(figsize=(phi*5,5))

ax.plot(losses,'-')
ax.set_xlabel('Epochs')
ax.set_ylabel('Model loss')
ax.set_title('Model loss over epochs')

# Save to disk, display, then hand the file to the browser
fig.savefig('figure39_latent_code.png')
plt.show()
files.download('figure39_latent_code.png')


In [None]:
# %% Inspect the latent layer

# Get data (inference only: no_grad avoids building an autograd graph for the
# whole dataset; the resulting tensors still support .detach() downstream)
with torch.no_grad():
    yHat,latent = ANN(data_tensor)

print(yHat.shape)
print(latent.shape)


In [None]:
# %% Plotting

# Left panel: histogram of all latent activations
# Right panel: the activations themselves, one row per image
phi = (1 + np.sqrt(5)) / 2
fig,ax = plt.subplots(1,2,figsize=(1.5*phi*5,5))
hist_ax,image_ax = ax
latent_vals      = latent.detach()

hist_ax.hist(latent_vals.flatten(),100)
hist_ax.set_title('Distribution of latent units activations')
hist_ax.set_xlabel('Latent activation value')
hist_ax.set_ylabel('Count')

image_ax.imshow(latent_vals,aspect='auto',vmin=0,vmax=10,cmap='plasma')
image_ax.set_title('All latent activations')
image_ax.set_xlabel('Latent node')
image_ax.set_ylabel('Image number')

fig.savefig('figure40_latent_code.png')
plt.show()
files.download('figure40_latent_code.png')


In [39]:
# Compute average latent activation for each digit type

# Graph-free view of the activations (rows are images, columns latent units)
latent_vals = latent.detach()

# Preallocate (latent units by 10 digits)
latent_average = np.zeros((latent_vals.shape[1],10))
latent_std     = np.zeros((latent_vals.shape[1],10))

# Find pics by category, average and std of latent layer output
for i in range(10):

    # A boolean row mask keeps the selection 2-D (images x units), so the
    # statistics are reduced over dim 0 and come out with one value per unit;
    # indexing with the tuple returned by np.where would add a leading
    # singleton dimension instead
    digit_mask   = torch.from_numpy(labels==i)
    digit_latent = latent_vals[digit_mask]

    latent_average[:,i] = digit_latent.mean(dim=0).numpy()
    latent_std[:,i]     = digit_latent.std(dim=0).numpy()


In [None]:
# %% Plotting

# One line per digit: mean activation of every latent unit
cmap = plt.cm.plasma(np.linspace(0.2,0.9,10))

phi = (1 + np.sqrt(5)) / 2
fig,ax = plt.subplots(figsize=(phi*5,5))

for digit,colour in enumerate(cmap):
    ax.plot(latent_average[:,digit],'s-',color=colour)

ax.legend(range(10),loc=(1.01,.4))
ax.set_xticks(range(15))
ax.set_xlabel('Latent node number')
ax.set_ylabel('Average activation')
ax.set_title("Model's internal representation of the numbers")

fig.savefig('figure41_latent_code.png')
plt.show()
files.download('figure41_latent_code.png')


In [36]:
# %% Explore a compressed space (PCA)

# For the data select c=15 to match latent but also to speed up computations
pca_data   = PCA(n_components=15)
pca_latent = PCA( )

# Fit each PCA exactly once and get the projection of the data onto the
# eigenvectors in the same call (a separate .fit() followed by
# .fit_transform() would fit everything twice); the fitted objects are kept
# for their explained_variance_ratio_
scores_data   = pca_data.fit_transform(data)
scores_latent = pca_latent.fit_transform(latent.detach())


In [None]:
# %% Plotting

# Eigenspectrum (scree plot): percent variance explained per component, for
# the PCA of the data and for the PCA of the latent code
phi = (1 + np.sqrt(5)) / 2
fig,ax = plt.subplots(figsize=(phi*5,5))

ax.plot(100*pca_data.explained_variance_ratio_,'s-',label='Data PCA')
ax.plot(100*pca_latent.explained_variance_ratio_,'o-',label='Latent PCA')

ax.set_title('PCA eigenspectrum\n(note the higher vals for the latent space)')
ax.set_xlabel('Components')
ax.set_ylabel('Percent variance explained')
ax.legend()

fig.savefig('figure42_latent_code.png')
plt.show()
files.download('figure42_latent_code.png')


In [None]:
# %% Plotting

# Projections (note both the segregation and the overlap; and this is just a toy
# example with mnist, so much for understandability)
phi = (1 + np.sqrt(5)) / 2
fig,ax = plt.subplots(1,2,figsize=(1.5*phi*5,5))

# One cloud of points per digit, first two components of each decomposition
for lab in range(10):
    is_lab = labels==lab
    ax[0].plot(scores_data[is_lab,0],scores_data[is_lab,1],'o',markersize=3,alpha=.4)
    ax[1].plot(scores_latent[is_lab,0],scores_latent[is_lab,1],'o',markersize=3,alpha=.4)

# Shared cosmetics, then the panel-specific title
panel_titles = ('Data eigendecomposition','Latent code eigendecomposition')
for panel,panel_title in zip(ax,panel_titles):
    panel.set_xlabel('Projection eigenvec. 1')
    panel.set_ylabel('Projection eigenvec. 2')
    panel.legend(range(10))
    panel.set_title(panel_title)

fig.savefig('figure43_latent_code.png')
plt.show()
files.download('figure43_latent_code.png')


In [None]:
# %% Exercise 1
#    Are you surprised that the latent activations (e.g., from the histogram) are all non-negative? Is that because of
#    the image normalization, or what is causing those values to be all non-negative?

# This one drove me crazy for a moment, then I realised we were talking about
# the output of the layer, not the weights; now it's trivial: we used a ReLU
# activation function, so all the negative values are clipped to zero, while
# the non-negative values stay the same


In [None]:
# %% Exercise 2
#    Averages don't tell the whole story. Redraw the "Model's internal representation" line plot but using standard
#    deviation instead of mean. This graph will tell you if any numbers, or units, have particularly higher variability
#    than others. Is this the case, and does the std plot give you any more insight into the model's learned representation?

# Plotting (same layout as the average-activation plot, but showing the
# per-digit standard deviation of every latent unit)
cmap = plt.cm.plasma(np.linspace(0.2,0.9,10))

phi = (1 + np.sqrt(5)) / 2
fig = plt.figure(figsize=(phi*5,5))

# One line per digit
for i in range(10):
    plt.plot(latent_std[:,i],'s-',color=cmap[i])

plt.legend(range(10),loc=(1.01,.4))
plt.xticks(range(15))
plt.xlabel('Latent node number')
plt.ylabel('Std of activation')
plt.title("Model's internal representation of the numbers")

plt.savefig('figure44_latent_code_extra2.png')
plt.show()
files.download('figure44_latent_code_extra2.png')


In [None]:
# %% Exercise 3
#    The PC-space plots are tricky to interpret: This is a 15-dimensional space but 13 dimensions are projected onto two.
#    It's possible that the numbers are better separated in other dimensions, just like a 2D photograph of someone standing
#    behind a tree makes them inseparable whereas they are separable in the original 3D space. Modify the plot to show
#    PC dimensions 2&3 instead of 1&2.

# Plotting (PC dimensions 2&3 as the exercise asks, i.e. zero-based score
# columns 1 and 2)
phi = (1 + np.sqrt(5)) / 2
fig,ax = plt.subplots(1,2,figsize=(1.5*phi*5,5))

# One cloud of points per digit
for lab in range(10):
    ax[0].plot(scores_data[labels==lab,1],scores_data[labels==lab,2],'o',markersize=3,alpha=.4)
    ax[1].plot(scores_latent[labels==lab,1],scores_latent[labels==lab,2],'o',markersize=3,alpha=.4)

# Axis labels name the components actually plotted
for i in range(2):
    ax[i].set_xlabel('Projection eigenvec. 2')
    ax[i].set_ylabel('Projection eigenvec. 3')
    ax[i].legend(range(10))

ax[0].set_title('Data eigendecomposition')
ax[1].set_title('Latent code eigendecomposition')

plt.savefig('figure45_latent_code_extra3.png')
plt.show()
files.download('figure45_latent_code_extra3.png')
