In [None]:
# %% Deep learning - Section 16.153
#    Denoising MNIST

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [2]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F
import pandas              as pd
import scipy.stats         as stats
import sklearn.metrics     as skm
import time

from torch.utils.data                 import DataLoader,TensorDataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from scipy.stats                      import zscore
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')
plt.style.use('default')


In [140]:
# %% Data

# Load data (np.loadtxt is handed the path directly, so it opens and closes the
# file itself and no file handle is left dangling)
mnist_raw = np.loadtxt('sample_data/mnist_train_small.csv',delimiter=',')

# Split labels (first column) from the data (remaining columns)
labels = mnist_raw[:,0]
data   = mnist_raw[:,1:]

# Normalise data by its largest value (original range is (0,255))
data_norm = data / np.max(data)

# Convert to tensor
data_tensor = torch.tensor(data_norm).float()


In [141]:
# %% Model class

# No need to create train and test datasets!

def gen_model(n_latent=50,lr=0.001):
    """
    Build a fresh autoencoder together with its loss function and optimizer.

    Parameters
    ----------
    n_latent : int, optional
        Number of units in the latent (bottleneck) layer. Default is 50.
    lr : float, optional
        Learning rate passed to the Adam optimizer. Default is 0.001.

    Returns
    -------
    ANN : nn.Module
        Untrained autoencoder with layers 784 -> 250 -> n_latent -> 250 -> 784.
    loss_fun : nn.MSELoss
        Mean-squared-error loss.
    optimizer : torch.optim.Adam
        Adam optimizer bound to the parameters of ANN.
    """

    class mnist_AE(nn.Module):
        def __init__(self):
            super().__init__()

            # Architecture (encoder: input+encode, decoder: mid+decode)
            self.input  = nn.Linear(784,250)
            self.encode = nn.Linear(250,n_latent)
            self.mid    = nn.Linear(n_latent,250)
            self.decode = nn.Linear(250,784)

        # Forward propagation (sigmoid to scale between 0 and 1)
        def forward(self,x):

            x = F.relu(self.input(x))
            x = F.relu(self.encode(x))
            x = F.relu(self.mid(x))
            x = torch.sigmoid(self.decode(x))

            return x

    # Generate model instance
    ANN = mnist_AE()

    # Loss function
    loss_fun = nn.MSELoss()

    # Optimizer
    optimizer = torch.optim.Adam(ANN.parameters(),lr=lr)

    return ANN,loss_fun,optimizer


In [None]:
# %% Test the model

# Fresh, untrained model
ANN,loss_fun,optimizer = gen_model()

# Push the first five images through it
X    = data_tensor[:5]
yHat = ANN(X)

# Print the input shape and the output shape, one per line
print(X.shape, yHat.shape, sep='\n')


In [None]:
# %% Plotting

# Golden ratio, used to set the figure proportions
phi = (1 + np.sqrt(5)) / 2
fig,axs = plt.subplots(2,5,figsize=(1.5*phi*5,5))

# Top row: input images; bottom row: output of the untrained model
for i in range(5):
    for row,images in enumerate((X,yHat)):
        ax = axs[row,i]
        ax.imshow(images[i,:].view(28,28).detach(),cmap='gray')
        ax.set_xticks([])
        ax.set_yticks([])

plt.suptitle('Auch!\n(bad pre-training performance)')

# Save to disk, show inline, then download the saved file
plt.savefig('figure1_denoising_mnist.png')
plt.show()
files.download('figure1_denoising_mnist.png')


In [142]:
# %% Function to train the model

def train_model(num_epochs=10000,batch_size=32):
    """
    Train a fresh autoencoder on random subsets of the global `data_tensor`.

    Each "epoch" here is a single parameter update on `batch_size` randomly
    drawn images (there is no minibatch loop). Indices are drawn with
    np.random.choice using its default settings, so samples can repeat and
    many samples are skipped.

    Parameters
    ----------
    num_epochs : int, optional
        Number of update steps. Default is 10000.
    batch_size : int, optional
        Number of images drawn for each update step. Default is 32.

    Returns
    -------
    losses : list of float
        Loss recorded at every update step.
    ANN : nn.Module
        The trained model.
    """

    # Model instance
    ANN,loss_fun,optimizer = gen_model()

    # Initialise vars
    losses = []

    # Loop over epochs (no minibatch loop)
    for epoch_i in range(num_epochs):

        # Select a random subset of images
        random_i = np.random.choice(data_tensor.shape[0],size=batch_size)
        X        = data_tensor[random_i,:]

        # Forward propagation and loss (note how here the loss is not taking the
        # correct labels, but rather the data themselves, the same data that
        # have been input into the model)
        yHat = ANN(X)
        loss = loss_fun(yHat,X)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Loss in this epoch
        losses.append(loss.item())

    return losses,ANN


In [None]:
# %% Train and plot losses

# Fit the autoencoder (takes ~2 mins), then report the last recorded loss
losses, ANN = train_model()
print(f'Final loss: {losses[-1]:.4f}')


In [None]:
# %% Plotting

# Single-panel figure with golden-ratio proportions
phi = (1 + np.sqrt(5)) / 2
loss_fig,loss_ax = plt.subplots(figsize=(phi*5,5))

# Loss curve with axis labels and title
loss_ax.plot(losses,'-')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Model loss')
loss_ax.set_title('Model loss over epochs')

# Save to disk, show inline, then download the saved file
plt.savefig('figure2_denoising_mnist.png')
plt.show()
files.download('figure2_denoising_mnist.png')


In [None]:
# %% Plotting

# First five images and their reconstructions from the trained model
X    = data_tensor[:5]
yHat = ANN(X)

phi = (1 + np.sqrt(5)) / 2
fig,axs = plt.subplots(2,5,figsize=(1.5*phi*5,5))

# Top row: inputs; bottom row: reconstructions
for i,(img_in,img_out) in enumerate(zip(X,yHat)):
    axs[0,i].imshow(img_in.view(28,28).detach(),cmap='gray')
    axs[1,i].imshow(img_out.view(28,28).detach(),cmap='gray')

# Hide the ticks on every panel
for ax in axs.flat:
    ax.set_xticks([])
    ax.set_yticks([])

plt.suptitle('Post-training performance')

# Save to disk, show inline, then download the saved file
plt.savefig('figure3_denoising_mnist.png')
plt.show()
files.download('figure3_denoising_mnist.png')


In [None]:
# %% A common use of autoencoders (denoising)

# Take a small set of images and add uniform noise to simulate a noisy input;
# values are capped at 1 to maintain normalisation
X       = data_tensor[:10,:]
X_noise = torch.clamp(X + torch.rand_like(X)/4,max=1)

# Plotting
phi = (1 + np.sqrt(5)) / 2
fig,axs = plt.subplots(2,5,figsize=(1.5*phi*5,5))

# Top row: clean images; bottom row: noisy versions. zip stops at the five
# columns of the grid, so only the first five of the ten images are shown
for ax_clean,ax_noisy,img_clean,img_noisy in zip(axs[0],axs[1],X,X_noise):
    ax_clean.imshow(img_clean.view(28,28).detach(),cmap='gray')
    ax_noisy.imshow(img_noisy.view(28,28).detach(),cmap='gray')
    ax_clean.set_xticks([])
    ax_clean.set_yticks([])
    ax_noisy.set_xticks([])
    ax_noisy.set_yticks([])

plt.suptitle('Noisy data')

# Save to disk, show inline, then download the saved file
plt.savefig('figure4_denoising_mnist.png')
plt.show()
files.download('figure4_denoising_mnist.png')


In [None]:
# %% Run the model on simulated noisy data

# Run the noisy images through the trained model
Y = ANN(X_noise)

# Plotting
phi = (1 + np.sqrt(5)) / 2
fig,axs = plt.subplots(3,10,figsize=(1.5*phi*5,5))

# One row per image set: clean input, noisy input, model output
for row,images in enumerate((X,X_noise,Y)):
    for i in range(10):
        axs[row,i].imshow(images[i,:].view(28,28).detach(),cmap='gray')
        axs[row,i].set_xticks([])
        axs[row,i].set_yticks([])

plt.suptitle('Look at that, a bit distorted but nice for such a small model.')

# Save to disk, show inline, then download the saved file
plt.savefig('figure5_denoising_mnist.png')
plt.show()
files.download('figure5_denoising_mnist.png')


In [116]:
# %% Exercise 1
#    Because these are continuous data, mean-squared-error is the correct loss function. But I mentioned in the previous
#    video that binary cross-entropy loss is *sometimes* used in autoencoders. Does the loss function make a difference
#    for this problem? Why?

# Surprisingly, and somewhat puzzlingly, the reconstructed data are not great even
# though the loss of the model decreases over epochs; that said, you can kind of
# see the digits floating around in a cloud

# Binarise data (optional); np.loadtxt is handed the path directly, so it opens
# and closes the file itself and no file handle is left dangling
mnist_raw = np.loadtxt('sample_data/mnist_train_small.csv',delimiter=',')
labels    = mnist_raw[:,0]
data      = mnist_raw[:,1:]

# Normalise by the largest value, then threshold at 0.5 so that every pixel is
# either 0 or 1
data_norm   = data / np.max(data)
data_norm   = np.where(data_norm>.5,1,0)
data_tensor = torch.tensor(data_norm).float()

# Model class
def gen_model(n_latent=50,lr=0.001):
    """
    Build a fresh autoencoder for use with binary cross-entropy loss.

    The model returns raw (unbounded) outputs: there is no sigmoid in the
    forward pass, because the sigmoid is implemented in BCEWithLogitsLoss.

    Parameters
    ----------
    n_latent : int, optional
        Number of units in the latent (bottleneck) layer. Default is 50.
    lr : float, optional
        Learning rate passed to the Adam optimizer. Default is 0.001.

    Returns
    -------
    ANN : nn.Module
        Untrained autoencoder with layers 784 -> 250 -> n_latent -> 250 -> 784.
    loss_fun : nn.BCEWithLogitsLoss
        Binary cross-entropy loss operating on logits.
    optimizer : torch.optim.Adam
        Adam optimizer bound to the parameters of ANN.
    """

    class mnist_AE(nn.Module):
        def __init__(self):
            super().__init__()

            self.input  = nn.Linear(784,250)
            self.encode = nn.Linear(250,n_latent)
            self.mid    = nn.Linear(n_latent,250)
            self.decode = nn.Linear(250,784)

        # Forward propagation (remove sigmoid, implemented in BCEWithLogitsLoss)
        def forward(self,x):

            x = F.relu(self.input(x))
            x = F.relu(self.encode(x))
            x = F.relu(self.mid(x))
            x = self.decode(x)

            return x

    # Generate model instance
    ANN       = mnist_AE()
    loss_fun  = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(ANN.parameters(),lr=lr)

    return ANN,loss_fun,optimizer


In [None]:
# %% Exercise 2
#    Change the number of units in the latent layer to 10. How does that affect the loss and denoising? Don't turn this
#    into a full parametric experiment -- that's for the next video!

# The loss is a bit higher than in the basic model, and indeed the reconstructed
# data are not as good; the denoising also breaks down even though some shapes
# are still recognisable, while some shapes are clearly the result of feature
# mixing. Quite interesting because we know that the MNIST dataset contains 10
# categories, the 10 digits, so one could hypothesise that 10 dimensions are
# enough to make the model differentiate between them; and yet it seems that
# more than 10 dimensions are necessary to get a decent performance ("decent" =
# eyeballing some data), suggesting that for each digit there is more than one
# relevant dimension (and maybe some of them are even non-orthogonal, e.g., see
# how the 5s look like 8s)


In [128]:
# %% Exercise 3
#    The code here picks samples randomly, which means many samples are skipped, and some could be repeated. Change the
#    code so that the model goes through every item exactly once per epoch. The order should be randomized to avoid
#    possible order effects. You'll probably want to reduce the number of epochs!

# The basic performance increases quite neatly! And even the denoising part does,
# despite some "inaccuracies"

# Wrap the images in a dataset and serve them in shuffled batches of 32,
# keeping the last batch rather than dropping it
dataset     = TensorDataset(data_tensor)
data_loader = DataLoader(
    dataset,
    batch_size = 32,
    shuffle    = True,
    drop_last  = False,
)

# Function to train the model
def train_model(num_epochs=50):
    """
    Train a fresh autoencoder by iterating over the global `data_loader`.

    Every epoch loops over all the batches served by `data_loader`; the loss
    stored for an epoch is the mean of its batch losses.

    Parameters
    ----------
    num_epochs : int, optional
        Number of full passes over `data_loader`. Default is 50.

    Returns
    -------
    losses : list
        Mean batch loss of every epoch.
    ANN : nn.Module
        The trained model.
    """

    # Model instance
    ANN,loss_fun,optimizer = gen_model()

    losses = []

    for epoch_i in range(num_epochs):

        # Losses of the individual batches in this epoch
        batch_loss = []

        # Each batch arrives as a 1-tuple because the dataset holds one tensor
        for (X,) in data_loader:

            # Forward propagation and loss (the target is the input itself)
            yHat = ANN(X)
            loss = loss_fun(yHat,X)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_loss.append(loss.item())

        # Loss in this epoch
        losses.append( np.mean(batch_loss) )

    return losses,ANN


In [134]:
# %% Exercise 4
#    Is it necessary to normalize the data to a range of [0 1]? What are arguments for normalization, and arguments why
#    it may not be critical (in this problem)?

# I'd say data normalisation is quite necessary, otherwise model fitting blows
# up completely, probably due to the numerical instability in weight adjustment
# (e.g., large values in the data might just produce large losses that are not
# actually related to the importance of a feature)

# Non-normalised data (np.loadtxt is handed the path directly, so it opens and
# closes the file itself and no file handle is left dangling)
mnist_raw = np.loadtxt('sample_data/mnist_train_small.csv',delimiter=',')
labels    = mnist_raw[:,0]
data      = mnist_raw[:,1:]

# Convert to tensor without rescaling the values
data_tensor = torch.tensor(data).float()


In [None]:
# %% Exercise 5
#    The autoencoder did a pretty decent job at denoising the images. How far can you push this? Try adding more noise
#    to the images and re-running the test code (you don't need to retrain the model). Is the autoencoder robust to a
#    a larger amount of noise?

# What is large is relative, but the performance can deteriorate quite quickly and
# drops completely when the noise has the same magnitude as the data

# Trying various noise levels (double, half, etc.), expressed as the divisor
# applied to the uniform noise
noise_divisors = (2,8,1,12)

# Keep one noisy copy per level instead of overwriting a single variable, so
# that every level stays available for inspection
X_noise_levels = {d: X + torch.rand_like(X)/d for d in noise_divisors}

# The last level in the list is the one handed over to the test code
# NOTE(review): unlike the denoising cell, values are not clipped at 1 here -
# confirm whether that is intended
X_noise = X_noise_levels[noise_divisors[-1]]
