In [None]:
# %% Deep learning - Section 11.106
#    The MNIST dataset

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F
import pandas              as pd
import scipy.stats         as stats
import time

from torch.utils.data                 import DataLoader,TensorDataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
# Ask the inline plotting backend to render figures as SVG
set_matplotlib_formats('svg')


In [None]:
# %% Import the data set (comes with colab)

# Hand the path straight to np.loadtxt so that NumPy opens and closes the file
# itself; wrapping the path in a bare open(...) left the file handle unclosed.
data = np.loadtxt('sample_data/mnist_train_small.csv',delimiter=',')

# Data shape (last expression of the cell, so the notebook displays it)
data.shape


In [None]:
# %% Get labels (number IDs) and remove from data

# Column 0 goes to `labels`; all remaining columns stay in `data`. Both slices
# are taken from the same array before either name is rebound.
# NOTE: `data` is overwritten here, so running this cell a second time would
# strip one more column.
labels,data = data[:,0],data[:,1:]

# Shapes of both arrays (one per line), then the label values themselves
print(labels.shape,data.shape,sep='\n')
print(labels)


In [None]:
# %% Plotting

# 3x4 grid of panels; figure width is the height scaled by the golden ratio
phi = ( 1 + np.sqrt(5) ) / 2
fig,axs = plt.subplots(3,4,figsize=(6*phi,6))

for ax in axs.ravel():

    # One random row index per panel
    pick = np.random.randint(data.shape[0])

    # Fold the flat pixel row back into a 28x28 image and show it
    ax.imshow(data[pick].reshape(28,28),cmap='gray')

    # Label of the drawn sample as panel title
    ax.set_title('The number %i' %labels[pick])

# Leave the top 5% of the canvas free for the figure-level title
fig.suptitle('How humans see the data',fontsize=18)
fig.tight_layout(rect=[0,0,1,.95])

# Write to disk, display, then hand the file to the browser
fig.savefig('figure1_mnist_dataset.png')

plt.show()

files.download('figure1_mnist_dataset.png')


In [None]:
# %% Plotting

# 3x4 grid of panels; figure width is the height scaled by the golden ratio
phi = ( 1 + np.sqrt(5) ) / 2
fig,axs = plt.subplots(3,4,figsize=(6*phi,6))

for ax in axs.ravel():

    # One random row index per panel
    pick = np.random.randint(data.shape[0])

    # Plot the row as it is stored: a flat sequence of pixel values (black dots)
    ax.plot(data[pick],'ko')

    # Label of the drawn sample as panel title
    ax.set_title('The number %i' %labels[pick])

# Leave the top 5% of the canvas free for the figure-level title
fig.suptitle('How the FFN model see the data',fontsize=18)
fig.tight_layout(rect=[0,0,1,.95])

# Write to disk, display, then hand the file to the browser
fig.savefig('figure2_mnist_dataset.png')

plt.show()

files.download('figure2_mnist_dataset.png')


In [None]:
# %% Plotting

# Row indices of every sample labelled 7
sevens = np.where(labels==7)[0]

# 3x4 grid of panels; figure width is the height scaled by the golden ratio
phi = ( 1 + np.sqrt(5) ) / 2
fig,axs = plt.subplots(3,4,figsize=(6*phi,6))

for i,ax in enumerate(axs.ravel()):

    # The i-th seven, folded back into a 28x28 image, without axis decorations
    ax.imshow(data[sevens[i]].reshape(28,28),cmap='gray')
    ax.axis('off')

# Leave the top 5% of the canvas free for the figure-level title
fig.suptitle('Examples of the number 7',fontsize=18)
fig.tight_layout(rect=[0,0,1,.95])

# Write to disk, display, then hand the file to the browser
fig.savefig('figure3_mnist_dataset.png')

plt.show()

files.download('figure3_mnist_dataset.png')


In [None]:
# %% Plotting

# Explore how similar are the sevens (correlations, unique correlations, average)
# Rows of `data` selected by `sevens`; reused for the correlation and the mean
sevens_data = data[sevens,:]
print(sevens_data.shape)

# Pairwise correlation between rows (one row per selected image)
c = np.corrcoef(sevens_data)

phi = ( 1 + np.sqrt(5) ) / 2
fig,ax = plt.subplots(1,3,figsize=(6*phi,6))

# Panel 1: full correlation matrix, colour scale clipped to [0,1]
ax[0].imshow(c,vmin=0,vmax=1,cmap='jet')
ax[0].set_title('Correlation matrix across all 7s')

# Panel 2: each image pair counted once. Select the strict upper triangle by
# position rather than zeroing the rest and filtering on `!= 0`, which would
# also discard any pair whose correlation happens to be exactly zero.
unique_c = c[np.triu_indices_from(c,k=1)]
ax[1].hist(unique_c,bins=120)
ax[1].set_title('All unique correlations')
ax[1].set_xlabel('Correlations')

# Panel 3: pixel-wise mean over the selected rows, folded back to 28x28
avg_sevens = np.reshape(np.mean(sevens_data,axis=0),(28,28))
ax[2].imshow(avg_sevens,cmap='gray')
ax[2].set_title('Average of all 7s')

plt.tight_layout()

# Write to disk, display, then hand the file to the browser
plt.savefig('figure4_mnist_dataset.png')

plt.show()

files.download('figure4_mnist_dataset.png')
