In [None]:
# %% Deep learning - Section 12.126
#    Save the best performing model

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F
import pandas              as pd
import scipy.stats         as stats
import time

from torch.utils.data                 import DataLoader,TensorDataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% A note

# This procedure entails "researcher overfitting", as we are deliberately picking
# the best model; we thus need a train, a dev, and a test set


In [None]:
# %% How to save the highest of random numbers

# Running record of the top score and of the run that produced it
top_score,top_run = 0,0

for run_i in range(10):

    # Stand-in for training: the "accuracy" is just a uniform random number
    score = np.random.rand()

    # Nothing to update unless this run strictly beats the record
    if score <= top_score:
        continue

    top_score,top_run = score,run_i

# Pack the record as [score, run index]
best = [top_score,top_run]

print( f'Highest "accuracy" was {100*best[0]:.2f}% in run {best[1]+1}.' )


In [None]:
# %% Data

# Cluster size and spread (standard deviation of the Gaussian noise)
n_clust = 300
blur    = 1

# Cluster centres as [x,y]
A = [ 1,1 ]
B = [ 5,1 ]
C = [ 4,3 ]

# Noisy [x,y] coordinates around each centre (drawn in the order A, B, C)
a,b,c = [ [ centre[0]+np.random.randn(n_clust)*blur, centre[1]+np.random.randn(n_clust)*blur ]
          for centre in (A,B,C) ]

# True labels: 0 for cluster A, 1 for cluster B, 2 for cluster C
labels_np = np.repeat(np.arange(3.),n_clust)

# Concatenate into a matrix with one row per point and one column per dimension
data_np = np.concatenate((a,b,c),axis=1).T

# Convert to PyTorch tensors
data   = torch.tensor(data_np).float()
labels = torch.tensor(labels_np).long()

# Plotting (one marker and colour per cluster)
phi = (1 + np.sqrt(5)) / 2
fig = plt.figure(figsize=(phi*6,6))

cmaps = plt.cm.plasma(np.linspace(0.2,0.9,len(np.unique(labels))))

for cluster_i,marker in enumerate(('s','o','^')):
    rows = np.where(labels==cluster_i)[0]
    plt.plot(data[rows,0],data[rows,1],marker,color=cmaps[cluster_i],alpha=.5)

plt.grid(color=[.9,.9,.9])
plt.title('Some data')
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')

# Save to disk before showing, then fetch the file through Colab
fig_name = 'figure38_save_best_model.png'
plt.savefig(fig_name)

plt.show()

files.download(fig_name)


In [None]:
# %% Create train, dev, and test datasets

# Number of samples per partition (train, dev, test)
partitions = [ 3*n_clust-400,200,200 ]

# First cut with scikit-learn: training set versus everything else
first_split = train_test_split(data,labels,train_size=partitions[0])
train_data,other_data, train_labels,other_labels = first_split

# Second cut with scikit-learn: the remainder becomes dev set and test set
second_split = train_test_split(other_data,other_labels,train_size=partitions[1])
dev_data,test_data, dev_labels,test_labels = second_split

# Report the shapes
print('   Total data size: ' + str(data.shape) + '\n')
for caption,subset in ( ('Training data size: ',train_data),
                        ('  Devset data size: ',dev_data),
                        ('    Test data size: ',test_data) ):
    print(caption + str(subset.shape))

# Wrap features and labels into PyTorch Datasets (the names are reused)
train_data = TensorDataset(train_data,train_labels)
dev_data   = TensorDataset(dev_data,dev_labels)
test_data  = TensorDataset(test_data,test_labels)

# Dataloaders: shuffled mini-batches for training, a single batch holding the
# whole set for dev and test
batch_size   = 30
train_loader = DataLoader(train_data,batch_size=batch_size,shuffle=True,drop_last= True)
dev_loader   = DataLoader(dev_data,batch_size=len(dev_data))
test_loader  = DataLoader(test_data,batch_size=len(test_data))


In [None]:
# %% Model class

def gen_model():
    """Build a new network together with its loss function and optimiser.

    Returns
    -------
    tuple
        (network, loss function, optimiser): a small fully connected network
        mapping 2 inputs to 3 outputs, a cross-entropy loss, and an SGD
        optimiser (learning rate 0.01) bound to the network's parameters.
    """

    class model(nn.Module):
        """Fully connected 2 -> 8 -> 8 -> 3 network with ReLU activations."""

        def __init__(self):
            super().__init__()

            # Layers: input, hidden, output
            self.input  = nn.Linear(2,8)
            self.hid    = nn.Linear(8,8)
            self.output = nn.Linear(8,3)

        def forward(self,x):
            # ReLU after the input and hidden layers; the output layer returns
            # its raw values
            hidden = F.relu(self.hid(F.relu(self.input(x))))
            return self.output(hidden)

    # Fresh instance plus the objects needed to train it
    net = model()
    return net, nn.CrossEntropyLoss(), torch.optim.SGD(net.parameters(),lr=0.01)


In [None]:
# %% Function to train the model

def train_model():
    """Train a freshly generated network and remember its best dev-set epoch.

    A new model, loss function and optimiser are obtained from `gen_model()`
    and trained for a fixed number of epochs on the batches of `train_loader`.
    After every epoch the network is scored on one batch drawn from
    `dev_loader`; whenever that score beats the best one seen so far, the
    score and a deep copy of the weights (`state_dict`) are stored.

    Returns
    -------
    train_acc : torch.Tensor
        Per-epoch training accuracy in percent (mean over the batches).
    dev_acc : torch.Tensor
        Per-epoch accuracy in percent on the dev-set batch.
    losses : torch.Tensor
        Per-epoch training loss (mean over the batches).
    ANN : nn.Module
        The network as it is after the last epoch.
    best_model : dict
        'Accuracy' is the highest dev-set accuracy (a Python float once an
        epoch has beaten the initial 0) and 'Model' is the matching deep copy
        of the `state_dict` (None if no epoch ever scored above 0).
    """

    # Best dev-set accuracy seen so far and the weights that achieved it
    best_model = {'Accuracy':0, 'Model':None}

    # Number of epochs and model instance
    num_epochs = 100
    ANN,loss_function,optimizer = gen_model()

    # Preallocate variables
    losses    = torch.zeros(num_epochs)
    train_acc = torch.zeros(num_epochs)
    dev_acc   = torch.zeros(num_epochs)

    # Training loop
    for epoch_i in range(num_epochs):

        # Batches loop
        batch_acc  = []
        batch_loss = []

        for X,y in train_loader:

            # Forward prop and loss
            yHat = ANN(X)
            loss = loss_function(yHat,y)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Compute loss and accuracy for this batch
            batch_loss.append(loss.item())
            batch_acc.append(100*torch.mean((torch.argmax(yHat,axis=1)==y).float()).item())

        # Compute loss and accuracy for the epoch
        losses[epoch_i]    = np.mean(batch_loss)
        train_acc[epoch_i] = np.mean(batch_acc)

        # Dev-set accuracy (switch to evaluation mode, skip gradient tracking
        # to save computation, then switch back to training mode)
        ANN.eval()
        X,y = next(iter(dev_loader))
        with torch.no_grad():
            yHat = ANN(X)

        dev_acc[epoch_i] = 100*torch.mean((torch.argmax(yHat,axis=1)==y).float()).item()
        ANN.train()

        # Store this model if it's the best so far
        if dev_acc[epoch_i] > best_model['Accuracy']:

            # Record the accuracy of THIS epoch as a plain float. Indexing with
            # -1 would instead keep a view onto the last (still zero) slot of
            # dev_acc, so the comparison above would pass on every epoch and
            # the snapshot would always end up being the final model.
            best_model['Accuracy'] = dev_acc[epoch_i].item()

            # Deep copy, because state_dict() references the live parameters
            # that keep changing during training
            best_model['Model']    = copy.deepcopy(ANN.state_dict())

    return train_acc,dev_acc,losses,ANN,best_model


In [None]:
# %% Run the model

# Train once, then unpack the accuracy/loss curves, the final network, and the
# record of the best model
results = train_model()
train_acc,dev_acc,losses,ANN,best_model = results

print(f'Best accuracy: {best_model["Accuracy"]:.2f}%')


In [None]:
# %% Plotting

# Two panels side by side: training loss (left), train/dev accuracy (right)
phi = (1 + np.sqrt(5)) / 2
fig,ax = plt.subplots(1,2,figsize=(1.5*phi*6,6))
loss_ax,acc_ax = ax

# Loss per epoch
loss_ax.plot(losses,'o-')
loss_ax.set(ylabel='Loss',xlabel='epoch',title='Losses')

# Accuracy per epoch, one curve per dataset
for curve,curve_label in ( (train_acc,'Train'),(dev_acc,'Devset') ):
    acc_ax.plot(curve,'o-',label=curve_label)

acc_ax.set(ylabel='Accuracy (%)',xlabel='Epoch',title='Accuracy')
# Optional zoom on the last epochs:
#acc_ax.set_ylim([85,95])
#acc_ax.set_xlim([80,105])
acc_ax.legend()

# Save to disk before showing, then fetch the file through Colab
fig_name = 'figure39_save_best_model.png'
plt.savefig(fig_name)

plt.show()

files.download(fig_name)


In [None]:
# %% Test on actual test set - intuitive but wrong way

# best_model["Model"] holds a copy of a state_dict (the saved parameters), not a
# network, so it cannot be called on data. The resulting TypeError is caught and
# printed so that the demonstration does not stop a "Run all" of the notebook.
X,y  = next(iter(test_loader))

try:
    yHat = best_model["Model"](X)
except TypeError as err:
    print(f'Cannot pass data through a state_dict: {err}')


In [None]:
# %% Test on actual test set - correct way

# Recreate a network with the same architecture and load the best weights
best_network = gen_model()[0]
best_network.load_state_dict(best_model["Model"])

# Evaluation mode and no gradient tracking, as for the dev-set evaluation done
# during training
best_network.eval()

X,y  = next(iter(test_loader))
with torch.no_grad():
    yHat = best_network(X)

# Percentage of test samples whose highest-scoring class matches the label
best_acc = 100*torch.mean((torch.argmax(yHat,axis=1)==y).float()).item()
print(f'Best accuracy: {best_acc:.2f}%')


In [None]:
# %% Plotting

phi = (1 + np.sqrt(5)) / 2
fig = plt.figure(figsize=(phi*6,6))

# Accuracy curves over the epochs
for curve,curve_label in ( (train_acc,'Train accuracy'),(dev_acc,'Dev set accuracy') ):
    plt.plot(curve,'o-',label=curve_label)

# Horizontal reference line at the test accuracy of the best dev model
x_span = [0,len(dev_acc)]
plt.plot(x_span,[best_acc]*2,'r--',label='Best dev model on test data')

plt.ylabel('Accuracy (%)')
plt.xlabel('Epoch')
plt.title('Accuracy')

# Zoom the y-axis to +/- 5 percentage points around the reference line
plt.ylim([best_acc-5,best_acc+5])
plt.legend()

# Save to disk before showing, then fetch the file through Colab
fig_name = 'figure43_save_best_model.png'
plt.savefig(fig_name)

plt.show()

files.download(fig_name)


In [None]:
# %% A note - More datasets

# If you want more data, the most popular repository is now kaggle.com; it's
# focused on competitions, but you can still get the data and see others'
# solutions.

# Otherwise you can also google by topic and see what comes up.

