In [None]:
# %% Deep learning - Section 9.72
#    Dropout regularisation in practice

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F

from torch.utils.data                 import DataLoader,TensorDataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% Some synthetic nonlinear data

# Points per class and the radii of the two noisy rings
n_clust = 200
r1 = 10
r2 = 15

# Angles covering two full turns (0 to 4*pi)
th = np.linspace(0,4*np.pi,n_clust)

# Class "a": ring of radius r1, noise scaled by 3 on the first dimension only
a = [ r1*np.cos(th) + 3*np.random.randn(n_clust),
      r1*np.sin(th) +   np.random.randn(n_clust) ]

# Class "b": ring of radius r2, noise scaled by 3 on the second dimension only
b = [ r2*np.cos(th) +   np.random.randn(n_clust),
      r2*np.sin(th) + 3*np.random.randn(n_clust) ]

# Join the two classes side by side, then transpose so that rows are samples
# and columns are the two dimensions -> shape (2*n_clust, 2)
data = np.concatenate(( a,b ),axis=1).T

# Column vector of labels: n_clust zeros (class "a") followed by n_clust ones (class "b")
labels_np = np.concatenate(( np.zeros((n_clust,1)),np.ones((n_clust,1)) ),axis=0)

# Hand everything to PyTorch as float32 tensors
data   = torch.tensor(data,dtype=torch.float32)
labels = torch.tensor(labels_np,dtype=torch.float32)


In [None]:
# %% Plotting

# Row indices of each class (labels is a column vector, so the first
# element returned by np.where holds the row indices)
rows_class0 = np.where(labels==0)[0]
rows_class1 = np.where(labels==1)[0]

fig = plt.figure(figsize=(5,5))

# Squares for label 0, circles for label 1
plt.plot(data[rows_class0,0],data[rows_class0,1],'s')
plt.plot(data[rows_class1,0],data[rows_class1,1],'o')
plt.title("Some data")
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')

# Write the figure to disk, render it, then hand the file to the Colab download helper
fig_name = 'figure1_dropout_regularisation.png'
plt.savefig(fig_name)

plt.show()

files.download(fig_name)


In [None]:
# %% Split data into DataLoaders

# Hold out 20% of the samples for testing (scikit-learn split)
split_parts = train_test_split(data,labels,test_size=0.2)
train_x,test_x,train_labels,test_labels = split_parts

# Pair samples with their labels as PyTorch datasets
train_data = TensorDataset(train_x,train_labels)
test_data  = TensorDataset(test_x,test_labels)

# Mini-batches (reshuffled on every pass) for training; the whole test set as one batch.
# An alternative is batch_size = int(train_data.tensors[0].shape[0]/4), but
# hard-coding is often better to avoid huge batches.
batch_size   = 16
n_test       = len(test_data)
train_loader = DataLoader(train_data,shuffle=True,batch_size=batch_size)
test_loader  = DataLoader(test_data,batch_size=n_test)


In [None]:
# %% Create the model class

class model_class(nn.Module):
    """Small feed-forward classifier with dropout after each hidden ReLU.

    Layout: Linear(2,128) -> ReLU -> dropout -> Linear(128,128) -> ReLU
    -> dropout -> Linear(128,1). The output is returned as-is (no sigmoid
    is applied inside the model).
    """

    def __init__(self,dropout_rate):
        """Create the three linear layers and remember the dropout probability.

        dropout_rate : value passed as `p` to F.dropout in forward().
        """
        super().__init__()

        # Dropout probability shared by both dropout steps
        self.dr = dropout_rate

        # Fully connected layers
        self.input  = nn.Linear(  2,128)
        self.hidden = nn.Linear(128,128)
        self.output = nn.Linear(128,1  )

    def forward(self,x):
        """Forward propagation; returns one value per input row."""

        # Input and hidden stages follow the same recipe: linear -> ReLU -> dropout.
        # training=self.training switches the dropout off in evaluation mode.
        for layer in (self.input,self.hidden):
            x = F.dropout(F.relu(layer(x)),p=self.dr,training=self.training)

        # Output layer: no activation and no dropout here
        return self.output(x)


In [None]:
# %% Quick test

# Smoke test: push 10 random 2-D samples through an untrained model
# built with a dropout rate of 0.25 and show the raw outputs
tmp_net  = model_class(0.25)
tmp_data = torch.randn(10,2)

yHat = tmp_net(tmp_data)
print(yHat)


In [None]:
# %% Function to create the model

def gen_model(drop_rate,learning_rate=0.002):
    """Create a fresh model together with its loss function and optimizer.

    Parameters
    ----------
    drop_rate     : dropout probability handed to model_class.
    learning_rate : step size for the SGD optimizer (default 0.002, the
                    value that used to be hard-coded here).

    Returns
    -------
    (ANN, loss_fun, optimizer) : the model instance, an nn.BCEWithLogitsLoss
    instance, and a torch.optim.SGD optimizer over the model's parameters.
    """

    # Grab an instance of the model class
    ANN = model_class(drop_rate)

    # Loss function (operates on the raw model outputs)
    loss_fun = nn.BCEWithLogitsLoss()

    # Optimizer
    optimizer = torch.optim.SGD(ANN.parameters(),lr=learning_rate)

    return ANN,loss_fun,optimizer


In [None]:
# %% Function to train the model

# Parameters
num_epochs = 1000

def _batch_accuracy(logits,targets):
    """Percentage of entries where the thresholded output (logit > 0) equals the target."""
    return 100*torch.mean(((logits>0)==targets).float()).item()

# Note how here the model, the loss function, and the optimizer are inputs
def train_model(ANN,loss_fun,optimizer,n_epochs=None,train_dl=None,test_dl=None):
    """Train `ANN` and record train/test accuracy after every epoch.

    Parameters
    ----------
    ANN       : model to train (called as ANN(X)).
    loss_fun  : callable loss, invoked as loss_fun(yHat, y).
    optimizer : optimizer already bound to the model's parameters.
    n_epochs  : number of epochs; defaults to the module-level `num_epochs`.
    train_dl  : iterable of (X, y) training batches; defaults to the
                module-level `train_loader`.
    test_dl   : iterable of (X, y) test batches, of which only the first
                batch is evaluated; defaults to the module-level `test_loader`.

    Returns
    -------
    (train_acc, test_acc) : two lists with one accuracy (in percent) per epoch.
    Training accuracy is the mean of the per-batch accuracies measured during
    the training pass (model in training mode); test accuracy is measured in
    evaluation mode.
    """

    # Fall back to the notebook-level settings when nothing is passed explicitly
    if n_epochs is None:
        n_epochs = num_epochs
    if train_dl is None:
        train_dl = train_loader
    if test_dl is None:
        test_dl = test_loader

    # Preallocate accuracies
    train_acc = []
    test_acc  = []

    # Loop over epochs and batches
    for epoch_i in range(n_epochs):

        # Switch training mode on
        ANN.train()

        batch_acc = []

        for X,y in train_dl:

            # Forward propagation and loss
            yHat = ANN(X)
            loss = loss_fun(yHat,y)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Batch training accuracy
            batch_acc.append( _batch_accuracy(yHat,y) )

        # Average training accuracy of batches
        train_acc.append(np.mean(batch_acc))

        # Test accuracy: switch training off and evaluate the first test batch.
        # No gradients are needed here, so skip building the autograd graph.
        ANN.eval()
        with torch.no_grad():
            X,y  = next(iter(test_dl))
            yHat = ANN(X)
        test_acc.append( _batch_accuracy(yHat,y) )

    # Function output
    return train_acc, test_acc


In [None]:
# %% Function for 1D smoothing filter

# Try k=1,5,20
def smooth(x,k=20):
    """Moving average of `x` using a flat window of k equal weights (1/k each).

    Uses np.convolve with mode='same', so the result has length
    max(len(x), k) and samples beyond the ends of `x` count as zeros.
    """
    boxcar = np.full(k,1/k)
    return np.convolve(x,boxcar,mode='same')


In [None]:
# %% Functions for 1D smoothing filter

# Improved for edge effects - adaptive window
def smooth_adaptive(x,k=20):
    """Moving average whose window shrinks near the ends instead of padding.

    For sample i the mean is taken over x[i-k//2 : i+k//2+1], clipped to the
    valid index range, so up to 2*(k//2)+1 samples are averaged.

    Parameters
    ----------
    x : sequence or array of numbers.
    k : nominal window width (default 20).

    Returns
    -------
    Array with the same shape as `x`. The result is always floating point:
    integer input no longer yields an integer array with truncated means.
    """
    x = np.asarray(x)

    # Keep an existing float/complex dtype, otherwise promote to float so the
    # averages are not silently cast back to integers on assignment
    out_dtype = x.dtype if np.issubdtype(x.dtype,np.inexact) else np.float64
    smoothed  = np.zeros(x.shape,dtype=out_dtype)
    half_k    = k // 2

    for i in range(len(x)):
        start       = max(0, i-half_k)
        end         = min(len(x), i+half_k + 1)
        smoothed[i] = np.mean(x[start:end])

    return smoothed

# Improved for edge effects - padding ('edge' repeat edge value)
def smooth_padding(x,k=20):
    """Moving average of `x` after extending both ends with the edge values.

    The input is padded with k//2 copies of the first value and (k-1)//2
    copies of the last value, i.e. k-1 samples in total, so the 'valid'
    convolution returns exactly len(x) samples for even and odd k alike.
    (Padding k//2 on both sides returned len(x)+1 samples for even k.)

    Parameters
    ----------
    x : 1-D sequence or array of numbers.
    k : window width (default 20).

    Returns
    -------
    1-D float array of length len(x); an empty array for empty input.
    """
    x = np.asarray(x,dtype=float)

    # np.pad(mode='edge') cannot extend an empty axis; nothing to smooth anyway
    if x.size == 0:
        return x

    pad_left  = k // 2
    pad_right = (k - 1) // 2
    padded    = np.pad(x,(pad_left,pad_right),mode='edge')
    kernel    = np.ones(k)/k

    return np.convolve(padded,kernel,mode='valid')


In [None]:
# %% Test the model

# Single training run without dropout
drop_rate = 0

# Build the model, loss function and optimizer, then train and keep the per-epoch accuracies
model_parts            = gen_model(drop_rate)
ANN,loss_fun,optimizer = model_parts
train_acc,test_acc     = train_model(*model_parts)


In [None]:
# Plotting

fig = plt.figure(figsize=(10,5))

# Smoothed accuracy curves: squares for train, circles for test
for acc_curve,line_style in ((train_acc,'s-'),(test_acc,'o-')):
    plt.plot(smooth(acc_curve),line_style)

plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.ylim([20,100])
plt.legend(['Train','Test'])
plt.title(f'Dropout rate = {drop_rate}')

# Write the figure to disk, render it, then hand the file to the Colab download helper
fig_name = 'figure2_dropout_regularisation.png'
plt.savefig(fig_name)

plt.show()

files.download(fig_name)


In [None]:
# Plotting with improved smoothing

# Same figure recipe for both edge-aware smoothers: adaptive window first, then padding
smoother_figures = ( (smooth_adaptive,'figure10_dropout_regularisation_extra1.png'),
                     (smooth_padding, 'figure11_dropout_regularisation_extra1.png') )

for smoother,fig_name in smoother_figures:

    fig = plt.figure(figsize=(10,5))

    # Squares for train, circles for test
    plt.plot(smoother(train_acc),'s-')
    plt.plot(smoother(test_acc),'o-')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.ylim([20,100])
    plt.legend(['Train','Test'])
    plt.title(f'Dropout rate = {drop_rate}')

    # Write the figure to disk, render it, then hand the file to the Colab download helper
    plt.savefig(fig_name)

    plt.show()

    files.download(fig_name)


In [None]:
# %% Parametric experiment over dropout rates

# Dropout rates 0.0, 0.1, ..., 0.9; one (train, test) result row per rate
drop_rates = np.arange(10)/10
results    = np.zeros((len(drop_rates),2))

for drop_i,rate in enumerate(drop_rates):

    # Fresh model for every rate, trained from scratch
    ANN,loss_fun,optimizer = gen_model(rate)
    train_acc,test_acc     = train_model(ANN,loss_fun,optimizer)

    # Summarise each run by its mean accuracy over the last 100 epochs
    results[drop_i,:] = np.mean(train_acc[-100:]),np.mean(test_acc[-100:])


In [None]:
# %% Plotting

fig,ax = plt.subplots(1,2,figsize=(15,5))
acc_ax,gap_ax = ax

# Left panel: average train and test accuracy for every dropout rate
acc_ax.plot(drop_rates,results,'o-')
acc_ax.set_xlabel('Dropout proportion')
acc_ax.set_ylabel('Average accuracy')
acc_ax.legend(['Train','Test'])

# Right panel: train minus test accuracy, with a dashed reference line at zero
train_minus_test = -np.diff(results,axis=1)
gap_ax.plot(drop_rates,train_minus_test,'o-')
gap_ax.plot([0,.9],[0,0],'k--')
gap_ax.set_xlabel('Dropout proportion')
gap_ax.set_ylabel('Train-test difference (in percent of acc)')

# Write the figure to disk, render it, then hand the file to the Colab download helper
fig_name = 'figure3_dropout_regularisation.png'
plt.savefig(fig_name)

plt.show()

files.download(fig_name)


In [None]:
# %% Exercise 1
#    Explore the effects of the smoothing parameter ('k' in the smooth() function). How much smoothing is "too much"?
#    Note that this is a subjective judgment; the goal here is to gain some familiarity with smoothing filters.

# Indeed, it is quite subjective: higher k values produce smoother curves, at the cost of larger edge effects (wider filter)


In [None]:
# %% Exercise 2
#    Dropout wasn't too impressive here. Perhaps it would be more helpful with a different number of nodes in the hidden
#    layer? Try running the experiment again using half as many hidden nodes, and twice as many nodes. Tip: take screenshots
#    of each result to compare the three runs.

# Nope, modifying the width of the network doesn't really change anything
