In [None]:
# %% Deep learning - Section 9.73
#    Dropout example 2

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F

from torch.utils.data                 import DataLoader,TensorDataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% Import Iris dataset

# Load the Iris dataset through seaborn (returns a pandas dataframe)
iris = sns.load_dataset('iris')

# Features: the first four columns, converted to a float32 tensor
data = torch.tensor(iris.iloc[:,0:4].values).float()

# Targets: one integer code per species name
species_codes = {'setosa':0, 'versicolor':1, 'virginica':2}
labels = torch.zeros(len(data),dtype=torch.long)
for species_name,code in species_codes.items():
    labels[iris.species==species_name] = code


In [None]:
# %% Split into train and test data

# Seed the random number generators so that the train/test partition (and,
# when the notebook is run top to bottom, the batch shuffling and the weight
# initialisation that follow) are repeatable across runs
seed = 42
torch.manual_seed(seed)

# Split with scikit-learn (80% train, 20% test)
train_tensors,test_tensors,train_labels,test_labels = train_test_split(
    data,labels,test_size=0.2,random_state=seed)

# Convert into PyTorch datasets (separate names, so that the raw tensors are
# not overwritten by objects of a different type)
train_data = TensorDataset(train_tensors,train_labels)
test_data  = TensorDataset(test_tensors,test_labels)

# Convert into DataLoader objects: shuffled mini-batches for training, and one
# single batch holding the whole test set
batch_size   = 16
train_loader = DataLoader(train_data,batch_size=batch_size,shuffle=True)
test_loader  = DataLoader(test_data,batch_size=len(test_data))


In [None]:
# %% Model class

class model_class(nn.Module):
    """Small feed-forward classifier (4 -> 12 -> 12 -> 3) with dropout.

    Dropout follows the ReLU of the input layer and of the hidden layer, and
    is only active while the module is in training mode.
    """

    def __init__(self,dropout_rate):
        super().__init__()

        # Dropout probability used in `forward`
        self.dr = dropout_rate

        # Fully connected layers
        self.input  = nn.Linear( 4,12)
        self.hidden = nn.Linear(12,12)
        self.output = nn.Linear(12,3 )

    def forward(self,x):
        """Return the output of the last linear layer for a batch `x` (4 features per sample)."""

        # ReLU + dropout after each of the first two layers; `self.training`
        # switches dropout off during evaluation
        for layer in (self.input,self.hidden):
            x = F.dropout(F.relu(layer(x)),p=self.dr,training=self.training)

        # Output layer (no activation here)
        return self.output(x)


In [None]:
# Test model

# Sanity check: push a random batch (10 samples, 4 features) through an
# untrained network
tmp_data = torch.randn(10,4)
tmp_net  = model_class(dropout_rate=0.25)
yHat     = tmp_net(tmp_data)

# Last expression of the cell, so that the notebook displays the output
yHat


In [None]:
# %% Function to create model instance

def gen_model(dropout_rate,learning_rate=0.005,model_cls=None):
    """Create a fresh model together with its loss function and optimizer.

    Parameters
    ----------
    dropout_rate : float
        Dropout probability, passed as the only argument to the model class.
    learning_rate : float, optional
        Learning rate of the SGD optimizer (default 0.005).
    model_cls : callable, optional
        Class (or factory) called as `model_cls(dropout_rate)` to build the
        network. When None, the `model_class` currently defined in the
        notebook is used, so re-defining `model_class` in a later cell still
        changes what this function builds.

    Returns
    -------
    ANN : nn.Module
        The newly created (untrained) model.
    loss_fun : nn.CrossEntropyLoss
        Loss function.
    optimizer : torch.optim.SGD
        Optimizer bound to the parameters of `ANN`.
    """

    # Resolve the default at call time (not at definition time)
    if model_cls is None:
        model_cls = model_class

    # Model instance, loss, optimizer
    ANN       = model_cls(dropout_rate)
    loss_fun  = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(ANN.parameters(),lr=learning_rate)

    return ANN,loss_fun,optimizer


In [None]:
# %% Function to train the model

# Global parameters
num_epochs = 500

def train_model(model=None,loss_fn=None,optim=None,train_dl=None,test_dl=None,n_epochs=None):
    """Train a classifier with mini-batches and track train/test accuracy.

    Every argument is optional. Anything left as None falls back to the
    notebook-level global of the same role (`ANN`, `loss_fun`, `optimizer`,
    `train_loader`, `test_loader`, `num_epochs`), so a bare `train_model()`
    call keeps working as before.

    Parameters
    ----------
    model : nn.Module, optional
        Network to train (modified in place; left in eval mode on return).
    loss_fn : callable, optional
        Loss called as `loss_fn(predictions, targets)`.
    optim : torch.optim.Optimizer, optional
        Optimizer bound to the parameters of `model`.
    train_dl : iterable of (X, y), optional
        Training batches.
    test_dl : iterable of (X, y), optional
        Test batches; only the FIRST batch is used for the test accuracy.
    n_epochs : int, optional
        Number of passes over `train_dl`.

    Returns
    -------
    train_acc : list
        Per-epoch training accuracy (%), i.e., the unweighted mean of the
        batch accuracies, measured in train mode (dropout active).
    test_acc : list
        Per-epoch test accuracy (%), measured in eval mode (dropout off).
    """

    # Fall back to the notebook globals for anything not passed explicitly
    if model    is None: model    = ANN
    if loss_fn  is None: loss_fn  = loss_fun
    if optim    is None: optim    = optimizer
    if train_dl is None: train_dl = train_loader
    if test_dl  is None: test_dl  = test_loader
    if n_epochs is None: n_epochs = num_epochs

    # Accuracies initialise
    train_acc = []
    test_acc  = []

    # Loop over epochs
    for _ in range(n_epochs):

        # Switch learning on (enables dropout)
        model.train()

        # Loop over training data batches
        batch_acc = []
        for X,y in train_dl:

            # Forward propagation and loss
            yHat = model(X)
            loss = loss_fn(yHat,y)

            # Backpropagation
            optim.zero_grad()
            loss.backward()
            optim.step()

            # Training accuracy for current batch
            batch_acc.append( 100*torch.mean((torch.argmax(yHat,axis=1)==y).float()).item() )

        # Get average accuracy over the batches of this epoch
        train_acc.append( np.mean(batch_acc) )

        # Test accuracy (eval mode switches dropout off; no_grad avoids
        # building an autograd graph that is never used)
        model.eval()
        X,y = next(iter(test_dl))
        with torch.no_grad():
            pred_labels = torch.argmax(model(X),axis=1)
        test_acc.append( 100*torch.mean((pred_labels==y).float()).item() )

    # Function output
    return train_acc,test_acc


In [None]:
# %% Test model

# Single training run without dropout
dropoutRate = 0.0

ANN,loss_fun,optimizer = gen_model(dropout_rate=dropoutRate)
train_acc,test_acc = train_model()


In [None]:
# %% Plotting

# Accuracy curves of the single training run
fig,ax = plt.subplots(figsize=(10,5))

ax.plot(train_acc,'s-',label='Train')
ax.plot(test_acc,'o-',label='Test')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy (%)')
ax.set_title(f'Dropout rate = {dropoutRate}')
ax.legend()

# Save to disk, show inline, then trigger the Colab download
fig.savefig('figure12_dropout_regularisation.png')
plt.show()
files.download('figure12_dropout_regularisation.png')


In [None]:
# %% Parametric experiment on dropout rates

# Dropout rates 0.0, 0.1, ..., 0.9; one row of results per rate
# (column 0 = train accuracy, column 1 = test accuracy)
dropoutRates = np.arange(10)/10
results      = np.zeros((len(dropoutRates),2))

for drop_i,rate in enumerate(dropoutRates):

    # Fresh model for every dropout rate (train_model reads these globals)
    ANN,loss_fun,optimizer = gen_model(rate)
    train_acc,test_acc     = train_model()

    # Summarise each run by the mean accuracy over its last 50 epochs
    results[drop_i,:] = np.mean(train_acc[-50:]),np.mean(test_acc[-50:])


In [None]:
# plot the experiment results
fig,ax = plt.subplots(1,2,figsize=(15,5))
ax_acc,ax_gap = ax

# Left panel: average train and test accuracy for each dropout rate
ax_acc.plot(dropoutRates,results,'o-')
ax_acc.set_xlabel('Dropout proportion')
ax_acc.set_ylabel('Average accuracy')
ax_acc.legend(['Train','Test'])

# Right panel: train minus test accuracy, with a dashed zero reference line
train_minus_test = results[:,[0]] - results[:,[1]]
ax_gap.plot(dropoutRates,train_minus_test,'o-')
ax_gap.plot([0,.9],[0,0],'k--')
ax_gap.set_xlabel('Dropout proportion')
ax_gap.set_ylabel('Train-test difference (acc%)')

# Save to disk, show inline, then trigger the Colab download
fig.savefig('figure13_dropout_regularisation.png')
plt.show()
files.download('figure13_dropout_regularisation.png')


In [None]:
# %% Exercise 1
#    Remove the ReLU nonlinearity from the network. Does that change the effect of dropout proportion on performance?

# Interesting case: with a linear model the training accuracy seems to decrease with a roughly quadratic pattern
# as the dropout proportion grows, while the test accuracy stays constant no matter the dropout proportion. The
# linear model boils down to a single layer, no matter the complexity of the original network, so at first I
# expected the training and the test data to behave similarly. A likely explanation for the discrepancy is that
# the training accuracy is computed in train mode (ANN.train()), i.e., from predictions made with dropout active,
# whereas the test accuracy is computed in eval mode (ANN.eval()), where dropout is switched off.

class model_class(nn.Module):
    """Purely linear variant of the 4 -> 12 -> 12 -> 3 network (no ReLU).

    Dropout still follows the input and the hidden layers while the module is
    in training mode.
    """

    def __init__(self,dropout_rate):
        super().__init__()

        # Dropout probability used in `forward`
        self.dr = dropout_rate

        # Fully connected layers
        self.input  = nn.Linear( 4,12)
        self.hidden = nn.Linear(12,12)
        self.output = nn.Linear(12,3 )

    def forward(self,x):
        """Map a batch `x` (4 features per sample) to 3 output scores."""

        # Linear layer followed by dropout, twice; `self.training` switches
        # dropout off during evaluation
        after_input  = F.dropout(self.input(x),p=self.dr,training=self.training)
        after_hidden = F.dropout(self.hidden(after_input),p=self.dr,training=self.training)

        # Output layer
        return self.output(after_hidden)


In [None]:
# %% Exercise 2
#    I mentioned that dropout doesn't necessarily improve performance for shallow models. What happens if you increase
#    the complexity of this model, for example by adding several additional (and wider) hidden layers?

# Here I tried two options, (1) keep the same depth and make the model broader, and (2) increase the depth
# rather than the width (see code below). In both cases, making the model more complex seems to help a bit
# with the performance and the dropout regularisation; the performance with both the training and test data
# is "pushed up" a bit and stays higher for longer before collapsing for really high values of dropout rate.
# So yes, as mentioned in the lectures, dropout regularisation benefits from more complex models (among
# other things).

# Wider model
class model_class(nn.Module):
    """Wider variant of the network: 4 -> 128 -> 128 -> 3 with ReLU and dropout."""

    def __init__(self,dropout_rate):
        super().__init__()

        # Layers (same depth as the base model, more units per layer)
        n_units = 128
        self.input  = nn.Linear(4,n_units)
        self.hidden = nn.Linear(n_units,n_units)
        self.output = nn.Linear(n_units,3)

        # Dropout probability used in `forward`
        self.dr = dropout_rate

    def _relu_drop(self,x):
        """ReLU followed by dropout (dropout is switched off during evaluation)."""
        return F.dropout(F.relu(x),p=self.dr,training=self.training)

    def forward(self,x):
        """Map a batch `x` (4 features per sample) to 3 output scores."""

        x = self._relu_drop(self.input(x))
        x = self._relu_drop(self.hidden(x))

        # Output layer (no activation here)
        return self.output(x)

# Deeper model
class model_class_deep(nn.Module):
    """Deeper variant of the network: input layer, four 12-unit hidden layers, output layer.

    Every layer except the output is followed by ReLU and dropout; dropout is
    only active while the module is in training mode.
    """

    def __init__(self,dropout_rate):
        super().__init__()

        # Layers: 4 -> 12, then hidden1..hidden4 (12 -> 12 each), then 12 -> 3
        self.input = nn.Linear(4,12)
        for layer_i in range(1,5):
            setattr(self,f'hidden{layer_i}',nn.Linear(12,12))
        self.output = nn.Linear(12,3)

        # Dropout probability used in `forward`
        self.dr = dropout_rate

    def forward(self,x):
        """Map a batch `x` (4 features per sample) to 3 output scores."""

        # Input layer and the four hidden layers share the same recipe:
        # linear -> ReLU -> dropout
        stack = (self.input,self.hidden1,self.hidden2,self.hidden3,self.hidden4)
        for layer in stack:
            x = F.dropout(F.relu(layer(x)),p=self.dr,training=self.training)

        # Output layer (no activation here)
        return self.output(x)
