# Coursework 1

This notebook is intended to be used as a starting point for your experiments. The instructions can be found in MLP2024_25_CW1_Spec.pdf (see Learn, Assignment Submission, Coursework 1). The methods provided here are just helper functions. If you want more complex graphs, such as side-by-side comparisons of different experiments, you should learn more about matplotlib and implement them yourself. Before each experiment, remember to re-initialize the neural network weights and reset the data providers so that you get a properly initialized experiment. For each experiment, try to keep most hyperparameters the same except the one under investigation, so that you can understand the effect of each.

In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

def train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):
    """Train `model` with an Optimiser and return the recorded statistics.

    Despite its name this helper does not draw anything: it only runs the
    training loop and returns the statistics, so callers build their own
    figures from the returned values.

    Args:
        model: Model object handed to the Optimiser.
        error: Error (loss) object handed to the Optimiser.
        learning_rule: Learning rule object handed to the Optimiser.
        train_data: Data provider used for training.
        valid_data: Data provider used for validation.
        num_epochs: Number of epochs passed to `Optimiser.train`.
        stats_interval: Statistics interval passed to `Optimiser.train`.
        notebook: Forwarded to the Optimiser; use False when running as a
            script in a terminal.

    Returns:
        The tuple `(stats, keys, run_time)` produced by `Optimiser.train`.
        Callers in this notebook index `stats` as a 2-D array whose columns
        are looked up through `keys` (e.g. `keys['error(train)']`).
    """

    def accuracy(outputs, targets):
        # Proportion of rows whose most-probable predicted class (argmax
        # over the last axis) equals the argmax of the target row.
        return (outputs.argmax(-1) == targets.argmax(-1)).mean()

    # Monitor classification accuracy alongside the error during training.
    monitors = {'acc': accuracy}

    trainer = Optimiser(
        model, error, learning_rule, train_data, valid_data, monitors,
        notebook=notebook)

    # Train for num_epochs epochs, recording statistics every
    # stats_interval epochs.
    run_stats, stat_keys, elapsed = trainer.train(
        num_epochs=num_epochs, stats_interval=stats_interval)

    return run_stats, stat_keys, elapsed

In [2]:
# The code below sets up the data providers, random number
# generator and logger objects needed for training runs. As
# loading the data from file takes a little while, you generally
# will not want to re-create the data providers on
# every training run. If you wish to reset their state you
# should instead use the .reset() method of the data providers.
import numpy as np
import logging
import sys
# sys.path.append('/path/to/mlpractical')
from mlp.data_providers import MNISTDataProvider, EMNISTDataProvider

# Seed a random number generator; the same generator object is shared by
# both data providers below.
seed = 11102019 
rng = np.random.RandomState(seed)
batch_size = 100
# Set up the root logger to report INFO-level messages about the training run.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Replace any existing handlers with a single stream handler. Note that
# StreamHandler() with no argument writes to sys.stderr, not stdout.
logger.handlers = [logging.StreamHandler()]

# Create data provider objects for the EMNIST data set
train_data = EMNISTDataProvider('train', batch_size=batch_size, rng=rng)
valid_data = EMNISTDataProvider('valid', batch_size=batch_size, rng=rng)

KeysView(NpzFile 'C:\\Users\\jptur\\~\\mlpractical\\data\\emnist-train.npz' with keys: inputs, targets)
KeysView(NpzFile 'C:\\Users\\jptur\\~\\mlpractical\\data\\emnist-valid.npz' with keys: inputs, targets)


In [3]:
# The model set up code below is provided as a starting point.
# You will probably want to add further code cells for the
# different experiments you run.

%pip install tqdm

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import AdamLearningRule
from mlp.optimisers import Optimiser

# Set up hyperparameters
learning_rate = 0.0009
num_epochs = 100
stats_interval = 1
# NOTE(review): presumably 784 input features and 47 output classes for
# EMNIST - confirm against the data provider.
input_dim, output_dim, hidden_dim = 784, 47, 128

# Initialisers passed to every AffineLayer; the weights initialiser draws
# from the shared rng.
weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Create model with ONE hidden layer (left disabled as a template)
#model = MultipleLayerModel([
#    AffineLayer(input_dim, hidden_dim, weights_init, biases_init), # hidden layer
#    ReluLayer(),
#    AffineLayer(hidden_dim, output_dim, weights_init, biases_init) # output layer
#])


error = CrossEntropySoftmaxError()
# Use an Adam learning rule
learning_rule = AdamLearningRule(learning_rate=learning_rate)

# Remember to use notebook=False when you write a script to be run in a terminal
#_ = train_model_and_plot_stats(
#    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

Note: you may need to restart the kernel to use updated packages.


In [None]:
# Width experiment: train a network with ONE ReLU hidden layer for each
# hidden layer size in hidden_dims_list, keeping every other setting fixed.
learning_rate = 0.0009
num_epochs = 100
stats_interval = 1

input_dim, output_dim = 784, 47

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

error = CrossEntropySoftmaxError()
# Use an Adam learning rule
learning_rule = AdamLearningRule(learning_rate=learning_rate)

hidden_dims_list = [32, 64, 128]

# Maps hidden layer width -> (stats, keys, run_time) tuple returned by
# train_model_and_plot_stats for that width.
width_results = {}

for hidden_dim in hidden_dims_list:
    # Reset the state of the data providers before every run.
    train_data.reset()
    valid_data.reset()

    # Build a fresh model so that no weights carry over between widths.
    model = MultipleLayerModel([
        AffineLayer(input_dim, hidden_dim, weights_init, biases_init), # hidden layer
        ReluLayer(),
        AffineLayer(hidden_dim, output_dim, weights_init, biases_init) # output layer
    ])

    width_results[hidden_dim] = train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

# Keep the per-width names that the plotting and summary cells read.
# After the loop, hidden_dim is 128 and model is the 128-unit network,
# exactly as when the three runs were written out one after another.
thirty_two = width_results[32]
sixty_four = width_results[64]
one_twenty_eight = width_results[128]
    

In [None]:
print(learning_rate)

# Each entry pairs a (stats, keys, run_time) training result with the label
# used for it in the legends.
widths = [(thirty_two, '32'), (sixty_four, '64'), (one_twenty_eight, '128')]

# Training curves use the default line style, validation curves are dashed.
# All training curves are drawn before the validation curves so the colour
# cycle is consumed in the order train-32, train-64, ..., valid-128.
curve_styles = [('train', {}), ('valid', {'linestyle': 'dashed'})]

# Plot the change in the validation and training set error over training.
fig_1 = plt.figure(figsize=(8, 4))
ax_1 = fig_1.add_subplot(111)
for split, line_kwargs in curve_styles:
    for run, width_label in widths:
        run_stats, run_keys = run[0], run[1]
        # Row 0 of stats is skipped (presumably the pre-training entry -
        # confirm against Optimiser.train), so the x axis starts at
        # stats_interval.
        epochs = np.arange(1, run_stats.shape[0]) * stats_interval
        ax_1.plot(epochs, run_stats[1:, run_keys['error(' + split + ')']],
                  label='width ' + width_label + '(' + split + ')', **line_kwargs)
ax_1.legend(loc=0)
ax_1.set_xlabel('Epoch number')
ax_1.set_ylabel('Error')

# Save through the figure object so the file never depends on which figure
# pyplot currently considers active.
fig_1.savefig('error_curve_width.png')

# Plot the change in the validation and training set accuracy over training.
fig_2 = plt.figure(figsize=(8, 4))
ax_2 = fig_2.add_subplot(111)
for split, line_kwargs in curve_styles:
    for run, width_label in widths:
        run_stats, run_keys = run[0], run[1]
        epochs = np.arange(1, run_stats.shape[0]) * stats_interval
        ax_2.plot(epochs, run_stats[1:, run_keys['acc(' + split + ')']],
                  label='width ' + width_label + '(' + split + ')', **line_kwargs)
ax_2.legend(loc=0)
ax_2.set_xlabel('Epoch number')
# Accuracy belongs on the y axis; calling set_xlabel a second time here
# would overwrite 'Epoch number' and leave the y axis unlabelled.
ax_2.set_ylabel('Accuracy')

fig_2.savefig('acc_curve_width.png')

plt.show()

In [None]:
# Depth experiment, TWO hidden layers of 128 units each.
hidden_dim = 128

# Reset the state of both data providers before this run.
train_data.reset()
valid_data.reset()

# Network: affine -> ReLU -> affine -> ReLU -> affine (output).
model = MultipleLayerModel(
    [
        AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, output_dim, weights_init, biases_init),
    ]
)

# notebook=True is for running inside a notebook; switch to notebook=False
# when this is run as a script in a terminal.
two_layer = train_model_and_plot_stats(
    model=model,
    error=error,
    learning_rule=learning_rule,
    train_data=train_data,
    valid_data=valid_data,
    num_epochs=num_epochs,
    stats_interval=stats_interval,
    notebook=True,
)

In [None]:
# Depth experiment, ONE hidden layer. hidden_dim is not assigned in this
# cell; it keeps whatever value an earlier cell last gave it.

# Reset the state of both data providers before this run.
train_data.reset()
valid_data.reset()

# Network: affine -> ReLU -> affine (output).
model = MultipleLayerModel(
    [
        AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, output_dim, weights_init, biases_init),
    ]
)

# notebook=True is for running inside a notebook; switch to notebook=False
# when this is run as a script in a terminal.
one_layer = train_model_and_plot_stats(
    model=model,
    error=error,
    learning_rule=learning_rule,
    train_data=train_data,
    valid_data=valid_data,
    num_epochs=num_epochs,
    stats_interval=stats_interval,
    notebook=True,
)

In [None]:
# Depth experiment, THREE hidden layers. hidden_dim is not assigned in this
# cell; it keeps whatever value an earlier cell last gave it.

# Reset the state of both data providers before this run.
train_data.reset()
valid_data.reset()

# Network: three (affine -> ReLU) stages followed by the affine output layer.
model = MultipleLayerModel(
    [
        AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, output_dim, weights_init, biases_init),
    ]
)

# notebook=True is for running inside a notebook; switch to notebook=False
# when this is run as a script in a terminal.
three_layer = train_model_and_plot_stats(
    model=model,
    error=error,
    learning_rule=learning_rule,
    train_data=train_data,
    valid_data=valid_data,
    num_epochs=num_epochs,
    stats_interval=stats_interval,
    notebook=True,
)

In [None]:
# Each entry pairs a (stats, keys, run_time) training result with the label
# used for it in the legends.
depths = [(one_layer, '1'), (two_layer, '2'), (three_layer, '3')]

# Training curves use the default line style, validation curves are dashed.
# All training curves are drawn before the validation curves so the colour
# cycle is consumed in the order train-1, train-2, ..., valid-3.
curve_styles = [('train', {}), ('valid', {'linestyle': 'dashed'})]

# Plot the change in the validation and training set error over training.
fig_1 = plt.figure(figsize=(8, 4))
ax_1 = fig_1.add_subplot(111)
for split, line_kwargs in curve_styles:
    for run, depth_label in depths:
        run_stats, run_keys = run[0], run[1]
        # Row 0 of stats is skipped (presumably the pre-training entry -
        # confirm against Optimiser.train), so the x axis starts at
        # stats_interval.
        epochs = np.arange(1, run_stats.shape[0]) * stats_interval
        ax_1.plot(epochs, run_stats[1:, run_keys['error(' + split + ')']],
                  label='depth ' + depth_label + '(' + split + ')', **line_kwargs)
ax_1.legend(loc=0)
ax_1.set_xlabel('Epoch number')
ax_1.set_ylabel('Error')

# Save through the figure object so the file never depends on which figure
# pyplot currently considers active.
fig_1.savefig('error_curve_depth.png')

# Plot the change in the validation and training set accuracy over training.
fig_2 = plt.figure(figsize=(8, 4))
ax_2 = fig_2.add_subplot(111)
for split, line_kwargs in curve_styles:
    for run, depth_label in depths:
        run_stats, run_keys = run[0], run[1]
        epochs = np.arange(1, run_stats.shape[0]) * stats_interval
        ax_2.plot(epochs, run_stats[1:, run_keys['acc(' + split + ')']],
                  label='depth ' + depth_label + '(' + split + ')', **line_kwargs)
ax_2.legend(loc=0)
ax_2.set_xlabel('Epoch number')
# Accuracy belongs on the y axis; calling set_xlabel a second time here
# would overwrite 'Epoch number' and leave the y axis unlabelled.
ax_2.set_ylabel('Accuracy')

fig_2.savefig('acc_curve_depth.png')

plt.show()

In [None]:
# Summary of the final recorded statistics for the width and depth runs.
# Every entry of widths / depths is (result, label), where result is the
# (stats, keys, run_time) tuple of one training run.
print('----Changing amount of ReLu units----')
# The keys mapping is read from the first depth run.
print(f'keys: {depths[0][0][1]!s}')
for width in widths:
    # width[0][0][-1] is the last row of that run's stats array.
    print(f'{width[1]!s}: {width[0][0][-1]!s}')
print('-----Changing amount of ReLu Layers----')
for depth in depths:
    print(f'{depth[1]!s}: {depth[0][0][-1]!s}')

In [1]:
# Baseline run: three ReLU hidden layers trained with Adam.
# Reset the state of both data providers before this run.
train_data.reset()
valid_data.reset()

# Same value as 10 ** -4; printed so the setting shows in the cell output.
learning_rate = 1e-4
print(learning_rate)
num_epochs = 100
stats_interval = 1
input_dim = 784
output_dim = 47
hidden_dim = 128

error = CrossEntropySoftmaxError()
# Use an Adam learning rule
learning_rule = AdamLearningRule(learning_rate=learning_rate)

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Network: three (affine -> ReLU) stages followed by the affine output layer.
model = MultipleLayerModel(
    [
        AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, output_dim, weights_init, biases_init),
    ]
)

# notebook=True is for running inside a notebook; switch to notebook=False
# when this is run as a script in a terminal.
baseline = train_model_and_plot_stats(
    model=model,
    error=error,
    learning_rule=learning_rule,
    train_data=train_data,
    valid_data=valid_data,
    num_epochs=num_epochs,
    stats_interval=stats_interval,
    notebook=True,
)

0.0001


NameError: name 'CrossEntropySoftmaxError' is not defined

In [None]:
# baseline is the (stats, keys, run_time) tuple returned by
# train_model_and_plot_stats. The keys mapping is element 1 of that tuple;
# baseline[0][1] would instead be row 1 of the stats array.
print('keys: ' + str(baseline[1]))
# Last row of the stats array, i.e. the final recorded statistics.
print(str(baseline[0][-1]))

In [None]:
from mlp.layers import DropoutLayer

#----------DROPOUT--------------
# Three-hidden-layer ReLU network with a DropoutLayer inserted between each
# hidden affine layer and its ReLU.
# Reset the state of both data providers before this run.
train_data.reset()
valid_data.reset()

learning_rate = 10 ** -4
num_epochs = 100
stats_interval = 1
input_dim, output_dim, hidden_dim = 784, 47, 128

error = CrossEntropySoftmaxError()
# Use an Adam learning rule
learning_rule = AdamLearningRule(learning_rate=learning_rate)

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)


# Every list element needs a trailing comma: without the comma after each
# DropoutLayer(...) the list literal is a SyntaxError and the cell cannot run.
# NOTE(review): the DropoutLayer objects are created without the shared rng;
# confirm whether the layer accepts one if reproducible masks are wanted.
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init), # first hidden layer
    DropoutLayer(incl_prob=0.6),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init), # second hidden layer
    DropoutLayer(incl_prob=0.6),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init), # third hidden layer
    DropoutLayer(incl_prob=0.6),
    ReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init) # output layer
])

# Remember to use notebook=False when you write a script to be run in a terminal
dropout_model = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

In [None]:
# dropout_model is the (stats, keys, run_time) tuple returned by
# train_model_and_plot_stats. The keys mapping is element 1 of that tuple;
# dropout_model[0][1] would instead be row 1 of the stats array.
print('keys: ' + str(dropout_model[1]))
# Last row of the stats array, i.e. the final recorded statistics.
print(str(dropout_model[0][-1]))

In [None]:
from mlp.penalties import L1Penalty

#----------L1 Penalty--------------
# Three-hidden-layer ReLU network with an L1 weight penalty (coefficient
# 5e-4) on each of the three hidden affine layers.
# Reset the state of both data providers before this run.
train_data.reset()
valid_data.reset()

learning_rate = 10 ** -4
num_epochs = 100
stats_interval = 1
input_dim, output_dim, hidden_dim = 784, 47, 128

error = CrossEntropySoftmaxError()
# Use an Adam learning rule
learning_rule = AdamLearningRule(learning_rate=learning_rate)

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)


# A penalty object is not a layer, so it cannot be an element of the layer
# list. It is attached to the layer whose weights it regularises through
# AffineLayer's weights_penalty argument. The output layer is left
# unpenalised, matching the three penalties of the original cell.
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init,
                weights_penalty=L1Penalty(5e-4)), # first hidden layer
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init,
                weights_penalty=L1Penalty(5e-4)), # second hidden layer
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init,
                weights_penalty=L1Penalty(5e-4)), # third hidden layer
    ReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init) # output layer
])

# Remember to use notebook=False when you write a script to be run in a terminal
l_one_model = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

In [None]:
# l_one_model is the (stats, keys, run_time) tuple returned by
# train_model_and_plot_stats. The keys mapping is element 1 of that tuple;
# l_one_model[0][1] would instead be row 1 of the stats array.
print('keys: ' + str(l_one_model[1]))
# Last row of the stats array, i.e. the final recorded statistics.
print(str(l_one_model[0][-1]))

In [None]:
# This cell uses L2Penalty, so that is the name that must be imported;
# importing only L1Penalty left L2Penalty undefined (NameError).
from mlp.penalties import L2Penalty

#----------L2 Penalty--------------
# Three-hidden-layer ReLU network with an L2 weight penalty (coefficient
# 5e-4) on each of the three hidden affine layers.
# Reset the state of both data providers before this run.
train_data.reset()
valid_data.reset()

learning_rate = 10 ** -4
num_epochs = 100
stats_interval = 1
input_dim, output_dim, hidden_dim = 784, 47, 128

error = CrossEntropySoftmaxError()
# Use an Adam learning rule
learning_rule = AdamLearningRule(learning_rate=learning_rate)

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)


# A penalty object is not a layer, so it cannot be an element of the layer
# list. It is attached to the layer whose weights it regularises through
# AffineLayer's weights_penalty argument. The output layer is left
# unpenalised, matching the three penalties of the original cell.
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init,
                weights_penalty=L2Penalty(5e-4)), # first hidden layer
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init,
                weights_penalty=L2Penalty(5e-4)), # second hidden layer
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init,
                weights_penalty=L2Penalty(5e-4)), # third hidden layer
    ReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init) # output layer
])

# Remember to use notebook=False when you write a script to be run in a terminal
l_two_model = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

In [None]:
# l_two_model is the (stats, keys, run_time) tuple returned by
# train_model_and_plot_stats. The keys mapping is element 1 of that tuple;
# l_two_model[0][1] would instead be row 1 of the stats array.
print('keys: ' + str(l_two_model[1]))
# Last row of the stats array, i.e. the final recorded statistics.
print(str(l_two_model[0][-1]))

In [None]:
# NOTE(review): L1Penalty is imported here but not referenced anywhere in
# this cell.
from mlp.penalties import L1Penalty

#----------Label smoothing--------------
# Three-hidden-layer ReLU network trained on data providers created with
# smooth_labels=True.
# NOTE(review): these resets act on the providers that are replaced further
# down in this cell, so the reset providers themselves are never trained on.
train_data.reset()
valid_data.reset()

learning_rate = 10 ** -4
num_epochs = 100
stats_interval = 1
input_dim, output_dim, hidden_dim = 784, 47, 128

error = CrossEntropySoftmaxError()
# Use an Adam learning rule
learning_rule = AdamLearningRule(learning_rate=learning_rate)

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Rebind the notebook-level train_data / valid_data names to new providers
# built with smooth_labels=True (presumably this enables label smoothing of
# the targets - confirm against EMNISTDataProvider). Any cell run after this
# one that reads train_data / valid_data will see these providers.
train_data = EMNISTDataProvider('train', batch_size=batch_size, rng=rng, smooth_labels=True)
valid_data = EMNISTDataProvider('valid', batch_size=batch_size, rng=rng, smooth_labels=True)


model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init), # first hidden layer
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init), # second hidden layer
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init), # third hidden layer
    ReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init) # output layer
])

# Remember to use notebook=False when you write a script to be run in a terminal
label_smooth_model = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)