# Coursework 1

This notebook is intended to be used as a starting point for your experiments. The instructions can be found in the instructions file located under spec/coursework1.pdf. The methods provided here are just helper functions. If you want more complex graphs, such as side-by-side comparisons of different experiments, you should learn more about matplotlib and implement them yourself. Before each experiment, remember to re-initialize the neural network weights and reset the data providers so that you get a properly initialized experiment. For each experiment, try to keep most hyperparameters the same, except the one under investigation, so that you can understand what the effect of each one is.

In [None]:
from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer
from mlp.layers import LeakyReluLayer, RandomReluLayer, ParametricReluLayer, ExponentialLinearUnitLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import AdamLearningRule, GradientDescentLearningRule
from mlp.optimisers import Optimiser
import matplotlib.pyplot as plt

In [None]:
# IPython magic: select the inline matplotlib backend for this notebook.
%matplotlib inline
# Apply the 'ggplot' style sheet to the figures created below.
plt.style.use('ggplot')

def get_file_name(prefix, learning_rate, hidden_layers_num, hidden_dim, num_epochs, 
                  activation=None, optimizer=None, index=None, alpha=None, 
                  lower=None, upper=None, suffix=None):
    """Build the file name that identifies one experiment configuration.

    The name is assembled, in this order, from:

    * ``prefix`` (used verbatim, e.g. a directory plus a base name),
    * ``_<activation>`` and ``_<optimizer>`` when given,
    * ``_<learning_rate>_<hidden_layers_num>_<hidden_dim>_<num_epochs>``,
    * ``_index<index>``, ``_alpha<alpha>``, ``_lower<lower>`` and
      ``_upper<upper>`` when given,
    * ``suffix`` (appended without a separator) when given.

    An optional part is omitted only when its argument is ``None``; other
    falsy values such as ``0`` are kept. Every value is converted with
    ``str``.

    Returns:
        The assembled name as a string.
    """
    pieces = [prefix]

    # Optional leading tags carry no label, only the separator.
    for tag in (activation, optimizer):
        if tag is not None:
            pieces.append("_" + str(tag))

    # The four mandatory hyperparameters are always present.
    mandatory = (learning_rate, hidden_layers_num, hidden_dim, num_epochs)
    pieces.extend("_" + str(value) for value in mandatory)

    # Optional trailing tags are labelled so they can be told apart.
    labelled = (("index", index), ("alpha", alpha), ("lower", lower), ("upper", upper))
    pieces.extend(
        "_" + label + str(value) for label, value in labelled if value is not None
    )

    if suffix is not None:
        pieces.append(str(suffix))

    return "".join(pieces)
    
def save_figures(fig_1, fig_2, learning_rate, num_hidden_layers, hidden_units, num_epochs, 
                 activation=None, optimizer=None, index=None, alpha=None, 
                 lower=None, upper=None, suffix=None):
    """Save the error figure and the accuracy figure of one experiment.

    ``fig_1`` is written under the prefix ``figures/error`` and ``fig_2``
    under ``figures/acc``; the rest of each file name is produced by
    ``get_file_name`` from the hyperparameters passed in. ``tight_layout``
    is applied to both figures before saving.

    The ``figures`` directory is created if it does not exist yet, so a
    missing directory cannot make ``savefig`` fail after a run has
    already been trained.

    Args:
        fig_1: Figure object holding the error curves.
        fig_2: Figure object holding the accuracy curves.
        learning_rate, num_hidden_layers, hidden_units, num_epochs:
            Mandatory hyperparameters embedded in the file name.
        activation, optimizer, index, alpha, lower, upper, suffix:
            Optional name components, forwarded to ``get_file_name``.
    """
    import os

    fig_1.tight_layout()
    fig_2.tight_layout()

    # Both figures share the same descriptive part of the name.
    name_options = dict(activation=activation, optimizer=optimizer, index=index,
                        alpha=alpha, lower=lower, upper=upper, suffix=suffix)
    fig_1_name = get_file_name("figures/error", learning_rate, num_hidden_layers,
                               hidden_units, num_epochs, **name_options)
    fig_2_name = get_file_name("figures/acc", learning_rate, num_hidden_layers,
                               hidden_units, num_epochs, **name_options)

    # savefig does not create missing parent directories.
    os.makedirs("figures", exist_ok=True)

    fig_1.savefig(fig_1_name)
    fig_2.savefig(fig_2_name)

def train_model_and_plot_stats(
        model, error, learning_rule, learning_rate, train_data, valid_data, num_hidden_layers,
        hidden_units, num_epochs, stats_interval, 
        notebook=True, test_data=None, activation=None, optimizer=None, alpha=None, 
        lower=None, upper=None, index=None):
    """Train ``model``, plot its error and accuracy curves and save both figures.

    An ``Optimiser`` is built from the given model, error, learning rule
    and data providers (with an ``'acc'`` monitor) and trained for
    ``num_epochs`` epochs. Two figures are then drawn, one with the
    training/validation error and one with the training/validation
    accuracy, shown with ``plt.show()`` and written to disk as ``.pdf``
    files through ``save_figures``.

    ``learning_rate``, ``num_hidden_layers``, ``hidden_units``,
    ``activation``, ``optimizer``, ``alpha``, ``lower``, ``upper`` and
    ``index`` are used only to name the saved figures.

    Returns:
        The tuple ``(stats, keys, run_time, fig_1, ax_1, fig_2, ax_2,
        test_stats, model)``, where ``stats``, ``keys``, ``run_time``,
        ``test_stats`` and ``model`` are what ``Optimiser.train`` returned.
    """
    # Accuracy: share of rows whose arg-max output matches the arg-max target.
    monitors = {'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}

    trainer = Optimiser(
        model, error, learning_rule, train_data, valid_data, monitors,
        notebook=notebook, test_dataset=test_data)
    stats, keys, run_time, test_stats, model = trainer.train(
        num_epochs=num_epochs, stats_interval=stats_interval)

    def draw_curves(series):
        """Plot the named ``stats`` columns against the epoch number."""
        figure = plt.figure(figsize=(8, 4))
        axes = figure.add_subplot(111)
        for name in series:
            # The first row of ``stats`` is left out of the plot
            # (presumably the pre-training entry - TODO confirm).
            epochs = np.arange(1, stats.shape[0]) * stats_interval
            axes.plot(epochs, stats[1:, keys[name]], label=name)
        axes.legend(loc=0)
        axes.set_xlabel('Epoch number')
        return figure, axes

    fig_1, ax_1 = draw_curves(['error(train)', 'error(valid)'])
    fig_2, ax_2 = draw_curves(['acc(train)', 'acc(valid)'])

    plt.show()
    save_figures(fig_1, fig_2, learning_rate, num_hidden_layers, hidden_units, num_epochs,
                 activation=activation, optimizer=optimizer, index=index, alpha=alpha,
                 lower=lower, upper=upper, suffix=".pdf")

    return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2, test_stats, model

def test_model(
        model, error, learning_rule, train_data, valid_data, test_data, num_epochs, stats_interval, 
        notebook=True):
    """Evaluate ``model`` on ``test_data`` through an ``Optimiser``.

    An ``Optimiser`` is built from the given model, error, learning rule
    and data providers (with an ``'acc'`` monitor), and its ``test``
    method is called on ``test_data``. No training is performed here.

    ``num_epochs`` and ``stats_interval`` are accepted for signature
    compatibility but are not used by this function.

    Returns:
        Whatever ``Optimiser.test`` returns for ``test_data``.
    """
    # Accuracy: share of rows whose arg-max output matches the arg-max target.
    data_monitors = {'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}

    optimiser = Optimiser(
        model, error, learning_rule, train_data, valid_data, data_monitors,
        notebook=notebook, test_dataset=test_data)

    test_results = optimiser.test(test_data)

    # The original returned the misspelled, undefined name ``rest_results``,
    # which raised NameError on every call.
    return test_results
    


In [None]:
def save_model_params(model_params, file_name):
    """
    Saves model weights into a txt file

    Each item of ``model_params`` is written on its own line using its
    ``str()`` representation. ``file_name`` is opened in write mode, so an
    existing file is overwritten.

    NOTE(review): NumPy abbreviates the ``str()`` of large arrays with
    ``...`` under its default print options, so big weight matrices may
    not be stored in full - confirm whether complete values are needed.
    """
    with open(file_name, 'w') as f:
        for item in model_params:
            # Wrap the item in a 1-tuple: a bare ``"%s" % item`` would unpack
            # tuple items as format arguments and raise or misformat.
            f.write("%s\n" % (item,))
            
def save_results(val_results, test_results, learning_rate, hidden_layers_num, hidden_dim, num_epochs, activation=None,
                optimizer=None, index=None, alpha=None, lower=None, upper=None):
    """Append one experiment record to ``results.txt`` in the working directory.

    The record lists, one ``name: value`` line each, the optional settings
    that are not ``None`` (index, lower, upper, alpha, optimizer,
    activation), then the four mandatory hyperparameters, then
    ``val_results[0]`` as the validation error and ``val_results[1]`` as
    the validation accuracy, then every ``key:  value`` pair of
    ``test_results``, and finally a blank separator line.

    The whole record is assembled before the file is opened, so an invalid
    argument (e.g. a ``val_results`` with fewer than two entries) raises
    without leaving a half-written record, and the file is always closed.

    Args:
        val_results: Indexable whose first two entries are written as the
            validation error and accuracy.
        test_results: Mapping of test metric names to values; ``None`` is
            accepted and contributes no lines.
        learning_rate, hidden_layers_num, hidden_dim, num_epochs:
            Mandatory hyperparameters of the run.
        activation, optimizer, index, alpha, lower, upper: Optional
            settings, skipped when ``None``.
    """
    optional_settings = (
        ("index", index),
        ("lower", lower),
        ("upper", upper),
        ("alpha", alpha),
        ("optimizer", optimizer),
        ("activation", activation),
    )
    lines = [
        label + ": " + str(value) + "\n"
        for label, value in optional_settings
        if value is not None
    ]

    lines.extend([
        "learning_rate: " + str(learning_rate) + "\n",
        "num_hidden_layers: " + str(hidden_layers_num) + "\n",
        "hidden_dim: " + str(hidden_dim) + "\n",
        "num_epochs: " + str(num_epochs) + "\n",
        "error(val): " + str(val_results[0]) + "\n",
        "accuracy(val):" + str(val_results[1]) + "\n",
    ])

    # No test statistics are written when test_results is None.
    if test_results is not None:
        lines.extend(str(k) + ":  " + str(v) + "\n" for k, v in test_results.items())

    # Blank line separating this record from the next one.
    lines.append("\n")

    with open("results.txt", "a") as results_file:
        results_file.writelines(lines)

            


In [None]:
def append_activation(layers, activation, alpha=None, lower=None, upper=None):
    """Append the activation layer named by ``activation`` to ``layers``.

    Recognised names are ``"LeakyReluLayer"``, ``"ParametricReluLayer"``,
    ``"ExponentialLinearUnitLayer"`` (all configured through ``alpha``) and
    ``"RandomReluLayer"`` (configured through ``lower`` and ``upper``).
    Any other name leaves ``layers`` untouched.

    A hyperparameter is forwarded to the layer constructor only when it is
    not ``None``. The original always forwarded it, so a call without
    ``alpha`` passed ``alpha=None`` explicitly instead of letting the layer
    use its own default (assumes the layer constructors define defaults
    for these arguments - TODO confirm against mlp.layers).

    Args:
        layers: List of layers; modified in place.
        activation: Name of the activation layer class to append.
        alpha: Optional ``alpha`` for the leaky, parametric and ELU layers.
        lower: Optional ``lower`` for the random ReLU layer.
        upper: Optional ``upper`` for the random ReLU layer.

    Returns:
        The same ``layers`` list that was passed in.
    """
    def supplied(**options):
        """Keep only the keyword arguments the caller actually set."""
        return {name: value for name, value in options.items() if value is not None}

    if activation == "LeakyReluLayer":
        layers.append(LeakyReluLayer(**supplied(alpha=alpha)))
    elif activation == "ParametricReluLayer":
        layers.append(ParametricReluLayer(**supplied(alpha=alpha)))
    elif activation == "ExponentialLinearUnitLayer":
        layers.append(ExponentialLinearUnitLayer(**supplied(alpha=alpha)))
    elif activation == "RandomReluLayer":
        layers.append(RandomReluLayer(**supplied(lower=lower, upper=upper)))

    return layers

In [None]:

import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider, EMNISTDataProvider


# Seeded random state; the same object is handed to the three data providers
# below and to the weight initialisers in the experiment cells.
seed = 11102019 
rng = np.random.RandomState(seed)
# Batch size passed to every data provider.
batch_size = 100

# Configure the root logger: INFO level, and replace whatever handlers it had
# with a single StreamHandler.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [logging.StreamHandler()]


# Build the EMNIST train / validation / test providers. The printed labels
# precede each constructor call.
print("Training set: ")
train_data = EMNISTDataProvider('train', batch_size=batch_size, rng=rng)
print("Validation set: ")
valid_data = EMNISTDataProvider('valid', batch_size=batch_size, rng=rng)
print("Testing set: ")
test_data  = EMNISTDataProvider('test', batch_size=batch_size, rng=rng)

In [None]:

# Experiment: train one network per activation type, three repetitions each,
# with SGD, and store the curves, statistics, results and parameters.

# Hyperparameters shared by every run in this cell.
learning_rate         = 0.01    
num_epochs            = 50
stats_interval        = 1
input_dim, output_dim = 784, 47  # network input and output sizes
hidden_dims           = [128]    # not used in this cell
hidden_dim            = 128
hidden_layers_num     = 3  
activations           = ["LeakyReluLayer", "RandomReluLayer", "ParametricReluLayer", "ExponentialLinearUnitLayer"]
optimizer             = "SGD"    # label used in file names and results only

# Three repetitions (index 1..3) of every activation.
for index in range(1,4):
    for activation in activations: 
        # Fresh initialisers for every run; they draw from the shared rng.
        weights_init = GlorotUniformInit(rng=rng)
        biases_init  = ConstantInit(0.)


        layers = []

        # Input layer followed by its activation.
        # NOTE(review): append_activation is called without alpha/lower/upper
        # here, so it forwards None for them to the layer constructors -
        # confirm the layers accept that.
        layers.append( AffineLayer(input_dim, hidden_dim, weights_init, biases_init) )
        layers = append_activation(layers, activation) 

        # hidden_layers_num further hidden-to-hidden layers, each followed by
        # the activation.
        # NOTE(review): together with the input layer this gives
        # hidden_layers_num + 1 affine/activation pairs - confirm intended.
        for i in range(hidden_layers_num):
            layers.append( AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init) )
            layers = append_activation(layers, activation) 


        # Output layer (no activation appended after it).
        layers.append( AffineLayer(hidden_dim, output_dim, weights_init, biases_init) )


        model = MultipleLayerModel( layers )

        error = CrossEntropySoftmaxError()

        learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
        

        # Train, plot and save the figures; `_` holds the full return tuple.
        _ = train_model_and_plot_stats(
            model, error, learning_rule, learning_rate, train_data, valid_data, hidden_layers_num, hidden_dim, 
            num_epochs, stats_interval, notebook=True, test_data=test_data, activation=activation, 
            optimizer=optimizer, index=index)


        # Save the full statistics array as text.
        stats = _[0]
        stats_file_name = get_file_name("stats/stats", learning_rate, hidden_layers_num, hidden_dim, 
                                        num_epochs, activation=activation, optimizer=optimizer, index=index)
        np.savetxt(stats_file_name, stats)


        # Last statistics row without its first two columns; save_results
        # writes the remaining entries as validation error and accuracy
        # (presumably columns 2 and 3 hold those values - verify against keys).
        val_results = stats[-1]
        val_results = val_results[2:]
        # Second-to-last element of the return tuple: the test statistics.
        test_results = _[-2]
        save_results(val_results, test_results, learning_rate, hidden_layers_num, 
                     hidden_dim, num_epochs, activation=activation, optimizer=optimizer, index=index)


        # Last element of the return tuple: the trained model.
        trained_model =  _[-1]
        model_params  = trained_model.params
        params_file_name = get_file_name("model_params/model_params", learning_rate, hidden_layers_num, hidden_dim, 
                                         num_epochs, activation=activation, optimizer=optimizer, 
                                         index=index, suffix=".txt")
        save_model_params(model_params, params_file_name)
    

# Testing Leaky ReLU alpha values

In [None]:
# Experiment: Leaky ReLU networks trained with SGD for each alpha in
# `alphas`, three repetitions per alpha.

# setup hyperparameters
learning_rate         = 0.01            
num_epochs            = 50
stats_interval        = 1
input_dim, output_dim = 784, 47  # network input and output sizes
hidden_dims           = [128]    # not used in this cell
hidden_dim            = 128
hidden_layers_num     = 3  
activations           = ["LeakyReluLayer", "RandomReluLayer", "ParametricReluLayer", "ExponentialLinearUnitLayer"]  # not used in this cell
activation            = "LeakyReluLayer"
optimizer             = "SGD"    # label used in file names and results only
alphas                = [0.2, 0.3]

for alpha in alphas: 
    # Three repetitions (index 1..3) of every alpha.
    for index in range(1,4):
        # Fresh initialisers for every run; they draw from the shared rng.
        weights_init = GlorotUniformInit(rng=rng)
        biases_init  = ConstantInit(0.)


        layers = []

        # Input layer followed by its activation.
        layers.append( AffineLayer(input_dim, hidden_dim, weights_init, biases_init) )
        layers = append_activation(layers, activation, alpha) 


        # hidden_layers_num further hidden-to-hidden layers, each followed by
        # the activation.
        # NOTE(review): together with the input layer this gives
        # hidden_layers_num + 1 affine/activation pairs - confirm intended.
        for i in range(hidden_layers_num):
            layers.append( AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init) )
            layers = append_activation(layers, activation, alpha) 


        # Output layer (no activation appended after it).
        layers.append( AffineLayer(hidden_dim, output_dim, weights_init, biases_init) )


        model = MultipleLayerModel( layers )

        error = CrossEntropySoftmaxError()

        # Plain gradient descent; the Adam alternative is left disabled.
        #learning_rule = AdamLearningRule(learning_rate=learning_rate)
        learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)

        # Train, plot and save the figures; `_` holds the full return tuple.
        _ = train_model_and_plot_stats(
            model, error, learning_rule, learning_rate, train_data, valid_data, hidden_layers_num, hidden_dim, 
            num_epochs, stats_interval, notebook=True, test_data=test_data, activation=activation, 
            optimizer=optimizer, index=index, alpha=alpha)


        # Save the full statistics array as text.
        stats = _[0]
        stats_file_name = get_file_name("stats/stats", learning_rate, hidden_layers_num, hidden_dim, 
                                        num_epochs, activation=activation, optimizer=optimizer, index=index, alpha=alpha)
        np.savetxt(stats_file_name, stats)

        # Last statistics row without its first two columns; save_results
        # writes the remaining entries as validation error and accuracy
        # (presumably columns 2 and 3 hold those values - verify against keys).
        val_results = stats[-1]
        val_results = val_results[2:]
        # Second-to-last element of the return tuple: the test statistics.
        test_results = _[-2]
        save_results(val_results, test_results, learning_rate, hidden_layers_num, 
                     hidden_dim, num_epochs, activation=activation, optimizer=optimizer, index=index, alpha=alpha)


        # Last element of the return tuple: the trained model.
        trained_model =  _[-1]
        model_params  = trained_model.params
        params_file_name = get_file_name("model_params/model_params", learning_rate, hidden_layers_num, hidden_dim, 
                                         num_epochs, activation=activation, optimizer=optimizer, 
                                         index=index, alpha=alpha, suffix=".txt")
        save_model_params(model_params, params_file_name)

# ELU

In [None]:
# Experiment: ELU networks trained with SGD for each alpha in `alphas`,
# three repetitions; outputs go under the ELU/ prefix.

# setup hyperparameters
learning_rate         = 0.01      # best performing from the baseline experiments            
num_epochs            = 50
stats_interval        = 1
input_dim, output_dim = 784, 47   # network input and output sizes
hidden_dims           = [128]     # not used in this cell
hidden_dim            = 128
hidden_layers_num     = 3  
activations           = ["LeakyReluLayer", "RandomReluLayer", "ParametricReluLayer", "ExponentialLinearUnitLayer"]  # not used in this cell
activation            = "ExponentialLinearUnitLayer"
optimizer             = "SGD"     # label used in file names and results only
alphas                = [0.2]

# Three repetitions (index 1..3), each covering every alpha.
for index in range(1,4):
    for alpha in alphas: 
        # Fresh initialisers for every run; they draw from the shared rng.
        weights_init = GlorotUniformInit(rng=rng)
        biases_init  = ConstantInit(0.)

        # Create the model
        layers = []
        # Append initial layer + activation
        layers.append( AffineLayer(input_dim, hidden_dim, weights_init, biases_init) )
        layers = append_activation(layers, activation, alpha) 

        # Append hidden_layers_num hidden-to-hidden layers, each followed by
        # the activation.
        # NOTE(review): together with the initial layer this gives
        # hidden_layers_num + 1 affine/activation pairs - confirm intended.
        for i in range(hidden_layers_num):
            layers.append( AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init) )
            layers = append_activation(layers, activation, alpha) 

        # Append final layer (no activation appended after it)
        layers.append( AffineLayer(hidden_dim, output_dim, weights_init, biases_init) )

        # Create the model based on layers
        model = MultipleLayerModel( layers )

        error = CrossEntropySoftmaxError()
        # Use a basic gradient descent learning rule; the Adam alternative is
        # left disabled.
        #learning_rule = AdamLearningRule(learning_rate=learning_rate)
        learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
        
        # Remember to use notebook=False when you write a script to be run in a terminal.
        # `_` holds the full return tuple of the training helper.
        _ = train_model_and_plot_stats(
            model, error, learning_rule, learning_rate, train_data, valid_data, hidden_layers_num, hidden_dim, 
            num_epochs, stats_interval, notebook=True, test_data=test_data, activation=activation, 
            optimizer=optimizer, index=index, alpha=alpha)

        # save all training & validation stats to a file
        stats = _[0]
        stats_file_name = get_file_name("ELU/stats/stats", learning_rate, hidden_layers_num, hidden_dim, 
                                        num_epochs, activation=activation, optimizer=optimizer, index=index, alpha=alpha)
        np.savetxt(stats_file_name, stats)

        # retrieve validation and test results and save them to a file:
        # the last statistics row without its first two columns is written as
        # validation error and accuracy (presumably columns 2 and 3 - verify
        # against keys); the test statistics are the second-to-last element
        # of the return tuple.
        val_results = stats[-1]
        val_results = val_results[2:]
        test_results = _[-2]
        save_results(val_results, test_results, learning_rate, hidden_layers_num, 
                     hidden_dim, num_epochs, activation=activation, optimizer=optimizer, index=index, alpha=alpha)

        # retrieve the model parameters (last element of the return tuple)
        # and save them to a txt file
        trained_model =  _[-1]
        model_params  = trained_model.params
        params_file_name = get_file_name("ELU/model_params/model_params", learning_rate, hidden_layers_num, hidden_dim, 
                                         num_epochs, activation=activation, optimizer=optimizer, 
                                         index=index, alpha=alpha, suffix=".txt")
        save_model_params(model_params, params_file_name)

# Parametric ReLU 

In [None]:
# Experiment: Parametric ReLU networks trained with SGD for each alpha in
# `alphas`, three repetitions per alpha; outputs go under the PReLU/ prefix.

# setup hyperparameters
learning_rate         = 0.01      # best performing from the baseline experiments            
num_epochs            = 25
stats_interval        = 1
input_dim, output_dim = 784, 47   # network input and output sizes
hidden_dims           = [128]     # not used in this cell
hidden_dim            = 128
hidden_layers_num     = 3  
activations           = ["LeakyReluLayer", "RandomReluLayer", "ParametricReluLayer", "ExponentialLinearUnitLayer"]  # not used in this cell
activation            = "ParametricReluLayer"
optimizer             = "SGD"     # label used in file names and results only
alphas                = [0.1, 0.001]

for alpha in alphas: 
    # Three repetitions (index 1..3) of every alpha.
    for index in range(1,4):
        # Fresh initialisers for every run; they draw from the shared rng.
        weights_init = GlorotUniformInit(rng=rng)
        biases_init  = ConstantInit(0.)

        # Create the model
        layers = []
        # Append initial layer + activation
        layers.append( AffineLayer(input_dim, hidden_dim, weights_init, biases_init) )
        layers = append_activation(layers, activation, alpha) 

        # Append hidden_layers_num hidden-to-hidden layers, each followed by
        # the activation.
        # NOTE(review): together with the initial layer this gives
        # hidden_layers_num + 1 affine/activation pairs - confirm intended.
        for i in range(hidden_layers_num):
            layers.append( AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init) )
            layers = append_activation(layers, activation, alpha) 

        # Append final layer (no activation appended after it)
        layers.append( AffineLayer(hidden_dim, output_dim, weights_init, biases_init) )

        # Create the model based on layers
        model = MultipleLayerModel( layers )

        error = CrossEntropySoftmaxError()
        # Use a basic gradient descent learning rule; the Adam alternative is
        # left disabled.
        #learning_rule = AdamLearningRule(learning_rate=learning_rate)
        learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
        
        # Remember to use notebook=False when you write a script to be run in a terminal.
        # `_` holds the full return tuple of the training helper.
        _ = train_model_and_plot_stats(
            model, error, learning_rule, learning_rate, train_data, valid_data, hidden_layers_num, hidden_dim, 
            num_epochs, stats_interval, notebook=True, test_data=test_data, activation=activation, 
            optimizer=optimizer, index=index, alpha=alpha)

        # save all training & validation stats to a file
        stats = _[0]
        stats_file_name = get_file_name("PReLU/stats/stats", learning_rate, hidden_layers_num, hidden_dim, 
                                        num_epochs, activation=activation, optimizer=optimizer, index=index, alpha=alpha)
        np.savetxt(stats_file_name, stats)

        # retrieve validation and test results and save them to a file:
        # the last statistics row without its first two columns is written as
        # validation error and accuracy (presumably columns 2 and 3 - verify
        # against keys); the test statistics are the second-to-last element
        # of the return tuple.
        val_results = stats[-1]
        val_results = val_results[2:]
        test_results = _[-2]
        save_results(val_results, test_results, learning_rate, hidden_layers_num, 
                     hidden_dim, num_epochs, activation=activation, optimizer=optimizer, index=index, alpha=alpha)

        # retrieve the model parameters (last element of the return tuple)
        # and save them to a txt file
        trained_model =  _[-1]
        model_params  = trained_model.params
        params_file_name = get_file_name("PReLU/model_params/model_params", learning_rate, hidden_layers_num, hidden_dim, 
                                         num_epochs, activation=activation, optimizer=optimizer, 
                                         index=index, alpha=alpha, suffix=".txt")
        save_model_params(model_params, params_file_name)

# Random ReLU bounds

In [None]:
# Experiment: Random ReLU networks with fixed lower/upper bounds, trained
# with SGD, three repetitions; outputs go under the RReLU/ prefix.

# setup hyperparameters
learning_rate         = 0.01      # best performing from the baseline experiments            
num_epochs            = 50
stats_interval        = 1
input_dim, output_dim = 784, 47   # network input and output sizes
hidden_dims           = [128]     # not used in this cell
hidden_dim            = 128
hidden_layers_num     = 3  
activations           = ["LeakyReluLayer", "RandomReluLayer", "ParametricReluLayer", "ExponentialLinearUnitLayer"]  # not used in this cell
activation            = "RandomReluLayer"
optimizer             = "SGD"     # label used in file names and results only
alphas                = [0.1, 0.001]  # not used in this cell
lower                 = 0.01
upper                 = 0.2

# Three repetitions (index 1..3) of the same configuration.
for index in range(1,4):
    # Fresh initialisers for every run; they draw from the shared rng.
    weights_init = GlorotUniformInit(rng=rng)
    biases_init  = ConstantInit(0.)

    # Create the model
    layers = []
    # Append initial layer + activation
    layers.append( AffineLayer(input_dim, hidden_dim, weights_init, biases_init) )
    layers = append_activation(layers, activation, lower=lower, upper=upper) 

    # Append hidden_layers_num hidden-to-hidden layers, each followed by the
    # activation.
    # NOTE(review): together with the initial layer this gives
    # hidden_layers_num + 1 affine/activation pairs - confirm intended.
    for i in range(hidden_layers_num):
        layers.append( AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init) )
        layers = append_activation(layers, activation, lower=lower, upper=upper) 

    # Append final layer (no activation appended after it)
    layers.append( AffineLayer(hidden_dim, output_dim, weights_init, biases_init) )

    # Create the model based on layers
    model = MultipleLayerModel( layers )

    error = CrossEntropySoftmaxError()

    # Plain gradient descent; the Adam alternative is left disabled.
    #learning_rule = AdamLearningRule(learning_rate=learning_rate)
    learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)

    # Remember to use notebook=False when you write a script to be run in a terminal.
    # `_` holds the full return tuple of the training helper.
    _ = train_model_and_plot_stats(
        model, error, learning_rule, learning_rate, train_data, valid_data, hidden_layers_num, hidden_dim, 
        num_epochs, stats_interval, notebook=True, test_data=test_data, activation=activation, 
        optimizer=optimizer, index=index, lower=lower, upper=upper)

    # save all training & validation stats to a file
    stats = _[0]
    stats_file_name = get_file_name("RReLU/stats/stats", learning_rate, hidden_layers_num, hidden_dim, 
                                    num_epochs, activation=activation, optimizer=optimizer, index=index,
                                    lower=lower, upper=upper)
    np.savetxt(stats_file_name, stats)

    # retrieve validation and test results and save them to a file:
    # the last statistics row without its first two columns is written as
    # validation error and accuracy (presumably columns 2 and 3 - verify
    # against keys); the test statistics are the second-to-last element of
    # the return tuple.
    val_results = stats[-1]
    val_results = val_results[2:]
    test_results = _[-2]
    save_results(val_results, test_results, learning_rate, hidden_layers_num, 
                 hidden_dim, num_epochs, activation=activation, optimizer=optimizer, 
                 lower=lower, upper=upper, index=index)

    # retrieve the model parameters (last element of the return tuple) and
    # save them to a txt file
    trained_model =  _[-1]
    model_params  = trained_model.params
    params_file_name = get_file_name("RReLU/model_params/model_params", learning_rate, hidden_layers_num, hidden_dim, 
                                     num_epochs, activation=activation, optimizer=optimizer, lower=lower, upper=upper,
                                     index=index, suffix=".txt")
    save_model_params(model_params, params_file_name)

# LReLU 

In [None]:
# Experiment: Leaky ReLU networks with a single fixed alpha, trained with SGD
# for 100 epochs, three repetitions.

# setup hyperparameters
learning_rate         = 0.01      # best performing from the baseline experiments            
num_epochs            = 100
stats_interval        = 1
input_dim, output_dim = 784, 47   # network input and output sizes
hidden_dims           = [128]     # not used in this cell
hidden_dim            = 128
hidden_layers_num     = 3  
activations           = ["LeakyReluLayer", "RandomReluLayer", "ParametricReluLayer", "ExponentialLinearUnitLayer"]  # not used in this cell
activation            = "LeakyReluLayer"
optimizer             = "SGD"     # label used in file names and results only
alphas                = [0.001, 0.1]  # not used in this cell; only `alpha` below is
alpha                 = 0.01

# Three repetitions (index 1..3) of the same configuration.
for index in range(1,4):
    # Fresh initialisers for every run; they draw from the shared rng.
    weights_init = GlorotUniformInit(rng=rng)
    biases_init  = ConstantInit(0.)

    # Create the model
    layers = []
    # Append initial layer + activation
    layers.append( AffineLayer(input_dim, hidden_dim, weights_init, biases_init) )
    layers = append_activation(layers, activation, alpha) 

    # Append hidden_layers_num hidden-to-hidden layers, each followed by the
    # activation.
    # NOTE(review): together with the initial layer this gives
    # hidden_layers_num + 1 affine/activation pairs - confirm intended.
    for i in range(hidden_layers_num):
        layers.append( AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init) )
        layers = append_activation(layers, activation, alpha) 

    # Append final layer (no activation appended after it)
    layers.append( AffineLayer(hidden_dim, output_dim, weights_init, biases_init) )

    # Create the model based on layers
    model = MultipleLayerModel( layers )

    error = CrossEntropySoftmaxError()
    # Use a basic gradient descent learning rule; the Adam alternative is
    # left disabled.
    #learning_rule = AdamLearningRule(learning_rate=learning_rate)
    learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)

    # Remember to use notebook=False when you write a script to be run in a terminal.
    # `_` holds the full return tuple of the training helper.
    _ = train_model_and_plot_stats(
        model, error, learning_rule, learning_rate, train_data, valid_data, hidden_layers_num, hidden_dim, 
        num_epochs, stats_interval, notebook=True, test_data=test_data, activation=activation, 
        optimizer=optimizer, index=index, alpha=alpha)

    # save all training & validation stats to a file
    stats = _[0]
    stats_file_name = get_file_name("stats/stats", learning_rate, hidden_layers_num, hidden_dim, 
                                    num_epochs, activation=activation, optimizer=optimizer, index=index, alpha=alpha)
    np.savetxt(stats_file_name, stats)

    # retrieve validation and test results and save them to a file:
    # the last statistics row without its first two columns is written as
    # validation error and accuracy (presumably columns 2 and 3 - verify
    # against keys); the test statistics are the second-to-last element of
    # the return tuple.
    val_results = stats[-1]
    val_results = val_results[2:]
    test_results = _[-2]
    save_results(val_results, test_results, learning_rate, hidden_layers_num, 
                 hidden_dim, num_epochs, activation=activation, optimizer=optimizer, index=index, alpha=alpha)

    # retrieve the model parameters (last element of the return tuple) and
    # save them to a txt file
    trained_model =  _[-1]
    model_params  = trained_model.params
    params_file_name = get_file_name("model_params/model_params", learning_rate, hidden_layers_num, hidden_dim, 
                                     num_epochs, activation=activation, optimizer=optimizer, 
                                     index=index, alpha=alpha, suffix=".txt")
    save_model_params(model_params, params_file_name)

# 4.3 2B 

In [None]:
# Experiment: networks made of affine layers only (no activation layers),
# swept over learning rate, number of hidden layers and hidden width, with
# three repetitions per configuration; outputs go under no_activation/.

# setup hyperparameters
learning_rates        = [0.001, 0.01, 0.1, 0.0001, 1]      
num_epochs            = 100
stats_interval        = 1
input_dim, output_dim = 784, 47  # network input and output sizes
hidden_dims           = [32, 64, 128]  # hidden widths to sweep
hidden_layers_nums    = [1, 2, 3]      # hidden-layer counts to sweep
activation            = "NoActivation"  # label only; no activation layer is appended below
optimizer             = "SGD"           # label used in file names and results only

for learning_rate in learning_rates:
    for hidden_layers_num in hidden_layers_nums: 
        for hidden_dim in hidden_dims: 
            for index in range(1,4):        # three repetitions (index 1..3) per configuration
                # initialise layers of the model
                layers = []

                # Fresh initialisers for every run; they draw from the shared rng.
                weights_init = GlorotUniformInit(rng=rng)
                biases_init  = ConstantInit(0.)

                # Append initial layer
                layers.append( AffineLayer(input_dim, hidden_dim, weights_init, biases_init) )

                # Append hidden_layers_num hidden-to-hidden affine layers.
                # NOTE(review): together with the initial layer this gives
                # hidden_layers_num + 1 affine layers before the output
                # layer - confirm intended.
                for i in range(hidden_layers_num):
                    layers.append( AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init) )

                # Append final layer 
                layers.append( AffineLayer(hidden_dim, output_dim, weights_init, biases_init) )

                # Create the model based on layers
                model = MultipleLayerModel( layers )

                error = CrossEntropySoftmaxError()
                # Use a basic gradient descent learning rule
                learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)

                # Remember to use notebook=False when you write a script to be run in a terminal.
                # `_` holds the full return tuple of the training helper.
                _ = train_model_and_plot_stats(
                    model, error, learning_rule, learning_rate, train_data, valid_data, hidden_layers_num, hidden_dim, 
                    num_epochs, stats_interval, notebook=True, test_data=test_data, activation=activation, 
                    optimizer=optimizer, index=index)

                # save all training & validation stats to a file
                stats = _[0]
                stats_file_name = get_file_name("no_activation/stats/stats", learning_rate, hidden_layers_num, hidden_dim, 
                                                num_epochs, activation=activation, optimizer=optimizer, index=index)
                np.savetxt(stats_file_name, stats)

                # retrieve validation and test results and save them to a file:
                # the last statistics row without its first two columns is
                # written as validation error and accuracy (presumably
                # columns 2 and 3 - verify against keys); the test statistics
                # are the second-to-last element of the return tuple.
                val_results = stats[-1]
                val_results = val_results[2:]
                test_results = _[-2]
                save_results(val_results, test_results, learning_rate, hidden_layers_num, 
                             hidden_dim, num_epochs, activation=activation, optimizer=optimizer, index=index)

                # retrieve the model parameters (last element of the return
                # tuple) and save them to a txt file
                trained_model =  _[-1]
                model_params  = trained_model.params
                params_file_name = get_file_name("no_activation/model_params/model_params", learning_rate, hidden_layers_num, hidden_dim, 
                                                 num_epochs, activation=activation, optimizer=optimizer, 
                                                 index=index, suffix=".txt")
                save_model_params(model_params, params_file_name)

