Compare different activation functions

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

def train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):
    """Train `model` with an `Optimiser` and plot its error and accuracy curves.

    `model`, `error`, `learning_rule`, `train_data`, `valid_data` and
    `notebook` are forwarded unchanged to `Optimiser`; `num_epochs` and
    `stats_interval` are forwarded to `Optimiser.train`.

    Returns the tuple `(stats, keys, run_time, fig_1, ax_1, fig_2, ax_2)`
    where `stats`, `keys` and `run_time` are exactly what `Optimiser.train`
    returned, `fig_1`/`ax_1` hold the error curves and `fig_2`/`ax_2` hold
    the accuracy curves.
    """
    # Besides the error, track classification accuracy: the fraction of
    # rows whose arg-max prediction matches the arg-max of the target.
    data_monitors = {'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}

    optimiser = Optimiser(
        model, error, learning_rule, train_data, valid_data, data_monitors, notebook=notebook)

    # Train for `num_epochs` epochs, recording statistics every
    # `stats_interval` epochs.
    stats, keys, run_time = optimiser.train(num_epochs=num_epochs, stats_interval=stats_interval)

    def plot_curves(metric_names):
        # One figure per metric family, one line per entry of `metric_names`.
        # Row 0 of `stats` is skipped - presumably the statistics recorded
        # before any training; TODO confirm against Optimiser.train.
        figure = plt.figure(figsize=(8, 4))
        axes = figure.add_subplot(111)
        for k in metric_names:
            axes.plot(np.arange(1, stats.shape[0]) * stats_interval,
                      stats[1:, keys[k]], label=k)
        axes.legend(loc=0)
        axes.set_xlabel('Epoch number')
        return figure, axes

    # Error curves first, then accuracy curves (figures are created in
    # this order).
    fig_1, ax_1 = plot_curves(['error(train)', 'error(valid)'])
    fig_2, ax_2 = plot_curves(['acc(train)', 'acc(valid)'])

    return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2

In [None]:
# The code below sets up the data providers, random number
# generator and logger objects needed for training runs. As
# loading the data from file takes a little while, you will
# generally not want to reload the data providers on every
# training run. If you wish to reset their state you should
# instead use the .reset() method of the data providers.
import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider

# Fixed seed so that every experiment can be restarted from the same
# random state; the generator is shared by both data providers.
seed = 10102016
batch_size = 50
rng = np.random.RandomState(seed)

# Route INFO-level log records of the root logger through a single
# stream handler (any previously installed handlers are replaced).
logger = logging.getLogger()
logger.setLevel(logging.INFO)
stream_handler = logging.StreamHandler()
logger.handlers = [stream_handler]

# MNIST data providers: training set first, then validation set.
train_data, valid_data = (
    MNISTDataProvider(which_set, batch_size=batch_size, rng=rng)
    for which_set in ('train', 'valid')
)

In [None]:
from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer, ELULayer, SELULayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit, SELUInit
from mlp.learning_rules import GradientDescentLearningRule
from mlp.optimisers import Optimiser

In [None]:
# Hyperparameters shared by the experiments below.
num_epochs = 100       # passed to the optimiser as the number of epochs
stats_interval = 1     # epochs between recorded statistics
input_dim = 784        # input size of the first affine layer
output_dim = 10        # output size of the last affine layer
hidden_dim = 100       # width of every hidden affine layer

In [None]:
# Experiment: two hidden layers with sigmoid activations.
learning_rate = 0.1

# Result accumulators.
# NOTE(review): these lists are re-created in every experiment cell, so
# after this cell they hold only this run's final values.
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Restore the random state and the data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)
layer_stack = [
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init),
]
model = MultipleLayerModel(layer_stack)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
Sigmoid_Layer = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)
plt.show()

# Unpack the run once: statistics array, column lookup and total run time.
run_stats, run_keys, total_run_time = Sigmoid_Layer[0], Sigmoid_Layer[1], Sigmoid_Layer[2]
final_row = run_stats[-1]
final_error_train = final_row[run_keys['error(train)']]
final_error_valid = final_row[run_keys['error(valid)']]
final_acc_train = final_row[run_keys['acc(train)']]
final_acc_valid = final_row[run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(final_error_train))
print('    final error(valid) = {0:.2e}'.format(final_error_valid))
print('    final acc(train)   = {0:.2e}'.format(final_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(final_acc_valid))
print('    run time per epoch = {0:.2f}'.format(total_run_time * 1. / num_epochs))

final_errors_train.append(final_error_train)
final_errors_valid.append(final_error_valid)
final_accs_train.append(final_acc_train)
final_accs_valid.append(final_acc_valid)



In [None]:
# Experiment: two hidden layers with ReLU activations.
learning_rate = 0.1

# Result accumulators.
# NOTE(review): these lists are re-created in every experiment cell, so
# after this cell they hold only this run's final values.
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Restore the random state and the data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)
layer_stack = [
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init),
]
model = MultipleLayerModel(layer_stack)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
Relu_Layer = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)
plt.show()

# Unpack the run once: statistics array, column lookup and total run time.
run_stats, run_keys, total_run_time = Relu_Layer[0], Relu_Layer[1], Relu_Layer[2]
final_row = run_stats[-1]
final_error_train = final_row[run_keys['error(train)']]
final_error_valid = final_row[run_keys['error(valid)']]
final_acc_train = final_row[run_keys['acc(train)']]
final_acc_valid = final_row[run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(final_error_train))
print('    final error(valid) = {0:.2e}'.format(final_error_valid))
print('    final acc(train)   = {0:.2e}'.format(final_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(final_acc_valid))
print('    run time per epoch = {0:.2f}'.format(total_run_time * 1. / num_epochs))

final_errors_train.append(final_error_train)
final_errors_valid.append(final_error_valid)
final_accs_train.append(final_acc_train)
final_accs_valid.append(final_acc_valid)

In [None]:
# Experiment: two hidden layers with leaky-ReLU activations.
learning_rate = 0.1

# Result accumulators.
# NOTE(review): these lists are re-created in every experiment cell, so
# after this cell they hold only this run's final values.
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Restore the random state and the data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)
layer_stack = [
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    LeakyReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    LeakyReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init),
]
model = MultipleLayerModel(layer_stack)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
Leaky_ReluLayer = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)
plt.show()

# Unpack the run once: statistics array, column lookup and total run time.
run_stats, run_keys, total_run_time = Leaky_ReluLayer[0], Leaky_ReluLayer[1], Leaky_ReluLayer[2]
final_row = run_stats[-1]
final_error_train = final_row[run_keys['error(train)']]
final_error_valid = final_row[run_keys['error(valid)']]
final_acc_train = final_row[run_keys['acc(train)']]
final_acc_valid = final_row[run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(final_error_train))
print('    final error(valid) = {0:.2e}'.format(final_error_valid))
print('    final acc(train)   = {0:.2e}'.format(final_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(final_acc_valid))
print('    run time per epoch = {0:.2f}'.format(total_run_time * 1. / num_epochs))

final_errors_train.append(final_error_train)
final_errors_valid.append(final_error_valid)
final_accs_train.append(final_acc_train)
final_accs_valid.append(final_acc_valid)

In [None]:
# Experiment: two hidden layers with ELU activations.
learning_rate = 0.1

# Result accumulators.
# NOTE(review): these lists are re-created in every experiment cell, so
# after this cell they hold only this run's final values.
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Restore the random state and the data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)
layer_stack = [
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    ELULayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ELULayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init),
]
model = MultipleLayerModel(layer_stack)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
ELU_Layer = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)
plt.show()

# Unpack the run once: statistics array, column lookup and total run time.
run_stats, run_keys, total_run_time = ELU_Layer[0], ELU_Layer[1], ELU_Layer[2]
final_row = run_stats[-1]
final_error_train = final_row[run_keys['error(train)']]
final_error_valid = final_row[run_keys['error(valid)']]
final_acc_train = final_row[run_keys['acc(train)']]
final_acc_valid = final_row[run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(final_error_train))
print('    final error(valid) = {0:.2e}'.format(final_error_valid))
print('    final acc(train)   = {0:.2e}'.format(final_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(final_acc_valid))
print('    run time per epoch = {0:.2f}'.format(total_run_time * 1. / num_epochs))

final_errors_train.append(final_error_train)
final_errors_valid.append(final_error_valid)
final_accs_train.append(final_acc_train)
final_accs_valid.append(final_acc_valid)

In [None]:
# Experiment: two hidden layers with SELU activations.
learning_rate = 0.1

# Result accumulators.
# NOTE(review): these lists are re-created in every experiment cell, so
# after this cell they hold only this run's final values.
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Restore the random state and the data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

# NOTE(review): SELUInit is imported by this notebook but the Glorot
# initialiser is used here, like in the other activation experiments -
# confirm this is intentional.
weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)
layer_stack = [
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    SELULayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    SELULayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init),
]
model = MultipleLayerModel(layer_stack)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
SELU_Layer = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)
plt.show()

# Unpack the run once: statistics array, column lookup and total run time.
run_stats, run_keys, total_run_time = SELU_Layer[0], SELU_Layer[1], SELU_Layer[2]
final_row = run_stats[-1]
final_error_train = final_row[run_keys['error(train)']]
final_error_valid = final_row[run_keys['error(valid)']]
final_acc_train = final_row[run_keys['acc(train)']]
final_acc_valid = final_row[run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(final_error_train))
print('    final error(valid) = {0:.2e}'.format(final_error_valid))
print('    final acc(train)   = {0:.2e}'.format(final_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(final_acc_valid))
print('    run time per epoch = {0:.2f}'.format(total_run_time * 1. / num_epochs))

final_errors_train.append(final_error_train)
final_errors_valid.append(final_error_valid)
final_accs_train.append(final_acc_train)
final_accs_valid.append(final_acc_valid)

In [None]:
# Overlay the learning curves of the five activation-function runs:
# fig_1 holds training/validation error, fig_2 training/validation accuracy.
models = [Sigmoid_Layer, Relu_Layer, Leaky_ReluLayer,
          ELU_Layer, SELU_Layer]
models_name = ['SigmoidLayer', 'ReluLayer', 'LeakyReluLayer',
               'ELULayer', 'SELULayer']

# NOTE(review): the two figures have different widths (10 vs 8.7); this may
# have been tuned by eye while only fig_2 received tight_layout - re-check
# the saved PDFs.
fig_1 = plt.figure(figsize=(10, 12))
ax_1 = fig_1.add_subplot(211)
ax_2 = fig_1.add_subplot(212)
fig_2 = plt.figure(figsize=(8.7, 12))
ax_3 = fig_2.add_subplot(211)
ax_4 = fig_2.add_subplot(212)

# (axes, statistic to plot, y-axis label, title, y-axis limits)
panels = [
    (ax_1, 'error(train)', 'Error', 'Error(Train)', (0.0, 0.6)),
    (ax_2, 'error(valid)', 'Error', 'Error(Valid)', (0.0, 0.6)),
    (ax_3, 'acc(train)', 'ACC', 'ACC(Train)', (0.84, 1.01)),
    (ax_4, 'acc(valid)', 'ACC', 'ACC(Valid)', (0.84, 1.01)),
]
for ax, k, y_label, title, y_limits in panels:
    # `run` (not `model`) so the global `model` is not overwritten by a tuple.
    for name, run in zip(models_name, models):
        stats, keys = run[0], run[1]
        # Label each line with its model name so the legend cannot get out
        # of step with the plotting order.
        ax.plot(np.arange(1, stats.shape[0]) * stats_interval,
                stats[1:, keys[k]], label=name)
    ax.legend(loc=0)
    ax.set_ylim(*y_limits)
    ax.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
    ax.set_ylabel(y_label, fontsize=12, fontweight=1000)
    ax.set_title(title, fontsize=14)

# plt.tight_layout() only affects the *current* figure (fig_2, the last one
# created), so each figure is laid out explicitly.
fig_1.tight_layout()
fig_2.tight_layout()
fig_1.savefig('compare_activation_function_error.pdf')
fig_2.savefig('compare_activation_function_acc.pdf')

Fix the learning rate using the 5-hidden-layer model

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

def train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):
    """Train `model` with an `Optimiser` and plot its error and accuracy curves.

    `model`, `error`, `learning_rule`, `train_data`, `valid_data` and
    `notebook` are forwarded unchanged to `Optimiser`; `num_epochs` and
    `stats_interval` are forwarded to `Optimiser.train`.

    Returns the tuple `(stats, keys, run_time, fig_1, ax_1, fig_2, ax_2)`
    where `stats`, `keys` and `run_time` are exactly what `Optimiser.train`
    returned, `fig_1`/`ax_1` hold the error curves and `fig_2`/`ax_2` hold
    the accuracy curves.
    """
    # Besides the error, track classification accuracy: the fraction of
    # rows whose arg-max prediction matches the arg-max of the target.
    data_monitors = {'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}

    optimiser = Optimiser(
        model, error, learning_rule, train_data, valid_data, data_monitors, notebook=notebook)

    # Train for `num_epochs` epochs, recording statistics every
    # `stats_interval` epochs.
    stats, keys, run_time = optimiser.train(num_epochs=num_epochs, stats_interval=stats_interval)

    def plot_curves(metric_names):
        # One figure per metric family, one line per entry of `metric_names`.
        # Row 0 of `stats` is skipped - presumably the statistics recorded
        # before any training; TODO confirm against Optimiser.train.
        figure = plt.figure(figsize=(8, 4))
        axes = figure.add_subplot(111)
        for k in metric_names:
            axes.plot(np.arange(1, stats.shape[0]) * stats_interval,
                      stats[1:, keys[k]], label=k)
        axes.legend(loc=0)
        axes.set_xlabel('Epoch number')
        return figure, axes

    # Error curves first, then accuracy curves (figures are created in
    # this order).
    fig_1, ax_1 = plot_curves(['error(train)', 'error(valid)'])
    fig_2, ax_2 = plot_curves(['acc(train)', 'acc(valid)'])

    return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2

In [None]:
# The code below sets up the data providers, random number
# generator and logger objects needed for training runs. As
# loading the data from file takes a little while, you will
# generally not want to reload the data providers on every
# training run. If you wish to reset their state you should
# instead use the .reset() method of the data providers.
import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider

# Fixed seed so that every experiment can be restarted from the same
# random state; the generator is shared by both data providers.
seed = 10102016
batch_size = 50
rng = np.random.RandomState(seed)

# Route INFO-level log records of the root logger through a single
# stream handler (any previously installed handlers are replaced).
logger = logging.getLogger()
logger.setLevel(logging.INFO)
stream_handler = logging.StreamHandler()
logger.handlers = [stream_handler]

# MNIST data providers: training set first, then validation set.
train_data, valid_data = (
    MNISTDataProvider(which_set, batch_size=batch_size, rng=rng)
    for which_set in ('train', 'valid')
)

In [None]:
from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer, ELULayer, SELULayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit, SELUInit
from mlp.learning_rules import GradientDescentLearningRule
from mlp.optimisers import Optimiser

In [None]:
# Hyperparameters shared by the experiments below.
num_epochs = 100       # passed to the optimiser as the number of epochs
stats_interval = 1     # epochs between recorded statistics
input_dim = 784        # input size of the first affine layer
output_dim = 10        # output size of the last affine layer
hidden_dim = 100       # width of every hidden affine layer

In [None]:
# Experiment: five leaky-ReLU hidden layers, learning rate 0.05.
learning_rate = 0.05  # step size handed to GradientDescentLearningRule

# Result accumulators.
# NOTE(review): these lists are re-created in every experiment cell, so
# after this cell they hold only this run's final values.
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Restore the random state and the data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Input block, four hidden-to-hidden blocks, then the output layer; the
# layers are created in the same order as they appear in the model.
layer_stack = [AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
               LeakyReluLayer()]
for hidden_block in range(4):
    layer_stack.append(AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init))
    layer_stack.append(LeakyReluLayer())
layer_stack.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init))
model = MultipleLayerModel(layer_stack)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_5_005 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)
plt.show()

# Unpack the run once: statistics array, column lookup and total run time.
run_stats, run_keys, total_run_time = LeakyRelu_5_005[0], LeakyRelu_5_005[1], LeakyRelu_5_005[2]
final_row = run_stats[-1]
final_error_train = final_row[run_keys['error(train)']]
final_error_valid = final_row[run_keys['error(valid)']]
final_acc_train = final_row[run_keys['acc(train)']]
final_acc_valid = final_row[run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(final_error_train))
print('    final error(valid) = {0:.2e}'.format(final_error_valid))
print('    final acc(train)   = {0:.2e}'.format(final_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(final_acc_valid))
print('    run time per epoch = {0:.2f}'.format(total_run_time * 1. / num_epochs))

final_errors_train.append(final_error_train)
final_errors_valid.append(final_error_valid)
final_accs_train.append(final_acc_train)
final_accs_valid.append(final_acc_valid)

In [None]:
# Experiment: five leaky-ReLU hidden layers, learning rate 0.10.
learning_rate = 0.10  # step size handed to GradientDescentLearningRule

# Result accumulators.
# NOTE(review): these lists are re-created in every experiment cell, so
# after this cell they hold only this run's final values.
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Restore the random state and the data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Input block, four hidden-to-hidden blocks, then the output layer; the
# layers are created in the same order as they appear in the model.
layer_stack = [AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
               LeakyReluLayer()]
for hidden_block in range(4):
    layer_stack.append(AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init))
    layer_stack.append(LeakyReluLayer())
layer_stack.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init))
model = MultipleLayerModel(layer_stack)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_5_010 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)
plt.show()

# Unpack the run once: statistics array, column lookup and total run time.
run_stats, run_keys, total_run_time = LeakyRelu_5_010[0], LeakyRelu_5_010[1], LeakyRelu_5_010[2]
final_row = run_stats[-1]
final_error_train = final_row[run_keys['error(train)']]
final_error_valid = final_row[run_keys['error(valid)']]
final_acc_train = final_row[run_keys['acc(train)']]
final_acc_valid = final_row[run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(final_error_train))
print('    final error(valid) = {0:.2e}'.format(final_error_valid))
print('    final acc(train)   = {0:.2e}'.format(final_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(final_acc_valid))
print('    run time per epoch = {0:.2f}'.format(total_run_time * 1. / num_epochs))

final_errors_train.append(final_error_train)
final_errors_valid.append(final_error_valid)
final_accs_train.append(final_acc_train)
final_accs_valid.append(final_acc_valid)

In [None]:
# Experiment: five leaky-ReLU hidden layers, learning rate 0.20.
learning_rate = 0.20  # step size handed to GradientDescentLearningRule

# Result accumulators.
# NOTE(review): these lists are re-created in every experiment cell, so
# after this cell they hold only this run's final values.
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Restore the random state and the data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Input block, four hidden-to-hidden blocks, then the output layer; the
# layers are created in the same order as they appear in the model.
layer_stack = [AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
               LeakyReluLayer()]
for hidden_block in range(4):
    layer_stack.append(AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init))
    layer_stack.append(LeakyReluLayer())
layer_stack.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init))
model = MultipleLayerModel(layer_stack)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_5_020 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)
plt.show()

# Unpack the run once: statistics array, column lookup and total run time.
run_stats, run_keys, total_run_time = LeakyRelu_5_020[0], LeakyRelu_5_020[1], LeakyRelu_5_020[2]
final_row = run_stats[-1]
final_error_train = final_row[run_keys['error(train)']]
final_error_valid = final_row[run_keys['error(valid)']]
final_acc_train = final_row[run_keys['acc(train)']]
final_acc_valid = final_row[run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(final_error_train))
print('    final error(valid) = {0:.2e}'.format(final_error_valid))
print('    final acc(train)   = {0:.2e}'.format(final_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(final_acc_valid))
print('    run time per epoch = {0:.2f}'.format(total_run_time * 1. / num_epochs))

final_errors_train.append(final_error_train)
final_errors_valid.append(final_error_valid)
final_accs_train.append(final_acc_train)
final_accs_valid.append(final_acc_valid)

In [None]:
# Overlay the training and validation error curves of the three
# learning-rate runs; each curve family goes to its own figure and PDF.
models = [LeakyRelu_5_005, LeakyRelu_5_010, LeakyRelu_5_020]
models_name = ['LeakyRelu_5_005', 'LeakyRelu_5_010', 'LeakyRelu_5_020']

# NOTE(review): each figure uses only one half of a 1x2 grid (121 / 122)
# and the widths differ (20 vs 15.3); this may have been tuned by eye while
# only fig_2 received tight_layout - re-check the saved PDFs.
fig_1 = plt.figure(figsize=(20, 8))
ax_1 = fig_1.add_subplot(121)
fig_2 = plt.figure(figsize=(15.3, 8))
ax_2 = fig_2.add_subplot(122)

# (figure, axes, statistic to plot, title)
panels = [
    (fig_1, ax_1, 'error(train)', 'Error(Train)'),
    (fig_2, ax_2, 'error(valid)', 'Error(Valid)'),
]
for fig, ax, k, title in panels:
    # `run` (not `model`) so the global `model` is not overwritten by a tuple.
    for name, run in zip(models_name, models):
        stats, keys = run[0], run[1]
        # Label each line with its run name so the legend cannot get out of
        # step with the plotting order.
        ax.plot(np.arange(1, stats.shape[0]) * stats_interval,
                stats[1:, keys[k]], label=name)
    ax.legend(loc=0)
    ax.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
    ax.set_ylabel('Error', fontsize=12, fontweight=1000)
    ax.set_title(title, fontsize=14)
    # plt.tight_layout() only affects the *current* figure (fig_2, the last
    # one created), so lay out each figure explicitly.
    fig.tight_layout()

fig_1.savefig('LR-errortrain.pdf')
fig_2.savefig('LR-errorvalid.pdf')

In [None]:
# Overlay the training and validation accuracy curves of the three
# learning-rate runs; each curve family goes to its own figure and PDF.
models = [LeakyRelu_5_005, LeakyRelu_5_010, LeakyRelu_5_020]
models_name = ['LeakyRelu_5_005', 'LeakyRelu_5_010', 'LeakyRelu_5_020']

# NOTE(review): each figure uses only one half of a 1x2 grid (121 / 122)
# and the widths differ (20 vs 15.3); this may have been tuned by eye while
# only fig_2 received tight_layout - re-check the saved PDFs.
fig_1 = plt.figure(figsize=(20, 8))
ax_1 = fig_1.add_subplot(121)
fig_2 = plt.figure(figsize=(15.3, 8))
ax_2 = fig_2.add_subplot(122)

# (figure, axes, statistic to plot, title)
panels = [
    (fig_1, ax_1, 'acc(train)', 'Acc(Train)'),
    (fig_2, ax_2, 'acc(valid)', 'Acc(Valid)'),
]
for fig, ax, k, title in panels:
    # `run` (not `model`) so the global `model` is not overwritten by a tuple.
    for name, run in zip(models_name, models):
        stats, keys = run[0], run[1]
        # Label each line with its run name so the legend cannot get out of
        # step with the plotting order.
        ax.plot(np.arange(1, stats.shape[0]) * stats_interval,
                stats[1:, keys[k]], label=name)
    ax.legend(loc=0)
    ax.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
    # Both y-labels now carry the same font weight (the training panel
    # previously omitted it).
    ax.set_ylabel('Acc', fontsize=12, fontweight=1000)
    ax.set_title(title, fontsize=14)
    # plt.tight_layout() only affects the *current* figure (fig_2, the last
    # one created), so lay out each figure explicitly.
    fig.tight_layout()

fig_1.savefig('LR-acctrain.pdf')
fig_2.savefig('LR-accvalid.pdf')

In [None]:
#j = 0
#print('| learning_rate | final error(train) | final error(valid) | final acc(train) | final acc(valid) |')
#print('|------------|--------------------|--------------------|------------------|------------------|')
#for learning_rate in learning_rates:
#    print('| {0:.2f}        | {1:.2e}           | {2:.2e}           |  {3:.2f}            | {4:.2f}             |'
#          .format(learning_rate, 
#                  final_errors_train[j], final_errors_valid[j],
#                  final_accs_train[j], final_accs_valid[j]))
#    j += 1

Compare different numbers of hidden layers

In [None]:
# Hyperparameters shared by the training runs that follow.
# Training schedule: number of epochs and how often statistics are recorded.
num_epochs = 100
stats_interval = 1
# Layer sizes handed to the AffineLayer constructors.
input_dim = 784
output_dim = 10
hidden_dim = 100

In [None]:
# 2 hidden layers LeakyRelu Layer
learning_rate = 0.05  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Re-seed the RNG and reset both data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# (affine -> LeakyReLU) stages are created in input-to-output order,
# followed by the final affine layer that maps to the outputs.
num_hidden_layers = 2
layers = []
for layer_input in [input_dim] + [hidden_dim] * (num_hidden_layers - 1):
    layers.append(AffineLayer(layer_input, hidden_dim, weights_init, biases_init))
    layers.append(LeakyReluLayer())
layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_2 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Read the last row of the stats array, one column per monitored quantity.
run_stats, run_keys, run_time = LeakyRelu_2[:3]
last_error_train = run_stats[-1, run_keys['error(train)']]
last_error_valid = run_stats[-1, run_keys['error(valid)']]
last_acc_train = run_stats[-1, run_keys['acc(train)']]
last_acc_valid = run_stats[-1, run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(last_error_train))
print('    final error(valid) = {0:.2e}'.format(last_error_valid))
print('    final acc(train)   = {0:.2e}'.format(last_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(last_acc_valid))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(last_error_train)
final_errors_valid.append(last_error_valid)
final_accs_train.append(last_acc_train)
final_accs_valid.append(last_acc_valid)

In [None]:
# 3 hidden layers LeakyRelu Layer
learning_rate = 0.05  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Re-seed the RNG and reset both data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# (affine -> LeakyReLU) stages are created in input-to-output order,
# followed by the final affine layer that maps to the outputs.
num_hidden_layers = 3
layers = []
for layer_input in [input_dim] + [hidden_dim] * (num_hidden_layers - 1):
    layers.append(AffineLayer(layer_input, hidden_dim, weights_init, biases_init))
    layers.append(LeakyReluLayer())
layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_3 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Read the last row of the stats array, one column per monitored quantity.
run_stats, run_keys, run_time = LeakyRelu_3[:3]
last_error_train = run_stats[-1, run_keys['error(train)']]
last_error_valid = run_stats[-1, run_keys['error(valid)']]
last_acc_train = run_stats[-1, run_keys['acc(train)']]
last_acc_valid = run_stats[-1, run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(last_error_train))
print('    final error(valid) = {0:.2e}'.format(last_error_valid))
print('    final acc(train)   = {0:.2e}'.format(last_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(last_acc_valid))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(last_error_train)
final_errors_valid.append(last_error_valid)
final_accs_train.append(last_acc_train)
final_accs_valid.append(last_acc_valid)

In [None]:
# 4 hidden layers LeakyRelu Layer
learning_rate = 0.05  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Re-seed the RNG and reset both data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# (affine -> LeakyReLU) stages are created in input-to-output order,
# followed by the final affine layer that maps to the outputs.
num_hidden_layers = 4
layers = []
for layer_input in [input_dim] + [hidden_dim] * (num_hidden_layers - 1):
    layers.append(AffineLayer(layer_input, hidden_dim, weights_init, biases_init))
    layers.append(LeakyReluLayer())
layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_4 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Read the last row of the stats array, one column per monitored quantity.
run_stats, run_keys, run_time = LeakyRelu_4[:3]
last_error_train = run_stats[-1, run_keys['error(train)']]
last_error_valid = run_stats[-1, run_keys['error(valid)']]
last_acc_train = run_stats[-1, run_keys['acc(train)']]
last_acc_valid = run_stats[-1, run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(last_error_train))
print('    final error(valid) = {0:.2e}'.format(last_error_valid))
print('    final acc(train)   = {0:.2e}'.format(last_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(last_acc_valid))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(last_error_train)
final_errors_valid.append(last_error_valid)
final_accs_train.append(last_acc_train)
final_accs_valid.append(last_acc_valid)

In [None]:
# 5 hidden layers LeakyRelu Layer
learning_rate = 0.05  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Re-seed the RNG and reset both data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# (affine -> LeakyReLU) stages are created in input-to-output order,
# followed by the final affine layer that maps to the outputs.
num_hidden_layers = 5
layers = []
for layer_input in [input_dim] + [hidden_dim] * (num_hidden_layers - 1):
    layers.append(AffineLayer(layer_input, hidden_dim, weights_init, biases_init))
    layers.append(LeakyReluLayer())
layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_5 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Read the last row of the stats array, one column per monitored quantity.
run_stats, run_keys, run_time = LeakyRelu_5[:3]
last_error_train = run_stats[-1, run_keys['error(train)']]
last_error_valid = run_stats[-1, run_keys['error(valid)']]
last_acc_train = run_stats[-1, run_keys['acc(train)']]
last_acc_valid = run_stats[-1, run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(last_error_train))
print('    final error(valid) = {0:.2e}'.format(last_error_valid))
print('    final acc(train)   = {0:.2e}'.format(last_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(last_acc_valid))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(last_error_train)
final_errors_valid.append(last_error_valid)
final_accs_train.append(last_acc_train)
final_accs_valid.append(last_acc_valid)

In [None]:
# 6 hidden layers LeakyRelu Layer
learning_rate = 0.05  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Re-seed the RNG and reset both data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# (affine -> LeakyReLU) stages are created in input-to-output order,
# followed by the final affine layer that maps to the outputs.
num_hidden_layers = 6
layers = []
for layer_input in [input_dim] + [hidden_dim] * (num_hidden_layers - 1):
    layers.append(AffineLayer(layer_input, hidden_dim, weights_init, biases_init))
    layers.append(LeakyReluLayer())
layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_6 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Read the last row of the stats array, one column per monitored quantity.
run_stats, run_keys, run_time = LeakyRelu_6[:3]
last_error_train = run_stats[-1, run_keys['error(train)']]
last_error_valid = run_stats[-1, run_keys['error(valid)']]
last_acc_train = run_stats[-1, run_keys['acc(train)']]
last_acc_valid = run_stats[-1, run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(last_error_train))
print('    final error(valid) = {0:.2e}'.format(last_error_valid))
print('    final acc(train)   = {0:.2e}'.format(last_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(last_acc_valid))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(last_error_train)
final_errors_valid.append(last_error_valid)
final_accs_train.append(last_acc_train)
final_accs_valid.append(last_acc_valid)

In [None]:
# 7 hidden layers LeakyRelu Layer
learning_rate = 0.05  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Re-seed the RNG and reset both data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# (affine -> LeakyReLU) stages are created in input-to-output order,
# followed by the final affine layer that maps to the outputs.
num_hidden_layers = 7
layers = []
for layer_input in [input_dim] + [hidden_dim] * (num_hidden_layers - 1):
    layers.append(AffineLayer(layer_input, hidden_dim, weights_init, biases_init))
    layers.append(LeakyReluLayer())
layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_7 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Read the last row of the stats array, one column per monitored quantity.
run_stats, run_keys, run_time = LeakyRelu_7[:3]
last_error_train = run_stats[-1, run_keys['error(train)']]
last_error_valid = run_stats[-1, run_keys['error(valid)']]
last_acc_train = run_stats[-1, run_keys['acc(train)']]
last_acc_valid = run_stats[-1, run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(last_error_train))
print('    final error(valid) = {0:.2e}'.format(last_error_valid))
print('    final acc(train)   = {0:.2e}'.format(last_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(last_acc_valid))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(last_error_train)
final_errors_valid.append(last_error_valid)
final_accs_train.append(last_acc_train)
final_accs_valid.append(last_acc_valid)

In [None]:
# 8 hidden layers LeakyRelu Layer
learning_rate = 0.05  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Re-seed the RNG and reset both data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# (affine -> LeakyReLU) stages are created in input-to-output order,
# followed by the final affine layer that maps to the outputs.
num_hidden_layers = 8
layers = []
for layer_input in [input_dim] + [hidden_dim] * (num_hidden_layers - 1):
    layers.append(AffineLayer(layer_input, hidden_dim, weights_init, biases_init))
    layers.append(LeakyReluLayer())
layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_8 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Read the last row of the stats array, one column per monitored quantity.
run_stats, run_keys, run_time = LeakyRelu_8[:3]
last_error_train = run_stats[-1, run_keys['error(train)']]
last_error_valid = run_stats[-1, run_keys['error(valid)']]
last_acc_train = run_stats[-1, run_keys['acc(train)']]
last_acc_valid = run_stats[-1, run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(last_error_train))
print('    final error(valid) = {0:.2e}'.format(last_error_valid))
print('    final acc(train)   = {0:.2e}'.format(last_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(last_acc_valid))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(last_error_train)
final_errors_valid.append(last_error_valid)
final_accs_train.append(last_acc_train)
final_accs_valid.append(last_acc_valid)

In [None]:
# Overlay the training-error and validation-error curves of the runs listed in
# `models`, one figure per data set, and save each figure as a PDF.
models = [LeakyRelu_2, LeakyRelu_3, LeakyRelu_4,
          LeakyRelu_5, LeakyRelu_6, LeakyRelu_7, LeakyRelu_8]
models_name = ['LeakyRelu_2', 'LeakyRelu_3', 'LeakyRelu_4',
               'LeakyRelu_5', 'LeakyRelu_6', 'LeakyRelu_7', 'LeakyRelu_8']

fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)

# Each entry of `models` is indexed as (stats array, key -> column mapping, ...).
# Row 0 of the stats array is skipped, so the x axis starts at one interval.
for ax, k in [(ax_1, 'error(train)'), (ax_2, 'error(valid)')]:
    for model in models:
        stats, keys = model[0], model[1]
        epochs = np.arange(1, stats.shape[0]) * stats_interval
        ax.plot(epochs, stats[1:, keys[k]], label=k)

# The legend entries come from models_name, in the order the lines were drawn.
for ax, title in [(ax_1, 'Error(Train)'), (ax_2, 'Error(Valid)')]:
    ax.legend(models_name, loc=0)
    ax.set_xlabel('Epoch number', fontsize=12)
    ax.set_ylabel('Error', fontsize=12)
    ax.set_title(title, fontsize=14)

fig_1.savefig('numberofhiddenlayer_errt.pdf')
fig_2.savefig('numberofhiddenlayer_errv.pdf')

In [None]:
# Overlay the training-accuracy and validation-accuracy curves of the runs
# listed in `models`, one figure per data set, and save each figure as a PDF.
models = [LeakyRelu_2, LeakyRelu_3, LeakyRelu_4,
          LeakyRelu_5, LeakyRelu_6, LeakyRelu_7, LeakyRelu_8]
models_name = ['LeakyRelu_2', 'LeakyRelu_3', 'LeakyRelu_4',
               'LeakyRelu_5', 'LeakyRelu_6', 'LeakyRelu_7', 'LeakyRelu_8']

fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)

# Each entry of `models` is indexed as (stats array, key -> column mapping, ...).
# Row 0 of the stats array is skipped, so the x axis starts at one interval.
for ax, k in [(ax_1, 'acc(train)'), (ax_2, 'acc(valid)')]:
    for model in models:
        stats, keys = model[0], model[1]
        epochs = np.arange(1, stats.shape[0]) * stats_interval
        ax.plot(epochs, stats[1:, keys[k]], label=k)

# The legend entries come from models_name, in the order the lines were drawn.
for ax, title in [(ax_1, 'Acc(Train)'), (ax_2, 'Acc(Valid)')]:
    ax.legend(models_name, loc=0)
    ax.set_xlabel('Epoch number', fontsize=12)
    ax.set_ylabel('Acc', fontsize=12)
    ax.set_title(title, fontsize=14)

fig_1.savefig('numberofhiddenlayer_acct.pdf')
fig_2.savefig('numberofhiddenlayer_accv.pdf')

FanIn 

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

def train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):
    """Train `model` with an Optimiser and plot its error and accuracy curves.

    An 'acc' monitor is registered alongside the error: the mean of
    `y.argmax(-1) == t.argmax(-1)`, i.e. the proportion of outputs whose
    arg-max over the last axis matches that of the targets.

    Args:
        model, error, learning_rule, train_data, valid_data: forwarded
            unchanged to the Optimiser constructor.
        num_epochs: forwarded to `Optimiser.train` as `num_epochs`.
        stats_interval: forwarded to `Optimiser.train` as `stats_interval`;
            also used to scale the x axis of both plots.
        notebook: forwarded to the Optimiser constructor.

    Returns:
        Tuple `(stats, keys, run_time, fig_1, ax_1, fig_2, ax_2)`: the three
        values returned by `Optimiser.train`, then the figure/axes of the
        error plot, then the figure/axes of the accuracy plot.
    """
    def accuracy(y, t):
        return (y.argmax(-1) == t.argmax(-1)).mean()

    data_monitors = {'acc': accuracy}
    optimiser = Optimiser(
        model, error, learning_rule, train_data, valid_data, data_monitors, notebook=notebook)

    # Run training for num_epochs epochs, recording statistics every
    # stats_interval epochs.
    stats, keys, run_time = optimiser.train(num_epochs=num_epochs, stats_interval=stats_interval)

    def plot_curves(metric_names):
        # One 8x4 figure holding a curve per metric; row 0 of stats is skipped
        # (presumably the pre-training evaluation -- confirm against Optimiser).
        fig = plt.figure(figsize=(8, 4))
        ax = fig.add_subplot(111)
        for metric in metric_names:
            ax.plot(np.arange(1, stats.shape[0]) * stats_interval,
                    stats[1:, keys[metric]], label=metric)
        ax.legend(loc=0)
        ax.set_xlabel('Epoch number')
        return fig, ax

    # Error curves first, then accuracy curves, so figure numbering is stable.
    fig_1, ax_1 = plot_curves(['error(train)', 'error(valid)'])
    fig_2, ax_2 = plot_curves(['acc(train)', 'acc(valid)'])

    return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2

In [None]:
# The below code will set up the data providers, random number
# generator and logger objects needed for training runs. As
# loading the data from file take a little while you generally
# will probably not want to reload the data providers on
# every training run. If you wish to reset their state you
# should instead use the .reset() method of the data providers.
import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider

# Fixed seed and the random number generator shared by the data providers
seed = 10102016
batch_size = 50
rng = np.random.RandomState(seed)

# Root logger at INFO level with a single StreamHandler; a StreamHandler built
# without arguments writes to the logging module's default stream (sys.stderr).
logger = logging.getLogger()
logger.handlers = [logging.StreamHandler()]
logger.setLevel(logging.INFO)

# MNIST data providers (training set first, then validation set), both
# drawing from the same rng and using the same batch size
provider_options = dict(batch_size=batch_size, rng=rng)
train_data = MNISTDataProvider('train', **provider_options)
valid_data = MNISTDataProvider('valid', **provider_options)

In [None]:
from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer, ELULayer, SELULayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit, SELUInit, UniformInit
from mlp.learning_rules import GradientDescentLearningRule
from mlp.optimisers import Optimiser

In [None]:
# Hyperparameters shared by the training runs that follow.
# Training schedule: number of epochs and how often statistics are recorded.
num_epochs = 100
stats_interval = 1
# Layer sizes handed to the AffineLayer constructors.
input_dim = 784
output_dim = 10
hidden_dim = 100

In [None]:
# 5 hidden layers LeakyRelu Layer
learning_rate = 0.10  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Re-seed the RNG and reset both data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

# Uniform bounds of +/- sqrt(3 / n), with n the number of inputs of the layer:
# input_dim for the first affine layer, hidden_dim for all later ones.
int_scale_in = np.sqrt(3/input_dim)
int_scale_hid = np.sqrt(3/hidden_dim)

weights_init_in = UniformInit(-int_scale_in,int_scale_in,rng=rng)
weights_init_hid = UniformInit(-int_scale_hid,int_scale_hid,rng=rng)
biases_init = ConstantInit(0.)

# First stage reads the inputs; the remaining stages are hidden -> hidden.
# Layers are created in input-to-output order.
num_hidden_layers = 5
layers = [AffineLayer(input_dim, hidden_dim, weights_init_in, biases_init),
          LeakyReluLayer()]
for hidden_index in range(num_hidden_layers - 1):
    layers.append(AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init))
    layers.append(LeakyReluLayer())
layers.append(AffineLayer(hidden_dim, output_dim, weights_init_hid, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_5 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Read the last row of the stats array, one column per monitored quantity.
run_stats, run_keys, run_time = LeakyRelu_5[:3]
last_error_train = run_stats[-1, run_keys['error(train)']]
last_error_valid = run_stats[-1, run_keys['error(valid)']]
last_acc_train = run_stats[-1, run_keys['acc(train)']]
last_acc_valid = run_stats[-1, run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(last_error_train))
print('    final error(valid) = {0:.2e}'.format(last_error_valid))
print('    final acc(train)   = {0:.2e}'.format(last_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(last_acc_valid))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(last_error_train)
final_errors_valid.append(last_error_valid)
final_accs_train.append(last_acc_train)
final_accs_valid.append(last_acc_valid)

In [None]:
# 6 hidden layers LeakyRelu Layer
learning_rate = 0.10  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Re-seed the RNG and reset both data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

# Uniform bounds of +/- sqrt(3 / n), with n the number of inputs of the layer:
# input_dim for the first affine layer, hidden_dim for all later ones.
int_scale_in = np.sqrt(3/input_dim)
int_scale_hid = np.sqrt(3/hidden_dim)

weights_init_in = UniformInit(-int_scale_in,int_scale_in,rng=rng)
weights_init_hid = UniformInit(-int_scale_hid,int_scale_hid,rng=rng)
biases_init = ConstantInit(0.)

# First stage reads the inputs; the remaining stages are hidden -> hidden.
# Layers are created in input-to-output order.
num_hidden_layers = 6
layers = [AffineLayer(input_dim, hidden_dim, weights_init_in, biases_init),
          LeakyReluLayer()]
for hidden_index in range(num_hidden_layers - 1):
    layers.append(AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init))
    layers.append(LeakyReluLayer())
layers.append(AffineLayer(hidden_dim, output_dim, weights_init_hid, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_6 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Read the last row of the stats array, one column per monitored quantity.
run_stats, run_keys, run_time = LeakyRelu_6[:3]
last_error_train = run_stats[-1, run_keys['error(train)']]
last_error_valid = run_stats[-1, run_keys['error(valid)']]
last_acc_train = run_stats[-1, run_keys['acc(train)']]
last_acc_valid = run_stats[-1, run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(last_error_train))
print('    final error(valid) = {0:.2e}'.format(last_error_valid))
print('    final acc(train)   = {0:.2e}'.format(last_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(last_acc_valid))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(last_error_train)
final_errors_valid.append(last_error_valid)
final_accs_train.append(last_acc_train)
final_accs_valid.append(last_acc_valid)

In [None]:
# Overlay the training-error and validation-error curves of the runs listed in
# `models`, one figure per data set, and save each figure as a PDF.
models = [LeakyRelu_5, LeakyRelu_6]
models_name = ['LeakyRelu_5', 'LeakyRelu_6']

fig_1 = plt.figure(figsize=(20, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(15.3, 8))
ax_2 = fig_2.add_subplot(111)

# Each entry of `models` is indexed as (stats array, key -> column mapping, ...).
# Row 0 of the stats array is skipped, so the x axis starts at one interval.
for ax, k in [(ax_1, 'error(train)'), (ax_2, 'error(valid)')]:
    for model in models:
        stats, keys = model[0], model[1]
        epochs = np.arange(1, stats.shape[0]) * stats_interval
        ax.plot(epochs, stats[1:, keys[k]], label=k)

# The legend entries come from models_name, in the order the lines were drawn.
for ax, title in [(ax_1, 'Error(Train)'), (ax_2, 'Error(Valid)')]:
    ax.legend(models_name, loc=0)
    ax.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
    ax.set_ylabel('Error', fontsize=12, fontweight=1000)
    ax.set_title(title, fontsize=14)

fig_1.savefig('is_in_errt.pdf')
fig_2.savefig('is_in_errv.pdf')

In [None]:
# Overlay the training-accuracy and validation-accuracy curves of the runs
# listed in `models`, one figure per data set, and save each figure as a PDF.
models = [LeakyRelu_5, LeakyRelu_6]
models_name = ['LeakyRelu_5', 'LeakyRelu_6']

fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)

# Each entry of `models` is indexed as (stats array, key -> column mapping, ...).
# Row 0 of the stats array is skipped, so the x axis starts at one interval.
for ax, k in [(ax_1, 'acc(train)'), (ax_2, 'acc(valid)')]:
    for model in models:
        stats, keys = model[0], model[1]
        epochs = np.arange(1, stats.shape[0]) * stats_interval
        ax.plot(epochs, stats[1:, keys[k]], label=k)

# The legend entries come from models_name, in the order the lines were drawn.
for ax, title in [(ax_1, 'Acc(Train)'), (ax_2, 'Acc(Valid)')]:
    ax.legend(models_name, loc=0)
    ax.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
    ax.set_ylabel('Acc', fontsize=12, fontweight=1000)
    ax.set_title(title, fontsize=14)

fig_1.savefig('is_in_acct.pdf')
fig_2.savefig('is_in_accv.pdf')

FanOut

In [None]:
# Hyperparameters shared by the training runs that follow.
# Training schedule: number of epochs and how often statistics are recorded.
num_epochs = 100
stats_interval = 1
# Layer sizes handed to the AffineLayer constructors.
input_dim = 784
output_dim = 10
hidden_dim = 100

In [None]:
# 5 hidden layers LeakyRelu Layer
learning_rate = 0.10  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Re-seed the RNG and reset both data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

# Uniform bounds of +/- sqrt(3 / n), with n the number of outputs of the layer:
# hidden_dim for every layer except the last, output_dim for the last one.
int_scale_hid = np.sqrt(3/hidden_dim)
int_scale_out = np.sqrt(3/output_dim)

weights_init_hid = UniformInit(-int_scale_hid,int_scale_hid,rng=rng)
weights_init_out = UniformInit(-int_scale_out,int_scale_out,rng=rng)
biases_init = ConstantInit(0.)

# (affine -> LeakyReLU) stages are created in input-to-output order,
# followed by the final affine layer that maps to the outputs.
num_hidden_layers = 5
layers = []
for layer_input in [input_dim] + [hidden_dim] * (num_hidden_layers - 1):
    layers.append(AffineLayer(layer_input, hidden_dim, weights_init_hid, biases_init))
    layers.append(LeakyReluLayer())
layers.append(AffineLayer(hidden_dim, output_dim, weights_init_out, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_5 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Read the last row of the stats array, one column per monitored quantity.
run_stats, run_keys, run_time = LeakyRelu_5[:3]
last_error_train = run_stats[-1, run_keys['error(train)']]
last_error_valid = run_stats[-1, run_keys['error(valid)']]
last_acc_train = run_stats[-1, run_keys['acc(train)']]
last_acc_valid = run_stats[-1, run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(last_error_train))
print('    final error(valid) = {0:.2e}'.format(last_error_valid))
print('    final acc(train)   = {0:.2e}'.format(last_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(last_acc_valid))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(last_error_train)
final_errors_valid.append(last_error_valid)
final_accs_train.append(last_acc_train)
final_accs_valid.append(last_acc_valid)

In [None]:
# 6 hidden layers LeakyRelu Layer
learning_rate = 0.10  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Re-seed the RNG and reset both data providers before building the model.
rng.seed(seed)
train_data.reset()
valid_data.reset()

# Uniform bounds of +/- sqrt(3 / n), with n the number of outputs of the layer:
# hidden_dim for every layer except the last, output_dim for the last one.
int_scale_hid = np.sqrt(3/hidden_dim)
int_scale_out = np.sqrt(3/output_dim)

weights_init_hid = UniformInit(-int_scale_hid,int_scale_hid,rng=rng)
weights_init_out = UniformInit(-int_scale_out,int_scale_out,rng=rng)
biases_init = ConstantInit(0.)

# (affine -> LeakyReLU) stages are created in input-to-output order,
# followed by the final affine layer that maps to the outputs.
num_hidden_layers = 6
layers = []
for layer_input in [input_dim] + [hidden_dim] * (num_hidden_layers - 1):
    layers.append(AffineLayer(layer_input, hidden_dim, weights_init_hid, biases_init))
    layers.append(LeakyReluLayer())
layers.append(AffineLayer(hidden_dim, output_dim, weights_init_out, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
LeakyRelu_6 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Read the last row of the stats array, one column per monitored quantity.
run_stats, run_keys, run_time = LeakyRelu_6[:3]
last_error_train = run_stats[-1, run_keys['error(train)']]
last_error_valid = run_stats[-1, run_keys['error(valid)']]
last_acc_train = run_stats[-1, run_keys['acc(train)']]
last_acc_valid = run_stats[-1, run_keys['acc(valid)']]

print('    final error(train) = {0:.2e}'.format(last_error_train))
print('    final error(valid) = {0:.2e}'.format(last_error_valid))
print('    final acc(train)   = {0:.2e}'.format(last_acc_train))
print('    final acc(valid)   = {0:.2e}'.format(last_acc_valid))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(last_error_train)
final_errors_valid.append(last_error_valid)
final_accs_train.append(last_acc_train)
final_accs_valid.append(last_acc_valid)

In [None]:
# Overlay the training-error and validation-error curves of the runs listed in
# `models`, one figure per data set, and save each figure as a PDF.
models = [LeakyRelu_5, LeakyRelu_6]
models_name = ['LeakyRelu_5', 'LeakyRelu_6']

fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)

# Each entry of `models` is indexed as (stats array, key -> column mapping, ...).
# Row 0 of the stats array is skipped, so the x axis starts at one interval.
for ax, k in [(ax_1, 'error(train)'), (ax_2, 'error(valid)')]:
    for model in models:
        stats, keys = model[0], model[1]
        epochs = np.arange(1, stats.shape[0]) * stats_interval
        ax.plot(epochs, stats[1:, keys[k]], label=k)

# The legend entries come from models_name, in the order the lines were drawn.
for ax, title in [(ax_1, 'Error(Train)'), (ax_2, 'Error(Valid)')]:
    ax.legend(models_name, loc=0)
    ax.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
    ax.set_ylabel('Error', fontsize=12, fontweight=1000)
    ax.set_title(title, fontsize=14)

fig_1.savefig('is_out_errt.pdf')
fig_2.savefig('is_out_errv.pdf')

In [None]:
# Overlay the training-accuracy and validation-accuracy curves of the runs
# listed in `models`, one figure per data set, and save each figure as a PDF.
models = [LeakyRelu_5, LeakyRelu_6]
models_name = ['LeakyRelu_5', 'LeakyRelu_6']

fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)

# Each entry of `models` is indexed as (stats array, key -> column mapping, ...).
# Row 0 of the stats array is skipped, so the x axis starts at one interval.
for ax, k in [(ax_1, 'acc(train)'), (ax_2, 'acc(valid)')]:
    for model in models:
        stats, keys = model[0], model[1]
        epochs = np.arange(1, stats.shape[0]) * stats_interval
        ax.plot(epochs, stats[1:, keys[k]], label=k)

# The legend entries come from models_name, in the order the lines were drawn.
for ax, title in [(ax_1, 'Acc(Train)'), (ax_2, 'Acc(Valid)')]:
    ax.legend(models_name, loc=0)
    ax.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
    ax.set_ylabel('Acc', fontsize=12, fontweight=1000)
    ax.set_title(title, fontsize=14)

fig_1.savefig('is_out_acct.pdf')
fig_2.savefig('is_out_accv.pdf')

FanInOut

In [None]:
# Shared set-up for the training runs in this section: a seeded random number
# generator, a root logger that prints training progress, and the MNIST data
# providers. Loading the data is slow, so this cell should be run once; later
# cells restore a clean state with the providers' .reset() method instead of
# rebuilding them.
import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider

# Send INFO-level messages from the training run to a single stream handler
# (logging.StreamHandler() writes to stderr unless given another stream).
logger = logging.getLogger()
logger.handlers = [logging.StreamHandler()]
logger.setLevel(logging.INFO)

# Fixed seed so that every run starts from the same random state.
seed = 10102016
batch_size = 50
rng = np.random.RandomState(seed)

# MNIST providers for the training and validation sets, built in that order
# and sharing the same random number generator.
train_data, valid_data = [
    MNISTDataProvider(which_set, batch_size=batch_size, rng=rng)
    for which_set in ('train', 'valid')
]

In [None]:
from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer, ELULayer, SELULayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit, SELUInit
from mlp.learning_rules import GradientDescentLearningRule
from mlp.optimisers import Optimiser
from mlp.initialisers import UniformInit

In [None]:
# Hyperparameters shared by the training cells below.
num_epochs = 100     # number of epochs each model is trained for
stats_interval = 1   # statistics are recorded every `stats_interval` epochs
# Layer widths used when building the models.
input_dim = 784
output_dim = 10
hidden_dim = 100

In [None]:
# 4 hidden layers LeakyRelu Layer
# Train a model with four LeakyReLU hidden layers whose weights are drawn
# uniformly from [-s, s] with the Glorot (fan-in + fan-out) bound
# s = sqrt(6 / (fan_in + fan_out)), then print and record the final statistics.
learning_rate = 0.10  # step size passed to GradientDescentLearningRule
final_errors_train = []
final_errors_valid = []
final_accs_train = []
final_accs_valid = []

# Restore the random number generator and the data providers to their initial
# state so this run is comparable with the others.
rng.seed(seed)
train_data.reset()
valid_data.reset()

# The sum of the two layer sizes must be parenthesised: `6/a+b` evaluates as
# (6/a)+b, which gives a bound of about 10 instead of roughly 0.1. The float
# literal keeps the division exact under Python 2 as well.
int_scale_in = np.sqrt(6. / (input_dim + hidden_dim))
int_scale_hid = np.sqrt(6. / (hidden_dim + hidden_dim))
int_scale_out = np.sqrt(6. / (hidden_dim + output_dim))

weights_init_in = UniformInit(-int_scale_in, int_scale_in, rng=rng)
weights_init_hid = UniformInit(-int_scale_hid, int_scale_hid, rng=rng)
weights_init_out = UniformInit(-int_scale_out, int_scale_out, rng=rng)
biases_init = ConstantInit(0.)
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init_in, biases_init),
    LeakyReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init),
    LeakyReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init),
    LeakyReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init),
    LeakyReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init_out, biases_init)
])

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
# The result is the tuple (stats, keys, run_time, fig_1, ax_1, fig_2, ax_2).
LeakyRelu_4 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)


plt.show()

# Last row of the stats array holds the statistics after the final epoch;
# LeakyRelu_4[1] maps a statistic name to its column.
print('    final error(train) = {0:.2e}'.format(LeakyRelu_4[0][-1, LeakyRelu_4[1]['error(train)']]))
print('    final error(valid) = {0:.2e}'.format(LeakyRelu_4[0][-1, LeakyRelu_4[1]['error(valid)']]))
print('    final acc(train)   = {0:.2e}'.format(LeakyRelu_4[0][-1, LeakyRelu_4[1]['acc(train)']]))
print('    final acc(valid)   = {0:.2e}'.format(LeakyRelu_4[0][-1, LeakyRelu_4[1]['acc(valid)']]))
print('    run time per epoch = {0:.2f}'.format(LeakyRelu_4[2] * 1. / num_epochs))

final_errors_train.append(LeakyRelu_4[0][-1, LeakyRelu_4[1]['error(train)']])
final_errors_valid.append(LeakyRelu_4[0][-1, LeakyRelu_4[1]['error(valid)']])
final_accs_train.append(LeakyRelu_4[0][-1, LeakyRelu_4[1]['acc(train)']])
final_accs_valid.append(LeakyRelu_4[0][-1, LeakyRelu_4[1]['acc(valid)']])

In [None]:
# 2 hidden layers LeakyRelu Layer
# Train a model with two LeakyReLU hidden layers whose weights are drawn
# uniformly from [-s, s] with the Glorot (fan-in + fan-out) bound
# s = sqrt(6 / (fan_in + fan_out)), then print and record the final statistics.
learning_rate = 0.10  # step size passed to GradientDescentLearningRule
final_errors_train = []
final_errors_valid = []
final_accs_train = []
final_accs_valid = []

# Restore the random number generator and the data providers to their initial
# state so this run is comparable with the others.
rng.seed(seed)
train_data.reset()
valid_data.reset()

# The sum of the two layer sizes must be parenthesised: `6/a+b` evaluates as
# (6/a)+b, which gives a bound of about 10 instead of roughly 0.1. The float
# literal keeps the division exact under Python 2 as well.
int_scale_in = np.sqrt(6. / (input_dim + hidden_dim))
int_scale_hid = np.sqrt(6. / (hidden_dim + hidden_dim))
int_scale_out = np.sqrt(6. / (hidden_dim + output_dim))

weights_init_in = UniformInit(-int_scale_in, int_scale_in, rng=rng)
weights_init_hid = UniformInit(-int_scale_hid, int_scale_hid, rng=rng)
weights_init_out = UniformInit(-int_scale_out, int_scale_out, rng=rng)
biases_init = ConstantInit(0.)
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init_in, biases_init),
    LeakyReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init),
    LeakyReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init_out, biases_init)
])

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
# The result is the tuple (stats, keys, run_time, fig_1, ax_1, fig_2, ax_2).
LeakyRelu_2 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)


plt.show()

# Last row of the stats array holds the statistics after the final epoch;
# LeakyRelu_2[1] maps a statistic name to its column.
print('    final error(train) = {0:.2e}'.format(LeakyRelu_2[0][-1, LeakyRelu_2[1]['error(train)']]))
print('    final error(valid) = {0:.2e}'.format(LeakyRelu_2[0][-1, LeakyRelu_2[1]['error(valid)']]))
print('    final acc(train)   = {0:.2e}'.format(LeakyRelu_2[0][-1, LeakyRelu_2[1]['acc(train)']]))
print('    final acc(valid)   = {0:.2e}'.format(LeakyRelu_2[0][-1, LeakyRelu_2[1]['acc(valid)']]))
print('    run time per epoch = {0:.2f}'.format(LeakyRelu_2[2] * 1. / num_epochs))

final_errors_train.append(LeakyRelu_2[0][-1, LeakyRelu_2[1]['error(train)']])
final_errors_valid.append(LeakyRelu_2[0][-1, LeakyRelu_2[1]['error(valid)']])
final_accs_train.append(LeakyRelu_2[0][-1, LeakyRelu_2[1]['acc(train)']])
final_accs_valid.append(LeakyRelu_2[0][-1, LeakyRelu_2[1]['acc(valid)']])

In [None]:
# Plot the training and validation error curves of the listed runs and save
# each figure as a PDF. Each entry of `models` is a result tuple returned by
# train_model_and_plot_stats: (stats array, stat-name -> column map, ...).
models = [LeakyRelu_2]
models_name = ['LeakyRelu_2']
fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)
for name, model in zip(models_name, models):
    stats = model[0]
    keys = model[1]
    # Row 0 of the stats array is skipped; x positions are in epochs.
    epochs = np.arange(1, stats.shape[0]) * stats_interval
    # Label every curve with its run name so the legend entries are tied to
    # the plotted line rather than to the order of a separate label list.
    ax_1.plot(epochs, stats[1:, keys['error(train)']], label=name)
    ax_2.plot(epochs, stats[1:, keys['error(valid)']], label=name)
ax_1.legend(loc=0)
ax_1.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
ax_1.set_ylabel('Error', fontsize=12, fontweight=1000)
ax_1.set_title('Error(Train)', fontsize=14)
ax_2.legend(loc=0)
ax_2.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
ax_2.set_ylabel('Error', fontsize=12, fontweight=1000)
ax_2.set_title('Error(Valid)', fontsize=14)
fig_1.savefig('is_inout_errt.pdf')
fig_2.savefig('is_inout_errv.pdf')

In [None]:
# Plot the training and validation accuracy curves of the listed runs and save
# each figure as a PDF. Each entry of `models` is a result tuple returned by
# train_model_and_plot_stats: (stats array, stat-name -> column map, ...).
models = [LeakyRelu_2]
models_name = ['LeakyRelu_2']
fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)
for name, model in zip(models_name, models):
    stats = model[0]
    keys = model[1]
    # Row 0 of the stats array is skipped; x positions are in epochs.
    epochs = np.arange(1, stats.shape[0]) * stats_interval
    # Label every curve with its run name so the legend entries are tied to
    # the plotted line rather than to the order of a separate label list.
    ax_1.plot(epochs, stats[1:, keys['acc(train)']], label=name)
    ax_2.plot(epochs, stats[1:, keys['acc(valid)']], label=name)
ax_1.legend(loc=0)
ax_1.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
ax_1.set_ylabel('Acc', fontsize=12, fontweight=1000)
ax_1.set_title('Acc(Train)', fontsize=14)
ax_2.legend(loc=0)
ax_2.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
ax_2.set_ylabel('Acc', fontsize=12, fontweight=1000)
ax_2.set_title('Acc(Valid)', fontsize=14)
fig_1.savefig('is_inout_acct.pdf')
fig_2.savefig('is_inout_accv.pdf')

FanInNor

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

def train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):
    """Train `model` with an Optimiser and plot its error and accuracy curves.

    The model is trained for `num_epochs` epochs, with statistics recorded
    every `stats_interval` epochs. Two figures are created: one with the
    training/validation error and one with the training/validation accuracy.

    Returns the tuple
    (stats, keys, run_time, fig_1, ax_1, fig_2, ax_2), where `stats`, `keys`
    and `run_time` are exactly what `Optimiser.train` returned; `keys` maps a
    statistic name such as 'error(train)' to its column in `stats`.
    """

    def accuracy(y, t):
        # Fraction of rows whose most probable predicted class equals the target class.
        return (y.argmax(-1) == t.argmax(-1)).mean()

    # Monitor classification accuracy in addition to the error.
    data_monitors = {'acc': accuracy}

    optimiser = Optimiser(
        model, error, learning_rule, train_data, valid_data, data_monitors, notebook=notebook)
    stats, keys, run_time = optimiser.train(num_epochs=num_epochs, stats_interval=stats_interval)

    # x positions in epochs; row 0 of `stats` is left out of the plots.
    epochs = np.arange(1, stats.shape[0]) * stats_interval

    def plot_curves(stat_names):
        # Draw one labelled curve per statistic on a fresh 8x4 figure.
        fig = plt.figure(figsize=(8, 4))
        ax = fig.add_subplot(111)
        for stat_name in stat_names:
            ax.plot(epochs, stats[1:, keys[stat_name]], label=stat_name)
        ax.legend(loc=0)
        ax.set_xlabel('Epoch number')
        return fig, ax

    fig_1, ax_1 = plot_curves(['error(train)', 'error(valid)'])
    fig_2, ax_2 = plot_curves(['acc(train)', 'acc(valid)'])

    return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2

In [None]:
# Shared set-up for the training runs in this section: a seeded random number
# generator, a root logger that prints training progress, and the MNIST data
# providers. Loading the data is slow, so this cell should be run once; later
# cells restore a clean state with the providers' .reset() method instead of
# rebuilding them.
import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider

# Send INFO-level messages from the training run to a single stream handler
# (logging.StreamHandler() writes to stderr unless given another stream).
logger = logging.getLogger()
logger.handlers = [logging.StreamHandler()]
logger.setLevel(logging.INFO)

# Fixed seed so that every run starts from the same random state.
seed = 10102016
batch_size = 50
rng = np.random.RandomState(seed)

# MNIST providers for the training and validation sets, built in that order
# and sharing the same random number generator.
train_data, valid_data = [
    MNISTDataProvider(which_set, batch_size=batch_size, rng=rng)
    for which_set in ('train', 'valid')
]

In [None]:
from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer, ELULayer, SELULayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit, SELUInit
from mlp.learning_rules import GradientDescentLearningRule
from mlp.optimisers import Optimiser

In [None]:
# Hyperparameters shared by the training cells below.
num_epochs = 100     # number of epochs each model is trained for
stats_interval = 1   # statistics are recorded every `stats_interval` epochs
# Layer widths used when building the models.
input_dim = 784
output_dim = 10
hidden_dim = 100

In [None]:
# 2 hidden layers SELU Layer
# Train a model with two SELU hidden layers. Each affine layer's weights come
# from a SELUInit built from that layer's input size (input_dim for the first
# layer, hidden_dim for the rest). Final statistics are printed and recorded.
learning_rate = 0.1
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Restore the random number generator and data providers to their initial state.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init_in = SELUInit(0, input_dim, rng=rng)
weights_init_hid = SELUInit(0, hidden_dim, rng=rng)
biases_init = ConstantInit(0.)

# Layers are created in network order: input block, hidden block, output layer.
layers = [AffineLayer(input_dim, hidden_dim, weights_init_in, biases_init), SELULayer()]
layers += [AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init), SELULayer()]
layers.append(AffineLayer(hidden_dim, output_dim, weights_init_hid, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
# The result is the tuple (stats, keys, run_time, fig_1, ax_1, fig_2, ax_2).
SELULayer_2 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Statistics after the last epoch, and the map from statistic name to column.
final_stats = SELULayer_2[0][-1]
stat_column = SELULayer_2[1]
print('    final error(train) = {0:.2e}'.format(final_stats[stat_column['error(train)']]))
print('    final error(valid) = {0:.2e}'.format(final_stats[stat_column['error(valid)']]))
print('    final acc(train)   = {0:.2e}'.format(final_stats[stat_column['acc(train)']]))
print('    final acc(valid)   = {0:.2e}'.format(final_stats[stat_column['acc(valid)']]))
print('    run time per epoch = {0:.2f}'.format(SELULayer_2[2] * 1. / num_epochs))

final_errors_train.append(final_stats[stat_column['error(train)']])
final_errors_valid.append(final_stats[stat_column['error(valid)']])
final_accs_train.append(final_stats[stat_column['acc(train)']])
final_accs_valid.append(final_stats[stat_column['acc(valid)']])

In [None]:
# 3 hidden layers SELULayer Layer
# Train a model with three SELU hidden layers. Each affine layer's weights come
# from a SELUInit built from that layer's input size (input_dim for the first
# layer, hidden_dim for the rest). Final statistics are printed and recorded.
learning_rate = 0.10  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Restore the random number generator and data providers to their initial state.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init_in = SELUInit(0, input_dim, rng=rng)
weights_init_hid = SELUInit(0, hidden_dim, rng=rng)
biases_init = ConstantInit(0.)

# Layers are created in network order: input block, two hidden blocks, output layer.
layers = [AffineLayer(input_dim, hidden_dim, weights_init_in, biases_init), SELULayer()]
for _ in range(2):
    layers += [AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init), SELULayer()]
layers.append(AffineLayer(hidden_dim, output_dim, weights_init_hid, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
# The result is the tuple (stats, keys, run_time, fig_1, ax_1, fig_2, ax_2).
SELULayer_3 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Statistics after the last epoch, and the map from statistic name to column.
final_stats = SELULayer_3[0][-1]
stat_column = SELULayer_3[1]
print('    final error(train) = {0:.2e}'.format(final_stats[stat_column['error(train)']]))
print('    final error(valid) = {0:.2e}'.format(final_stats[stat_column['error(valid)']]))
print('    final acc(train)   = {0:.2e}'.format(final_stats[stat_column['acc(train)']]))
print('    final acc(valid)   = {0:.2e}'.format(final_stats[stat_column['acc(valid)']]))
print('    run time per epoch = {0:.2f}'.format(SELULayer_3[2] * 1. / num_epochs))

final_errors_train.append(final_stats[stat_column['error(train)']])
final_errors_valid.append(final_stats[stat_column['error(valid)']])
final_accs_train.append(final_stats[stat_column['acc(train)']])
final_accs_valid.append(final_stats[stat_column['acc(valid)']])

In [None]:
# Plot the training and validation error curves of the listed runs and save
# each figure as a PDF. Each entry of `models` is a result tuple returned by
# train_model_and_plot_stats: (stats array, stat-name -> column map, ...).
models = [SELULayer_2, SELULayer_3]
models_name = ['SELULayer_2', 'SELULayer_3']
# One full-size axes per figure, as in the other comparison cells; placing the
# axes in half of a wider figure (subplot 121 / 122) left the other half of
# each saved PDF empty.
fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)
for name, model in zip(models_name, models):
    stats = model[0]
    keys = model[1]
    # Row 0 of the stats array is skipped; x positions are in epochs.
    epochs = np.arange(1, stats.shape[0]) * stats_interval
    # Label every curve with its run name so the legend entries are tied to
    # the plotted line rather than to the order of a separate label list.
    ax_1.plot(epochs, stats[1:, keys['error(train)']], label=name)
    ax_2.plot(epochs, stats[1:, keys['error(valid)']], label=name)
ax_1.legend(loc=0)
ax_1.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
ax_1.set_ylabel('Error', fontsize=12, fontweight=1000)
ax_1.set_title('Error(Train)', fontsize=14)
ax_2.legend(loc=0)
ax_2.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
ax_2.set_ylabel('Error', fontsize=12, fontweight=1000)
ax_2.set_title('Error(Valid)', fontsize=14)
fig_1.savefig('noris_in_errt.pdf')
fig_2.savefig('noris_in_errv.pdf')

In [None]:
# Plot the training and validation accuracy curves of the listed runs and save
# each figure as a PDF. Each entry of `models` is a result tuple returned by
# train_model_and_plot_stats: (stats array, stat-name -> column map, ...).
models = [SELULayer_2, SELULayer_3]
models_name = ['SELULayer_2', 'SELULayer_3']
fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)
for name, model in zip(models_name, models):
    stats = model[0]
    keys = model[1]
    # Row 0 of the stats array is skipped; x positions are in epochs.
    epochs = np.arange(1, stats.shape[0]) * stats_interval
    # Label every curve with its run name so the legend entries are tied to
    # the plotted line rather than to the order of a separate label list.
    ax_1.plot(epochs, stats[1:, keys['acc(train)']], label=name)
    ax_2.plot(epochs, stats[1:, keys['acc(valid)']], label=name)
ax_1.legend(loc=0)
ax_1.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
ax_1.set_ylabel('Acc', fontsize=12, fontweight=1000)
ax_1.set_title('Acc(Train)', fontsize=14)
ax_2.legend(loc=0)
ax_2.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
ax_2.set_ylabel('Acc', fontsize=12, fontweight=1000)
ax_2.set_title('Acc(Valid)', fontsize=14)
fig_1.savefig('noris_in_acct.pdf')
fig_2.savefig('noris_in_accv.pdf')

FanOutNor

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

def train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):
    """Train `model` with an Optimiser and plot its error and accuracy curves.

    The model is trained for `num_epochs` epochs, with statistics recorded
    every `stats_interval` epochs. Two figures are created: one with the
    training/validation error and one with the training/validation accuracy.

    Returns the tuple
    (stats, keys, run_time, fig_1, ax_1, fig_2, ax_2), where `stats`, `keys`
    and `run_time` are exactly what `Optimiser.train` returned; `keys` maps a
    statistic name such as 'error(train)' to its column in `stats`.
    """

    def accuracy(y, t):
        # Fraction of rows whose most probable predicted class equals the target class.
        return (y.argmax(-1) == t.argmax(-1)).mean()

    # Monitor classification accuracy in addition to the error.
    data_monitors = {'acc': accuracy}

    optimiser = Optimiser(
        model, error, learning_rule, train_data, valid_data, data_monitors, notebook=notebook)
    stats, keys, run_time = optimiser.train(num_epochs=num_epochs, stats_interval=stats_interval)

    # x positions in epochs; row 0 of `stats` is left out of the plots.
    epochs = np.arange(1, stats.shape[0]) * stats_interval

    def plot_curves(stat_names):
        # Draw one labelled curve per statistic on a fresh 8x4 figure.
        fig = plt.figure(figsize=(8, 4))
        ax = fig.add_subplot(111)
        for stat_name in stat_names:
            ax.plot(epochs, stats[1:, keys[stat_name]], label=stat_name)
        ax.legend(loc=0)
        ax.set_xlabel('Epoch number')
        return fig, ax

    fig_1, ax_1 = plot_curves(['error(train)', 'error(valid)'])
    fig_2, ax_2 = plot_curves(['acc(train)', 'acc(valid)'])

    return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2

In [None]:
# Shared set-up for the training runs in this section: a seeded random number
# generator, a root logger that prints training progress, and the MNIST data
# providers. Loading the data is slow, so this cell should be run once; later
# cells restore a clean state with the providers' .reset() method instead of
# rebuilding them.
import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider

# Send INFO-level messages from the training run to a single stream handler
# (logging.StreamHandler() writes to stderr unless given another stream).
logger = logging.getLogger()
logger.handlers = [logging.StreamHandler()]
logger.setLevel(logging.INFO)

# Fixed seed so that every run starts from the same random state.
seed = 10102016
batch_size = 50
rng = np.random.RandomState(seed)

# MNIST providers for the training and validation sets, built in that order
# and sharing the same random number generator.
train_data, valid_data = [
    MNISTDataProvider(which_set, batch_size=batch_size, rng=rng)
    for which_set in ('train', 'valid')
]

In [None]:
from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer, ELULayer, SELULayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit, SELUInit, NormalInit
from mlp.learning_rules import GradientDescentLearningRule
from mlp.optimisers import Optimiser

In [None]:
# Hyperparameters shared by the training cells below.
num_epochs = 100     # number of epochs each model is trained for
stats_interval = 1   # statistics are recorded every `stats_interval` epochs
# Layer widths used when building the models.
input_dim = 784
output_dim = 10
hidden_dim = 100

In [None]:
# 2 hidden layers SELU Layer
# Train a model with two SELU hidden layers. Each affine layer's weights come
# from a SELUInit built from that layer's output size (hidden_dim for the
# first two layers, output_dim for the last). Final statistics are printed
# and recorded.
learning_rate = 0.1
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Restore the random number generator and data providers to their initial state.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init_hid = SELUInit(0, hidden_dim, rng=rng)
weights_init_out = SELUInit(0, output_dim, rng=rng)
biases_init = ConstantInit(0.)

# Layers are created in network order: input block, hidden block, output layer.
layers = [AffineLayer(input_dim, hidden_dim, weights_init_hid, biases_init), SELULayer()]
layers += [AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init), SELULayer()]
layers.append(AffineLayer(hidden_dim, output_dim, weights_init_out, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
# The result is the tuple (stats, keys, run_time, fig_1, ax_1, fig_2, ax_2).
SELULayer_2 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Statistics after the last epoch, and the map from statistic name to column.
final_stats = SELULayer_2[0][-1]
stat_column = SELULayer_2[1]
print('    final error(train) = {0:.2e}'.format(final_stats[stat_column['error(train)']]))
print('    final error(valid) = {0:.2e}'.format(final_stats[stat_column['error(valid)']]))
print('    final acc(train)   = {0:.2e}'.format(final_stats[stat_column['acc(train)']]))
print('    final acc(valid)   = {0:.2e}'.format(final_stats[stat_column['acc(valid)']]))
print('    run time per epoch = {0:.2f}'.format(SELULayer_2[2] * 1. / num_epochs))

final_errors_train.append(final_stats[stat_column['error(train)']])
final_errors_valid.append(final_stats[stat_column['error(valid)']])
final_accs_train.append(final_stats[stat_column['acc(train)']])
final_accs_valid.append(final_stats[stat_column['acc(valid)']])

In [None]:
# 3 hidden layers SELULayer Layer
# Train a model with three SELU hidden layers. Each affine layer's weights come
# from a SELUInit built from that layer's output size (hidden_dim for the
# first three layers, output_dim for the last). Final statistics are printed
# and recorded.
learning_rate = 0.10  # step size passed to GradientDescentLearningRule
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []

# Restore the random number generator and data providers to their initial state.
rng.seed(seed)
train_data.reset()
valid_data.reset()

weights_init_hid = SELUInit(0, hidden_dim, rng=rng)
weights_init_out = SELUInit(0, output_dim, rng=rng)
biases_init = ConstantInit(0.)

# Layers are created in network order: input block, two hidden blocks, output layer.
layers = [AffineLayer(input_dim, hidden_dim, weights_init_hid, biases_init), SELULayer()]
for _ in range(2):
    layers += [AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init), SELULayer()]
layers.append(AffineLayer(hidden_dim, output_dim, weights_init_out, biases_init))
model = MultipleLayerModel(layers)

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)
# The result is the tuple (stats, keys, run_time, fig_1, ax_1, fig_2, ax_2).
SELULayer_3 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

plt.show()

# Statistics after the last epoch, and the map from statistic name to column.
final_stats = SELULayer_3[0][-1]
stat_column = SELULayer_3[1]
print('    final error(train) = {0:.2e}'.format(final_stats[stat_column['error(train)']]))
print('    final error(valid) = {0:.2e}'.format(final_stats[stat_column['error(valid)']]))
print('    final acc(train)   = {0:.2e}'.format(final_stats[stat_column['acc(train)']]))
print('    final acc(valid)   = {0:.2e}'.format(final_stats[stat_column['acc(valid)']]))
print('    run time per epoch = {0:.2f}'.format(SELULayer_3[2] * 1. / num_epochs))

final_errors_train.append(final_stats[stat_column['error(train)']])
final_errors_valid.append(final_stats[stat_column['error(valid)']])
final_accs_train.append(final_stats[stat_column['acc(train)']])
final_accs_valid.append(final_stats[stat_column['acc(valid)']])

In [None]:
# Plot the training and validation error curves of the listed runs and save
# each figure as a PDF. Each entry of `models` is a result tuple returned by
# train_model_and_plot_stats: (stats array, stat-name -> column map, ...).
models = [SELULayer_2, SELULayer_3]
models_name = ['SELULayer_2', 'SELULayer_3']
fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)
for name, model in zip(models_name, models):
    stats = model[0]
    keys = model[1]
    # Row 0 of the stats array is skipped; x positions are in epochs.
    epochs = np.arange(1, stats.shape[0]) * stats_interval
    # Label every curve with its run name so the legend entries are tied to
    # the plotted line rather than to the order of a separate label list.
    ax_1.plot(epochs, stats[1:, keys['error(train)']], label=name)
    ax_2.plot(epochs, stats[1:, keys['error(valid)']], label=name)
ax_1.legend(loc=0)
ax_1.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
ax_1.set_ylabel('Error', fontsize=12, fontweight=1000)
ax_1.set_title('Error(Train)', fontsize=14)
ax_2.legend(loc=0)
ax_2.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
ax_2.set_ylabel('Error', fontsize=12, fontweight=1000)
ax_2.set_title('Error(Valid)', fontsize=14)
fig_1.savefig('noris_out_errt.pdf')
fig_2.savefig('noris_out_errv.pdf')

In [None]:
# Plot the training and validation accuracy curves of the listed runs and save
# each figure as a PDF. Each entry of `models` is a result tuple returned by
# train_model_and_plot_stats: (stats array, stat-name -> column map, ...).
models = [SELULayer_2, SELULayer_3]
models_name = ['SELULayer_2', 'SELULayer_3']
fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)
for name, model in zip(models_name, models):
    stats = model[0]
    keys = model[1]
    # Row 0 of the stats array is skipped; x positions are in epochs.
    epochs = np.arange(1, stats.shape[0]) * stats_interval
    # Label every curve with its run name so the legend entries are tied to
    # the plotted line rather than to the order of a separate label list.
    ax_1.plot(epochs, stats[1:, keys['acc(train)']], label=name)
    ax_2.plot(epochs, stats[1:, keys['acc(valid)']], label=name)
ax_1.legend(loc=0)
ax_1.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
ax_1.set_ylabel('Acc', fontsize=12, fontweight=1000)
ax_1.set_title('Acc(Train)', fontsize=14)
ax_2.legend(loc=0)
ax_2.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
ax_2.set_ylabel('Acc', fontsize=12, fontweight=1000)
ax_2.set_title('Acc(Valid)', fontsize=14)
fig_1.savefig('noris_out_acct.pdf')
fig_2.savefig('noris_out_accv.pdf')

FanInOutNor

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

def train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):
    """Train `model` with an Optimiser and plot its error and accuracy curves.

    The model is trained for `num_epochs` epochs, with statistics recorded
    every `stats_interval` epochs. Two figures are created: one with the
    training/validation error and one with the training/validation accuracy.

    Returns the tuple
    (stats, keys, run_time, fig_1, ax_1, fig_2, ax_2), where `stats`, `keys`
    and `run_time` are exactly what `Optimiser.train` returned; `keys` maps a
    statistic name such as 'error(train)' to its column in `stats`.
    """

    def accuracy(y, t):
        # Fraction of rows whose most probable predicted class equals the target class.
        return (y.argmax(-1) == t.argmax(-1)).mean()

    # Monitor classification accuracy in addition to the error.
    data_monitors = {'acc': accuracy}

    optimiser = Optimiser(
        model, error, learning_rule, train_data, valid_data, data_monitors, notebook=notebook)
    stats, keys, run_time = optimiser.train(num_epochs=num_epochs, stats_interval=stats_interval)

    # x positions in epochs; row 0 of `stats` is left out of the plots.
    epochs = np.arange(1, stats.shape[0]) * stats_interval

    def plot_curves(stat_names):
        # Draw one labelled curve per statistic on a fresh 8x4 figure.
        fig = plt.figure(figsize=(8, 4))
        ax = fig.add_subplot(111)
        for stat_name in stat_names:
            ax.plot(epochs, stats[1:, keys[stat_name]], label=stat_name)
        ax.legend(loc=0)
        ax.set_xlabel('Epoch number')
        return fig, ax

    fig_1, ax_1 = plot_curves(['error(train)', 'error(valid)'])
    fig_2, ax_2 = plot_curves(['acc(train)', 'acc(valid)'])

    return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2

In [None]:
# Shared set-up for the training runs in this section: a seeded random number
# generator, a root logger that prints training progress, and the MNIST data
# providers. Loading the data is slow, so this cell should be run once; later
# cells restore a clean state with the providers' .reset() method instead of
# rebuilding them.
import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider

# Send INFO-level messages from the training run to a single stream handler
# (logging.StreamHandler() writes to stderr unless given another stream).
logger = logging.getLogger()
logger.handlers = [logging.StreamHandler()]
logger.setLevel(logging.INFO)

# Fixed seed so that every run starts from the same random state.
seed = 10102016
batch_size = 50
rng = np.random.RandomState(seed)

# MNIST providers for the training and validation sets, built in that order
# and sharing the same random number generator.
train_data, valid_data = [
    MNISTDataProvider(which_set, batch_size=batch_size, rng=rng)
    for which_set in ('train', 'valid')
]

In [None]:
# Import the layers, error function, model, initialisers, learning rule and
# optimiser used by the training cells of this section. The seeded RNG, the
# logger and the MNIST data providers are already created by the preceding
# cell, so they are not rebuilt (and the data is not reloaded) here.
from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer, ELULayer, SELULayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit, SELUInit
from mlp.learning_rules import GradientDescentLearningRule
from mlp.optimisers import Optimiser

In [None]:
# Hyperparameters shared by the experiment cells below.
num_epochs = 100    # number of epochs each model is trained for
stats_interval = 1  # statistics are recorded every `stats_interval` epochs
# Layer sizes used when building the AffineLayer stacks.
input_dim = 784
output_dim = 10
hidden_dim = 100

In [None]:
# Experiment: network with 2 hidden SELU layers
# (affine -> SELU -> affine -> SELU -> affine), trained with plain gradient
# descent on a softmax cross-entropy error.

# NOTE(review): the result lists are re-created here, so anything appended to
# them by a previously run cell is discarded -- confirm this is intended.
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []
learning_rate = 0.1

# Re-seed the generator and reset both data providers before the run.
rng.seed(seed)
train_data.reset()
valid_data.reset()

# NOTE(review): the output-layer initialiser is given output_dim although that
# layer's input size is hidden_dim -- confirm against SELUInit's signature.
weights_init_in = SELUInit(0, input_dim, rng=rng)
weights_init_hid = SELUInit(0, hidden_dim, rng=rng)
weights_init_out = SELUInit(0, output_dim, rng=rng)
biases_init = ConstantInit(0.)
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init_in, biases_init),
    SELULayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init),
    SELULayer(),
    AffineLayer(hidden_dim, output_dim, weights_init_out, biases_init),
])

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)

# Train the model; the returned tuple starts with (stats, keys, run_time).
SELULayer_2 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data,
    num_epochs, stats_interval, notebook=True)

plt.show()

# Read each final statistic from the last row of the stats array once, then
# reuse the values for both the printed report and the result lists.
selu2_stats, selu2_keys, selu2_run_time = SELULayer_2[:3]
selu2_final = {
    stat_name: selu2_stats[-1, selu2_keys[stat_name]]
    for stat_name in ('error(train)', 'error(valid)', 'acc(train)', 'acc(valid)')
}

print('    final error(train) = {0:.2e}'.format(selu2_final['error(train)']))
print('    final error(valid) = {0:.2e}'.format(selu2_final['error(valid)']))
print('    final acc(train)   = {0:.2e}'.format(selu2_final['acc(train)']))
print('    final acc(valid)   = {0:.2e}'.format(selu2_final['acc(valid)']))
print('    run time per epoch = {0:.2f}'.format(selu2_run_time * 1. / num_epochs))

final_errors_train.append(selu2_final['error(train)'])
final_errors_valid.append(selu2_final['error(valid)'])
final_accs_train.append(selu2_final['acc(train)'])
final_accs_valid.append(selu2_final['acc(valid)'])

In [None]:
# Experiment: network with 3 hidden SELU layers
# (affine -> SELU -> affine -> SELU -> affine -> SELU -> affine), trained with
# plain gradient descent on a softmax cross-entropy error.

# NOTE(review): the result lists are re-created here, so anything appended to
# them by a previously run cell is discarded -- confirm this is intended.
final_errors_train, final_errors_valid = [], []
final_accs_train, final_accs_valid = [], []
learning_rate = 0.10  # step size passed to GradientDescentLearningRule

# Re-seed the generator and reset both data providers before the run.
rng.seed(seed)
train_data.reset()
valid_data.reset()

# NOTE(review): the output-layer initialiser is given output_dim although that
# layer's input size is hidden_dim -- confirm against SELUInit's signature.
weights_init_in = SELUInit(0, input_dim, rng=rng)
weights_init_hid = SELUInit(0, hidden_dim, rng=rng)
weights_init_out = SELUInit(0, output_dim, rng=rng)
biases_init = ConstantInit(0.)
# Both hidden-to-hidden layers share the same weights_init_hid initialiser.
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init_in, biases_init),
    SELULayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init),
    SELULayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init_hid, biases_init),
    SELULayer(),
    AffineLayer(hidden_dim, output_dim, weights_init_out, biases_init),
])

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)

# Train the model; the returned tuple starts with (stats, keys, run_time).
SELULayer_3 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data,
    num_epochs, stats_interval, notebook=True)

plt.show()

# Read each final statistic from the last row of the stats array once, then
# reuse the values for both the printed report and the result lists.
selu3_stats, selu3_keys, selu3_run_time = SELULayer_3[:3]
selu3_final = {
    stat_name: selu3_stats[-1, selu3_keys[stat_name]]
    for stat_name in ('error(train)', 'error(valid)', 'acc(train)', 'acc(valid)')
}

print('    final error(train) = {0:.2e}'.format(selu3_final['error(train)']))
print('    final error(valid) = {0:.2e}'.format(selu3_final['error(valid)']))
print('    final acc(train)   = {0:.2e}'.format(selu3_final['acc(train)']))
print('    final acc(valid)   = {0:.2e}'.format(selu3_final['acc(valid)']))
print('    run time per epoch = {0:.2f}'.format(selu3_run_time * 1. / num_epochs))

final_errors_train.append(selu3_final['error(train)'])
final_errors_valid.append(selu3_final['error(valid)'])
final_accs_train.append(selu3_final['acc(train)'])
final_accs_valid.append(selu3_final['acc(valid)'])

In [None]:
# Overlay the error curves of the 2- and 3-hidden-layer SELU runs: one figure
# for the training set and one for the validation set, each saved as a PDF.
# Every entry of `models` is the tuple returned by train_model_and_plot_stats;
# item 0 is the stats array and item 1 maps a statistic name to its column.
models = [SELULayer_2, SELULayer_3]
models_name = ['SELULayer_2', 'SELULayer_3']

fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)

# The loop variable is deliberately not called `model`, so the network object
# bound to that global name by the training cells is not overwritten. Each
# curve carries its own run name as its label, so the legend is built from the
# plotted lines themselves rather than from a separately ordered list.
for run_name, run_result in zip(models_name, models):
    run_stats, run_keys = run_result[0], run_result[1]
    # Row 0 of the stats array is skipped, so the x axis starts at the first
    # stats interval.
    epoch_axis = np.arange(1, run_stats.shape[0]) * stats_interval
    ax_1.plot(epoch_axis, run_stats[1:, run_keys['error(train)']], label=run_name)
    ax_2.plot(epoch_axis, run_stats[1:, run_keys['error(valid)']], label=run_name)

for axis, title in ((ax_1, 'Error(Train)'), (ax_2, 'Error(Valid)')):
    axis.legend(loc=0)
    axis.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
    axis.set_ylabel('Error', fontsize=12, fontweight=1000)
    axis.set_title(title, fontsize=14)

fig_1.savefig('noris_inout_errt.pdf')
fig_2.savefig('noris_inout_errv.pdf')

In [None]:
# Overlay the accuracy curves of the 2- and 3-hidden-layer SELU runs: one
# figure for the training set and one for the validation set, each saved as a
# PDF. Every entry of `models` is the tuple returned by
# train_model_and_plot_stats; item 0 is the stats array and item 1 maps a
# statistic name to its column.
models = [SELULayer_2, SELULayer_3]
models_name = ['SELULayer_2', 'SELULayer_3']

fig_1 = plt.figure(figsize=(10, 8))
ax_1 = fig_1.add_subplot(111)
fig_2 = plt.figure(figsize=(10, 8))
ax_2 = fig_2.add_subplot(111)

# The loop variable is deliberately not called `model`, so the network object
# bound to that global name by the training cells is not overwritten. Each
# curve carries its own run name as its label, so the legend is built from the
# plotted lines themselves rather than from a separately ordered list.
for run_name, run_result in zip(models_name, models):
    run_stats, run_keys = run_result[0], run_result[1]
    # Row 0 of the stats array is skipped, so the x axis starts at the first
    # stats interval.
    epoch_axis = np.arange(1, run_stats.shape[0]) * stats_interval
    ax_1.plot(epoch_axis, run_stats[1:, run_keys['acc(train)']], label=run_name)
    ax_2.plot(epoch_axis, run_stats[1:, run_keys['acc(valid)']], label=run_name)

for axis, title in ((ax_1, 'Acc(Train)'), (ax_2, 'Acc(Valid)')):
    axis.legend(loc=0)
    axis.set_xlabel('Epoch number', fontsize=12, fontweight=1000)
    axis.set_ylabel('Acc', fontsize=12, fontweight=1000)
    axis.set_title(title, fontsize=14)

fig_1.savefig('noris_inout_acct.pdf')
fig_2.savefig('noris_inout_accv.pdf')