In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

def _plot_stat_curves(stats, keys, stats_interval, stat_names, y_label):
    """Plot the named columns of `stats` against epoch number on a new figure.

    Row 0 of `stats` is skipped (presumably the statistics recorded before
    any training has happened -- confirm against Optimiser.train), so the
    x axis starts at `stats_interval`.

    Returns the `(figure, axes)` pair that was created.
    """
    fig = plt.figure(figsize=(8, 4))
    ax = fig.add_subplot(111)
    epochs = np.arange(1, stats.shape[0]) * stats_interval
    for stat_name in stat_names:
        ax.plot(epochs, stats[1:, keys[stat_name]], label=stat_name)
    ax.legend(loc=0)
    ax.set_xlabel('Epoch number')
    ax.set_ylabel(y_label)
    return fig, ax


def train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs,
        stats_interval, notebook=True, scheduler=None, fileName=None):
    """Train `model` with an Optimiser and plot error/accuracy curves.

    `model`, `error`, `learning_rule`, `train_data`, `valid_data`, `notebook`
    and `scheduler` are passed straight through to `Optimiser`; `num_epochs`
    and `stats_interval` are passed to `Optimiser.train`.

    If `fileName` is not None the two figures are saved as
    `fileName + "_error.pdf"` and `fileName + "_acc.pdf"`.

    Returns `(stats, keys, run_time, fig_1, ax_1, fig_2, ax_2)` where the
    first three items are whatever `Optimiser.train` returned, `fig_1/ax_1`
    hold the error curves and `fig_2/ax_2` the accuracy curves.

    Note: this function looks up `np`, `plt` and `Optimiser` in the notebook
    namespace when it is *called*; in this notebook `np` and `Optimiser` are
    imported by later cells, so those cells must have run first.
    """
    # As well as monitoring the error over training also monitor classification
    # accuracy i.e. proportion of most-probable predicted classes being equal to targets
    data_monitors = {'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}

    # Use the created objects to initialise a new Optimiser instance.
    optimiser = Optimiser(
        model, error, learning_rule, train_data, valid_data, data_monitors,
        notebook=notebook, scheduler=scheduler)

    # Run the optimiser for `num_epochs` epochs, recording statistics every
    # `stats_interval` epochs.
    stats, keys, run_time = optimiser.train(
        num_epochs=num_epochs, stats_interval=stats_interval)

    # Plot the change in the validation and training set error over training.
    fig_1, ax_1 = _plot_stat_curves(
        stats, keys, stats_interval, ['error(train)', 'error(valid)'], 'Error')

    # Plot the change in the validation and training set accuracy over training.
    fig_2, ax_2 = _plot_stat_curves(
        stats, keys, stats_interval, ['acc(train)', 'acc(valid)'], 'Accuracy')

    # Identity comparison is the correct way to test for None.
    if fileName is not None:
        errorFile = fileName + "_error.pdf"
        accFile = fileName + "_acc.pdf"
        fig_1.tight_layout()  # This minimises whitespace around the axes.
        fig_1.savefig(errorFile)
        fig_2.tight_layout()  # This minimises whitespace around the axes.
        fig_2.savefig(accFile)
    return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2

In [None]:
# The code below sets up the data providers, random number
# generator and logger objects needed for the training runs.
# Because loading the data from file takes a little while, you
# will generally not want to re-create the data providers on
# every training run. If you wish to reset their state you
# should instead use the .reset() method of the data providers.
import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider, EMNISTDataProvider

# Seed a random number generator. This single RandomState instance is shared:
# it is passed to all three data providers below.
seed = 11102018 
rng = np.random.RandomState(seed)
batch_size = 100
# Set up the root logger to print info about the training run. Note that
# logging.StreamHandler() with no argument writes to sys.stderr (not stdout),
# and assigning to .handlers replaces any handlers already attached.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [logging.StreamHandler()]

# Create data provider objects for the EMNIST data set
# (training, validation and test splits, all using the same batch size).
train_data = EMNISTDataProvider('train', batch_size=batch_size, rng=rng)
valid_data = EMNISTDataProvider('valid', batch_size=batch_size, rng=rng)
test_data = EMNISTDataProvider('test', batch_size=batch_size, rng=rng)

KeysView(<numpy.lib.npyio.NpzFile object at 0x7ff270372e48>)
KeysView(<numpy.lib.npyio.NpzFile object at 0x7ff270372438>)
KeysView(<numpy.lib.npyio.NpzFile object at 0x7ff270372e48>)


In [None]:
# ---- SGD with a cosine-annealing scheduler and no warm restarts ----

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import AdamLearningRule,GradientDescentLearningRule
from mlp.optimisers import Optimiser
from mlp.schedulers import CosineAnnealingWithWarmRestarts

# Set up hyperparameters.
learning_rate = 0.0075
# Each learning_range r gives a schedule between learning_rate / r and
# learning_rate * r.
learning_ranges = [1.05,1.1,1.15,1.2]
num_epochs = 100
stats_interval = 1
input_dim, output_dim, hidden_dim = 784, 47, 100

# Keep (stats, keys, run_time) for every run, keyed by learning_range, so the
# results can be compared afterwards instead of being thrown away.
sgd_no_restart_runs = {}

for learning_range in learning_ranges:

    # A fresh model is built for every run; the shared `rng` is not re-seeded,
    # so each run gets a different random initialisation.
    weights_init = GlorotUniformInit(rng=rng)
    biases_init = ConstantInit(0.)
    model = MultipleLayerModel([
        AffineLayer(input_dim, hidden_dim, weights_init, biases_init), 
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init), 
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
    ])

    # "No restarts": a single annealing period that spans the whole run.
    # The period is tied to num_epochs (previously a hard-coded 100, the same
    # value) so the schedule stays restart-free if num_epochs is changed.
    sgd_scheduler_no_restart = CosineAnnealingWithWarmRestarts(
        min_learning_rate=learning_rate / learning_range,
        max_learning_rate=learning_rate * learning_range,
        total_iters_per_period=num_epochs,
        max_learning_rate_discount_factor=0.9,
        period_iteration_expansion_factor=1.0)

    error = CrossEntropySoftmaxError()

    # Use a basic gradient descent learning rule
    learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)

    #Remember to use notebook=False when you write a script to be run in a terminal
    stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval,
        notebook=True, scheduler=sgd_scheduler_no_restart)

    # Label the figures so the runs for different ranges can be told apart.
    run_title = 'SGD, cosine annealing, no restarts (learning_range={})'.format(learning_range)
    ax_1.set_title(run_title)
    ax_2.set_title(run_title)

    sgd_no_restart_runs[learning_range] = (stats, keys, run_time)

HBox(children=(IntProgress(value=0), HTML(value='')))

learning_rate 0.007875
epoch 1


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 1: 3.1s to complete
    error(train)=3.04e+00, acc(train)=2.65e-01, error(valid)=3.05e+00, acc(valid)=2.51e-01


learning_rate 0.00787481936584817
epoch 2


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 2: 2.9s to complete
    error(train)=1.72e+00, acc(train)=5.30e-01, error(valid)=1.72e+00, acc(valid)=5.30e-01


learning_rate 0.007874277641656779
epoch 3


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 3: 2.8s to complete
    error(train)=1.42e+00, acc(train)=6.02e-01, error(valid)=1.42e+00, acc(valid)=5.98e-01


learning_rate 0.007873375362042198
epoch 4


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 4: 2.9s to complete
    error(train)=1.28e+00, acc(train)=6.37e-01, error(valid)=1.29e+00, acc(valid)=6.30e-01


learning_rate 0.00787211341744548
epoch 5


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 5: 2.9s to complete
    error(train)=1.19e+00, acc(train)=6.61e-01, error(valid)=1.19e+00, acc(valid)=6.58e-01


learning_rate 0.007870493053253578
epoch 6


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 6: 2.9s to complete
    error(train)=1.12e+00, acc(train)=6.81e-01, error(valid)=1.12e+00, acc(valid)=6.76e-01


learning_rate 0.007868515868570323
epoch 7


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 7: 2.8s to complete
    error(train)=1.05e+00, acc(train)=6.98e-01, error(valid)=1.06e+00, acc(valid)=6.96e-01


learning_rate 0.007866183814638291
epoch 8


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 8: 2.8s to complete
    error(train)=1.00e+00, acc(train)=7.10e-01, error(valid)=1.01e+00, acc(valid)=7.07e-01


learning_rate 0.007863499192913159
epoch 9


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 9: 2.8s to complete
    error(train)=9.55e-01, acc(train)=7.22e-01, error(valid)=9.66e-01, acc(valid)=7.18e-01


learning_rate 0.007860464652792453
epoch 10


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 10: 2.8s to complete
    error(train)=9.10e-01, acc(train)=7.33e-01, error(valid)=9.23e-01, acc(valid)=7.31e-01


learning_rate 0.007857083189000904
epoch 11


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 11: 2.9s to complete
    error(train)=8.68e-01, acc(train)=7.45e-01, error(valid)=8.83e-01, acc(valid)=7.41e-01


learning_rate 0.007853358138635028
epoch 12


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 12: 2.9s to complete
    error(train)=8.37e-01, acc(train)=7.52e-01, error(valid)=8.53e-01, acc(valid)=7.46e-01


learning_rate 0.007849293177869806
epoch 13


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 13: 2.9s to complete
    error(train)=8.07e-01, acc(train)=7.61e-01, error(valid)=8.27e-01, acc(valid)=7.55e-01


learning_rate 0.007844892318330743
epoch 14


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 14: 2.8s to complete
    error(train)=7.89e-01, acc(train)=7.62e-01, error(valid)=8.09e-01, acc(valid)=7.54e-01


learning_rate 0.007840159903134882
epoch 15


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 15: 2.9s to complete
    error(train)=7.62e-01, acc(train)=7.71e-01, error(valid)=7.84e-01, acc(valid)=7.63e-01


learning_rate 0.00783510060260467
epoch 16


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 16: 2.9s to complete
    error(train)=7.42e-01, acc(train)=7.74e-01, error(valid)=7.67e-01, acc(valid)=7.66e-01


learning_rate 0.007829719409658915
epoch 17


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 17: 2.9s to complete
    error(train)=7.23e-01, acc(train)=7.81e-01, error(valid)=7.49e-01, acc(valid)=7.72e-01


learning_rate 0.007824021634885372
epoch 18


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 18: 3.0s to complete
    error(train)=7.05e-01, acc(train)=7.85e-01, error(valid)=7.31e-01, acc(valid)=7.76e-01


learning_rate 0.007818012901299845
epoch 19


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 19: 3.0s to complete
    error(train)=6.89e-01, acc(train)=7.90e-01, error(valid)=7.17e-01, acc(valid)=7.82e-01


learning_rate 0.0078116991387969376
epoch 20


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 20: 2.9s to complete
    error(train)=6.73e-01, acc(train)=7.93e-01, error(valid)=7.05e-01, acc(valid)=7.82e-01


learning_rate 0.007805086578297972
epoch 21


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 21: 2.9s to complete
    error(train)=6.60e-01, acc(train)=7.95e-01, error(valid)=6.90e-01, acc(valid)=7.84e-01


learning_rate 0.007798181745601815
epoch 22


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 22: 2.9s to complete
    error(train)=6.54e-01, acc(train)=7.97e-01, error(valid)=6.86e-01, acc(valid)=7.88e-01


learning_rate 0.007790991454944708
epoch 23


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 23: 2.9s to complete
    error(train)=6.40e-01, acc(train)=8.02e-01, error(valid)=6.80e-01, acc(valid)=7.89e-01


learning_rate 0.007783522802275436
epoch 24


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 24: 2.8s to complete
    error(train)=6.30e-01, acc(train)=8.04e-01, error(valid)=6.67e-01, acc(valid)=7.94e-01


learning_rate 0.007775783158252481
epoch 25


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 25: 2.9s to complete
    error(train)=6.12e-01, acc(train)=8.09e-01, error(valid)=6.51e-01, acc(valid)=7.96e-01


learning_rate 0.007767780160970075
epoch 26


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 26: 3.0s to complete
    error(train)=6.06e-01, acc(train)=8.11e-01, error(valid)=6.46e-01, acc(valid)=7.95e-01


learning_rate 0.007759521708420323
epoch 27


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 27: 3.0s to complete
    error(train)=5.93e-01, acc(train)=8.14e-01, error(valid)=6.35e-01, acc(valid)=8.00e-01


learning_rate 0.0077510159506988365
epoch 28


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 28: 3.0s to complete
    error(train)=5.84e-01, acc(train)=8.16e-01, error(valid)=6.29e-01, acc(valid)=8.01e-01


learning_rate 0.007742271281961574
epoch 29


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 29: 2.8s to complete
    error(train)=5.78e-01, acc(train)=8.17e-01, error(valid)=6.24e-01, acc(valid)=8.02e-01


learning_rate 0.0077332963321408215
epoch 30


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 30: 2.9s to complete
    error(train)=5.70e-01, acc(train)=8.20e-01, error(valid)=6.16e-01, acc(valid)=8.06e-01


learning_rate 0.0077240999584284945
epoch 31


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 31: 2.9s to complete
    error(train)=5.66e-01, acc(train)=8.19e-01, error(valid)=6.16e-01, acc(valid)=8.04e-01


learning_rate 0.007714691236535155
epoch 32


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 32: 2.9s to complete
    error(train)=5.51e-01, acc(train)=8.25e-01, error(valid)=6.01e-01, acc(valid)=8.08e-01


learning_rate 0.0077050794517333826
epoch 33


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 33: 3.0s to complete
    error(train)=5.49e-01, acc(train)=8.25e-01, error(valid)=6.01e-01, acc(valid)=8.08e-01


learning_rate 0.007695274089694332
epoch 34


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 34: 2.9s to complete
    error(train)=5.36e-01, acc(train)=8.29e-01, error(valid)=5.87e-01, acc(valid)=8.13e-01


learning_rate 0.007685284827126521
epoch 35


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 35: 3.3s to complete
    error(train)=5.32e-01, acc(train)=8.30e-01, error(valid)=5.88e-01, acc(valid)=8.14e-01


learning_rate 0.007675121522226084
epoch 36


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 36: 3.2s to complete
    error(train)=5.32e-01, acc(train)=8.29e-01, error(valid)=5.89e-01, acc(valid)=8.11e-01


learning_rate 0.007664794204947928
epoch 37


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 37: 3.4s to complete
    error(train)=5.20e-01, acc(train)=8.34e-01, error(valid)=5.76e-01, acc(valid)=8.15e-01


learning_rate 0.007654313067107375
epoch 38


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 38: 2.9s to complete
    error(train)=5.23e-01, acc(train)=8.32e-01, error(valid)=5.82e-01, acc(valid)=8.13e-01


learning_rate 0.00764368845232207
epoch 39


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 39: 2.8s to complete
    error(train)=5.11e-01, acc(train)=8.34e-01, error(valid)=5.72e-01, acc(valid)=8.15e-01


learning_rate 0.00763293084580408
epoch 40


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 40: 2.8s to complete
    error(train)=5.04e-01, acc(train)=8.37e-01, error(valid)=5.67e-01, acc(valid)=8.19e-01


learning_rate 0.007622050864012258
epoch 41


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 41: 2.9s to complete
    error(train)=5.00e-01, acc(train)=8.39e-01, error(valid)=5.63e-01, acc(valid)=8.21e-01


learning_rate 0.007611059244175075
epoch 42


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 42: 2.9s to complete
    error(train)=4.97e-01, acc(train)=8.40e-01, error(valid)=5.62e-01, acc(valid)=8.20e-01


learning_rate 0.007599966833694277
epoch 43


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 43: 2.9s to complete
    error(train)=4.92e-01, acc(train)=8.41e-01, error(valid)=5.58e-01, acc(valid)=8.23e-01


learning_rate 0.0075887845794398055
epoch 44


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 44: 2.9s to complete
    error(train)=4.85e-01, acc(train)=8.42e-01, error(valid)=5.54e-01, acc(valid)=8.23e-01


learning_rate 0.00757752351694656
epoch 45


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 45: 2.9s to complete
    error(train)=4.84e-01, acc(train)=8.42e-01, error(valid)=5.54e-01, acc(valid)=8.22e-01


learning_rate 0.007566194759523656
epoch 46


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 46: 2.8s to complete
    error(train)=4.83e-01, acc(train)=8.44e-01, error(valid)=5.51e-01, acc(valid)=8.24e-01


learning_rate 0.007554809487286933
epoch 47


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 47: 2.9s to complete
    error(train)=4.75e-01, acc(train)=8.46e-01, error(valid)=5.48e-01, acc(valid)=8.23e-01


learning_rate 0.007543378936125528
epoch 48


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 48: 2.9s to complete
    error(train)=4.70e-01, acc(train)=8.47e-01, error(valid)=5.44e-01, acc(valid)=8.23e-01


learning_rate 0.0075319143866134095
epoch 49


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 49: 2.9s to complete
    error(train)=4.71e-01, acc(train)=8.47e-01, error(valid)=5.48e-01, acc(valid)=8.24e-01


learning_rate 0.007520427152876815
epoch 50


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 50: 2.8s to complete
    error(train)=4.64e-01, acc(train)=8.48e-01, error(valid)=5.40e-01, acc(valid)=8.27e-01


learning_rate 0.007508928571428571
epoch 51


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 51: 2.8s to complete
    error(train)=4.61e-01, acc(train)=8.50e-01, error(valid)=5.39e-01, acc(valid)=8.27e-01


learning_rate 0.007497429989980328
epoch 52


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 52: 2.8s to complete
    error(train)=4.58e-01, acc(train)=8.51e-01, error(valid)=5.34e-01, acc(valid)=8.28e-01


learning_rate 0.007485942756243733
epoch 53


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 53: 2.8s to complete
    error(train)=4.55e-01, acc(train)=8.50e-01, error(valid)=5.32e-01, acc(valid)=8.29e-01


learning_rate 0.007474478206731615
epoch 54


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 54: 2.9s to complete
    error(train)=4.52e-01, acc(train)=8.51e-01, error(valid)=5.34e-01, acc(valid)=8.27e-01


learning_rate 0.00746304765557021
epoch 55


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 55: 2.9s to complete
    error(train)=4.50e-01, acc(train)=8.52e-01, error(valid)=5.31e-01, acc(valid)=8.29e-01


learning_rate 0.007451662383333487
epoch 56


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 56: 2.8s to complete
    error(train)=4.47e-01, acc(train)=8.52e-01, error(valid)=5.31e-01, acc(valid)=8.32e-01


learning_rate 0.007440333625910582
epoch 57


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 57: 2.8s to complete
    error(train)=4.40e-01, acc(train)=8.55e-01, error(valid)=5.26e-01, acc(valid)=8.29e-01


learning_rate 0.007429072563417337
epoch 58


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 58: 2.9s to complete
    error(train)=4.38e-01, acc(train)=8.55e-01, error(valid)=5.28e-01, acc(valid)=8.28e-01


learning_rate 0.007417890309162865
epoch 59


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 59: 2.8s to complete
    error(train)=4.34e-01, acc(train)=8.57e-01, error(valid)=5.23e-01, acc(valid)=8.32e-01


learning_rate 0.007406797898682068
epoch 60


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 60: 2.9s to complete
    error(train)=4.34e-01, acc(train)=8.55e-01, error(valid)=5.26e-01, acc(valid)=8.30e-01


learning_rate 0.007395806278844885
epoch 61


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 61: 2.9s to complete
    error(train)=4.27e-01, acc(train)=8.59e-01, error(valid)=5.18e-01, acc(valid)=8.33e-01


learning_rate 0.007384926297053063
epoch 62


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 62: 2.9s to complete
    error(train)=4.26e-01, acc(train)=8.59e-01, error(valid)=5.20e-01, acc(valid)=8.32e-01


learning_rate 0.007374168690535073
epoch 63


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 63: 2.8s to complete
    error(train)=4.24e-01, acc(train)=8.59e-01, error(valid)=5.17e-01, acc(valid)=8.33e-01


learning_rate 0.007363544075749768
epoch 64


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 64: 2.8s to complete
    error(train)=4.25e-01, acc(train)=8.59e-01, error(valid)=5.21e-01, acc(valid)=8.32e-01


learning_rate 0.007353062937909214
epoch 65


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 65: 2.8s to complete
    error(train)=4.23e-01, acc(train)=8.58e-01, error(valid)=5.21e-01, acc(valid)=8.32e-01


learning_rate 0.007342735620631058
epoch 66


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 66: 2.9s to complete
    error(train)=4.18e-01, acc(train)=8.60e-01, error(valid)=5.16e-01, acc(valid)=8.33e-01


learning_rate 0.007332572315730622
epoch 67


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 67: 2.8s to complete
    error(train)=4.16e-01, acc(train)=8.60e-01, error(valid)=5.14e-01, acc(valid)=8.31e-01


learning_rate 0.007322583053162811
epoch 68


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 68: 2.8s to complete
    error(train)=4.15e-01, acc(train)=8.61e-01, error(valid)=5.17e-01, acc(valid)=8.33e-01


learning_rate 0.00731277769112376
epoch 69


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 69: 2.9s to complete
    error(train)=4.11e-01, acc(train)=8.62e-01, error(valid)=5.14e-01, acc(valid)=8.32e-01


learning_rate 0.007303165906321987
epoch 70


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 70: 2.8s to complete
    error(train)=4.08e-01, acc(train)=8.63e-01, error(valid)=5.15e-01, acc(valid)=8.32e-01


learning_rate 0.007293757184428648
epoch 71


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 71: 2.8s to complete
    error(train)=4.14e-01, acc(train)=8.61e-01, error(valid)=5.20e-01, acc(valid)=8.33e-01


learning_rate 0.007284560810716321
epoch 72


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 72: 2.9s to complete
    error(train)=4.01e-01, acc(train)=8.66e-01, error(valid)=5.08e-01, acc(valid)=8.37e-01


learning_rate 0.007275585860895569
epoch 73


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 73: 2.9s to complete
    error(train)=4.05e-01, acc(train)=8.64e-01, error(valid)=5.13e-01, acc(valid)=8.34e-01


learning_rate 0.007266841192158306
epoch 74


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 74: 2.9s to complete
    error(train)=3.97e-01, acc(train)=8.68e-01, error(valid)=5.05e-01, acc(valid)=8.38e-01


learning_rate 0.007258335434436819
epoch 75


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 75: 2.9s to complete
    error(train)=4.01e-01, acc(train)=8.65e-01, error(valid)=5.11e-01, acc(valid)=8.34e-01


learning_rate 0.007250076981887068
epoch 76


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 76: 2.9s to complete
    error(train)=3.96e-01, acc(train)=8.67e-01, error(valid)=5.09e-01, acc(valid)=8.37e-01


learning_rate 0.007242073984604662
epoch 77


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 77: 2.8s to complete
    error(train)=3.95e-01, acc(train)=8.67e-01, error(valid)=5.10e-01, acc(valid)=8.36e-01


learning_rate 0.007234334340581706
epoch 78


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 78: 2.8s to complete
    error(train)=3.92e-01, acc(train)=8.67e-01, error(valid)=5.09e-01, acc(valid)=8.35e-01


learning_rate 0.007226865687912434
epoch 79


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 79: 2.8s to complete
    error(train)=3.94e-01, acc(train)=8.66e-01, error(valid)=5.12e-01, acc(valid)=8.32e-01


learning_rate 0.007219675397255328
epoch 80


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 80: 2.8s to complete
    error(train)=3.90e-01, acc(train)=8.68e-01, error(valid)=5.06e-01, acc(valid)=8.36e-01


learning_rate 0.0072127705645591705
epoch 81


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 81: 2.9s to complete
    error(train)=3.90e-01, acc(train)=8.68e-01, error(valid)=5.07e-01, acc(valid)=8.38e-01


learning_rate 0.007206158004060205
epoch 82


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 82: 2.9s to complete
    error(train)=3.85e-01, acc(train)=8.68e-01, error(valid)=5.06e-01, acc(valid)=8.33e-01


learning_rate 0.007199844241557298
epoch 83


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 83: 2.9s to complete
    error(train)=3.85e-01, acc(train)=8.69e-01, error(valid)=5.07e-01, acc(valid)=8.36e-01


learning_rate 0.0071938355079717705
epoch 84


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 84: 2.9s to complete
    error(train)=3.82e-01, acc(train)=8.70e-01, error(valid)=5.07e-01, acc(valid)=8.36e-01


learning_rate 0.0071881377331982285
epoch 85


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 85: 2.9s to complete
    error(train)=3.77e-01, acc(train)=8.73e-01, error(valid)=5.00e-01, acc(valid)=8.39e-01


learning_rate 0.007182756540252472
epoch 86


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Epoch 86: 2.9s to complete
    error(train)=3.82e-01, acc(train)=8.71e-01, error(valid)=5.10e-01, acc(valid)=8.37e-01


learning_rate 0.0071776972397222605
epoch 87


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

In [None]:
# ---- SGD with a cosine-annealing scheduler and warm restarts ----

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import AdamLearningRule,GradientDescentLearningRule
from mlp.optimisers import Optimiser
from mlp.schedulers import CosineAnnealingWithWarmRestarts

# Set up hyperparameters.
learning_rate = 0.0075
# Each learning_range r gives a schedule between learning_rate / r and
# learning_rate * r.
learning_ranges = [1.05,1.1,1.15,1.2]
num_epochs = 100
stats_interval = 1
input_dim, output_dim, hidden_dim = 784, 47, 100

# Keep (stats, keys, run_time) for every run, keyed by learning_range, so the
# results can be compared afterwards instead of being thrown away.
sgd_special_restart_runs = {}

for learning_range in learning_ranges:

    # A fresh model is built for every run; the shared `rng` is not re-seeded,
    # so each run gets a different random initialisation.
    weights_init = GlorotUniformInit(rng=rng)
    biases_init = ConstantInit(0.)
    model = MultipleLayerModel([
        AffineLayer(input_dim, hidden_dim, weights_init, biases_init), 
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init), 
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
    ])

    # Warm-restart schedule: first period of 25, expansion factor 3.0.
    # NOTE(review): presumably the second period is 75 long, so 25 + 75 covers
    # the 100 epochs with a single restart -- confirm against mlp.schedulers.
    sgd_scheduler_special = CosineAnnealingWithWarmRestarts(
        min_learning_rate=learning_rate / learning_range,
        max_learning_rate=learning_rate * learning_range,
        total_iters_per_period=25,
        max_learning_rate_discount_factor=0.9,
        period_iteration_expansion_factor=3.0)

    error = CrossEntropySoftmaxError()

    # Use a basic gradient descent learning rule
    learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)

    #Remember to use notebook=False when you write a script to be run in a terminal
    stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval,
        notebook=True, scheduler=sgd_scheduler_special)

    # Label the figures so the runs for different ranges can be told apart.
    run_title = 'SGD, cosine annealing, warm restarts (learning_range={})'.format(learning_range)
    ax_1.set_title(run_title)
    ax_2.set_title(run_title)

    sgd_special_restart_runs[learning_range] = (stats, keys, run_time)

In [None]:
# ---- Adam baseline with a cosine-annealing scheduler and no warm restarts ----

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import AdamLearningRule,GradientDescentLearningRule,RMSPropLearningRule
from mlp.optimisers import Optimiser
from mlp.schedulers import CosineAnnealingWithWarmRestarts

# Set up hyperparameters.
learning_rate = 1e-4
# Each learning_range r gives a schedule between learning_rate / r and
# learning_rate * r.
learning_ranges = [1.05,1.1,1.15,1.2]
num_epochs = 100
stats_interval = 1
input_dim, output_dim, hidden_dim = 784, 47, 100

# Keep (stats, keys, run_time) for every run, keyed by learning_range, so the
# results can be compared afterwards instead of being thrown away.
adam_no_restart_runs = {}

for learning_range in learning_ranges:

    # A fresh model is built for every run; the shared `rng` is not re-seeded,
    # so each run gets a different random initialisation.
    weights_init = GlorotUniformInit(rng=rng)
    biases_init = ConstantInit(0.)
    model = MultipleLayerModel([
        AffineLayer(input_dim, hidden_dim, weights_init, biases_init), 
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init), 
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
    ])

    # "No restarts": a single annealing period that spans the whole run.
    # The period is tied to num_epochs (previously a hard-coded 100, the same
    # value) so the schedule stays restart-free if num_epochs is changed.
    adam_scheduler_no_restart = CosineAnnealingWithWarmRestarts(
        min_learning_rate=learning_rate / learning_range,
        max_learning_rate=learning_rate * learning_range,
        total_iters_per_period=num_epochs,
        max_learning_rate_discount_factor=0.9,
        period_iteration_expansion_factor=1.0)

    error = CrossEntropySoftmaxError() 

    # Use the Adam learning rule (a new instance per run, paired with the new model).
    learning_rule = AdamLearningRule(learning_rate=learning_rate)

    #Remember to use notebook=False when you write a script to be run in a terminal
    stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval,
        notebook=True, scheduler=adam_scheduler_no_restart)

    # Label the figures so the runs for different ranges can be told apart.
    run_title = 'Adam, cosine annealing, no restarts (learning_range={})'.format(learning_range)
    ax_1.set_title(run_title)
    ax_2.set_title(run_title)

    adam_no_restart_runs[learning_range] = (stats, keys, run_time)

In [None]:
# ---- Adam with a cosine-annealing scheduler and warm restarts ----

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import AdamLearningRule,GradientDescentLearningRule,RMSPropLearningRule
from mlp.optimisers import Optimiser
from mlp.schedulers import CosineAnnealingWithWarmRestarts

# Set up hyperparameters.
learning_rate = 1e-4
# Each learning_range r gives a schedule between learning_rate / r and
# learning_rate * r.
learning_ranges = [1.05,1.1,1.15,1.2]
num_epochs = 100
stats_interval = 1
input_dim, output_dim, hidden_dim = 784, 47, 100

# Keep (stats, keys, run_time) for every run, keyed by learning_range, so the
# results can be compared afterwards instead of being thrown away.
adam_special_restart_runs = {}

for learning_range in learning_ranges:

    # A fresh model is built for every run; the shared `rng` is not re-seeded,
    # so each run gets a different random initialisation.
    weights_init = GlorotUniformInit(rng=rng)
    biases_init = ConstantInit(0.)
    model = MultipleLayerModel([
        AffineLayer(input_dim, hidden_dim, weights_init, biases_init), 
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init), 
        ReluLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
        ReluLayer(),
        AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
    ])

    # Warm-restart schedule: first period of 25, expansion factor 3.0.
    # NOTE(review): presumably the second period is 75 long, so 25 + 75 covers
    # the 100 epochs with a single restart -- confirm against mlp.schedulers.
    adam_scheduler_special = CosineAnnealingWithWarmRestarts(
        min_learning_rate=learning_rate / learning_range,
        max_learning_rate=learning_rate * learning_range,
        total_iters_per_period=25,
        max_learning_rate_discount_factor=0.9,
        period_iteration_expansion_factor=3.0)

    error = CrossEntropySoftmaxError() 

    # Use the Adam learning rule (a new instance per run, paired with the new model).
    learning_rule = AdamLearningRule(learning_rate=learning_rate)

    #Remember to use notebook=False when you write a script to be run in a terminal
    stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval,
        notebook=True, scheduler=adam_scheduler_special)

    # Label the figures so the runs for different ranges can be told apart.
    run_title = 'Adam, cosine annealing, warm restarts (learning_range={})'.format(learning_range)
    ax_1.set_title(run_title)
    ax_2.set_title(run_title)

    adam_special_restart_runs[learning_range] = (stats, keys, run_time)