In [None]:
import os
import sys; sys.path.append('../lib')
from functools import partial

import matplotlib.pyplot as plt

from assignment2 import visualize_learning_curves
from assignment3 import *
from data import Cifar
from gradients import compare_gradients
from history import TrainHistory
from multi_layer_fully_connected import MultiLayerFullyConnected
from search import SearchParam, SearchResultSeries, search

# Constants

In [None]:
# Paths are relative to the directory the notebook is run from.
DATA_DIR = '../data'        # handed to Cifar(...) when loading the dataset
PICKLE_DIR = '../pickle'    # target of every .save(...) / .load(...) call below
FIGURE_DIR = '../figures'   # target of every plt.savefig(...) call below

# Load dataset

In [None]:
# Create the Cifar dataset object, pointing it at DATA_DIR.
dataset = Cifar(DATA_DIR)

# Split into training, validation and test set

In [None]:
# Three-way split used by every experiment below.
# NOTE(review): n_val=5000 is presumably the validation-set size and 'zscore'
# the normalisation mode — confirm against Cifar.train_val_test_split.
(data_train, data_val, data_test) = dataset.train_val_test_split(
    n_val=5000,
    normalize='zscore',
)

# Train three layer network

In [None]:
# Baseline without batchnorm: hidden_nodes=[50, 50].
network_three_layers = MultiLayerFullyConnected(
    input_size=data_train.input_size,
    num_classes=data_train.num_classes,
    hidden_nodes=[50, 50],
    alpha=0.005,
    random_seed=0,
)

# Cyclic training on the training split, with the validation split passed
# alongside it.
history_three_layers = network_three_layers.train_cyclic(
    data_train, data_val,
    eta_ss=2250, n_cycles=2,
    shuffle=True, verbose=True,
)

# Persist the history under PICKLE_DIR so later cells can reload it.
history_three_layers.save(PICKLE_DIR, postfix='three_layers')

In [None]:
# Reload the history pickled under postfix 'three_layers' (this rebinds the
# in-memory variable of the same name).
history_three_layers = TrainHistory.load(PICKLE_DIR, postfix='three_layers')

visualize_learning_curves(history_three_layers)

# Must follow the plotting call directly: savefig writes pyplot's current figure.
plt.savefig(os.path.join(FIGURE_DIR, 'learning_curves_three_layers.svg'))

# Train nine layer network

In [None]:
# Deeper network without batchnorm: eight hidden layers.
network_nine_layers = MultiLayerFullyConnected(
    input_size=data_train.input_size,
    num_classes=data_train.num_classes,
    hidden_nodes=[50, 30, 20, 20, 10, 10, 10, 10],
    alpha=0.005,
    random_seed=0,
)

# NOTE(review): unlike the three-layer cells, `shuffle` is not passed here, so
# train_cyclic's default applies — confirm this difference is intentional.
history_nine_layers = network_nine_layers.train_cyclic(
    data_train, data_val,
    eta_ss=2250, n_cycles=2,
    verbose=True,
)

# Persist the history under PICKLE_DIR so later cells can reload it.
history_nine_layers.save(PICKLE_DIR, postfix='nine_layers')

In [None]:
# Reload the history pickled under postfix 'nine_layers' (this rebinds the
# in-memory variable of the same name).
history_nine_layers = TrainHistory.load(PICKLE_DIR, postfix='nine_layers')

visualize_learning_curves(history_nine_layers)

# Must follow the plotting call directly: savefig writes pyplot's current figure.
plt.savefig(os.path.join(FIGURE_DIR, 'learning_curves_nine_layers.svg'))

# Compare gradients with batchnorm

In [None]:
# Gradient check for batch-normalised networks with one and two hidden layers.
for hidden_nodes in ([50], [50, 50]):
    # Banner line such as "50x50 hidden nodes =====..." padded to 70 columns.
    hidden_nodes_str = 'x'.join(str(n) for n in hidden_nodes)
    banner = '{} hidden nodes '.format(hidden_nodes_str).ljust(70, '=')
    print(banner + '\n')

    # NOTE(review): the meaning of each 3-tuple is defined by compare_gradients
    # (not visible here) — confirm before changing these values.
    params = [(20, 10, 0), (20, 10, 0.5)]

    # Only the architecture is fixed here; the remaining constructor arguments
    # are left for compare_gradients to supply.
    network_factory = partial(
        MultiLayerFullyConnected,
        hidden_nodes=hidden_nodes,
        batchnorm=True,
    )

    compare_gradients(network_factory, data_train, params)

# Train three layer network with batchnorm

In [None]:
# Same configuration as the baseline three-layer network, with batchnorm on.
network_three_layers_bn = MultiLayerFullyConnected(
    input_size=data_train.input_size,
    num_classes=data_train.num_classes,
    hidden_nodes=[50, 50],
    batchnorm=True,
    alpha=0.005,
    random_seed=0,
)

# Identical training settings to the baseline three-layer cell.
history_three_layers_bn = network_three_layers_bn.train_cyclic(
    data_train, data_val,
    eta_ss=2250, n_cycles=2,
    shuffle=True, verbose=True,
)

# Persist the history under PICKLE_DIR so later cells can reload it.
history_three_layers_bn.save(PICKLE_DIR, postfix='three_layers_bn')

In [None]:
# Reload the history pickled under postfix 'three_layers_bn' (this rebinds the
# in-memory variable of the same name).
history_three_layers_bn = TrainHistory.load(PICKLE_DIR, postfix='three_layers_bn')

visualize_learning_curves(history_three_layers_bn)

# Must follow the plotting call directly: savefig writes pyplot's current figure.
plt.savefig(os.path.join(FIGURE_DIR, 'learning_curves_three_layers_bn.svg'))

# Coarse search

In [None]:
def train_function(param_args):
    """Train one batch-normalised [50, 50] network for a single search sample.

    Args:
        param_args: mapping of sampled hyper-parameters; only
            ``param_args['alpha']`` is read.

    Returns:
        The result of ``train_cyclic`` run on the notebook-level
        ``data_train`` / ``data_val`` splits.
    """
    network = MultiLayerFullyConnected(
        input_size=data_train.input_size,
        num_classes=data_train.num_classes,
        hidden_nodes=[50, 50],
        batchnorm=True,
        alpha=param_args['alpha'],
        random_seed=0,
    )

    history = network.train_cyclic(
        data_train, data_val,
        eta_ss=2250, n_cycles=2,
        shuffle=True, verbose=True,
    )
    return history

In [None]:
# Sample count handed to SearchParam for the coarse search.
n_samples = 10

# Coarse range for 'alpha': bounds -5 and -1 on a 'log' scale.
# NOTE(review): presumably base-10 exponents (1e-5 .. 1e-1) — confirm against
# SearchParam.
alpha = SearchParam('alpha', -5, -1, n_samples, scale='log')

In [None]:
# Coarse search over `alpha`; each sample is trained through `train_function`.
search_results = search(
    data_train,
    data_val,
    params=[alpha],
    train_function=train_function,
    random_seed=0,
)

# Persist the results under PICKLE_DIR so the plotting cell can reload them.
search_results.save(PICKLE_DIR, postfix='three_layers_coarse')

In [None]:
# Reload the coarse-search results pickled under postfix 'three_layers_coarse'.
search_results = SearchResultSeries.load(PICKLE_DIR, postfix='three_layers_coarse')

# `alpha` must still be the coarse SearchParam when this runs.
search_results.visualize(alpha)

# Must follow the plotting call directly: savefig writes pyplot's current figure.
plt.savefig(os.path.join(FIGURE_DIR, 'search_three_layers_coarse.svg'))

# Fine search

In [None]:
# Sample count handed to SearchParam for the fine search.
n_samples = 10

# Narrower range for 'alpha' (bounds -3 and -2 on a 'log' scale). This rebinds
# `alpha`, replacing the coarse-search parameter.
# NOTE(review): presumably base-10 exponents (1e-3 .. 1e-2) — confirm against
# SearchParam.
alpha = SearchParam('alpha', -3, -2, n_samples, scale='log')

In [None]:
# Fine search over the narrowed `alpha`; same call as the coarse search.
search_results = search(
    data_train,
    data_val,
    params=[alpha],
    train_function=train_function,
    random_seed=0,
)

# Persist the results under PICKLE_DIR so later cells can reload them.
search_results.save(PICKLE_DIR, postfix='three_layers_fine')

In [None]:
# Reload the fine-search results pickled under postfix 'three_layers_fine'.
search_results = SearchResultSeries.load(PICKLE_DIR, postfix='three_layers_fine')

In [None]:
# Plot the fine-search results against the (fine) `alpha` parameter.
search_results.visualize(alpha)

# File name mirrors the coarse-search figure ('search_three_layers_coarse.svg').
# Must follow the plotting call directly: savefig writes pyplot's current figure.
plt.savefig(os.path.join(FIGURE_DIR, 'search_three_layers_fine.svg'))

In [None]:
# Result of SearchResultSeries.top on the fine search; the final-training cell
# reads the chosen value as alpha_top[0][0]['alpha'].
alpha_top = search_results.top(verbose=True)

# Train final three layer network

In [None]:
# Final three-layer model: batchnorm on, alpha taken from the fine search
# (`alpha_top` must still be the structure returned by search_results.top).
network_three_layers_final = MultiLayerFullyConnected(
    input_size=data_train.input_size,
    num_classes=data_train.num_classes,
    hidden_nodes=[50, 50],
    batchnorm=True,
    alpha=alpha_top[0][0]['alpha'],
    random_seed=0,
)

# Three cycles here, versus two in the earlier three-layer runs.
history_three_layers_final = network_three_layers_final.train_cyclic(
    data_train, data_val,
    eta_ss=2250, n_cycles=3,
    shuffle=True, verbose=True,
)

# Persist the history under PICKLE_DIR so later cells can reload it.
history_three_layers_final.save(PICKLE_DIR, postfix='three_layers_final')

In [None]:
# Reload the history pickled under postfix 'three_layers_final'.
history_three_layers_final = TrainHistory.load(PICKLE_DIR, postfix='three_layers_final')

# Evaluate the network stored in the history on the held-out test split.
history_three_layers_final.final_network.visualize_performance(data_test)

# Must follow the plotting call directly: savefig writes pyplot's current figure.
plt.savefig(os.path.join(FIGURE_DIR, 'performance_three_layers_final.svg'))

# Test sensitivity to initialization

In [None]:
# Values forwarded one at a time as `weight_std` in the sensitivity experiment.
sigmas = [1e-4, 1e-3, 1e-1]


def train_function(sigma, bn):
    """Train a [50, 50] network with a given weight std and batchnorm setting.

    Note: this rebinds the name ``train_function`` that the coarse/fine search
    cells earlier in the notebook use with a different signature
    (``param_args``). Re-running those search cells after this one picks up
    this definition instead.

    Args:
        sigma: forwarded as ``weight_std`` (with ``weight_init='standard'``).
        bn: forwarded as ``batchnorm``.

    Returns:
        The result of ``train_cyclic`` run on the notebook-level
        ``data_train`` / ``data_val`` splits.
    """
    network = MultiLayerFullyConnected(
        input_size=data_train.input_size,
        num_classes=data_train.num_classes,
        hidden_nodes=[50, 50],
        weight_init='standard',
        weight_std=sigma,
        batchnorm=bn,
        alpha=0.005,
        random_seed=0,
    )

    # eta_ss is 900 here, not the 2250 used by the other experiments.
    history = network.train_cyclic(
        data_train, data_val,
        eta_ss=900, n_cycles=2,
        shuffle=True, verbose=True,
    )
    return history

In [None]:
# Helper not defined in this notebook (likely provided by the `assignment3`
# star import). It receives the two-argument `train_function(sigma, bn)`.
# NOTE(review): presumably trains each sigma with and without batchnorm and
# pickles the results under PICKLE_DIR — confirm in assignment3.
train_batchnorm_stabilization(train_function, sigmas, pickle_dir=PICKLE_DIR)

In [None]:
# Plot the sensitivity experiment for the same `sigmas`, reading from PICKLE_DIR.
visualize_batchnorm_stabilization(sigmas, pickle_dir=PICKLE_DIR)

# Must follow the plotting call directly: savefig writes pyplot's current figure.
plt.savefig(os.path.join(FIGURE_DIR, 'three_layer_stabilization.svg'))

# Train nine layer network with batchnorm

In [None]:
# Same eight-hidden-layer architecture as the plain nine-layer run, with
# batchnorm on.
network_nine_layers_bn = MultiLayerFullyConnected(
    input_size=data_train.input_size,
    num_classes=data_train.num_classes,
    hidden_nodes=[50, 30, 20, 20, 10, 10, 10, 10],
    batchnorm=True,
    alpha=0.005,
    random_seed=0,
)

# NOTE(review): unlike the three-layer cells, `shuffle` is not passed here, so
# train_cyclic's default applies — confirm this difference is intentional.
history_nine_layers_bn = network_nine_layers_bn.train_cyclic(
    data_train, data_val,
    eta_ss=2250, n_cycles=2,
    verbose=True,
)

# Persist the history under PICKLE_DIR so later cells can reload it.
history_nine_layers_bn.save(PICKLE_DIR, postfix='nine_layers_bn')

In [None]:
# Reload the history pickled under postfix 'nine_layers_bn' (this rebinds the
# in-memory variable of the same name).
history_nine_layers_bn = TrainHistory.load(PICKLE_DIR, postfix='nine_layers_bn')

visualize_learning_curves(history_nine_layers_bn)

# Must follow the plotting call directly: savefig writes pyplot's current figure.
plt.savefig(os.path.join(FIGURE_DIR, 'learning_curves_nine_layers_bn.svg'))

# Performance Optimization

In [None]:
# Reference point for the optimisation experiments below: validation accuracy
# of the final three-layer network, reloaded from its pickle.
history_three_layers_final = TrainHistory.load(PICKLE_DIR, postfix='three_layers_final')
acc = history_three_layers_final.final_network.accuracy(data_val)

print("reference: {}".format(acc))

In [None]:
# Best alpha from the fine search, reloaded from its pickle. It is stored under
# its own name (`alpha_best`) so that the structure bound to `alpha_top` by the
# fine-search cell is not replaced by the alpha value itself; that would break
# `alpha_top[0][0]['alpha']` if the final three-layer training cell were re-run
# after this one.
search_results = SearchResultSeries.load(PICKLE_DIR, postfix='three_layers_fine')
alpha_best = search_results.top()[0][0]['alpha']

# Network factory with everything fixed except the architecture (and any extra
# keyword such as `dropout`), which each experiment below supplies.
network_proto = partial(MultiLayerFullyConnected,
                        input_size=data_train.input_size,
                        num_classes=data_train.num_classes,
                        alpha=alpha_best,
                        batchnorm=True,
                        random_seed=0)

# Reference architecture that the experiments below extend or widen.
hidden_nodes_proto = [50, 50]

def default_train(network, n_cycles=2):
    """Train ``network`` with the notebook's standard cyclic settings.

    Always trains on the notebook-level ``data_train`` / ``data_val`` splits.

    Args:
        network: object exposing ``train_cyclic`` (built via ``network_proto``
            in the cells below).
        n_cycles: forwarded to ``train_cyclic``; defaults to 2.

    Returns:
        The result of ``network.train_cyclic(...)``.
    """
    return network.train_cyclic(data_train,
                                data_val,
                                eta_ss=2250,
                                n_cycles=n_cycles,
                                shuffle=True,
                                verbose=True)

## Increase the number of hidden layers

In [None]:
# Candidate widths for the extra hidden layer: 30, 40, 50, 60, 70.
hidden_nodes_layer_four = list(range(30, 71, 10))

In [None]:
# Append one extra hidden layer of width `hn` to the [50, 50] reference
# architecture, train it, and pickle each history under its own postfix.
for hn in hidden_nodes_layer_four:
    network = network_proto(hidden_nodes=hidden_nodes_proto + [hn])
    history = default_train(network)
    history.save(PICKLE_DIR, postfix='four_layers_{}'.format(hn))

In [None]:
# Report validation accuracy for each pickled four-layer run.
for hn in hidden_nodes_layer_four:
    postfix = 'four_layers_{}'.format(hn)
    history = TrainHistory.load(PICKLE_DIR, postfix=postfix)
    acc = history.final_network.accuracy(data_val)
    print("{} hidden nodes => {}".format(hn, acc))

## More hidden nodes

In [None]:
# Candidate widths applied to every hidden layer: 100, 200, 300, 400, 500.
num_hidden_nodes = [100 * k for k in range(1, 6)]

In [None]:
# Keep the number of hidden layers of the reference architecture but set every
# layer to width `hn`; train and pickle each history under its own postfix.
for hn in num_hidden_nodes:
    wide_layers = [hn] * len(hidden_nodes_proto)
    history = default_train(network_proto(hidden_nodes=wide_layers))
    history.save(PICKLE_DIR, postfix='three_layers_wide_{}'.format(hn))

In [None]:
# Report validation accuracy for each pickled wide three-layer run.
for hn in num_hidden_nodes:
    postfix = 'three_layers_wide_{}'.format(hn)
    history = TrainHistory.load(PICKLE_DIR, postfix=postfix)
    acc = history.final_network.accuracy(data_val)
    print("{} hidden nodes => {}".format(hn, acc))

## Dropout

In [None]:
# [300, 300] network with dropout=0.5, trained with the default settings.
network_dropout = network_proto(hidden_nodes=[300, 300], dropout=0.5)
history_dropout = default_train(network_dropout)

# Persist the history under PICKLE_DIR so the next cell can reload it.
history_dropout.save(PICKLE_DIR, postfix='three_layers_wide_dropout')

In [None]:
# Validation accuracy of the pickled dropout run. This rebinds the generic
# `history` / `acc` names also used by the loops above.
history = TrainHistory.load(PICKLE_DIR, postfix='three_layers_wide_dropout')
acc = history.final_network.accuracy(data_val)

print("with dropout => {}".format(acc))

## Data augmentation

In [None]:
# NOTE(review): presumably returns an augmented copy of the training split
# (and leaves `data_train` untouched) — confirm in the data module.
data_train_aug = data_train.augment_orientation()

In [None]:
# Train on the augmented set built in the previous cell. `default_train` always
# trains on `data_train`, so it is bypassed here and `train_cyclic` is called
# directly with the same settings `default_train` uses (eta_ss=2250, shuffle,
# verbose), for 6 cycles.
# NOTE(review): assumes `augment_orientation()` returns the augmented dataset
# rather than mutating `data_train` in place and returning None — confirm.
history_augment = network_proto(hidden_nodes=[300, 300]).train_cyclic(
    data_train_aug,
    data_val,
    eta_ss=2250,
    n_cycles=6,
    shuffle=True,
    verbose=True)

# Persist the history under PICKLE_DIR so the following cells can reload it.
history_augment.save(PICKLE_DIR, postfix='three_layers_wide_augment')

In [None]:
# Reload the history pickled under postfix 'three_layers_wide_augment'.
history_augment = TrainHistory.load(PICKLE_DIR, postfix='three_layers_wide_augment')

In [None]:
# Learning curves of the augmentation run.
visualize_learning_curves(history_augment)

# Must follow the plotting call directly: savefig writes pyplot's current figure.
plt.savefig(os.path.join(FIGURE_DIR, 'learning_curves_augment.svg'))

In [None]:
# Evaluate the network stored in the augmentation history on the test split.
history_augment.final_network.visualize_performance(data_test)

# Must follow the plotting call directly: savefig writes pyplot's current figure.
plt.savefig(os.path.join(FIGURE_DIR, 'performance_augment.svg'))