In [None]:
import os
import sys; sys.path.append('../lib')
from functools import partial

import matplotlib.pyplot as plt

from assignment2 import *
from data import Cifar
from gradients import compare_gradients
from networks import TrainHistory, TwoLayerFullyConnected
from search import SearchParam, SearchResultSeries, search

# Constants

In [None]:
# Directory layout, relative to the notebook's working directory.
DATA_DIR = '../data'        # input: handed to Cifar(...)
PICKLE_DIR = '../pickle'    # output: saved training histories and search results
FIGURE_DIR = '../figures'   # output: figures written with plt.savefig

# Number of hidden nodes passed to TwoLayerFullyConnected throughout the notebook.
HIDDEN_NODES = 50

# plt.savefig does not create missing parent directories, so a fresh checkout
# without '../figures' would fail at the first savefig call. Whether the
# project's save helpers create PICKLE_DIR themselves is not visible here, so
# it is created as well. exist_ok=True keeps this cell safe to re-run.
os.makedirs(PICKLE_DIR, exist_ok=True)
os.makedirs(FIGURE_DIR, exist_ok=True)

# Load dataset

In [None]:
# Instantiate the Cifar dataset wrapper with the data directory. Every
# train/validation/test split used later in this notebook is drawn from this
# object via dataset.train_val_test_split(...).
dataset = Cifar(DATA_DIR)

# Split into training, validation and test set

In [None]:
# Initial split used by the gradient check, the overfitting check and the
# learning-curve reproductions below: 10000 training and 10000 validation
# samples are requested, with 'zscore' normalization.
# NOTE(review): how the test set is sized is not visible here — confirm in
# Cifar.train_val_test_split.
data_train, data_val, data_test = dataset.train_val_test_split(
    n_train=10000, n_val=10000, normalize='zscore')

# Default network constructor

In [None]:
# Factory for networks that share this notebook's base configuration.
# Calling default_network(**overrides) forwards extra keyword arguments
# (e.g. alpha=...) to TwoLayerFullyConnected on top of the ones bound here.
#
# input_size and num_classes are read from data_train once, when this cell
# runs; later re-splits of the dataset do not update them.
# random_seed=0 is bound so every network built from this factory receives
# the same seed (presumably giving identical initial weights — confirm in
# networks.py).
default_network = partial(
    TwoLayerFullyConnected,
    input_size=data_train.input_size,
    hidden_nodes=HIDDEN_NODES,
    num_classes=data_train.num_classes,
    random_seed=0)

# Compare analytical and numerical gradient

In [None]:
# One tuple per gradient-check configuration handed to compare_gradients.
# NOTE(review): the meaning of the three fields is defined by
# gradients.compare_gradients (presumably sample count, input dimension and
# regularization strength) — TODO confirm.
params = [
    (1, 20, 0),
    (1, 20, 0.5),
    (20, 20, 0),
    (20, 20, 0.5),
]

# Only hidden_nodes is bound on the constructor here (unlike default_network),
# so compare_gradients receives a factory it can complete with its own
# remaining arguments.
compare_gradients(
    partial(TwoLayerFullyConnected, hidden_nodes=HIDDEN_NODES),
    data_train,
    params,
)

# Overfit on training data subset

In [None]:
# Sanity check: train a default-configured network on a tiny subset to see
# whether it can overfit it.
network = default_network()

# 100-sample subsets of both the training and the validation data are used;
# the batch size and history settings are passed explicitly.
history = network.train(
    data_train.subsample(n=100),
    data_val.subsample(n=100),
    n_batch=20,
    history_per_cycle=100,
    verbose=True,
)

history.visualize()

# Reproduce learning curves

In [None]:
# Train with alpha=0.01 (presumably the regularization coefficient — confirm
# in networks.py) using train_cyclic's own defaults: neither eta_ss nor
# n_cycles is passed here.
network = default_network(alpha=0.01)

history = network.train_cyclic(data_train, data_val, verbose=True)

# Persist the history so the plotting cell below can run without retraining.
history.save(PICKLE_DIR, postfix='default_one_cycle')

In [None]:
# Reload the history stored under the 'default_one_cycle' postfix, so this
# cell can regenerate the figure without rerunning the training cell.
history = TrainHistory.load(PICKLE_DIR, postfix='default_one_cycle')

# visualize_learning_curves is not imported by name in this notebook;
# presumably it comes from the star import of assignment2.
visualize_learning_curves(history)

# Saves the current matplotlib figure; FIGURE_DIR must already exist because
# savefig does not create missing directories.
plt.savefig(os.path.join(FIGURE_DIR, 'curves_default_one_cycle.svg'))

In [None]:
# Same alpha as the previous run, but with an explicit schedule: eta_ss=800
# and three cycles.
network = default_network(alpha=0.01)

history = network.train_cyclic(
    data_train,
    data_val,
    eta_ss=800,
    n_cycles=3,
    verbose=True,
)

# Persist the history so the plotting cell below can run without retraining.
history.save(PICKLE_DIR, postfix='default_three_cycles')

In [None]:
# Reload the history stored under the 'default_three_cycles' postfix, so this
# cell can regenerate the figure without rerunning the training cell.
history = TrainHistory.load(PICKLE_DIR, postfix='default_three_cycles')

visualize_learning_curves(history)

# Saves the current matplotlib figure; FIGURE_DIR must already exist.
plt.savefig(os.path.join(FIGURE_DIR, 'curves_default_three_cycles.svg'))

# Load more data

In [None]:
# Re-split for the hyper-parameter search with 5000 validation samples.
# n_train is not passed (presumably the remaining samples become training
# data — confirm in Cifar.train_val_test_split).
# This REBINDS data_train / data_val / data_test: any earlier cell re-run
# after this point will silently use this split instead of the 10000/10000 one.
data_train, data_val, data_test = dataset.train_val_test_split(
    n_val=5000, normalize='zscore')

# Perform coarse search

In [None]:
# Number of alpha values to try in the coarse search.
n_samples = 20

# Search dimension for 'alpha' with bounds -5 and -1 on a log scale
# (presumably meaning alpha ranges over 10**-5 .. 10**-1 — confirm in
# search.SearchParam). Note that `alpha` names the SearchParam object here,
# not a numeric value.
alpha = SearchParam('alpha', -5, -1, n_samples, scale='log')

In [None]:
def train_function(param_args, n_cycles=2):
    """Train one network for a single sample of the coarse search.

    Reads the notebook globals default_network, data_train and data_val at
    call time.

    Args:
        param_args: Mapping of sampled hyper-parameter values; only the
            'alpha' entry is read.
        n_cycles: Number of cycles passed to train_cyclic. Defaults to 2,
            the value used for the coarse search.

    Returns:
        Whatever network.train_cyclic returns (presumably a TrainHistory —
        confirm in networks.py).
    """
    network = default_network(alpha=param_args['alpha'])

    # eta_ss scales with the training set size: (2 * n) // 100.
    # NOTE(review): the 100 looks like the mini-batch size, which would make
    # eta_ss two epochs' worth of update steps — confirm against
    # train_cyclic's default batch size.
    return network.train_cyclic(data_train,
                                data_val,
                                eta_ss=(2 * data_train.n // 100),
                                n_cycles=n_cycles,
                                verbose=True)

# Run the coarse search over the single `alpha` dimension. train_function is
# invoked by search with the sampled parameter values; random_seed=0 is
# passed to search for repeatable sampling.
search_results = search(
    data_train,
    data_val,
    params=[alpha],
    train_function=train_function,
    random_seed=0,
)

# Persist the results so the analysis cells below can run without repeating
# the search.
search_results.save(PICKLE_DIR, postfix='coarse')

In [None]:
# Reload the coarse-search results saved under the 'coarse' postfix, so the
# cells below can run without repeating the search.
search_results = SearchResultSeries.load(PICKLE_DIR, postfix='coarse')

In [None]:
# Plot the search results against the `alpha` SearchParam defined for the
# coarse search (this relies on `alpha` still being bound to that object).
search_results.visualize(alpha)

# Saves the current matplotlib figure; FIGURE_DIR must already exist.
plt.savefig(os.path.join(FIGURE_DIR, 'search_coarse.svg'))

In [None]:
# Report the best coarse-search results (how many, and by which metric, is
# decided inside SearchResultSeries.top — not visible here).
search_results.top(verbose=True)

# Perform fine search

In [None]:
# Number of alpha values to try in the fine search. Rebinds n_samples from
# the coarse search.
n_samples = 10

# Narrower log-scale range (-3.5 to -2.5) than the coarse search's -5 to -1.
# Rebinds `alpha` to a new SearchParam, so coarse-search cells re-run after
# this point will see the fine-search parameter.
alpha = SearchParam('alpha', -3.5, -2.5, n_samples, scale='log')

In [None]:
def train_function(param_args, n_cycles=4):
    """Train one network for a single sample of the fine search.

    This redefinition shadows the train_function defined for the coarse
    search earlier in the notebook; it differs in the number of cycles.
    Reads the notebook globals default_network, data_train and data_val at
    call time.

    Args:
        param_args: Mapping of sampled hyper-parameter values; only the
            'alpha' entry is read.
        n_cycles: Number of cycles passed to train_cyclic. Defaults to 4,
            the value used for the fine search.

    Returns:
        Whatever network.train_cyclic returns (presumably a TrainHistory —
        confirm in networks.py).
    """
    network = default_network(alpha=param_args['alpha'])

    # eta_ss scales with the training set size: (2 * n) // 100.
    # NOTE(review): the 100 looks like the mini-batch size, which would make
    # eta_ss two epochs' worth of update steps — confirm against
    # train_cyclic's default batch size.
    return network.train_cyclic(data_train,
                                data_val,
                                eta_ss=(2 * data_train.n // 100),
                                n_cycles=n_cycles,
                                verbose=True)

# Run the fine search over the single `alpha` dimension. train_function is
# invoked by search with the sampled parameter values; random_seed=0 is
# passed to search for repeatable sampling.
search_results = search(
    data_train,
    data_val,
    params=[alpha],
    train_function=train_function,
    random_seed=0,
)

# Persist the results so the analysis cells below can run without repeating
# the search.
search_results.save(PICKLE_DIR, postfix='fine')

In [None]:
# Reload the fine-search results saved under the 'fine' postfix. The final
# training cell later reads its alpha from this `search_results` object.
search_results = SearchResultSeries.load(PICKLE_DIR, postfix='fine')

In [None]:
# Plot the search results against the `alpha` SearchParam defined for the
# fine search (this relies on `alpha` still being bound to that object).
search_results.visualize(alpha)

# Saves the current matplotlib figure; FIGURE_DIR must already exist.
plt.savefig(os.path.join(FIGURE_DIR, 'search_fine.svg'))

In [None]:
# Report the best fine-search results (how many, and by which metric, is
# decided inside SearchResultSeries.top — not visible here).
search_results.top(verbose=True)

# Train on more data and evaluate on test set

In [None]:
# Final split: only 1000 validation samples are requested. n_train is not
# passed (presumably the remaining samples become training data — confirm in
# Cifar.train_val_test_split).
# Rebinds data_train / data_val / data_test once more; search cells re-run
# after this point would use this split rather than the n_val=5000 one.
data_train, data_val, data_test = dataset.train_val_test_split(
    n_val=1000, normalize='zscore')

In [None]:
# Build the final network with the alpha reported by search_results.optimum().
# This depends on `search_results` currently holding the fine-search results.
network = default_network(alpha=search_results.optimum()['alpha'])

# Same step-size formula as in the searches: eta_ss is (2 * n) // 100 for the
# current training set; three cycles are run.
history = network.train_cyclic(
    data_train,
    data_val,
    eta_ss=(2 * data_train.n // 100),
    n_cycles=3,
    verbose=True,
)

# Persist the history so the evaluation cells below can run without retraining.
history.save(PICKLE_DIR, postfix='final_three_cycles')

In [None]:
# Reload the final training history saved under the 'final_three_cycles'
# postfix, so the cells below can run without retraining.
history = TrainHistory.load(PICKLE_DIR, postfix='final_three_cycles')

In [None]:
# Plot the learning curves of the final run. visualize_learning_curves is not
# imported by name; presumably it comes from the star import of assignment2.
visualize_learning_curves(history)

# Saves the current matplotlib figure; FIGURE_DIR must already exist.
plt.savefig(os.path.join(FIGURE_DIR, 'curves_final_three_cycles.svg'))

In [None]:
# Evaluate the network stored on the history against the held-out test set.
# This is the only place in the notebook where data_test is used.
history.final_network.visualize_performance(data_test)

# Saves the current matplotlib figure; FIGURE_DIR must already exist.
plt.savefig(os.path.join(FIGURE_DIR, 'performance_final_three_cycles.svg'))