In [None]:
import os
import sys; sys.path.append('../lib')
from functools import partial

import matplotlib.pyplot as plt
import numpy as np

from data import Cifar
from history import TrainHistory
from search import SearchParam, SearchResultSeries, search
from two_layer_fully_connected import TwoLayerFullyConnected

# Constants

In [None]:
# Hidden-layer width handed to the default network constructor.
HIDDEN_NODES = 50

# Directories, relative to this notebook, that the cells below pass to the
# dataset loader, to the pickle save/load helpers and to plt.savefig.
FIGURE_DIR = '../figures'
PICKLE_DIR = '../pickle'
DATA_DIR = '../data'

# Load dataset

In [None]:
# Instantiate the project's Cifar dataset wrapper, pointing it at DATA_DIR.
dataset = Cifar(DATA_DIR)

# Split into training, validation and test set

In [None]:
# Request the train / validation / test split with n_val=5000 and the
# 'zscore' normalisation mode.
# NOTE(review): presumably n_val is the number of validation samples and
# 'zscore' standardises the inputs -- confirm against data.Cifar.
data_train, data_val, data_test = dataset.train_val_test_split(
    n_val=5000, normalize='zscore')

# Default network constructor

In [None]:
# Reload the search results that were pickled under the 'fine' postfix.
search_results = SearchResultSeries.load(PICKLE_DIR, postfix='fine')

# Factory for identically configured networks: the keyword arguments below
# are pre-bound, and any of them can still be overridden at call time
# (e.g. default_network(alpha=...)). The regularisation strength is the
# 'alpha' entry of the loaded search optimum.
default_network = partial(TwoLayerFullyConnected,
                          input_size=data_train.input_size,
                          hidden_nodes=HIDDEN_NODES,
                          num_classes=data_train.num_classes,
                          alpha=search_results.optimum()['alpha'],
                          random_seed=0)

# Create reference network

In [None]:
# Reference run: default configuration, three cycles.
network = default_network()

# eta_ss is derived from the training-set size as 2 * n // 100.
history = network.train_cyclic(
    data_train,
    data_val,
    eta_ss=(2 * data_train.n // 100),
    n_cycles=3,
    verbose=True)

# Persist the history so the following cells can reload it without retraining.
history.save(PICKLE_DIR, postfix='reference_three_cycles')

In [None]:
# Reload the reference run from disk (same postfix as used when saving it).
history = TrainHistory.load(PICKLE_DIR, postfix='reference_three_cycles')

In [None]:
# Draw the history's built-in visualisation and export the figure as SVG.
history.visualize()

figure_path = os.path.join(FIGURE_DIR, 'curves_reference_three_cycles.svg')
plt.savefig(figure_path)

In [None]:
# Visualise the final network's performance on the validation set and
# export the figure as SVG.
final_network = history.final_network
final_network.visualize_performance(data_val)

figure_path = os.path.join(FIGURE_DIR,
                           'performance_reference_three_cycles.svg')
plt.savefig(figure_path)

# More hidden nodes

In [None]:
# Hidden-layer widths compared in the cells below.
hidden_nodes = [50, 100, 200, 400]

In [None]:
# Train one network per hidden-layer width and cache each training history.
# Only `hidden_nodes` varies; all other constructor arguments come from
# `default_network`, so the runs stay comparable to the reference network.
for hn in hidden_nodes:
    network = default_network(hidden_nodes=hn)

    history = network.train_cyclic(
        data_train,
        data_val,
        eta_ss=(2 * data_train.n // 100),
        n_cycles=3,
        verbose=True)

    # The postfix must be built from the loop variable `hn`; it is the same
    # pattern the plotting cell uses to reload these histories.
    postfix = 'two_layers_{}_nodes'.format(hn)
    history.save(PICKLE_DIR, postfix=postfix)

In [None]:
# Two panels side by side: training accuracy (left), validation accuracy (right).
_, (ax_train, ax_val) = plt.subplots(1, 2, figsize=(10, 5))

# One curve per hidden-layer width on each panel.
for hn in hidden_nodes:
    postfix = 'two_layers_{}_nodes'.format(hn)
    history = TrainHistory.load(PICKLE_DIR, postfix=postfix)
    label = "{} Hidden Nodes".format(hn)

    ax_train.plot(history.domain, history.train_accuracy, label=label)
    ax_val.plot(history.domain, history.val_accuracy, label=label)

# Titles differ per panel; axis labels, legend and grid are shared.
for ax, title in ((ax_train, "Training Set"), (ax_val, "Validation Set")):
    ax.set_title(title)
    ax.set_xlabel("Update Step")
    ax.set_ylabel("Accuracy")

    ax.legend()
    ax.grid()

plt.savefig(os.path.join(FIGURE_DIR, 'more_hidden_nodes.svg'))

In [None]:
# Reload the 400-node run and visualise its final network on the
# validation set, then export the figure as SVG.
history = TrainHistory.load(PICKLE_DIR, postfix='two_layers_400_nodes')

final_network = history.final_network
final_network.visualize_performance(data_val)

figure_path = os.path.join(FIGURE_DIR,
                           'performance_two_layers_400_nodes.svg')
plt.savefig(figure_path)

In [None]:
# Reload the search results that were pickled under the 'fine' postfix.
search_results = SearchResultSeries.load(PICKLE_DIR, postfix='fine')

# Rebind `default_network` so that every cell from here on builds networks
# with 400 hidden nodes; this replaces the earlier HIDDEN_NODES-based factory.
default_network = partial(TwoLayerFullyConnected,
                          input_size=data_train.input_size,
                          hidden_nodes=400,
                          num_classes=data_train.num_classes,
                          alpha=search_results.optimum()['alpha'],
                          random_seed=0)

# Perform more exhaustive search

In [None]:
# Number of samples handed to each SearchParam.
n_samples = 10

# Coarse search ranges.
# NOTE(review): with scale='log' the alpha bounds look like base-10 exponents
# (the fine search passes np.log10(...) values) -- confirm against
# search.SearchParam.
alpha = SearchParam('alpha', -5, -2, n_samples, scale='log')
eta_ss = SearchParam('eta_ss', 200, 2000, n_samples, dtype=int)

In [None]:
def train_function(param_args):
    """Train one network for a single sampled hyper-parameter setting.

    `param_args` is indexed by 'alpha' (forwarded to the network
    constructor) and 'eta_ss' (forwarded to `train_cyclic`). The number of
    cycles is 2000 / eta_ss rounded to the nearest integer, so a larger
    eta_ss gets fewer cycles.

    Returns whatever `network.train_cyclic` returns.
    """
    network = default_network(alpha=param_args['alpha'])

    step_size = param_args['eta_ss']
    cycle_count = int(round(2000 / step_size))

    return network.train_cyclic(data_train,
                                data_val,
                                eta_ss=step_size,
                                n_cycles=cycle_count,
                                verbose=True)

# Run the coarse search under three different seeds; each run is pickled
# under its own postfix so the runs can be joined afterwards.
for random_seed in range(3):
    search_results = search(
        data_train,
        data_val,
        params=[alpha, eta_ss],
        train_function=train_function,
        random_seed=random_seed)

    search_results.save(PICKLE_DIR,
                        postfix='exhaustive_seed{}'.format(random_seed))

In [None]:
# Reload the three coarse search runs and join them into one result series.
search_results = None

for random_seed in range(3):
    _search_results = SearchResultSeries.load(
        PICKLE_DIR, postfix='exhaustive_seed{}'.format(random_seed))

    # The first run seeds the accumulator; later runs are joined onto it.
    search_results = (_search_results
                      if search_results is None
                      else search_results.join(_search_results))

In [None]:
# Summarise the joined coarse search results.
search_results.summarize()

In [None]:
# Visualise the joined coarse search over (alpha, eta_ss) and export as SVG.
search_results.visualize(alpha, eta_ss)

figure_path = os.path.join(FIGURE_DIR, 'exhaustive_coarse.svg')
plt.savefig(figure_path)

In [None]:
# Number of samples handed to each SearchParam.
n_samples = 10

# Narrowed search ranges: alpha between log10(5e-4) and log10(2e-3) on the
# 'log' scale, eta_ss between 1000 and 1250.
alpha = SearchParam('alpha', np.log10(5e-4), np.log10(2e-3), n_samples, scale='log')
eta_ss = SearchParam('eta_ss', 1000, 1250, n_samples, dtype=int)

In [None]:
def train_function(param_args):
    """Train one network for a single sampled hyper-parameter setting.

    `param_args` is indexed by 'alpha' (forwarded to the network
    constructor) and 'eta_ss' (forwarded to `train_cyclic`). The number of
    cycles is 2000 / eta_ss rounded to the nearest integer.

    Returns whatever `network.train_cyclic` returns.
    """
    network = default_network(alpha=param_args['alpha'])

    step_size = param_args['eta_ss']
    cycle_count = int(round(2000 / step_size))

    return network.train_cyclic(data_train,
                                data_val,
                                eta_ss=step_size,
                                n_cycles=cycle_count,
                                verbose=True)

# Run the fine search under three different seeds; each run is pickled
# under its own postfix so the runs can be joined afterwards.
for random_seed in range(3):
    search_results = search(
        data_train,
        data_val,
        params=[alpha, eta_ss],
        train_function=train_function,
        random_seed=random_seed)

    search_results.save(PICKLE_DIR,
                        postfix='exhaustive_fine_seed{}'.format(random_seed))

In [None]:
# Reload the three fine search runs and join them into one result series.
search_results = None

for random_seed in range(3):
    _search_results = SearchResultSeries.load(
        PICKLE_DIR, postfix='exhaustive_fine_seed{}'.format(random_seed))

    # The first run seeds the accumulator; later runs are joined onto it.
    search_results = (_search_results
                      if search_results is None
                      else search_results.join(_search_results))

In [None]:
# Summarise the joined fine search results.
search_results.summarize()

In [None]:
# Visualise the joined fine search over (alpha, eta_ss) and export as SVG.
search_results.visualize(alpha, eta_ss)

figure_path = os.path.join(FIGURE_DIR, 'exhaustive_fine.svg')
plt.savefig(figure_path)

# Try dropout

In [None]:
# Train one network per dropout setting and cache each training history.
# 0.4 is part of the list because the evaluation cell reloads
# 'two_layers_dropout0_4'; without it that load fails on a fresh run.
for p in [0, 0.4, 0.5, 0.6]:
    network = default_network(dropout=p)

    history = network.train_cyclic(
        data_train,
        data_val,
        eta_ss=(2 * data_train.n // 100),
        n_cycles=3,
        verbose=True)

    # '.' is replaced by '_' in the postfix, e.g. 0.5 -> 'two_layers_dropout0_5'.
    postfix = 'two_layers_dropout{}'.format(
        str(p).replace('.', '_'))

    history.save(PICKLE_DIR, postfix=postfix)

In [None]:
# Print the validation accuracy of the final network for each dropout setting.
for p in [0, 0.4, 0.5, 0.6]:
    # Same postfix scheme as when saving: '.' becomes '_'.
    p_tag = str(p).replace('.', '_')
    postfix = 'two_layers_dropout{}'.format(p_tag)

    history = TrainHistory.load(PICKLE_DIR, postfix=postfix)

    val_accuracy = history.final_network.accuracy(data_val)
    print(val_accuracy)

# Create ensemble classifier

In [None]:
# Re-split with n_val=1000 instead of 5000. This rebinds data_train,
# data_val and data_test for every cell below; `default_network` keeps the
# input_size / num_classes values it captured from the earlier split.
data_train, data_val, data_test = dataset.train_val_test_split(
    n_val=1000, normalize='zscore')

In [None]:
# Ensemble run: ten cycles with create_ensemble=True.
network = default_network()

# eta_ss is derived from the (new) training-set size as 2 * n // 100.
history = network.train_cyclic(data_train,
                               data_val,
                               eta_ss=(2 * data_train.n // 100),
                               n_cycles=10,
                               create_ensemble=True,
                               verbose=True)

# Persist the history so the following cells can reload it without retraining.
history.save(PICKLE_DIR, postfix='minimum_ensemble')

In [None]:
# Reload the ensemble run from disk (same postfix as used when saving it).
history = TrainHistory.load(PICKLE_DIR, postfix='minimum_ensemble')

In [None]:
# Draw the history's built-in visualisation and export the figure as SVG.
history.visualize()

figure_path = os.path.join(FIGURE_DIR, 'curves_minimum_ensemble.svg')
plt.savefig(figure_path)

In [None]:
# Visualise the final network's performance on the held-out test set and
# export the figure as SVG.
final_network = history.final_network
final_network.visualize_performance(data_test)

figure_path = os.path.join(FIGURE_DIR, 'performance_minimum_ensemble.svg')
plt.savefig(figure_path)