In [None]:
import math
import os
import sys; sys.path.append('../lib')

import matplotlib.pyplot as plt
import numpy as np

from data import Cifar
from networks import EnsembleClassifier, \
                     SingleLayerFullyConnected, \
                     TrainHistory

In [None]:
# Filesystem layout; relative paths resolve against the notebook's working
# directory.
DATA_DIR = '../data'       # handed to Cifar() when the dataset is loaded
PICKLE_DIR = '../pickle'   # TrainHistory objects are saved to / loaded from here
FIGURE_DIR = '../figures'  # target directory of every plt.savefig() call

# Make sure the output directories exist before any cell writes into them:
# plt.savefig() raises FileNotFoundError when the target directory is missing,
# which would break a fresh "Restart Kernel -> Run All" on a clean checkout.
for output_dir in (PICKLE_DIR, FIGURE_DIR):
    os.makedirs(output_dir, exist_ok=True)

# Hyperparameters shared by the training cells.
#   eta             -> passed as `eta` to network.train()
#   epochs          -> passed as `n_epochs` (the early-stopping run uses 4x this)
#   dead_epochs_max -> passed as `n_dead_epochs_max` where early stopping is on
# 'alpha_low', 'alpha_high' and 'eta_decay_factor' are not referenced by any
# cell visible in this notebook (grid search and learning-rate decay are
# skipped below); they are kept so the dictionary stays complete.
HYPERPARAMS = {
    'alpha_low': 0,
    'alpha_high': 0.05,
    'eta': 0.01,
    'eta_decay_factor': 0.9,
    'epochs': 40,
    'dead_epochs_max': 5
}

# Load dataset

In [None]:
# Instantiate the project's Cifar dataset helper, pointing it at DATA_DIR.
# All train/validation/test splits used below are derived from this object.
dataset = Cifar(DATA_DIR)

# Use all available training data

In [None]:
# Split the dataset into its three parts, requesting n_val=1000
# (presumably the number of validation samples -- confirm in lib/data).
splits = dataset.train_val_test_split(n_val=1000)
data_train_full, data_val_full, data_test_full = splits

In [None]:
# Baseline run: a network constructed with random_seed=0, trained on the full
# training split for the configured number of epochs.
network = SingleLayerFullyConnected(
    input_size=data_train_full.input_size,
    num_classes=data_train_full.num_classes,
    random_seed=0,
)

train_options = {
    'eta': HYPERPARAMS['eta'],
    'n_epochs': HYPERPARAMS['epochs'],
    'verbose': True,
}
history = network.train(data_train_full, data_val_full, **train_options)

# Persist the run under the 'full' postfix so it can be reloaded later.
history.save(PICKLE_DIR, postfix='full')

In [None]:
# Reload the history stored under PICKLE_DIR with postfix 'full' -- the same
# directory and postfix the baseline training cell saves to.
history = TrainHistory.load(PICKLE_DIR, postfix='full')

In [None]:
# Draw the history's visualization, then export the current matplotlib
# figure to FIGURE_DIR.
figure_path = os.path.join(FIGURE_DIR, 'learning_curves_full.svg')

history.visualize()
plt.savefig(figure_path)

In [None]:
# Visualize how the trained network performs on the validation split and
# export the current matplotlib figure to FIGURE_DIR.
figure_path = os.path.join(FIGURE_DIR, 'confusion_matrix_full.svg')
trained_network = history.final_network

trained_network.visualize_performance(data_val_full)
plt.savefig(figure_path)

# Early stopping

In [None]:
# Early-stopping run: same network setup as the baseline, but with four times
# the configured epoch budget and the early-stopping options switched on.
network = SingleLayerFullyConnected(
    input_size=data_train_full.input_size,
    num_classes=data_train_full.num_classes,
    random_seed=0,
)

early_stopping_options = dict(
    eta=HYPERPARAMS['eta'],
    n_epochs=4 * HYPERPARAMS['epochs'],
    n_dead_epochs_max=HYPERPARAMS['dead_epochs_max'],
    stop_early=True,
    stop_early_find_best_params=True,
    verbose=True,
)
history = network.train(
    data_train_full, data_val_full, **early_stopping_options)

# Persist the run under the 'stop_early' postfix.
history.save(PICKLE_DIR, postfix='stop_early')

In [None]:
# Reload the history stored under PICKLE_DIR with postfix 'stop_early' -- the
# same directory and postfix the early-stopping training cell saves to.
history = TrainHistory.load(PICKLE_DIR, postfix='stop_early')

In [None]:
# Draw the history's visualization, then export the current matplotlib
# figure to FIGURE_DIR.
figure_path = os.path.join(FIGURE_DIR, 'learning_curves_stop_early.svg')

history.visualize()
plt.savefig(figure_path)

In [None]:
# Visualize how the early-stopped network performs on the validation split
# and export the current matplotlib figure to FIGURE_DIR.
figure_path = os.path.join(FIGURE_DIR, 'confusion_matrix_stop_early.svg')
trained_network = history.final_network

trained_network.visualize_performance(data_val_full)
plt.savefig(figure_path)

# Grid search

Skipped: an exhaustive grid search would take far too long to run on my machine.

# Decay learning rate

This only really makes sense if we train for more epochs, which is again not feasible.

# Xavier initialization

Does not really make sense for single-layer networks.

# Training data augmentation

In [None]:
# Take a small slice of the training data for the before/after previews.
# NOTE(review): batch(0, 100) presumably selects samples 0..99 -- confirm the
# argument semantics in lib/data.
data_sub = data_train_full.batch(0, 100)
# Augmented counterpart of the slice; augment() is called on data_sub and its
# result is bound to a new name, so both versions are available for preview.
data_sub_aug = data_sub.augment()

In [None]:
# Preview the unaugmented slice (h=5, w=5) and export the current figure.
figure_path = os.path.join(FIGURE_DIR, 'data_unaugmented.svg')

data_sub.preview(h=5, w=5)
plt.savefig(figure_path)

In [None]:
# Preview the augmented slice (h=5, w=5) and export the current figure.
figure_path = os.path.join(FIGURE_DIR, 'data_augmented.svg')

data_sub_aug.preview(h=5, w=5)
plt.savefig(figure_path)

In [None]:
# Augment the full training split and join the result onto the original
# split to form the training set for the augmentation experiment.
data_train_aug = data_train_full.join(
    data_train_full.augment(verbose=True)
)

In [None]:
# Augmentation run: same network setup as before, trained on the joined
# original + augmented training data with early stopping enabled.
network = SingleLayerFullyConnected(
    input_size=data_train_full.input_size,
    num_classes=data_train_full.num_classes,
    random_seed=0,
)

augment_options = dict(
    eta=HYPERPARAMS['eta'],
    n_epochs=HYPERPARAMS['epochs'],
    n_dead_epochs_max=HYPERPARAMS['dead_epochs_max'],
    stop_early=True,
    stop_early_find_best_params=True,
    verbose=True,
)
history = network.train(data_train_aug, data_val_full, **augment_options)

# Persist the run under the 'augment' postfix.
history.save(PICKLE_DIR, postfix='augment')

In [None]:
# Reload the history stored under PICKLE_DIR with postfix 'augment' -- the
# same directory and postfix the augmentation training cell saves to.
history = TrainHistory.load(PICKLE_DIR, postfix='augment')

In [None]:
# Draw the history's visualization, then export the current matplotlib
# figure to FIGURE_DIR.
figure_path = os.path.join(FIGURE_DIR, 'learning_curves_augment.svg')

history.visualize()
plt.savefig(figure_path)

In [None]:
# Visualize how the augmentation-trained network performs on the validation
# split and export the current matplotlib figure to FIGURE_DIR.
figure_path = os.path.join(FIGURE_DIR, 'confusion_matrix_augment.svg')
trained_network = history.final_network

trained_network.visualize_performance(data_val_full)
plt.savefig(figure_path)

# Shuffle training data

In [None]:
# Shuffle run: same network setup, trained with shuffle=True.
# n_dead_epochs_max is math.inf here, so the dead-epoch limit is unbounded
# while stop_early / stop_early_find_best_params remain switched on.
network = SingleLayerFullyConnected(
    input_size=data_train_full.input_size,
    num_classes=data_train_full.num_classes,
    random_seed=0,
)

shuffle_options = dict(
    eta=HYPERPARAMS['eta'],
    n_epochs=HYPERPARAMS['epochs'],
    n_dead_epochs_max=math.inf,
    shuffle=True,
    stop_early=True,
    stop_early_find_best_params=True,
    verbose=True,
)
history = network.train(data_train_full, data_val_full, **shuffle_options)

# Persist the run under the 'shuffle' postfix.
history.save(PICKLE_DIR, postfix='shuffle')

In [None]:
# Reload the history stored under PICKLE_DIR with postfix 'shuffle' -- the
# same directory and postfix the shuffle training cell saves to.
history = TrainHistory.load(PICKLE_DIR, postfix='shuffle')

In [None]:
# Draw the history's visualization, then export the current matplotlib
# figure to FIGURE_DIR.
figure_path = os.path.join(FIGURE_DIR, 'learning_curves_shuffle.svg')

history.visualize()
plt.savefig(figure_path)

In [None]:
# Visualize how the shuffle-trained network performs on the validation split
# and export the current matplotlib figure to FIGURE_DIR.
figure_path = os.path.join(FIGURE_DIR, 'confusion_matrix_shuffle.svg')
trained_network = history.final_network

trained_network.visualize_performance(data_val_full)
plt.savefig(figure_path)

# Train ensemble (with bagging)

In [None]:
# Seed NumPy's global RNG once for the whole ensemble; the member networks
# are built without an explicit random_seed.
np.random.seed(0)

# Training options are identical for every ensemble member.
member_options = dict(
    eta=HYPERPARAMS['eta'],
    n_epochs=HYPERPARAMS['epochs'],
    n_dead_epochs_max=math.inf,
    shuffle=True,
    stop_early=True,
    stop_early_find_best_params=True,
    verbose=True,
)

for member in range(10):
    network = SingleLayerFullyConnected(
        input_size=data_train_full.input_size,
        num_classes=data_train_full.num_classes,
    )

    # The bag is drawn only after the network has been constructed, so any
    # random numbers are consumed in the same order as before.
    bagged_train = data_train_full.bag()
    history = network.train(bagged_train, data_val_full, **member_options)

    # One pickle per member: 'bag0' .. 'bag9'.
    history.save(PICKLE_DIR, postfix=f'bag{member}')

In [None]:
np.random.seed(0)

# Collect the final network of each saved bagging run ('bag0' .. 'bag9').
bag_postfixes = [f'bag{i}' for i in range(10)]
networks = []
for bag_postfix in bag_postfixes:
    history = TrainHistory.load(PICKLE_DIR, postfix=bag_postfix)
    networks.append(history.final_network)

ensemble = EnsembleClassifier(networks)

# Visualize ensemble performance on the validation split and export the
# current matplotlib figure to FIGURE_DIR.
figure_path = os.path.join(FIGURE_DIR, 'ensemble_performance_val.svg')
ensemble.visualize_performance(data_val_full)
plt.savefig(figure_path)

In [None]:
np.random.seed(0)

# Collect the final network of each saved bagging run ('bag0' .. 'bag9').
bag_postfixes = [f'bag{i}' for i in range(10)]
networks = []
for bag_postfix in bag_postfixes:
    history = TrainHistory.load(PICKLE_DIR, postfix=bag_postfix)
    networks.append(history.final_network)

ensemble = EnsembleClassifier(networks)

# Visualize ensemble performance on the held-out test split and export the
# current matplotlib figure to FIGURE_DIR.
figure_path = os.path.join(FIGURE_DIR, 'ensemble_performance_test.svg')
ensemble.visualize_performance(data_test_full)
plt.savefig(figure_path)