# HPO example for CIFAR10 Custom CNN

## CIFAR10 problem setting

Let's consider the CIFAR-10 dataset, which is available in TensorFlow: https://www.cs.toronto.edu/~kriz/cifar.html

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.

We take the basic example CNN from the TensorFlow tutorials as a reference and extend it with dropout layers and with pixel dropout for data augmentation.

We will implement a custom version of the following Keras model, enhanced with dropout: https://www.tensorflow.org/tutorials/images/cnn

     model = models.Sequential()
     model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
     model.add(layers.MaxPooling2D((2, 2)))
     model.add(layers.Dropout(0.05))
     model.add(layers.Conv2D(64, (3, 3), activation='relu'))
     model.add(layers.MaxPooling2D((2, 2)))
     model.add(layers.Dropout(0.05))
     model.add(layers.Conv2D(64, (3, 3), activation='relu'))
     model.add(layers.Flatten())
     model.add(layers.Dropout(0.05))
     model.add(layers.Dense(64, activation='relu'))
     model.add(layers.Dense(10))
     return model

Hyperparameters:
    * Network architecture (CNN)
        * Dropout prob 1
        * Dropout prob 2
        * Dropout prob 3
    * Data augmentation
        * Prob to modify batch sample
        * Pixel dropout prob

## Dataset inspection

In [None]:
import matplotlib.pyplot as plt
from tensorflow.keras import datasets, layers, models

In [None]:
# Load the CIFAR-10 train/test splits through Keras, then display the shapes
# of the two image arrays.
train_split, test_split = datasets.cifar10.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split
train_images.shape, test_images.shape

In [None]:
import numpy as np

In [None]:
# Indices of the training labels equal to 1 (class_names[1] is 'automobile').
(train_labels == 1).nonzero()

In [None]:
# Hold out 1000 randomly drawn training indices per class (10 classes) as a
# validation set; every remaining index of the 50000 stays in the training set.
# The generator is seeded so the split is reproducible.
rng = np.random.default_rng(42)
per_class_picks = []
for class_id in range(10):
    class_indices = np.nonzero(train_labels == class_id)[0]
    per_class_picks.append(rng.choice(class_indices, 1000, replace=False))
validation_samples = np.concatenate(per_class_picks, axis=0)
all_indices = np.arange(50000)
training_samples = np.setdiff1d(all_indices, validation_samples, assume_unique=True)
# Shuffle both index arrays in place (validation first, then training).
rng.shuffle(validation_samples)
rng.shuffle(training_samples)

In [None]:
# Number of labels left in the training split after removing the validation samples.
np.size(train_labels[training_samples])

In [None]:
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Show the first 25 training images in a 5x5 grid, each captioned with its class name.
plt.figure(figsize=(10, 10))
for slot in range(25):
    image, label = train_images[slot], train_labels[slot]
    plt.subplot(5, 5, slot + 1)
    # Hide ticks and grid so only the picture and its caption remain.
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(image, cmap=plt.cm.binary)
    # Each CIFAR label is itself a one-element array, hence the extra [0].
    plt.xlabel(class_names[label[0]])
plt.show()

## Experiment cache utils

Since experiments can take more than 15 minutes to run, here is a small decorator function that stores the experiment outputs when the run does not fail. It relies on the hash of a frozen dataclass Config to key the cached results.

In [None]:
import os
import hashlib
import pickle

def cache_results(exp_func, exp_dir="cnn_experiments"):
    """Wrap ``exp_func`` so its result is pickled to disk and reused on later calls.

    The cache entry is keyed only by the first positional argument (the
    experiment config): the MD5 hex digest of ``str(config.__hash__())`` names a
    directory under ``_cache/<exp_dir>/`` that holds ``results.pkl``. Any other
    positional or keyword arguments are forwarded to ``exp_func`` but do NOT
    take part in the key.

    Args:
        exp_func: Callable executed on a cache miss as ``exp_func(*args, **kwargs)``.
            Its first positional argument must be hashable.
        exp_dir: Sub-directory of ``_cache`` used to group the cached results.

    Returns:
        A wrapper with the same call signature as ``exp_func`` that returns the
        cached result when one exists and otherwise runs ``exp_func`` and
        stores its result. If ``exp_func`` raises, nothing is stored.
    """
    import functools

    @functools.wraps(exp_func)
    def cached(*args, **kwargs):
        config = args[0]
        # NOTE(review): hash() of str/bytes values is salted per interpreter
        # process unless PYTHONHASHSEED is fixed, so a config with such fields
        # would get a different key in every session. The key derivation is
        # kept unchanged so that existing cache directories stay valid.
        cache_dir = hashlib.md5(str.encode(str(config.__hash__()))).hexdigest()
        results_dir = f'_cache/{exp_dir}/{cache_dir}'
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(results_dir, exist_ok=True)
        exp_results_file = f'{results_dir}/results.pkl'
        if os.path.exists(exp_results_file):
            print("results loaded from cache for: ", config)
            # pickle can execute arbitrary code on load: only use cache
            # directories that were produced locally by this notebook.
            with open(exp_results_file, 'rb') as f:
                return pickle.load(f)
        results = exp_func(*args, **kwargs)
        # Write to a temporary file and rename it into place, so an interrupted
        # dump can never leave a truncated results.pkl that would later be
        # mistaken for a valid cache hit.
        tmp_results_file = f'{exp_results_file}.tmp'
        with open(tmp_results_file, 'wb') as f:
            pickle.dump(results, f)
        os.replace(tmp_results_file, exp_results_file)
        return results
    return cached

## Sequential hyper parameter tuning with annealing

In [None]:
from hpo import Experiment
from hpo.optimizers import Anneal, GridSearch, RandomSearch, GaussianProcess
from hpo.runtimes import Local
from hpo.space import Real, Integer, Quantized, Choice, SearchSpace
from self_tuning_nets.visualization import trajectories_legend

In [None]:
def run_model(args):
    """Run one (cached) CNN experiment for a hyperparameter setting.

    Args:
        args: Mapping of hyperparameter values, read with ``.get`` for the keys
            ``"init_dropout"`` (default 0.95), ``"init_pixel_dropout"``
            (default 0.95) and ``"init_augment_prob"`` (default 0.05).

    Returns:
        A dict whose ``"loss"`` entry is the negated ``hyper_accuracy`` as a
        float, together with the loss, trajectory and accuracy outputs of the
        experiment under the keys ``"wlosses"``, ``"hlosses"``,
        ``"param_trajectories"``, ``"scale_trajectories"`` and ``"accuracy"``.
    """
    config_fields = {
        "INIT_DROPOUT": args.get("init_dropout", 0.95),
        "INIT_PIXEL_DROPOUT": args.get("init_pixel_dropout", 0.95),
        "INIT_AUGMENT_PROB": args.get("init_augment_prob", 0.05),
        "WITH_HYPER_TRAINING": False,
        "MAX_EPOCHS": 40,
    }
    experiment_config = ExperimentConfig(**config_fields)

    # Results are cached on disk under the "hpo_cnn" experiment directory.
    cached_experiment = cache_results(run_deterministic_cpu_hyper_cnn_experiment, "hpo_cnn")
    outputs = cached_experiment(experiment_config, for_hpo=True)
    wlosses, hlosses, param_trajectories, scale_trajectories, accuracy, hyper_accuracy = outputs

    # The reported loss is the negated accuracy, printed here for each run.
    print(-hyper_accuracy)
    return dict(
        loss=-float(hyper_accuracy),
        wlosses=wlosses,
        hlosses=hlosses,
        param_trajectories=param_trajectories,
        scale_trajectories=scale_trajectories,
        accuracy=accuracy,
    )

In [None]:
# We cannot cache the individual models for Anneal since it does not let us modify the seed,
# so instead we save the full optimization results and avoid rerunning the search.
def recover_anneal_results(results_dir='_cache/hpo_cnn/anneal_results1'):
    """Load previously pickled annealing results from disk, if present.

    Args:
        results_dir: Directory expected to contain ``results.pkl``. Defaults to
            the location used for the annealing run in this notebook.

    Returns:
        The unpickled object, or ``None`` when ``results.pkl`` does not exist.
    """
    exp_results_file = f'{results_dir}/results.pkl'
    # Open first and treat only a missing file as "no results yet"; this avoids
    # the exists()-then-open() race and keeps unpickling errors visible.
    try:
        f = open(exp_results_file, 'rb')
    except FileNotFoundError:
        return None
    # pickle can execute arbitrary code on load: only read files produced locally.
    with f:
        return pickle.load(f)

In [None]:
# Run the annealing search only when no saved results are found, then pickle
# the full result object so later sessions can skip the run.
if recover_anneal_results() is None:
    # Create the output directory BEFORE the long run, so a filesystem problem
    # surfaces immediately instead of after all trials have finished.
    results_dir = '_cache/hpo_cnn/anneal_results1'
    os.makedirs(results_dir, exist_ok=True)
    exp_results_file = f'{results_dir}/results.pkl'

    optimizer = Anneal()
    space = SearchSpace(
        init_dropout=Real(start=0.25, end=1.0),
        init_pixel_dropout=Real(start=0.25, end=1.0),
        init_augment_prob=Real(start=0.0, end=0.95)
    )
    # number_of_trials = space.size
    number_of_trials = 20
    exp = Experiment(
        name="hpo_cnn",
        target=run_model,
        search_space=space,
        optimizer=optimizer,
        trials=number_of_trials)

    runtime = Local(njobs=1)
    res = runtime.run(exp)

    with open(exp_results_file, 'wb') as f:
        pickle.dump(res, f)

In [None]:
# Reload the saved annealing results and report the best setting together with
# the last entry of its accuracy series.
anneal_results = recover_anneal_results()
best_hyperparams = anneal_results.x
best_test_accuracy = anneal_results.y['accuracy'][-1]
print(f"Best model Hyperparam {best_hyperparams} -> Test acc: {best_test_accuracy}")

In [None]:
# One tuple per trial, holding the recorded series in a fixed field order.
result_fields = ("wlosses", "hlosses", "param_trajectories", "scale_trajectories", "accuracy")
results = [tuple(trial[field] for field in result_fields) for trial in anneal_results.y_iters]

In [None]:
# Transpose the per-trial tuples into one tuple per recorded quantity.
(
    wlosses_n,
    hlosses_n,
    param_trajectories_n,
    scale_trajectories_n,
    accuracy_n,
) = zip(*results)

In [None]:
# Last accuracy value recorded for each trial.
best_acc = [acc[-1] for acc in accuracy_n]
# Compute the extremes once instead of once per element.
lowest_acc, highest_acc = min(best_acc), max(best_acc)
acc_span = highest_acc - lowest_acc
if acc_span == 0:
    # All trials ended with the same accuracy: avoid dividing by zero and give
    # every trial the darkest colour.
    rescaled_acc = [1.0 for _ in best_acc]
else:
    # Linearly rescale accuracies to [0.3, 1] so even the worst trial remains
    # visible on the 'Reds' colormap.
    rescaled_acc = [0.3 + ((1 - 0.3) / acc_span) * (acc - lowest_acc) for acc in best_acc]
reds = plt.get_cmap('Reds')
lines_palette = [reds(acc) for acc in rescaled_acc]
print("Best acc: ", highest_acc)
print("Worst acc: ", lowest_acc)

In [None]:
# Display the x_iters recorded in the annealing results (presumably the
# hyperparameter setting of each trial — verify against the hpo library).
anneal_results.x_iters

In [None]:
exp_settings = anneal_results.x_iters
# Pair each trial's colour, settings and final accuracy, ordered from the
# lowest to the highest accuracy (stable sort, ties keep trial order).
sorted_info = list(zip(lines_palette, exp_settings, best_acc))
sorted_info.sort(key=lambda entry: entry[2])
sorted_settings = []
sorted_palette = []
for trial_colour, trial_params, trial_acc in sorted_info:
    sorted_settings.append(f"Acc: {trial_acc:.{3}f} -> Param: {trial_params}")
    sorted_palette.append(trial_colour)
print("Accuracy -> param dict")
trajectories_legend(sorted_settings, sorted_palette)
plt.gcf().set_size_inches(1.0, 1.0)

In [None]:
from matplotlib import cm
from matplotlib.colors import ListedColormap
# Standalone horizontal colorbar for the trial colours.
# The trial palette is sampled from the [0.3, 1] portion of 'Reds' (see the
# rescaling that builds lines_palette), so the colorbar must use that same
# truncated range; mapping [min, max] accuracy onto the full colormap would
# show colours that do not match the plotted trials.
reds = plt.get_cmap('Reds')
trial_cmap = ListedColormap(reds(np.linspace(0.3, 1.0, 256)))
mappable = cm.ScalarMappable(cmap=trial_cmap)
mappable.set_clim(vmin=np.min(best_acc), vmax=np.max(best_acc))
plt.colorbar(mappable, ax=plt.gca(), orientation='horizontal')
# Hide the (empty) host axes so that only the colorbar is drawn.
plt.gca().set_visible(False)
plt.show()

In [None]:
# Pairwise scatter plots of the tried settings, coloured by trial accuracy.
# Each setting is indexed as setting[column][1]; the column used for each axis
# label is listed here.
axis_columns = {"augment_prob": 0, "layer_dropout": 1, "pixel_dropout": 2}
axis_pairs = (
    ("augment_prob", "layer_dropout"),
    ("pixel_dropout", "layer_dropout"),
    ("pixel_dropout", "augment_prob"),
)
for x_name, y_name in axis_pairs:
    x_values = [setting[axis_columns[x_name]][1] for setting in exp_settings]
    y_values = [setting[axis_columns[y_name]][1] for setting in exp_settings]
    plt.scatter(x=x_values, y=y_values, color=lines_palette)
    plt.xlabel(x_name)
    plt.ylabel(y_name)
    plt.show()

In [None]:
# Plot the per-trial accuracy series with the trial colour palette
# (presumably one line per trial — confirm against the helper's definition).
# NOTE(review): trajectories_general_plot is not imported in the visible cells
# (only trajectories_legend is) — confirm it is defined or imported elsewhere.
trajectories_general_plot(accuracy_n, lines_palette, ylabel="accuracy")
plt.show()

In [None]:
import numpy as np
# Print the last value of every parameter trajectory for the trial with the
# highest final accuracy.
best_trial = np.argmax(best_acc)
param_keys = param_trajectories_n[0].keys()
print("Best hyperparam with test acc: ", best_acc[best_trial])
best_trial_trajectories = param_trajectories_n[best_trial]
for param_name in param_keys:
    print(param_name, ": ", best_trial_trajectories[param_name][-1])

In [None]:
import numpy as np
# Print the last value of every parameter trajectory for the trial with the
# lowest final accuracy.
worst_trial = np.argmin(best_acc)
param_keys = param_trajectories_n[0].keys()
print("Worst hyperparam with test acc: ", best_acc[worst_trial])
worst_trial_trajectories = param_trajectories_n[worst_trial]
for param_name in param_keys:
    print(param_name, ": ", worst_trial_trajectories[param_name][-1])