# Load dependencies

In [None]:
import numpy as np
# Fix the random seed at the very beginning, for reproducibility:
seed = 666
np.random.seed(seed)

import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, KFold
from sklearn.datasets import load_wine
from sklearn.preprocessing import scale

from keras import backend 
from keras.models import Sequential
from keras.losses import categorical_crossentropy
from keras.callbacks import LearningRateScheduler
from keras.optimizers import SGD
from keras.utils import to_categorical
from keras.layers import Dense

# Utility functions (data preprocessing and KFold cross-validation)

In [None]:
### Scale and center features, transform labels into a one-hot encoding vector:
def preprocess_data(X, y):
    """Prepare raw features and labels for the MLP.

    The features ``X`` are passed through ``scale`` (scaling and centering)
    and the labels ``y`` through ``to_categorical`` (one-hot encoding).

    Returns:
        A ``(features, labels)`` pair holding the two transformed arrays.
    """
    scaled_features = scale(X)
    one_hot_labels = to_categorical(y)
    return scaled_features, one_hot_labels

### Training history plot function:
def print_training_history(training_history, fig_idx):
    """Plot the train/validation loss and accuracy curves side by side.

    Args:
        training_history: object returned by ``model.fit`` whose ``history``
            dict holds per-epoch lists under 'loss' and 'val_loss', plus the
            accuracy metric under either 'acc'/'val_acc' or
            'accuracy'/'val_accuracy' (the key name depends on the Keras
            version). Validation entries are required, so the model must have
            been fitted with validation data.
        fig_idx: identifier of the matplotlib figure to draw on.

    Raises:
        KeyError: if the history lacks the loss or accuracy entries above.
    """
    history = training_history.history
    # Keras renamed the logged accuracy metric from 'acc' to 'accuracy';
    # resolve the key at runtime so the plot works with both conventions.
    acc_key = 'acc' if 'acc' in history else 'accuracy'
    val_acc_key = 'val_' + acc_key
    # Epochs are displayed 1-based on the x axis.
    epoch_absciss = range(1, len(history['loss']) + 1)
    plt.figure(fig_idx, figsize=(10, 5))
    plt.suptitle("MLP model assessment")
    # Left panel: loss curves.
    plt.subplot(1, 2, 1)
    plt.plot(epoch_absciss, history['loss'])
    plt.plot(epoch_absciss, history['val_loss'])
    plt.title("Train/Validation loss")
    plt.ylabel('Loss')
    plt.xlabel('Epochs')
    plt.legend(['Train loss', 'Validation loss'], loc='best')
    # Right panel: accuracy curves.
    plt.subplot(1, 2, 2)
    plt.plot(epoch_absciss, history[acc_key])
    plt.plot(epoch_absciss, history[val_acc_key])
    plt.title("Train/Validation accuracy")
    plt.ylabel('Accuracy')
    plt.xlabel('Epochs')
    plt.legend(['Train accuracy', 'Validation accuracy'], loc='best')
    plt.show()

### Split+shuffle X and y into k=num_folds different folds:
def KFold_split(X, y, num_folds, seed):
    """Partition X and y into ``num_folds`` shuffled train/validation folds.

    Args:
        X: indexable array of samples.
        y: indexable array of labels, aligned with ``X``.
        num_folds: number of folds handed to ``KFold`` as ``n_splits``.
        seed: random state used by ``KFold`` for shuffling.

    Returns:
        Four lists ``(X_train_folds, X_val_folds, y_train_folds, y_val_folds)``,
        each with one entry per fold, in the order produced by the splitter.
    """
    splitter = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    # Materialize the index pairs once so all four lists describe the same folds.
    fold_indices = list(splitter.split(X, y))
    X_train_folds = [X[train_idxs] for train_idxs, _ in fold_indices]
    X_val_folds = [X[val_idxs] for _, val_idxs in fold_indices]
    y_train_folds = [y[train_idxs] for train_idxs, _ in fold_indices]
    y_val_folds = [y[val_idxs] for _, val_idxs in fold_indices]
    return X_train_folds, X_val_folds, y_train_folds, y_val_folds

### Select an MLP model from a list of hyper-parameter instances, via KFold cross-validation:
def KFold_model_selection(X, y, fixed_hyper_parameters, hyper_parameters_instances, num_folds, seed):
    """Select the best hyper-parameter instance by KFold CV and report its test accuracy.

    A 30% test set is held out first. The remaining samples are split into
    ``num_folds`` folds, every candidate in ``hyper_parameters_instances`` is
    scored by its mean validation accuracy over those folds, and the candidate
    with the highest score is retrained on the full train+validation portion
    and evaluated on the held-out test set. Progress and results are printed;
    nothing is returned.

    Args:
        X: feature array.
        y: label array aligned with ``X``.
        fixed_hyper_parameters: dict of settings shared by every candidate.
        hyper_parameters_instances: sequence of candidate hyper-parameter dicts.
        num_folds: number of cross-validation folds.
        seed: random state used for both the test split and the fold split.
    """
    # Hold out the test set before any model selection takes place:
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y, test_size=0.3, shuffle=True, random_state=seed)
    # Build the cross-validation folds from the remaining samples:
    train_X_folds, val_X_folds, train_y_folds, val_y_folds = KFold_split(
        X_train_val, y_train_val, num_folds, seed)
    # Score every candidate by its mean validation accuracy across the folds:
    num_instances = len(hyper_parameters_instances)
    mean_val_accuracies = []
    for position, candidate in enumerate(hyper_parameters_instances, start=1):
        print("\nNow preprocessing hyper-parameter instance {}/{}: {}".format(
            position, num_instances, candidate))
        candidate_score = perform_KFold_CV(train_X_folds, val_X_folds,
                                           train_y_folds, val_y_folds,
                                           fixed_hyper_parameters, candidate)
        print("Mean validation accuracy:", candidate_score)
        mean_val_accuracies.append(candidate_score)
    # Keep the candidate with the highest mean validation accuracy:
    winner_idx = np.argmax(np.array(mean_val_accuracies))
    best_hyper_parameters_instance = hyper_parameters_instances[winner_idx]
    print("\n\nBest hyper-parameter instance:", best_hyper_parameters_instance)
    # Retrain the winner on all train+validation samples and score it on the test set:
    best_model_test_accuracy = assess_MLP(
        X_train=X_train_val,
        X_test=X_test,
        y_train=y_train_val,
        y_test=y_test,
        fixed_hyper_parameters=fixed_hyper_parameters,
        hyper_parameters_instance=best_hyper_parameters_instance,
        verbose=True)
    print("Test accuracy:", best_model_test_accuracy)

### KFold cross-validation of a MLP model with given hyper-parameters:
def perform_KFold_CV(X_train_folds, X_val_folds, y_train_folds, y_val_folds, fixed_hyper_parameters, hyper_parameters_instance):
    """Run KFold cross-validation for one hyper-parameter instance.

    One surrogate model is trained and evaluated per fold via ``assess_MLP``;
    each fold's validation accuracy is printed as it becomes available.

    Args:
        X_train_folds, X_val_folds, y_train_folds, y_val_folds: parallel
            lists with one entry per fold.
        fixed_hyper_parameters: dict of settings shared by every candidate.
        hyper_parameters_instance: dict of the hyper-parameters under test.

    Returns:
        The mean of the per-fold validation accuracies.
    """
    total_folds = len(X_val_folds)
    fold_scores = []
    folds = zip(X_train_folds, X_val_folds, y_train_folds, y_val_folds)
    for fold_number, (fold_X_train, fold_X_val, fold_y_train, fold_y_val) in enumerate(folds, start=1):
        fold_score = assess_MLP(X_train=fold_X_train,
                                X_test=fold_X_val,
                                y_train=fold_y_train,
                                y_test=fold_y_val,
                                fixed_hyper_parameters=fixed_hyper_parameters,
                                hyper_parameters_instance=hyper_parameters_instance)
        print("Surrogate model", str(fold_number) + "/" + str(total_folds), "validation accuracy:", fold_score)
        fold_scores.append(fold_score)
    # Average the validation accuracy over all the folds:
    return np.mean(np.array(fold_scores))

### Fit and evaluate an MLP model with given hyper-parameters:
def assess_MLP(X_train, X_test, y_train, y_test, fixed_hyper_parameters, hyper_parameters_instance, verbose=False):
    """Train a fresh MLP on the training data and return its accuracy on the test data.

    Args:
        X_train: 2-D training feature array (samples x features).
        X_test: feature array used for evaluation.
        y_train: one-hot encoded training labels (samples x classes), as
            produced by ``preprocess_data``.
        y_test: one-hot encoded evaluation labels.
        fixed_hyper_parameters: dict with keys "epochs", "train batch size",
            "base learning rate" and "steps between decay".
        hyper_parameters_instance: dict with keys "activation type",
            "decay factor", "momentum", "Nesterov" and "network depth".
        verbose: verbosity flag forwarded to ``model.fit``.

    Returns:
        The accuracy reported by ``model.evaluate`` on ``(X_test, y_test)``.

    Raises:
        KeyError: if one of the expected hyper-parameter keys is missing.
    """
    # Extract the hyper-parameters:
    activation_type = hyper_parameters_instance["activation type"]
    decay_factor = hyper_parameters_instance["decay factor"]
    momentum = hyper_parameters_instance["momentum"]
    Nesterov = hyper_parameters_instance["Nesterov"]
    network_depth = hyper_parameters_instance["network depth"]
    # Same with fixed hyper-parameters:
    num_epochs = fixed_hyper_parameters["epochs"]
    train_batch_size = fixed_hyper_parameters["train batch size"]
    base_lr = fixed_hyper_parameters["base learning rate"]
    step_decay = fixed_hyper_parameters["steps between decay"]
    # Set the learning rate policy:
    learning_rate_policy = step_decay_schedule(base_lr=base_lr, decay_factor=decay_factor, step_decay=step_decay)
    input_shape = (X_train.shape[1], )
    # Derive the number of classes from the one-hot label width rather than
    # from a module-level global, so the function only depends on its arguments:
    num_classes = y_train.shape[1]
    # Build MLP:
    model = build_MLP(input_shape, num_classes, activation_type, momentum, Nesterov, network_depth)
    # Train MLP:
    model.fit(X_train, y_train,
              batch_size=train_batch_size,
              epochs=num_epochs,
              verbose=verbose,
              callbacks=[learning_rate_policy])
    # Evaluate MLP (evaluate returns the loss first, then the accuracy metric):
    _, test_accuracy = model.evaluate(X_test, y_test)
    # Clean session before processing next model:
    backend.clear_session()
    return test_accuracy

# MLP (multi-layer perceptron) builder

In [None]:
### Construct a simple fully-connected MLP with SGD:
def build_MLP(input_shape, num_classes, activation_type, momentum, Nesterov, network_depth):
    """Assemble and compile a small fully-connected classifier trained with SGD.

    The network has a 10-unit hidden layer, an additional 5-unit hidden layer
    when ``network_depth`` is greater than 1, and a softmax output layer with
    ``num_classes`` units. It is compiled with categorical cross-entropy loss
    and the accuracy metric.

    Args:
        input_shape: shape tuple given to the first layer.
        num_classes: number of units of the output layer.
        activation_type: activation used by the hidden layers.
        momentum: momentum of the SGD optimizer.
        Nesterov: whether the SGD optimizer uses Nesterov momentum.
        network_depth: any value above 1 adds the second hidden layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Describe the layers first; they are instantiated one by one below.
    layer_specs = [dict(input_shape=input_shape, units=10, activation=activation_type)]
    if network_depth > 1:
        layer_specs.append(dict(units=5, activation=activation_type))
    layer_specs.append(dict(units=num_classes, activation='softmax'))

    MLP = Sequential()
    for spec in layer_specs:
        MLP.add(Dense(**spec))

    sgd_optimizer = SGD(momentum=momentum, nesterov=Nesterov)
    MLP.compile(loss=categorical_crossentropy,
                optimizer=sgd_optimizer,
                metrics=['accuracy'])
    return MLP

# Bonus: learning rate policy

In [None]:
### Implement a step decay callback for learning rate policy:
def step_decay_schedule(base_lr, decay_factor, step_decay):
    """Build a ``LearningRateScheduler`` callback implementing step decay.

    The learning rate starts at ``base_lr`` and is multiplied by
    ``decay_factor`` once every ``step_decay`` epochs.

    Args:
        base_lr: learning rate used during the first ``step_decay`` epochs.
        decay_factor: multiplicative factor applied at each decay step.
        step_decay: number of epochs between two consecutive decays
            (must be non-zero).

    Returns:
        A ``LearningRateScheduler`` wrapping the schedule.
    """
    def schedule(epoch):
        # 'epoch' is indexed from 0, so 'epoch // step_decay' is exactly the
        # number of decays already due: 0 for the first 'step_decay' epochs,
        # 1 for the next ones, and so on. Adding 1 to 'epoch' here would
        # trigger the first decay one epoch too early (and would skip
        # 'base_lr' entirely when step_decay == 1).
        num_decays = epoch // step_decay
        return float(base_lr * (decay_factor ** num_decays))
    return LearningRateScheduler(schedule)

# Load and preprocess the Wine dataset

In [None]:
# Fetch the Wine dataset and pull out its feature matrix and label vector:
wine = load_wine()
X, y = wine.data, wine.target
# Feature shape handed to the first layer of the MLP (one entry per column of X):
input_shape = (X.shape[1], )
# Count the distinct labels while they are still plain class indices:
num_classes = len(set(y))
# Scale/center the features and one-hot encode the labels:
X, y = preprocess_data(X, y)

# Train, validate and evaluate an MLP model, and plot the results

In [None]:
# Training length (in epochs) and mini-batch size:
num_epochs, train_batch_size = 20, 16
# Hold out 30% of the samples as a test set (seeded shuffle), then split 20% of
# the remaining samples off as a validation set without reshuffling:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=True, random_state=seed)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, shuffle=False)
# Build a single-hidden-layer ReLU MLP trained with plain SGD (no momentum):
model = build_MLP(input_shape,
                  num_classes,
                  activation_type='relu',
                  momentum=0.0,
                  Nesterov=False,
                  network_depth=1)
model.summary()
# Fit the model while tracking validation metrics; keep the per-epoch history for plotting:
training_history = model.fit(
    X_train, y_train,
    batch_size=train_batch_size,
    epochs=num_epochs,
    verbose=True,
    validation_data=(X_val, y_val))
# Measure loss and accuracy on the held-out test set:
print("\nTest on", len(X_test), "samples")
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)
# Draw the loss/accuracy curves of the run:
print_training_history(training_history, fig_idx=1)

# Model selection of our MLP

In [None]:
from itertools import product

# Number of folds in KFold cross-validation:
num_folds = 5
# Number of epochs:
num_epochs = 5
# Train batch size:
train_batch_size = 16
# Base learning rate:
base_lr = 0.01
# Number of epochs (steps) between each decay of the learning rate:
step_decay = 2
# Candidate values of every tuned hyper-parameter. The order of the entries
# matters: the first one varies slowest and the last one fastest in the
# generated list of instances.
hyper_parameters_grid = [
    ("activation type", ["sigmoid", "relu"]),
    ("decay factor", [0.75, 0.25]),
    ("momentum", [0.1, 0.9]),
    ("Nesterov", [False, True]),
    ("network depth", [1, 2]),
]
hyper_parameter_names = [name for name, _ in hyper_parameters_grid]
# Create the list of hyper-parameters instances as the Cartesian product of
# the candidate values (one dict per combination):
hyper_parameters_instances = [
    dict(zip(hyper_parameter_names, combination))
    for combination in product(*(values for _, values in hyper_parameters_grid))
]
# Also store the fixed hyper-parameters:
fixed_hyper_parameters = {"epochs": num_epochs,
                          "train batch size": train_batch_size,
                          "base learning rate": base_lr,
                          "steps between decay": step_decay}
# Select model with KFold cross-validation:
KFold_model_selection(X, y, fixed_hyper_parameters, hyper_parameters_instances, num_folds, seed)