# Hyperparameter search over range data model based on 5-fold cross-validation WIP

User C is completely removed, as this user is reserved for the test set.

Each of the remaining users A, B, D, E and F serves as the validation set for one fold.

For example, fold 1 will train on users B,D,E and F then evaluate on A (and so on for each fold).

Choice of model ___ justified from "11_range_data_model_comparison.ipynb"

## Notebook setup

In [0]:
# Needed to allow editing using PyCharm etc
%load_ext autoreload
%autoreload 2

The following cell is needed for compatibility when using both CoLab and Local Jupyter notebook. It sets the appropriate file path for the data and also installs local packages such as models and data_loading.

In [0]:
import os

# Work out where the project lives from the directory the notebook was
# started in. Only the three environments below are recognised.
path = os.getcwd()
if path == '/content':
    # Google Colab: mount Google Drive and work from the project folder on it.
    from google.colab import drive
    drive.mount('/content/gdrive')
    BASE_PATH = '/content/gdrive/My Drive/Level-4-Project/'
#     !cd gdrive/My\ Drive/Level-4-Project/ && pip install --editable .
    os.chdir('gdrive/My Drive/Level-4-Project/')

elif path == 'D:\\Google Drive\\Level-4-Project\\notebooks':
    BASE_PATH = "D:/Google Drive/Level-4-Project/"

elif path == "/export/home/2192793m":
    BASE_PATH = "/export/home/2192793m/Level-4-Project/"

else:
    # Previously BASE_PATH was simply left undefined here, which surfaced as a
    # confusing NameError a few lines further down. Fail with a clear message.
    raise RuntimeError(
        "Unrecognised working directory %r: cannot determine BASE_PATH. "
        "Add a branch for this environment to the setup cell." % path)


DATA_PATH_MTI = BASE_PATH + 'data/processed/range_FFT/3/MTI_applied/' # not used
DATA_PATH_NO_MTI = BASE_PATH + 'data/processed/range_FFT/3/MTI_not_applied/'

# Output folders are created on demand; exist_ok avoids the separate
# "does it exist?" check (and the race between the check and the creation).
RESULTS_PATH = BASE_PATH + 'results/range_data_model_hyperparameter_search/'
os.makedirs(RESULTS_PATH, exist_ok=True)

MODEL_PATH = BASE_PATH + 'models/range_data_model_hyperparameter_search/'
os.makedirs(MODEL_PATH, exist_ok=True)

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from sklearn.metrics import classification_report, confusion_matrix
from keras import metrics
from keras import optimizers
from keras.callbacks import History, ModelCheckpoint, CSVLogger
from keras.models import load_model
from keras.utils import Sequence, to_categorical
from keras.layers import Input, Conv1D, Multiply, Add, Reshape, Activation, AveragePooling1D, Lambda, Flatten, Dense,GlobalAveragePooling1D
from keras.models import load_model, Model
from keras.callbacks import History, ModelCheckpoint
import sys
import tensorflow as tf

In [0]:
# needed for CheckpointSaver
# https://github.com/scikit-optimize/scikit-optimize/issues/678
! pip install git+https://github.com/scikit-optimize/scikit-optimize/ 
    
from skopt import gp_minimize
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args
from skopt.callbacks import CheckpointSaver
from skopt import dump, load
from skopt.plots import plot_convergence
from skopt.plots import plot_objective, plot_evaluations

## Data Setup

In [0]:
# Load in data dictionary.
# This does not load in any actual data,
# just the dictionary with the names of the files and their associated labels
# NOTE: pickle.load executes arbitrary code from the file, so index.pkl must
# come from a trusted source (here: the project's own processed-data folder).
with open(DATA_PATH_NO_MTI + "index.pkl", "rb") as file:
    data = pickle.load(file)

In [0]:
# User C is held out as the test set, so drop it from the cross-validation pool.
if "C" in data:
    del data["C"]
else:
    print ("Key 'C' not found")

In [0]:
def convert_label_to_int(label):
    """Map an activity name to its integer class index.

    Returns ``None`` when ``label`` is not one of the seven known activities.
    """
    class_index = {
        "walking": 0,
        "pushing": 1,
        "sitting": 2,
        "pulling": 3,
        "circling": 4,
        "clapping": 5,
        "bending": 6,
    }
    return class_index.get(label)

In [0]:
# Build one train/validation split per user: that user's rows form the
# validation set and every other user's rows form the training set.
users = ["A", "B", "D", "E", "F"]
fold_data = {}
for user in users:
    validation_user = user
    labels = {}  # row ID -> integer class, for both subsets
    partition = {'train':[], 'validation':[]} # contains list of training and validation ID's

    for user_letter, actions in data.items():
        # All rows of a given user land in the same subset.
        subset = "validation" if user_letter == validation_user else "train"
        for action, results in actions.items():
            class_index = convert_label_to_int(action)
            for result in results:
                for row in result:
                    partition[subset].append(row)
                    labels[row] = class_index

    fold_data[user] = {"labels": labels, "partition": partition}

In [0]:
# Class names, listed in the same order as the integer codes returned by
# convert_label_to_int (index 0 = "walking" ... index 6 = "bending").
target_names = ["walking", "pushing", "sitting", "pulling", "circling", "clapping", "bending"]
# Number of output classes for the classifier.
nb_classes = len(target_names)

## DataGenerator

In [0]:
# Based on code from https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly

class DataGenerator(Sequence):
    """Keras ``Sequence`` that loads samples from disk one batch at a time.

    Each ID is appended to ``data_directory`` to form the path passed to
    ``np.load``. The absolute value of the loaded array is taken and its
    second axis is sliced to ``bin_range`` before it is stored in the batch.
    Labels are looked up in ``labels`` and returned one-hot encoded.

    Parameters:
        list_IDs: sequence of sample IDs (paths relative to ``data_directory``).
        labels: mapping from sample ID to integer class index.
        batch_size: number of samples per batch.
        dim: shape of one sample after slicing. A tuple, or a bare int for
            one-dimensional samples (the default ``(3000)`` is a plain int).
        n_classes: number of classes used for the one-hot encoding.
        shuffle: when true, the sample order is reshuffled at every epoch end.
        data_directory: prefix prepended to every ID when loading.
        bin_range: ``(start, stop)`` slice applied to the second axis.
    """
    def __init__(self, list_IDs, labels, batch_size=32, dim=(3000),
                 n_classes=7, shuffle=False, data_directory='data/',
                 bin_range=(0,60)):
        """Store the configuration and build the initial sample order."""
        # ``dim`` is unpacked with ``*self.dim`` when allocating a batch, which
        # fails for a bare int such as the default. Normalise it to a tuple.
        try:
            self.dim = tuple(dim)
        except TypeError:
            self.dim = (dim,)
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.data_directory = data_directory
        self.bin_range=bin_range
        self.indexes = None
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch.

        The count is floored, so a trailing partial batch is never served.
        """
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair for batch number ``index``."""

        # Positions (into list_IDs) of the samples belonging to this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate positions into sample IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Load the samples and their labels
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the given samples and return ``(X, one_hot_labels)``."""
        # Size the buffers from the IDs actually supplied rather than from
        # batch_size, so a short list can never leave uninitialised rows
        # from np.empty in the returned batch.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim))

        y = np.empty((n_samples), dtype=int)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample: magnitude of the loaded array, restricted to bin_range
            X[i,] = abs(np.load(self.data_directory + ID))[:,self.bin_range[0]:self.bin_range[1]]
            # Store class
            y[i] = self.labels[ID]

        return X, to_categorical(y, num_classes=self.n_classes)

### Visualize Results

In [0]:
def visualize_results(csvlog_path, metric, save=False, save_file_name=""):
    """Plot the training and validation curves of ``metric`` from a CSV log.

    The log is read from ``RESULTS_PATH + csvlog_path`` and must contain the
    columns ``epoch``, ``metric`` and ``'val_' + metric``. When ``save`` is
    true the figure is also written as a PDF to
    ``RESULTS_PATH + save_file_name``.
    """
    history = pd.read_csv(RESULTS_PATH + csvlog_path)
    # The logged epoch column is shifted up by one for the x axis.
    epoch_numbers = history['epoch'] +1
    curves = (
        ('train', history[metric]),
        ('val', history['val_' + metric]),
    )

    plt.figure()
    for curve_label, values in curves:
        plt.plot(epoch_numbers, values, label=curve_label)
    plt.legend(loc='best')
    plt.xlabel('Epoch')
    plt.ylabel(metric)

    if not save:
        return
    plt.savefig(RESULTS_PATH + save_file_name, format='pdf')

## Model: Wavenet model adapted based on interpretation from Wavenet Paper

Keras implementation of wavenet model taken from https://github.com/basveeling/wavenet and https://github.com/mjpyeon/wavenet-classifier

This model has then been adapted to the classification task based on the instructions from the paper "WAVENET: A GENERATIVE MODEL FOR RAW AUDIO" (https://arxiv.org/pdf/1609.03499.pdf)

Specifically:
"For this task we added a mean-pooling layer after the dilated convolutions that aggregated the activations to coarser frames spanning 10 milliseconds (160× downsampling).  The pooling layer was followed by a few non-causal convolutions."

In [0]:
class WaveNetClassifier:
    """WaveNet-style 1D convolutional classifier built with the Keras functional API.

    The network is an initial causal convolution, ``nb_stacks`` stacks of
    gated dilated causal convolutions (dilation rates ``2**0 .. 2**dilation_depth``),
    a mean-pooling layer, four strided non-causal convolutions and a dense
    softmax head.

    Parameters:
        input_shape: shape of one sample without the batch axis. Either
            ``(timesteps,)`` (a channel axis is added automatically) or
            ``(timesteps, channels)``.
        output_shape: number of output classes (an int).
        kernel_size: kernel size of the initial and dilated causal convolutions.
        dilation_depth: largest dilation exponent used in each stack.
        nb_stacks: number of times the dilation sequence is repeated.
        nb_filters: number of filters in the causal/dilated/1x1 convolutions.
        pool_size_1: pool size of the mean-pooling layer and kernel size of
            the first non-causal convolution.
        pool_size_2: kernel size of the remaining non-causal convolutions.
        use_skip_connections: when true, the summed skip outputs feed the
            classification head; otherwise the last residual output does.

    Raises:
        ValueError: if ``input_shape`` does not have one or two dimensions.
    """

    def __init__(self, input_shape, output_shape, kernel_size=2, dilation_depth=9, nb_stacks=1, nb_filters=40,
                 pool_size_1=80, pool_size_2=100, use_skip_connections=True):
        # Validate first and raise instead of printing and calling sys.exit(),
        # which would tear down the whole interpreter/notebook session.
        if len(input_shape) == 1:
            self.expand_dims = True
        elif len(input_shape) == 2:
            self.expand_dims = False
        else:
            raise ValueError(
                'wrong input shape: expected 1 or 2 dimensions, got %r' % (input_shape,))

        self.activation = 'softmax'
        self.pool_size_1 = pool_size_1
        self.pool_size_2 = pool_size_2
        self.nb_stacks = nb_stacks
        self.kernel_size = kernel_size
        self.dilation_depth = dilation_depth
        self.nb_filters = nb_filters
        self.use_skip_connections = use_skip_connections
        self.input_shape = input_shape
        self.output_shape = output_shape

        self.model = self.build_model()

    def residual_block(self, x, i, stack_nb):
        """Apply one gated dilated block and return ``(residual_out, skip_out)``.

        ``i`` is the dilation exponent (dilation rate ``2**i``) and
        ``stack_nb`` the index of the enclosing stack; both are used in the
        layer names.
        """
        original_x = x
        # Gated activation unit: tanh "filter" multiplied by sigmoid "gate".
        # kernel_size is honoured here (it used to be hard-coded to 2).
        tanh_out = Conv1D(self.nb_filters, self.kernel_size, dilation_rate=2 ** i, padding='causal',
                          name='dilated_conv_%d_tanh_s%d' % (2 ** i, stack_nb), activation='tanh')(x)
        sigm_out = Conv1D(self.nb_filters, self.kernel_size, dilation_rate=2 ** i, padding='causal',
                          name='dilated_conv_%d_sigm_s%d' % (2 ** i, stack_nb), activation='sigmoid')(x)
        x = Multiply(name='gated_activation_%d_s%d' % (i, stack_nb))([tanh_out, sigm_out])

        # Two separate 1x1 convolutions: one for the residual path, one for the skip path.
        res_x = Conv1D(self.nb_filters, 1, padding='same')(x)
        skip_x = Conv1D(self.nb_filters, 1, padding='same')(x)
        res_x = Add()([original_x, res_x])
        return res_x, skip_x

    def build_model(self):
        """Assemble and return a new (uncompiled) Keras ``Model``."""
        input_layer = Input(shape=self.input_shape, name='input_part')
        out = input_layer
        if self.expand_dims:
            # Conv1D needs (timesteps, channels); add a singleton channel axis
            # for one-dimensional input.
            out = Reshape(tuple(self.input_shape) + (1,), name='reshaped_input')(out)

        skip_connections = []
        out = Conv1D(self.nb_filters, self.kernel_size,
                     dilation_rate=1,
                     padding='causal',
                     name='initial_causal_conv'
                     )(out)
        for stack_nb in range(self.nb_stacks):
            for i in range(0, self.dilation_depth + 1):
                out, skip_out = self.residual_block(out, i, stack_nb)
                skip_connections.append(skip_out)

        if self.use_skip_connections:
            # Add() needs at least two tensors; with a single block the lone
            # skip output is used directly.
            if len(skip_connections) == 1:
                out = skip_connections[0]
            else:
                out = Add()(skip_connections)
        out = Activation('relu')(out)
        # added a mean-pooling layer after the dilated convolutions that aggregated the activations to coarser frames
        # spanning 10 milliseconds (160× downsampling)
        # mean pooling layer adjust pool_size_1 to change downsampling
        out = AveragePooling1D(self.pool_size_1, padding='same', name='mean_pooling_layer_downsampling')(out)

        # few non-causal convolutions
        out = Conv1D(self.nb_filters, self.pool_size_1, strides=2, padding='same', activation='relu')(out)
        out = Conv1D(self.nb_filters, self.pool_size_2, strides=2, padding='same', activation='relu')(out)
        out = Conv1D(self.output_shape, self.pool_size_2, strides=2, padding='same', activation='relu')(out)
        out = Conv1D(self.output_shape, self.pool_size_2, strides=2, padding='same', activation='relu')(out)

        # Classification head
        out = Flatten()(out)
        out = Dense(512, activation='relu')(out)
        out = Dense(self.output_shape, activation=self.activation)(out)

        return Model(input_layer, out)

    def get_model(self):
        """Return the model built in the constructor."""
        return self.model

    def get_summary(self):
        """Print the Keras summary of the model."""
        self.model.summary()

    def get_receptive_field(self):
        """Return the receptive field (in timesteps) of the dilated stacks.

        Each dilated layer with rate ``2**i`` widens the field by
        ``(kernel_size - 1) * 2**i``. For ``kernel_size == 2`` this reduces to
        the original ``nb_stacks * 2**(dilation_depth + 1) - (nb_stacks - 1)``.
        """
        per_stack = (self.kernel_size - 1) * (2 ** (self.dilation_depth + 1) - 1)
        return self.nb_stacks * per_stack + 1

### Dilation depth: 4, Stack = 1 (No MTI)
Receptive Field: 32

In [0]:
# Configuration for this single run (dilation depth 4, one stack).
dilation_depth = 4
nb_stacks = 1


# Name of the CSV training log for this configuration.
csvlog_path = "model_1_NO_MTI_dd_" + str(dilation_depth) + "_stacks_" +str(nb_stacks) + ".csv"
# NOTE(review): test_model, model and params are not defined anywhere in the
# visible part of this notebook, and epochs is only assigned in a later cell.
# This looks like a leftover from another notebook - confirm before running.
test_model(model, params, epochs, csvlog_path, verbose=True)


In [0]:
save_graph_path = csvlog_path.split('.')[-2] + ".pdf"
visualize_results(csvlog_path, "acc", save=True, save_file_name=save_graph_path)

## Hyperparameter Search

### Fixed Parameters

In [0]:
# Try all bins to start with
bin_range = (0,63)
# Shape of one sample fed to the model; the second entry matches the width of bin_range.
data_shape = (3000, 63)
activation = 'softmax'
epochs = 10
# NOTE: this second assignment overrides the value above, so every fold is
# trained for a single epoch.
epochs = 1

In [0]:
nb_epoch = 30 # 30

### Parameters to Search

In [0]:
# Search space for gp_minimize. Each dimension's name is the keyword under
# which its value reaches objective(**params) via @use_named_args(space).
space = [
    # Optimizer choice and its learning rate (sampled on a log scale).
    Categorical(['adam', 'sgd_standard', 'sgd_nestrov'], name='optimizer'),
    Real(0.0001, 0.1, "log-uniform", name='learning_rate'),
    Categorical([True, False], name='causal'),
    # Convolutional architecture.
    Integer(8, 128, name="n_filters"),
    Integer(2,5, name="kernel_size"),
    Integer(2, 9, name="dilation_depth"),
    Integer(1, 4, name="number_of_stacks"),
    Integer(8, 64, name='batch_size'),
    Integer(4, 200, name="pool_size_1"),
    Integer(4, 200, name="pool_size_2"),
    # Dense head.
    Real(0.1, 0.9, name='dropout'),
    Integer(16, 1024, name='num_dense_nodes'),
    Integer(1,3, name='num_dense_layers'),
]

### Objective Function to Minimize

In [0]:
@use_named_args(space)
def objective(**params):
    """Return the negated mean validation accuracy over all folds.

    For every user in ``users`` a fresh ``WaveNetClassifier`` is built from
    the sampled hyperparameters, trained on that user's training partition
    in ``fold_data`` and evaluated on its validation partition. The mean
    accuracy is negated because ``gp_minimize`` minimises its objective.

    Parameters:
        **params: one value per dimension of ``space``, keyed by the
            dimension name.

    Returns:
        float: ``-(mean validation accuracy)``.
    """
    # The sampled values may be numpy scalars; convert them to plain Python
    # numbers before handing them to Keras.
    learning_rate = float(params['learning_rate'])

    # Keyword arguments shared by both generators. These are kept in their
    # own dict: reusing the name ``params`` here used to clobber the sampled
    # hyperparameters, so params['optimizer'] raised KeyError.
    generator_params = {'dim': data_shape,
                        'batch_size': int(params['batch_size']),
                        'n_classes': nb_classes,
                        'data_directory': DATA_PATH_NO_MTI,
                        'bin_range': bin_range}

    # NOTE: 'causal', 'dropout', 'num_dense_nodes' and 'num_dense_layers' are
    # sampled but not used - the WaveNetClassifier constructor has no
    # parameters for them.

    total_accuracy = 0
    for user in users:
        # The constructor already builds the model; no extra build_model() call needed.
        wnc = WaveNetClassifier((data_shape), (nb_classes),
                                kernel_size=int(params['kernel_size']),
                                dilation_depth=int(params['dilation_depth']),
                                nb_stacks=int(params['number_of_stacks']),
                                nb_filters=int(params['n_filters']),
                                pool_size_1=int(params['pool_size_1']),
                                pool_size_2=int(params['pool_size_2']))
        model = wnc.get_model()

        training_generator = DataGenerator(fold_data[user]["partition"]['train'], fold_data[user]["labels"], **generator_params, shuffle=True)
        validation_generator = DataGenerator(fold_data[user]["partition"]['validation'], fold_data[user]["labels"], **generator_params, shuffle=False)

        # A new optimizer instance per fold so no optimizer state is shared.
        if params['optimizer'] == 'adam':
            selected_optimizer = optimizers.Adam(lr=learning_rate)

        elif params['optimizer'] == 'sgd_standard':
            selected_optimizer = optimizers.SGD(lr=learning_rate)

        else:
            # Nesterov momentum
            selected_optimizer = optimizers.SGD(lr=learning_rate, decay=1e-6,
                                                momentum=0.9, nesterov=True)

        # Compile with the sampled optimizer (this used to be hard-coded to 'adam').
        model.compile(selected_optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

        # Train model on dataset
        model.fit_generator(generator=training_generator,
                            use_multiprocessing=True,
                            workers=4,
                            epochs=epochs,
                            verbose=0)

        # evaluation is [loss, accuracy] given the metrics passed to compile
        evaluation = model.evaluate_generator(validation_generator,workers=4,
                                              use_multiprocessing=True, verbose=1)

        total_accuracy += evaluation[1]

    return -(total_accuracy/len(users)) # return negative as minimizing

### Callbacks

In [0]:
checkpoint = CheckpointSaver(RESULTS_PATH + "res_gp_checkpoint.pkl")
callbacks_list = [checkpoint]

### Load checkpoint

In [0]:
LOAD_CHECKPOINT = False

In [0]:
# Choose the starting point of the search.
if LOAD_CHECKPOINT:
    # Resume: seed the search with the points and objective values stored in
    # the checkpoint, and request no further random starts.
    res = load(RESULTS_PATH + "res_gp_checkpoint.pkl")
    x0 = res.x_iters
    y0 = res.func_vals
    random_starts = 0
    
else:
    # Fresh search: no prior points, begin with 5 random starts.
    x0 = None
    y0 = None
    random_starts = 5

### Perform Search

In [0]:
# Run the search over `space`, starting from (x0, y0) when a checkpoint was
# loaded. callbacks_list holds the CheckpointSaver created above.
res_gp = gp_minimize(objective, space, x0=x0, y0=y0,
                     n_calls=130, n_random_starts=random_starts,
                     random_state=0, callback=callbacks_list)

### Save gp results

In [0]:
dump(res_gp, RESULTS_PATH + "res_gp_complete.pkl")

### Load gp results

In [0]:
res_gp = load(RESULTS_PATH + "res_gp_complete.pkl")

## Results

In [0]:
# params at minimum
res_gp.x

In [0]:
print("Accuracy:", -res_gp.fun)

In [0]:
plot_convergence(res)

In [0]:
res.x

In [0]:
-res.fun

In [0]:
plt.plot(-res.func_vals)
plt.plot(np.full((len(res.func_vals)), -res.fun), label="Best Accuracy: " + str(round(-res.fun, 3))+"%")
plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")

In [0]:
res.x_iters

In [0]:
plot_convergence(res_gp)

In [0]:
plt.plot(-res_gp.func_vals)
plt.plot(np.full((len(res_gp.func_vals)), -res_gp.fun), label="Best Accuracy: " + str(round(-res_gp.fun, 3))+"%")
plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")