# Hyperparameter tuning notebook with Hyperas

In [1]:
#imports
from hyperopt import Trials, STATUS_OK, tpe

import keras
from keras.layers import Dense, Dropout, Flatten, Reshape, Input, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, GlobalAveragePooling1D
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, TensorBoard
from keras.models import Sequential
from keras.utils import np_utils

from hyperas import optim
from hyperas.distributions import choice, uniform

# Imports
import os
from matplotlib import pyplot as plt
%matplotlib inline

import numpy as np
import pandas as pd
np.random.seed(42)

from sklearn.utils import class_weight

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [5]:
def data():
    """
    Data providing function:

    This function is separated from create_model() so that hyperopt
    won't reload data for each evaluation run.

    Loads the feature, target and id arrays from ``../processed_datasets``,
    inserts a singleton axis at position 1 of the features and swaps their
    last two axes, then splits all three arrays by position: the first 80%
    of rows form the training set and the remaining rows the held-out set.

    The shuffle step is currently switched off (``if False``), so the split
    follows the on-disk row order.

    Result: the tuple ``(X_train, y_train, X_test, y_test)``. The id arrays
    are split and their shapes printed, but they are not handed back.

    NOTE(review): hyperas is understood to reuse this function's source text
    rather than simply calling it, so keep the one-line ``def``, a single
    trailing return statement, and variable names that match the arguments
    of create_model() -- confirm against the hyperas documentation.
    """
    print('Beginning loading')

    X_train = np.load('../processed_datasets/BMI_X_15_xtremefiltered.npy')
    y_train = np.load('../processed_datasets/BMI_y_15_xtremefiltered.npy')
    ids_train = np.load('../processed_datasets/BMI_ids_15_xtremefiltered.npy')

    print('Finished loading, beginning X_train axis adjustment')

    # Reshape the features for the network: add a singleton axis at position 1,
    # then swap the last two axes.
    # NOTE(review): presumably this yields (N, 1, 15, 100) to match the
    # input_shape used in create_model() -- confirm against the saved arrays.
    X_train = np.expand_dims(X_train, 1)
    X_train = np.swapaxes(X_train, -1, -2)

    # Optional shuffle of rows before splitting. Hard-disabled here, so only
    # the else branch ever runs; flip the condition to enable it.
    if False:
        print('Data will be randomized.')
        idxs = np.arange(X_train.shape[0])
        np.random.shuffle(idxs)
        X_train = X_train[idxs]
        y_train = y_train[idxs]
        ids_train = ids_train[idxs]
    else:
        print('Data is NOT randomized.')

    print('Beginning data splitting')

    # Positional 80/20 split. The held-out slices must be taken first, because
    # the *_train names are overwritten with their own prefix afterwards.
    split_num = int(0.8*X_train.shape[0])
    X_test = X_train[split_num:]
    y_test = y_train[split_num:]
    ids_test = ids_train[split_num:]

    X_train = X_train[:split_num]
    y_train = y_train[:split_num]
    ids_train = ids_train[:split_num]
    
    # Shape summary for a quick sanity check of the split.
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, ids_train.shape, ids_test.shape)
    
    return X_train, y_train, X_test, y_test

In [6]:
def create_model(X_train, y_train, X_test, y_test):
    """
    Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.

    Builds a reduced-width, single-input VGG-style network ending in one
    linear output unit, trains it with an MSE loss on batches drawn from
    (X_train, y_train), validates on (X_test, y_test) and reports the lowest
    validation loss seen during training as the objective to minimise.

    # Arguments
        X_train: training inputs, indexable with an integer index array
        y_train: training targets aligned with X_train
        X_test: held-out inputs used as validation data
        y_test: held-out targets used as validation data

    # Returns
        dict with the keys 'loss' (best validation loss), 'status' and 'model'

    NOTE(review): hyperas is understood to rewrite this function's source
    text, so keep the one-line ``def``, keep every import on a single line,
    and do not spell the template brackets literally inside comments or
    docstrings -- confirm against the hyperas documentation.
    """

    # Imports live inside the function so the names are available wherever
    # this body ends up being executed.
    import functools

    import keras.backend as K

    from keras.layers import Permute, Dense, Input, Conv2D, concatenate, MaxPooling2D
    from keras.layers import ELU, BatchNormalization, Dropout, GlobalAveragePooling2D
    from keras.models import Model
    from keras.regularizers import l2

    # Shared L2 kernel regulariser; its strength is a tuned hyperparameter.
    REG_P = dict(kernel_regularizer=l2({{choice([0.001, 0.01, 0.05])}}))

    def _create_input(input_shape, input_tensor=None, name='input'):
        """
        Select a correct input tensor based on shape and instance specification.

        # Arguments
            input_shape: Input shape tuple
            input_tensor: Existing tensor to wrap into the `Input` layer.
                          If set, the layer will not create a placeholder tensor.
            name: Name string for layer.

        # Returns
            Input Tensor
        """
        if input_tensor is None:
            return Input(shape=input_shape, name=name)

        if not K.is_keras_tensor(input_tensor):
            return Input(tensor=input_tensor, shape=input_shape, name=name)

        return input_tensor

    def _conv_block(units, block=1, layer=1, sensor='acc'):
        """
        Create VGG style convolutional block.

        Deviations from original paper.
            - Remove `Dropout`
            - Added `BatchNormalization`
            - He-normal initialization
            - Uses `ELU` Activation

        # Arguments
            units: conv filters
            block: block number within network (used for naming)
            layer: layer number within block (used for naming)
            sensor: sensor name (used for naming)

        # Returns
            callable
        """
        def layer_wrapper(inp):
            # Kernel length along the second spatial axis; tuned hyperparameter.
            filter_len = {{choice([3, 5])}}
            x = Conv2D(units, (1, filter_len), padding='same', kernel_initializer='he_normal',
                       name=f'block{block}_conv{layer}_{sensor}', **REG_P)(inp)
            x = BatchNormalization(name=f'block{block}_bn{layer}_{sensor}')(x)
            x = ELU(name=f'block{block}_act{layer}_{sensor}')(x)
            return x

        return layer_wrapper

    def _dense_block(units, dropout=0.3, name='fc1'):
        """
        Create VGG fully connected block.

        # Deviations from original paper.
            - Added `BatchNormalization`
            - Uses `ELU` Activation

        # Arguments
            units: fc layer dimensionality
            dropout: dropout probability
            name: prefix for dense layers

        # Returns
            callable
        """
        def layer_wrapper(inp):
            x = Dense(units, name=f'{name}', **REG_P)(inp)
            x = BatchNormalization(name=f'{name}_bn')(x)
            x = ELU(name=f'{name}_act')(x)
            x = Dropout(dropout, name=f'{name}_dropout')(x)
            return x

        return layer_wrapper

    def _vgg_body(factor=4, sensor='acc'):
        """
        VGG Network Body containing convolutional blocks

        # Arguments
            factor: scaling factor to reduce network filter width
            sensor: sensor name

        # Return
            callable
        """
        _vgg_conv_block = functools.partial(_conv_block, sensor=sensor)

        def layer_wrapper(inp):
            x = Permute((1, 3, 2), name=f'swapaxes_{sensor}')(inp)

            # Depth of the convolutional stack; tuned hyperparameter.
            num_blocks = {{choice([2, 3, 4, 5])}}

            # NOTE(review): with the (1, 15, 100) input used below and a
            # channels-last layout, a pool size of 4 shrinks the pooled axis
            # 100 -> 25 -> 6 -> 1, so 4 or 5 blocks with pool size 4 presumably
            # cannot be built -- confirm and consider narrowing the search space.
            for block_num in range(1, num_blocks + 1):
                # Filter count doubles with every block, scaled down by `factor`.
                filters = 32 * (2**block_num) // factor
                x = _vgg_conv_block(filters, block=block_num, layer=1)(x)
                x = _vgg_conv_block(filters, block=block_num, layer=2)(x)
                x = MaxPooling2D((1, {{choice([2, 4])}}), name=f'block{block_num}_pool_{sensor}')(x)

            return x

        return layer_wrapper

    def VGG16Net(input_shape=None, input_tensor=(None, None),
                 classes=1000, dropout=0.3, factor=2):
        """
        Modified VGG architecture
            https://arxiv.org/abs/1409.1556

        # Arguments
            input_shape: shape tuple
            input_tensor: Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
                          Only the first element is used.
            classes: accepted for signature compatibility; not used, the
                     network always ends in a single linear output unit
            dropout: dropout applied to fc layers
            factor: scaling factor to reduce network filter width

        # Returns
            A Keras model instance.
        """
        assert input_shape or all(input_tensor), 'Must provide at least one: input_shape, input_tensor'

        # Single input branch
        img_input_acc = _create_input(input_shape, input_tensor=input_tensor[0], name='acc_input')

        # Conv blocks
        x = _vgg_body(factor=factor, sensor='acc')(img_input_acc)

        # Pool the remaining spatial axes away
        x = GlobalAveragePooling2D(name='avgpool')(x)

        # FC layers; how many is a tuned hyperparameter.
        # NOTE(review): 4098 looks like it may have been meant as 4096 (the
        # VGG fc width) -- left unchanged, confirm intent.
        num_dense = {{choice([1, 2])}}
        for i in range(num_dense):
            x = _dense_block(4098 // factor, dropout=dropout, name=f'fc{i + 1}')(x)

        # Single linear output unit
        x = Dense(1, name='predictions')(x)

        return Model(img_input_acc, x, name='VGG16Net')

    model = VGG16Net(input_shape=(1, 15, 100), classes=2)

    # Optimizer (default lr is 0.001)
    adam = keras.optimizers.Adam()

    model_metrics = ['mse']

    # Training parameters
    batch_size = 512

    model.compile(loss='mse',
                  optimizer=adam,
                  metrics=model_metrics)

    # Callbacks: both watch the validation loss.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                  patience=2, min_lr=1e-7, verbose=1)

    early_stop = EarlyStopping(patience=4, verbose=1, restore_best_weights=True)

    cb_list = [reduce_lr, early_stop]

    # Feed training data through a Sequence so an "epoch" can be a fixed
    # number of batches instead of a full pass over the data.
    class DataGenerator(keras.utils.Sequence):
        'Generates data for Keras'
        def __init__(self, X, y, batch_size=512, shuffle=True):
            self.batch_size = batch_size
            self.y = y
            self.X = X
            self.shuffle = shuffle
            self.on_epoch_end()

        def __len__(self):
            'Denotes the number of batches per epoch'
            # Only full batches are served; a trailing partial batch is dropped.
            return len(self.y) // self.batch_size

        def __getitem__(self, index):
            'Generate one batch of data'
            idxs = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
            return self.X[idxs], self.y[idxs]

        def on_epoch_end(self):
            'Updates indexes after each epoch'
            self.indexes = np.arange(len(self.X))
            if self.shuffle:
                np.random.shuffle(self.indexes)

    train_gen = DataGenerator(X_train, y_train, batch_size=batch_size)

    # NOTE(review): the class_weight keys 0/1 presume the targets take exactly
    # those two values, even though the loss is MSE on one linear output --
    # confirm this matches the contents of y_train.
    history = model.fit_generator(train_gen,
                                  steps_per_epoch=200,
                                  epochs=200,
                                  validation_data=(X_test, y_test),
                                  callbacks=cb_list,
                                  class_weight={0: 1.5, 1: 0.5},
                                  shuffle=True,
                                  verbose=0)

    # The model is compiled with an MSE loss and only the 'mse' metric, so the
    # history never contains 'val_acc'. The quantity to minimise is the best
    # (lowest) validation loss, which is also what the callbacks monitor.
    best_val_loss = float(np.amin(history.history['val_loss']))
    print('Best validation loss of epoch:', best_val_loss)
    return {'loss': best_val_loss, 'status': STATUS_OK, 'model': model}

In [7]:
# Search the hyperparameter space with TPE for a handful of evaluations.
# NOTE(review): notebook_name presumably has to match this notebook's file
# name (without extension) -- confirm before running.
best_run, best_model = optim.minimize(
    model=create_model,
    data=data,
    algo=tpe.suggest,
    max_evals=5,
    trials=Trials(),
    notebook_name='HyperparamOpt-BMI',
)


SyntaxError: invalid syntax (<unknown>, line 357)

In [None]:
# Rebuild the splits and score the best model on the held-out portion.
X_train, Y_train, X_test, Y_test = data()
print("Evaluation of best performing model:")
print(best_model.evaluate(X_test, Y_test))
print("Best performing model chosen hyper-parameters:")
print(best_run)