## Ensemble model training
- this notebook will serve to extract the best hyper parameters

In [25]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

import keras_tuner as kt
import tensorflow as tf
from tensorflow.keras.regularizers import L2
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy as bce_loss
from tensorflow.keras.metrics import BinaryAccuracy, BinaryCrossentropy as bce_metric
from tensorflow.keras.initializers import GlorotNormal, GlorotUniform, RandomNormal, RandomUniform, HeNormal, HeUniform
from tensorflow.keras.optimizers import Adadelta, Adafactor, Adagrad, Adam, AdamW, Adamax, Ftrl, Nadam, RMSprop, SGD 

from utilities.data_preprocessor import preprocess

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Define architecture and hyper parameters to use

In [26]:
def model_builder(hp):
    """
    hp - hyperparameter
    """

    model = Sequential()

    hp_kernel_initializer = hp.Choice('initializer', values=['GlorotNormal', 'GlorotUniform', 'RandomNormal', 'RandomUniform', 'HeNormal', 'HeUniform'])
    initializers = {
        'GlorotNormal': GlorotNormal(),
        'GlorotUniform': GlorotUniform(),
        'RandomNormal': RandomNormal(mean=0.0, stddev=1.0),
        'RandomUniform': RandomUniform(minval=-0.05, maxval=0.05),
        'HeNormal': HeNormal(),
        'HeUniform': HeUniform()
    }

    hp_activation = hp.Choice('activation', values=['relu', 'tanh'])

    # the drop probability values, instead of keep probability
    hp_dropout = hp.Choice('dropout', values=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

    # learning rate alpha
    hp_learning_rate = hp.Choice('learning_rate', values=[1.2, 0.03, 0.01, 0.0075, 0.003, 0.001,])

    # regularization value lambda
    hp_lambda = hp.Choice('lambda', values=[10.0, 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.25, 0.125, 0.01,])
    # hp_dropout = hp.Choice('dropout', value=[0.8, 0.85, 0.7, 0.6])

    hp_optimizer = hp.Choice('optimizer', values=['Adadelta', 'Adafactor', 'Adagrad', 'Adam', 'AdamW', 'Adamax', 'Ftrl', 'Nadam', 'RMSprop', 'SGD'])
    optimizers = {
        'Adadelta': Adadelta(learning_rate=hp_learning_rate),
        'Adafactor': Adafactor(learning_rate=hp_learning_rate),
        'Adagrad': Adagrad(learning_rate=hp_learning_rate),
        'Adam': Adam(learning_rate=hp_learning_rate),
        'AdamW': AdamW(learning_rate=hp_learning_rate),
        'Adamax': Adamax(learning_rate=hp_learning_rate), 
        'Ftrl': Ftrl(learning_rate=hp_learning_rate),
        'Nadam': Nadam(learning_rate=hp_learning_rate),
        'RMSprop': RMSprop(learning_rate=hp_learning_rate),
        'SGD': SGD(learning_rate=hp_learning_rate)
    }

    # number of hidden layers
    for index, l in enumerate(range(hp.Int('layer_num', min_value=1, max_value=80))):
        # number of nodes per layer
        model.add(Dense(
            units=hp.Int(f'layer_{index + 1}', min_value=1, max_value=1000, step=100), 
            activation=hp_activation, 
            kernel_initializer=initializers[hp_kernel_initializer],
            kernel_regularizer=L2(hp_lambda)))
        
        model.add(Dropout(hp_dropout))

    model.add(Dense(units=1, activation='linear', kernel_regularizer=L2(hp_lambda)))
    
    model.compile(
        optimizer=optimizers[hp_optimizer],
        loss=bce_loss(from_logits=True),
        metrics=[bce_metric(), BinaryAccuracy(threshold=0.5)]
    )

    return model

In [27]:
# define tuner
tuner = kt.Hyperband(
    model_builder, 
    objective=[kt.Objective('val_binary_accuracy', 'max'), kt.Objective('val_binary_crossentropy', 'min')], 
    max_epochs=100,
    factor=3,
    directory='tuned_models',
    project_name='model'
)

# if cross validation loss does not improve after 10 
# consecutive epochs we stop training our modelearly
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

## load data


In [28]:
# load data with the selected features to use
df = pd.read_csv('./data.csv')
X, Y = preprocess(df)


In [29]:
X

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,smoothness_se,compactness_se,concavity_se,concave points_se,symmetry_se,fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,1.097064,-2.073335,1.269934,0.984375,1.568466,3.283515,2.652874,2.532475,2.217515,2.255747,2.489734,-0.565265,2.833031,2.487578,-0.214002,1.316862,0.724026,0.660820,1.148757,0.907083,1.886690,-1.359293,2.303601,2.001237,1.307686,2.616665,2.109526,2.296076,2.750622,1.937015
1,1.829821,-0.353632,1.685955,1.908708,-0.826962,-0.487072,-0.023846,0.548144,0.001392,-0.868652,0.499255,-0.876244,0.263327,0.742402,-0.605351,-0.692926,-0.440780,0.260162,-0.805450,-0.099444,1.805927,-0.369203,1.535126,1.890489,-0.375612,-0.430444,-0.146749,1.087084,-0.243890,0.281190
2,1.579888,0.456187,1.566503,1.558884,0.942210,1.052926,1.363478,2.037231,0.939685,-0.398008,1.228676,-0.780083,0.850928,1.181336,-0.297005,0.814974,0.213076,1.424827,0.237036,0.293559,1.511870,-0.023974,1.347475,1.456285,0.527407,1.082932,0.854974,1.955000,1.152255,0.201391
3,-0.768909,0.253732,-0.592687,-0.764464,3.283553,3.402909,1.915897,1.451707,2.867383,4.910919,0.326373,-0.110409,0.286593,-0.288378,0.689702,2.744280,0.819518,1.115007,4.732680,2.047511,-0.281464,0.133984,-0.249939,-0.550021,3.394275,3.893397,1.989588,2.175786,6.046041,4.935010
4,1.750297,-1.151816,1.776573,1.826229,0.280372,0.539340,1.371011,1.428493,-0.009560,-0.562450,1.270543,-0.790244,1.273189,1.190357,1.483067,-0.048520,0.828471,1.144205,-0.361092,0.499328,1.298575,-1.466770,1.338539,1.220724,0.220556,-0.313395,0.613179,0.729259,-0.868353,-0.397100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,2.110995,0.721473,2.060786,2.343856,1.041842,0.219060,1.947285,2.320965,-0.312589,-0.931027,2.782080,0.071025,2.379583,2.604187,1.086384,0.191805,0.666001,2.067178,-1.138416,0.167980,1.901185,0.117700,1.752563,2.015301,0.378365,-0.273318,0.664512,1.629151,-1.360158,-0.709091
565,1.704854,2.085134,1.615931,1.723842,0.102458,-0.017833,0.693043,1.263669,-0.217664,-1.058611,1.300499,2.260938,1.156857,1.291565,-0.424010,-0.069758,0.252202,0.808431,-0.189161,-0.490556,1.536720,2.047399,1.421940,1.494959,-0.691230,-0.394820,0.236573,0.733827,-0.531855,-0.973978
566,0.702284,2.045574,0.672676,0.577953,-0.840484,-0.038680,0.046588,0.105777,-0.809117,-0.895587,0.184892,-0.257371,0.276693,0.180698,-0.379342,0.661277,0.510827,0.612157,-0.891416,0.036727,0.561361,1.374854,0.579001,0.427906,-0.809587,0.350735,0.326767,0.414069,-1.104549,-0.318409
567,1.838341,2.336457,1.982524,1.735218,1.525767,3.272144,3.296944,2.658866,2.137194,1.043695,1.157935,0.686088,1.438530,1.009503,-0.173000,2.017716,1.302285,0.785721,0.326634,0.904057,1.961239,2.237926,2.303601,1.653171,1.430427,3.904848,3.197605,2.289985,1.919083,2.219635


## ensemble train the multiple models

In [30]:
# fit model to data
tuner.search(
    X, Y, 
    epochs=50, 
    validation_split=0.3, 
    callbacks=[stop_early]
)

Trial 215 Complete [00h 00m 32s]
multi_objective: 2.746040999889374

Best multi_objective So Far: -0.9606144577264786
Total elapsed time: 01h 35m 16s
INFO:tensorflow:Oracle triggered exit


## extract the hyper parameters of the best model that trained 

In [34]:
tuner.get_best_hyperparameters()[0].get('layer_num')

2

In [35]:
hp_names = [f"layer_{l}" for l in range(2)] + ['activation', 'learning_rate', 'lambda', 'optimizer', 'dropout', 'initializer']
best_hyper_params = {}
for hp in hp_names:
    best_hyper_param = tuner.get_best_hyperparameters()[0].get(hp)
    print(f'{hp}: {best_hyper_param}')

    if hp not in best_hyper_params:
        best_hyper_params[hp] = best_hyper_param


best_hps = tuner.get_best_hyperparameters()[0]
best_hps


layer_0: 101
layer_1: 801
activation: tanh
learning_rate: 0.0075
lambda: 0.25
optimizers: Nadam
dropout: 0.3
kernel_initializer: HeUniform


<keras_tuner.engine.hyperparameters.hyperparameters.HyperParameters at 0x1a129d389a0>

## save best hyper parameter values to file

In [36]:
import json

best_hyper_params

{'layer_0': 101,
 'layer_1': 801,
 'activation': 'tanh',
 'learning_rate': 0.0075,
 'lambda': 0.25,
 'optimizers': 'Nadam',
 'dropout': 0.3,
 'kernel_initializer': 'HeUniform'}

In [37]:
with open('./results/best_hyper_params.json', 'w') as out_file:
    json.dump(best_hyper_params, out_file)