## this notebook takes the results of the ant-colony-optimizer notebook
- previous notebook will store the results of specifically the best ant which has the lowest cost
- the resulting features/tour it has used will be used in this notebook
- this notebook will train multiple neural network models using the best ants features
- models will undergo constant hyper parameter tuning
- the end extract the hyper parameters that produces the best model

In [53]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

import keras_tuner as kt
import tensorflow as tf
from tensorflow.nn import sigmoid
from tensorflow.keras.regularizers import L2
from tensorflow.keras.layers import Dense
from tensorflow.keras import Model, Input, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy as bce_loss
from tensorflow.keras.metrics import BinaryAccuracy, BinaryCrossentropy as bce_metric

from utilities.data_preprocessor import preprocess

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Define architecture and hyper parameters to use

In [54]:
def model_builder(hp):
    """
    hp - hyperparameter
    """

    hp_activation = hp.Choice('activation', values=['relu', 'tanh'])

    # number of nodes per layer
    hp_layer_1 = hp.Int('layer_1', min_value=1, max_value=1000, step=100)
    hp_layer_2 = hp.Int('layer_2', min_value=1, max_value=1000, step=100)
    hp_layer_3 = hp.Int('layer_3', min_value=1, max_value=1000, step=100)

    # learning rate alpha
    hp_learning_rate = hp.Choice('learning_rate', values=[1.2, 0.03, 0.01, 0.0075, 0.003, 0.001,])

    # regularization value lambda
    hp_lambda = hp.Choice('lambda', values=[10.0, 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.25, 0.125, 0.01,])
    # hp_dropout = hp.Choice('dropout', value=[0.8, 0.85, 0.7, 0.6])

    model = Sequential([
        Dense(units=hp_layer_1, activation=hp_activation, kernel_regularizer=L2(hp_lambda)),
        Dense(units=hp_layer_2, activation=hp_activation, kernel_regularizer=L2(hp_lambda)),
        Dense(units=hp_layer_3, activation=hp_activation, kernel_regularizer=L2(hp_lambda)),
        Dense(units=1, activation='linear', kernel_regularizer=L2(hp_lambda))
    ])

    model.compile(
        optimizer=Adam(learning_rate=hp_learning_rate),
        loss=bce_loss(from_logits=True),
        metrics=[bce_metric(), BinaryAccuracy(threshold=0.5)]
    )

    return model

In [55]:
# define tuner
tuner = kt.Hyperband(
    model_builder, 
    objective=[kt.Objective('val_binary_accuracy', 'max'), kt.Objective('val_binary_crossentropy', 'min')], 
    max_epochs=100,
    factor=3,
    directory='tuned_models',
    project_name='model'
)

# if cross validation loss does not improve after 10 
# consecutive epochs we stop training our modelearly
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

## load data
- [20, 16, 19, 14, 13, 11, 5, 3, 9, 28, 24, 15, 17, 21, 10] is the path of the best ant
- train multiple models with these feature indeces

In [56]:
# load data with the selected features to use
df = pd.read_csv('./data.csv')
feat_idxs = df.columns[[20, 16, 19, 14, 13, 11, 5, 3, 9, 28, 24, 15, 17, 21, 10]]
X, Y = preprocess(df)

feat_idxs


Index(['symmetry_se', 'smoothness_se', 'concave points_se', 'perimeter_se',
       'texture_se', 'fractal_dimension_mean', 'area_mean', 'texture_mean',
       'concave points_mean', 'concavity_worst', 'perimeter_worst', 'area_se',
       'compactness_se', 'fractal_dimension_se', 'symmetry_mean'],
      dtype='object')

In [57]:
X

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,1.097064,-2.073335,1.269934,0.984375,1.568466,3.283515,2.652874,2.532475,2.217515,2.255747,...,1.886690,-1.359293,2.303601,2.001237,1.307686,2.616665,2.109526,2.296076,2.750622,1.937015
1,1.829821,-0.353632,1.685955,1.908708,-0.826962,-0.487072,-0.023846,0.548144,0.001392,-0.868652,...,1.805927,-0.369203,1.535126,1.890489,-0.375612,-0.430444,-0.146749,1.087084,-0.243890,0.281190
2,1.579888,0.456187,1.566503,1.558884,0.942210,1.052926,1.363478,2.037231,0.939685,-0.398008,...,1.511870,-0.023974,1.347475,1.456285,0.527407,1.082932,0.854974,1.955000,1.152255,0.201391
3,-0.768909,0.253732,-0.592687,-0.764464,3.283553,3.402909,1.915897,1.451707,2.867383,4.910919,...,-0.281464,0.133984,-0.249939,-0.550021,3.394275,3.893397,1.989588,2.175786,6.046041,4.935010
4,1.750297,-1.151816,1.776573,1.826229,0.280372,0.539340,1.371011,1.428493,-0.009560,-0.562450,...,1.298575,-1.466770,1.338539,1.220724,0.220556,-0.313395,0.613179,0.729259,-0.868353,-0.397100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,2.110995,0.721473,2.060786,2.343856,1.041842,0.219060,1.947285,2.320965,-0.312589,-0.931027,...,1.901185,0.117700,1.752563,2.015301,0.378365,-0.273318,0.664512,1.629151,-1.360158,-0.709091
565,1.704854,2.085134,1.615931,1.723842,0.102458,-0.017833,0.693043,1.263669,-0.217664,-1.058611,...,1.536720,2.047399,1.421940,1.494959,-0.691230,-0.394820,0.236573,0.733827,-0.531855,-0.973978
566,0.702284,2.045574,0.672676,0.577953,-0.840484,-0.038680,0.046588,0.105777,-0.809117,-0.895587,...,0.561361,1.374854,0.579001,0.427906,-0.809587,0.350735,0.326767,0.414069,-1.104549,-0.318409
567,1.838341,2.336457,1.982524,1.735218,1.525767,3.272144,3.296944,2.658866,2.137194,1.043695,...,1.961239,2.237926,2.303601,1.653171,1.430427,3.904848,3.197605,2.289985,1.919083,2.219635


## ensemble train the multiple models

In [58]:
# fit model to data
tuner.search(
    X, Y, 
    epochs=50, 
    validation_split=0.3, 
    callbacks=[stop_early]
)

Trial 254 Complete [00h 00m 05s]
multi_objective: 2.746040999889374

Best multi_objective So Far: -0.9814724363386631
Total elapsed time: 00h 14m 24s
INFO:tensorflow:Oracle triggered exit


## extract the hyper parameters of the best model that trained 

In [64]:
hp_names = [f"layer_{l}" for l in range(1, 4)] + ['activation', 'learning_rate', 'lambda']
hp_values = {}
for hp in hp_names:
    best_param = tuner.get_best_hyperparameters()[0].get(hp)
    print(f'{hp}: {best_param}')

    if hp not in hp_values:
        hp_values[hp] = best_param


best_hps = tuner.get_best_hyperparameters()[0]
best_hps


layer_1: 401
layer_2: 201
layer_3: 901
activation: relu
learning_rate: 0.003
lambda: 0.01


<keras_tuner.engine.hyperparameters.hyperparameters.HyperParameters at 0x2502b56c700>

## save best hyper parameter values to file

In [66]:
import json

hp_values

{'layer_1': 401,
 'layer_2': 201,
 'layer_3': 901,
 'activation': 'relu',
 'learning_rate': 0.003,
 'lambda': 0.01}

In [67]:
with open('./results/best_hyper_params.json', 'w') as results:
    json.dump(hp_values, results)