## This notebook takes the results of the ant-colony-optimizer notebook
- the previous notebook stores the results of the best ant, i.e. the ant with the lowest cost
- the features/tour that ant used are the ones used in this notebook
- this notebook trains multiple neural network models using the best ant's features
- the models undergo repeated hyperparameter tuning
- at the end, we extract the hyperparameters that produce the best model

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

import keras_tuner as kt
import tensorflow as tf
from tensorflow.nn import sigmoid
from tensorflow.keras.regularizers import L2
from tensorflow.keras.layers import Dense
from tensorflow.keras import Model, Input, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy as bce_loss
from tensorflow.keras.metrics import BinaryAccuracy, BinaryCrossentropy as bce_metric

from utilities.data_preprocessor import preprocess

## Define architecture and hyper parameters to use

In [None]:
def model_builder(hp):
    """
    Build and compile one candidate binary classifier for the tuner.

    The network is three hidden Dense layers followed by a single Dense
    unit with a linear activation, so the model outputs raw logits rather
    than probabilities. Every layer shares the same L2 kernel
    regularization strength. The loss and both metrics are configured to
    interpret the outputs as logits.

    hp - hyperparameter container supplied by the tuner; the entries
         registered here are 'activation', 'layer_1', 'layer_2',
         'layer_3', 'learning_rate' and 'lambda'

    returns the compiled Sequential model
    """

    class LogitBinaryAccuracy(BinaryAccuracy):
        """
        BinaryAccuracy for predictions given as logits: the logits are
        passed through a sigmoid first so that the usual 0.5 threshold
        applies (equivalent to thresholding the logits at 0). The metric
        keeps the parent's default name, so it is still reported as
        'binary_accuracy' / 'val_binary_accuracy'.
        """

        def update_state(self, y_true, y_pred, sample_weight=None):
            return super().update_state(
                y_true, tf.sigmoid(y_pred), sample_weight=sample_weight
            )

    hp_activation = hp.Choice('activation', values=['relu', 'tanh'])

    # number of nodes per layer
    # NOTE(review): starting at 1 with a step of 100 presumably yields
    # 1, 101, ..., 901 rather than 100, 200, ..., 1000 - confirm intended
    hp_layer_1 = hp.Int('layer_1', min_value=1, max_value=1000, step=100)
    hp_layer_2 = hp.Int('layer_2', min_value=1, max_value=1000, step=100)
    hp_layer_3 = hp.Int('layer_3', min_value=1, max_value=1000, step=100)

    # learning rate alpha
    hp_learning_rate = hp.Choice('learning_rate', values=[1.2, 0.03, 0.01, 0.0075, 0.003, 0.001,])

    # regularization value lambda
    hp_lambda = hp.Choice('lambda', values=[10.0, 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.25, 0.125, 0.01,])
    # hp_dropout = hp.Choice('dropout', value=[0.8, 0.85, 0.7, 0.6])

    model = Sequential([
        Dense(units=hp_layer_1, activation=hp_activation, kernel_regularizer=L2(hp_lambda)),
        Dense(units=hp_layer_2, activation=hp_activation, kernel_regularizer=L2(hp_lambda)),
        Dense(units=hp_layer_3, activation=hp_activation, kernel_regularizer=L2(hp_lambda)),
        # linear output: the model emits logits, the sigmoid is applied
        # inside the loss and the metrics below
        Dense(units=1, activation='linear', kernel_regularizer=L2(hp_lambda))
    ])

    model.compile(
        optimizer=Adam(learning_rate=hp_learning_rate),
        loss=bce_loss(from_logits=True),
        # the metrics must treat the outputs as logits just like the loss,
        # otherwise the reported cross entropy and accuracy are computed
        # on the wrong scale
        metrics=[bce_metric(from_logits=True), LogitBinaryAccuracy(threshold=0.5)]
    )

    return model

In [None]:
# Hyperband tuner: builds candidate models with model_builder and
# ranks them by validation binary accuracy (higher is better)
tuner = kt.Hyperband(
    hypermodel=model_builder,
    objective=kt.Objective(name='val_binary_accuracy', direction='max'),
    factor=3,
    max_epochs=100,
    project_name='model',
    directory='tuned_models',
)

# early stopping callback: training halts once the validation loss
# has gone 10 consecutive epochs without improving
stop_early = tf.keras.callbacks.EarlyStopping(patience=10, monitor='val_loss')

## load data
- [20, 16, 19, 14, 13, 11, 5, 3, 9, 28, 24, 15, 17, 21, 10] is the path of the best ant
- train multiple models with these feature indices

In [None]:
# read the csv and run it through the project preprocessor
# (presumably X holds the input features and Y the labels - verify
# against utilities.data_preprocessor)
X, Y = preprocess(pd.read_csv('./data.csv'))

# run the hyperparameter search, holding out 30% of the data for validation
# NOTE(review): Hyperband appears to schedule the epochs of each trial
# itself (see max_epochs on the tuner), so epochs=50 may have no effect
# here - confirm
tuner.search(
    x=X,
    y=Y,
    validation_split=0.3,
    callbacks=[stop_early],
    epochs=50,
)

In [None]:
# take the single best set of hyperparameters found by the search
best_hps = tuner.get_best_hyperparameters(1)[0]

# rebuild a fresh model from those hyperparameters and train it,
# again holding out 30% of the data for validation and stopping early
# when the validation loss stalls
model = tuner.hypermodel.build(best_hps)
history = model.fit(
    x=X,
    y=Y,
    validation_split=0.3,
    callbacks=[stop_early],
    epochs=50,
)