In [None]:
from tensorflow.keras.regularizers import l2
from keras_tuner import HyperParameters, Hyperband
from datetime import datetime

from utils.util import is_laptop
from utils.model_eval import mean_absolute_percentage_error
from utils.data import get_normalized_data, get_normalized_transformed_data

import pandas as pd, numpy as np, tensorflow as tf

In [None]:
# Date stamp (YYYY-MM-DD) for the moment this cell is executed.
today = format(datetime.today(), "%Y-%m-%d")

In [None]:
# Fetch the three normalized splits, then unpack each one into its
# (X, y) pair in a single destructuring statement.
train, val, test = get_normalized_data()

(X_train, y_train), (X_val, y_val), (X_test, y_test) = train, val, test

In [None]:
# Search space handed to Hyperband through `hyperparameters=hp`.
hp = HyperParameters()

# Integer-valued entries.
hp.Int("n_layers", min_value=4, max_value=15)
hp.Int("n_units", min_value=32, max_value=512, step=32)
hp.Int("epochs", min_value=50, max_value=1000, step=50)  # registered, but not read by build_model_tuner

# L2 penalty, sampled on a log scale.
hp.Float("penalty", min_value=1e-4, max_value=1, sampling="log")

# Categorical entries.
hp.Choice("activation", ["relu", "elu", "softmax"])
hp.Choice("optimizer", ["adam", "rmsprop", "adamax"])


In [None]:
def build_model_tuner(hp):
    """Build and compile the network for one tuner trial.

    The architecture is exactly the one produced by ``build_model``; this
    function only pulls the trial's values out of ``hp`` and forwards them,
    so the tuned model and the hand-configured model share one definition
    (including the explicit ``Input`` layer instead of ``input_shape=`` on
    the first ``Dense``).

    Args:
        hp: hyperparameter container providing ``n_units``, ``n_layers``,
            ``activation``, ``penalty`` and ``optimizer`` via ``hp.get``.

    Returns:
        The compiled model returned by ``build_model``.
    """
    return build_model(
        n_units=hp.get('n_units'),
        activation=hp.get('activation'),
        penalty=hp.get('penalty'),
        n_layers=hp.get('n_layers'),
        optimizer=hp.get('optimizer'),
    )

In [None]:
def build_model(n_units = 64, activation = 'elu', penalty =  0.00168, n_layers = 11, optimizer = 'rmsprop'):
    """Assemble and compile the fully connected regression network.

    Layout: input -> (Dense + BatchNormalization) repeated ``1 + n_layers``
    times -> Dense + Dropout(0.5) -> single linear output unit. Every hidden
    Dense layer uses ``n_units`` units, ``activation`` and an L2 kernel
    regularizer with strength ``penalty``. The input width is taken from the
    notebook-level ``X_train``.

    Returns:
        A ``tf.keras.Sequential`` model compiled with ``optimizer`` and the
        imported ``mean_absolute_percentage_error`` as loss.
    """
    layers = tf.keras.layers

    def hidden_dense():
        # One regularized hidden layer; a new instance on every call.
        return layers.Dense(units=n_units, activation=activation, kernel_regularizer=l2(penalty))

    def add_normalized_block(net):
        net.add(hidden_dense())
        net.add(layers.BatchNormalization())

    net = tf.keras.Sequential()
    net.add(layers.Input(shape=(X_train.shape[1], )))

    # Leading block, followed by the n_layers repeated blocks.
    add_normalized_block(net)
    for _ in range(n_layers):
        add_normalized_block(net)

    # Final hidden layer is followed by dropout instead of batch norm.
    net.add(hidden_dense())
    net.add(layers.Dropout(0.5))

    net.add(layers.Dense(units=1, activation='linear'))

    net.compile(optimizer=optimizer, loss=mean_absolute_percentage_error)
    return net

In [None]:
# Instantiate the network with build_model's default hyperparameters.
model = build_model()

In [None]:
# Train for 750 epochs on the training split, evaluating on the
# validation split after every epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=750,
)

In [None]:
import shap
# Feature attribution for the fitted `model`, evaluated on the test split.
# NOTE(review): the explainer is given only the model (no masker or
# background data) — confirm the installed shap version can auto-select an
# explainer for a Keras model this way. TODO verify.
explainer = shap.Explainer(model)
# NOTE(review): confirm the selected explainer exposes `shap_values()`.
shap_values = explainer.shap_values(X_test)
# Bar-type summary plot of the values computed above.
shap.summary_plot(shap_values, X_test, plot_type = "bar")

In [None]:
from tensorflow.keras.callbacks import TensorBoard

# TensorBoard callback writing under a directory named after the current
# timestamp.
log_dir = f"logs/fit/{datetime.now():%Y%m%d-%H%M%S}"
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Hyperband search over the `hp` space with `val_loss` as the objective.
tuner = Hyperband(
    build_model_tuner,
    hyperparameters=hp,
    objective='val_loss',
    max_epochs=100,
    project_name="Try-2024-4-30",
)
tuner.search(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    callbacks=[tensorboard_callback],
)

In [None]:
import contextlib
import io
import sys  # no longer needed by this cell; kept in case other cells rely on it

# Capture everything results_summary() prints into a string.
# redirect_stdout restores whatever stream was installed before (in a
# notebook that is the kernel's own stream, not sys.__stdout__) and does so
# even if results_summary() raises, so later cells keep printing normally.
stdout_buffer = io.StringIO()
with contextlib.redirect_stdout(stdout_buffer):
    # NOTE(review): num_trials=-1 is passed straight through to the tuner;
    # confirm it yields every trial rather than all but the last. TODO verify.
    tuner.results_summary(num_trials = -1)
trials = stdout_buffer.getvalue()

In [None]:
def safe_cast(value):
    """Convert a text token to a number when it looks like one.

    Tries ``int`` first and ``float`` second; if neither accepts the token
    it is returned unchanged. Trying ``int()`` directly (instead of gating
    on ``str.isdigit``) keeps signed integers such as ``"-5"`` as ints and
    avoids crashing on characters that ``isdigit`` accepts but ``int``
    rejects (e.g. ``"²"``).

    Args:
        value: the token to convert. Anything that is not ``str``/``bytes``
            is passed through untouched so already-typed values are never
            truncated (e.g. ``3.5`` must not become ``3``).

    Returns:
        An ``int``, a ``float``, or the original ``value``.
    """
    if not isinstance(value, (str, bytes)):
        return value

    for caster in (int, float):
        try:
            return caster(value)
        except ValueError:
            # Not parseable by this caster; fall through to the next one.
            continue
    return value

In [None]:
def _parse_trial_block(lines, hp_names):
    """Turn the printed lines of one trial summary into a table row.

    The first line supplies the trial id (its second whitespace-separated
    token). Every following ``key: value`` line is looked up by key, so
    extra or reordered lines in the summary cannot shift the score or a
    hyperparameter into the wrong column. Missing keys become ``None``.
    """
    fields = {}
    for line in lines[1:]:
        key, sep, raw = line.partition(':')
        if sep:
            fields[key.strip()] = raw.strip()

    row = [safe_cast(lines[0].split()[1])]
    row.extend(safe_cast(fields[name]) if name in fields else None for name in hp_names)
    row.append(safe_cast(fields['Score']) if 'Score' in fields else None)
    return row


# Blank lines separate the blocks of the captured summary; the first block
# is the header and is skipped below.
trials_list = [item.split('\n') for item in trials.split('\n\n')]
columns  = ['trial', 'n_layers', 'n_units', 'epochs', 'penalty', 'activation', 'optimizer', 'score']
data = [
    _parse_trial_block(block, columns[1:-1])
    for block in trials_list[1:]
    # Ignore blocks without a usable first line (e.g. trailing blank output).
    if block and len(block[0].split()) > 1
]
df = pd.DataFrame(data, columns = columns)
df = df.set_index('trial')
df = df.sort_index()
df

In [None]:
# Display the captured summary as split into blocks of lines, for inspection.
trials_list

In [None]:
# Trial objects ranked best-first. The ranking lives on the tuner's oracle
# (the Tuner itself has no `get_best_trials`), and we ask for as many trials
# as the oracle has recorded.
# Note: this rebinds `trials`, which earlier held the captured summary text.
trials = tuner.oracle.get_best_trials(num_trials = len(tuner.oracle.trials))

# Id of the top-ranked trial, consumed by the history-extraction cell.
best_trial = trials[0].trial_id if trials else None
trials

In [None]:
def extract_history(best_trial, log_root = './logs/Try-2024-4-30'):
    """Read the per-epoch curves of one trial from its TensorBoard event files.

    Both splits are read from
    ``<log_root>/trial_<best_trial>/execution0/<split>`` with ``<split>``
    being ``train`` and ``validation``.

    NOTE(review): the validation events used to be read from a different
    root (``logs/scalars``) than the training events; both now share
    ``log_root``. Confirm ``log_root`` matches the directory the TensorBoard
    callback actually writes to for this search.

    Args:
        best_trial: trial id used to build the log directory name.
        log_root: directory containing the ``trial_<id>`` folders.

    Returns:
        ``(acc, val_acc, loss, val_loss)`` as lists of scalar values. The
        accuracy lists are empty when no ``epoch_acc`` scalar was logged
        (the models in this notebook are compiled without an accuracy
        metric).

    Raises:
        KeyError: if a split has no ``epoch_loss`` scalar, which indicates
            the log directory is wrong or empty.
    """
    # Local import: no cell in the visible notebook imports event_accumulator.
    from tensorboard.backend.event_processing import event_accumulator

    def load_events(split):
        # Open and fully load the event files of one split.
        events = event_accumulator.EventAccumulator(
            f'{log_root}/trial_{best_trial}/execution0/{split}'
        )
        events.Reload()
        return events

    def scalar_values(events, tag, required):
        # Index 2 of each scalar event is its value.
        if not required and tag not in events.Tags().get('scalars', []):
            return []
        return [event[2] for event in events.Scalars(tag)]

    train_events = load_events('train')
    validation_events = load_events('validation')

    acc = scalar_values(train_events, 'epoch_acc', required=False)
    loss = scalar_values(train_events, 'epoch_loss', required=True)
    val_acc = scalar_values(validation_events, 'epoch_acc', required=False)
    val_loss = scalar_values(validation_events, 'epoch_loss', required=True)

    return acc, val_acc, loss, val_loss

In [None]:
# Requires `best_trial` (the trial id handed to extract_history) to have been
# defined by an earlier cell.
acc, val_acc, loss, val_loss = extract_history(best_trial)

# Dump the four returned series as plain text.
print(acc, val_acc, loss, val_loss)

In [None]:
from datetime import date
from pathlib import Path

# Persist the top-ranked model of the search under a date-stamped file name.
best_model = tuner.get_best_models()[0]
# Create the target folder first so the save does not fail on a fresh
# checkout where ./saved_models does not exist yet.
Path('./saved_models').mkdir(parents=True, exist_ok=True)
best_model.save(f'./saved_models/nn-{date.today()}.keras')

In [None]:
# Rebuild a fresh default network (replacing the earlier `model`) and train
# it for 100 epochs with a batch size of 256.
model = build_model()
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=256,
    epochs=100,
)

In [None]:
# Test-split evaluation of the current `model`.
y_true = y_test.values
y_pred = model.predict(X_test)
y_pred = np.squeeze(y_pred)  # drop singleton dimensions from the predictions
# NOTE(review): the result is stored in `mape` but not displayed by this cell.
mape = tf.metrics.mean_absolute_percentage_error(y_true, y_pred)