In [1]:
from tensorflow.keras.regularizers import l2
from keras_tuner import HyperParameters, Hyperband
from datetime import datetime

from utils.util import is_laptop
from utils.model_eval import mean_absolute_percentage_error
from utils.data import get_normalized_data, get_normalized_transformed_data

import pandas as pd, numpy as np, tensorflow as tf

In [2]:
today = datetime.today().strftime("%Y-%m-%d")

In [3]:
train, val, test = get_normalized_data()

X_train, y_train = train
X_val, y_val = val
X_test, y_test = test

In [4]:
tr_train, tr_val, tr_test = get_normalized_transformed_data()

tr_X_train, tr_y_train = tr_train
tr_X_val, tr_y_val = tr_val
tr_X_test, tr_y_test = tr_test

In [4]:
hp = HyperParameters()
hp.Int('n_layers', min_value = 4, max_value = 15)
hp.Int('n_units', min_value = 32, max_value = 512, step = 32)
hp.Int('epochs', min_value = 50, max_value = 1000, step = 50)

hp.Float('penalty', min_value = 1e-4, max_value = 1, sampling = 'log')

hp.Choice('activation', ['relu', 'elu', 'softmax'])
hp.Choice('optimizer', ['adam', 'rmsprop', 'adamax'])


'adam'

In [5]:
def build_model_tuner(hp):
    n_units = hp.get('n_units')
    n_layers = hp.get('n_layers')
    activation = hp.get('activation')
    penalty = hp.get('penalty')
    optimizer = hp.get('optimizer')

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(units = n_units, activation = activation, kernel_regularizer=l2(penalty), input_shape = (X_train.shape[1], )))
    model.add(tf.keras.layers.BatchNormalization())

    for _ in range(n_layers):
        model.add(tf.keras.layers.Dense(units = n_units, activation = activation, kernel_regularizer=l2(penalty)))
        model.add(tf.keras.layers.BatchNormalization())

    model.add(tf.keras.layers.Dense(units = n_units, activation = activation, kernel_regularizer=l2(penalty)))
    model.add(tf.keras.layers.Dropout(0.5))

    model.add(tf.keras.layers.Dense(units = 1, activation = 'linear'))

    model.compile(optimizer = optimizer, loss = mean_absolute_percentage_error)

    return model

In [15]:
def build_model(n_units = 64, activation = 'elu', penalty = 0.00168, n_layers = 11, optimizer = 'rmsprop'):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(units = n_units, activation = activation, kernel_regularizer=l2(penalty), input_shape = (X_train.shape[1], )))
    model.add(tf.keras.layers.BatchNormalization())

    for _ in range(n_layers):
        model.add(tf.keras.layers.Dense(units = n_units, activation = activation, kernel_regularizer=l2(penalty)))
        model.add(tf.keras.layers.BatchNormalization())

    model.add(tf.keras.layers.Dense(units = n_units, activation = activation, kernel_regularizer=l2(penalty)))
    model.add(tf.keras.layers.Dropout(0.5))

    model.add(tf.keras.layers.Dense(units = 1, activation = 'linear'))

    model.compile(optimizer = optimizer, loss = mean_absolute_percentage_error)

    return model

In [16]:
model = build_model()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
model.fit(X_train, y_train, epochs = 750, validation_data = (X_val, y_val))

Epoch 1/750
[1m1254/1254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 9ms/step - loss: 37.3475 - val_loss: 9.0435
Epoch 2/750
[1m1254/1254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - loss: 9.0413 - val_loss: 7.1171
Epoch 3/750
[1m1254/1254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - loss: 6.7161 - val_loss: 5.9235
Epoch 4/750
[1m1254/1254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - loss: 5.9780 - val_loss: 5.6703
Epoch 5/750
[1m1254/1254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - loss: 5.7349 - val_loss: 5.7382
Epoch 6/750
[1m1254/1254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - loss: 5.6208 - val_loss: 5.5901
Epoch 7/750
[1m1254/1254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - loss: 5.5947 - val_loss: 5.5706
Epoch 8/750
[1m1254/1254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 9ms/step - loss: 5.5195 - val_loss: 5.3812
Epoch 9

<keras.src.callbacks.history.History at 0x1a4277de240>

In [19]:
import shap
explainer = shap.Explainer(model)
shap_values = explainer.shap_values(X_test)
shap.summary_plot(shap_values, X_test, plot_type = "bar")

ValueError: masker cannot be None.

In [6]:
from tensorflow.keras.callbacks import TensorBoard
# Define the TensorBoard callback
log_dir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
tuner = Hyperband(build_model_tuner, hyperparameters = hp, objective = 'val_loss', max_epochs = 100, project_name = "Try-2024-4-30")
tuner.search(X_train, y_train, validation_data = (X_val, y_val), callbacks = [tensorboard_callback])

Trial 254 Complete [01h 10m 34s]
val_loss: 5.550853729248047

Best val_loss So Far: 4.785363674163818
Total elapsed time: 1d 05h 27m 19s


In [36]:
import io, sys
stdout_buffer = io.StringIO()
sys.stdout = stdout_buffer
tuner.results_summary(num_trials = -1)
sys.stdout = sys.__stdout__
trials = stdout_buffer.getvalue()

In [53]:
def safe_cast(value):
    if value.isdigit():
        return int(value)
    try:
        return float(value)
    except:
        return value

In [58]:
trials_list = [item.split('\n') for item in trials.split('\n\n')]
columns  = ['trial', 'n_layers', 'n_units', 'epochs', 'penalty', 'activation', 'optimizer', 'score']
data = []
for trial in trials_list[1:]:
    data.append([safe_cast(trial[i].split()[1]) for i in [0, 2, 3, 4, 5, 6, 7, 12]])
df = pd.DataFrame(data, columns = columns)
df = df.set_index('trial')
df = df.sort_index()
df

Unnamed: 0_level_0,n_layers,n_units,epochs,penalty,activation,optimizer,score
trial,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,12,128,650,0.698985,softmax,adamax,6.229276
1,10,288,350,0.014159,relu,adamax,51.318115
2,12,32,700,0.090532,elu,adamax,38.406048
3,15,160,750,0.000682,elu,adam,7.252502
4,6,448,700,0.015770,softmax,rmsprop,6.214997
...,...,...,...,...,...,...,...
249,9,256,250,0.005325,relu,adam,5.129033
250,4,160,150,0.437092,softmax,adamax,6.183726
251,9,160,950,0.000322,elu,adam,4.908710
252,9,256,400,0.008343,softmax,rmsprop,6.183403


In [50]:
trials_list

[['Results summary',
  'Results in .\\Try-2024-4-30',
  'Showing -1 best trials',
  'Objective(name="val_loss", direction="min")'],
 ['Trial 0208 summary',
  'Hyperparameters:',
  'n_layers: 11',
  'n_units: 64',
  'epochs: 750',
  'penalty: 0.00016805497508728002',
  'activation: elu',
  'optimizer: rmsprop',
  'tuner/epochs: 100',
  'tuner/initial_epoch: 34',
  'tuner/bracket: 3',
  'tuner/round: 3',
  'tuner/trial_id: 0203',
  'Score: 4.785363674163818'],
 ['Trial 0209 summary',
  'Hyperparameters:',
  'n_layers: 5',
  'n_units: 32',
  'epochs: 650',
  'penalty: 0.0011634242577526809',
  'activation: relu',
  'optimizer: adamax',
  'tuner/epochs: 100',
  'tuner/initial_epoch: 34',
  'tuner/bracket: 3',
  'tuner/round: 3',
  'tuner/trial_id: 0205',
  'Score: 4.799617290496826'],
 ['Trial 0234 summary',
  'Hyperparameters:',
  'n_layers: 15',
  'n_units: 32',
  'epochs: 550',
  'penalty: 0.00012403080548890148',
  'activation: relu',
  'optimizer: adam',
  'tuner/epochs: 100',
  'tune

In [35]:
#from tensorboard.backend.event_processing import event_accumulator
#best_trial = tuner.oracle.get_best_trials()[0].trial_id
#trials = tuner.results_summary(num_trials=-1, )
trials = tuner.get_best_trials(num_trials = -1)
trials

AttributeError: 'Hyperband' object has no attribute 'get_best_trials'

In [19]:
def extract_history(best_trial):

  acc = []
  val_acc = []
  loss = []
  val_loss = []

  for set_data in ['train', 'validation']:
    if set_data == 'train':
      ea = event_accumulator.EventAccumulator('./logs/Try-2024-4-30/trial_' + best_trial + '/execution0/' + set_data)
      ea.Reload()
      for i in range(len(ea.Scalars('epoch_loss'))):
        acc.append(ea.Scalars('epoch_acc')[i][2])
        loss.append(ea.Scalars('epoch_loss')[i][2])
        #lr.append(ea.Scalars('epoch_lr')[i][2])

  if set_data == 'validation':
      ea = event_accumulator.EventAccumulator('logs/scalars/trial_' + best_trial + '/execution0/' + set_data)
      ea.Reload()
      for i in range(len(ea.Scalars('epoch_loss'))):
        val_acc.append(ea.Scalars('epoch_acc')[i][2])
        val_loss.append(ea.Scalars('epoch_loss')[i][2])

  return acc, val_acc, loss, val_loss

In [20]:
acc, val_acc, loss, val_loss = extract_history(best_trial)

print(acc, val_acc, loss, val_loss)

DirectoryDeletedError: Directory ./logs/Try-2024-4-30/trial_0208/execution0/train has been permanently deleted

In [9]:
from datetime import date
best_model = tuner.get_best_models()[0]
best_model.save(f'./saved_models/nn-{date.today()}.keras')

  model.build_from_config(
  trackable.load_own_variables(weights_store.get(inner_path))


In [None]:
model = build_model()
model.fit(X_train, y_train, batch_size = 256, epochs = 100, validation_data = (X_val, y_val))

In [None]:
y_pred = np.squeeze(model.predict(X_test))
y_true = y_test.values
mape = tf.metrics.mean_absolute_percentage_error(y_true, y_pred)