In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tabulate import tabulate
import warnings
import time
import sys, os
from sklearn.model_selection import KFold
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.model_selection import train_test_split
from skopt import BayesSearchCV
import keras_tuner as kt
from keras.models import Sequential
from keras.layers import Dense, Input, Normalization
from tensorflow.keras.optimizers import get as get_optimizer
from sklearn.preprocessing import StandardScaler


path = 'datasets/Dataset_2.xlsx'
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# Read and load dataset
df= pd.read_excel(path, sheet_name="preproc")





Num GPUs Available:  0


KeyboardInterrupt: 

INPUT DATA

In [None]:
X = (df.iloc[:,14:]).values
print(X.shape)

(2905, 530)


OUTPUT DATA

In [None]:
Y = (df.iloc[:,7]).values
print(Y.shape)

(2905,)


**NRMSEPiqr**

In [14]:

def NRMSEPiqr(observed_values, predicted_values):
    # Calculate RMSEP
    rmsep = np.sqrt(np.mean((observed_values - predicted_values) ** 2))
    # Calculate Q1 (25th percentile) and Q3 (75th percentile)
    Q1 = np.percentile(observed_values, 25)
    Q3 = np.percentile(observed_values, 75)

    # Calculate IQR
    IQR = Q3 - Q1

    return rmsep/IQR

NRMSEPiqrscorer = make_scorer(NRMSEPiqr,greater_is_better=False)


In [None]:
X[0].shape

(530,)

Bayseian Optimization

In [None]:

def model_builder(hp):
  
  max_n_layer = 8
  neurons = []
  for i in range(max_n_layer):
    neurons.append(hp.Int(f"n_neurons_{i}", min_value=32, max_value=512, sampling="log"))
  hp_hidden = hp.Int('n_hidden', min_value=1, max_value=8)
  hp_learning_rate = hp.Float('learning_rate',min_value=1e-5, max_value=0.1, sampling="log")
  hp_optimizers = hp.Choice('optimizer', values = ["SGD","RMSprop","Adam"])
  optimizer = get_optimizer(hp_optimizers)
  optimizer.learning_rate = hp_learning_rate

  model = Sequential()
  model.add(Input(shape=X[0].shape))
  for k in range(hp_hidden):
    model.add(keras.layers.Dense(units=neurons[k], activation="relu"))

  model.add(keras.layers.Dense(1))


  model.compile(optimizer=hp_optimizers,
                loss="mean_absolute_error",
                metrics=["MeanSquaredError"])

  return model

Create Tuner

In [None]:
import tempfile
temp_dir = tempfile.TemporaryDirectory()

tuner = kt.BayesianOptimization(model_builder,
                     objective=kt.Objective("val_loss", direction="min"),
                     seed = 777,
                      max_trials = 100, 
                      overwrite=True,
                      directory=temp_dir.name
)


Start Search

In [None]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=777)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

tuner.search(X_train, Y_train, epochs=9999, validation_split=0.2, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

print(best_hps)


Trial 100 Complete [00h 00m 10s]
val_loss: 0.32751718163490295

Best val_loss So Far: 0.31888633966445923
Total elapsed time: 00h 15m 22s
<keras_tuner.src.engine.hyperparameters.hyperparameters.HyperParameters object at 0x77883fb9a420>


Evaluate best model

In [None]:
best_model = tuner.hypermodel.build(best_hps)
best_model.summary()

In [15]:

trials = 100
random_cv = np.random.seed(777)
#Create seed list
seeds = np.random.choice(trials + 1, size=trials, replace= False)

NRMSE = np.zeros((trials), dtype="float")
MSE = np.zeros((trials), dtype="float")
RMSE = np.zeros((trials), dtype="float")

for i in range(trials):
    optimizer = get_optimizer(best_hps.get('optimizer'))
    optimizer.learning_rate = best_hps.get('learning_rate')
    best_model.compile(optimizer=optimizer, loss="mean_absolute_error",
                metrics=["MeanSquaredError"])

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=seeds[i])

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)


    best_model.fit(X_train, Y_train, epochs=9999, validation_split=0.2, callbacks=[stop_early])

    # Make predictions using the testing set
    Y_pred_mlp = best_model.predict(X_test)

    #Compute error
    NRMSE[i] = NRMSEPiqr(Y_test, Y_pred_mlp)
    MSE[i] = mean_squared_error(Y_test, Y_pred_mlp)
    RMSE[i] = tf.sqrt(mean_squared_error(Y_test, Y_pred_mlp))

mean_NRMSEP_mlp = np.mean(NRMSE)
std_nrmsep = np.std(NRMSE)
mean_MSE_mlp = np.mean(MSE)
std_mse = np.std(MSE)
mean_RMSE_mlp = np.mean(RMSE)
std_rmse = np.std(RMSE)
print("Mean NRMSEPiqr: " + str(mean_NRMSEP_mlp))
print("NRMSEPiqr Standard Deviation: " + str(std_nrmsep))
print("Mean MSE: " + str(mean_MSE_mlp))
print("MSE Standard Deviation: " + str(std_mse))
print("Mean RMSE: " + str(mean_RMSE_mlp))
print("RMSE Standard Deviation: " + str(std_rmse))

Epoch 1/9999
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - MeanSquaredError: 0.0294 - loss: 0.1181 - val_MeanSquaredError: 0.0275 - val_loss: 0.1129
Epoch 2/9999
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - MeanSquaredError: 0.0228 - loss: 0.1010 - val_MeanSquaredError: 0.0322 - val_loss: 0.1192
Epoch 3/9999
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - MeanSquaredError: 0.0234 - loss: 0.1006 - val_MeanSquaredError: 0.0306 - val_loss: 0.1147
Epoch 4/9999
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - MeanSquaredError: 0.0251 - loss: 0.1025 - val_MeanSquaredError: 0.0314 - val_loss: 0.1222
Epoch 5/9999
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - MeanSquaredError: 0.0277 - loss: 0.1083 - val_MeanSquaredError: 0.0256 - val_loss: 0.1068
Epoch 6/9999
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - MeanSquaredError: 0.02