In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Models: ANN, ELM, SVR, KRR, RF, XGB
from sklearn.neural_network import MLPRegressor
from skelm import ELMRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

import optuna
import shap

import warnings
warnings.filterwarnings('ignore')

Import the data

In [None]:
rawdata = pd.read_excel("Data No Storage.xlsx")

In [None]:
rawdata.info()

In [None]:
rawdata.describe()

In [None]:
import random

def fixed_randomization(data, seed=42):
    """Return a reproducibly shuffled copy of ``data`` with a fresh 0..n-1 index.

    Args:
        data: pandas DataFrame (or Series) to shuffle. It is not modified.
        seed: value used to seed NumPy's global random generator right before
            sampling. Defaults to 42, the value that used to be hard-coded, so
            existing calls keep producing the same order.

    Returns:
        A new object holding every row of ``data`` in shuffled order, with the
        original index discarded.
    """
    # sample() is called without random_state, so it draws from NumPy's global
    # generator; seeding it immediately beforehand makes the order repeatable.
    np.random.seed(seed)
    rand_data = data.sample(frac=1).reset_index(drop=True)
    return rand_data

rand_data = fixed_randomization(rawdata)

In [None]:
rand_data.head(10)

In [None]:
# Inputs are the columns at positions 0-5 of the shuffled table.
X_data_unscale = rand_data.loc[:, rand_data.columns[0:6]]

# Targets are the columns at positions 6-10; Y_data is a second name for the
# same (unscaled) frame.
Y_data = Y_data_unscale = rand_data.loc[:, rand_data.columns[6:11]]

In [None]:
# Standardise the input columns with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fitted on ALL rows, before the train/test split
# that follows, so statistics of the test rows leak into the training features.
# Consider fitting on the training rows only.
SS = StandardScaler()
# NOTE(review): MMS is created here but never used in the visible notebook.
MMS = MinMaxScaler()
X_data = SS.fit_transform(X_data_unscale)
# Wrap the scaled array in a DataFrame again; no column names are passed, so the
# columns are labelled 0, 1, 2, ... from here on.
X_data = pd.DataFrame(X_data)

In [None]:
X_data.head(5)

In [None]:
# Hold out 30 % of the rows as the test split; random_state=42 fixes which rows
# end up on each side so the split is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.3, random_state=42)

In [None]:
# One single-column DataFrame per target (column positions 0-4), taken from the
# full target table ...
y1_data, y2_data, y3_data, y4_data, y5_data = (
    Y_data[Y_data.columns[pos:pos + 1]] for pos in range(5)
)

# ... from the training split ...
y1_train, y2_train, y3_train, y4_train, y5_train = (
    Y_train[Y_train.columns[pos:pos + 1]] for pos in range(5)
)

# ... and from the test split.
y1_test, y2_test, y3_test, y4_test, y5_test = (
    Y_test[Y_test.columns[pos:pos + 1]] for pos in range(5)
)

In [None]:
Y_test.describe()

Metrics Function

In [None]:
import pandas as pd
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np

def calculate_metrics(y_true, y_pred):
    """Print R2, MSE, RMSE, AAD, SEP and MAE for a set of predictions.

    Each value is rounded to four decimals and printed on its own line, in the
    order listed above and without labels.

    Args:
        y_true: observed target values (DataFrame, Series, list or array).
        y_pred: predicted values with the same number of elements as y_true.

    Returns:
        None. The metrics are only printed.
    """
    # Flatten to 1-D arrays. For DataFrames this equals .values.flatten(); it
    # also lets callers pass plain arrays or lists.
    y_true_values = np.asarray(y_true).ravel()
    y_pred_values = np.asarray(y_pred).ravel()

    # R2 score
    r2 = round(r2_score(y_true_values, y_pred_values), 4)

    # Mean Squared Error (MSE)
    mse_exact = mean_squared_error(y_true_values, y_pred_values)
    mse = round(mse_exact, 4)

    # Root Mean Squared Error (RMSE): taken from the unrounded MSE. Using the
    # rounded value loses precision (an MSE below 0.00005 would give RMSE 0.0).
    rmse = round(np.sqrt(mse_exact), 4)

    # Average Absolute Deviation (AAD)
    # NOTE(review): as written this is the same quantity as the MAE below;
    # confirm whether a relative (percentage) deviation was intended.
    aad = round(np.mean(np.abs(y_true_values - y_pred_values)), 4)

    # Squared Error Percentage (SEP): mean squared relative error, times 100.
    # A zero in y_true makes this term inf/nan because of the division.
    sep = round(np.mean(((y_true_values - y_pred_values) / y_true_values)**2) * 100, 4)

    # Mean Absolute Error (MAE)
    mae = round(mean_absolute_error(y_true_values, y_pred_values), 4)

    print(r2)
    print(mse)
    print(rmse)
    print(aad)
    print(sep)
    print(mae)

# Support Vector Regression

In [None]:
svr = SVR()

Y1

In [None]:
# Work on the first target column (single-column DataFrames).
y_data = y1_data
y_train = y1_train
y_test = y1_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an SVR.

    Candidates are scored on folds of the training split only. The test split
    is never consulted during the search, so the test R2 reported afterwards
    is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    param = {
        # C, epsilon and gamma span several orders of magnitude, so they are
        # sampled log-uniformly; a linear scale almost never tries small values.
        'C': trial.suggest_float('C', 0.000001, 1000000, log=True),
        'epsilon': trial.suggest_float('epsilon', 1e-6, 1e+1, log=True),
        'kernel': trial.suggest_categorical('kernel', ['rbf']),
        # NOTE(review): scikit-learn documents `degree` as used by the 'poly'
        # kernel only; it is kept so best_params keeps the same keys.
        'degree': trial.suggest_int('degree', 1, 5),
        'gamma': trial.suggest_float('gamma', 0.00001, 0.1, log=True)
    }

    model = SVR(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; the estimator expects a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())

# Create the study with a seeded sampler so a re-run repeats the same search
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)


In [None]:
# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

# Print the best trial
print('Best trial', study.best_trial)

In [None]:
# Rebuild the SVR from the best trial's hyperparameters and fit it on the
# training split (fit returns the estimator itself).
svr_param_y1 = study.best_params
svr_model_y1 = SVR(**svr_param_y1).fit(X_train, y_train)

# Predictions for the training split, the test split and the complete data set.
y_train_pred, y_pred, y_data_pred = (
    svr_model_y1.predict(features) for features in (X_train, X_test, X_data)
)

# R2 of each prediction set against its observed target.
train_r2, test_r2, data_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in (
        (y_train, y_train_pred),
        (y_test, y_pred),
        (y_data, y_data_pred),
    )
)

print(f'Train R_sq: {train_r2}\nTest R_sq: {test_r2}\nData R_sq: {data_r2}')

# Full-data metrics; both inputs are handed over as DataFrames.
y_data, y_data_pred = pd.DataFrame(y_data), pd.DataFrame(y_data_pred)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y2

In [None]:
# Work on the second target column (single-column DataFrames).
y_data = y2_data
y_train = y2_train
y_test = y2_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an SVR.

    Candidates are scored on folds of the training split only. The test split
    is never consulted during the search, so the test R2 reported afterwards
    is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    param = {
        # C, epsilon and gamma span several orders of magnitude, so they are
        # sampled log-uniformly; a linear scale almost never tries small values.
        'C': trial.suggest_float('C', 0.000001, 1000000, log=True),
        'epsilon': trial.suggest_float('epsilon', 1e-6, 1e+1, log=True),
        'kernel': trial.suggest_categorical('kernel', ['rbf']),
        # NOTE(review): scikit-learn documents `degree` as used by the 'poly'
        # kernel only; it is kept so best_params keeps the same keys.
        'degree': trial.suggest_int('degree', 1, 5),
        'gamma': trial.suggest_float('gamma', 0.00001, 0.1, log=True)
    }

    model = SVR(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; the estimator expects a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())

# Create the study with a seeded sampler so a re-run repeats the same search
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

In [None]:
# Rebuild the SVR from the best trial's hyperparameters and fit it on the
# training split (fit returns the estimator itself).
svr_param_y2 = study.best_params
svr_model_y2 = SVR(**svr_param_y2).fit(X_train, y_train)

# Predictions for the training split, the test split and the complete data set.
y_train_pred, y_pred, y_data_pred = (
    svr_model_y2.predict(features) for features in (X_train, X_test, X_data)
)

# R2 of each prediction set against its observed target.
train_r2, test_r2, data_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in (
        (y_train, y_train_pred),
        (y_test, y_pred),
        (y_data, y_data_pred),
    )
)

print(f'Train R_sq: {train_r2}\nTest R_sq: {test_r2}\nData R_sq: {data_r2}')

# Full-data metrics; both inputs are handed over as DataFrames.
y_data, y_data_pred = pd.DataFrame(y_data), pd.DataFrame(y_data_pred)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y3

In [None]:
# Work on the third target column (single-column DataFrames).
y_data = y3_data
y_train = y3_train
y_test = y3_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an SVR.

    Candidates (RBF or linear kernel) are scored on folds of the training
    split only. The test split is never consulted during the search, so the
    test R2 reported afterwards is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    param = {
        # C, epsilon and gamma span several orders of magnitude, so they are
        # sampled log-uniformly; a linear scale almost never tries small values.
        'C': trial.suggest_float('C', 0.000001, 1000000, log=True),
        'epsilon': trial.suggest_float('epsilon', 1e-6, 1e+1, log=True),
        'kernel': trial.suggest_categorical('kernel', ['rbf', 'linear']),
        # NOTE(review): scikit-learn documents `degree` as used by the 'poly'
        # kernel only; it is kept so best_params keeps the same keys.
        'degree': trial.suggest_int('degree', 1, 5),
        'gamma': trial.suggest_float('gamma', 0.00001, 0.1, log=True)
    }

    model = SVR(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; the estimator expects a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())

# Create the study with a seeded sampler so a re-run repeats the same search
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

In [None]:
# Rebuild the SVR from the best trial's hyperparameters and fit it on the
# training split (fit returns the estimator itself).
svr_param_y3 = study.best_params
svr_model_y3 = SVR(**svr_param_y3).fit(X_train, y_train)

# Predictions for the training split, the test split and the complete data set.
y_train_pred, y_pred, y_data_pred = (
    svr_model_y3.predict(features) for features in (X_train, X_test, X_data)
)

# R2 of each prediction set against its observed target.
train_r2, test_r2, data_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in (
        (y_train, y_train_pred),
        (y_test, y_pred),
        (y_data, y_data_pred),
    )
)

print(f'Train R_sq: {train_r2}\nTest R_sq: {test_r2}\nData R_sq: {data_r2}')

# Full-data metrics; both inputs are handed over as DataFrames.
y_data, y_data_pred = pd.DataFrame(y_data), pd.DataFrame(y_data_pred)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y4

In [None]:
# Work on the fourth target column (single-column DataFrames).
y_data = y4_data
y_train = y4_train
y_test = y4_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an SVR.

    Candidates are scored on folds of the training split only. The test split
    is never consulted during the search, so the test R2 reported afterwards
    is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    param = {
        # C, epsilon and gamma span several orders of magnitude, so they are
        # sampled log-uniformly; a linear scale almost never tries small values.
        'C': trial.suggest_float('C', 0.000001, 1000000, log=True),
        'epsilon': trial.suggest_float('epsilon', 1e-6, 1e+1, log=True),
        'kernel': trial.suggest_categorical('kernel', ['rbf']),
        # NOTE(review): scikit-learn documents `degree` as used by the 'poly'
        # kernel only; it is kept so best_params keeps the same keys.
        'degree': trial.suggest_int('degree', 1, 5),
        'gamma': trial.suggest_float('gamma', 0.00001, 0.1, log=True)
    }

    model = SVR(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; the estimator expects a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())

# Create the study with a seeded sampler so a re-run repeats the same search
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

In [None]:
# Rebuild the SVR from the best trial's hyperparameters and fit it on the
# training split (fit returns the estimator itself).
svr_param_y4 = study.best_params
svr_model_y4 = SVR(**svr_param_y4).fit(X_train, y_train)

# Predictions for the training split, the test split and the complete data set.
y_train_pred, y_pred, y_data_pred = (
    svr_model_y4.predict(features) for features in (X_train, X_test, X_data)
)

# R2 of each prediction set against its observed target.
train_r2, test_r2, data_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in (
        (y_train, y_train_pred),
        (y_test, y_pred),
        (y_data, y_data_pred),
    )
)

print(f'Train R_sq: {train_r2}\nTest R_sq: {test_r2}\nData R_sq: {data_r2}')

# Full-data metrics; both inputs are handed over as DataFrames.
y_data, y_data_pred = pd.DataFrame(y_data), pd.DataFrame(y_data_pred)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

# XGBoost

Y1

In [None]:
# Work on the first target column (single-column DataFrames).
y_data = y1_data
y_train = y1_train
y_test = y1_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an XGBRegressor.

    Candidates are scored on folds of the training split only. The test split
    is never consulted during the search, so the test R2 reported afterwards
    is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    param = {
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0.01, 1.0),
        'subsample': trial.suggest_float('subsample', 0.01, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0),
        # NOTE(review): the model seed is searched like a hyperparameter. It is
        # kept so best_params still pins the seed; consider fixing it instead.
        'random_state': trial.suggest_int('random_state', 1, 1000)
    }

    model = XGBRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; pass it as a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())

# Create the study with a seeded sampler so a re-run repeats the same search
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
# Rebuild the XGBoost regressor from the best trial's hyperparameters and fit
# it on the training split (fit returns the estimator itself).
xgb_params_y1 = study.best_params
xgb_model_y1 = XGBRegressor(**xgb_params_y1).fit(X_train, y_train)

# Predictions for the training split, the test split and the complete data set.
y_train_pred, y_pred, y_data_pred = (
    xgb_model_y1.predict(features) for features in (X_train, X_test, X_data)
)

# R2 of each prediction set against its observed target.
train_r2, test_r2, data_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in (
        (y_train, y_train_pred),
        (y_test, y_pred),
        (y_data, y_data_pred),
    )
)

print(f'Train R_sq: {train_r2}\nTest R_sq: {test_r2}\nData R_sq: {data_r2}')

# Full-data metrics; both inputs are handed over as DataFrames.
y_data, y_data_pred = pd.DataFrame(y_data), pd.DataFrame(y_data_pred)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y2

In [None]:
# Work on the second target column (single-column DataFrames).
y_data = y2_data
y_train = y2_train
y_test = y2_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an XGBRegressor.

    Candidates are scored on folds of the training split only. The test split
    is never consulted during the search, so the test R2 reported afterwards
    is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    param = {
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0.01, 1.0),
        'subsample': trial.suggest_float('subsample', 0.01, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0),
        # NOTE(review): the model seed is searched like a hyperparameter. It is
        # kept so best_params still pins the seed; consider fixing it instead.
        'random_state': trial.suggest_int('random_state', 1, 1000)
    }

    model = XGBRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; pass it as a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())

# Create the study with a seeded sampler so a re-run repeats the same search
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
# Rebuild the XGBoost regressor from the best trial's hyperparameters and fit
# it on the training split (fit returns the estimator itself).
xgb_params_y2 = study.best_params
xgb_model_y2 = XGBRegressor(**xgb_params_y2).fit(X_train, y_train)

# Predictions for the training split, the test split and the complete data set.
y_train_pred, y_pred, y_data_pred = (
    xgb_model_y2.predict(features) for features in (X_train, X_test, X_data)
)

# R2 of each prediction set against its observed target.
train_r2, test_r2, data_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in (
        (y_train, y_train_pred),
        (y_test, y_pred),
        (y_data, y_data_pred),
    )
)

print(f'Train R_sq: {train_r2}\nTest R_sq: {test_r2}\nData R_sq: {data_r2}')

# Full-data metrics; both inputs are handed over as DataFrames.
y_data, y_data_pred = pd.DataFrame(y_data), pd.DataFrame(y_data_pred)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y3

In [None]:
# Work on the third target column (single-column DataFrames).
y_data = y3_data
y_train = y3_train
y_test = y3_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an XGBRegressor.

    Candidates are scored on folds of the training split only. The test split
    is never consulted during the search, so the test R2 reported afterwards
    is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    param = {
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0.01, 1.0),
        'subsample': trial.suggest_float('subsample', 0.01, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0),
        # NOTE(review): the model seed is searched like a hyperparameter. It is
        # kept so best_params still pins the seed; consider fixing it instead.
        'random_state': trial.suggest_int('random_state', 1, 1000)
    }

    model = XGBRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; pass it as a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())

# Create the study with a seeded sampler so a re-run repeats the same search
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
# Rebuild the XGBoost regressor from the best trial's hyperparameters and fit
# it on the training split (fit returns the estimator itself).
xgb_params_y3 = study.best_params
xgb_model_y3 = XGBRegressor(**xgb_params_y3).fit(X_train, y_train)

# Predictions for the training split, the test split and the complete data set.
y_train_pred, y_pred, y_data_pred = (
    xgb_model_y3.predict(features) for features in (X_train, X_test, X_data)
)

# R2 of each prediction set against its observed target.
train_r2, test_r2, data_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in (
        (y_train, y_train_pred),
        (y_test, y_pred),
        (y_data, y_data_pred),
    )
)

print(f'Train R_sq: {train_r2}\nTest R_sq: {test_r2}\nData R_sq: {data_r2}')

# Full-data metrics; both inputs are handed over as DataFrames.
y_data, y_data_pred = pd.DataFrame(y_data), pd.DataFrame(y_data_pred)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y4

In [None]:
# Work on the fourth target column (single-column DataFrames).
y_data = y4_data
y_train = y4_train
y_test = y4_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an XGBRegressor.

    Candidates are scored on folds of the training split only. The test split
    is never consulted during the search, so the test R2 reported afterwards
    is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    param = {
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0.01, 1.0),
        'subsample': trial.suggest_float('subsample', 0.01, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0),
        # NOTE(review): the model seed is searched like a hyperparameter. It is
        # kept so best_params still pins the seed; consider fixing it instead.
        'random_state': trial.suggest_int('random_state', 1, 1000)
    }

    model = XGBRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; pass it as a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())

# Create the study with a seeded sampler so a re-run repeats the same search
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
# Rebuild the XGBoost regressor from the best trial's hyperparameters and fit
# it on the training split (fit returns the estimator itself).
xgb_params_y4 = study.best_params
xgb_model_y4 = XGBRegressor(**xgb_params_y4).fit(X_train, y_train)

# Predictions for the training split, the test split and the complete data set.
y_train_pred, y_pred, y_data_pred = (
    xgb_model_y4.predict(features) for features in (X_train, X_test, X_data)
)

# R2 of each prediction set against its observed target.
train_r2, test_r2, data_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in (
        (y_train, y_train_pred),
        (y_test, y_pred),
        (y_data, y_data_pred),
    )
)

print(f'Train R_sq: {train_r2}\nTest R_sq: {test_r2}\nData R_sq: {data_r2}')

# Full-data metrics; both inputs are handed over as DataFrames.
y_data, y_data_pred = pd.DataFrame(y_data), pd.DataFrame(y_data_pred)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

# MLP

Y1

In [None]:
# Work on the first target column (single-column DataFrames).
y_data = y1_data
y_train = y1_train
y_test = y1_test

# Define the number of layers in the model
n_layers = 4

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an MLPRegressor.

    Candidates are scored on folds of the training split only. The test split
    is never consulted during the search, so the test R2 reported afterwards
    is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    # One width per hidden layer, each sampled from 1..50.
    layer_sizes = [trial.suggest_int(f'layer_size_{i}', 1, 50) for i in range(n_layers)]

    param = {
        'hidden_layer_sizes': tuple(layer_sizes),
        'activation': trial.suggest_categorical('activation', ['relu']),
        'solver': trial.suggest_categorical('solver', ['adam', 'lbfgs']),
        # NOTE(review): the network seed is searched like a hyperparameter;
        # consider fixing it instead.
        'random_state': trial.suggest_int('random_state', 1, 100),
        'alpha': trial.suggest_float('alpha', 0.00001, 1),
    }
    model = MLPRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; the estimator expects a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())


# Short 20-trial run. The study is created with a seeded sampler so a re-run
# repeats the same search.
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=20)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
# Work on the first target column (single-column DataFrames).
y_data = y1_data
y_train = y1_train
y_test = y1_test

# Define the number of layers in the model
n_layers = 4

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an MLPRegressor.

    Candidates are scored on folds of the training split only. The test split
    is never consulted during the search, so the test R2 reported afterwards
    is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    # One width per hidden layer, each sampled from 1..50.
    layer_sizes = [trial.suggest_int(f'layer_size_{i}', 1, 50) for i in range(n_layers)]

    param = {
        'hidden_layer_sizes': tuple(layer_sizes),
        'activation': trial.suggest_categorical('activation', ['relu']),
        'solver': trial.suggest_categorical('solver', ['adam', 'lbfgs']),
        # NOTE(review): the network seed is searched like a hyperparameter;
        # consider fixing it instead.
        'random_state': trial.suggest_int('random_state', 1, 100),
        'alpha': trial.suggest_float('alpha', 0.00001, 1),
    }
    model = MLPRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; the estimator expects a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())


# Create the study with a seeded sampler so a re-run repeats the same search
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
# Network for the first target. The hyperparameters are written out literally
# instead of being read from the study. Fitted on the training split (fit
# returns the estimator itself).
ann_model_y1 = MLPRegressor(
    hidden_layer_sizes=(8, 30, 50, 32),
    activation='relu',
    alpha=0.13762346635059558,
    solver='lbfgs',
    random_state=3,
).fit(X_train, y_train)

# Predictions for the training split, the test split and the complete data set.
y_train_pred, y_pred, y_data_pred = (
    ann_model_y1.predict(features) for features in (X_train, X_test, X_data)
)

# R2 of each prediction set against its observed target.
train_r2, test_r2, data_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in (
        (y_train, y_train_pred),
        (y_test, y_pred),
        (y_data, y_data_pred),
    )
)

print(f'Train R_sq: {train_r2}\nTest R_sq: {test_r2}\nData R_sq: {data_r2}')

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y2

In [None]:
# Work on the second target column (single-column DataFrames).
y_data = y2_data
y_train = y2_train
y_test = y2_test

# Define the number of layers in the model
n_layers = 4

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an MLPRegressor.

    Candidates are scored on folds of the training split only. The test split
    is never consulted during the search, so the test R2 reported afterwards
    is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    # One width per hidden layer, each sampled from 1..50.
    layer_sizes = [trial.suggest_int(f'layer_size_{i}', 1, 50) for i in range(n_layers)]

    param = {
        'hidden_layer_sizes': tuple(layer_sizes),
        'activation': trial.suggest_categorical('activation', ['relu']),
        'solver': trial.suggest_categorical('solver', ['adam', 'lbfgs']),
        # NOTE(review): the network seed is searched like a hyperparameter;
        # consider fixing it instead.
        'random_state': trial.suggest_int('random_state', 1, 100),
        'alpha': trial.suggest_float('alpha', 0.00001, 1),
    }
    model = MLPRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; the estimator expects a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())


# Create the study with a seeded sampler so a re-run repeats the same search
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
# Network for the second target. The hyperparameters are written out literally
# instead of being read from the study. Fitted on the training split (fit
# returns the estimator itself).
ann_model_y2 = MLPRegressor(
    hidden_layer_sizes=(40, 36, 17, 3),
    activation='relu',
    alpha=0.421732762333308,
    solver='lbfgs',
    random_state=34,
).fit(X_train, y_train)

# Predictions for the training split, the test split and the complete data set.
y_train_pred, y_pred, y_data_pred = (
    ann_model_y2.predict(features) for features in (X_train, X_test, X_data)
)

# R2 of each prediction set against its observed target.
train_r2, test_r2, data_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in (
        (y_train, y_train_pred),
        (y_test, y_pred),
        (y_data, y_data_pred),
    )
)

print(f'Train R_sq: {train_r2}\nTest R_sq: {test_r2}\nData R_sq: {data_r2}')

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y3

In [None]:
# Work on the third target column (single-column DataFrames).
y_data = y3_data
y_train = y3_train
y_test = y3_test

# Define the number of layers in the model
n_layers = 4

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an MLPRegressor.

    Candidates are scored on folds of the training split only. The test split
    is never consulted during the search, so the test R2 reported afterwards
    is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    # One width per hidden layer, each sampled from 1..50.
    layer_sizes = [trial.suggest_int(f'layer_size_{i}', 1, 50) for i in range(n_layers)]

    param = {
        'hidden_layer_sizes': tuple(layer_sizes),
        'activation': trial.suggest_categorical('activation', ['relu']),
        'solver': trial.suggest_categorical('solver', ['adam', 'lbfgs']),
        # NOTE(review): the network seed is searched like a hyperparameter;
        # consider fixing it instead.
        'random_state': trial.suggest_int('random_state', 1, 100),
        'alpha': trial.suggest_float('alpha', 0.00001, 1),
    }
    model = MLPRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; the estimator expects a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())


# Create the study with a seeded sampler so a re-run repeats the same search
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
# Network for the third target. The hyperparameters are written out literally
# instead of being read from the study. Fitted on the training split (fit
# returns the estimator itself).
ann_model_y3 = MLPRegressor(
    hidden_layer_sizes=(38, 20, 2, 36),
    activation='relu',
    alpha=0.1368361449397597,
    solver='lbfgs',
    random_state=100,
).fit(X_train, y_train)

# Predictions for the training split, the test split and the complete data set.
y_train_pred, y_pred, y_data_pred = (
    ann_model_y3.predict(features) for features in (X_train, X_test, X_data)
)

# R2 of each prediction set against its observed target.
train_r2, test_r2, data_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in (
        (y_train, y_train_pred),
        (y_test, y_pred),
        (y_data, y_data_pred),
    )
)

print(f'Train R_sq: {train_r2}\nTest R_sq: {test_r2}\nData R_sq: {data_r2}')

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y4

In [None]:
# Work on the fourth target column (single-column DataFrames).
y_data = y4_data
y_train = y4_train
y_test = y4_test

# Define the number of layers in the model
n_layers = 4

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R2 of an MLPRegressor.

    Candidates are scored on folds of the training split only. The test split
    is never consulted during the search, so the test R2 reported afterwards
    is not biased by model selection.

    Args:
        trial: optuna trial used to sample the hyperparameters.

    Returns:
        float: mean R2 over the validation folds (the study maximises it).
    """
    # One width per hidden layer, each sampled from 1..50.
    layer_sizes = [trial.suggest_int(f'layer_size_{i}', 1, 50) for i in range(n_layers)]

    param = {
        'hidden_layer_sizes': tuple(layer_sizes),
        'activation': trial.suggest_categorical('activation', ['relu']),
        'solver': trial.suggest_categorical('solver', ['adam', 'lbfgs']),
        # NOTE(review): the network seed is searched like a hyperparameter;
        # consider fixing it instead.
        'random_state': trial.suggest_int('random_state', 1, 100),
        'alpha': trial.suggest_float('alpha', 0.00001, 1),
    }
    model = MLPRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # y_train is a one-column frame; the estimator expects a 1-D target.
    scores = cross_val_score(model, X_train, y_train.values.ravel(), cv=folds, scoring='r2')
    return float(scores.mean())


# Create the study with a seeded sampler so a re-run repeats the same search
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
# Network for the fourth target. The hyperparameters are written out literally
# instead of being read from the study. Fitted on the training split (fit
# returns the estimator itself).
ann_model_y4 = MLPRegressor(
    hidden_layer_sizes=(33, 10, 8, 40),
    activation='relu',
    alpha=0.20068666427656182,
    solver='lbfgs',
    random_state=52,
).fit(X_train, y_train)

# Predictions for the training split, the test split and the complete data set.
y_train_pred, y_pred, y_data_pred = (
    ann_model_y4.predict(features) for features in (X_train, X_test, X_data)
)

# R2 of each prediction set against its observed target.
train_r2, test_r2, data_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in (
        (y_train, y_train_pred),
        (y_test, y_pred),
        (y_data, y_data_pred),
    )
)

print(f'Train R_sq: {train_r2}\nTest R_sq: {test_r2}\nData R_sq: {data_r2}')

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

# SHAP

In [None]:

# Refit one model per target with hard-coded hyperparameters and predict the
# test split. y_data / y_train / y_test are rebound before each fit, so after
# this cell they refer to the fourth target. y_data is assigned but not used
# inside this cell.
y_data = y1_data
y_train = y1_train
y_test = y1_test

# Model for y1
y1_params = {'C': 792455.0628709563, 'epsilon': 1.615158320784387, 'kernel': 'rbf', 'degree': 4, 'gamma': 0.018028712570561923}
model_y1 = SVR(**y1_params)
model_y1.fit(X_train, y_train)
y1_pred = model_y1.predict(X_test)

y_data = y2_data
y_train = y2_train
y_test = y2_test
# Model for y2
y2_params = {'C': 35529.565119700674, 'epsilon': 0.01398121990100254, 'kernel': 'rbf', 'degree': 4, 'gamma': 0.00759513082892152}
model_y2 = SVR(**y2_params)
model_y2.fit(X_train, y_train)
y2_pred = model_y2.predict(X_test)

y_data = y3_data
y_train = y3_train
y_test = y3_test
# Model for y3
y3_params = {'C': 741046.8928371427, 'epsilon': 0.010895466956371502, 'kernel': 'rbf', 'degree': 5, 'gamma': 0.009751068786055191}
model_y3 = SVR(**y3_params)
model_y3.fit(X_train, y_train)
y3_pred = model_y3.predict(X_test)

y_data = y4_data
y_train = y4_train
y_test = y4_test
# Model for y4 (an MLP, unlike the SVRs used for y1-y3)
model_y4 = MLPRegressor(hidden_layer_sizes=(33, 10, 8, 40), activation='relu', alpha=0.20068666427656182, solver='lbfgs', random_state=52)
model_y4.fit(X_train, y_train)
y4_pred = model_y4.predict(X_test)

In [None]:

# Give the scaled feature frame the column names of the unscaled inputs.
# NOTE(review): X_train / X_test were split off before this rename, so they
# presumably still carry the numeric column labels — confirm this is intended.
X_data.columns = X_data_unscale.columns


In [None]:
import pandas as pd

# Six hard-coded feature values, one per input column.
# NOTE(review): presumably an already-standardised observation — TODO confirm.
mm = [1.11956119, -0.0427942, 0.02750056, -0.72244753, 0.13830575, -1.15761651]

# Single-row DataFrame whose columns carry the original input column names.
df = pd.DataFrame([mm], columns=X_data_unscale.columns)

# Display the DataFrame
print(df)


In [None]:

# SHAP waterfall for the first target: SVR fitted on the training split,
# explained for the single observation held in `df`.
y_data = y1_data
y_train = y1_train
y_test = y1_test

# Ensure X_data is a DataFrame with proper feature names
if not isinstance(X_data, pd.DataFrame):
    X_data = pd.DataFrame(X_data)

# Convert feature names to strings if they are not already
X_data.columns = [str(col) for col in X_data.columns]


# Hard-coded SVR hyperparameters for this target.
y1_params = {'C': 792455.0628709563, 'epsilon': 1.615158320784387, 'kernel': 'rbf', 'degree': 4, 'gamma': 0.018028712570561923}
model_y1 = SVR(**y1_params)
model_y1.fit(X_train, y_train)


# Compute SHAP values with the training split as background data
explainer = shap.KernelExplainer(model_y1.predict, X_train)
shap_values = explainer(df)

# Plot the SHAP values using a waterfall plot
shap.plots.waterfall(shap_values[0], max_display=20)

In [None]:

# SHAP beeswarm for the first target: XGBoost model fitted on the training
# split, explained for every row of X_data.
y_data = y1_data
y_train = y1_train
y_test = y1_test

# Ensure X_data is a DataFrame with proper feature names
if not isinstance(X_data, pd.DataFrame):
    X_data = pd.DataFrame(X_data)

# Convert feature names to strings if they are not already
X_data.columns = [str(col) for col in X_data.columns]


# Hard-coded XGBoost hyperparameters for this target.
mod_1 = {'max_depth': 3, 'learning_rate': 0.6559938747200145, 'n_estimators': 742, 'min_child_weight': 5, 'gamma': 0.2364570538403641, 'subsample': 0.9166005202188677, 'colsample_bytree': 0.5467984466797525, 'reg_alpha': 0.34210178318639684, 'reg_lambda': 0.6058891382532333, 'random_state': 590}

model_y1 = XGBRegressor(**mod_1)
model_y1.fit(X_train, y_train)


# Compute SHAP values with the training split as background data
explainer = shap.Explainer(model_y1, X_train)
shap_values = explainer(X_data)

# Plot the SHAP values using a beeswarm plot
shap.plots.beeswarm(shap_values)

In [None]:

# SHAP waterfall for the second target: SVR fitted on the training split,
# explained for the single observation held in `df`.
y_data = y2_data
y_train = y2_train
y_test = y2_test

# Ensure X_data is a DataFrame with proper feature names
if not isinstance(X_data, pd.DataFrame):
    X_data = pd.DataFrame(X_data)

# Convert feature names to strings if they are not already
X_data.columns = [str(col) for col in X_data.columns]


# Hard-coded SVR hyperparameters for this target.
y2_params = {'C': 35529.565119700674, 'epsilon': 0.01398121990100254, 'kernel': 'rbf', 'degree': 4, 'gamma': 0.00759513082892152}
model_y2 = SVR(**y2_params)
model_y2.fit(X_train, y_train)



# Compute SHAP values with the complete data set as background data
# NOTE(review): the y1 waterfall cell uses X_train as background instead —
# confirm which one is intended.
explainer = shap.KernelExplainer(model_y2.predict, X_data)
shap_values = explainer(df)

# Plot the SHAP values using a waterfall plot
shap.plots.waterfall(shap_values[0], max_display=20)

In [None]:

# SHAP beeswarm for the second target: XGBoost model fitted on the training
# split, explained for every row of X_data.
y_data = y2_data
y_train = y2_train
y_test = y2_test

# Ensure X_data is a DataFrame with proper feature names
if not isinstance(X_data, pd.DataFrame):
    X_data = pd.DataFrame(X_data)

# Convert feature names to strings if they are not already
X_data.columns = [str(col) for col in X_data.columns]

# Hard-coded XGBoost hyperparameters for this target.
mod_2 = {'max_depth': 8, 'learning_rate': 0.8058929837921041, 'n_estimators': 707, 'min_child_weight': 6, 'gamma': 0.6203690244688376, 'subsample': 0.8633675242642986, 'colsample_bytree': 0.351803542599373, 'reg_alpha': 0.38202482253134534, 'reg_lambda': 0.19326434501771286, 'random_state': 507}

model_y2 = XGBRegressor(**mod_2)
model_y2.fit(X_train, y_train)

# Compute SHAP values with the complete data set as background data
# NOTE(review): the y1 beeswarm cell uses X_train as background instead —
# confirm which one is intended.
explainer = shap.Explainer(model_y2, X_data)
shap_values = explainer(X_data)

# Plot the SHAP values using a beeswarm plot
shap.plots.beeswarm(shap_values)

In [None]:

# SHAP waterfall for the third target: SVR explained for the single
# observation held in `df`.
y_data = y3_data
y_train = y3_train
y_test = y3_test

# Import necessary libraries
import shap
from xgboost import XGBRegressor
import pandas as pd

# Ensure X_data is a DataFrame with proper feature names
if not isinstance(X_data, pd.DataFrame):
    X_data = pd.DataFrame(X_data)

# Convert feature names to strings if they are not already
X_data.columns = [str(col) for col in X_data.columns]

# Define the parameters for the SVR
y3_params = {'C': 741046.8928371427, 'epsilon': 0.010895466956371502, 'kernel': 'rbf', 'degree': 5, 'gamma': 0.009751068786055191}
model_y3 = SVR(**y3_params)
# NOTE(review): this model is fitted on the COMPLETE data set, whereas the
# y1, y2 and y4 cells fit on X_train / y_train — confirm this is intended.
model_y3.fit(X_data, y_data)


# Compute SHAP values with the complete data set as background data
explainer = shap.KernelExplainer(model_y3.predict, X_data)
shap_values = explainer(df)

# Plot the SHAP values using a waterfall plot
shap.plots.waterfall(shap_values[0], max_display=20)


In [None]:

# SHAP beeswarm for the third target: XGBoost model explained for every row
# of X_data.
y_data = y3_data
y_train = y3_train
y_test = y3_test

# Import necessary libraries
import shap
from xgboost import XGBRegressor
import pandas as pd

# Ensure X_data is a DataFrame with proper feature names
if not isinstance(X_data, pd.DataFrame):
    X_data = pd.DataFrame(X_data)

# Convert feature names to strings if they are not already
X_data.columns = [str(col) for col in X_data.columns]

# Define the parameters for the XGBRegressor
y3_params = {'max_depth': 3, 'learning_rate': 0.5560764434875813, 'n_estimators': 581, 'min_child_weight': 1, 'gamma': 0.17663683708839756, 'subsample': 0.22400187438994715, 'colsample_bytree': 0.45123077147598545, 'reg_alpha': 0.8358287950607348, 'reg_lambda': 0.3030245049556971, 'random_state': 475}

# Fit the model
# NOTE(review): this model is fitted on the COMPLETE data set, whereas the
# y1, y2 and y4 cells fit on X_train / y_train — confirm this is intended.
model_y3 = XGBRegressor(**y3_params)
model_y3.fit(X_data, y_data)


# Compute SHAP values with the complete data set as background data
explainer = shap.Explainer(model_y3, X_data)
shap_values = explainer(X_data)

# Plot the SHAP values using a beeswarm plot
shap.plots.beeswarm(shap_values)


In [None]:

# SHAP waterfall for the fourth target: MLP fitted on the training split,
# explained for the single observation held in `df`.
y_data = y4_data
y_train = y4_train
y_test = y4_test

# Ensure X_data is a DataFrame with proper feature names
if not isinstance(X_data, pd.DataFrame):
    X_data = pd.DataFrame(X_data)

# Convert feature names to strings if they are not already
X_data.columns = [str(col) for col in X_data.columns]


# Hard-coded MLP hyperparameters for this target.
model_y4 = MLPRegressor(hidden_layer_sizes=(33, 10, 8, 40), activation='relu', alpha=0.20068666427656182, solver='lbfgs', random_state=52)
model_y4.fit(X_train, y_train)


# Compute SHAP values with the training split as background data
explainer = shap.KernelExplainer(model_y4.predict, X_train)
shap_values = explainer(df)

# Plot the SHAP values using a waterfall plot
shap.plots.waterfall(shap_values[0], max_display=20)

In [None]:

# SHAP beeswarm for the fourth target: XGBoost model fitted on the training
# split, explained for every row of X_data.
y_data = y4_data
y_train = y4_train
y_test = y4_test

# Ensure X_data is a DataFrame with proper feature names
if not isinstance(X_data, pd.DataFrame):
    X_data = pd.DataFrame(X_data)

# Convert feature names to strings if they are not already
X_data.columns = [str(col) for col in X_data.columns]


# Hard-coded XGBoost hyperparameters for this target.
mod_4 = {'max_depth': 10, 'learning_rate': 0.43666973070690324, 'n_estimators': 828, 'min_child_weight': 8, 'gamma': 0.28078926083310424, 'subsample': 0.6681118230081214, 'colsample_bytree': 0.4876447096098242, 'reg_alpha': 0.29034765465801493, 'reg_lambda': 0.3669174369705459, 'random_state': 949}

model_y4 = XGBRegressor(**mod_4)
model_y4.fit(X_train, y_train)


# Compute SHAP values with the complete data set as background data
# NOTE(review): the y1 beeswarm cell uses X_train as background instead —
# confirm which one is intended.
explainer = shap.Explainer(model_y4, X_data)
shap_values = explainer(X_data)

# Plot the SHAP values using a beeswarm plot
shap.plots.beeswarm(shap_values)