In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Models: ANN, ELM, SVR, KRR, RF, XGB
from sklearn.neural_network import MLPRegressor
#from skelm import ELMRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

import optuna
import shap

import warnings
warnings.filterwarnings('ignore')

Import the data

In [None]:
rawdata = pd.read_excel("data.xlsx")

In [None]:
rawdata.info()

In [None]:
rawdata.describe()

In [None]:
import random

def fixed_randomization(data, seed=42):
    """Return `data` with its rows shuffled reproducibly and the index reset.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose rows are shuffled; it is not modified.
    seed : int, default 42
        Seed for the shuffle. The default gives the same row order as seeding
        NumPy's global generator with 42 before calling ``sample``.

    Returns
    -------
    pandas.DataFrame
        All rows of `data` in shuffled order with a fresh 0..n-1 index.
    """
    # random_state gives `sample` its own generator, so the shuffle no longer
    # reseeds NumPy's global RNG as a side effect for every later cell.
    return data.sample(frac=1, random_state=seed).reset_index(drop=True)

rand_data = fixed_randomization(rawdata)

In [None]:
rand_data.head(10)

In [None]:
X_data_unscale = rand_data[rand_data.columns[0:15]]
Y_data_unscale = rand_data[rand_data.columns[15:20]]

Y_data = Y_data_unscale

In [None]:
# Standardize every input column with StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset, before the train/test
# split in a later cell, so test-set statistics leak into the scaled features.
# Refitting on the training rows only would also change SS.mean_ / SS.scale_,
# which later cells use to map a hard-coded scaled vector back to original
# units — change with care.
SS = StandardScaler()
MMS = MinMaxScaler()  # not used in the visible part of the notebook
X_data = SS.fit_transform(X_data_unscale)
# fit_transform returns a plain array; wrap it in a DataFrame again
# (the column names are restored in the next cell).
X_data = pd.DataFrame(X_data)

In [None]:
X_data.columns = X_data_unscale.columns
X_data.head(5)

In [None]:
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.3, random_state=42)

In [None]:
y1_data = Y_data[Y_data.columns[0:1]]
y2_data = Y_data[Y_data.columns[1:2]]
y3_data = Y_data[Y_data.columns[2:3]]
y4_data = Y_data[Y_data.columns[3:4]]
y5_data = Y_data[Y_data.columns[4:5]]


y1_train = Y_train[Y_train.columns[0:1]]
y2_train = Y_train[Y_train.columns[1:2]]
y3_train = Y_train[Y_train.columns[2:3]]
y4_train = Y_train[Y_train.columns[3:4]]
y5_train = Y_train[Y_train.columns[4:5]]


y1_test = Y_test[Y_test.columns[0:1]]
y2_test = Y_test[Y_test.columns[1:2]]
y3_test = Y_test[Y_test.columns[2:3]]
y4_test = Y_test[Y_test.columns[3:4]]
y5_test = Y_test[Y_test.columns[4:5]]

In [None]:
Y_test.describe()

Metrics Function

In [None]:
import pandas as pd
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np

def calculate_metrics(y_true, y_pred):
    """Print regression metrics for a set of predictions, rounded to 4 decimals.

    One value is printed per line, in this order: R2, MSE, RMSE, AAD, SEP, MAE.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values (DataFrame, Series, list or ndarray).
        Both are flattened to 1-D, so they must hold the same number of values.

    Notes
    -----
    * RMSE is the square root of the unrounded MSE; taking the root of the
      rounded MSE loses precision (a small MSE can round to 0.0).
    * AAD and MAE are the same quantity (mean absolute error); both are kept
      so the printed layout stays as before.
    * SEP divides by `y_true`, so zeros in `y_true` yield inf/nan.
    """
    # np.asarray accepts DataFrames as well as plain arrays/lists.
    y_true_values = np.asarray(y_true).ravel()
    y_pred_values = np.asarray(y_pred).ravel()

    # R2 score
    r2 = round(r2_score(y_true_values, y_pred_values), 4)

    # Mean Squared Error (MSE) and its root (RMSE), both from the raw MSE
    mse_raw = mean_squared_error(y_true_values, y_pred_values)
    mse = round(mse_raw, 4)
    rmse = round(np.sqrt(mse_raw), 4)

    # Average Absolute Deviation (AAD)
    aad = round(np.mean(np.abs(y_true_values - y_pred_values)), 4)

    # Squared Error Percentage (SEP)
    sep = round(np.mean(((y_true_values - y_pred_values) / y_true_values)**2) * 100, 4)

    # Mean Absolute Error (MAE)
    mae = round(mean_absolute_error(y_true_values, y_pred_values), 4)

    for value in (r2, mse, rmse, aad, sep, mae):
        print(value)

# Analysis

In [None]:
#import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
del_data = pd.concat([X_data, y1_data, y2_data, y3_data, y4_data], axis=1)

In [None]:
del_data2 = pd.concat([X_data_unscale, y1_data, y2_data, y3_data, y4_data], axis=1)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import griddata

# Pick the two input columns (x, y) and the response (z) to plot.
# NOTE(review): y reads column 'PP1' but the axis label below says 'PP', and the
# next contour cell reads column 'PP' — confirm which column name exists.
x = del_data2['Nuclear']
y = del_data2['PP1']
z = del_data2['Total Cost']

# Build a regular 200 x 200 grid over the observed x/y ranges and interpolate
# the scattered z values onto it.
xi = np.linspace(x.min(), x.max(), 200)
yi = np.linspace(y.min(), y.max(), 200)
zi = griddata((x, y), z, (xi[None, :], yi[:, None]), method='cubic')

# Filled contour plot of the interpolated surface
plt.contourf(xi, yi, zi, levels=30, cmap="RdBu_r")
plt.colorbar()
#plt.scatter(x, y, c=z, edgecolors='k', linewidths=0.5)

# Add axis labels
plt.xlabel('Nuclear')
plt.ylabel('PP')

# Add a title (optional)
#plt.title('Contour Plot of Total Cost')

plt.show()


In [None]:
#============ Here
#============
#============
#============


import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import griddata

# Pick the two input columns (x, y) and the response (z) to plot.
x = del_data2['PP']
y = del_data2['Nuclear']
z = del_data2['CO2']

# Build a regular 200 x 200 grid over the observed x/y ranges and interpolate
# the scattered z values onto it.
xi = np.linspace(x.min(), x.max(), 200)
yi = np.linspace(y.min(), y.max(), 200)
zi = griddata((x, y), z, (xi[None, :], yi[:, None]), method='cubic')

# Filled contour plot of the interpolated surface
plt.contourf(xi, yi, zi, levels=30, cmap="RdBu_r")
plt.colorbar()
#plt.scatter(x, y, c=z, edgecolors='k', linewidths=0.5)

# Add axis labels
plt.xlabel('PP')
plt.ylabel('Nuclear')

# Add a title (optional)
# NOTE(review): the disabled title mentions CEEP, but z above is the 'CO2' column.
#plt.title('Contour Plot of CEEP')

plt.show()


# Support Vector Regression

In [None]:
svr = SVR()

Y1

In [None]:
y_data = y1_data
y_train = y1_train
y_test = y1_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of an SVR on the training split.

    Candidates are scored with cross-validation on (X_train, y_train) only, so
    the held-out (X_test, y_test) split is not used to pick hyperparameters
    and stays a fair estimate for the final evaluation.
    """
    param = {
        # C, epsilon and gamma span several orders of magnitude, so they are
        # sampled on a log scale; uniform sampling would almost never try
        # the small values.
        'C': trial.suggest_float('C', 0.000001, 1000000, log=True),
        'epsilon': trial.suggest_float('epsilon', 1e-6, 1e+1, log=True),
        'kernel': trial.suggest_categorical('kernel', ['rbf', 'linear', 'poly']),
        'degree': trial.suggest_int('degree', 1, 5),
        'gamma': trial.suggest_float('gamma', 0.00001, 0.1, log=True)
    }

    model = SVR(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # np.ravel turns the single-column target frame into a 1-D array.
    scores = cross_val_score(model, X_train, np.ravel(y_train), cv=folds, scoring='r2')
    return scores.mean()

# Create the study (seeded sampler so the search is reproducible)
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)


In [None]:
# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

# Print the best trial
print('Best trial', study.best_trial)

In [None]:
svr_param_y1 = study.best_params

svr_model_y1 = SVR(**svr_param_y1)
svr_model_y1.fit(X_train, y_train)

y_train_pred = svr_model_y1.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)

y_pred = svr_model_y1.predict(X_test)
test_r2 = r2_score(y_test, y_pred)

y_data_pred = svr_model_y1.predict(X_data)
data_r2 = r2_score(y_data, y_data_pred)

print('Train R_sq:', train_r2)
print('Test R_sq:', test_r2)
print('Data R_sq:', data_r2)

y_data_pred = pd.DataFrame(y_data_pred)
y_data = pd.DataFrame(y_data)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y2

In [None]:
y_data = y2_data
y_train = y2_train
y_test = y2_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of an SVR on the training split.

    Scoring uses (X_train, y_train) only, so the held-out test split is not
    used to pick hyperparameters.
    """
    param = {
        # Wide-range parameters are sampled on a log scale.
        'C': trial.suggest_float('C', 0.000001, 1000000, log=True),
        'epsilon': trial.suggest_float('epsilon', 1e-6, 1e+1, log=True),
        'kernel': trial.suggest_categorical('kernel', ['rbf']),
        'degree': trial.suggest_int('degree', 1, 5),
        'gamma': trial.suggest_float('gamma', 0.00001, 0.1, log=True)
    }

    model = SVR(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # np.ravel turns the single-column target frame into a 1-D array.
    scores = cross_val_score(model, X_train, np.ravel(y_train), cv=folds, scoring='r2')
    return scores.mean()

# Create the study (seeded sampler so the search is reproducible)
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

In [None]:
svr_param_y2 = study.best_params

svr_model_y2 = SVR(**svr_param_y2)
svr_model_y2.fit(X_train, y_train)

y_train_pred = svr_model_y2.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)

y_pred = svr_model_y2.predict(X_test)
test_r2 = r2_score(y_test, y_pred)

y_data_pred = svr_model_y2.predict(X_data)
data_r2 = r2_score(y_data, y_data_pred)

print('Train R_sq:', train_r2)
print('Test R_sq:', test_r2)
print('Data R_sq:', data_r2)

y_data_pred = pd.DataFrame(y_data_pred)
y_data = pd.DataFrame(y_data)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y3

In [None]:
y_data = y3_data
y_train = y3_train
y_test = y3_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of an SVR on the training split.

    Scoring uses (X_train, y_train) only, so the held-out test split is not
    used to pick hyperparameters.
    """
    param = {
        # Wide-range parameters are sampled on a log scale.
        'C': trial.suggest_float('C', 0.000001, 1000000, log=True),
        'epsilon': trial.suggest_float('epsilon', 1e-6, 1e+1, log=True),
        'kernel': trial.suggest_categorical('kernel', ['rbf']),
        'degree': trial.suggest_int('degree', 1, 5),
        'gamma': trial.suggest_float('gamma', 0.00001, 0.1, log=True)
    }

    model = SVR(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # np.ravel turns the single-column target frame into a 1-D array.
    scores = cross_val_score(model, X_train, np.ravel(y_train), cv=folds, scoring='r2')
    return scores.mean()

# Create the study (seeded sampler so the search is reproducible)
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

In [None]:
svr_param_y3 = study.best_params

svr_model_y3 = SVR(**svr_param_y3)
svr_model_y3.fit(X_train, y_train)

y_train_pred = svr_model_y3.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)

y_pred = svr_model_y3.predict(X_test)
test_r2 = r2_score(y_test, y_pred)

y_data_pred = svr_model_y3.predict(X_data)
data_r2 = r2_score(y_data, y_data_pred)

print('Train R_sq:', train_r2)
print('Test R_sq:', test_r2)
print('Data R_sq:', data_r2)

y_data_pred = pd.DataFrame(y_data_pred)
y_data = pd.DataFrame(y_data)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y4

In [None]:
y_data = y4_data
y_train = y4_train
y_test = y4_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of an SVR on the training split.

    Scoring uses (X_train, y_train) only, so the held-out test split is not
    used to pick hyperparameters.
    """
    param = {
        # Wide-range parameters are sampled on a log scale.
        'C': trial.suggest_float('C', 0.000001, 1000000, log=True),
        'epsilon': trial.suggest_float('epsilon', 1e-6, 1e+1, log=True),
        'kernel': trial.suggest_categorical('kernel', ['rbf']),
        'degree': trial.suggest_int('degree', 1, 5),
        'gamma': trial.suggest_float('gamma', 0.00001, 0.1, log=True)
    }

    model = SVR(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # np.ravel turns the single-column target frame into a 1-D array.
    scores = cross_val_score(model, X_train, np.ravel(y_train), cv=folds, scoring='r2')
    return scores.mean()

# Create the study (seeded sampler so the search is reproducible).
# The objective never reports intermediate values, so the pruner has nothing
# to act on; it is kept only to leave the study configuration as it was.
study = optuna.create_study(direction='maximize', study_name='regression',
                            pruner=optuna.pruners.MedianPruner(),
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

In [None]:
svr_param_y4 = study.best_params

svr_model_y4 = SVR(**svr_param_y4)
svr_model_y4.fit(X_train, y_train)

y_train_pred = svr_model_y4.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)

y_pred = svr_model_y4.predict(X_test)
test_r2 = r2_score(y_test, y_pred)

y_data_pred = svr_model_y4.predict(X_data)
data_r2 = r2_score(y_data, y_data_pred)

print('Train R_sq:', train_r2)
print('Test R_sq:', test_r2)
print('Data R_sq:', data_r2)

y_data_pred = pd.DataFrame(y_data_pred)
y_data = pd.DataFrame(y_data)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

# XGBoost

Y1

In [None]:
y_data = y1_data
y_train = y1_train
y_test = y1_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of an XGBRegressor on the training split.

    Scoring uses (X_train, y_train) only, so the held-out test split is not
    used to pick hyperparameters.
    """
    param = {
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0.01, 1.0),
        'subsample': trial.suggest_float('subsample', 0.01, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0),
        'random_state': trial.suggest_int('random_state', 1, 1000)
    }

    model = XGBRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # np.ravel turns the single-column target frame into a 1-D array.
    scores = cross_val_score(model, X_train, np.ravel(y_train), cv=folds, scoring='r2')
    return scores.mean()

# Create the study (seeded sampler so the search is reproducible)
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
xgb_params_y1 = study.best_params

xgb_model_y1 = XGBRegressor(**xgb_params_y1)
xgb_model_y1.fit(X_train, y_train)

y_train_pred = xgb_model_y1.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)

y_pred = xgb_model_y1.predict(X_test)
test_r2 = r2_score(y_test, y_pred)

y_data_pred = xgb_model_y1.predict(X_data)
data_r2 = r2_score(y_data, y_data_pred)

print('Train R_sq:', train_r2)
print('Test R_sq:', test_r2)
print('Data R_sq:', data_r2)

y_data_pred = pd.DataFrame(y_data_pred)
y_data = pd.DataFrame(y_data)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y2

In [None]:
y_data = y2_data
y_train = y2_train
y_test = y2_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of an XGBRegressor on the training split.

    Scoring uses (X_train, y_train) only, so the held-out test split is not
    used to pick hyperparameters.
    """
    param = {
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0.01, 1.0),
        'subsample': trial.suggest_float('subsample', 0.01, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0),
        'random_state': trial.suggest_int('random_state', 1, 1000)
    }

    model = XGBRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # np.ravel turns the single-column target frame into a 1-D array.
    scores = cross_val_score(model, X_train, np.ravel(y_train), cv=folds, scoring='r2')
    return scores.mean()

# Create the study (seeded sampler so the search is reproducible)
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
xgb_params_y2 = study.best_params

xgb_model_y2 = XGBRegressor(**xgb_params_y2)
xgb_model_y2.fit(X_train, y_train)

y_train_pred = xgb_model_y2.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)

y_pred = xgb_model_y2.predict(X_test)
test_r2 = r2_score(y_test, y_pred)

y_data_pred = xgb_model_y2.predict(X_data)
data_r2 = r2_score(y_data, y_data_pred)

print('Train R_sq:', train_r2)
print('Test R_sq:', test_r2)
print('Data R_sq:', data_r2)

y_data_pred = pd.DataFrame(y_data_pred)
y_data = pd.DataFrame(y_data)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y3

In [None]:
y_data = y3_data
y_train = y3_train
y_test = y3_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of an XGBRegressor on the training split.

    Scoring uses (X_train, y_train) only, so the held-out test split is not
    used to pick hyperparameters.
    """
    param = {
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0.01, 1.0),
        'subsample': trial.suggest_float('subsample', 0.01, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0),
        'random_state': trial.suggest_int('random_state', 1, 1000)
    }

    model = XGBRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # np.ravel turns the single-column target frame into a 1-D array.
    scores = cross_val_score(model, X_train, np.ravel(y_train), cv=folds, scoring='r2')
    return scores.mean()

# Create the study (seeded sampler so the search is reproducible)
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
xgb_params_y3 = study.best_params

xgb_model_y3 = XGBRegressor(**xgb_params_y3)
xgb_model_y3.fit(X_train, y_train)

y_train_pred = xgb_model_y3.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)

y_pred = xgb_model_y3.predict(X_test)
test_r2 = r2_score(y_test, y_pred)

y_data_pred = xgb_model_y3.predict(X_data)
data_r2 = r2_score(y_data, y_data_pred)

print('Train R_sq:', train_r2)
print('Test R_sq:', test_r2)
print('Data R_sq:', data_r2)

y_data_pred = pd.DataFrame(y_data_pred)
y_data = pd.DataFrame(y_data)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y4

In [None]:
y_data = y4_data
y_train = y4_train
y_test = y4_test

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of an XGBRegressor on the training split.

    Scoring uses (X_train, y_train) only, so the held-out test split is not
    used to pick hyperparameters.
    """
    param = {
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0.01, 1.0),
        'subsample': trial.suggest_float('subsample', 0.01, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0),
        'random_state': trial.suggest_int('random_state', 1, 1000)
    }

    model = XGBRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # np.ravel turns the single-column target frame into a 1-D array.
    scores = cross_val_score(model, X_train, np.ravel(y_train), cv=folds, scoring='r2')
    return scores.mean()

# Create the study (seeded sampler so the search is reproducible)
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
xgb_params_y4 = study.best_params

xgb_model_y4 = XGBRegressor(**xgb_params_y4)
xgb_model_y4.fit(X_train, y_train)

y_train_pred = xgb_model_y4.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)

y_pred = xgb_model_y4.predict(X_test)
test_r2 = r2_score(y_test, y_pred)

y_data_pred = xgb_model_y4.predict(X_data)
data_r2 = r2_score(y_data, y_data_pred)

print('Train R_sq:', train_r2)
print('Test R_sq:', test_r2)
print('Data R_sq:', data_r2)

y_data_pred = pd.DataFrame(y_data_pred)
y_data = pd.DataFrame(y_data)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

# MLP

Y1

In [None]:
y_data = y1_data
y_train = y1_train
y_test = y1_test

# Define the number of layers in the model
n_layers = 4

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of an MLPRegressor on the training split.

    Scoring uses (X_train, y_train) only, so the held-out test split is not
    used to pick hyperparameters.
    """
    # One width per hidden layer, each tuned independently.
    layer_sizes = [trial.suggest_int(f'layer_size_{i}', 1, 50) for i in range(n_layers)]

    param = {
        'hidden_layer_sizes': tuple(layer_sizes),
        'activation': trial.suggest_categorical('activation', ['relu']),
        'solver': trial.suggest_categorical('solver', ['adam', 'lbfgs']),
        'random_state': trial.suggest_int('random_state', 1, 100),
        # alpha spans five orders of magnitude, so it is sampled on a log scale.
        'alpha': trial.suggest_float('alpha', 0.00001, 1, log=True),
    }
    model = MLPRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # np.ravel turns the single-column target frame into a 1-D array.
    scores = cross_val_score(model, X_train, np.ravel(y_train), cv=folds, scoring='r2')
    return scores.mean()


# Create the study (seeded sampler so the search is reproducible)
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
y_data = y1_data
y_train = y1_train
y_test = y1_test

ann_model_y1 = MLPRegressor(hidden_layer_sizes=(5, 5, 22, 46), activation='relu', alpha=0.6856023087910847, solver='lbfgs', random_state=88)

# Fit the model on the training data
ann_model_y1.fit(X_train, y_train)

y_train_pred = ann_model_y1.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)

y_pred = ann_model_y1.predict(X_test)
test_r2 = r2_score(y_test, y_pred)

y_data_pred = ann_model_y1.predict(X_data)
data_r2 = r2_score(y_data, y_data_pred)

print('Train R_sq:', train_r2)
print('Test R_sq:', test_r2)
print('Data R_sq:', data_r2)

y_data_pred = pd.DataFrame(y_data_pred)
y_data = pd.DataFrame(y_data)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y2

In [None]:
y_data = y2_data
y_train = y2_train
y_test = y2_test

# Define the number of layers in the model
n_layers = 4

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of an MLPRegressor on the training split.

    Scoring uses (X_train, y_train) only, so the held-out test split is not
    used to pick hyperparameters.
    """
    # One width per hidden layer, each tuned independently.
    layer_sizes = [trial.suggest_int(f'layer_size_{i}', 1, 50) for i in range(n_layers)]

    param = {
        'hidden_layer_sizes': tuple(layer_sizes),
        'activation': trial.suggest_categorical('activation', ['relu']),
        'solver': trial.suggest_categorical('solver', ['adam', 'lbfgs']),
        'random_state': trial.suggest_int('random_state', 1, 100),
        # alpha spans five orders of magnitude, so it is sampled on a log scale.
        'alpha': trial.suggest_float('alpha', 0.00001, 1, log=True),
    }
    model = MLPRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # np.ravel turns the single-column target frame into a 1-D array.
    scores = cross_val_score(model, X_train, np.ravel(y_train), cv=folds, scoring='r2')
    return scores.mean()


# Create the study (seeded sampler so the search is reproducible)
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
y_data = y2_data
y_train = y2_train
y_test = y2_test

ann_model_y2 = MLPRegressor(hidden_layer_sizes=(33, 35, 12, 10), activation='relu', alpha=0.87732216286418, solver='lbfgs', random_state=58)

# Fit the model on the training data
ann_model_y2.fit(X_train, y_train)

y_train_pred = ann_model_y2.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)

y_pred = ann_model_y2.predict(X_test)
test_r2 = r2_score(y_test, y_pred)

y_data_pred = ann_model_y2.predict(X_data)
data_r2 = r2_score(y_data, y_data_pred)

print('Train R_sq:', train_r2)
print('Test R_sq:', test_r2)
print('Data R_sq:', data_r2)

y_data_pred = pd.DataFrame(y_data_pred)
y_data = pd.DataFrame(y_data)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y3

In [None]:
y_data = y3_data
y_train = y3_train
y_test = y3_test

# Define the number of layers in the model
n_layers = 4

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of an MLPRegressor on the training split.

    Scoring uses (X_train, y_train) only, so the held-out test split is not
    used to pick hyperparameters.
    """
    # One width per hidden layer, each tuned independently.
    layer_sizes = [trial.suggest_int(f'layer_size_{i}', 1, 50) for i in range(n_layers)]

    param = {
        'hidden_layer_sizes': tuple(layer_sizes),
        'activation': trial.suggest_categorical('activation', ['relu']),
        'solver': trial.suggest_categorical('solver', ['adam', 'lbfgs']),
        'random_state': trial.suggest_int('random_state', 1, 100),
        # alpha spans five orders of magnitude, so it is sampled on a log scale.
        'alpha': trial.suggest_float('alpha', 0.00001, 1, log=True),
    }
    model = MLPRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # np.ravel turns the single-column target frame into a 1-D array.
    scores = cross_val_score(model, X_train, np.ravel(y_train), cv=folds, scoring='r2')
    return scores.mean()


# Create the study (seeded sampler so the search is reproducible)
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
y_data = y3_data
y_train = y3_train
y_test = y3_test

ann_model_y3 = MLPRegressor(hidden_layer_sizes=(4, 37, 41, 22), activation='relu', alpha=0.5394813016687909, solver='lbfgs', random_state=75)

# Fit the model on the training data
ann_model_y3.fit(X_train, y_train)

y_train_pred = ann_model_y3.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)

y_pred = ann_model_y3.predict(X_test)
test_r2 = r2_score(y_test, y_pred)

y_data_pred = ann_model_y3.predict(X_data)
data_r2 = r2_score(y_data, y_data_pred)

print('Train R_sq:', train_r2)
print('Test R_sq:', test_r2)
print('Data R_sq:', data_r2)

y_data_pred = pd.DataFrame(y_data_pred)
y_data = pd.DataFrame(y_data)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

Y4

In [None]:
y_data = y4_data
y_train = y4_train
y_test = y4_test

# Define the number of layers in the model
n_layers = 4

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of an MLPRegressor on the training split.

    Scoring uses (X_train, y_train) only, so the held-out test split is not
    used to pick hyperparameters.
    """
    # One width per hidden layer, each tuned independently.
    layer_sizes = [trial.suggest_int(f'layer_size_{i}', 1, 50) for i in range(n_layers)]

    param = {
        'hidden_layer_sizes': tuple(layer_sizes),
        'activation': trial.suggest_categorical('activation', ['relu']),
        'solver': trial.suggest_categorical('solver', ['adam', 'lbfgs']),
        'random_state': trial.suggest_int('random_state', 1, 100),
        # alpha spans five orders of magnitude, so it is sampled on a log scale.
        'alpha': trial.suggest_float('alpha', 0.00001, 1, log=True),
    }
    model = MLPRegressor(**param)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    # np.ravel turns the single-column target frame into a 1-D array.
    scores = cross_val_score(model, X_train, np.ravel(y_train), cv=folds, scoring='r2')
    return scores.mean()


# Create the study (seeded sampler so the search is reproducible)
study = optuna.create_study(direction='maximize', study_name='regression',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

# Print the best value
print('Best value', study.best_value)

In [None]:
y_data = y4_data
y_train = y4_train
y_test = y4_test

ann_model_y4 = MLPRegressor(hidden_layer_sizes=(8, 30, 46, 14), activation='relu', alpha=0.7138596741565628, solver='lbfgs', random_state=72)

# Fit the model on the training data
ann_model_y4.fit(X_train, y_train)

y_train_pred = ann_model_y4.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)

y_pred = ann_model_y4.predict(X_test)
test_r2 = r2_score(y_test, y_pred)

y_data_pred = ann_model_y4.predict(X_data)
data_r2 = r2_score(y_data, y_data_pred)

print('Train R_sq:', train_r2)
print('Test R_sq:', test_r2)
print('Data R_sq:', data_r2)

y_data_pred = pd.DataFrame(y_data_pred)
y_data = pd.DataFrame(y_data)
calculate_metrics(y_data, y_data_pred)

In [None]:
import optuna.visualization as vis
vis.plot_optimization_history(study)

In [None]:
vis.plot_parallel_coordinate(study)

In [None]:
vis.plot_param_importances(study)

# Shap

In [None]:

# Fit one model per target with hard-coded hyperparameters.
# NOTE(review): every model below is fitted on the full dataset (X_data) and then
# predicts X_test; if X_test is a subset of X_data (as in the split cell), these
# predictions are on rows the model has already seen, not a held-out estimate.
# y_train / y_test are reassigned for each target but not used in this cell.
y_data = y1_data
y_train = y1_train
y_test = y1_test

# Model for y1
model_y1 = SVR(C= 938266.6722022587, epsilon= 7.634680062462178, kernel= 'rbf', degree= 4, gamma= 0.00877730291334558)
model_y1.fit(X_data, y_data)
y1_pred = model_y1.predict(X_test)

y_data = y2_data
y_train = y2_train
y_test = y2_test
# Model for y2
model_y2 = SVR(C= 71459.276928581, epsilon= 0.9031869098259523, kernel= 'rbf', degree= 1, gamma= 0.0035901390986543542)
model_y2.fit(X_data, y_data)
y2_pred = model_y2.predict(X_test)

y_data = y3_data
y_train = y3_train
y_test = y3_test
# Model for y3
y3_params = {'max_depth': 5, 'learning_rate': 0.21192829865979532, 'n_estimators': 513, 'min_child_weight': 1, 'gamma': 0.8354449115144676, 'subsample': 0.8559761007878783, 'colsample_bytree': 0.9499990399914706, 'reg_alpha': 0.061420625912146976, 'reg_lambda': 0.9727616243556757, 'random_state': 599}
model_y3 = XGBRegressor(**y3_params)
model_y3.fit(X_data, y_data)
y3_pred = model_y3.predict(X_test)

y_data = y4_data
y_train = y4_train
y_test = y4_test
# Model for y4
# NOTE(review): y4_params holds SVR-style settings but is never used; the y4
# model actually built below is an MLPRegressor — confirm which one is intended.
y4_params = {'C': 435707.8208868281, 'epsilon': 0.25793769392540933, 'kernel': 'rbf', 'degree': 4, 'gamma': 0.0048393326352296085}
model_y4 = MLPRegressor(hidden_layer_sizes=(7, 28, 23, 19), activation='relu', alpha=0.9365945996054417, solver='lbfgs', random_state=72)
model_y4.fit(X_data, y_data)
y4_pred = model_y4.predict(X_test)

In [None]:

# Copy column names from df_source to df_target
X_data.columns = X_data_unscale.columns


In [None]:
import pandas as pd

# Your list
mm = [-0.08204877,  0.15594775, -0.00367291,  0.4247027,   0.84494347,  0.29067574,
      -0.22331748,  0.815149,   -1.02562235, -0.44830675, -0.60883313, -1.03435034,
      -0.12881452,  0.08031999, -0.78065315]

# Convert to DataFrame
df = pd.DataFrame(mm)
df = df.T
df.columns = X_data_unscale.columns

# Display the DataFrame
print(df)


In [None]:

y_data = y1_data
y_train = y1_train
y_test = y1_test

# Ensure X_data is a DataFrame with proper feature names
if not isinstance(X_data, pd.DataFrame):
    X_data = pd.DataFrame(X_data)

# Convert feature names to strings if they are not already
X_data.columns = [str(col) for col in X_data.columns]


model_y1 = SVR(C= 938266.6722022587, epsilon= 7.634680062462178, kernel= 'rbf', degree= 4, gamma= 0.00877730291334558)
model_y1.fit(X_train, y_train)


# Compute SHAP values
explainer = shap.KernelExplainer(model_y1.predict, X_train)
shap_values = explainer(df)

# Plot the SHAP values using a waterfall plot
shap.plots.waterfall(shap_values[0], max_display=7)

In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
import shap
import matplotlib.pyplot as plt

# Explain one training instance of the Y1 SVR and show its feature values in
# original (unscaled) units on a SHAP waterfall plot.
# Assumes X_data, X_data_unscale, X_train and y1_train are already defined.

# Ensure X_data is a DataFrame with proper feature names
if not isinstance(X_data, pd.DataFrame):
    X_data = pd.DataFrame(X_data)

# Convert feature names to strings if they are not already
X_data.columns = [str(col) for col in X_data.columns]

# Fit the model.
# X_train was split from X_data, which is already the StandardScaler output,
# so it is used as-is; standardizing it a second time would put the inputs on
# a different scale than every other cell uses.
model_y1 = SVR(C=938266.6722022587, epsilon=7.634680062462178, kernel='rbf', degree=4, gamma=0.00877730291334558)
model_y1.fit(X_train, np.ravel(y1_train))

# Compute SHAP values for the single instance that is plotted.
# Calling the explainer (rather than .shap_values) returns an Explanation
# object, which shap.plots.waterfall requires; a bare array is rejected.
explainer = shap.KernelExplainer(model_y1.predict, X_train)
instance = X_train.iloc[[0]]  # first training row, kept 2-D
shap_values = explainer(instance)

# Show the feature values in original units: look up the same row (by index
# label) in the unscaled inputs and use it as the displayed data.
unscaled_values = X_data_unscale.loc[instance.index]
shap_values.data = unscaled_values.to_numpy()

# Create the SHAP waterfall plot; show=False keeps the figure open so the
# axis labels below are applied before it is rendered.
shap_waterfall_plot = shap.plots.waterfall(shap_values[0], max_display=7, show=False)

# Customize the axis labels on the main (first-created) axes of the figure
main_ax = plt.gcf().axes[0]
main_ax.set_xlabel("SHAP value (impact on model output)")
main_ax.set_ylabel("Optimum Capacities")
#plt.title("Customized SHAP Waterfall Plot with Unscaled Feature Values")

# Show the plot with custom labels
plt.show()


In [None]:

# Target 1: bind the generic y_* aliases to the first output column.
y_data, y_train, y_test = y1_data, y1_train, y1_test

# X_data must be a DataFrame whose column labels are all strings.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# RBF-kernel SVR for target 1, fitted on the complete feature table.
model_y1 = SVR(
    C=938266.6722022587,
    epsilon=7.634680062462178,
    kernel='rbf',
    degree=4,
    gamma=0.00877730291334558,
)
model_y1.fit(X_data, y_data)

# Kernel SHAP: X_train is the background sample; the rows of `df`
# (defined outside this cell) are the instances being explained.
explainer = shap.KernelExplainer(model_y1.predict, X_train)
shap_values = explainer(df)

# Helper frames built from X_data. `unscaled_values` is simply row 0 of
# X_data_df; this cell does not read any of the three names again.
X_data_df = pd.DataFrame(X_data, columns=X_data.columns)
X_train_scaled_df = pd.DataFrame(X_data, columns=X_data.columns)
unscaled_values = X_data_df.iloc[0]


def replace_scaled_with_unscaled(shap_values, unscaled_values, feature_names):
    """Write each entry of `feature_names` into `shap_values.feature_names`.

    `unscaled_values` is accepted but never read. The same `shap_values`
    object is returned after being modified in place.
    """
    for position, feature in enumerate(feature_names):
        shap_values.feature_names[position] = f"{feature}"
    return shap_values


# Map `df` (presumably standardized inputs) back through the fitted scaler SS
# (x * scale_ + mean_) and store the result as the explanation's data.
MS = df
s_mean, s_scale = SS.mean_, SS.scale_
Optimized_Inputs = (MS * s_scale) + s_mean
hk = Optimized_Inputs.to_numpy()
shap_values.data = hk

# Waterfall for the first explained row, showing at most 20 features.
shap_waterfall_plot = shap.plots.waterfall(shap_values[0], max_display=20)

In [None]:

# Work on target 1 (first output column) in this cell.
y_data, y_train, y_test = y1_data, y1_train, y1_test

# Guarantee a DataFrame with string-typed column labels.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# Target-1 SVR (RBF kernel), trained on all rows of X_data.
model_y1 = SVR(
    C=938266.6722022587,
    epsilon=7.634680062462178,
    kernel='rbf',
    degree=4,
    gamma=0.00877730291334558,
)
model_y1.fit(X_data, y_data)

# Kernel SHAP with X_train as background; explains the rows of `df`,
# which is defined outside this cell.
explainer = shap.KernelExplainer(model_y1.predict, X_train)
shap_values = explainer(df)

# Frames derived from X_data; `unscaled_values` is row 0 of X_data_df.
# None of these is read again in this cell.
X_data_df = pd.DataFrame(X_data, columns=X_data.columns)
X_train_scaled_df = pd.DataFrame(X_data, columns=X_data.columns)
unscaled_values = X_data_df.iloc[0]


def replace_scaled_with_unscaled(shap_values, unscaled_values, feature_names):
    """Copy `feature_names` element-wise into `shap_values.feature_names`.

    The `unscaled_values` argument is not used. Returns `shap_values`,
    which has been updated in place.
    """
    for position, feature in enumerate(feature_names):
        shap_values.feature_names[position] = f"{feature}"
    return shap_values


# Reverse the SS standardization on `df` (multiply by scale_, add mean_) and
# attach those values to the explanation as its data.
MS = df
s_mean, s_scale = SS.mean_, SS.scale_
Optimized_Inputs = (MS * s_scale) + s_mean
hk = Optimized_Inputs.to_numpy()
shap_values.data = hk

# Waterfall for the first explained row, limited to 7 displayed features.
shap_waterfall_plot = shap.plots.waterfall(shap_values[0], max_display=7)

In [None]:

# Target 1: bind the generic y_* aliases to the first output column.
y_data, y_train, y_test = y1_data, y1_train, y1_test

# X_data must be a DataFrame whose column labels are all strings.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# Hyper-parameters of the target-1 gradient-boosted model.
mod_1 = dict(
    max_depth=3,
    learning_rate=0.011723269811235474,
    n_estimators=875,
    min_child_weight=6,
    gamma=0.7712938662023604,
    subsample=0.7899217905693013,
    colsample_bytree=0.8285058920843318,
    reg_alpha=0.6370415102229366,
    reg_lambda=0.3197877984587,
    random_state=560,
)

# Train on the complete feature table.
model_y1 = XGBRegressor(**mod_1)
model_y1.fit(X_data, y_data)

# SHAP explainer built with X_train as background data; every row of X_data
# is explained.
explainer = shap.Explainer(model_y1, X_train)
shap_values = explainer(X_data)

# Beeswarm summary of the SHAP values (up to 20 features).
shap.plots.beeswarm(shap_values, max_display=20)

In [None]:

# Work on target 1 (first output column) in this cell.
y_data, y_train, y_test = y1_data, y1_train, y1_test

# Guarantee a DataFrame with string-typed column labels.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# XGBoost settings for target 1.
mod_1 = dict(
    max_depth=3,
    learning_rate=0.011723269811235474,
    n_estimators=875,
    min_child_weight=6,
    gamma=0.7712938662023604,
    subsample=0.7899217905693013,
    colsample_bytree=0.8285058920843318,
    reg_alpha=0.6370415102229366,
    reg_lambda=0.3197877984587,
    random_state=560,
)

# Fit on all rows of X_data.
model_y1 = XGBRegressor(**mod_1)
model_y1.fit(X_data, y_data)

# Explainer uses X_train as background; explanations cover all of X_data.
explainer = shap.Explainer(model_y1, X_train)
shap_values = explainer(X_data)

# Beeswarm summary restricted to the 8 top-ranked features.
shap.plots.beeswarm(shap_values, max_display=8)

In [None]:

# Target 2: bind the generic y_* aliases to the second output column.
y_data, y_train, y_test = y2_data, y2_train, y2_test

# X_data must be a DataFrame whose column labels are all strings.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# RBF-kernel SVR for target 2, fitted on the complete feature table.
model_y2 = SVR(
    C=71459.276928581,
    epsilon=0.9031869098259523,
    kernel='rbf',
    degree=1,
    gamma=0.0035901390986543542,
)
model_y2.fit(X_data, y_data)

# Kernel SHAP: X_data is the background sample; the rows of `df`
# (defined outside this cell) are the instances being explained.
explainer = shap.KernelExplainer(model_y2.predict, X_data)
shap_values = explainer(df)

# Helper frames built from X_data. `unscaled_values` is simply row 0 of
# X_data_df; this cell does not read any of the three names again.
X_data_df = pd.DataFrame(X_data, columns=X_data.columns)
X_train_scaled_df = pd.DataFrame(X_data, columns=X_data.columns)
unscaled_values = X_data_df.iloc[0]


def replace_scaled_with_unscaled(shap_values, unscaled_values, feature_names):
    """Write each entry of `feature_names` into `shap_values.feature_names`.

    `unscaled_values` is accepted but never read. The same `shap_values`
    object is returned after being modified in place.
    """
    for position, feature in enumerate(feature_names):
        shap_values.feature_names[position] = f"{feature}"
    return shap_values


# Map `df` (presumably standardized inputs) back through the fitted scaler SS
# (x * scale_ + mean_) and store the result as the explanation's data.
MS = df
s_mean, s_scale = SS.mean_, SS.scale_
Optimized_Inputs = (MS * s_scale) + s_mean
hk = Optimized_Inputs.to_numpy()
shap_values.data = hk

# Waterfall for the first explained row, showing at most 20 features.
shap_waterfall_plot = shap.plots.waterfall(shap_values[0], max_display=20)

In [None]:

# Work on target 2 (second output column) in this cell.
y_data, y_train, y_test = y2_data, y2_train, y2_test

# Guarantee a DataFrame with string-typed column labels.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# Target-2 SVR (RBF kernel), trained on all rows of X_data.
model_y2 = SVR(
    C=71459.276928581,
    epsilon=0.9031869098259523,
    kernel='rbf',
    degree=1,
    gamma=0.0035901390986543542,
)
model_y2.fit(X_data, y_data)

# Kernel SHAP with X_data as background; explains the rows of `df`,
# which is defined outside this cell.
explainer = shap.KernelExplainer(model_y2.predict, X_data)
shap_values = explainer(df)

# Frames derived from X_data; `unscaled_values` is row 0 of X_data_df.
# None of these is read again in this cell.
X_data_df = pd.DataFrame(X_data, columns=X_data.columns)
X_train_scaled_df = pd.DataFrame(X_data, columns=X_data.columns)
unscaled_values = X_data_df.iloc[0]


def replace_scaled_with_unscaled(shap_values, unscaled_values, feature_names):
    """Copy `feature_names` element-wise into `shap_values.feature_names`.

    The `unscaled_values` argument is not used. Returns `shap_values`,
    which has been updated in place.
    """
    for position, feature in enumerate(feature_names):
        shap_values.feature_names[position] = f"{feature}"
    return shap_values


# Reverse the SS standardization on `df` (multiply by scale_, add mean_) and
# attach those values to the explanation as its data.
MS = df
s_mean, s_scale = SS.mean_, SS.scale_
Optimized_Inputs = (MS * s_scale) + s_mean
hk = Optimized_Inputs.to_numpy()
shap_values.data = hk

# Waterfall for the first explained row, limited to 7 displayed features.
shap_waterfall_plot = shap.plots.waterfall(shap_values[0], max_display=7)

In [None]:

# Target 2: bind the generic y_* aliases to the second output column.
y_data, y_train, y_test = y2_data, y2_train, y2_test

# X_data must be a DataFrame whose column labels are all strings.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# RBF-kernel SVR for target 2; unlike the neighbouring cells this one is
# fitted on the training split only.
model_y2 = SVR(
    C=71459.276928581,
    epsilon=0.9031869098259523,
    kernel='rbf',
    degree=1,
    gamma=0.0035901390986543542,
)
model_y2.fit(X_train, y_train)

# Kernel SHAP: X_data is the background sample; the rows of `df`
# (defined outside this cell) are the instances being explained.
explainer = shap.KernelExplainer(model_y2.predict, X_data)
shap_values = explainer(df)

# Waterfall for the first explained row, showing at most 20 features.
shap.plots.waterfall(shap_values[0], max_display=20)



In [None]:

# Work on target 2 (second output column) in this cell.
y_data, y_train, y_test = y2_data, y2_train, y2_test

# Guarantee a DataFrame with string-typed column labels.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# XGBoost settings for target 2.
mod_2 = dict(
    max_depth=3,
    learning_rate=0.7866691318932307,
    n_estimators=565,
    min_child_weight=7,
    gamma=0.9416813016248271,
    subsample=0.9492401329994024,
    colsample_bytree=0.9677794380528386,
    reg_alpha=0.18569042405228248,
    reg_lambda=0.8638844510641983,
    random_state=666,
)

# Fit on all rows of X_data.
model_y2 = XGBRegressor(**mod_2)
model_y2.fit(X_data, y_data)

# X_data serves both as the explainer's background and as the rows explained.
explainer = shap.Explainer(model_y2, X_data)
shap_values = explainer(X_data)

# Beeswarm summary of the SHAP values (up to 20 features).
shap.plots.beeswarm(shap_values,  max_display=20)

In [None]:

# Libraries used by this cell
import shap
from xgboost import XGBRegressor
import pandas as pd

# Target 3: bind the generic y_* aliases to the third output column.
y_data, y_train, y_test = y3_data, y3_train, y3_test

# X_data must be a DataFrame whose column labels are all strings.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# Hyper-parameters of the target-3 gradient-boosted model.
y3_params = dict(
    max_depth=5,
    learning_rate=0.21192829865979532,
    n_estimators=513,
    min_child_weight=1,
    gamma=0.8354449115144676,
    subsample=0.8559761007878783,
    colsample_bytree=0.9499990399914706,
    reg_alpha=0.061420625912146976,
    reg_lambda=0.9727616243556757,
    random_state=599,
)

# Train on the complete feature table.
model_y3 = XGBRegressor(**y3_params)
model_y3.fit(X_data, y_data)

# Hard-coded 15-element vector; not referenced anywhere else in this cell.
mm = [
    -0.08204877, 0.15594775, -0.00367291, 0.4247027, 0.84494347,
    0.29067574, -0.22331748, 0.815149, -1.02562235, -0.44830675,
    -0.60883313, -1.03435034, -0.12881452, 0.08031999, -0.78065315,
]

# SHAP explainer with X_data as background; the rows of `df`
# (defined outside this cell) are the instances being explained.
explainer = shap.Explainer(model_y3, X_data)
shap_values = explainer(df)

# Helper frames built from X_data. `unscaled_values` is simply row 0 of
# X_data_df; this cell does not read any of the three names again.
X_data_df = pd.DataFrame(X_data, columns=X_data.columns)
X_train_scaled_df = pd.DataFrame(X_data, columns=X_data.columns)
unscaled_values = X_data_df.iloc[0]


def replace_scaled_with_unscaled(shap_values, unscaled_values, feature_names):
    """Write each entry of `feature_names` into `shap_values.feature_names`.

    `unscaled_values` is accepted but never read. The same `shap_values`
    object is returned after being modified in place.
    """
    for position, feature in enumerate(feature_names):
        shap_values.feature_names[position] = f"{feature}"
    return shap_values


# Map `df` (presumably standardized inputs) back through the fitted scaler SS
# (x * scale_ + mean_) and store the result as the explanation's data.
MS = df
s_mean, s_scale = SS.mean_, SS.scale_
Optimized_Inputs = (MS * s_scale) + s_mean
hk = Optimized_Inputs.to_numpy()
shap_values.data = hk

# Waterfall for the first explained row, showing at most 20 features.
shap_waterfall_plot = shap.plots.waterfall(shap_values[0], max_display=20)

In [None]:

# Libraries used by this cell
import shap
from xgboost import XGBRegressor
import pandas as pd

# Work on target 3 (third output column) in this cell.
y_data, y_train, y_test = y3_data, y3_train, y3_test

# Guarantee a DataFrame with string-typed column labels.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# XGBoost settings for target 3.
y3_params = dict(
    max_depth=5,
    learning_rate=0.21192829865979532,
    n_estimators=513,
    min_child_weight=1,
    gamma=0.8354449115144676,
    subsample=0.8559761007878783,
    colsample_bytree=0.9499990399914706,
    reg_alpha=0.061420625912146976,
    reg_lambda=0.9727616243556757,
    random_state=599,
)

# Fit on all rows of X_data.
model_y3 = XGBRegressor(**y3_params)
model_y3.fit(X_data, y_data)

# Hard-coded 15-element vector; not referenced anywhere else in this cell.
mm = [
    -0.08204877, 0.15594775, -0.00367291, 0.4247027, 0.84494347,
    0.29067574, -0.22331748, 0.815149, -1.02562235, -0.44830675,
    -0.60883313, -1.03435034, -0.12881452, 0.08031999, -0.78065315,
]

# Explainer uses X_data as background; explains the rows of `df`,
# which is defined outside this cell.
explainer = shap.Explainer(model_y3, X_data)
shap_values = explainer(df)

# Waterfall for the first explained row, limited to 8 displayed features.
shap.plots.waterfall(shap_values[0], max_display=8)

In [None]:

# Libraries used by this cell
import shap
from xgboost import XGBRegressor
import pandas as pd

# Target 3: bind the generic y_* aliases to the third output column.
y_data, y_train, y_test = y3_data, y3_train, y3_test

# X_data must be a DataFrame whose column labels are all strings.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# Hyper-parameters of the target-3 gradient-boosted model.
y3_params = dict(
    max_depth=5,
    learning_rate=0.21192829865979532,
    n_estimators=513,
    min_child_weight=1,
    gamma=0.8354449115144676,
    subsample=0.8559761007878783,
    colsample_bytree=0.9499990399914706,
    reg_alpha=0.061420625912146976,
    reg_lambda=0.9727616243556757,
    random_state=599,
)

# Train on the complete feature table.
model_y3 = XGBRegressor(**y3_params)
model_y3.fit(X_data, y_data)

# X_data serves both as the explainer's background and as the rows explained.
explainer = shap.Explainer(model_y3, X_data)
shap_values = explainer(X_data)

# Beeswarm summary of the SHAP values (up to 20 features).
shap.plots.beeswarm(shap_values, max_display=20)


In [None]:

# Target 4: bind the generic y_* aliases to the fourth output column.
y_data, y_train, y_test = y4_data, y4_train, y4_test

# X_data must be a DataFrame whose column labels are all strings.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# SVR-style parameter set; the MLP below does not use it and nothing else in
# this cell reads it.
y4_params = dict(
    C=435707.8208868281,
    epsilon=0.25793769392540933,
    kernel='rbf',
    degree=4,
    gamma=0.0048393326352296085,
)

# Four-hidden-layer MLP for target 4, fitted on the complete feature table.
model_y4 = MLPRegressor(
    hidden_layer_sizes=(7, 28, 23, 19),
    activation='relu',
    alpha=0.9365945996054417,
    solver='lbfgs',
    random_state=72,
)
model_y4.fit(X_data, y_data)

# Kernel SHAP: X_train is the background sample; the rows of `df`
# (defined outside this cell) are the instances being explained.
explainer = shap.KernelExplainer(model_y4.predict, X_train)
shap_values = explainer(df)

# Helper frames built from X_data. `unscaled_values` is simply row 0 of
# X_data_df; this cell does not read any of the three names again.
X_data_df = pd.DataFrame(X_data, columns=X_data.columns)
X_train_scaled_df = pd.DataFrame(X_data, columns=X_data.columns)
unscaled_values = X_data_df.iloc[0]


def replace_scaled_with_unscaled(shap_values, unscaled_values, feature_names):
    """Write each entry of `feature_names` into `shap_values.feature_names`.

    `unscaled_values` is accepted but never read. The same `shap_values`
    object is returned after being modified in place.
    """
    for position, feature in enumerate(feature_names):
        shap_values.feature_names[position] = f"{feature}"
    return shap_values


# Map `df` (presumably standardized inputs) back through the fitted scaler SS
# (x * scale_ + mean_) and store the result as the explanation's data.
MS = df
s_mean, s_scale = SS.mean_, SS.scale_
Optimized_Inputs = (MS * s_scale) + s_mean
hk = Optimized_Inputs.to_numpy()
shap_values.data = hk

# Waterfall for the first explained row, limited to 7 displayed features.
shap_waterfall_plot = shap.plots.waterfall(shap_values[0], max_display=7)

In [None]:

# Work on target 4 (fourth output column) in this cell.
y_data, y_train, y_test = y4_data, y4_train, y4_test

# Guarantee a DataFrame with string-typed column labels.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# Parameter dict with SVR-style keys; it is not passed to the MLP below and
# is not read again in this cell.
y4_params = dict(
    C=435707.8208868281,
    epsilon=0.25793769392540933,
    kernel='rbf',
    degree=4,
    gamma=0.0048393326352296085,
)

# Target-4 MLP with four hidden layers, trained on all rows of X_data.
model_y4 = MLPRegressor(
    hidden_layer_sizes=(7, 28, 23, 19),
    activation='relu',
    alpha=0.9365945996054417,
    solver='lbfgs',
    random_state=72,
)
model_y4.fit(X_data, y_data)

# Kernel SHAP with X_train as background; explains the rows of `df`,
# which is defined outside this cell.
explainer = shap.KernelExplainer(model_y4.predict, X_train)
shap_values = explainer(df)

# Frames derived from X_data; `unscaled_values` is row 0 of X_data_df.
# None of these is read again in this cell.
X_data_df = pd.DataFrame(X_data, columns=X_data.columns)
X_train_scaled_df = pd.DataFrame(X_data, columns=X_data.columns)
unscaled_values = X_data_df.iloc[0]


def replace_scaled_with_unscaled(shap_values, unscaled_values, feature_names):
    """Copy `feature_names` element-wise into `shap_values.feature_names`.

    The `unscaled_values` argument is not used. Returns `shap_values`,
    which has been updated in place.
    """
    for position, feature in enumerate(feature_names):
        shap_values.feature_names[position] = f"{feature}"
    return shap_values


# Reverse the SS standardization on `df` (multiply by scale_, add mean_) and
# attach those values to the explanation as its data.
MS = df
s_mean, s_scale = SS.mean_, SS.scale_
Optimized_Inputs = (MS * s_scale) + s_mean
hk = Optimized_Inputs.to_numpy()
shap_values.data = hk

# Waterfall for the first explained row, limited to 8 displayed features.
shap_waterfall_plot = shap.plots.waterfall(shap_values[0], max_display=8)

In [None]:

# Target 4: bind the generic y_* aliases to the fourth output column.
y_data, y_train, y_test = y4_data, y4_train, y4_test

# X_data must be a DataFrame whose column labels are all strings.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# SVR-style parameter set; the MLP below does not use it and nothing else in
# this cell reads it.
y4_params = dict(
    C=435707.8208868281,
    epsilon=0.25793769392540933,
    kernel='rbf',
    degree=4,
    gamma=0.0048393326352296085,
)

# Four-hidden-layer MLP for target 4, fitted on the complete feature table.
model_y4 = MLPRegressor(
    hidden_layer_sizes=(7, 28, 23, 19),
    activation='relu',
    alpha=0.9365945996054417,
    solver='lbfgs',
    random_state=72,
)
model_y4.fit(X_data, y_data)

# Kernel SHAP: X_train is the background sample; the rows of `df`
# (defined outside this cell) are the instances being explained.
explainer = shap.KernelExplainer(model_y4.predict, X_train)
shap_values = explainer(df)

# Waterfall for the first explained row, showing at most 20 features.
shap.plots.waterfall(shap_values[0], max_display=20)

In [None]:

# Work on target 4 (fourth output column) in this cell.
y_data, y_train, y_test = y4_data, y4_train, y4_test

# Guarantee a DataFrame with string-typed column labels.
X_data = X_data if isinstance(X_data, pd.DataFrame) else pd.DataFrame(X_data)
X_data.columns = list(map(str, X_data.columns))

# XGBoost settings for target 4.
mod_4 = dict(
    max_depth=10,
    learning_rate=0.31195099027474943,
    n_estimators=120,
    min_child_weight=8,
    gamma=0.5887647311991991,
    subsample=0.8053950657222284,
    colsample_bytree=0.9531753908252204,
    reg_alpha=0.9543398120967802,
    reg_lambda=0.05317709105394063,
    random_state=693,
)

# Fit on all rows of X_data.
model_y4 = XGBRegressor(**mod_4)
model_y4.fit(X_data, y_data)

# X_data serves both as the explainer's background and as the rows explained.
explainer = shap.Explainer(model_y4, X_data)
shap_values = explainer(X_data)

# Beeswarm summary of the SHAP values (up to 20 features).
shap.plots.beeswarm(shap_values, max_display=20)