In [1]:
import shap
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from itertools import combinations
import seaborn as sns
from sklearn import metrics
from sklearn import preprocessing 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import LeaveOneOut
from sklearn.feature_selection import RFE
from bayes_opt import BayesianOptimization

In [6]:
data=pd.read_excel(r"C:\Users\HP\Desktop\Data.xlsx",sheet_name='16+3',index_col=0,header=0)
data1=data.iloc[0:18, :]    
OP=data1[['lg(O3)','lg(H2O2)','pH','TOC','Φn4','O3(l)20']]  
minmax_scaler=preprocessing.MinMaxScaler()  
data2=minmax_scaler.fit_transform(OP)
data3=pd.DataFrame(data2,columns=['lg(O3)','lg(H2O2)','pH','TOC','Φn4','O3(l)20'])  

In [7]:
X_full=data3[['lg(O3)','lg(H2O2)','pH','Φn4','O3(l)20']]  
y_full=data3['TOC']

def run_bayesian_optimization(X_train_all, y_train_all):
    def black_box_function(learning_rate, n_estimators, min_samples_split, max_features, max_depth, max_leaf_nodes):
        model = GradientBoostingRegressor(
            learning_rate=learning_rate,
            n_estimators=int(n_estimators),
            min_samples_split=int(min_samples_split),
            max_features=min(max_features, 0.999),
            max_depth=int(max_depth),
            max_leaf_nodes=int(max_leaf_nodes),
            random_state=2
        )
        loo = LeaveOneOut()
        y_real, y_predicted = [], []
        for train_index, test_index in loo.split(X_train_all):
            X_train, X_val = X_train_all[train_index], X_train_all[test_index]
            y_train, y_val = y_train_all[train_index], y_train_all[test_index]
            model.fit(X_train, y_train.ravel())
            y_pred = model.predict(X_val)
            y_real.append(y_val[0])
            y_predicted.append(y_pred[0])
        return r2_score(y_real, y_predicted)
    
    pbounds = {
        'learning_rate': (0.001, 0.2),
        'n_estimators': (10, 500),
        'min_samples_split': (2, 25),
        'max_features': (1, 4),
        'max_depth': (1, 5),
        'max_leaf_nodes': (2, 15)
    }
    optimizer = BayesianOptimization(
        f=black_box_function,
        pbounds=pbounds,
        random_state=1
    )
    optimizer.maximize(init_points=15, n_iter=20)
    return optimizer.max['params']


test_scores = []
test_rmse_scores = []
n_runs = 10  

for run in range(n_runs):
    
    X_train_all, X_test, y_train_all, y_test = train_test_split(
    X_full.values,  
    y_full.values, 
    test_size=4, 
    random_state=run
)
    
    best_params = run_bayesian_optimization(X_train_all, y_train_all)
    
    final_model = GradientBoostingRegressor(
        learning_rate=best_params['learning_rate'],
        n_estimators=int(best_params['n_estimators']),
        max_leaf_nodes=int(best_params['max_leaf_nodes']),
        max_features=min(best_params['max_features'], 0.999),
        min_samples_split=int(best_params['min_samples_split']),
        max_depth=int(best_params['max_depth']),
        random_state=2
    )
    final_model.fit(X_train_all, y_train_all.ravel())
    y_pred = final_model.predict(X_test)

    test_r2 = r2_score(y_test, y_pred)
    test_rmse = np.sqrt(mean_squared_error(y_test, y_pred)) 
    
    test_scores.append(test_r2)
    test_rmse_scores.append(test_rmse)  
    
    print(f"Run {run+1}/10 - Test R²: {test_r2:.4f}, Test RMSE: {test_rmse:.4f}")  


mean_r2 = np.mean(test_scores)
std_r2 = np.std(test_scores)
mean_rmse = np.mean(test_rmse_scores)  
std_rmse = np.std(test_rmse_scores)    

print(f"\n Average test R²score: {mean_r2:.4f} ± {std_r2:.4f}")
print(f"verage testRMSE: {mean_rmse:.4f} ± {std_rmse:.4f}")

|   iter    |  target   | learni... | max_depth | max_fe... | max_le... | min_sa... | n_esti... |
-------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.6991   [39m | [39m0.08399  [39m | [39m3.881    [39m | [39m1.0      [39m | [39m5.93     [39m | [39m5.375    [39m | [39m55.25    [39m |
| [39m2        [39m | [39m0.5868   [39m | [39m0.03807  [39m | [39m2.382    [39m | [39m2.19     [39m | [39m9.005    [39m | [39m11.64    [39m | [39m345.8    [39m |
| [39m3        [39m | [39m0.5798   [39m | [39m0.04169  [39m | [39m4.512    [39m | [39m1.082    [39m | [39m10.72    [39m | [39m11.6     [39m | [39m283.8    [39m |
| [39m4        [39m | [39m0.6361   [39m | [39m0.02894  [39m | [39m1.792    [39m | [39m3.402    [39m | [39m14.59    [39m | [39m9.209    [39m | [39m349.2    [39m |
| [39m5        [39m | [39m0.486    [39m | [39m0.1754   [39m | [39m4.578    [39m | [

| [39m10       [39m | [39m-0.1598  [39m | [39m0.09882  [39m | [39m1.213    [39m | [39m2.722    [39m | [39m3.907    [39m | [39m15.55    [39m | [39m352.9    [39m |
| [39m11       [39m | [39m0.6102   [39m | [39m0.02136  [39m | [39m2.656    [39m | [39m3.083    [39m | [39m7.384    [39m | [39m3.149    [39m | [39m272.6    [39m |
| [39m12       [39m | [39m-0.1598  [39m | [39m0.1331   [39m | [39m3.06     [39m | [39m3.834    [39m | [39m9.625    [39m | [39m22.78    [39m | [39m77.36    [39m |
| [39m13       [39m | [39m-0.1598  [39m | [39m0.02872  [39m | [39m4.23     [39m | [39m2.193    [39m | [39m4.15     [39m | [39m23.33    [39m | [39m180.4    [39m |
| [39m14       [39m | [39m-0.1598  [39m | [39m0.1504   [39m | [39m3.904    [39m | [39m3.65     [39m | [39m10.11    [39m | [39m19.27    [39m | [39m181.0    [39m |
| [39m15       [39m | [39m-0.1598  [39m | [39m0.05472  [39m | [39m4.584    [39m | [39m2.284    [39m 

| [39m20       [39m | [39m-0.07196 [39m | [39m0.001    [39m | [39m5.0      [39m | [39m4.0      [39m | [39m13.25    [39m | [39m2.0      [39m | [39m54.73    [39m |
| [39m21       [39m | [39m0.6553   [39m | [39m0.1283   [39m | [39m1.0      [39m | [39m1.0      [39m | [39m3.775    [39m | [39m2.372    [39m | [39m51.29    [39m |
| [39m22       [39m | [39m0.6272   [39m | [39m0.2      [39m | [39m1.0      [39m | [39m1.0      [39m | [39m10.38    [39m | [39m2.0      [39m | [39m42.91    [39m |
| [39m23       [39m | [39m0.6671   [39m | [39m0.1287   [39m | [39m4.883    [39m | [39m3.006    [39m | [39m11.97    [39m | [39m3.029    [39m | [39m279.8    [39m |
| [39m24       [39m | [39m0.617    [39m | [39m0.02804  [39m | [39m1.722    [39m | [39m3.918    [39m | [39m14.34    [39m | [39m9.723    [39m | [39m274.6    [39m |
| [39m25       [39m | [39m0.5004   [39m | [39m0.2      [39m | [39m1.0      [39m | [39m4.0      [39m 

| [39m30       [39m | [39m0.2995   [39m | [39m0.001    [39m | [39m5.0      [39m | [39m4.0      [39m | [39m12.79    [39m | [39m2.0      [39m | [39m447.2    [39m |
| [39m31       [39m | [39m0.4345   [39m | [39m0.2      [39m | [39m5.0      [39m | [39m1.0      [39m | [39m15.0     [39m | [39m2.0      [39m | [39m279.5    [39m |
| [39m32       [39m | [39m0.4345   [39m | [39m0.2      [39m | [39m5.0      [39m | [39m4.0      [39m | [39m15.0     [39m | [39m2.0      [39m | [39m135.3    [39m |
| [39m33       [39m | [39m-0.00403 [39m | [39m0.001    [39m | [39m1.0      [39m | [39m1.0      [39m | [39m2.0      [39m | [39m2.0      [39m | [39m141.9    [39m |
| [39m34       [39m | [39m0.5913   [39m | [39m0.2      [39m | [39m1.0      [39m | [39m4.0      [39m | [39m2.0      [39m | [39m2.0      [39m | [39m47.53    [39m |
| [39m35       [39m | [39m-0.1598  [39m | [39m0.001    [39m | [39m1.0      [39m | [39m4.0      [39m 

| [39m3        [39m | [39m0.5412   [39m | [39m0.04169  [39m | [39m4.512    [39m | [39m1.082    [39m | [39m10.72    [39m | [39m11.6     [39m | [39m283.8    [39m |
| [39m4        [39m | [39m0.54     [39m | [39m0.02894  [39m | [39m1.792    [39m | [39m3.402    [39m | [39m14.59    [39m | [39m9.209    [39m | [39m349.2    [39m |
| [39m5        [39m | [39m0.4978   [39m | [39m0.1754   [39m | [39m4.578    [39m | [39m1.255    [39m | [39m2.508    [39m | [39m5.906    [39m | [39m440.3    [39m |
| [39m6        [39m | [39m-0.1598  [39m | [39m0.02057  [39m | [39m2.684    [39m | [39m3.874    [39m | [39m8.931    [39m | [39m17.91    [39m | [39m164.6    [39m |
| [39m7        [39m | [39m-0.1598  [39m | [39m0.1376   [39m | [39m4.339    [39m | [39m1.055    [39m | [39m11.75    [39m | [39m24.74    [39m | [39m376.6    [39m |
| [39m8        [39m | [39m-0.1598  [39m | [39m0.05681  [39m | [39m4.157    [39m | [39m1.31     [39m 

| [39m13       [39m | [39m-0.1598  [39m | [39m0.02872  [39m | [39m4.23     [39m | [39m2.193    [39m | [39m4.15     [39m | [39m23.33    [39m | [39m180.4    [39m |
| [39m14       [39m | [39m-0.1598  [39m | [39m0.1504   [39m | [39m3.904    [39m | [39m3.65     [39m | [39m10.11    [39m | [39m19.27    [39m | [39m181.0    [39m |
| [39m15       [39m | [39m-0.1598  [39m | [39m0.05472  [39m | [39m4.584    [39m | [39m2.284    [39m | [39m14.54    [39m | [39m17.26    [39m | [39m314.6    [39m |
| [39m16       [39m | [39m0.5458   [39m | [39m0.04668  [39m | [39m5.0      [39m | [39m1.0      [39m | [39m10.24    [39m | [39m13.44    [39m | [39m269.3    [39m |
| [39m17       [39m | [39m0.2071   [39m | [39m0.001    [39m | [39m4.925    [39m | [39m1.0      [39m | [39m13.44    [39m | [39m2.0      [39m | [39m339.1    [39m |
| [39m18       [39m | [39m0.5556   [39m | [39m0.2      [39m | [39m1.0      [39m | [39m4.0      [39m 

| [35m23       [39m | [35m0.7041   [39m | [35m0.06223  [39m | [35m3.886    [39m | [35m1.0      [39m | [35m5.051    [39m | [35m2.0      [39m | [35m47.08    [39m |
| [35m24       [39m | [35m0.7122   [39m | [35m0.104    [39m | [35m4.882    [39m | [35m1.0      [39m | [35m13.61    [39m | [35m2.0      [39m | [35m49.07    [39m |
| [39m25       [39m | [39m0.5071   [39m | [39m0.1473   [39m | [39m3.081    [39m | [39m1.0      [39m | [39m10.21    [39m | [39m10.31    [39m | [39m45.47    [39m |
| [39m26       [39m | [39m-0.1126  [39m | [39m0.001    [39m | [39m1.0      [39m | [39m4.0      [39m | [39m12.44    [39m | [39m2.0      [39m | [39m39.88    [39m |
| [39m27       [39m | [39m0.5943   [39m | [39m0.2      [39m | [39m5.0      [39m | [39m1.0      [39m | [39m15.0     [39m | [39m2.0      [39m | [39m57.08    [39m |
| [39m28       [39m | [39m-0.09376 [39m | [39m0.001    [39m | [39m5.0      [39m | [39m1.0      [39m 

| [39m33       [39m | [39m0.2596   [39m | [39m0.001    [39m | [39m4.641    [39m | [39m4.0      [39m | [39m11.55    [39m | [39m3.628    [39m | [39m348.3    [39m |
| [39m34       [39m | [39m0.2018   [39m | [39m0.001    [39m | [39m2.557    [39m | [39m1.0      [39m | [39m11.19    [39m | [39m6.233    [39m | [39m352.0    [39m |
| [39m35       [39m | [39m0.5834   [39m | [39m0.2      [39m | [39m4.43     [39m | [39m4.0      [39m | [39m9.334    [39m | [39m2.0      [39m | [39m272.4    [39m |
Run 9/10 - Test R²: 0.8763, Test RMSE: 0.0873
|   iter    |  target   | learni... | max_depth | max_fe... | max_le... | min_sa... | n_esti... |
-------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.6526   [39m | [39m0.08399  [39m | [39m3.881    [39m | [39m1.0      [39m | [39m5.93     [39m | [39m5.375    [39m | [39m55.25    [39m |
| [35m2        [39m | [35m0.6547   [39m | 