In [1]:
import shap
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from itertools import combinations
import seaborn as sns
from sklearn import metrics
from sklearn import preprocessing 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from xgboost.sklearn import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import LeaveOneOut
from sklearn.feature_selection import RFE
from bayes_opt import BayesianOptimization

In [10]:
# Load the '16+3' sheet (first column as index, first row as header) and keep
# only its first 18 rows for modelling.
data = pd.read_excel(
    r"C:\Users\HP\Desktop\Data.xlsx",
    sheet_name='16+3',
    index_col=0,
    header=0,
)
data1 = data.iloc[:18]
Features = ['lg(O3)', 'lg(H2O2)', 'pH', 'TOC', 'UV254', 'FMax4']

# Min-max scale every selected column, predictors and the TOC target alike.
# NOTE(review): the scaler is fitted on all 18 rows, i.e. before the later
# train/test split, so held-out rows contribute to the scaling ranges — confirm
# this is acceptable for the reported test metrics.
OP = data1.loc[:, Features]
minmax_scaler = preprocessing.MinMaxScaler()
data2 = minmax_scaler.fit_transform(OP)
data3 = pd.DataFrame(data=data2, columns=Features)

# TOC is the regression target; every other selected column is a predictor.
X_features = [name for name in Features if name != 'TOC']
X_full = data3.loc[:, X_features]
y_full = data3.loc[:, 'TOC']

def run_bayesian_optimization(X_train_all, y_train_all, init_points=15, n_iter=20, random_state=1):
    """Tune XGBRegressor hyperparameters with Bayesian optimisation.

    The objective being maximised is the R² between the true targets and the
    leave-one-out predictions obtained on the supplied training data.

    Parameters
    ----------
    X_train_all : array-like
        Training predictors, one row per sample. Converted with ``np.asarray``
        so arrays, DataFrames and nested lists are all indexed positionally.
    y_train_all : array-like
        Training targets. Flattened to one dimension.
    init_points : int, default 15
        Number of initial exploration points passed to ``optimizer.maximize``.
    n_iter : int, default 20
        Number of optimisation iterations passed to ``optimizer.maximize``.
    random_state : int, default 1
        Seed handed to ``BayesianOptimization``.

    Returns
    -------
    dict
        ``optimizer.max['params']``: the best point in the *raw* search space.
        ``n_estimators`` and ``max_depth`` are floats that the caller must cast
        with ``int``; ``reg_alpha`` and ``reg_lambda`` are base-10 exponents.
    """
    # Coerce once so the positional fancy-indexing below behaves the same for
    # numpy arrays, pandas objects and lists, and so a column-vector target is
    # flattened a single time instead of on every fit.
    X_all = np.asarray(X_train_all)
    y_all = np.asarray(y_train_all).ravel()

    # The leave-one-out folds do not depend on the hyperparameters, so build
    # them once rather than on every objective evaluation.
    loo_splits = list(LeaveOneOut().split(X_all))

    def black_box_function(n_estimators, max_depth, learning_rate, gamma, reg_alpha,
                           reg_lambda, min_child_weight, subsample, colsample_bytree):
        """Return the leave-one-out R² for one hyperparameter candidate."""
        model = XGBRegressor(
            # The optimiser proposes floats; tree counts and depths must be ints.
            n_estimators=int(n_estimators),
            max_depth=int(max_depth),
            learning_rate=learning_rate,
            gamma=gamma,
            # Regularisation strengths are searched on a log10 scale.
            reg_alpha=10 ** reg_alpha,
            reg_lambda=10 ** reg_lambda,
            min_child_weight=min_child_weight,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            random_state=2,
        )
        y_real, y_predicted = [], []
        for train_index, test_index in loo_splits:
            model.fit(X_all[train_index], y_all[train_index])
            y_pred = model.predict(X_all[test_index])
            # Each fold holds out exactly one sample.
            y_real.append(y_all[test_index][0])
            y_predicted.append(y_pred[0])
        return r2_score(y_real, y_predicted)

    # Search bounds in the raw optimiser space (see the conversions above).
    pbounds = {
        'n_estimators': (10, 501),
        'max_depth': (2, 10),
        'learning_rate': (0.01, 0.5),
        'gamma': (0, 1),
        'reg_alpha': (-5, 0),
        'reg_lambda': (-5, 0),
        'min_child_weight': (1, 8),
        'subsample': (0.5, 1),
        'colsample_bytree': (0.5, 1),
    }
    optimizer = BayesianOptimization(
        f=black_box_function,
        pbounds=pbounds,
        random_state=random_state,
    )
    optimizer.maximize(init_points=init_points, n_iter=n_iter)
    return optimizer.max['params']

# Repeated hold-out evaluation: for each run, hold out 4 samples, tune the
# hyperparameters on the remaining rows, refit on those rows and score the
# refitted model on the held-out samples.
test_scores = []
test_rmse_scores = []
n_runs = 10

for run in range(n_runs):
    # The run index doubles as the split seed, so each run uses a different
    # but repeatable train/test partition.
    X_train_all, X_test, y_train_all, y_test = train_test_split(
        X_full.values,
        y_full.values,
        test_size=4,
        random_state=run,
    )

    best_params = run_bayesian_optimization(X_train_all, y_train_all)

    # best_params holds raw search-space values, so apply the same conversions
    # used inside the optimisation objective: integer casts for the tree
    # count/depth and 10**x for the regularisation strengths.
    final_model = XGBRegressor(
        n_estimators=int(best_params['n_estimators']),
        max_depth=int(best_params['max_depth']),
        learning_rate=best_params['learning_rate'],
        gamma=best_params['gamma'],
        reg_alpha=10 ** best_params['reg_alpha'],
        reg_lambda=10 ** best_params['reg_lambda'],
        min_child_weight=best_params['min_child_weight'],
        subsample=best_params['subsample'],
        colsample_bytree=best_params['colsample_bytree'],
        random_state=2
    )
    final_model.fit(X_train_all, y_train_all.ravel())
    y_pred = final_model.predict(X_test)

    test_r2 = r2_score(y_test, y_pred)
    test_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    test_scores.append(test_r2)
    test_rmse_scores.append(test_rmse)

    # Report progress against n_runs rather than a hard-coded total.
    print(f"Run {run+1}/{n_runs} - Test R²: {test_r2:.4f}, Test RMSE: {test_rmse:.4f}")

# Summarise the per-run test metrics (np.std default, i.e. population std).
mean_r2 = np.mean(test_scores)
std_r2 = np.std(test_scores)
mean_rmse = np.mean(test_rmse_scores)
std_rmse = np.std(test_rmse_scores)

print(f"\n Average test R²score: {mean_r2:.4f} ± {std_r2:.4f}")
print(f"Average testRMSE: {mean_rmse:.4f} ± {std_rmse:.4f}")

|   iter    |  target   | colsam... |   gamma   | learni... | max_depth | min_ch... | n_esti... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m-0.1115  [39m | [39m0.7085   [39m | [39m0.7203   [39m | [39m0.01006  [39m | [39m4.419    [39m | [39m2.027    [39m | [39m55.34    [39m | [39m-4.069   [39m | [39m-3.272   [39m | [39m0.6984   [39m |
| [39m2        [39m | [39m-0.169   [39m | [39m0.7694   [39m | [39m0.4192   [39m | [39m0.3458   [39m | [39m3.636    [39m | [39m7.147    [39m | [39m23.45    [39m | [39m-1.648   [39m | [39m-2.913   [39m | [39m0.7793   [39m |
| [35m3        [39m | [35m0.2872   [39m | [35m0.5702   [39m | [35m0.1981   [39m | [35m0.4024   [39m | [35m9.746    [39m | [35m3.194    [39m | [35m349.9    [39m | [35m-0.6181  [39m | [35m-0.527   [39m | [35m0.5425   [39m |


| [35m34       [39m | [35m0.65     [39m | [35m1.0      [39m | [35m0.0      [39m | [35m0.5      [39m | [35m7.322    [39m | [35m4.346    [39m | [35m474.1    [39m | [35m-1.661   [39m | [35m-0.3587  [39m | [35m0.6678   [39m |
| [35m35       [39m | [35m0.7108   [39m | [35m1.0      [39m | [35m0.0      [39m | [35m0.5      [39m | [35m7.785    [39m | [35m4.274    [39m | [35m474.0    [39m | [35m-1.953   [39m | [35m-0.6191  [39m | [35m0.8534   [39m |
Run 1/10 - Test R²: 0.5019, Test RMSE: 0.1756
|   iter    |  target   | colsam... |   gamma   | learni... | max_depth | min_ch... | n_esti... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m-0.1663  [39m | [39m0.7085   [39m | [39m0.7203   [39m | [39m0.01006  [39m | [39m4.419    [39m | [39m2.027    [39m | [39m55.34    [39m | [39m-4.069   [39m 

| [39m31       [39m | [39m0.3783   [39m | [39m0.9707   [39m | [39m0.0      [39m | [39m0.4392   [39m | [39m7.528    [39m | [39m4.507    [39m | [39m473.8    [39m | [39m-1.327   [39m | [39m-0.1243  [39m | [39m0.5      [39m |
| [39m32       [39m | [39m0.4232   [39m | [39m0.5031   [39m | [39m0.0      [39m | [39m0.3035   [39m | [39m7.501    [39m | [39m4.674    [39m | [39m474.2    [39m | [39m-1.478   [39m | [39m0.0      [39m | [39m0.5      [39m |
| [39m33       [39m | [39m0.4069   [39m | [39m1.0      [39m | [39m0.0      [39m | [39m0.1843   [39m | [39m7.281    [39m | [39m4.623    [39m | [39m474.5    [39m | [39m-1.283   [39m | [39m-0.1481  [39m | [39m0.5      [39m |
| [39m34       [39m | [39m0.393    [39m | [39m0.584    [39m | [39m0.0      [39m | [39m0.07593  [39m | [39m7.111    [39m | [39m4.318    [39m | [39m474.1    [39m | [39m-1.03    [39m | [39m0.0      [39m | [39m0.5      [39m |
| [39m35       [39m | 

| [39m28       [39m | [39m0.4629   [39m | [39m0.5      [39m | [39m0.0      [39m | [39m0.4095   [39m | [39m7.577    [39m | [39m4.297    [39m | [39m474.5    [39m | [39m-1.973   [39m | [39m-0.9233  [39m | [39m0.5      [39m |
| [39m29       [39m | [39m0.4595   [39m | [39m0.5      [39m | [39m0.0      [39m | [39m0.4452   [39m | [39m7.96     [39m | [39m4.586    [39m | [39m474.5    [39m | [39m-1.537   [39m | [39m-0.4587  [39m | [39m0.5      [39m |
| [39m30       [39m | [39m0.4699   [39m | [39m0.5      [39m | [39m0.0      [39m | [39m0.5      [39m | [39m7.969    [39m | [39m4.516    [39m | [39m474.8    [39m | [39m-2.339   [39m | [39m-0.3452  [39m | [39m0.5      [39m |
| [39m31       [39m | [39m-0.1577  [39m | [39m0.5      [39m | [39m0.0      [39m | [39m0.5      [39m | [39m7.858    [39m | [39m5.056    [39m | [39m475.0    [39m | [39m-1.985   [39m | [39m-0.8751  [39m | [39m0.5      [39m |
| [39m32       [39m | 

| [39m25       [39m | [39m0.4063   [39m | [39m0.5      [39m | [39m0.0      [39m | [39m0.05555  [39m | [39m7.813    [39m | [39m4.52     [39m | [39m473.8    [39m | [39m-2.076   [39m | [39m-0.399   [39m | [39m0.5      [39m |
| [35m26       [39m | [35m0.5997   [39m | [35m0.5      [39m | [35m0.0      [39m | [35m0.3299   [39m | [35m7.273    [39m | [35m4.461    [39m | [35m473.9    [39m | [35m-1.537   [39m | [35m-0.7395  [39m | [35m0.5      [39m |
| [39m27       [39m | [39m0.4965   [39m | [39m0.5      [39m | [39m0.0      [39m | [39m0.0899   [39m | [39m7.011    [39m | [39m4.927    [39m | [39m474.1    [39m | [39m-1.573   [39m | [39m-0.2752  [39m | [39m0.5      [39m |
| [39m28       [39m | [39m0.5803   [39m | [39m0.5      [39m | [39m0.0      [39m | [39m0.1395   [39m | [39m7.012    [39m | [39m4.651    [39m | [39m473.2    [39m | [39m-1.516   [39m | [39m-0.5516  [39m | [39m0.5      [39m |
| [35m29       [39m | 

| [39m22       [39m | [39m0.2354   [39m | [39m0.966    [39m | [39m0.3091   [39m | [39m0.2734   [39m | [39m8.107    [39m | [39m3.222    [39m | [39m284.7    [39m | [39m-2.097   [39m | [39m-2.154   [39m | [39m0.803    [39m |
| [39m23       [39m | [39m0.2721   [39m | [39m0.5134   [39m | [39m0.04442  [39m | [39m0.1393   [39m | [39m2.223    [39m | [39m1.967    [39m | [39m163.3    [39m | [39m-3.558   [39m | [39m-4.665   [39m | [39m0.8592   [39m |
| [39m24       [39m | [39m-0.158   [39m | [39m0.5787   [39m | [39m0.523    [39m | [39m0.1063   [39m | [39m3.979    [39m | [39m6.335    [39m | [39m349.2    [39m | [39m-4.733   [39m | [39m-3.302   [39m | [39m0.8935   [39m |
| [39m25       [39m | [39m0.08593  [39m | [39m0.7611   [39m | [39m0.07363  [39m | [39m0.1534   [39m | [39m9.982    [39m | [39m5.302    [39m | [39m151.8    [39m | [39m-4.902   [39m | [39m-3.689   [39m | [39m0.6083   [39m |
| [39m26       [39m | 

| [39m19       [39m | [39m-0.1076  [39m | [39m0.8835   [39m | [39m0.5875   [39m | [39m0.4797   [39m | [39m3.076    [39m | [39m5.576    [39m | [39m15.4     [39m | [39m-3.126   [39m | [39m-4.777   [39m | [39m0.7339   [39m |
| [39m20       [39m | [39m-0.1515  [39m | [39m0.7011   [39m | [39m0.8262   [39m | [39m0.1233   [39m | [39m4.423    [39m | [39m3.72     [39m | [39m243.4    [39m | [39m-2.709   [39m | [39m-0.453   [39m | [39m0.6663   [39m |
| [39m21       [39m | [39m0.4516   [39m | [39m0.5365   [39m | [39m0.1838   [39m | [39m0.4607   [39m | [39m2.775    [39m | [39m3.936    [39m | [39m480.3    [39m | [39m-2.333   [39m | [39m-1.532   [39m | [39m0.6759   [39m |
| [39m22       [39m | [39m0.3783   [39m | [39m0.5857   [39m | [39m0.1354   [39m | [39m0.1786   [39m | [39m2.94     [39m | [39m4.101    [39m | [39m480.5    [39m | [39m-2.353   [39m | [39m-1.645   [39m | [39m0.8406   [39m |
| [35m23       [39m | 

| [39m16       [39m | [39m-0.22    [39m | [39m0.7438   [39m | [39m0.2381   [39m | [39m0.2571   [39m | [39m4.066    [39m | [39m5.016    [39m | [39m353.3    [39m | [39m-4.008   [39m | [39m-2.975   [39m | [39m0.5134   [39m |
| [39m17       [39m | [39m0.6978   [39m | [39m0.7151   [39m | [39m0.05799  [39m | [39m0.2806   [39m | [39m7.318    [39m | [39m4.612    [39m | [39m473.8    [39m | [39m-2.059   [39m | [39m-0.475   [39m | [39m0.5768   [39m |
| [39m18       [39m | [39m0.1507   [39m | [39m0.9386   [39m | [39m0.6857   [39m | [39m0.4783   [39m | [39m7.065    [39m | [39m2.754    [39m | [39m389.7    [39m | [39m-0.7766  [39m | [39m-1.832   [39m | [39m0.8811   [39m |
| [39m19       [39m | [39m0.7232   [39m | [39m0.5485   [39m | [39m0.002051 [39m | [39m0.1438   [39m | [39m7.305    [39m | [39m4.597    [39m | [39m473.8    [39m | [39m-2.06    [39m | [39m-0.4804  [39m | [39m0.5039   [39m |
| [35m20       [39m | 

| [39m13       [39m | [39m-0.1631  [39m | [39m0.8454   [39m | [39m0.9973   [39m | [39m0.09445  [39m | [39m3.097    [39m | [39m7.528    [39m | [39m352.1    [39m | [39m-4.67    [39m | [39m-1.223   [39m | [39m0.8769   [39m |
| [39m14       [39m | [39m-0.05847 [39m | [39m0.9615   [39m | [39m0.7115   [39m | [39m0.07089  [39m | [39m2.159    [39m | [39m1.183    [39m | [39m23.9     [39m | [39m-3.769   [39m | [39m-0.6999  [39m | [39m0.7694   [39m |
| [39m15       [39m | [39m-0.1427  [39m | [39m0.7764   [39m | [39m0.842    [39m | [39m0.07084  [39m | [39m4.233    [39m | [39m5.1      [39m | [39m486.1    [39m | [39m-2.195   [39m | [39m-4.907   [39m | [39m0.9003   [39m |
| [39m16       [39m | [39m-0.03608 [39m | [39m0.7438   [39m | [39m0.2381   [39m | [39m0.2571   [39m | [39m4.066    [39m | [39m5.016    [39m | [39m353.3    [39m | [39m-4.008   [39m | [39m-2.975   [39m | [39m0.5134   [39m |
| [39m17       [39m | 

| [39m10       [39m | [39m-0.1556  [39m | [39m0.8118   [39m | [39m0.7509   [39m | [39m0.181    [39m | [39m4.159    [39m | [39m7.271    [39m | [39m220.2    [39m | [39m-0.1758  [39m | [39m-1.683   [39m | [39m0.8108   [39m |
| [39m11       [39m | [39m-0.1944  [39m | [39m0.5574   [39m | [39m0.9495   [39m | [39m0.2305   [39m | [39m6.627    [39m | [39m3.857    [39m | [39m126.4    [39m | [39m-0.4831  [39m | [39m-2.132   [39m | [39m0.5014   [39m |
| [39m12       [39m | [39m0.2608   [39m | [39m0.8086   [39m | [39m0.3266   [39m | [39m0.2683   [39m | [39m9.088    [39m | [39m3.501    [39m | [39m456.1    [39m | [39m-1.883   [39m | [39m-4.921   [39m | [39m0.9647   [39m |
| [39m13       [39m | [39m-0.1477  [39m | [39m0.8454   [39m | [39m0.9973   [39m | [39m0.09445  [39m | [39m3.097    [39m | [39m7.528    [39m | [39m352.1    [39m | [39m-4.67    [39m | [39m-1.223   [39m | [39m0.8769   [39m |
| [39m14       [39m | 

| [35m7        [39m | [35m0.4261   [39m | [35m0.7458   [39m | [35m0.05336  [39m | [35m0.2913   [39m | [35m3.174    [39m | [35m5.125    [39m | [35m353.6    [39m | [35m-4.488   [39m | [35m-2.93    [39m | [35m0.8472   [39m |
| [35m8        [39m | [35m0.4303   [39m | [35m0.7071   [39m | [35m0.04995  [39m | [35m0.2726   [39m | [35m7.31     [39m | [35m4.604    [39m | [35m473.8    [39m | [35m-2.067   [39m | [35m-0.483   [39m | [35m0.5687   [39m |
| [39m9        [39m | [39m-0.1491  [39m | [39m0.5696   [39m | [39m0.8074   [39m | [39m0.2049   [39m | [39m3.323    [39m | [39m7.493    [39m | [39m180.8    [39m | [39m-1.246   [39m | [39m-1.37    [39m | [39m0.9417   [39m |
| [39m10       [39m | [39m-0.1599  [39m | [39m0.8118   [39m | [39m0.7509   [39m | [39m0.181    [39m | [39m4.159    [39m | [39m7.271    [39m | [39m220.2    [39m | [39m-0.1758  [39m | [39m-1.683   [39m | [39m0.8108   [39m |
| [39m11       [39m | 