In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import LeaveOneOut
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_squared_error
from bayes_opt import BayesianOptimization
import warnings

warnings.filterwarnings('ignore')

def run_bayesian_optimization(X_train_all, y_train_all, *, init_points=15, n_iter=20):
    """Tune GradientBoostingRegressor hyperparameters by Bayesian optimization.

    Each candidate is scored by the R2 of its leave-one-out predictions over
    the supplied training data; the optimizer maximizes that score.

    Parameters
    ----------
    X_train_all : array-like, shape (n_samples, n_features)
        Training features. Converted to an ndarray so that the positional
        fold indices can be applied directly.
    y_train_all : array-like, shape (n_samples,) or (n_samples, 1)
        Training targets; flattened to 1-D internally.
    init_points : int, default 15
        Number of random exploration points passed to ``maximize``.
    n_iter : int, default 20
        Number of optimization steps passed to ``maximize``.

    Returns
    -------
    dict
        The best hyperparameters in the exact form that was scored: keys
        ``learning_rate``, ``n_estimators``, ``min_samples_split``,
        ``max_features``, ``max_depth`` and ``max_leaf_nodes``. Integer
        parameters are already truncated to ``int`` and the continuous ones
        are clamped, so a model built from this dict matches the model whose
        score the optimizer reported.
    """
    # LeaveOneOut yields positional index arrays, which only index ndarrays
    # correctly (a DataFrame would interpret them as column labels).
    X_all = np.asarray(X_train_all)
    y_all = np.asarray(y_train_all).ravel()

    def to_model_params(learning_rate, n_estimators, min_samples_split,
                        max_features, max_depth, max_leaf_nodes):
        """Map one raw point of the continuous search space to estimator kwargs."""
        return {
            'learning_rate': max(learning_rate, 1e-3),
            'n_estimators': int(n_estimators),
            'min_samples_split': int(min_samples_split),
            # Kept strictly below 1.0; scikit-learn derives the per-split
            # feature count from this fraction, so 0.999 and 1.0 are not
            # interchangeable.
            'max_features': min(max_features, 0.999),
            'max_depth': int(max_depth),
            'max_leaf_nodes': int(max_leaf_nodes),
        }

    def black_box_function(learning_rate, n_estimators, min_samples_split, max_features, max_depth, max_leaf_nodes):
        """Return the leave-one-out R2 of one hyperparameter candidate."""
        params = to_model_params(
            learning_rate, n_estimators, min_samples_split,
            max_features, max_depth, max_leaf_nodes,
        )
        model = GradientBoostingRegressor(random_state=2, **params)

        # Every sample is the validation sample of exactly one fold, so the
        # prediction vector is filled completely and aligned with y_all.
        preds = np.empty(len(y_all), dtype=float)
        for train_idx, val_idx in LeaveOneOut().split(X_all):
            model.fit(X_all[train_idx], y_all[train_idx])
            preds[val_idx] = model.predict(X_all[val_idx])

        return r2_score(y_all, preds)

    pbounds = {
        'learning_rate': (0.001, 0.2),
        'n_estimators': (10, 500),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 1.0),
        'max_depth': (1, 5),
        'max_leaf_nodes': (2, 15)
    }

    optimizer = BayesianOptimization(
        f=black_box_function,
        pbounds=pbounds,
        random_state=1
    )
    optimizer.maximize(init_points=init_points, n_iter=n_iter)

    # Return the sanitized values rather than the raw optimizer point: the
    # raw point was never evaluated as-is (e.g. max_features == 1.0 was scored
    # as 0.999), so handing it back would let callers fit a different model
    # from the one that earned the best score.
    return to_model_params(**optimizer.max['params'])


# Input (feature) columns and the response column used throughout the cell.
features = ["lg(O3/DOC)", "lg(H2O2/O3)", "pH"]
target = "TOC removal(2h)"

# Load the 'RefData' sheet; the first spreadsheet column becomes the index
# and the first row supplies the column names.
Aug_all = pd.read_excel(
    r"C:\Users\HP\Desktop\NKU\O3-based AOPs\Peroxne.xlsx",
    sheet_name='RefData',
    header=0,
    index_col=0,
)

# Keep only the modelling columns (features first, target last).
Aug = Aug_all[[*features, target]]

# Positional split of the sheet: rows 0-35 and rows 36-53.
# NOTE(review): presumably rows 0-35 are the reference/literature samples and
# rows 36-53 the samples that get split into train/test - confirm against the
# spreadsheet layout.
Ref = Aug.iloc[:36]
data1 = Aug.iloc[36:54]

# Experiment settings, named once so the loop bound and the progress banner
# cannot drift apart.
N_REPEATS = 10   # number of random train/test splits of data1
TEST_SIZE = 4    # rows of data1 held out as the test set in each split
SPLIT_SEED = 0   # seed of the split sampler, so reruns draw the same splits

# Per-split test metrics.
r2_results = []
rmse_results = []

# True/predicted values pooled over all splits (train and test separately).
all_train_true = []
all_train_pred = []
all_test_true = []
all_test_pred = []

# Dedicated, seeded generator: the optimizer and the models are already
# seeded, so the split was the only source of run-to-run variation.
split_rng = np.random.default_rng(SPLIT_SEED)

# `iteration` rather than `iter`, which would shadow the builtin for the rest
# of the session.
for iteration in range(N_REPEATS):
    print(f"\n========== Iteration times {iteration + 1}/{N_REPEATS} ==========")

    # Hold out TEST_SIZE rows of data1; the remainder joins Ref for training.
    test_indices = split_rng.choice(data1.index.to_numpy(), size=TEST_SIZE, replace=False)
    test_set = data1.loc[test_indices]
    train_set = data1.drop(test_indices)

    combined_data = pd.concat([train_set, Ref], axis=0)

    # The scaler is fitted on the training rows only and then applied to the
    # held-out rows, so the test set does not influence the scaling.
    scaler = MinMaxScaler()

    X_combined = scaler.fit_transform(combined_data[features])
    y_combined = combined_data[target].values.reshape(-1, 1)

    # No further augmentation is applied; the training matrices are used as-is
    # under both names.
    X_augmented = X_combined
    y_augmented = y_combined

    X_test = scaler.transform(test_set[features])
    y_test = test_set[target].values

    # Hyperparameter search on the training rows only.
    best_params = run_bayesian_optimization(X_augmented, y_augmented)

    final_model = GradientBoostingRegressor(
        learning_rate=best_params['learning_rate'],
        n_estimators=int(best_params['n_estimators']),
        min_samples_split=int(best_params['min_samples_split']),
        max_features=best_params['max_features'],
        max_depth=int(best_params['max_depth']),
        max_leaf_nodes=int(best_params['max_leaf_nodes']),
        random_state=2
    )
    final_model.fit(X_augmented, y_augmented.ravel())

    # Pool the in-sample predictions.
    train_pred = final_model.predict(X_augmented)
    all_train_true.extend(y_augmented.ravel().tolist())
    all_train_pred.extend(train_pred.tolist())

    # Pool the held-out predictions.
    test_pred = final_model.predict(X_test)
    all_test_true.extend(y_test.tolist())
    all_test_pred.extend(test_pred.tolist())

    # Test metrics of this split.
    r2 = r2_score(y_test, test_pred)
    rmse = np.sqrt(mean_squared_error(y_test, test_pred))

    r2_results.append(r2)
    rmse_results.append(rmse)
    print(f"Results of this round: R2={r2:.4f}, RMSE={rmse:.4f}")

# Summary over all splits: mean and (population) standard deviation.
print("\n================ Final result ================")
print(f"Average R2 score: {np.mean(r2_results):.4f} ± {np.std(r2_results):.4f}")
print(f"Average RMSE score: {np.mean(rmse_results):.4f} ± {np.std(rmse_results):.4f}")
print("\n Detailed R2 results:", [round(x, 4) for x in r2_results])
print("Detailed RMSE results:", [round(x, 4) for x in rmse_results])


|   iter    |  target   | learni... | max_depth | max_fe... | max_le... | min_sa... | n_esti... |
-------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.7507   [39m | [39m0.08399  [39m | [39m3.881    [39m | [39m0.1001   [39m | [39m5.93     [39m | [39m5.375    [39m | [39m55.25    [39m |
| [39m2        [39m | [39m0.7414   [39m | [39m0.03807  [39m | [39m2.382    [39m | [39m0.4571   [39m | [39m9.005    [39m | [39m11.64    [39m | [39m345.8    [39m |
| [35m3        [39m | [35m0.7565   [39m | [35m0.04169  [39m | [35m4.512    [39m | [35m0.1246   [39m | [35m10.72    [39m | [35m11.6     [39m | [35m283.8    [39m |
| [39m4        [39m | [39m0.6884   [39m | [39m0.02894  [39m | [39m1.792    [39m | [39m0.8207   [39m | [39m14.59    [39m | [39m9.209    [39m | [39m349.2    [39m |
| [39m5        [39m | [39m0.6185   [39m | [39m0.1754   [39m | [39m4.578    [39m | 

| [39m9        [39m | [39m0.7202   [39m | [39m0.05827  [39m | [39m1.52     [39m | [39m0.1174   [39m | [39m10.82    [39m | [39m6.867    [39m | [39m140.1    [39m |
| [39m10       [39m | [39m0.6957   [39m | [39m0.09882  [39m | [39m1.213    [39m | [39m0.6167   [39m | [39m3.907    [39m | [39m15.55    [39m | [39m352.9    [39m |
| [39m11       [39m | [39m0.7427   [39m | [39m0.02136  [39m | [39m2.656    [39m | [39m0.725    [39m | [39m7.384    [39m | [39m3.149    [39m | [39m272.6    [39m |
| [39m12       [39m | [39m0.7019   [39m | [39m0.1331   [39m | [39m3.06     [39m | [39m0.9501   [39m | [39m9.625    [39m | [39m22.78    [39m | [39m77.36    [39m |
| [39m13       [39m | [39m0.7552   [39m | [39m0.02872  [39m | [39m4.23     [39m | [39m0.4579   [39m | [39m4.15     [39m | [39m23.33    [39m | [39m180.4    [39m |
| [39m14       [39m | [39m0.6848   [39m | [39m0.1504   [39m | [39m3.904    [39m | [39m0.895    [39m 

| [39m19       [39m | [39m0.7727   [39m | [39m0.06068  [39m | [39m3.935    [39m | [39m0.158    [39m | [39m11.08    [39m | [39m11.41    [39m | [39m283.8    [39m |
| [39m20       [39m | [39m0.7764   [39m | [39m0.0475   [39m | [39m4.104    [39m | [39m0.6452   [39m | [39m4.525    [39m | [39m23.49    [39m | [39m179.8    [39m |
| [39m21       [39m | [39m0.7508   [39m | [39m0.05629  [39m | [39m2.2      [39m | [39m0.777    [39m | [39m9.361    [39m | [39m18.07    [39m | [39m165.1    [39m |
| [35m22       [39m | [35m0.7851   [39m | [35m0.05254  [39m | [35m4.857    [39m | [35m0.2515   [39m | [35m12.21    [39m | [35m12.49    [39m | [35m115.3    [39m |
| [39m23       [39m | [39m0.712    [39m | [39m0.06679  [39m | [39m1.646    [39m | [39m0.3375   [39m | [39m9.999    [39m | [39m10.33    [39m | [39m488.5    [39m |
| [39m24       [39m | [39m0.7582   [39m | [39m0.02503  [39m | [39m2.029    [39m | [39m0.2521   [39m 

| [39m29       [39m | [39m0.7341   [39m | [39m0.0739   [39m | [39m1.727    [39m | [39m0.8856   [39m | [39m9.309    [39m | [39m16.39    [39m | [39m458.7    [39m |
| [39m30       [39m | [39m0.08487  [39m | [39m0.001    [39m | [39m4.388    [39m | [39m0.3106   [39m | [39m12.46    [39m | [39m12.74    [39m | [39m115.4    [39m |
| [39m31       [39m | [39m0.7871   [39m | [39m0.09442  [39m | [39m5.0      [39m | [39m0.2149   [39m | [39m11.95    [39m | [39m12.24    [39m | [39m115.2    [39m |
| [39m32       [39m | [39m0.08798  [39m | [39m0.001    [39m | [39m5.0      [39m | [39m0.5129   [39m | [39m12.05    [39m | [39m12.33    [39m | [39m115.5    [39m |
| [39m33       [39m | [39m0.7504   [39m | [39m0.1624   [39m | [39m4.524    [39m | [39m0.416    [39m | [39m14.96    [39m | [39m5.073    [39m | [39m309.8    [39m |
| [39m34       [39m | [39m0.7732   [39m | [39m0.1201   [39m | [39m4.901    [39m | [39m0.1      [39m 

| [39m2        [39m | [39m0.7402   [39m | [39m0.03807  [39m | [39m2.382    [39m | [39m0.4571   [39m | [39m9.005    [39m | [39m11.64    [39m | [39m345.8    [39m |
| [35m3        [39m | [35m0.7611   [39m | [35m0.04169  [39m | [35m4.512    [39m | [35m0.1246   [39m | [35m10.72    [39m | [35m11.6     [39m | [35m283.8    [39m |
| [39m4        [39m | [39m0.7077   [39m | [39m0.02894  [39m | [39m1.792    [39m | [39m0.8207   [39m | [39m14.59    [39m | [39m9.209    [39m | [39m349.2    [39m |
| [39m5        [39m | [39m0.6219   [39m | [39m0.1754   [39m | [39m4.578    [39m | [39m0.1765   [39m | [39m2.508    [39m | [39m5.906    [39m | [39m440.3    [39m |
| [39m6        [39m | [39m0.7361   [39m | [39m0.02057  [39m | [39m2.684    [39m | [39m0.9621   [39m | [39m8.931    [39m | [39m17.91    [39m | [39m164.6    [39m |
| [39m7        [39m | [39m0.6829   [39m | [39m0.1376   [39m | [39m4.339    [39m | [39m0.1165   [39m 

| [39m12       [39m | [39m0.7579   [39m | [39m0.1331   [39m | [39m3.06     [39m | [39m0.9501   [39m | [39m9.625    [39m | [39m22.78    [39m | [39m77.36    [39m |
| [39m13       [39m | [39m0.7769   [39m | [39m0.02872  [39m | [39m4.23     [39m | [39m0.4579   [39m | [39m4.15     [39m | [39m23.33    [39m | [39m180.4    [39m |
| [39m14       [39m | [39m0.7784   [39m | [39m0.1504   [39m | [39m3.904    [39m | [39m0.895    [39m | [39m10.11    [39m | [39m19.27    [39m | [39m181.0    [39m |
| [39m15       [39m | [39m0.795    [39m | [39m0.05472  [39m | [39m4.584    [39m | [39m0.4853   [39m | [39m14.54    [39m | [39m17.26    [39m | [39m314.6    [39m |
| [35m16       [39m | [35m0.8065   [39m | [35m0.1259   [39m | [35m4.589    [39m | [35m0.2012   [39m | [35m10.79    [39m | [35m11.67    [39m | [35m283.8    [39m |
| [39m17       [39m | [39m0.7875   [39m | [39m0.2      [39m | [39m5.0      [39m | [39m1.0      [39m 

| [35m22       [39m | [35m0.7858   [39m | [35m0.05254  [39m | [35m4.857    [39m | [35m0.2515   [39m | [35m12.21    [39m | [35m12.49    [39m | [35m115.3    [39m |
| [39m23       [39m | [39m0.6995   [39m | [39m0.06679  [39m | [39m1.646    [39m | [39m0.3375   [39m | [39m9.999    [39m | [39m10.33    [39m | [39m488.5    [39m |
| [39m24       [39m | [39m0.7684   [39m | [39m0.02503  [39m | [39m2.029    [39m | [39m0.2521   [39m | [39m14.07    [39m | [39m14.75    [39m | [39m163.4    [39m |
| [39m25       [39m | [39m0.7235   [39m | [39m0.07754  [39m | [39m3.498    [39m | [39m0.5247   [39m | [39m8.575    [39m | [39m20.35    [39m | [39m485.0    [39m |
| [39m26       [39m | [39m0.7244   [39m | [39m0.0751   [39m | [39m4.215    [39m | [39m0.9389   [39m | [39m2.239    [39m | [39m18.28    [39m | [39m100.3    [39m |
| [39m27       [39m | [39m0.7526   [39m | [39m0.1218   [39m | [39m4.151    [39m | [39m0.1004   [39m 

| [39m32       [39m | [39m0.7428   [39m | [39m0.1008   [39m | [39m3.42     [39m | [39m0.6453   [39m | [39m10.08    [39m | [39m22.71    [39m | [39m77.85    [39m |
| [39m33       [39m | [39m0.7506   [39m | [39m0.1624   [39m | [39m4.524    [39m | [39m0.416    [39m | [39m14.96    [39m | [39m5.073    [39m | [39m309.8    [39m |
| [39m34       [39m | [39m0.7585   [39m | [39m0.1502   [39m | [39m3.922    [39m | [39m0.1494   [39m | [39m8.644    [39m | [39m19.93    [39m | [39m49.87    [39m |
| [39m35       [39m | [39m0.7178   [39m | [39m0.1138   [39m | [39m2.147    [39m | [39m0.6744   [39m | [39m14.25    [39m | [39m14.49    [39m | [39m163.8    [39m |
本轮结果: R²=0.6842, RMSE=0.0880

|   iter    |  target   | learni... | max_depth | max_fe... | max_le... | min_sa... | n_esti... |
-------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.7922   [39m | [39m0.08399  [