In [4]:
import optuna
import numpy as np
from sklearn.model_selection import KFold,train_test_split
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
import pandas as pd

In [2]:
#数据
data_train=pd.read_csv("/Users/macbookair/Documents/python 2/data.csv")
data_train_price=data_train["价格"]
data_train=data_train.drop(columns=["Unnamed: 0","价格","text"])
X=data_train
y=data_train_price



In [11]:
X_sampled, _, y_sampled, _ = train_test_split(X, y, random_state=42, shuffle=True)


In [12]:
# 总试验次数
N_TRIALS = 50
progress_bar = tqdm(total=N_TRIALS)


120it [1:35:40, 47.84s/it]                               | 0/50 [00:00<?, ?it/s][A


In [13]:
# 定义每个模型的调参目标函数
def create_objective(model_name):
    def objective(trial):
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        rmse_scores = []

        for train_idx, val_idx in kf.split(X_sampled):
            X_train, X_val = X_sampled.iloc[train_idx], X_sampled.iloc[val_idx]
            y_train, y_val = y_sampled.iloc[train_idx], y_sampled.iloc[val_idx]

            if model_name == 'xgb':
                params = {
                    'max_depth': trial.suggest_int('max_depth', 3, 10),
                    'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
                    'n_estimators': trial.suggest_int('n_estimators', 100, 1000)
                }
                model = XGBRegressor(**params, verbosity=0)
            elif model_name == 'lgb':
                params = {
                    'num_leaves': trial.suggest_int('num_leaves', 20, 100),
                    'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
                    'n_estimators': trial.suggest_int('n_estimators', 100, 1000)
                }
                model = LGBMRegressor(**params)
            elif model_name == 'cat':
                params = {
                    'depth': trial.suggest_int('depth', 3, 10),
                    'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
                    'iterations': trial.suggest_int('iterations', 100, 1000)
                }
                model = CatBoostRegressor(**params, verbose=0)
            elif model_name == 'rf':
                params = {
                    'max_depth': trial.suggest_int('max_depth', 5, 20),
                    'n_estimators': trial.suggest_int('n_estimators', 100, 1000)
                }
                model = RandomForestRegressor(**params)
            else:
                raise ValueError("Unknown model")

            model.fit(X_train, y_train)
            preds = model.predict(X_val)
            rmse = mean_squared_error(y_val, preds, squared=False)
            rmse_scores.append(rmse)

        return np.mean(rmse_scores)
    return objective

In [14]:
def callback(study, trial):
    progress_bar.update(1)

In [15]:
# 创建一个函数批量调参
def tune_all_models():
    models = ['xgb', 'lgb', 'cat', 'rf']
    best_params = {}

    for model_name in tqdm(models, desc="Tuning models"):
        study = optuna.create_study(direction='minimize')
        study.optimize(create_objective(model_name), n_trials=30,callbacks=[callback])
        best_params[model_name] = study.best_params

    return best_params


In [16]:
# 执行调参
best_hyperparams = tune_all_models()
best_hyperparams


Tuning models:   0%|                                      | 0/4 [00:00<?, ?it/s][I 2025-06-04 00:51:31,544] A new study created in memory with name: no-name-1f92eaf7-0471-4ad7-a478-76d2e8296cce
[I 2025-06-04 00:51:44,981] Trial 0 finished with value: 751375.3734359893 and parameters: {'max_depth': 6, 'learning_rate': 0.2872039039390133, 'n_estimators': 535}. Best is trial 0 with value: 751375.3734359893.

  2%|▉                                           | 1/50 [00:18<15:12, 18.61s/it][A[I 2025-06-04 00:52:11,856] Trial 1 finished with value: 750305.1649093869 and parameters: {'max_depth': 8, 'learning_rate': 0.09541100974833464, 'n_estimators': 770}. Best is trial 1 with value: 750305.1649093869.

  4%|█▊                                          | 2/50 [00:45<18:46, 23.47s/it][A[I 2025-06-04 00:52:18,879] Trial 2 finished with value: 781796.7353751882 and parameters: {'max_depth': 8, 'learning_rate': 0.20868223113430293, 'n_estimators': 143}. Best is trial 1 with value: 750305.164909

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008853 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007811 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007795 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:02:43,913] Trial 0 finished with value: 731489.4631806696 and parameters: {'num_leaves': 39, 'learning_rate': 0.2548768558471221, 'n_estimators': 923}. Best is trial 0 with value: 731489.4631806696.

 62%|██████████████████████████▋                | 31/50 [11:17<06:04, 19.16s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008780 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008887 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008880 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:02:58,171] Trial 1 finished with value: 778529.5193881042 and parameters: {'num_leaves': 45, 'learning_rate': 0.05613946202155921, 'n_estimators': 541}. Best is trial 0 with value: 731489.4631806696.

 64%|███████████████████████████▌               | 32/50 [11:31<05:18, 17.69s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008365 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008316 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008917 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:03:06,806] Trial 2 finished with value: 802301.3537346857 and parameters: {'num_leaves': 60, 'learning_rate': 0.07100243328181766, 'n_estimators': 242}. Best is trial 0 with value: 731489.4631806696.

 66%|████████████████████████████▍              | 33/50 [11:40<04:14, 14.97s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010658 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009694 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008995 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:03:13,728] Trial 3 finished with value: 789290.3201352238 and parameters: {'num_leaves': 29, 'learning_rate': 0.09354162509218239, 'n_estimators': 323}. Best is trial 0 with value: 731489.4631806696.

 68%|█████████████████████████████▏             | 34/50 [11:47<03:20, 12.56s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008295 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009147 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008830 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:03:39,210] Trial 4 finished with value: 771250.5584730072 and parameters: {'num_leaves': 61, 'learning_rate': 0.2360094047270342, 'n_estimators': 811}. Best is trial 0 with value: 731489.4631806696.

 70%|██████████████████████████████             | 35/50 [12:12<04:06, 16.43s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008708 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008881 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007730 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:03:46,282] Trial 5 finished with value: 811928.4717269632 and parameters: {'num_leaves': 43, 'learning_rate': 0.06509513003536752, 'n_estimators': 249}. Best is trial 0 with value: 731489.4631806696.

 72%|██████████████████████████████▉            | 36/50 [12:19<03:10, 13.63s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008805 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008547 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008062 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:03:53,311] Trial 6 finished with value: 778749.1510358794 and parameters: {'num_leaves': 48, 'learning_rate': 0.1429380532858638, 'n_estimators': 244}. Best is trial 0 with value: 731489.4631806696.

 74%|███████████████████████████████▊           | 37/50 [12:26<02:31, 11.65s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008987 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007626 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008575 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:04:27,179] Trial 7 finished with value: 780960.4871965782 and parameters: {'num_leaves': 74, 'learning_rate': 0.1815208927221762, 'n_estimators': 921}. Best is trial 0 with value: 731489.4631806696.

 76%|████████████████████████████████▋          | 38/50 [13:00<03:39, 18.31s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009255 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011966 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009223 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:04:49,708] Trial 8 finished with value: 771657.3455804976 and parameters: {'num_leaves': 81, 'learning_rate': 0.1778026325182301, 'n_estimators': 522}. Best is trial 0 with value: 731489.4631806696.

 78%|█████████████████████████████████▌         | 39/50 [13:23<03:35, 19.58s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008806 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009458 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008985 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:05:06,153] Trial 9 finished with value: 775435.4423876603 and parameters: {'num_leaves': 28, 'learning_rate': 0.04503261578455015, 'n_estimators': 883}. Best is trial 0 with value: 731489.4631806696.

 80%|██████████████████████████████████▍        | 40/50 [13:39<03:06, 18.64s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008469 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007784 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007723 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:05:17,294] Trial 10 finished with value: 744727.0358062129 and parameters: {'num_leaves': 24, 'learning_rate': 0.27735987666603157, 'n_estimators': 707}. Best is trial 0 with value: 731489.4631806696.

 82%|███████████████████████████████████▎       | 41/50 [13:50<02:27, 16.39s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007639 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007036 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007685 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:05:30,343] Trial 11 finished with value: 740278.8554184828 and parameters: {'num_leaves': 27, 'learning_rate': 0.2988798490712939, 'n_estimators': 749}. Best is trial 0 with value: 731489.4631806696.

 84%|████████████████████████████████████       | 42/50 [14:03<02:03, 15.39s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007856 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008759 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011229 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:06:16,451] Trial 12 finished with value: 793867.3593519125 and parameters: {'num_leaves': 100, 'learning_rate': 0.29271474894547667, 'n_estimators': 991}. Best is trial 0 with value: 731489.4631806696.

 86%|████████████████████████████████████▉      | 43/50 [14:50<02:52, 24.60s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009414 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009402 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009012 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:06:32,147] Trial 13 finished with value: 747541.8643330268 and parameters: {'num_leaves': 38, 'learning_rate': 0.2396378032884777, 'n_estimators': 705}. Best is trial 0 with value: 731489.4631806696.

 88%|█████████████████████████████████████▊     | 44/50 [15:05<02:11, 21.93s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009922 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008524 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007864 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:06:43,005] Trial 14 finished with value: 736955.3962913199 and parameters: {'num_leaves': 22, 'learning_rate': 0.24307657817013617, 'n_estimators': 723}. Best is trial 0 with value: 731489.4631806696.

 90%|██████████████████████████████████████▋    | 45/50 [15:16<01:33, 18.61s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008066 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008273 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008139 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:06:55,440] Trial 15 finished with value: 751331.898425779 and parameters: {'num_leaves': 36, 'learning_rate': 0.24033755026552045, 'n_estimators': 621}. Best is trial 0 with value: 731489.4631806696.

 92%|███████████████████████████████████████▌   | 46/50 [15:29<01:07, 16.76s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007857 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007750 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007968 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:07:01,675] Trial 16 finished with value: 756793.403238869 and parameters: {'num_leaves': 21, 'learning_rate': 0.2086301310477772, 'n_estimators': 417}. Best is trial 0 with value: 731489.4631806696.

 94%|████████████████████████████████████████▍  | 47/50 [15:35<00:40, 13.60s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007874 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010335 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009500 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:07:25,145] Trial 17 finished with value: 754515.6390801053 and parameters: {'num_leaves': 53, 'learning_rate': 0.1375137064650746, 'n_estimators': 834}. Best is trial 0 with value: 731489.4631806696.

 96%|█████████████████████████████████████████▎ | 48/50 [15:58<00:33, 16.56s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008692 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007925 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007562 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:07:44,115] Trial 18 finished with value: 735856.8906027703 and parameters: {'num_leaves': 35, 'learning_rate': 0.2632291286664726, 'n_estimators': 983}. Best is trial 0 with value: 731489.4631806696.

 98%|██████████████████████████████████████████▏| 49/50 [16:17<00:17, 17.28s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008189 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009002 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008758 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:08:15,973] Trial 19 finished with value: 770759.4822556733 and parameters: {'num_leaves': 56, 'learning_rate': 0.258945918062371, 'n_estimators': 977}. Best is trial 0 with value: 731489.4631806696.

100%|███████████████████████████████████████████| 50/50 [16:49<00:00, 21.66s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009199 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008772 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008611 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:08:38,484] Trial 20 finished with value: 817761.0915336258 and parameters: {'num_leaves': 36, 'learning_rate': 0.018102296136151108, 'n_estimators': 888}. Best is trial 0 with value: 731489.4631806696.

51it [17:12, 21.91s/it]                                                         [A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011688 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009211 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008277 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:08:56,687] Trial 21 finished with value: 739110.9811020407 and parameters: {'num_leaves': 35, 'learning_rate': 0.20583075939599144, 'n_estimators': 797}. Best is trial 0 with value: 731489.4631806696.

52it [17:30, 20.80s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005381 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.017545 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008370 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:09:06,905] Trial 22 finished with value: 745048.0944676977 and parameters: {'num_leaves': 20, 'learning_rate': 0.26424194855342065, 'n_estimators': 644}. Best is trial 0 with value: 731489.4631806696.

53it [17:40, 17.63s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012918 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007481 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007593 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:09:27,381] Trial 23 finished with value: 739681.1903274016 and parameters: {'num_leaves': 31, 'learning_rate': 0.21990755580009147, 'n_estimators': 993}. Best is trial 0 with value: 731489.4631806696.

54it [18:01, 18.48s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011652 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008675 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009703 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:09:47,527] Trial 24 finished with value: 750897.6701574555 and parameters: {'num_leaves': 40, 'learning_rate': 0.1807314624323546, 'n_estimators': 907}. Best is trial 0 with value: 731489.4631806696.

55it [18:21, 18.98s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009362 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010021 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007473 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:09:53,623] Trial 25 finished with value: 794521.8120910327 and parameters: {'num_leaves': 72, 'learning_rate': 0.2752692528966756, 'n_estimators': 141}. Best is trial 0 with value: 731489.4631806696.

56it [18:27, 15.11s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008918 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008360 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009315 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:10:15,364] Trial 26 finished with value: 770476.029921944 and parameters: {'num_leaves': 49, 'learning_rate': 0.26135392637626303, 'n_estimators': 771}. Best is trial 0 with value: 731489.4631806696.

57it [18:48, 17.10s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010112 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007311 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010253 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:10:29,100] Trial 27 finished with value: 745448.9363265677 and parameters: {'num_leaves': 30, 'learning_rate': 0.11436084353605902, 'n_estimators': 636}. Best is trial 0 with value: 731489.4631806696.

58it [19:02, 16.09s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007194 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008437 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007779 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:10:42,042] Trial 28 finished with value: 729935.657262458 and parameters: {'num_leaves': 20, 'learning_rate': 0.2199761426536558, 'n_estimators': 937}. Best is trial 28 with value: 729935.657262458.

59it [19:15, 15.15s/it][A

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009022 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1069
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1966133.280770
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008725 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1066
[LightGBM] [Info] Number of data points in the train set: 50479, number of used features: 90
[LightGBM] [Info] Start training from score 1959077.578795
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009936 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory 

[I 2025-06-04 01:11:04,727] Trial 29 finished with value: 756440.7668716877 and parameters: {'num_leaves': 44, 'learning_rate': 0.21861089125396205, 'n_estimators': 939}. Best is trial 28 with value: 729935.657262458.

Tuning models:  50%|██████████████▌              | 2/4 [19:33<19:09, 574.98s/it][I 2025-06-04 01:11:04,731] A new study created in memory with name: no-name-648f0f2c-fa53-411f-8c23-413879a7b4d0
[I 2025-06-04 01:11:33,385] Trial 0 finished with value: 780945.9285754373 and parameters: {'depth': 8, 'learning_rate': 0.048272840390416745, 'iterations': 728}. Best is trial 0 with value: 780945.9285754373.

61it [20:07, 20.78s/it][A[I 2025-06-04 01:12:08,445] Trial 1 finished with value: 690130.5048388718 and parameters: {'depth': 8, 'learning_rate': 0.13124769075930845, 'iterations': 929}. Best is trial 1 with value: 690130.5048388718.

62it [20:42, 25.07s/it][A[I 2025-06-04 01:12:22,873] Trial 2 finished with value: 701852.1314063268 and parameters: {'depth': 7, 'learning_

{'xgb': {'max_depth': 7,
  'learning_rate': 0.14005821753397463,
  'n_estimators': 998},
 'lgb': {'num_leaves': 20,
  'learning_rate': 0.2199761426536558,
  'n_estimators': 937},
 'cat': {'depth': 9, 'learning_rate': 0.18537299613880068, 'iterations': 979},
 'rf': {'max_depth': 20, 'n_estimators': 783}}

In [18]:
best_hyperparams

{'xgb': {'max_depth': 7,
  'learning_rate': 0.14005821753397463,
  'n_estimators': 998},
 'lgb': {'num_leaves': 20,
  'learning_rate': 0.2199761426536558,
  'n_estimators': 937},
 'cat': {'depth': 9, 'learning_rate': 0.18537299613880068, 'iterations': 979},
 'rf': {'max_depth': 20, 'n_estimators': 783}}