In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.multioutput import RegressorChain
from lightgbm import LGBMRegressor
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
import xgboost as xgb
from catboost import CatBoostRegressor
from sklearn.linear_model import LinearRegression
import numpy as np
from xgboost import XGBRegressor



from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

from sklearn.ensemble import StackingRegressor

import matplotlib.pyplot as plt

from scipy import stats
import numpy as np
from sklearn.metrics import mean_absolute_error

import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
preprocessed_dataset = pd.read_csv("../data/pre-processed/preprocessed_youtube_data.csv")

In [3]:
# Convert to datetime if not already
preprocessed_dataset['published_time'] = pd.to_datetime(preprocessed_dataset['published_time'], format='%H:%M:%S', errors='coerce')

# Extract hour and minute
preprocessed_dataset['published_hour'] = preprocessed_dataset['published_time'].dt.hour
preprocessed_dataset['published_minute'] = preprocessed_dataset['published_time'].dt.minute

# Drop the original time column
preprocessed_dataset.drop(columns=['published_time'], inplace=True)

In [4]:
# Separate label encoders for each column
le_day = LabelEncoder()
le_def = LabelEncoder()

preprocessed_dataset['published_day_of_week'] = le_day.fit_transform(preprocessed_dataset['published_day_of_week'])
preprocessed_dataset['definition'] = le_def.fit_transform(preprocessed_dataset['definition'])


In [5]:
missing_mask = preprocessed_dataset[['like_count_initial', 'like_count_final', 'view_count_initial', 'view_count_final']].isnull()

# Check rows where all four columns are NaN
rows_all_nan = missing_mask.all(axis=1)

print("Number of rows with all four columns NaN:", rows_all_nan.sum())

# Optionally, see those rows
print(preprocessed_dataset[rows_all_nan])

Number of rows with all four columns NaN: 15
       category_id  definition  view_count_initial  like_count_initial  \
360            NaN           2                 NaN                 NaN   
414            NaN           2                 NaN                 NaN   
1053           NaN           2                 NaN                 NaN   
1635           NaN           2                 NaN                 NaN   
3221           NaN           2                 NaN                 NaN   
4503           NaN           2                 NaN                 NaN   
8791           NaN           2                 NaN                 NaN   
13543          NaN           2                 NaN                 NaN   
16293          NaN           2                 NaN                 NaN   
17041          NaN           2                 NaN                 NaN   
17227          NaN           2                 NaN                 NaN   
17656          NaN           2                 NaN                 

In [6]:
rows_any_nan = missing_mask.any(axis=1)
print("Rows with any of the four columns NaN:", rows_any_nan.sum())

Rows with any of the four columns NaN: 728


In [7]:
preprocessed_dataset = preprocessed_dataset.dropna(subset=[
    'like_count_initial',
    'like_count_final',
    'view_count_initial',
    'view_count_final'
])

# Define target columns
target_columns = [
    'like_count_initial',
    'like_count_final',
    'view_count_initial',
    'view_count_final'
]

In [8]:
# Choose columns to check for outliers, e.g. target columns
cols_to_check = target_columns  # or other numeric features

z_scores = np.abs(stats.zscore(preprocessed_dataset[cols_to_check]))
# Define threshold, e.g. 3 std deviations
threshold = 3
# Keep only rows where all z-scores are below threshold (no outlier)
non_outliers = (z_scores < threshold).all(axis=1)


print(f"Rows before outlier removal: {len(preprocessed_dataset)}")
preprocessed_dataset = preprocessed_dataset[non_outliers]
print(f"Rows after outlier removal: {len(preprocessed_dataset)}")

Rows before outlier removal: 20613
Rows after outlier removal: 20399


In [9]:
# Separate features and targets
X = preprocessed_dataset.drop(columns=target_columns)
y = preprocessed_dataset[target_columns]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_valid, y_train, y_valid = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [10]:
# 2. Define objective function for Optuna
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "gamma": trial.suggest_float("gamma", 0, 5),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 5.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 5.0),
        "random_state": 42,
        "n_jobs": -1,
    }

    xgb = XGBRegressor(**params)
    model = MultiOutputRegressor(xgb)
    model.fit(X_train, y_train)

    preds = model.predict(X_valid)
    return mean_absolute_error(y_valid, preds)

# 3. Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=30, show_progress_bar=True)

# 4. Train final model with best params
best_params = study.best_trial.params
best_model = MultiOutputRegressor(XGBRegressor(**best_params))
best_model.fit(X_train, y_train)

# 5. Predict and evaluate
y_pred = best_model.predict(X_valid)
mae = mean_absolute_error(y_valid, y_pred)
print(f"✅ Best MAE: {mae:.4f}")

[I 2025-07-31 09:14:14,541] A new study created in memory with name: no-name-6c1985e7-84d7-4ac3-aebf-6880ead1c11c
Best trial: 0. Best value: 334.798:   3%|▎         | 1/30 [00:01<00:36,  1.27s/it]

[I 2025-07-31 09:14:15,817] Trial 0 finished with value: 334.797607421875 and parameters: {'n_estimators': 221, 'max_depth': 4, 'learning_rate': 0.01360403206523462, 'subsample': 0.9381384114391893, 'colsample_bytree': 0.5994871129710978, 'gamma': 2.0872107138738807, 'reg_alpha': 1.1939013552736917, 'reg_lambda': 1.6419752421957785}. Best is trial 0 with value: 334.797607421875.


Best trial: 0. Best value: 334.798:   7%|▋         | 2/30 [00:02<00:32,  1.15s/it]

[I 2025-07-31 09:14:16,877] Trial 1 finished with value: 352.4061279296875 and parameters: {'n_estimators': 238, 'max_depth': 3, 'learning_rate': 0.2349679240967145, 'subsample': 0.6907801228244059, 'colsample_bytree': 0.5458908080791052, 'gamma': 3.4441800793754185, 'reg_alpha': 2.0081120668773007, 'reg_lambda': 0.11520751158410947}. Best is trial 0 with value: 334.797607421875.


Best trial: 0. Best value: 334.798:  10%|█         | 3/30 [00:03<00:29,  1.08s/it]

[I 2025-07-31 09:14:17,873] Trial 2 finished with value: 341.2237854003906 and parameters: {'n_estimators': 100, 'max_depth': 7, 'learning_rate': 0.20933949756808834, 'subsample': 0.7760634595516149, 'colsample_bytree': 0.5728271601792538, 'gamma': 1.9554075285527666, 'reg_alpha': 4.0509216186844395, 'reg_lambda': 2.7108764855998597}. Best is trial 0 with value: 334.797607421875.


Best trial: 3. Best value: 321.261:  13%|█▎        | 4/30 [00:16<02:27,  5.67s/it]

[I 2025-07-31 09:14:30,586] Trial 3 finished with value: 321.26116943359375 and parameters: {'n_estimators': 604, 'max_depth': 12, 'learning_rate': 0.06827780074280858, 'subsample': 0.9314580124332552, 'colsample_bytree': 0.9868512074412708, 'gamma': 2.551023562328643, 'reg_alpha': 1.2625225682490888, 'reg_lambda': 2.6609722691889353}. Best is trial 3 with value: 321.26116943359375.


Best trial: 3. Best value: 321.261:  17%|█▋        | 5/30 [00:25<02:59,  7.18s/it]

[I 2025-07-31 09:14:40,452] Trial 4 finished with value: 344.0086975097656 and parameters: {'n_estimators': 927, 'max_depth': 9, 'learning_rate': 0.12697115289450117, 'subsample': 0.9613490354578808, 'colsample_bytree': 0.5953456587435282, 'gamma': 2.1152141702936023, 'reg_alpha': 2.614033079796032, 'reg_lambda': 2.2330384500217604}. Best is trial 3 with value: 321.26116943359375.


Best trial: 3. Best value: 321.261:  20%|██        | 6/30 [00:34<03:02,  7.61s/it]

[I 2025-07-31 09:14:48,877] Trial 5 finished with value: 352.8922119140625 and parameters: {'n_estimators': 484, 'max_depth': 11, 'learning_rate': 0.13952665057855734, 'subsample': 0.6394139411192422, 'colsample_bytree': 0.7424594864487544, 'gamma': 4.774134161304978, 'reg_alpha': 1.1678664575252422, 'reg_lambda': 2.4353335818585986}. Best is trial 3 with value: 321.26116943359375.


Best trial: 6. Best value: 310.433:  23%|██▎       | 7/30 [00:36<02:14,  5.86s/it]

[I 2025-07-31 09:14:51,138] Trial 6 finished with value: 310.4329833984375 and parameters: {'n_estimators': 528, 'max_depth': 3, 'learning_rate': 0.05109208878855795, 'subsample': 0.908495628226798, 'colsample_bytree': 0.8510327927383179, 'gamma': 1.3335568103955053, 'reg_alpha': 0.6665831410701906, 'reg_lambda': 1.1739495223798762}. Best is trial 6 with value: 310.4329833984375.


Best trial: 6. Best value: 310.433:  27%|██▋       | 8/30 [00:44<02:23,  6.51s/it]

[I 2025-07-31 09:14:59,046] Trial 7 finished with value: 362.4527893066406 and parameters: {'n_estimators': 751, 'max_depth': 9, 'learning_rate': 0.254099963865433, 'subsample': 0.7940344412949328, 'colsample_bytree': 0.781229436800554, 'gamma': 4.209443832206599, 'reg_alpha': 2.0732530308595583, 'reg_lambda': 2.5165375049945555}. Best is trial 6 with value: 310.4329833984375.


Best trial: 6. Best value: 310.433:  30%|███       | 9/30 [00:45<01:41,  4.84s/it]

[I 2025-07-31 09:15:00,226] Trial 8 finished with value: 343.44989013671875 and parameters: {'n_estimators': 232, 'max_depth': 4, 'learning_rate': 0.21829729179764595, 'subsample': 0.9074322232753236, 'colsample_bytree': 0.6588475190771593, 'gamma': 2.592111188895447, 'reg_alpha': 3.1303995367631825, 'reg_lambda': 3.1776683509879167}. Best is trial 6 with value: 310.4329833984375.


Best trial: 6. Best value: 310.433:  33%|███▎      | 10/30 [00:47<01:15,  3.76s/it]

[I 2025-07-31 09:15:01,559] Trial 9 finished with value: 365.25537109375 and parameters: {'n_estimators': 145, 'max_depth': 7, 'learning_rate': 0.22379783818349164, 'subsample': 0.5788177933889509, 'colsample_bytree': 0.7028616655194395, 'gamma': 2.7623267029900194, 'reg_alpha': 1.4723058519581906, 'reg_lambda': 4.2084595472196265}. Best is trial 6 with value: 310.4329833984375.


Best trial: 10. Best value: 302.283:  37%|███▋      | 11/30 [00:49<01:03,  3.34s/it]

[I 2025-07-31 09:15:03,940] Trial 10 finished with value: 302.282958984375 and parameters: {'n_estimators': 442, 'max_depth': 5, 'learning_rate': 0.012505701376519338, 'subsample': 0.8508226330746116, 'colsample_bytree': 0.8832273345323826, 'gamma': 0.41339863692574985, 'reg_alpha': 0.25005117433736723, 'reg_lambda': 0.47862921027651284}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  40%|████      | 12/30 [00:51<00:56,  3.11s/it]

[I 2025-07-31 09:15:06,543] Trial 11 finished with value: 304.5799560546875 and parameters: {'n_estimators': 447, 'max_depth': 5, 'learning_rate': 0.013124567381379711, 'subsample': 0.844741623816428, 'colsample_bytree': 0.8812526514423129, 'gamma': 0.36717780799776334, 'reg_alpha': 0.07519378946817157, 'reg_lambda': 0.4034492034890885}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  43%|████▎     | 13/30 [00:54<00:48,  2.83s/it]

[I 2025-07-31 09:15:08,728] Trial 12 finished with value: 304.6684265136719 and parameters: {'n_estimators': 379, 'max_depth': 5, 'learning_rate': 0.017949456106076955, 'subsample': 0.8327246530369485, 'colsample_bytree': 0.9148476943803923, 'gamma': 0.07930666211195747, 'reg_alpha': 0.12515693674362202, 'reg_lambda': 0.07944383345837563}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  47%|████▋     | 14/30 [00:57<00:45,  2.86s/it]

[I 2025-07-31 09:15:11,648] Trial 13 finished with value: 336.8096923828125 and parameters: {'n_estimators': 393, 'max_depth': 6, 'learning_rate': 0.09659328681772847, 'subsample': 0.8460894195412713, 'colsample_bytree': 0.84377723830558, 'gamma': 0.23779563095482842, 'reg_alpha': 0.09144822330046928, 'reg_lambda': 0.8860462126988013}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  50%|█████     | 15/30 [01:01<00:48,  3.22s/it]

[I 2025-07-31 09:15:15,707] Trial 14 finished with value: 393.8560791015625 and parameters: {'n_estimators': 676, 'max_depth': 5, 'learning_rate': 0.29509865187752615, 'subsample': 0.7282185007022343, 'colsample_bytree': 0.998387815842165, 'gamma': 0.8756269996400061, 'reg_alpha': 0.058506709378058915, 'reg_lambda': 0.7983605295714653}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  53%|█████▎    | 16/30 [01:04<00:43,  3.11s/it]

[I 2025-07-31 09:15:18,560] Trial 15 finished with value: 323.0491027832031 and parameters: {'n_estimators': 391, 'max_depth': 6, 'learning_rate': 0.06526388177794594, 'subsample': 0.9960949700259898, 'colsample_bytree': 0.8990925662213372, 'gamma': 0.8918397346968128, 'reg_alpha': 4.89112160386976, 'reg_lambda': 1.5709056998709965}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  57%|█████▋    | 17/30 [01:14<01:07,  5.22s/it]

[I 2025-07-31 09:15:28,671] Trial 16 finished with value: 342.936767578125 and parameters: {'n_estimators': 813, 'max_depth': 9, 'learning_rate': 0.17498091523774822, 'subsample': 0.8425395860111234, 'colsample_bytree': 0.8165332031526534, 'gamma': 0.7175976182331668, 'reg_alpha': 0.6715064672174327, 'reg_lambda': 4.717526822297218}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  60%|██████    | 18/30 [01:16<00:53,  4.48s/it]

[I 2025-07-31 09:15:31,430] Trial 17 finished with value: 344.9150390625 and parameters: {'n_estimators': 454, 'max_depth': 5, 'learning_rate': 0.10838668639662109, 'subsample': 0.7040275624719096, 'colsample_bytree': 0.9085740567708064, 'gamma': 1.4326013584821844, 'reg_alpha': 0.6996953287680927, 'reg_lambda': 0.4844169922129448}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  63%|██████▎   | 19/30 [01:23<00:55,  5.03s/it]

[I 2025-07-31 09:15:37,757] Trial 18 finished with value: 313.64178466796875 and parameters: {'n_estimators': 590, 'max_depth': 8, 'learning_rate': 0.03347349014456678, 'subsample': 0.5280996325136819, 'colsample_bytree': 0.941952105259841, 'gamma': 0.4688631784861991, 'reg_alpha': 3.2234926153516708, 'reg_lambda': 3.551455296731442}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  67%|██████▋   | 20/30 [01:25<00:42,  4.23s/it]

[I 2025-07-31 09:15:40,126] Trial 19 finished with value: 357.9959716796875 and parameters: {'n_estimators': 319, 'max_depth': 6, 'learning_rate': 0.16901644899632157, 'subsample': 0.8565810957758198, 'colsample_bytree': 0.5000133113378322, 'gamma': 1.4201898834875193, 'reg_alpha': 1.7588563286551002, 'reg_lambda': 1.5971994824132953}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  70%|███████   | 21/30 [01:30<00:39,  4.34s/it]

[I 2025-07-31 09:15:44,712] Trial 20 finished with value: 330.9963684082031 and parameters: {'n_estimators': 979, 'max_depth': 4, 'learning_rate': 0.06781963541075786, 'subsample': 0.7849768374104744, 'colsample_bytree': 0.7891884530809058, 'gamma': 3.3634369521504355, 'reg_alpha': 0.6620890296103457, 'reg_lambda': 0.5986038715671791}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  73%|███████▎  | 22/30 [01:32<00:29,  3.72s/it]

[I 2025-07-31 09:15:46,999] Trial 21 finished with value: 305.02178955078125 and parameters: {'n_estimators': 353, 'max_depth': 5, 'learning_rate': 0.02405134214325636, 'subsample': 0.8489284782178539, 'colsample_bytree': 0.8965872566366374, 'gamma': 0.0031906918218447267, 'reg_alpha': 0.010128309506888616, 'reg_lambda': 0.2776828064049419}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  77%|███████▋  | 23/30 [01:35<00:24,  3.50s/it]

[I 2025-07-31 09:15:49,992] Trial 22 finished with value: 312.76934814453125 and parameters: {'n_estimators': 458, 'max_depth': 5, 'learning_rate': 0.04086668848678205, 'subsample': 0.8174481161963546, 'colsample_bytree': 0.8586760071291525, 'gamma': 0.07766218760052485, 'reg_alpha': 0.46231853661995603, 'reg_lambda': 0.0849295260940105}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  80%|████████  | 24/30 [01:36<00:17,  2.84s/it]

[I 2025-07-31 09:15:51,273] Trial 23 finished with value: 328.9061584472656 and parameters: {'n_estimators': 300, 'max_depth': 3, 'learning_rate': 0.012650901947267479, 'subsample': 0.8890795910658822, 'colsample_bytree': 0.9446106394517864, 'gamma': 0.510534305233029, 'reg_alpha': 0.40268596604797124, 'reg_lambda': 1.1553734873708636}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  83%|████████▎ | 25/30 [01:40<00:15,  3.09s/it]

[I 2025-07-31 09:15:54,953] Trial 24 finished with value: 341.2532958984375 and parameters: {'n_estimators': 539, 'max_depth': 6, 'learning_rate': 0.08867407275280019, 'subsample': 0.7557361867594031, 'colsample_bytree': 0.9291928457103674, 'gamma': 1.117715523601348, 'reg_alpha': 0.9078149992659957, 'reg_lambda': 1.0943120047163695}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  87%|████████▋ | 26/30 [01:43<00:11,  2.99s/it]

[I 2025-07-31 09:15:57,718] Trial 25 finished with value: 315.04443359375 and parameters: {'n_estimators': 663, 'max_depth': 4, 'learning_rate': 0.04262982288562514, 'subsample': 0.8850933077754611, 'colsample_bytree': 0.8637692062993851, 'gamma': 0.4879396548824025, 'reg_alpha': 0.3467009132392487, 'reg_lambda': 0.06680719180985421}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  90%|█████████ | 27/30 [01:48<00:11,  3.67s/it]

[I 2025-07-31 09:16:02,970] Trial 26 finished with value: 332.7279052734375 and parameters: {'n_estimators': 474, 'max_depth': 8, 'learning_rate': 0.08182330126099426, 'subsample': 0.8231470100839599, 'colsample_bytree': 0.9688384723151553, 'gamma': 1.680312836998059, 'reg_alpha': 1.0188508539448347, 'reg_lambda': 1.8823327145136601}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  93%|█████████▎| 28/30 [01:52<00:07,  3.67s/it]

[I 2025-07-31 09:16:06,649] Trial 27 finished with value: 307.5177001953125 and parameters: {'n_estimators': 397, 'max_depth': 7, 'learning_rate': 0.014760642256955907, 'subsample': 0.7368063841659224, 'colsample_bytree': 0.7366794751976453, 'gamma': 0.9609372610906219, 'reg_alpha': 1.5877862510589986, 'reg_lambda': 0.5305203665253693}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283:  97%|█████████▋| 29/30 [01:54<00:03,  3.15s/it]

[I 2025-07-31 09:16:08,587] Trial 28 finished with value: 314.23406982421875 and parameters: {'n_estimators': 296, 'max_depth': 5, 'learning_rate': 0.05257197585097488, 'subsample': 0.6537160224887318, 'colsample_bytree': 0.8168586718503508, 'gamma': 0.3950809179303537, 'reg_alpha': 2.550851754759975, 'reg_lambda': 0.7938942603604289}. Best is trial 10 with value: 302.282958984375.


Best trial: 10. Best value: 302.283: 100%|██████████| 30/30 [01:57<00:00,  3.91s/it]


[I 2025-07-31 09:16:11,890] Trial 29 finished with value: 308.6338806152344 and parameters: {'n_estimators': 614, 'max_depth': 4, 'learning_rate': 0.011157343135218739, 'subsample': 0.9801067903015792, 'colsample_bytree': 0.958776319588503, 'gamma': 0.07589961420986298, 'reg_alpha': 0.009881011262728334, 'reg_lambda': 1.403606257420083}. Best is trial 10 with value: 302.282958984375.
✅ Best MAE: 302.6985


In [11]:
# Convert to DataFrame for easier handling if needed
if not isinstance(y_valid, pd.DataFrame):
    y_valid = pd.DataFrame(y_valid, columns=target_columns)
if not isinstance(y_pred, pd.DataFrame):
    y_pred = pd.DataFrame(y_pred, columns=target_columns)

# MAE per target column
mae_per_column = {}
for col in target_columns:
    mae_per_column[col] = mean_absolute_error(y_valid[col], y_pred[col])

print("MAE per target column:")
for col, val in mae_per_column.items():
    print(f"{col}: {val:.4f}")

MAE per target column:
like_count_initial: 19.7055
like_count_final: 20.7099
view_count_initial: 566.3317
view_count_final: 604.0472


In [12]:
def catboost_objective(trial):
    params = {
        "iterations": trial.suggest_int("iterations", 100, 1000),
        "depth": trial.suggest_int("depth", 3, 12),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "random_strength": trial.suggest_float("random_strength", 1, 20),
        "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 1.0),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1, 10),
        "border_count": trial.suggest_int("border_count", 32, 255),
        "random_seed": 42,
        "verbose": False,
    }

    catBoost = CatBoostRegressor(**params)
    model = MultiOutputRegressor(catBoost)
    model.fit(X_train, y_train)

    preds = model.predict(X_valid)
    return mean_absolute_error(y_valid, preds)

# 3. Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(catboost_objective, n_trials=30, show_progress_bar=True)

# 4. Train final model with best params
best_params = study.best_trial.params
best_model = MultiOutputRegressor(CatBoostRegressor(**best_params))
best_model.fit(X_train, y_train)

# 5. Predict and evaluate
y_pred = best_model.predict(X_valid)
mae = mean_absolute_error(y_valid, y_pred)

# Convert y_valid and y_pred to DataFrames if they aren't already, using target column names
if not isinstance(y_valid, pd.DataFrame):
    y_valid = pd.DataFrame(y_valid, columns=target_columns)
if not isinstance(y_pred, pd.DataFrame):
    y_pred = pd.DataFrame(y_pred, columns=target_columns)

# Calculate MAE for each column
print(f"✅ Best Overall MAE: {mae:.4f}\n")
print("MAE per target column:")
for col in target_columns:
    col_mae = mean_absolute_error(y_valid[col], y_pred[col])
    print(f"{col}: {col_mae:.4f}")

[I 2025-07-31 09:16:14,652] A new study created in memory with name: no-name-e64cc4a8-b07f-4351-9031-b11679b466e9
Best trial: 0. Best value: 327.331:   3%|▎         | 1/30 [00:02<01:04,  2.23s/it]

[I 2025-07-31 09:16:16,884] Trial 0 finished with value: 327.33071628934397 and parameters: {'iterations': 478, 'depth': 5, 'learning_rate': 0.1983252933628219, 'random_strength': 2.078755107096994, 'bagging_temperature': 0.17770625988323896, 'l2_leaf_reg': 4.457169365920551, 'border_count': 213}. Best is trial 0 with value: 327.33071628934397.


Best trial: 0. Best value: 327.331:   7%|▋         | 2/30 [00:15<04:01,  8.61s/it]

[I 2025-07-31 09:16:29,962] Trial 1 finished with value: 338.7821945777454 and parameters: {'iterations': 365, 'depth': 12, 'learning_rate': 0.23219305196267334, 'random_strength': 5.813905077323028, 'bagging_temperature': 0.2720254280200962, 'l2_leaf_reg': 3.5113368360789434, 'border_count': 120}. Best is trial 0 with value: 327.33071628934397.


Best trial: 0. Best value: 327.331:  10%|█         | 3/30 [00:18<02:50,  6.32s/it]

[I 2025-07-31 09:16:33,564] Trial 2 finished with value: 372.595755444552 and parameters: {'iterations': 582, 'depth': 8, 'learning_rate': 0.19255549414824352, 'random_strength': 14.838894651432886, 'bagging_temperature': 0.17430028313265555, 'l2_leaf_reg': 6.110725855130495, 'border_count': 38}. Best is trial 0 with value: 327.33071628934397.


Best trial: 0. Best value: 327.331:  13%|█▎        | 4/30 [00:22<02:19,  5.35s/it]

[I 2025-07-31 09:16:37,415] Trial 3 finished with value: 350.73654312024405 and parameters: {'iterations': 950, 'depth': 5, 'learning_rate': 0.22938914731785387, 'random_strength': 17.03272201729894, 'bagging_temperature': 0.985384392301877, 'l2_leaf_reg': 8.159068252087293, 'border_count': 83}. Best is trial 0 with value: 327.33071628934397.


Best trial: 4. Best value: 320.307:  17%|█▋        | 5/30 [00:23<01:32,  3.71s/it]

[I 2025-07-31 09:16:38,228] Trial 4 finished with value: 320.30726813520766 and parameters: {'iterations': 128, 'depth': 5, 'learning_rate': 0.20535077336997107, 'random_strength': 19.818619671006765, 'bagging_temperature': 0.8510179448297254, 'l2_leaf_reg': 5.42545351609386, 'border_count': 169}. Best is trial 4 with value: 320.30726813520766.


Best trial: 4. Best value: 320.307:  20%|██        | 6/30 [00:27<01:33,  3.90s/it]

[I 2025-07-31 09:16:42,493] Trial 5 finished with value: 353.63618514953924 and parameters: {'iterations': 342, 'depth': 9, 'learning_rate': 0.28918686245061126, 'random_strength': 6.222176771805756, 'bagging_temperature': 0.45275207520519456, 'l2_leaf_reg': 1.8944884196166405, 'border_count': 191}. Best is trial 4 with value: 320.30726813520766.


Best trial: 4. Best value: 320.307:  23%|██▎       | 7/30 [00:28<01:08,  2.99s/it]

[I 2025-07-31 09:16:43,614] Trial 6 finished with value: 345.551908563643 and parameters: {'iterations': 107, 'depth': 9, 'learning_rate': 0.24834432345708893, 'random_strength': 16.303006976087843, 'bagging_temperature': 0.8964509467537191, 'l2_leaf_reg': 3.8279891394046826, 'border_count': 82}. Best is trial 4 with value: 320.30726813520766.


Best trial: 4. Best value: 320.307:  27%|██▋       | 8/30 [00:35<01:31,  4.18s/it]

[I 2025-07-31 09:16:50,338] Trial 7 finished with value: 336.83343517512606 and parameters: {'iterations': 986, 'depth': 8, 'learning_rate': 0.09247994979358976, 'random_strength': 17.393337533045894, 'bagging_temperature': 0.7526076541798958, 'l2_leaf_reg': 1.7979731468103441, 'border_count': 78}. Best is trial 4 with value: 320.30726813520766.


Best trial: 4. Best value: 320.307:  30%|███       | 9/30 [00:38<01:19,  3.81s/it]

[I 2025-07-31 09:16:53,323] Trial 8 finished with value: 346.6232121535911 and parameters: {'iterations': 809, 'depth': 4, 'learning_rate': 0.2806789282505968, 'random_strength': 9.387489422138797, 'bagging_temperature': 0.07953998963318021, 'l2_leaf_reg': 9.025963481533623, 'border_count': 239}. Best is trial 4 with value: 320.30726813520766.


Best trial: 4. Best value: 320.307:  33%|███▎      | 10/30 [00:55<02:38,  7.90s/it]

[I 2025-07-31 09:17:10,390] Trial 9 finished with value: 350.5511753388435 and parameters: {'iterations': 644, 'depth': 11, 'learning_rate': 0.2963800563673018, 'random_strength': 14.191142228700105, 'bagging_temperature': 0.4847418761275636, 'l2_leaf_reg': 8.269817860186768, 'border_count': 202}. Best is trial 4 with value: 320.30726813520766.


Best trial: 4. Best value: 320.307:  37%|███▋      | 11/30 [00:56<01:47,  5.66s/it]

[I 2025-07-31 09:17:10,972] Trial 10 finished with value: 404.10593915919264 and parameters: {'iterations': 108, 'depth': 3, 'learning_rate': 0.020090835150065628, 'random_strength': 19.282090112585326, 'bagging_temperature': 0.7120985158835, 'l2_leaf_reg': 6.443212988286994, 'border_count': 149}. Best is trial 4 with value: 320.30726813520766.


Best trial: 4. Best value: 320.307:  40%|████      | 12/30 [00:58<01:23,  4.64s/it]

[I 2025-07-31 09:17:13,266] Trial 11 finished with value: 322.48615540080976 and parameters: {'iterations': 362, 'depth': 6, 'learning_rate': 0.14931151571885104, 'random_strength': 1.738363320995373, 'bagging_temperature': 0.6630007172871771, 'l2_leaf_reg': 4.498490067327294, 'border_count': 254}. Best is trial 4 with value: 320.30726813520766.


Best trial: 12. Best value: 319.507:  43%|████▎     | 13/30 [01:00<01:03,  3.71s/it]

[I 2025-07-31 09:17:14,832] Trial 12 finished with value: 319.5065559388513 and parameters: {'iterations': 281, 'depth': 6, 'learning_rate': 0.1230652311960298, 'random_strength': 11.622100742345998, 'bagging_temperature': 0.7113345804672118, 'l2_leaf_reg': 5.316566471491623, 'border_count': 157}. Best is trial 12 with value: 319.5065559388513.


Best trial: 12. Best value: 319.507:  47%|████▋     | 14/30 [01:01<00:47,  2.98s/it]

[I 2025-07-31 09:17:16,118] Trial 13 finished with value: 321.16824547271347 and parameters: {'iterations': 225, 'depth': 6, 'learning_rate': 0.12198337467049807, 'random_strength': 11.49578843804053, 'bagging_temperature': 0.8358624265078177, 'l2_leaf_reg': 7.228729451450423, 'border_count': 158}. Best is trial 12 with value: 319.5065559388513.


Best trial: 14. Best value: 317.604:  50%|█████     | 15/30 [01:02<00:37,  2.49s/it]

[I 2025-07-31 09:17:17,479] Trial 14 finished with value: 317.6040324375989 and parameters: {'iterations': 235, 'depth': 6, 'learning_rate': 0.03341104862978812, 'random_strength': 11.705723619113085, 'bagging_temperature': 0.6278123222078743, 'l2_leaf_reg': 5.474377654922242, 'border_count': 181}. Best is trial 14 with value: 317.6040324375989.


Best trial: 15. Best value: 307.862:  53%|█████▎    | 16/30 [01:04<00:32,  2.31s/it]

[I 2025-07-31 09:17:19,373] Trial 15 finished with value: 307.8615313612479 and parameters: {'iterations': 265, 'depth': 7, 'learning_rate': 0.0387159165102766, 'random_strength': 11.135299776960872, 'bagging_temperature': 0.6165172422884438, 'l2_leaf_reg': 5.183554549349782, 'border_count': 181}. Best is trial 15 with value: 307.8615313612479.


Best trial: 15. Best value: 307.862:  57%|█████▋    | 17/30 [01:07<00:31,  2.46s/it]

[I 2025-07-31 09:17:22,173] Trial 16 finished with value: 309.2996445674256 and parameters: {'iterations': 467, 'depth': 7, 'learning_rate': 0.016140046264118763, 'random_strength': 9.235500793284096, 'bagging_temperature': 0.5937742269962661, 'l2_leaf_reg': 3.238201233704972, 'border_count': 116}. Best is trial 15 with value: 307.8615313612479.


Best trial: 15. Best value: 307.862:  60%|██████    | 18/30 [01:13<00:43,  3.61s/it]

[I 2025-07-31 09:17:28,475] Trial 17 finished with value: 310.06873569768794 and parameters: {'iterations': 475, 'depth': 10, 'learning_rate': 0.05191061634769065, 'random_strength': 8.53029567926597, 'bagging_temperature': 0.3726054894501776, 'l2_leaf_reg': 2.9819245006502553, 'border_count': 123}. Best is trial 15 with value: 307.8615313612479.


Best trial: 15. Best value: 307.862:  63%|██████▎   | 19/30 [01:17<00:41,  3.76s/it]

[I 2025-07-31 09:17:32,574] Trial 18 finished with value: 315.9898445488964 and parameters: {'iterations': 713, 'depth': 7, 'learning_rate': 0.06834675270822597, 'random_strength': 6.478451190880994, 'bagging_temperature': 0.5701218568342259, 'l2_leaf_reg': 2.558793339832367, 'border_count': 123}. Best is trial 15 with value: 307.8615313612479.


Best trial: 15. Best value: 307.862:  67%|██████▋   | 20/30 [01:21<00:35,  3.59s/it]

[I 2025-07-31 09:17:35,775] Trial 19 finished with value: 309.54739306165726 and parameters: {'iterations': 465, 'depth': 7, 'learning_rate': 0.01047274994615835, 'random_strength': 4.142237382014775, 'bagging_temperature': 0.37895371120251237, 'l2_leaf_reg': 1.3297413663575857, 'border_count': 220}. Best is trial 15 with value: 307.8615313612479.


Best trial: 15. Best value: 307.862:  70%|███████   | 21/30 [01:26<00:36,  4.02s/it]

[I 2025-07-31 09:17:40,793] Trial 20 finished with value: 322.0686259796569 and parameters: {'iterations': 547, 'depth': 9, 'learning_rate': 0.07498318597393028, 'random_strength': 8.385938207298054, 'bagging_temperature': 0.5776703698012178, 'l2_leaf_reg': 4.636729588826569, 'border_count': 108}. Best is trial 15 with value: 307.8615313612479.


Best trial: 21. Best value: 306.386:  73%|███████▎  | 22/30 [01:28<00:29,  3.67s/it]

[I 2025-07-31 09:17:43,644] Trial 21 finished with value: 306.385650118228 and parameters: {'iterations': 427, 'depth': 7, 'learning_rate': 0.013144398874159824, 'random_strength': 2.6305339503204124, 'bagging_temperature': 0.37189410638361, 'l2_leaf_reg': 1.9504995489048689, 'border_count': 222}. Best is trial 21 with value: 306.385650118228.


Best trial: 22. Best value: 304.358:  77%|███████▋  | 23/30 [01:31<00:23,  3.41s/it]

[I 2025-07-31 09:17:46,447] Trial 22 finished with value: 304.3577414444444 and parameters: {'iterations': 422, 'depth': 7, 'learning_rate': 0.04631334148309217, 'random_strength': 13.730115232057388, 'bagging_temperature': 0.39703266262832254, 'l2_leaf_reg': 2.438435610602422, 'border_count': 231}. Best is trial 22 with value: 304.3577414444444.


Best trial: 22. Best value: 304.358:  80%|████████  | 24/30 [01:35<00:20,  3.42s/it]

[I 2025-07-31 09:17:49,906] Trial 23 finished with value: 306.08285468775284 and parameters: {'iterations': 398, 'depth': 8, 'learning_rate': 0.046111808603912965, 'random_strength': 13.799803684358945, 'bagging_temperature': 0.3599491566637527, 'l2_leaf_reg': 1.1044559472639115, 'border_count': 228}. Best is trial 22 with value: 304.3577414444444.


Best trial: 22. Best value: 304.358:  83%|████████▎ | 25/30 [01:38<00:17,  3.42s/it]

[I 2025-07-31 09:17:53,327] Trial 24 finished with value: 317.40729732527524 and parameters: {'iterations': 393, 'depth': 8, 'learning_rate': 0.0839933418423432, 'random_strength': 13.832136512405562, 'bagging_temperature': 0.3095478786729196, 'l2_leaf_reg': 1.056512589295101, 'border_count': 237}. Best is trial 22 with value: 304.3577414444444.


Best trial: 22. Best value: 304.358:  87%|████████▋ | 26/30 [01:44<00:17,  4.29s/it]

[I 2025-07-31 09:17:59,636] Trial 25 finished with value: 330.108969294766 and parameters: {'iterations': 539, 'depth': 9, 'learning_rate': 0.11530730762899043, 'random_strength': 13.376723396952046, 'bagging_temperature': 0.4093842997302022, 'l2_leaf_reg': 2.258027348090854, 'border_count': 231}. Best is trial 22 with value: 304.3577414444444.


Best trial: 22. Best value: 304.358:  90%|█████████ | 27/30 [01:48<00:12,  4.15s/it]

[I 2025-07-31 09:18:03,476] Trial 26 finished with value: 311.18593079575226 and parameters: {'iterations': 414, 'depth': 8, 'learning_rate': 0.055061663272837694, 'random_strength': 15.69437404524346, 'bagging_temperature': 0.2514171032090386, 'l2_leaf_reg': 2.5427894996704494, 'border_count': 253}. Best is trial 22 with value: 304.3577414444444.


Best trial: 22. Best value: 304.358:  93%|█████████▎| 28/30 [02:00<00:12,  6.47s/it]

[I 2025-07-31 09:18:15,353] Trial 27 finished with value: 319.95540154035945 and parameters: {'iterations': 673, 'depth': 10, 'learning_rate': 0.09882765967902381, 'random_strength': 12.641776930093817, 'bagging_temperature': 0.5136205004273531, 'l2_leaf_reg': 1.0548544770640196, 'border_count': 207}. Best is trial 22 with value: 304.3577414444444.


Best trial: 22. Best value: 304.358:  97%|█████████▋| 29/30 [02:06<00:06,  6.21s/it]

[I 2025-07-31 09:18:20,965] Trial 28 finished with value: 322.4647062771293 and parameters: {'iterations': 309, 'depth': 10, 'learning_rate': 0.14962523461414856, 'random_strength': 3.792385246943592, 'bagging_temperature': 0.018966674524985128, 'l2_leaf_reg': 1.9785684466270266, 'border_count': 223}. Best is trial 22 with value: 304.3577414444444.


Best trial: 22. Best value: 304.358: 100%|██████████| 30/30 [02:08<00:00,  4.27s/it]


[I 2025-07-31 09:18:22,720] Trial 29 finished with value: 315.2159060109156 and parameters: {'iterations': 428, 'depth': 4, 'learning_rate': 0.046129613637102766, 'random_strength': 18.521320633977133, 'bagging_temperature': 0.17286554310480795, 'l2_leaf_reg': 3.95026127716016, 'border_count': 207}. Best is trial 22 with value: 304.3577414444444.
0:	learn: 146.0225257	total: 1.5ms	remaining: 630ms
1:	learn: 144.4744168	total: 2.89ms	remaining: 606ms
2:	learn: 142.4908743	total: 4.34ms	remaining: 606ms
3:	learn: 140.5695205	total: 5.71ms	remaining: 596ms
4:	learn: 138.8963530	total: 7.03ms	remaining: 586ms
5:	learn: 137.0228174	total: 8.56ms	remaining: 593ms
6:	learn: 135.4956390	total: 10.1ms	remaining: 597ms
7:	learn: 134.2974527	total: 11.9ms	remaining: 614ms
8:	learn: 132.5611585	total: 13.3ms	remaining: 609ms
9:	learn: 131.6397477	total: 14.8ms	remaining: 611ms
10:	learn: 130.0718625	total: 16.7ms	remaining: 624ms
11:	learn: 128.8980751	total: 18.4ms	remaining: 627ms
12:	learn: 127

In [13]:
def lightGBM_objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 300),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2),
        "max_depth": trial.suggest_int("max_depth", 5, 15),
        "num_leaves": trial.suggest_int("num_leaves", 20, 100),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 20),
        "min_split_gain": trial.suggest_float("min_split_gain", 0.0, 0.3),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
    }


    lightGBM = LGBMRegressor(**params)
    model = MultiOutputRegressor(lightGBM)
    model.fit(X_train, y_train)

    preds = model.predict(X_valid)
    return mean_absolute_error(y_valid, preds)

# 3. Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(lightGBM_objective, n_trials=30, show_progress_bar=True)

# 4. Train final model with best params
best_params = study.best_trial.params
best_model = MultiOutputRegressor(LGBMRegressor(**best_params))
best_model.fit(X_train, y_train)

# 5. Predict and evaluate
y_pred = best_model.predict(X_valid)
mae = mean_absolute_error(y_valid, y_pred)

# Convert y_valid and y_pred to DataFrames if they aren't already, using target column names
if not isinstance(y_valid, pd.DataFrame):
    y_valid = pd.DataFrame(y_valid, columns=target_columns)
if not isinstance(y_pred, pd.DataFrame):
    y_pred = pd.DataFrame(y_pred, columns=target_columns)

# Calculate MAE for each column
print(f"✅ Best Overall MAE: {mae:.4f}\n")
print("MAE per target column:")
for col in target_columns:
    col_mae = mean_absolute_error(y_valid[col], y_pred[col])
    print(f"{col}: {col_mae:.4f}")

[I 2025-07-31 09:18:25,743] A new study created in memory with name: no-name-c43c9d8c-ff83-47f5-9b92-e80755b3f44e
  0%|          | 0/30 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000555 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000537 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 59.030271
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000560 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start t

Best trial: 0. Best value: 359.251:   3%|▎         | 1/30 [00:10<04:53, 10.11s/it]

[I 2025-07-31 09:18:35,851] Trial 0 finished with value: 359.25050269292444 and parameters: {'n_estimators': 295, 'learning_rate': 0.1678616438610672, 'max_depth': 9, 'num_leaves': 65, 'min_child_samples': 6, 'min_split_gain': 0.2720831319373088, 'subsample': 0.6726340109520282, 'colsample_bytree': 0.7589327856715941}. Best is trial 0 with value: 359.25050269292444.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000489 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000526 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15

Best trial: 1. Best value: 316.441:   7%|▋         | 2/30 [00:13<02:50,  6.08s/it]

[I 2025-07-31 09:18:39,107] Trial 1 finished with value: 316.4409255970251 and parameters: {'n_estimators': 115, 'learning_rate': 0.07657261779711987, 'max_depth': 8, 'num_leaves': 77, 'min_child_samples': 7, 'min_split_gain': 0.12737481636160822, 'subsample': 0.7631516559277111, 'colsample_bytree': 0.7577887093883339}. Best is trial 1 with value: 316.4409255970251.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000526 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000681 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15

Best trial: 1. Best value: 316.441:  10%|█         | 3/30 [00:20<02:56,  6.55s/it]

[I 2025-07-31 09:18:46,229] Trial 2 finished with value: 369.241882374361 and parameters: {'n_estimators': 248, 'learning_rate': 0.1949247254728326, 'max_depth': 14, 'num_leaves': 51, 'min_child_samples': 10, 'min_split_gain': 0.1511569309842332, 'subsample': 0.8854244997857458, 'colsample_bytree': 0.7330829350143805}. Best is trial 1 with value: 316.4409255970251.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000437 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000533 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15


Best trial: 3. Best value: 308.634:  13%|█▎        | 4/30 [00:24<02:25,  5.60s/it]

[I 2025-07-31 09:18:50,354] Trial 3 finished with value: 308.63351149186866 and parameters: {'n_estimators': 282, 'learning_rate': 0.028573837512641857, 'max_depth': 9, 'num_leaves': 27, 'min_child_samples': 16, 'min_split_gain': 0.1101845041424814, 'subsample': 0.8317117645828006, 'colsample_bytree': 0.857550205172255}. Best is trial 3 with value: 308.63351149186866.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000495 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000426 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 

Best trial: 3. Best value: 308.634:  17%|█▋        | 5/30 [00:28<02:04,  4.97s/it]

[I 2025-07-31 09:18:54,211] Trial 4 finished with value: 314.55529573800936 and parameters: {'n_estimators': 135, 'learning_rate': 0.06672874173938717, 'max_depth': 10, 'num_leaves': 90, 'min_child_samples': 12, 'min_split_gain': 0.03047717025234098, 'subsample': 0.7242592060477383, 'colsample_bytree': 0.773041537796374}. Best is trial 3 with value: 308.63351149186866.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000539 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000475 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features:

Best trial: 3. Best value: 308.634:  20%|██        | 6/30 [00:33<02:02,  5.09s/it]

[I 2025-07-31 09:18:59,542] Trial 5 finished with value: 328.35446856067335 and parameters: {'n_estimators': 194, 'learning_rate': 0.09025368949133408, 'max_depth': 10, 'num_leaves': 53, 'min_child_samples': 8, 'min_split_gain': 0.06355433182876809, 'subsample': 0.6075859160648233, 'colsample_bytree': 0.9132246304902528}. Best is trial 3 with value: 308.63351149186866.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000505 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000564 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features:

Best trial: 3. Best value: 308.634:  23%|██▎       | 7/30 [00:35<01:31,  3.96s/it]

[I 2025-07-31 09:19:01,168] Trial 6 finished with value: 313.68888615689264 and parameters: {'n_estimators': 141, 'learning_rate': 0.11309518671584587, 'max_depth': 5, 'num_leaves': 97, 'min_child_samples': 10, 'min_split_gain': 0.18729422072246033, 'subsample': 0.9701343693833924, 'colsample_bytree': 0.9094449821189468}. Best is trial 3 with value: 308.63351149186866.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000460 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000574 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features:

Best trial: 3. Best value: 308.634:  27%|██▋       | 8/30 [00:42<01:46,  4.85s/it]

[I 2025-07-31 09:19:07,916] Trial 7 finished with value: 338.97417201051474 and parameters: {'n_estimators': 232, 'learning_rate': 0.08072725966526419, 'max_depth': 12, 'num_leaves': 60, 'min_child_samples': 12, 'min_split_gain': 0.01848490226539462, 'subsample': 0.9984013429226044, 'colsample_bytree': 0.6140477403147998}. Best is trial 3 with value: 308.63351149186866.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000531 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000485 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features

Best trial: 3. Best value: 308.634:  30%|███       | 9/30 [00:47<01:43,  4.95s/it]

[I 2025-07-31 09:19:13,086] Trial 8 finished with value: 309.8491936637846 and parameters: {'n_estimators': 259, 'learning_rate': 0.04827023046652761, 'max_depth': 7, 'num_leaves': 91, 'min_child_samples': 12, 'min_split_gain': 0.2585850537228868, 'subsample': 0.6511533588603783, 'colsample_bytree': 0.8564411009883237}. Best is trial 3 with value: 308.63351149186866.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000562 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000451 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 1

Best trial: 3. Best value: 308.634:  33%|███▎      | 10/30 [00:57<02:09,  6.46s/it]

[I 2025-07-31 09:19:22,934] Trial 9 finished with value: 330.57900296688837 and parameters: {'n_estimators': 240, 'learning_rate': 0.051807817060376245, 'max_depth': 11, 'num_leaves': 81, 'min_child_samples': 6, 'min_split_gain': 0.08375574286366748, 'subsample': 0.7191280043326216, 'colsample_bytree': 0.6176736032554601}. Best is trial 3 with value: 308.63351149186866.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000485 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000400 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features

Best trial: 3. Best value: 308.634:  37%|███▋      | 11/30 [00:58<01:30,  4.77s/it]

[I 2025-07-31 09:19:23,868] Trial 10 finished with value: 452.95701602929216 and parameters: {'n_estimators': 63, 'learning_rate': 0.014793205323058266, 'max_depth': 15, 'num_leaves': 20, 'min_child_samples': 18, 'min_split_gain': 0.20662137427543156, 'subsample': 0.8301937467036378, 'colsample_bytree': 0.9943134416916295}. Best is trial 3 with value: 308.63351149186866.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000533 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000494 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used feature

Best trial: 11. Best value: 303.175:  40%|████      | 12/30 [01:01<01:20,  4.48s/it]

[I 2025-07-31 09:19:27,690] Trial 11 finished with value: 303.1748974570464 and parameters: {'n_estimators': 295, 'learning_rate': 0.023047654470409053, 'max_depth': 6, 'num_leaves': 29, 'min_child_samples': 16, 'min_split_gain': 0.29365437728985944, 'subsample': 0.850014778879662, 'colsample_bytree': 0.8627778488541231}. Best is trial 11 with value: 303.1748974570464.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000495 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000554 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features:

Best trial: 12. Best value: 302.901:  43%|████▎     | 13/30 [01:05<01:09,  4.12s/it]

[I 2025-07-31 09:19:30,968] Trial 12 finished with value: 302.90148584438043 and parameters: {'n_estimators': 297, 'learning_rate': 0.02046384654261864, 'max_depth': 5, 'num_leaves': 26, 'min_child_samples': 17, 'min_split_gain': 0.11621176817155685, 'subsample': 0.8676022649578242, 'colsample_bytree': 0.857118454662846}. Best is trial 12 with value: 302.90148584438043.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000540 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000534 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features

Best trial: 12. Best value: 302.901:  47%|████▋     | 14/30 [01:07<00:54,  3.43s/it]

[I 2025-07-31 09:19:32,826] Trial 13 finished with value: 324.0159450886513 and parameters: {'n_estimators': 192, 'learning_rate': 0.11781627379148678, 'max_depth': 5, 'num_leaves': 36, 'min_child_samples': 20, 'min_split_gain': 0.21557107583060192, 'subsample': 0.8942550990341167, 'colsample_bytree': 0.8390066406077239}. Best is trial 12 with value: 302.90148584438043.
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000297 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000527 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number

Best trial: 12. Best value: 302.901:  50%|█████     | 15/30 [01:11<00:55,  3.72s/it]

[I 2025-07-31 09:19:37,193] Trial 14 finished with value: 305.14697713100486 and parameters: {'n_estimators': 300, 'learning_rate': 0.02944967912299229, 'max_depth': 6, 'num_leaves': 34, 'min_child_samples': 15, 'min_split_gain': 0.2995381420040115, 'subsample': 0.8763366420196277, 'colsample_bytree': 0.9377182647816278}. Best is trial 12 with value: 302.90148584438043.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000485 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000515 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features

Best trial: 12. Best value: 302.901:  53%|█████▎    | 16/30 [01:16<00:56,  4.00s/it]

[I 2025-07-31 09:19:41,858] Trial 15 finished with value: 330.4127684965596 and parameters: {'n_estimators': 212, 'learning_rate': 0.01021958058258953, 'max_depth': 7, 'num_leaves': 40, 'min_child_samples': 15, 'min_split_gain': 0.16323860341306204, 'subsample': 0.9169224685000993, 'colsample_bytree': 0.8120876485016045}. Best is trial 12 with value: 302.90148584438043.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000498 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000499 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features

Best trial: 12. Best value: 302.901:  57%|█████▋    | 17/30 [01:18<00:47,  3.65s/it]

[I 2025-07-31 09:19:44,697] Trial 16 finished with value: 310.67239037498325 and parameters: {'n_estimators': 270, 'learning_rate': 0.045333709462860736, 'max_depth': 5, 'num_leaves': 21, 'min_child_samples': 18, 'min_split_gain': 0.22978111626516795, 'subsample': 0.7943197013568578, 'colsample_bytree': 0.6962561550924697}. Best is trial 12 with value: 302.90148584438043.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000488 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000242 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Numb

Best trial: 12. Best value: 302.901:  60%|██████    | 18/30 [01:21<00:40,  3.34s/it]

[I 2025-07-31 09:19:47,320] Trial 17 finished with value: 332.1085007092049 and parameters: {'n_estimators': 166, 'learning_rate': 0.15029268563261125, 'max_depth': 7, 'num_leaves': 44, 'min_child_samples': 17, 'min_split_gain': 0.09645136828086276, 'subsample': 0.9393491692728019, 'colsample_bytree': 0.9651526129378675}. Best is trial 12 with value: 302.90148584438043.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000506 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000513 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features

Best trial: 12. Best value: 302.901:  63%|██████▎   | 19/30 [01:24<00:35,  3.24s/it]

[I 2025-07-31 09:19:50,320] Trial 18 finished with value: 308.3426806095649 and parameters: {'n_estimators': 223, 'learning_rate': 0.030927433441788554, 'max_depth': 6, 'num_leaves': 30, 'min_child_samples': 20, 'min_split_gain': 0.057397446112403626, 'subsample': 0.8488200121342454, 'colsample_bytree': 0.8874071906006025}. Best is trial 12 with value: 302.90148584438043.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000551 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000708 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used featur

Best trial: 12. Best value: 302.901:  67%|██████▋   | 20/30 [01:26<00:27,  2.72s/it]

[I 2025-07-31 09:19:51,833] Trial 19 finished with value: 316.76767893516364 and parameters: {'n_estimators': 74, 'learning_rate': 0.13336142077892255, 'max_depth': 8, 'num_leaves': 47, 'min_child_samples': 14, 'min_split_gain': 0.1782756667747885, 'subsample': 0.7721475438243965, 'colsample_bytree': 0.817041975416791}. Best is trial 12 with value: 302.90148584438043.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000603 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000492 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 

Best trial: 12. Best value: 302.901:  70%|███████   | 21/30 [01:33<00:38,  4.28s/it]

[I 2025-07-31 09:19:59,738] Trial 20 finished with value: 348.26234591479647 and parameters: {'n_estimators': 273, 'learning_rate': 0.10402675413259696, 'max_depth': 12, 'num_leaves': 66, 'min_child_samples': 18, 'min_split_gain': 0.12476304800151662, 'subsample': 0.9440841236080919, 'colsample_bytree': 0.6820925409337467}. Best is trial 12 with value: 302.90148584438043.
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000244 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000285 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise

Best trial: 12. Best value: 302.901:  73%|███████▎  | 22/30 [01:38<00:35,  4.43s/it]

[I 2025-07-31 09:20:04,512] Trial 21 finished with value: 303.6007527912426 and parameters: {'n_estimators': 299, 'learning_rate': 0.03067940722058606, 'max_depth': 6, 'num_leaves': 33, 'min_child_samples': 14, 'min_split_gain': 0.297680985070736, 'subsample': 0.8535189337226275, 'colsample_bytree': 0.9228409616695519}. Best is trial 12 with value: 302.90148584438043.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000457 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000507 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 

Best trial: 12. Best value: 302.901:  77%|███████▋  | 23/30 [01:42<00:29,  4.26s/it]

[I 2025-07-31 09:20:08,388] Trial 22 finished with value: 315.83674977655085 and parameters: {'n_estimators': 300, 'learning_rate': 0.06109797512869894, 'max_depth': 6, 'num_leaves': 28, 'min_child_samples': 14, 'min_split_gain': 0.2880102850052451, 'subsample': 0.8512628861444682, 'colsample_bytree': 0.8778362130908522}. Best is trial 12 with value: 302.90148584438043.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000690 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000531 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features

Best trial: 23. Best value: 300.952:  80%|████████  | 24/30 [01:45<00:23,  3.86s/it]

[I 2025-07-31 09:20:11,317] Trial 23 finished with value: 300.9522916936402 and parameters: {'n_estimators': 259, 'learning_rate': 0.034865766657820076, 'max_depth': 5, 'num_leaves': 37, 'min_child_samples': 16, 'min_split_gain': 0.24814116170059036, 'subsample': 0.8049052498643301, 'colsample_bytree': 0.9493004474309331}. Best is trial 23 with value: 300.9522916936402.
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000259 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000491 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number

Best trial: 23. Best value: 300.952:  83%|████████▎ | 25/30 [01:48<00:18,  3.68s/it]

[I 2025-07-31 09:20:14,587] Trial 24 finished with value: 311.13916683247334 and parameters: {'n_estimators': 264, 'learning_rate': 0.012694787231638131, 'max_depth': 5, 'num_leaves': 40, 'min_child_samples': 16, 'min_split_gain': 0.24628608197716473, 'subsample': 0.7993213900213935, 'colsample_bytree': 0.968669485567157}. Best is trial 23 with value: 300.9522916936402.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000568 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000249 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number

Best trial: 23. Best value: 300.952:  87%|████████▋ | 26/30 [01:53<00:15,  3.85s/it]

[I 2025-07-31 09:20:18,819] Trial 25 finished with value: 316.83025062294695 and parameters: {'n_estimators': 275, 'learning_rate': 0.045565905794963835, 'max_depth': 8, 'num_leaves': 25, 'min_child_samples': 19, 'min_split_gain': 0.24670018811396258, 'subsample': 0.7331099929322344, 'colsample_bytree': 0.9530837365297137}. Best is trial 23 with value: 300.9522916936402.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000517 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000693 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used feature

Best trial: 23. Best value: 300.952:  90%|█████████ | 27/30 [01:55<00:10,  3.48s/it]

[I 2025-07-31 09:20:21,448] Trial 26 finished with value: 302.7471936538865 and parameters: {'n_estimators': 211, 'learning_rate': 0.025843224153751676, 'max_depth': 5, 'num_leaves': 39, 'min_child_samples': 17, 'min_split_gain': 0.2713455380368669, 'subsample': 0.8201270445077508, 'colsample_bytree': 0.9965371677973374}. Best is trial 23 with value: 300.9522916936402.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000593 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000535 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features:

Best trial: 23. Best value: 300.952:  93%|█████████▎| 28/30 [01:57<00:06,  3.12s/it]

[I 2025-07-31 09:20:23,711] Trial 27 finished with value: 308.8126016097977 and parameters: {'n_estimators': 207, 'learning_rate': 0.06749551999855677, 'max_depth': 5, 'num_leaves': 40, 'min_child_samples': 17, 'min_split_gain': 0.19734398121855812, 'subsample': 0.7579729094647581, 'colsample_bytree': 0.9983863184235053}. Best is trial 23 with value: 300.9522916936402.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000542 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000500 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features:

Best trial: 23. Best value: 300.952:  97%|█████████▋| 29/30 [02:01<00:03,  3.37s/it]

[I 2025-07-31 09:20:27,682] Trial 28 finished with value: 308.1124497232363 and parameters: {'n_estimators': 248, 'learning_rate': 0.040898998676766996, 'max_depth': 7, 'num_leaves': 56, 'min_child_samples': 19, 'min_split_gain': 0.13627596655035049, 'subsample': 0.8018755800277507, 'colsample_bytree': 0.974367568300534}. Best is trial 23 with value: 300.9522916936402.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000568 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000529 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features:

Best trial: 23. Best value: 300.952: 100%|██████████| 30/30 [02:07<00:00,  4.24s/it]

[I 2025-07-31 09:20:32,819] Trial 29 finished with value: 327.126679153145 and parameters: {'n_estimators': 222, 'learning_rate': 0.08813741267213765, 'max_depth': 9, 'num_leaves': 66, 'min_child_samples': 17, 'min_split_gain': 0.22678523729070554, 'subsample': 0.6948728020032814, 'colsample_bytree': 0.8958396148002072}. Best is trial 23 with value: 300.9522916936402.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000480 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 33.251241





[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000463 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 59.030271
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000287 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 16319, number of used features: 15
[LightGBM] [Info] Start training from score 875.028556
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000550 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train 