In [101]:
#vamos a probar modelos de ML
#leamos x_train, y_train, x_test, y_test
import pandas as pd
x_train = pd.read_csv("x_train.csv")
y_train = pd.read_csv("y_train.csv")
x_test = pd.read_csv("x_val.csv")
y_test = pd.read_csv("y_val.csv")




In [88]:
# ==============================
# Optuna + Pruning para LGBM
# ==============================
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from lightgbm import LGBMRegressor
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
from optuna.integration import LightGBMPruningCallback

SEED = 42
VAL_FRAC = 0.20   # último 20% del train como validación temporal
N_TRIALS = 50     # ajustá según tiempo disponible

# --------- Split temporal (train -> train/valid ; test queda intacto) ---------
X_train_all = x_train.copy()
y_train_all = y_train.copy()
X_test = x_test.copy()
y_test = y_test.copy()

n = len(X_train_all)
n_val = int(np.floor(n * VAL_FRAC))
n_tr = n - n_val
X_tr, y_tr = X_train_all.iloc[:n_tr], y_train_all[:n_tr]
X_val, y_val = X_train_all.iloc[n_tr:], y_train_all[n_tr:]

print(f"Train: {X_tr.shape}, Valid: {X_val.shape}, Test: {X_test.shape}")

# --------- Función objetivo para Optuna ---------
def objective(trial: optuna.Trial) -> float:
    # Espacio de búsqueda (razonable para baseline)
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 300, 2000),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 31, 255),
        "max_depth": trial.suggest_int("max_depth", -1, 16),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
        "random_state": SEED,
        "n_jobs": -1
    }

    # Modelo
    model = LGBMRegressor(**params)

    # Pruning callback: monitorea 'rmse' en el primer eval_set (valid_0)
    pruning_cb = LightGBMPruningCallback(trial, metric="rmse", valid_name="valid_0")

    # Entrena con early stopping y pruning
    model.fit(
        X_tr, y_tr,
        eval_set=[(X_val, y_val)],
        eval_metric="rmse",
        callbacks=[pruning_cb],
    )

    # Predicción en valid y métrica a minimizar (RMSE)
    y_pred_val = model.predict(X_val)
    rmse_val = mean_squared_error(y_val, y_pred_val)
    return rmse_val

# --------- Estudio Optuna ---------
study = optuna.create_study(
    direction="minimize",
    sampler=TPESampler(seed=SEED),
    pruner=MedianPruner(n_warmup_steps=10)
)
study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=True)

print("\n=== Mejores hiperparámetros ===")
print(study.best_params)
print(f"Mejor RMSE valid: {study.best_value:.4f}")

# --------- Re-entrenar con los mejores params (train+valid) ---------
best_params = study.best_params.copy()
best_model = LGBMRegressor(**best_params, random_state=SEED, n_jobs=-1)

best_model.fit(
    X_train_all, y_train_all,
    eval_set=[(X_val, y_val)],  # opcional, solo para logging
    eval_metric="rmse",
)

# --------- Evaluación en TEST ---------
y_pred_test = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred_test)
rmse = mean_squared_error(y_test, y_pred_test)
mae = mean_absolute_error(y_test, y_pred_test)
r2  = r2_score(y_test, y_pred_test)

print("\n=== Métricas en TEST ===")
print(f"MSE : {mse:,.2f}")
print(f"RMSE: {rmse:,.2f}")
print(f"MAE : {mae:,.2f}")
print(f"R²  : {r2:.4f}")


[I 2025-11-10 00:17:21,722] A new study created in memory with name: no-name-806e30fa-e537-4897-9e0c-c902f55581ee


Train: (692, 140), Valid: (172, 140), Test: (94, 140)


  0%|          | 0/50 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002544 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 0. Best value: 2.60139e+09:   2%|▏         | 1/50 [00:00<00:39,  1.24it/s]

[I 2025-11-10 00:17:22,528] Trial 0 finished with value: 2601386028.8587832 and parameters: {'n_estimators': 937, 'learning_rate': 0.17254716573280354, 'num_leaves': 195, 'max_depth': 9, 'min_child_samples': 19, 'subsample': 0.662397808134481, 'colsample_bytree': 0.6232334448672797, 'reg_alpha': 0.6245760287469893, 'reg_lambda': 0.002570603566117598}. Best is trial 0 with value: 2601386028.8587832.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002938 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 1. Best value: 8.41452e+08:   4%|▍         | 2/50 [00:02<01:03,  1.32s/it]

[I 2025-11-10 00:17:24,188] Trial 1 finished with value: 841451529.9436729 and parameters: {'n_estimators': 1504, 'learning_rate': 0.010636066512540286, 'num_leaves': 249, 'max_depth': 13, 'min_child_samples': 25, 'subsample': 0.6727299868828402, 'colsample_bytree': 0.6733618039413735, 'reg_alpha': 5.472429642032198e-06, 'reg_lambda': 0.00052821153945323}. Best is trial 1 with value: 841451529.9436729.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003121 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 2. Best value: 6.69786e+08:   6%|▌         | 3/50 [00:02<00:38,  1.22it/s]

[I 2025-11-10 00:17:24,429] Trial 2 finished with value: 669786028.1920238 and parameters: {'n_estimators': 1034, 'learning_rate': 0.023927528765580644, 'num_leaves': 168, 'max_depth': 1, 'min_child_samples': 33, 'subsample': 0.7465447373174767, 'colsample_bytree': 0.7824279936868144, 'reg_alpha': 0.1165691561324743, 'reg_lambda': 6.267062696005991e-07}. Best is trial 2 with value: 669786028.1920238.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001668 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 2. Best value: 6.69786e+08:   8%|▊         | 4/50 [00:03<00:41,  1.11it/s]

[I 2025-11-10 00:17:25,451] Trial 3 finished with value: 1711103077.3870637 and parameters: {'n_estimators': 1174, 'learning_rate': 0.05898602410432694, 'num_leaves': 41, 'max_depth': 9, 'min_child_samples': 21, 'subsample': 0.6260206371941118, 'colsample_bytree': 0.9795542149013333, 'reg_alpha': 4.905556676028774, 'reg_lambda': 0.18861495878553936}. Best is trial 2 with value: 669786028.1920238.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001669 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 2.12532e+08:  10%|█         | 5/50 [00:04<00:33,  1.33it/s]

[I 2025-11-10 00:17:25,947] Trial 4 finished with value: 212532397.77084133 and parameters: {'n_estimators': 818, 'learning_rate': 0.013399060561509796, 'num_leaves': 184, 'max_depth': 6, 'min_child_samples': 16, 'subsample': 0.798070764044508, 'colsample_bytree': 0.6137554084460873, 'reg_alpha': 1.527156759251193, 'reg_lambda': 2.133142332373004e-06}. Best is trial 4 with value: 212532397.77084133.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002628 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 2.12532e+08:  12%|█▏        | 6/50 [00:05<00:47,  1.08s/it]

[I 2025-11-10 00:17:27,654] Trial 5 finished with value: 2135429344.3898723 and parameters: {'n_estimators': 1426, 'learning_rate': 0.02544166090938368, 'num_leaves': 148, 'max_depth': 8, 'min_child_samples': 22, 'subsample': 0.9878338511058234, 'colsample_bytree': 0.9100531293444458, 'reg_alpha': 2.854239907497756, 'reg_lambda': 1.1309571585271483}. Best is trial 4 with value: 212532397.77084133.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002121 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 2.12532e+08:  14%|█▍        | 7/50 [00:06<00:42,  1.01it/s]

[I 2025-11-10 00:17:28,475] Trial 6 finished with value: 400955802.2393345 and parameters: {'n_estimators': 1317, 'learning_rate': 0.15826541904647565, 'num_leaves': 50, 'max_depth': 2, 'min_child_samples': 9, 'subsample': 0.7301321323053057, 'colsample_bytree': 0.7554709158757928, 'reg_alpha': 2.7678419414850017e-06, 'reg_lambda': 0.28749982347407854}. Best is trial 4 with value: 212532397.77084133.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002188 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 2.12532e+08:  16%|█▌        | 8/50 [00:07<00:34,  1.23it/s]

[I 2025-11-10 00:17:28,894] Trial 7 finished with value: 1065621021.828747 and parameters: {'n_estimators': 906, 'learning_rate': 0.023200867504756827, 'num_leaves': 153, 'max_depth': 1, 'min_child_samples': 82, 'subsample': 0.6298202574719083, 'colsample_bytree': 0.9947547746402069, 'reg_alpha': 0.08916674715636537, 'reg_lambda': 6.143857495033091e-07}. Best is trial 4 with value: 212532397.77084133.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001623 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 2.12532e+08:  18%|█▊        | 9/50 [00:07<00:26,  1.57it/s]

[I 2025-11-10 00:17:29,144] Trial 8 finished with value: 2934665224.4459825 and parameters: {'n_estimators': 309, 'learning_rate': 0.11506408247250169, 'num_leaves': 190, 'max_depth': 12, 'min_child_samples': 79, 'subsample': 0.6296178606936361, 'colsample_bytree': 0.7433862914177091, 'reg_alpha': 1.1036250149900698e-07, 'reg_lambda': 0.5860448217200517}. Best is trial 4 with value: 212532397.77084133.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002020 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 2.12532e+08:  20%|██        | 10/50 [00:08<00:29,  1.36it/s]

[I 2025-11-10 00:17:30,096] Trial 9 finished with value: 1018437442.9789214 and parameters: {'n_estimators': 1360, 'learning_rate': 0.026946865572417687, 'num_leaves': 45, 'max_depth': 4, 'min_child_samples': 36, 'subsample': 0.8918424713352255, 'colsample_bytree': 0.8550229885420852, 'reg_alpha': 0.9658611176861268, 'reg_lambda': 0.0001778010520878397}. Best is trial 4 with value: 212532397.77084133.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002530 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 2.12532e+08:  20%|██        | 10/50 [00:08<00:29,  1.36it/s]

[I 2025-11-10 00:17:30,145] Trial 10 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002252 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 2.12532e+08:  24%|██▍       | 12/50 [00:08<00:18,  2.01it/s]

[I 2025-11-10 00:17:30,560] Trial 11 finished with value: 263214873.18957448 and parameters: {'n_estimators': 613, 'learning_rate': 0.07476013380530672, 'num_leaves': 84, 'max_depth': 4, 'min_child_samples': 8, 'subsample': 0.7605900525468514, 'colsample_bytree': 0.716828566938887, 'reg_alpha': 2.3535702826591285e-05, 'reg_lambda': 9.524519477399059e-06}. Best is trial 4 with value: 212532397.77084133.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001978 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 2.12532e+08:  26%|██▌       | 13/50 [00:09<00:17,  2.10it/s]

[I 2025-11-10 00:17:30,973] Trial 12 finished with value: 1256498554.6479578 and parameters: {'n_estimators': 560, 'learning_rate': 0.06922749265952521, 'num_leaves': 97, 'max_depth': 5, 'min_child_samples': 6, 'subsample': 0.7984165248678856, 'colsample_bytree': 0.6969758134873644, 'reg_alpha': 0.0004774862169081376, 'reg_lambda': 1.1114584658963438e-05}. Best is trial 4 with value: 212532397.77084133.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001870 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 2.12532e+08:  28%|██▊       | 14/50 [00:09<00:18,  1.91it/s]

[I 2025-11-10 00:17:31,627] Trial 13 finished with value: 1541213417.7790825 and parameters: {'n_estimators': 631, 'learning_rate': 0.04170210949254517, 'num_leaves': 96, 'max_depth': 16, 'min_child_samples': 51, 'subsample': 0.8715582230144556, 'colsample_bytree': 0.6818374590960379, 'reg_alpha': 1.0834725996338191e-05, 'reg_lambda': 8.350059889538447e-06}. Best is trial 4 with value: 212532397.77084133.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002274 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 2.12532e+08:  30%|███       | 15/50 [00:10<00:20,  1.67it/s]

[I 2025-11-10 00:17:32,419] Trial 14 finished with value: 3056809700.139082 and parameters: {'n_estimators': 699, 'learning_rate': 0.0904040404988496, 'num_leaves': 236, 'max_depth': -1, 'min_child_samples': 50, 'subsample': 0.7769047661288304, 'colsample_bytree': 0.831602864626308, 'reg_alpha': 0.004633011838829117, 'reg_lambda': 3.747397300489299e-08}. Best is trial 4 with value: 212532397.77084133.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002060 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-10 00:17:32,485] Trial 15 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002087 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGB

Best trial: 4. Best value: 2.12532e+08:  34%|███▍      | 17/50 [00:11<00:14,  2.20it/s]

[I 2025-11-10 00:17:32,962] Trial 16 finished with value: 1647666983.3030424 and parameters: {'n_estimators': 813, 'learning_rate': 0.04335359475030848, 'num_leaves': 214, 'max_depth': 3, 'min_child_samples': 68, 'subsample': 0.7135386813183082, 'colsample_bytree': 0.7238803118866866, 'reg_alpha': 3.154443500139828e-05, 'reg_lambda': 0.00997288368341444}. Best is trial 4 with value: 212532397.77084133.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001713 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 17. Best value: 1.63715e+08:  36%|███▌      | 18/50 [00:11<00:14,  2.23it/s]

[I 2025-11-10 00:17:33,396] Trial 17 finished with value: 163715288.80406964 and parameters: {'n_estimators': 466, 'learning_rate': 0.015543156806680385, 'num_leaves': 127, 'max_depth': 6, 'min_child_samples': 40, 'subsample': 0.8223954337893923, 'colsample_bytree': 0.6328419617855737, 'reg_alpha': 0.0002856845668470286, 'reg_lambda': 3.7650033348564974e-07}. Best is trial 17 with value: 163715288.80406964.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002063 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 18. Best value: 1.49289e+08:  38%|███▊      | 19/50 [00:12<00:14,  2.11it/s]

[I 2025-11-10 00:17:33,939] Trial 18 finished with value: 149289375.1753951 and parameters: {'n_estimators': 410, 'learning_rate': 0.015410501180994867, 'num_leaves': 128, 'max_depth': 11, 'min_child_samples': 40, 'subsample': 0.8366070683982902, 'colsample_bytree': 0.6026000789824475, 'reg_alpha': 0.003249200305560471, 'reg_lambda': 3.0169675464488206e-07}. Best is trial 18 with value: 149289375.1753951.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001968 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 18. Best value: 1.49289e+08:  40%|████      | 20/50 [00:12<00:15,  1.95it/s]

[I 2025-11-10 00:17:34,564] Trial 19 finished with value: 229316592.62404498 and parameters: {'n_estimators': 482, 'learning_rate': 0.017220167746503853, 'num_leaves': 121, 'max_depth': 11, 'min_child_samples': 42, 'subsample': 0.84284267570738, 'colsample_bytree': 0.6599382769662936, 'reg_alpha': 0.0005031869050599828, 'reg_lambda': 9.487487829944984e-08}. Best is trial 18 with value: 149289375.1753951.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 18. Best value: 1.49289e+08:  42%|████▏     | 21/50 [00:14<00:21,  1.38it/s]

[I 2025-11-10 00:17:35,851] Trial 20 finished with value: 3304299544.163934 and parameters: {'n_estimators': 1691, 'learning_rate': 0.034737747677750466, 'num_leaves': 74, 'max_depth': 15, 'min_child_samples': 97, 'subsample': 0.932416347341711, 'colsample_bytree': 0.6399689232786072, 'reg_alpha': 0.008225723319543129, 'reg_lambda': 8.802075014264656}. Best is trial 18 with value: 149289375.1753951.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001652 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 18. Best value: 1.49289e+08:  46%|████▌     | 23/50 [00:14<00:11,  2.33it/s]

[I 2025-11-10 00:17:35,903] Trial 21 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001610 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-10 00:17:35,953] Trial 22 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001845 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 18. Best value: 1.49289e+08:  48%|████▊     | 24/50 [00:15<00:13,  1.87it/s]

[I 2025-11-10 00:17:36,821] Trial 23 finished with value: 367105860.92439604 and parameters: {'n_estimators': 736, 'learning_rate': 0.019958742757995685, 'num_leaves': 127, 'max_depth': 10, 'min_child_samples': 31, 'subsample': 0.8795661527987996, 'colsample_bytree': 0.6007297844013268, 'reg_alpha': 0.00014144604283550518, 'reg_lambda': 1.5544977737472501e-06}. Best is trial 18 with value: 149289375.1753951.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001747 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-10 00:17:36,878] Trial 24 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002383 seconds.
You can set `force_col_wise=true` to remove the overhead.
[

Best trial: 18. Best value: 1.49289e+08:  52%|█████▏    | 26/50 [00:17<00:17,  1.37it/s]

[I 2025-11-10 00:17:38,846] Trial 25 finished with value: 366429964.0545632 and parameters: {'n_estimators': 1123, 'learning_rate': 0.030413970360092486, 'num_leaves': 210, 'max_depth': 14, 'min_child_samples': 16, 'subsample': 0.9011121983228496, 'colsample_bytree': 0.6550961543654156, 'reg_alpha': 0.010407692350080489, 'reg_lambda': 1.0459073839217013e-08}. Best is trial 18 with value: 149289375.1753951.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002380 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-10 00:17:38,899] Trial 26 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002025 seconds.
You can set `force_col_wise=true` to remove the overhead.
[Li

Best trial: 18. Best value: 1.49289e+08:  56%|█████▌    | 28/50 [00:17<00:10,  2.08it/s]

[I 2025-11-10 00:17:38,949] Trial 27 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001518 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-10 00:17:39,028] Trial 28 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002198 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 18. Best value: 1.49289e+08:  64%|██████▍   | 32/50 [00:18<00:05,  3.09it/s]

[I 2025-11-10 00:17:39,780] Trial 29 finished with value: 739715634.4531391 and parameters: {'n_estimators': 700, 'learning_rate': 0.03359071256820757, 'num_leaves': 73, 'max_depth': 9, 'min_child_samples': 47, 'subsample': 0.8151476671534786, 'colsample_bytree': 0.617932972444714, 'reg_alpha': 0.8045785191462241, 'reg_lambda': 0.0025244715620066035}. Best is trial 18 with value: 149289375.1753951.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001912 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-10 00:17:39,830] Trial 30 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002298 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] 

Best trial: 18. Best value: 1.49289e+08:  68%|██████▊   | 34/50 [00:18<00:03,  4.20it/s]

[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-10 00:17:39,995] Trial 33 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002306 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-10 00:17:40,042] Trial 34 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001545 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 18. Best value: 1.49289e+08:  72%|███████▏  | 36/50 [00:19<00:04,  3.25it/s]

[I 2025-11-10 00:17:40,917] Trial 35 finished with value: 770167735.2193261 and parameters: {'n_estimators': 838, 'learning_rate': 0.022148136011726056, 'num_leaves': 155, 'max_depth': 8, 'min_child_samples': 28, 'subsample': 0.8236541158757194, 'colsample_bytree': 0.6786690036440393, 'reg_alpha': 0.02808467255498192, 'reg_lambda': 2.95565849151222e-06}. Best is trial 18 with value: 149289375.1753951.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002402 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 18. Best value: 1.49289e+08:  72%|███████▏  | 36/50 [00:19<00:04,  3.25it/s]

[I 2025-11-10 00:17:40,981] Trial 36 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002134 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 18. Best value: 1.49289e+08:  76%|███████▌  | 38/50 [00:20<00:04,  2.91it/s]

[I 2025-11-10 00:17:41,765] Trial 37 finished with value: 290962720.24051917 and parameters: {'n_estimators': 536, 'learning_rate': 0.027056406149875267, 'num_leaves': 252, 'max_depth': 13, 'min_child_samples': 34, 'subsample': 0.7327720570257665, 'colsample_bytree': 0.7583548252686989, 'reg_alpha': 6.865727594660676, 'reg_lambda': 4.104407348541903e-07}. Best is trial 18 with value: 149289375.1753951.


Best trial: 18. Best value: 1.49289e+08:  76%|███████▌  | 38/50 [00:20<00:04,  2.91it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002305 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-10 00:17:41,822] Trial 38 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002433 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 18. Best value: 1.49289e+08:  80%|████████  | 40/50 [00:20<00:03,  2.69it/s]

[I 2025-11-10 00:17:42,630] Trial 39 finished with value: 1735846515.8459575 and parameters: {'n_estimators': 1227, 'learning_rate': 0.02235999478079804, 'num_leaves': 146, 'max_depth': 3, 'min_child_samples': 25, 'subsample': 0.9012684955604969, 'colsample_bytree': 0.6203169475595353, 'reg_alpha': 2.7314484812534436, 'reg_lambda': 3.76743232780413e-05}. Best is trial 18 with value: 149289375.1753951.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002305 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 40. Best value: 8.83178e+07:  82%|████████▏ | 41/50 [00:21<00:04,  2.25it/s]

[I 2025-11-10 00:17:43,412] Trial 40 finished with value: 88317810.71503218 and parameters: {'n_estimators': 719, 'learning_rate': 0.051028986261133925, 'num_leaves': 166, 'max_depth': 7, 'min_child_samples': 13, 'subsample': 0.6622888045212691, 'colsample_bytree': 0.8189257141968503, 'reg_alpha': 6.848208783295387e-06, 'reg_lambda': 1.1919148850901538e-06}. Best is trial 40 with value: 88317810.71503218.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001730 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 40. Best value: 8.83178e+07:  84%|████████▍ | 42/50 [00:22<00:04,  1.80it/s]

[I 2025-11-10 00:17:44,398] Trial 41 finished with value: 139322422.21579453 and parameters: {'n_estimators': 1000, 'learning_rate': 0.05567925573365424, 'num_leaves': 165, 'max_depth': 7, 'min_child_samples': 16, 'subsample': 0.6674966482925005, 'colsample_bytree': 0.8229880486655917, 'reg_alpha': 7.004342096035909e-06, 'reg_lambda': 1.0403079604176447e-06}. Best is trial 40 with value: 88317810.71503218.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002270 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 40. Best value: 8.83178e+07:  86%|████████▌ | 43/50 [00:23<00:04,  1.52it/s]

[I 2025-11-10 00:17:45,400] Trial 42 finished with value: 110963506.77229147 and parameters: {'n_estimators': 966, 'learning_rate': 0.05408212177728712, 'num_leaves': 168, 'max_depth': 7, 'min_child_samples': 13, 'subsample': 0.6050954810743541, 'colsample_bytree': 0.8204964019653215, 'reg_alpha': 2.567650428787836e-07, 'reg_lambda': 8.587361802971928e-07}. Best is trial 40 with value: 88317810.71503218.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002406 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 40. Best value: 8.83178e+07:  88%|████████▊ | 44/50 [00:24<00:04,  1.23it/s]

[I 2025-11-10 00:17:46,708] Trial 43 finished with value: 203799192.9962606 and parameters: {'n_estimators': 1233, 'learning_rate': 0.05164698046730441, 'num_leaves': 170, 'max_depth': 7, 'min_child_samples': 12, 'subsample': 0.6509890279157975, 'colsample_bytree': 0.8175524492482081, 'reg_alpha': 2.6627533106739945e-07, 'reg_lambda': 3.2105763782346835e-06}. Best is trial 40 with value: 88317810.71503218.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002354 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 40. Best value: 8.83178e+07:  88%|████████▊ | 44/50 [00:25<00:04,  1.23it/s]

[I 2025-11-10 00:17:47,563] Trial 44 finished with value: 1387816012.1685846 and parameters: {'n_estimators': 988, 'learning_rate': 0.05170835698419771, 'num_leaves': 161, 'max_depth': 5, 'min_child_samples': 19, 'subsample': 0.6079752825709921, 'colsample_bytree': 0.8509569201160443, 'reg_alpha': 3.767811535680622e-08, 'reg_lambda': 8.387352082931081e-07}. Best is trial 40 with value: 88317810.71503218.


Best trial: 40. Best value: 8.83178e+07:  90%|█████████ | 45/50 [00:25<00:04,  1.21it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002138 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 40. Best value: 8.83178e+07:  92%|█████████▏| 46/50 [00:26<00:03,  1.13it/s]

[I 2025-11-10 00:17:48,615] Trial 45 finished with value: 221660806.85394698 and parameters: {'n_estimators': 963, 'learning_rate': 0.07559450550406292, 'num_leaves': 196, 'max_depth': 8, 'min_child_samples': 12, 'subsample': 0.6628244362129212, 'colsample_bytree': 0.8022897225733797, 'reg_alpha': 3.6576022808059927e-06, 'reg_lambda': 2.805934311472919e-07}. Best is trial 40 with value: 88317810.71503218.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001555 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 40. Best value: 8.83178e+07:  94%|█████████▍| 47/50 [00:27<00:02,  1.22it/s]

[I 2025-11-10 00:17:49,255] Trial 46 finished with value: 3836175932.2329164 and parameters: {'n_estimators': 867, 'learning_rate': 0.06260159662835478, 'num_leaves': 153, 'max_depth': 4, 'min_child_samples': 22, 'subsample': 0.6418600817711714, 'colsample_bytree': 0.8624063938492176, 'reg_alpha': 1.004858390133395e-05, 'reg_lambda': 4.403650109397335e-06}. Best is trial 40 with value: 88317810.71503218.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 40. Best value: 8.83178e+07:  96%|█████████▌| 48/50 [00:28<00:01,  1.28it/s]

[I 2025-11-10 00:17:49,936] Trial 47 finished with value: 746701405.8078946 and parameters: {'n_estimators': 745, 'learning_rate': 0.10058013171415466, 'num_leaves': 181, 'max_depth': 6, 'min_child_samples': 18, 'subsample': 0.6015590244350696, 'colsample_bytree': 0.8987322232550009, 'reg_alpha': 9.885208233032772e-07, 'reg_lambda': 0.00012620915431170874}. Best is trial 40 with value: 88317810.71503218.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002077 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 40. Best value: 8.83178e+07:  98%|█████████▊| 49/50 [00:29<00:00,  1.07it/s]

[I 2025-11-10 00:17:51,239] Trial 48 finished with value: 378890230.76905835 and parameters: {'n_estimators': 1528, 'learning_rate': 0.05144005618133025, 'num_leaves': 168, 'max_depth': 5, 'min_child_samples': 9, 'subsample': 0.6820606545904433, 'colsample_bytree': 0.8300295282705031, 'reg_alpha': 7.318821527077402e-08, 'reg_lambda': 1.8572801272488763e-05}. Best is trial 40 with value: 88317810.71503218.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002132 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30310
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 140
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 40. Best value: 8.83178e+07: 100%|██████████| 50/50 [00:29<00:00,  1.69it/s]


[I 2025-11-10 00:17:51,341] Trial 49 pruned. Trial was pruned at iteration 10.

=== Mejores hiperparámetros ===
{'n_estimators': 719, 'learning_rate': 0.051028986261133925, 'num_leaves': 166, 'max_depth': 7, 'min_child_samples': 13, 'subsample': 0.6622888045212691, 'colsample_bytree': 0.8189257141968503, 'reg_alpha': 6.848208783295387e-06, 'reg_lambda': 1.1919148850901538e-06}
Mejor RMSE valid: 88317810.7150
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002298 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 34198
[LightGBM] [Info] Number of data points in the train set: 864, number of used features: 140
[LightGBM] [Info] Start training from score 27879.464988

=== Métricas en TEST ===
MSE : 23,460,506.40
RMSE: 23,460,506.40
MAE : 3,630.52
R²  : -0.4794


In [38]:
# ==============================
# Optuna para RandomForestRegressor (split temporal) + "pruning" pasivo
# ==============================
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner

SEED = 42
VAL_FRAC = 0.20    # último 20% del train como valid
N_TRIALS = 60      # subí/bajá según tiempo

# --------- Split temporal (train -> train/valid ; test queda intacto) ---------
X_train_all = x_train.copy()
y_train_all = y_train.copy()
X_test = x_test.copy()
y_test = y_test.copy()

n = len(X_train_all)
n_val = int(np.floor(n * VAL_FRAC))
n_tr = n - n_val
X_tr, y_tr = X_train_all.iloc[:n_tr], y_train_all[:n_tr]
X_val, y_val = X_train_all.iloc[n_tr:], y_train_all[n_tr:]

print(f"Train: {X_tr.shape}, Valid: {X_val.shape}, Test: {X_test.shape}")

# --------- Función objetivo ---------
def objective(trial: optuna.Trial) -> float:
    # Espacio de búsqueda (robusto para RF sklearn)
    max_depth_choice = trial.suggest_categorical("max_depth", [None, 6, 10, 16, 24, 32])
    max_features_choice = trial.suggest_categorical("max_features", ["sqrt", "log2", 0.5, 0.7, 1.0])
    bootstrap_choice = trial.suggest_categorical("bootstrap", [True, False])

    params = {
        "n_estimators": trial.suggest_int("n_estimators", 200, 2000),
        "criterion": "squared_error",
        "max_depth": max_depth_choice,                 # None = sin límite
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 20),
        "max_features": max_features_choice,           # fracción o estrategia
        "bootstrap": bootstrap_choice,
        "random_state": SEED,
        "n_jobs": -1,
    }

    # Si se usa bootstrap, probar muestreo parcial de filas
    if bootstrap_choice:
        params["max_samples"] = trial.suggest_float("max_samples", 0.5, 1.0)

    # Modelo
    model = RandomForestRegressor(**params)

    # Entrenar y evaluar en VALID (métrica a minimizar: RMSE)
    model.fit(X_tr, y_tr)
    y_pred_val = model.predict(X_val)
    rmse_val = mean_squared_error(y_val, y_pred_val)

    # Registrar también como “valor intermedio” (no habrá pruning real, pero queda logueado)
    trial.report(rmse_val, step=0)
    # if trial.should_prune():   # en RF no habrá pasos intermedios útiles
    #     raise optuna.exceptions.TrialPruned()

    return rmse_val

# --------- Estudio Optuna ---------
study = optuna.create_study(
    direction="minimize",
    sampler=TPESampler(seed=SEED),
    pruner=MedianPruner(n_warmup_steps=5)  # no tendrá efecto real aquí, pero se deja por consistencia
)
study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=True)

print("\n=== Mejores hiperparámetros (VALID) ===")
print(study.best_params)
print(f"Mejor RMSE valid: {study.best_value:.4f}")

# --------- Re-entrenar con los mejores params (train+valid) ---------
best_params = study.best_params.copy()
best_model = RandomForestRegressor(**best_params, random_state=SEED, n_jobs=-1)

best_model.fit(X_train_all, y_train_all)

# --------- Evaluación en TEST ---------
y_pred_test = best_model.predict(X_test)
mse  = mean_squared_error(y_test, y_pred_test)
rmse = mean_squared_error(y_test, y_pred_test)
mae  = mean_absolute_error(y_test, y_pred_test)
r2   = r2_score(y_test, y_pred_test)

print("\n=== Métricas en TEST ===")
print(f"MSE : {mse:,.2f}")
print(f"RMSE: {rmse:,.2f}")
print(f"MAE : {mae:,.2f}")
print(f"R²  : {r2:.4f}")


[I 2025-11-09 21:51:50,957] A new study created in memory with name: no-name-603c26c3-86db-426e-b8c4-1d444f52b993


Train: (692, 140), Valid: (172, 140), Test: (94, 140)


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:51:51,506] Trial 0 finished with value: 77228746.58032861 and parameters: {'max_depth': 6, 'max_features': 'log2', 'bootstrap': True, 'n_estimators': 582, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_samples': 0.6521211214797689}. Best is trial 0 with value: 77228746.58032861.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:51:53,886] Trial 1 finished with value: 23312831.98173633 and parameters: {'max_depth': 16, 'max_features': 0.5, 'bootstrap': True, 'n_estimators': 1294, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_samples': 0.9744427686266666}. Best is trial 1 with value: 23312831.98173633.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:51:56,199] Trial 2 finished with value: 22746390.011101477 and parameters: {'max_depth': None, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 1136, 'min_samples_split': 12, 'min_samples_leaf': 4, 'max_samples': 0.9847923138822793}. Best is trial 2 with value: 22746390.011101477.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:51:57,370] Trial 3 finished with value: 21000825.012116548 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 705, 'min_samples_split': 12, 'min_samples_leaf': 3, 'max_samples': 0.9010984903770198}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:00,005] Trial 4 finished with value: 22123050.65909423 and parameters: {'max_depth': 6, 'max_features': 0.5, 'bootstrap': False, 'n_estimators': 1322, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:05,313] Trial 5 finished with value: 38361591.5152549 and parameters: {'max_depth': 24, 'max_features': 1.0, 'bootstrap': False, 'n_estimators': 970, 'min_samples_split': 2, 'min_samples_leaf': 3}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:06,546] Trial 6 finished with value: 64321409.69544828 and parameters: {'max_depth': 24, 'max_features': 'log2', 'bootstrap': False, 'n_estimators': 1655, 'min_samples_split': 14, 'min_samples_leaf': 18}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:08,569] Trial 7 finished with value: 75361160.1292173 and parameters: {'max_depth': 32, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1119, 'min_samples_split': 9, 'min_samples_leaf': 5, 'max_samples': 0.5599326836668415}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:09,996] Trial 8 finished with value: 72054756.88996035 and parameters: {'max_depth': 6, 'max_features': 'sqrt', 'bootstrap': True, 'n_estimators': 1297, 'min_samples_split': 11, 'min_samples_leaf': 2, 'max_samples': 0.6393232321183058}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:10,861] Trial 9 finished with value: 59444830.667495824 and parameters: {'max_depth': 24, 'max_features': 'log2', 'bootstrap': False, 'n_estimators': 1164, 'min_samples_split': 3, 'min_samples_leaf': 17}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:11,282] Trial 10 finished with value: 78767038.61827034 and parameters: {'max_depth': 10, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 232, 'min_samples_split': 20, 'min_samples_leaf': 9, 'max_samples': 0.8271882613853765}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:15,189] Trial 11 finished with value: 153902262.69463122 and parameters: {'max_depth': 6, 'max_features': 0.5, 'bootstrap': False, 'n_estimators': 1962, 'min_samples_split': 16, 'min_samples_leaf': 9}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:16,579] Trial 12 finished with value: 80832771.20952387 and parameters: {'max_depth': 6, 'max_features': 0.5, 'bootstrap': False, 'n_estimators': 730, 'min_samples_split': 8, 'min_samples_leaf': 13}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:20,728] Trial 13 finished with value: 21467735.789901152 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': False, 'n_estimators': 1589, 'min_samples_split': 8, 'min_samples_leaf': 7}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:23,769] Trial 14 finished with value: 65726284.83061098 and parameters: {'max_depth': 10, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 1725, 'min_samples_split': 14, 'min_samples_leaf': 7, 'max_samples': 0.8544918010410028}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:25,369] Trial 15 finished with value: 192912325.50117734 and parameters: {'max_depth': 16, 'max_features': 0.7, 'bootstrap': False, 'n_estimators': 517, 'min_samples_split': 18, 'min_samples_leaf': 11}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:30,916] Trial 16 finished with value: 22257926.435407538 and parameters: {'max_depth': 32, 'max_features': 0.7, 'bootstrap': False, 'n_estimators': 1576, 'min_samples_split': 11, 'min_samples_leaf': 6}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:31,766] Trial 17 finished with value: 62921925.56133141 and parameters: {'max_depth': None, 'max_features': 'sqrt', 'bootstrap': True, 'n_estimators': 751, 'min_samples_split': 7, 'min_samples_leaf': 14, 'max_samples': 0.8246299053201184}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:32,394] Trial 18 finished with value: 21641050.899984695 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': False, 'n_estimators': 208, 'min_samples_split': 13, 'min_samples_leaf': 8}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:35,854] Trial 19 finished with value: 21212155.287602205 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 1980, 'min_samples_split': 16, 'min_samples_leaf': 1, 'max_samples': 0.9067985749948363}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:37,281] Trial 20 finished with value: 122253425.24478069 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 886, 'min_samples_split': 16, 'min_samples_leaf': 11, 'max_samples': 0.9115084181669175}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:40,450] Trial 21 finished with value: 22430033.13800691 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 1976, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_samples': 0.7387761829297109}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:43,519] Trial 22 finished with value: 47287695.00636544 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 1802, 'min_samples_split': 16, 'min_samples_leaf': 6, 'max_samples': 0.9132001366015811}. Best is trial 3 with value: 21000825.012116548.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:45,813] Trial 23 finished with value: 20853949.203559943 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 1442, 'min_samples_split': 18, 'min_samples_leaf': 1, 'max_samples': 0.7538460826598823}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:48,228] Trial 24 finished with value: 21300883.49672919 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 1500, 'min_samples_split': 19, 'min_samples_leaf': 1, 'max_samples': 0.7451894157515093}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:50,431] Trial 25 finished with value: 62627098.76139105 and parameters: {'max_depth': None, 'max_features': 'sqrt', 'bootstrap': True, 'n_estimators': 1847, 'min_samples_split': 17, 'min_samples_leaf': 4, 'max_samples': 0.8956751977184135}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:52,825] Trial 26 finished with value: 78072449.39386573 and parameters: {'max_depth': 10, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1431, 'min_samples_split': 14, 'min_samples_leaf': 20, 'max_samples': 0.7706973859857333}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:53,643] Trial 27 finished with value: 23654733.314759936 and parameters: {'max_depth': 16, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 438, 'min_samples_split': 18, 'min_samples_leaf': 1, 'max_samples': 0.6869904444457483}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:54,935] Trial 28 finished with value: 36433696.37641546 and parameters: {'max_depth': 32, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 946, 'min_samples_split': 20, 'min_samples_leaf': 3, 'max_samples': 0.5114371860526319}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:55,771] Trial 29 finished with value: 63753584.86112579 and parameters: {'max_depth': 6, 'max_features': 'log2', 'bootstrap': True, 'n_estimators': 728, 'min_samples_split': 15, 'min_samples_leaf': 4, 'max_samples': 0.9414810874822069}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:56,449] Trial 30 finished with value: 42769863.009021565 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 376, 'min_samples_split': 18, 'min_samples_leaf': 5, 'max_samples': 0.8711171468959507}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:52:58,780] Trial 31 finished with value: 21176964.50938543 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 1453, 'min_samples_split': 19, 'min_samples_leaf': 1, 'max_samples': 0.7703217018248536}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:01,072] Trial 32 finished with value: 21215991.87326194 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 1410, 'min_samples_split': 19, 'min_samples_leaf': 3, 'max_samples': 0.7865998554842213}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:03,017] Trial 33 finished with value: 22199057.234999485 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 1260, 'min_samples_split': 17, 'min_samples_leaf': 1, 'max_samples': 0.6970314970019539}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:06,291] Trial 34 finished with value: 21806574.65581325 and parameters: {'max_depth': 6, 'max_features': 0.7, 'bootstrap': True, 'n_estimators': 1869, 'min_samples_split': 13, 'min_samples_leaf': 2, 'max_samples': 0.8280703961503995}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:10,531] Trial 35 finished with value: 21111433.26899199 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1739, 'min_samples_split': 19, 'min_samples_leaf': 3, 'max_samples': 0.8059748042810768}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:14,495] Trial 36 finished with value: 21692480.204276428 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1730, 'min_samples_split': 20, 'min_samples_leaf': 3, 'max_samples': 0.7835167562598488}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:17,540] Trial 37 finished with value: 49013196.55030176 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1478, 'min_samples_split': 19, 'min_samples_leaf': 5, 'max_samples': 0.7238434483608429}. Best is trial 23 with value: 20853949.203559943.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:21,003] Trial 38 finished with value: 20829057.9342247 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1213, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_samples': 0.8107371168752001}. Best is trial 38 with value: 20829057.9342247.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:23,726] Trial 39 finished with value: 29069975.343242556 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1043, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_samples': 0.8080601441057531}. Best is trial 38 with value: 20829057.9342247.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:26,083] Trial 40 finished with value: 81014782.91536862 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1242, 'min_samples_split': 6, 'min_samples_leaf': 6, 'max_samples': 0.6355538238182616}. Best is trial 38 with value: 20829057.9342247.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:31,160] Trial 41 finished with value: 20470302.901988517 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1592, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_samples': 0.8533199463401034}. Best is trial 41 with value: 20470302.901988517.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:35,561] Trial 42 finished with value: 19763045.576949656 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1332, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_samples': 0.8579978727787488}. Best is trial 42 with value: 19763045.576949656.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:40,167] Trial 43 finished with value: 19708208.808814075 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1342, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_samples': 0.8626438126158126}. Best is trial 43 with value: 19708208.808814075.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:44,519] Trial 44 finished with value: 19816178.41903186 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1343, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_samples': 0.8420370874086704}. Best is trial 43 with value: 19708208.808814075.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:48,281] Trial 45 finished with value: 19598476.052012965 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1183, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_samples': 0.8654367662002576}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:52,882] Trial 46 finished with value: 19757186.59481941 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1362, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_samples': 0.9501669845578695}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:53:56,422] Trial 47 finished with value: 23119635.03227047 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1079, 'min_samples_split': 3, 'min_samples_leaf': 4, 'max_samples': 0.9903814305643676}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:54:01,799] Trial 48 finished with value: 19770650.89731384 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1331, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_samples': 0.9507083751873139}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:54:06,079] Trial 49 finished with value: 32352020.590352625 and parameters: {'max_depth': 24, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1341, 'min_samples_split': 4, 'min_samples_leaf': 5, 'max_samples': 0.9562594126715206}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:54:10,655] Trial 50 finished with value: 20007658.51754717 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1128, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_samples': 0.9534753091952318}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:54:15,387] Trial 51 finished with value: 19603991.369633764 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1335, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_samples': 0.8753902641229886}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:54:18,782] Trial 52 finished with value: 25646338.64775046 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1183, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_samples': 0.8747035490340677}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:54:21,480] Trial 53 finished with value: 24881324.270099606 and parameters: {'max_depth': 16, 'max_features': 0.5, 'bootstrap': True, 'n_estimators': 1375, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_samples': 0.9330831077766787}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:54:26,322] Trial 54 finished with value: 243564173.39090246 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': False, 'n_estimators': 1307, 'min_samples_split': 2, 'min_samples_leaf': 14}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:54:31,545] Trial 55 finished with value: 19735897.002372194 and parameters: {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1512, 'min_samples_split': 3, 'min_samples_leaf': 3, 'max_samples': 0.9746584147315998}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:54:33,568] Trial 56 finished with value: 82424935.73174387 and parameters: {'max_depth': 16, 'max_features': 'log2', 'bootstrap': True, 'n_estimators': 1551, 'min_samples_split': 3, 'min_samples_leaf': 3, 'max_samples': 0.8827368956402636}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:54:38,000] Trial 57 finished with value: 570131179.9076813 and parameters: {'max_depth': None, 'max_features': 1.0, 'bootstrap': False, 'n_estimators': 1030, 'min_samples_split': 6, 'min_samples_leaf': 9}. Best is trial 45 with value: 19598476.052012965.


  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:54:39,766] Trial 58 finished with value: 58303833.51420128 and parameters: {'max_depth': 24, 'max_features': 'sqrt', 'bootstrap': True, 'n_estimators': 1542, 'min_samples_split': 2, 'min_samples_leaf': 7, 'max_samples': 0.9765057871885373}. Best is trial 45 with value: 19598476.052012965.


Best trial: 45. Best value: 1.95985e+07: 100%|██████████| 60/60 [02:52<00:00,  2.87s/it]
  return fit_method(estimator, *args, **kwargs)


[I 2025-11-09 21:54:42,958] Trial 59 finished with value: 88469412.59344965 and parameters: {'max_depth': 10, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1647, 'min_samples_split': 3, 'min_samples_leaf': 17, 'max_samples': 0.9163049595955386}. Best is trial 45 with value: 19598476.052012965.

=== Mejores hiperparámetros (VALID) ===
{'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1183, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_samples': 0.8654367662002576}
Mejor RMSE valid: 19598476.0520

=== Métricas en TEST ===
MSE : 11,225,648.41
RMSE: 11,225,648.41
MAE : 2,629.43
R²  : 0.2921


In [96]:
"""El mejor modelo fue RFRegressor con los siguientes hiperparámetros:
    {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1183, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_samples': 0.8654367662002576}"""
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
#Entrenemos un modelo con esos hiperparámetros

best_model = RandomForestRegressor(
    n_estimators=1183,
    max_depth=16,
    max_features=1.0,
    bootstrap=True,
    min_samples_split=2,
    min_samples_leaf=2,
    max_samples=0.8654367662002576,
    random_state=42,
    n_jobs=-1
)
from sklearn.preprocessing import PowerTransformer
pt = PowerTransformer(method='yeo-johnson')
x_train_ready = pd.DataFrame(pt.fit_transform(x_train), columns=x_train.columns)
x_test_ready = pd.DataFrame(pt.transform(x_test), columns=x_test.columns)

best_model.fit(x_train_ready, y_train)
y_pred_test = best_model.predict(x_test_ready)
mse  = mean_squared_error(y_test, y_pred_test)
rmse = mean_squared_error(y_test, y_pred_test)
mae  = mean_absolute_error(y_test, y_pred_test)
r2   = r2_score(y_test, y_pred_test)
print("\n=== Métricas en TEST del mejor modelo entrenado manualmente ===")
print(f"MSE : {mse:,.2f}")
print(f"RMSE: {rmse:,.2f}")
print(f"MAE : {mae:,.2f}")
print(f"R²  : {r2:.4f}")


  return fit_method(estimator, *args, **kwargs)



=== Métricas en TEST del mejor modelo entrenado manualmente ===
MSE : 11,223,523.16
RMSE: 11,223,523.16
MAE : 2,628.99
R²  : 0.2923


In [110]:
#quiero los valores 
print(feature_importances["feature"].tolist())

['Frio (Kw)', 'Frio_roll_mean_7_lag1', 'Sala Maq (Kw)', 'Envasado (Kw)', 'Frio_roll_mean_14_lag1', 'Servicios (Kw)', 'Prod Agua (Kw)', 'KW Gral Planta', 'Linea 2 (Kw)', 'CO 2 / Hl', 'EE Caldera / Hl', 'Cocina (Kw)', 'ET Linea 5/Hl', 'VAPOR DE LINEA 4 KG', 'Linea 3 (Kw)', 'Resto Serv (Kw)', 'Conversion Kg/Mj', 'Restos Planta (Kw)', 'Hl Cerveza L2', 'Frio_roll_mean_3_lag1']


In [97]:
feature_importances = best_model.feature_importances_
feature_names = x_train.columns
feature_importances = pd.DataFrame({'feature': feature_names, 'importance': feature_importances})
feature_importances = feature_importances.sort_values(by='importance', ascending=False)
feature_importances = feature_importances[:20]
print("Feature importances:")
print(feature_importances)
x_train = x_train[feature_importances['feature']]
x_test = x_test[feature_importances['feature']]

Feature importances:
                    feature  importance
76                Frio (Kw)    0.415853
134   Frio_roll_mean_7_lag1    0.140973
72            Sala Maq (Kw)    0.098940
67            Envasado (Kw)    0.063350
136  Frio_roll_mean_14_lag1    0.028401
71           Servicios (Kw)    0.012200
78           Prod Agua (Kw)    0.010649
81           KW Gral Planta    0.006070
68             Linea 2 (Kw)    0.005377
50                CO 2 / Hl    0.004458
13          EE Caldera / Hl    0.004136
66              Cocina (Kw)    0.003900
38            ET Linea 5/Hl    0.003631
109     VAPOR DE LINEA 4 KG    0.003530
69             Linea 3 (Kw)    0.003528
79          Resto Serv (Kw)    0.003396
98         Conversion Kg/Mj    0.003335
80       Restos Planta (Kw)    0.003300
59            Hl Cerveza L2    0.003267
132   Frio_roll_mean_3_lag1    0.003221


In [65]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
#Entrenemos un modelo con esos hiperparámetros

best_model = RandomForestRegressor(
    n_estimators=1183,
    max_depth=16,
    max_features=1.0,
    bootstrap=True,
    min_samples_split=2,
    min_samples_leaf=2,
    max_samples=0.8654367662002576,
    random_state=42,
    n_jobs=-1
)
from sklearn.preprocessing import PowerTransformer
pt = PowerTransformer(method='yeo-johnson')
x_train_ready = pd.DataFrame(pt.fit_transform(x_train), columns=x_train.columns)
x_test_ready = pd.DataFrame(pt.transform(x_test), columns=x_test.columns)

best_model.fit(x_train_ready, y_train)
y_pred_test = best_model.predict(x_test_ready)
mse  = mean_squared_error(y_test, y_pred_test)
rmse = mean_squared_error(y_test, y_pred_test)
mae  = mean_absolute_error(y_test, y_pred_test)
r2   = r2_score(y_test, y_pred_test)
print("\n=== Métricas en TEST del mejor modelo entrenado manualmente ===")
print(f"MSE : {mse:,.2f}")
print(f"RMSE: {rmse:,.2f}")
print(f"MAE : {mae:,.2f}")
print(f"R²  : {r2:.4f}")

  return fit_method(estimator, *args, **kwargs)



=== Métricas en TEST del mejor modelo entrenado manualmente ===
MSE : 11,223,523.16
RMSE: 11,223,523.16
MAE : 2,628.99
R²  : 0.2923


In [17]:
# ==============================
# Optuna + Pruning para LGBM
# ==============================
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from lightgbm import LGBMRegressor
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
from optuna.integration import LightGBMPruningCallback

SEED = 42
VAL_FRAC = 0.20   # último 20% del train como validación temporal
N_TRIALS = 50     # ajustá según tiempo disponible

# --------- Split temporal (train -> train/valid ; test queda intacto) ---------
X_train_all = x_train.copy()
y_train_all = y_train.copy()
X_test = x_test.copy()
y_test = y_test.copy()

n = len(X_train_all)
n_val = int(np.floor(n * VAL_FRAC))
n_tr = n - n_val
X_tr, y_tr = X_train_all.iloc[:n_tr], y_train_all[:n_tr]
X_val, y_val = X_train_all.iloc[n_tr:], y_train_all[n_tr:]

print(f"Train: {X_tr.shape}, Valid: {X_val.shape}, Test: {X_test.shape}")

# --------- Función objetivo para Optuna ---------
def objective(trial: optuna.Trial) -> float:
    # Espacio de búsqueda (razonable para baseline)
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 300, 2000),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 31, 255),
        "max_depth": trial.suggest_int("max_depth", -1, 16),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
        "random_state": SEED,
        "n_jobs": -1
    }

    # Modelo
    model = LGBMRegressor(**params)

    # Pruning callback: monitorea 'rmse' en el primer eval_set (valid_0)
    pruning_cb = LightGBMPruningCallback(trial, metric="rmse", valid_name="valid_0")

    # Entrena con early stopping y pruning
    model.fit(
        X_tr, y_tr,
        eval_set=[(X_val, y_val)],
        eval_metric="rmse",
        callbacks=[pruning_cb],
    )

    # Predicción en valid y métrica a minimizar (RMSE)
    y_pred_val = model.predict(X_val)
    rmse_val = mean_squared_error(y_val, y_pred_val)
    return rmse_val

# --------- Estudio Optuna ---------
study = optuna.create_study(
    direction="minimize",
    sampler=TPESampler(seed=SEED),
    pruner=MedianPruner(n_warmup_steps=10)
)
study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=True)

print("\n=== Mejores hiperparámetros ===")
print(study.best_params)
print(f"Mejor RMSE valid: {study.best_value:.4f}")

# --------- Re-entrenar con los mejores params (train+valid) ---------
best_params = study.best_params.copy()
best_model = LGBMRegressor(**best_params, random_state=SEED, n_jobs=-1)

best_model.fit(
    X_train_all, y_train_all,
    eval_set=[(X_val, y_val)],  # opcional, solo para logging
    eval_metric="rmse",
)

# --------- Evaluación en TEST ---------
y_pred_test = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred_test)
rmse = mean_squared_error(y_test, y_pred_test)
mae = mean_absolute_error(y_test, y_pred_test)
r2  = r2_score(y_test, y_pred_test)

print("\n=== Métricas en TEST ===")
print(f"MSE : {mse:,.2f}")
print(f"RMSE: {rmse:,.2f}")
print(f"MAE : {mae:,.2f}")
print(f"R²  : {r2:.4f}")


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-11-09 22:41:41,215] A new study created in memory with name: no-name-ab816747-189c-43b5-8fc0-6e4aefb7cbd0


Train: (692, 11), Valid: (172, 11), Test: (94, 11)


  0%|          | 0/50 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171

Best trial: 0. Best value: 3.46919e+08:   2%|▏         | 1/50 [00:00<00:17,  2.85it/s]


[I 2025-11-09 22:41:41,564] Trial 0 finished with value: 346919267.52484727 and parameters: {'n_estimators': 937, 'learning_rate': 0.17254716573280354, 'num_leaves': 195, 'max_depth': 9, 'min_child_samples': 19, 'subsample': 0.662397808134481, 'colsample_bytree': 0.6232334448672797, 'reg_alpha': 0.6245760287469893, 'reg_lambda': 0.002570603566117598}. Best is trial 0 with value: 346919267.52484727.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000223 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 1. Best value: 2.54759e+08:   4%|▍         | 2/50 [00:01<00:29,  1.65it/s]

[I 2025-11-09 22:41:42,350] Trial 1 finished with value: 254758514.9854434 and parameters: {'n_estimators': 1504, 'learning_rate': 0.010636066512540286, 'num_leaves': 249, 'max_depth': 13, 'min_child_samples': 25, 'subsample': 0.6727299868828402, 'colsample_bytree': 0.6733618039413735, 'reg_alpha': 5.472429642032198e-06, 'reg_lambda': 0.00052821153945323}. Best is trial 1 with value: 254758514.9854434.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000150 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 1. Best value: 2.54759e+08:   6%|▌         | 3/50 [00:01<00:19,  2.45it/s]

[I 2025-11-09 22:41:42,519] Trial 2 finished with value: 693751051.2934923 and parameters: {'n_estimators': 1034, 'learning_rate': 0.023927528765580644, 'num_leaves': 168, 'max_depth': 1, 'min_child_samples': 33, 'subsample': 0.7465447373174767, 'colsample_bytree': 0.7824279936868144, 'reg_alpha': 0.1165691561324743, 'reg_lambda': 6.267062696005991e-07}. Best is trial 1 with value: 254758514.9854434.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000231 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 3. Best value: 1.30091e+08:   8%|▊         | 4/50 [00:01<00:20,  2.27it/s]

[I 2025-11-09 22:41:43,011] Trial 3 finished with value: 130090702.2983624 and parameters: {'n_estimators': 1174, 'learning_rate': 0.05898602410432694, 'num_leaves': 41, 'max_depth': 9, 'min_child_samples': 21, 'subsample': 0.6260206371941118, 'colsample_bytree': 0.9795542149013333, 'reg_alpha': 4.905556676028774, 'reg_lambda': 0.18861495878553936}. Best is trial 3 with value: 130090702.2983624.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000189 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 9.66011e+07:  10%|█         | 5/50 [00:02<00:17,  2.63it/s]

[I 2025-11-09 22:41:43,282] Trial 4 finished with value: 96601129.86159487 and parameters: {'n_estimators': 818, 'learning_rate': 0.013399060561509796, 'num_leaves': 184, 'max_depth': 6, 'min_child_samples': 16, 'subsample': 0.798070764044508, 'colsample_bytree': 0.6137554084460873, 'reg_alpha': 1.527156759251193, 'reg_lambda': 2.133142332373004e-06}. Best is trial 4 with value: 96601129.86159487.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000139 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 9.66011e+07:  12%|█▏        | 6/50 [00:02<00:23,  1.89it/s]

[I 2025-11-09 22:41:44,104] Trial 5 finished with value: 270518111.803419 and parameters: {'n_estimators': 1426, 'learning_rate': 0.02544166090938368, 'num_leaves': 148, 'max_depth': 8, 'min_child_samples': 22, 'subsample': 0.9878338511058234, 'colsample_bytree': 0.9100531293444458, 'reg_alpha': 2.854239907497756, 'reg_lambda': 1.1309571585271483}. Best is trial 4 with value: 96601129.86159487.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000154 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 9.66011e+07:  14%|█▍        | 7/50 [00:03<00:23,  1.85it/s]

[I 2025-11-09 22:41:44,666] Trial 6 finished with value: 892175855.2647876 and parameters: {'n_estimators': 1317, 'learning_rate': 0.15826541904647565, 'num_leaves': 50, 'max_depth': 2, 'min_child_samples': 9, 'subsample': 0.7301321323053057, 'colsample_bytree': 0.7554709158757928, 'reg_alpha': 2.7678419414850017e-06, 'reg_lambda': 0.28749982347407854}. Best is trial 4 with value: 96601129.86159487.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000219 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-09 22:41:44,681] Trial 7 pruned. Trial was pruned at iteration 17.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000193 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [I

Best trial: 4. Best value: 9.66011e+07:  18%|█▊        | 9/50 [00:03<00:12,  3.18it/s]

[I 2025-11-09 22:41:44,811] Trial 8 finished with value: 284255938.0706264 and parameters: {'n_estimators': 309, 'learning_rate': 0.11506408247250169, 'num_leaves': 190, 'max_depth': 12, 'min_child_samples': 79, 'subsample': 0.6296178606936361, 'colsample_bytree': 0.7433862914177091, 'reg_alpha': 1.1036250149900698e-07, 'reg_lambda': 0.5860448217200517}. Best is trial 4 with value: 96601129.86159487.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000121 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 4. Best value: 9.66011e+07:  20%|██        | 10/50 [00:04<00:15,  2.53it/s]

[I 2025-11-09 22:41:45,439] Trial 9 finished with value: 597120391.9384733 and parameters: {'n_estimators': 1360, 'learning_rate': 0.026946865572417687, 'num_leaves': 45, 'max_depth': 4, 'min_child_samples': 36, 'subsample': 0.8918424713352255, 'colsample_bytree': 0.8550229885420852, 'reg_alpha': 0.9658611176861268, 'reg_lambda': 0.0001778010520878397}. Best is trial 4 with value: 96601129.86159487.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-09 22:41:45,467] Trial 10 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000164 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [

Best trial: 4. Best value: 9.66011e+07:  24%|██▍       | 12/50 [00:04<00:11,  3.37it/s]

[I 2025-11-09 22:41:45,767] Trial 11 finished with value: 386914725.4632142 and parameters: {'n_estimators': 558, 'learning_rate': 0.06931012670622363, 'num_leaves': 103, 'max_depth': 10, 'min_child_samples': 50, 'subsample': 0.792758531865332, 'colsample_bytree': 0.9760875973138388, 'reg_alpha': 0.0029151336209232823, 'reg_lambda': 1.1805230865598731e-05}. Best is trial 4 with value: 96601129.86159487.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000198 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 12. Best value: 7.43483e+07:  26%|██▌       | 13/50 [00:05<00:13,  2.69it/s]

[I 2025-11-09 22:41:46,388] Trial 12 finished with value: 74348289.58964331 and parameters: {'n_estimators': 678, 'learning_rate': 0.05226400620365841, 'num_leaves': 236, 'max_depth': 16, 'min_child_samples': 7, 'subsample': 0.9234602307457067, 'colsample_bytree': 0.8510571781023799, 'reg_alpha': 8.186687457457445, 'reg_lambda': 0.0455548839932313}. Best is trial 12 with value: 74348289.58964331.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000197 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 12. Best value: 7.43483e+07:  28%|██▊       | 14/50 [00:05<00:17,  2.11it/s]

[I 2025-11-09 22:41:47,167] Trial 13 finished with value: 99275860.50805202 and parameters: {'n_estimators': 686, 'learning_rate': 0.04199188778410168, 'num_leaves': 252, 'max_depth': 16, 'min_child_samples': 5, 'subsample': 0.925913487601959, 'colsample_bytree': 0.8509146984111675, 'reg_alpha': 0.013280483381364248, 'reg_lambda': 0.008727409054242165}. Best is trial 12 with value: 74348289.58964331.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000151 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 12. Best value: 7.43483e+07:  32%|███▏      | 16/50 [00:06<00:10,  3.18it/s]

[I 2025-11-09 22:41:47,194] Trial 14 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000176 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-09 22:41:47,351] Trial 15 finished with value: 357662260.9201355 and parameters: {'n_estimators': 351, 'learning_rate': 0.0799912417038611, 'num_leaves': 220, 'max_depth': 5, 'min_child_samples': 66, 'subsample': 0.9930744627111218, 'colsample_bytree': 0.8393896205906665, 'reg_alpha': 9.730978615907079, 'reg_lambda': 6.243069797304142}. Best is trial 12 with value: 74348289.58964331.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info]

Best trial: 12. Best value: 7.43483e+07:  34%|███▍      | 17/50 [00:07<00:21,  1.53it/s]

[I 2025-11-09 22:41:49,147] Trial 16 finished with value: 81243759.8734599 and parameters: {'n_estimators': 813, 'learning_rate': 0.04106141062337479, 'num_leaves': 114, 'max_depth': -1, 'min_child_samples': 5, 'subsample': 0.7977433214483701, 'colsample_bytree': 0.9223935672425749, 'reg_alpha': 0.09733745850445547, 'reg_lambda': 0.03857991481720517}. Best is trial 12 with value: 74348289.58964331.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000698 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 12. Best value: 7.43483e+07:  36%|███▌      | 18/50 [00:08<00:18,  1.75it/s]

[I 2025-11-09 22:41:49,460] Trial 17 finished with value: 292139289.1214538 and parameters: {'n_estimators': 466, 'learning_rate': 0.03702917556373974, 'num_leaves': 112, 'max_depth': 13, 'min_child_samples': 37, 'subsample': 0.9183509774407929, 'colsample_bytree': 0.9251990649716094, 'reg_alpha': 0.05432980201370617, 'reg_lambda': 0.021121718337466615}. Best is trial 12 with value: 74348289.58964331.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000226 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 12. Best value: 7.43483e+07:  38%|███▊      | 19/50 [00:09<00:22,  1.38it/s]

[I 2025-11-09 22:41:50,621] Trial 18 finished with value: 87336129.08669989 and parameters: {'n_estimators': 735, 'learning_rate': 0.049757182841202716, 'num_leaves': 76, 'max_depth': -1, 'min_child_samples': 5, 'subsample': 0.73964158693172, 'colsample_bytree': 0.9100488453134897, 'reg_alpha': 0.00022163473160045528, 'reg_lambda': 0.029316903899976005}. Best is trial 12 with value: 74348289.58964331.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000144 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 12. Best value: 7.43483e+07:  40%|████      | 20/50 [00:10<00:26,  1.14it/s]

[I 2025-11-09 22:41:51,901] Trial 19 finished with value: 1777046918.8790507 and parameters: {'n_estimators': 1672, 'learning_rate': 0.10329028379573521, 'num_leaves': 121, 'max_depth': -1, 'min_child_samples': 30, 'subsample': 0.845543330156515, 'colsample_bytree': 0.8170309282184145, 'reg_alpha': 0.2957784269604846, 'reg_lambda': 0.00022181559510132682}. Best is trial 12 with value: 74348289.58964331.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000150 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 12. Best value: 7.43483e+07:  42%|████▏     | 21/50 [00:11<00:22,  1.27it/s]

[I 2025-11-09 22:41:52,460] Trial 20 finished with value: 149574926.02697682 and parameters: {'n_estimators': 1154, 'learning_rate': 0.03493538006744742, 'num_leaves': 74, 'max_depth': 11, 'min_child_samples': 94, 'subsample': 0.9467051861632398, 'colsample_bytree': 0.8813919356795588, 'reg_alpha': 1.2722974438529106e-08, 'reg_lambda': 8.802075014264656}. Best is trial 12 with value: 74348289.58964331.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 12. Best value: 7.43483e+07:  44%|████▍     | 22/50 [00:12<00:26,  1.08it/s]

[I 2025-11-09 22:41:53,746] Trial 21 finished with value: 95761853.63338858 and parameters: {'n_estimators': 723, 'learning_rate': 0.05542490742162084, 'num_leaves': 77, 'max_depth': 0, 'min_child_samples': 5, 'subsample': 0.7599610925494408, 'colsample_bytree': 0.9395991103990001, 'reg_alpha': 0.00013084466370348973, 'reg_lambda': 0.05535983593964644}. Best is trial 12 with value: 74348289.58964331.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000170 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 22. Best value: 3.61882e+07:  46%|████▌     | 23/50 [00:12<00:19,  1.36it/s]

[I 2025-11-09 22:41:54,007] Trial 22 finished with value: 36188201.78666547 and parameters: {'n_estimators': 529, 'learning_rate': 0.056519003375479805, 'num_leaves': 128, 'max_depth': 2, 'min_child_samples': 13, 'subsample': 0.7067428599848027, 'colsample_bytree': 0.8802489778678998, 'reg_alpha': 0.0012233728720223204, 'reg_lambda': 0.0012309919300470854}. Best is trial 22 with value: 36188201.78666547.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000217 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 22. Best value: 3.61882e+07:  48%|████▊     | 24/50 [00:13<00:15,  1.63it/s]

[I 2025-11-09 22:41:54,328] Trial 23 finished with value: 92986841.41484776 and parameters: {'n_estimators': 525, 'learning_rate': 0.09375261199302208, 'num_leaves': 130, 'max_depth': 3, 'min_child_samples': 14, 'subsample': 0.699697802830657, 'colsample_bytree': 0.8838838384787057, 'reg_alpha': 0.002531482850998271, 'reg_lambda': 0.0023549997871596426}. Best is trial 22 with value: 36188201.78666547.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000251 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 22. Best value: 3.61882e+07:  48%|████▊     | 24/50 [00:13<00:15,  1.63it/s]

[I 2025-11-09 22:41:54,361] Trial 24 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 22. Best value: 3.61882e+07:  52%|█████▏    | 26/50 [00:13<00:11,  2.00it/s]

[I 2025-11-09 22:41:55,052] Trial 25 finished with value: 652200750.7289796 and parameters: {'n_estimators': 995, 'learning_rate': 0.0625962538347523, 'num_leaves': 165, 'max_depth': 14, 'min_child_samples': 45, 'subsample': 0.7788199483292415, 'colsample_bytree': 0.8099384119347259, 'reg_alpha': 2.7440868372488887e-05, 'reg_lambda': 4.479828184875643e-05}. Best is trial 22 with value: 36188201.78666547.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000160 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-09 22:41:55,091] Trial 26 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000175 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightG

Best trial: 22. Best value: 3.61882e+07:  54%|█████▍    | 27/50 [00:14<00:11,  2.00it/s]

[I 2025-11-09 22:41:55,954] Trial 27 finished with value: 44160813.63677327 and parameters: {'n_estimators': 436, 'learning_rate': 0.07748406167020265, 'num_leaves': 225, 'max_depth': -1, 'min_child_samples': 10, 'subsample': 0.8792317225944629, 'colsample_bytree': 0.8963558573246825, 'reg_alpha': 0.011164798590623241, 'reg_lambda': 0.0073110345706609075}. Best is trial 22 with value: 36188201.78666547.


Best trial: 22. Best value: 3.61882e+07:  58%|█████▊    | 29/50 [00:14<00:10,  2.08it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000163 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-09 22:41:55,995] Trial 28 pruned. Trial was pruned at iteration 11.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-09 22:41:56,038] Trial 29 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000229 seconds.
You can set `force_col_wise=true` to remove the ov

Best trial: 22. Best value: 3.61882e+07:  62%|██████▏   | 31/50 [00:15<00:06,  3.03it/s]

[I 2025-11-09 22:41:56,411] Trial 30 finished with value: 382736176.84697473 and parameters: {'n_estimators': 630, 'learning_rate': 0.0738161277769, 'num_leaves': 203, 'max_depth': 0, 'min_child_samples': 59, 'subsample': 0.8966246108453321, 'colsample_bytree': 0.8765508151447902, 'reg_alpha': 0.010998537209913211, 'reg_lambda': 1.9887063187162126}. Best is trial 22 with value: 36188201.78666547.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 22. Best value: 3.61882e+07:  64%|██████▍   | 32/50 [00:16<00:08,  2.06it/s]

[I 2025-11-09 22:41:57,608] Trial 31 finished with value: 86602696.37879838 and parameters: {'n_estimators': 863, 'learning_rate': 0.0881316918488901, 'num_leaves': 238, 'max_depth': -1, 'min_child_samples': 11, 'subsample': 0.8135492570946378, 'colsample_bytree': 0.9064805162766488, 'reg_alpha': 0.46701873302377467, 'reg_lambda': 0.004982985807090376}. Best is trial 22 with value: 36188201.78666547.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000162 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 22. Best value: 3.61882e+07:  66%|██████▌   | 33/50 [00:16<00:07,  2.20it/s]

[I 2025-11-09 22:41:57,943] Trial 32 finished with value: 108002785.25966254 and parameters: {'n_estimators': 425, 'learning_rate': 0.04266353616299205, 'num_leaves': 171, 'max_depth': 0, 'min_child_samples': 21, 'subsample': 0.8700248870330292, 'colsample_bytree': 0.9563809111938181, 'reg_alpha': 0.26339119608647066, 'reg_lambda': 0.0005125625943978609}. Best is trial 22 with value: 36188201.78666547.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000147 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 33. Best value: 2.04547e+07:  68%|██████▊   | 34/50 [00:17<00:07,  2.17it/s]

[I 2025-11-09 22:41:58,422] Trial 33 finished with value: 20454675.860347647 and parameters: {'n_estimators': 1046, 'learning_rate': 0.055684427069361615, 'num_leaves': 123, 'max_depth': 2, 'min_child_samples': 10, 'subsample': 0.6592178372197177, 'colsample_bytree': 0.8652042366654502, 'reg_alpha': 0.034402896453370226, 'reg_lambda': 0.021424974357939684}. Best is trial 33 with value: 20454675.860347647.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000196 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 33. Best value: 2.04547e+07:  70%|███████   | 35/50 [00:17<00:07,  2.13it/s]

[I 2025-11-09 22:41:58,919] Trial 34 finished with value: 1003038942.4459457 and parameters: {'n_estimators': 1022, 'learning_rate': 0.053876714846382755, 'num_leaves': 236, 'max_depth': 3, 'min_child_samples': 25, 'subsample': 0.6613989848564389, 'colsample_bytree': 0.792683793248586, 'reg_alpha': 0.024792687808499542, 'reg_lambda': 6.186212500038245e-05}. Best is trial 33 with value: 20454675.860347647.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000146 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 33. Best value: 2.04547e+07:  72%|███████▏  | 36/50 [00:18<00:06,  2.11it/s]

[I 2025-11-09 22:41:59,405] Trial 35 finished with value: 40057172.16309064 and parameters: {'n_estimators': 1105, 'learning_rate': 0.14074188851823957, 'num_leaves': 177, 'max_depth': 2, 'min_child_samples': 19, 'subsample': 0.6623943851930296, 'colsample_bytree': 0.8607032768226789, 'reg_alpha': 0.005390611105199749, 'reg_lambda': 0.10390896724992181}. Best is trial 33 with value: 20454675.860347647.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000211 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-09 22:41:59,440] Trial 36 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000126 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM

Best trial: 33. Best value: 2.04547e+07:  76%|███████▌  | 38/50 [00:19<00:05,  2.19it/s]

[I 2025-11-09 22:42:00,269] Trial 37 finished with value: 242065166.52074686 and parameters: {'n_estimators': 1266, 'learning_rate': 0.16874753291453742, 'num_leaves': 201, 'max_depth': 4, 'min_child_samples': 17, 'subsample': 0.6873967144159474, 'colsample_bytree': 0.6454078948307838, 'reg_alpha': 0.0008025542093718969, 'reg_lambda': 0.18845581486358343}. Best is trial 33 with value: 20454675.860347647.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000154 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 33. Best value: 2.04547e+07:  80%|████████  | 40/50 [00:20<00:05,  1.68it/s]

[I 2025-11-09 22:42:01,323] Trial 38 finished with value: 415820097.7203329 and parameters: {'n_estimators': 1652, 'learning_rate': 0.1440625340719431, 'num_leaves': 156, 'max_depth': 6, 'min_child_samples': 21, 'subsample': 0.721611206584752, 'colsample_bytree': 0.8280998427268168, 'reg_alpha': 0.0012817776684511848, 'reg_lambda': 0.0006325112366682407}. Best is trial 33 with value: 20454675.860347647.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000208 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-09 22:42:01,360] Trial 39 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGB

Best trial: 33. Best value: 2.04547e+07:  84%|████████▍ | 42/50 [00:20<00:03,  2.31it/s]

[I 2025-11-09 22:42:02,130] Trial 41 finished with value: 1223769162.727747 and parameters: {'n_estimators': 1077, 'learning_rate': 0.08166391422034187, 'num_leaves': 125, 'max_depth': 9, 'min_child_samples': 9, 'subsample': 0.643583035152199, 'colsample_bytree': 0.8520852854947029, 'reg_alpha': 1.7873238367119288, 'reg_lambda': 0.07622393223328545}. Best is trial 33 with value: 20454675.860347647.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000179 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 33. Best value: 2.04547e+07:  86%|████████▌ | 43/50 [00:21<00:03,  2.26it/s]

[I 2025-11-09 22:42:02,616] Trial 42 finished with value: 29326709.613516975 and parameters: {'n_estimators': 945, 'learning_rate': 0.061573501067013044, 'num_leaves': 255, 'max_depth': 2, 'min_child_samples': 19, 'subsample': 0.6042822813859435, 'colsample_bytree': 0.7763595448205975, 'reg_alpha': 0.1415432319010726, 'reg_lambda': 1.9177661562776231}. Best is trial 33 with value: 20454675.860347647.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000196 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 33. Best value: 2.04547e+07:  88%|████████▊ | 44/50 [00:21<00:02,  2.18it/s]

[I 2025-11-09 22:42:03,134] Trial 43 finished with value: 50591827.365566276 and parameters: {'n_estimators': 1015, 'learning_rate': 0.030202513373159514, 'num_leaves': 253, 'max_depth': 2, 'min_child_samples': 19, 'subsample': 0.6114957396908178, 'colsample_bytree': 0.7558237979330871, 'reg_alpha': 0.005034390197546259, 'reg_lambda': 1.5942612166564458}. Best is trial 33 with value: 20454675.860347647.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000164 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 33. Best value: 2.04547e+07:  88%|████████▊ | 44/50 [00:21<00:02,  2.18it/s]

[I 2025-11-09 22:42:03,169] Trial 44 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003233 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 33. Best value: 2.04547e+07:  92%|█████████▏| 46/50 [00:22<00:01,  2.26it/s]

[I 2025-11-09 22:42:03,965] Trial 45 finished with value: 128162242.23514467 and parameters: {'n_estimators': 1236, 'learning_rate': 0.05891161074728807, 'num_leaves': 155, 'max_depth': 5, 'min_child_samples': 12, 'subsample': 0.6783741215650878, 'colsample_bytree': 0.7762286982552374, 'reg_alpha': 0.15375673592478778, 'reg_lambda': 0.19172061642064786}. Best is trial 33 with value: 20454675.860347647.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000234 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 33. Best value: 2.04547e+07:  94%|█████████▍| 47/50 [00:23<00:01,  2.40it/s]

[I 2025-11-09 22:42:04,285] Trial 46 finished with value: 489920575.32205164 and parameters: {'n_estimators': 315, 'learning_rate': 0.08151049334330324, 'num_leaves': 191, 'max_depth': 0, 'min_child_samples': 36, 'subsample': 0.7083720137361806, 'colsample_bytree': 0.8123474711712716, 'reg_alpha': 2.7107158490415825e-06, 'reg_lambda': 0.0036428175837060536}. Best is trial 33 with value: 20454675.860347647.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000136 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171
[I 2025-11-09 22:42:04,335] Trial 47 pruned. Trial was pruned at iteration 10.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000193 seconds.
You can set `force_col_wise=true` to remove the overhead.
[Ligh

Best trial: 33. Best value: 2.04547e+07:  98%|█████████▊| 49/50 [00:23<00:00,  2.78it/s]

[I 2025-11-09 22:42:04,819] Trial 48 finished with value: 328255012.46859545 and parameters: {'n_estimators': 945, 'learning_rate': 0.06283899534485134, 'num_leaves': 246, 'max_depth': 2, 'min_child_samples': 22, 'subsample': 0.6290953135343392, 'colsample_bytree': 0.7248735813962334, 'reg_alpha': 1.06981926159116, 'reg_lambda': 1.912414341419089e-08}. Best is trial 33 with value: 20454675.860347647.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2541
[LightGBM] [Info] Number of data points in the train set: 692, number of used features: 11
[LightGBM] [Info] Start training from score 28710.196171


Best trial: 33. Best value: 2.04547e+07: 100%|██████████| 50/50 [00:24<00:00,  2.07it/s]

[I 2025-11-09 22:42:05,338] Trial 49 finished with value: 73658813.83245496 and parameters: {'n_estimators': 785, 'learning_rate': 0.07221736284895174, 'num_leaves': 145, 'max_depth': 6, 'min_child_samples': 11, 'subsample': 0.6769887579163967, 'colsample_bytree': 0.8655029322071005, 'reg_alpha': 0.0014723078815818218, 'reg_lambda': 0.0009553773921840817}. Best is trial 33 with value: 20454675.860347647.

=== Mejores hiperparámetros ===
{'n_estimators': 1046, 'learning_rate': 0.055684427069361615, 'num_leaves': 123, 'max_depth': 2, 'min_child_samples': 10, 'subsample': 0.6592178372197177, 'colsample_bytree': 0.8652042366654502, 'reg_alpha': 0.034402896453370226, 'reg_lambda': 0.021424974357939684}
Mejor RMSE valid: 20454675.8603
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000149 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2805
[LightGBM] [Info] Number of data points in the train set: 864, nu





=== Métricas en TEST ===
MSE : 14,017,846.90
RMSE: 14,017,846.90
MAE : 2,920.16
R²  : 0.1161


### Evaluación final del modelo
¿Vamos a aprobar? No lo sabemos. Para eso vamos a verificar que el modelo funcione dentro del rango de mae esperado para datos inéditos. Vamos a hacer pruebas con la variable inédita x_test y y_test

In [106]:
x_train

Unnamed: 0,Frio (Kw),Frio_roll_mean_7_lag1,Sala Maq (Kw),Envasado (Kw),Frio_roll_mean_14_lag1,Servicios (Kw),Prod Agua (Kw),KW Gral Planta,Linea 2 (Kw),CO 2 / Hl,EE Caldera / Hl,Cocina (Kw),ET Linea 5/Hl,VAPOR DE LINEA 4 KG,Linea 3 (Kw),Resto Serv (Kw),Conversion Kg/Mj,Restos Planta (Kw),Hl Cerveza L2,Frio_roll_mean_3_lag1
0,24796.877744,24658.209509,28306.225915,14962.530183,26395.792079,40581.503049,673.820427,68820.020122,4523.201387,0.448379,0.091459,1794.225,25.191172,28918.129588,5540.002439,4991.821951,3.628813,3747.276110,1470.268293,24631.518213
1,24796.877744,24658.209509,28306.225915,14962.530183,26395.792079,40581.503049,673.820427,68820.020122,4523.201387,0.448379,0.091459,1794.225,25.191172,28918.129588,5540.002439,4991.821951,3.628813,3747.276110,1470.268293,24631.518213
2,28268.000000,26626.823553,31386.000000,14818.000000,65938.706722,42565.500000,943.000000,68039.000000,1289.530000,0.689543,0.116571,1260.000,0.000000,35280.450000,6629.000000,3302.500000,2.505079,1864.340000,33.000000,26654.541620
3,24246.000000,29890.189641,28070.000000,18696.000000,29558.225590,39650.000000,1011.000000,69857.000000,5861.350000,0.275763,0.073432,2020.000,13.010000,29736.150000,7115.000000,3998.000000,2.868745,3058.290000,1348.000000,22182.333333
4,29885.000000,28877.865079,33463.000000,14831.000000,29282.342318,45385.000000,1018.000000,73431.000000,5653.890000,0.541448,0.098328,1932.000,13.308435,31668.900000,3732.000000,3782.000000,2.872046,3236.390000,1103.000000,25489.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
953,21756.666667,18942.571429,24529.333333,18948.333333,18097.714286,37179.000000,275.666667,71860.666667,5642.080000,0.432624,0.013479,2486.000,50.319762,36217.610000,7812.000000,5471.333333,3.453020,4731.413333,1261.666667,18126.000000
954,23447.000000,19700.809524,26544.000000,18924.000000,18494.261905,38946.000000,258.000000,73678.000000,5657.560000,0.251824,0.019722,2352.000,23.322541,32355.800000,7835.000000,5727.000000,3.384492,4737.060000,1319.000000,18866.555556
955,18856.000000,20144.666667,20656.000000,6233.000000,18930.904762,31460.000000,234.000000,48969.000000,2517.310000,0.475733,0.067638,2066.000,28.178420,11386.050000,2315.000000,4975.000000,3.270637,3014.310000,349.000000,20244.888889
956,10274.000000,19882.952381,10547.000000,1069.000000,18919.190476,16174.000000,155.000000,22264.000000,449.940000,0.001687,0.063744,1029.000,51.836863,1422.720000,657.000000,1341.000000,3.816010,1042.940000,27.000000,21353.222222


In [107]:
x_test

Unnamed: 0,Frio (Kw),Frio_roll_mean_7_lag1,Sala Maq (Kw),Envasado (Kw),Frio_roll_mean_14_lag1,Servicios (Kw),Prod Agua (Kw),KW Gral Planta,Linea 2 (Kw),CO 2 / Hl,EE Caldera / Hl,Cocina (Kw),ET Linea 5/Hl,VAPOR DE LINEA 4 KG,Linea 3 (Kw),Resto Serv (Kw),Conversion Kg/Mj,Restos Planta (Kw),Hl Cerveza L2,Frio_roll_mean_3_lag1
0,19199.0,18417.968635,19377.0,19390.0,18510.400349,31245.0,103.0,63066.0,6275.150000,0.000171,0.000000,311.0,27.657067,34506.45,7692.0,4431.0,3.398585,5510.650000,3153.0,17922.625608
1,18576.0,18431.104643,19407.0,19677.0,18370.161874,30738.0,215.0,64958.0,6493.090000,0.002616,0.000000,1345.0,24.108764,36541.05,7717.0,4041.0,3.517712,6389.590000,2873.0,17874.421596
2,21478.0,18426.584193,24270.0,18937.0,17460.300226,35599.0,438.0,68665.0,5382.110000,0.368160,0.000000,2406.0,32.419244,36438.03,7938.0,4536.0,3.364916,4398.610000,1340.0,16645.983663
3,21039.0,20906.251934,24047.0,5579.0,20821.670503,33531.0,317.0,52300.0,2522.420000,0.493976,0.000000,2352.0,32.893117,2450.00,3196.0,4505.0,3.435641,3715.420000,364.0,19751.000000
4,19832.0,18747.474876,22642.0,997.0,19728.716547,31383.0,224.0,42736.0,673.470000,0.230394,0.000000,2227.0,44.659491,1225.13,614.0,4322.0,3.027228,2655.970000,38.0,20364.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
225,9481.0,13061.000000,9793.0,452.0,14919.000000,16977.0,70.0,22953.0,461.300000,0.793820,0.178469,242.0,29.285922,0.00,546.0,4487.0,8.316918,1815.800000,0.0,12301.666667
226,11587.0,11927.285714,11873.0,2758.0,14353.000000,20821.0,105.0,31956.0,4351.070000,0.001906,0.068689,1387.0,3.325236,0.00,584.0,5086.0,6.579727,3508.570000,1167.0,11355.000000
227,10123.0,11870.285714,10395.0,3339.0,13895.285714,18771.0,76.0,30480.0,5503.480000,0.000000,0.541538,313.0,0.000000,0.00,550.0,4900.0,10.935106,4228.480000,975.0,10999.666667
228,10360.0,11358.000000,10614.0,5513.0,13407.285714,18461.0,83.0,32770.0,6940.660156,1.020979,0.272386,235.0,5.850167,0.00,1287.0,4384.0,6.553919,4212.160156,1865.0,10397.000000


In [134]:
#vamos a probar modelos de ML
#leamos x_train, y_train, x_test, y_test
import pandas as pd
x_train = pd.read_csv("x_train.csv")
y_train = pd.read_csv("y_train.csv")
x_test = pd.read_csv("x_val.csv")
y_test = pd.read_csv("y_val.csv")
"""El mejor modelo fue RFRegressor con los siguientes hiperparámetros:
    {'max_depth': 16, 'max_features': 1.0, 'bootstrap': True, 'n_estimators': 1183, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_samples': 0.8654367662002576}"""
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
#Entrenemos un modelo con esos hiperparámetros

best_model = RandomForestRegressor(
    n_estimators=1183,
    max_depth=16,
    max_features=1.0,
    bootstrap=True,
    min_samples_split=2,
    min_samples_leaf=2,
    max_samples=0.8654367662002576,
    random_state=42,
    n_jobs=-1
)
from sklearn.preprocessing import PowerTransformer
pt = PowerTransformer(method='yeo-johnson')
x_train_ready = pd.DataFrame(pt.fit_transform(x_train), columns=x_train.columns)
x_test_ready = pd.DataFrame(pt.transform(x_test), columns=x_test.columns)

best_model.fit(x_train_ready, y_train)
y_pred_test = best_model.predict(x_test_ready)
mse  = mean_squared_error(y_test, y_pred_test)
rmse = mean_squared_error(y_test, y_pred_test)
mae  = mean_absolute_error(y_test, y_pred_test)
r2   = r2_score(y_test, y_pred_test)
print("\n=== Métricas en TEST del mejor modelo entrenado manualmente ===")
print(f"MSE : {mse:,.2f}")
print(f"RMSE: {rmse:,.2f}")
print(f"MAE : {mae:,.2f}")
print(f"R²  : {r2:.4f}")
feature_importances = best_model.feature_importances_
feature_names = x_train.columns
feature_importances = pd.DataFrame({'feature': feature_names, 'importance': feature_importances})
feature_importances = feature_importances.sort_values(by='importance', ascending=False)
feature_importances = feature_importances[:20]
print("Feature importances:")
print(feature_importances)
x_train = x_train[feature_importances['feature']]
x_test = x_test[feature_importances['feature']]

  return fit_method(estimator, *args, **kwargs)



=== Métricas en TEST del mejor modelo entrenado manualmente ===
MSE : 11,223,523.16
RMSE: 11,223,523.16
MAE : 2,628.99
R²  : 0.2923
Feature importances:
                    feature  importance
76                Frio (Kw)    0.415853
134   Frio_roll_mean_7_lag1    0.140973
72            Sala Maq (Kw)    0.098940
67            Envasado (Kw)    0.063350
136  Frio_roll_mean_14_lag1    0.028401
71           Servicios (Kw)    0.012200
78           Prod Agua (Kw)    0.010649
81           KW Gral Planta    0.006070
68             Linea 2 (Kw)    0.005377
50                CO 2 / Hl    0.004458
13          EE Caldera / Hl    0.004136
66              Cocina (Kw)    0.003900
38            ET Linea 5/Hl    0.003631
109     VAPOR DE LINEA 4 KG    0.003530
69             Linea 3 (Kw)    0.003528
79          Resto Serv (Kw)    0.003396
98         Conversion Kg/Mj    0.003335
80       Restos Planta (Kw)    0.003300
59            Hl Cerveza L2    0.003267
132   Frio_roll_mean_3_lag1    0.003221


In [135]:
x_val = x_test.copy()
y_val = y_test.copy()

In [136]:
import numpy as np
import pandas as pd

# =========================
# 1) Redefinir train y leer test
# =========================
x_train = pd.concat([x_train, x_val], ignore_index=True)
y_train = pd.concat([y_train, y_val], ignore_index=True)

x_test  = pd.read_csv("X_test.csv")
y_test  = pd.read_csv("y_test.csv")

# Aseguramos mismas columnas y orden
x_test = x_test[x_train.columns]

# =========================
# 2) Columnas numéricas / no numéricas
# =========================
numeric_cols = x_train.select_dtypes(include=[np.number]).columns.tolist()
other_cols   = [c for c in x_train.columns if c not in numeric_cols]

# Copias aisladas de numéricas
Xtr_num = x_train[numeric_cols].copy()
Xte_num = x_test[numeric_cols].copy()

# =========================
# 3) Límites por feature con MAD (fallback a percentiles si MAD=0)
# =========================
def mad_bounds(col_train: pd.Series, Z: float = 3.5,
               q_low: float = 0.001, q_high: float = 0.999):
    """
    Devuelve (low, high) por MAD; si MAD==0, usa percentiles (q_low, q_high).
    """
    med = col_train.median()
    mad = np.median(np.abs(col_train - med))
    if mad > 0:
        # robust z-score bounds
        # 0.6745 hace que MAD sea comparable a sigma para Normal
        scale = 0.6745 * (col_train - med).abs().median() / mad  # opcional; muchos usan 0.6745 directamente
        # usamos la forma clásica: robust_z = 0.6745*(x-med)/MAD
        # límites: med ± Z * MAD / 0.6745
        low  = med - (Z * mad / 0.6745)
        high = med + (Z * mad / 0.6745)
    else:
        # Sin dispersión: usamos percentiles amplios
        low  = col_train.quantile(q_low)
        high = col_train.quantile(q_high)
        if low == high:
            # todos iguales: expandir un poco para no anular la feature
            low, high = low - 1e-12, high + 1e-12
    return low, high

bounds = {}
for c in numeric_cols:
    low, high = mad_bounds(Xtr_num[c].dropna(), Z=3.5, q_low=0.001, q_high=0.999)
    bounds[c] = (low, high)

# =========================
# 4) Reemplazo cellwise: fuera de [low, high] -> np.nan
#    (usamos límites aprendidos SOLO del train)
# =========================
def mask_outliers_to_nan(df_num: pd.DataFrame, bounds_dict: dict) -> pd.DataFrame:
    df = df_num.copy()
    for c, (low, high) in bounds_dict.items():
        mask_low  = df[c] < low
        mask_high = df[c] > high
        df.loc[mask_low | mask_high, c] = np.nan
    return df

Xtr_num_nan = mask_outliers_to_nan(Xtr_num, bounds)
Xte_num_nan = mask_outliers_to_nan(Xte_num, bounds)

# Si querés contar cuántas celdas se “nanearon”:
tr_nan_cells = Xtr_num_nan.isna().sum().sum() - Xtr_num.isna().sum().sum()
te_nan_cells = Xte_num_nan.isna().sum().sum() - Xte_num.isna().sum().sum()
print(f"Celdas convertidas a NaN por outliers - train: {tr_nan_cells}, test: {te_nan_cells}")

# =========================
# 5) Imputación KNN en ESPACIO ESCALADO y volver a original
# =========================
from sklearn.preprocessing import RobustScaler
from sklearn.impute import KNNImputer

scaler = RobustScaler()
Xtr_scaled = pd.DataFrame(scaler.fit_transform(Xtr_num_nan), columns=numeric_cols, index=Xtr_num_nan.index)
Xte_scaled = pd.DataFrame(scaler.transform(Xte_num_nan),    columns=numeric_cols, index=Xte_num_nan.index)

imputer = KNNImputer(n_neighbors=5, weights='distance')
Xtr_imp_scaled = pd.DataFrame(imputer.fit_transform(Xtr_scaled), columns=numeric_cols, index=Xtr_scaled.index)
Xte_imp_scaled = pd.DataFrame(imputer.transform(Xte_scaled),    columns=numeric_cols, index=Xte_scaled.index)

# Volvemos a la escala original
Xtr_imp = pd.DataFrame(scaler.inverse_transform(Xtr_imp_scaled), columns=numeric_cols, index=Xtr_imp_scaled.index)
Xte_imp = pd.DataFrame(scaler.inverse_transform(Xte_imp_scaled), columns=numeric_cols, index=Xte_imp_scaled.index)

# =========================
# 6) Reconstruir datasets finales
# =========================
x_train_clean = pd.concat([Xtr_imp, x_train[other_cols].reset_index(drop=True)], axis=1)[x_train.columns]
x_test_clean  = pd.concat([Xte_imp, x_test[other_cols].reset_index(drop=True)],  axis=1)[x_test.columns]

# Resultado final para modelar
x_train = x_train_clean
x_test  = x_test_clean


Celdas convertidas a NaN por outliers - train: 686, test: 206


In [137]:
best_model = RandomForestRegressor(
    n_estimators=1183,
    max_depth=16,
    max_features=1.0,
    bootstrap=True,
    min_samples_split=2,
    min_samples_leaf=2,
    max_samples=0.8654367662002576,
    random_state=42,
    n_jobs=-1
)
from sklearn.preprocessing import PowerTransformer
pt = PowerTransformer(method='yeo-johnson')
x_train_ready = pd.DataFrame(pt.fit_transform(x_train), columns=x_train.columns)
x_test_ready = pd.DataFrame(pt.transform(x_test), columns=x_test.columns)

best_model.fit(x_train_ready, y_train)
y_pred_test = best_model.predict(x_test_ready)
mse  = mean_squared_error(y_test, y_pred_test)
rmse = mean_squared_error(y_test, y_pred_test)
mae  = mean_absolute_error(y_test, y_pred_test)
r2   = r2_score(y_test, y_pred_test)
print("\n=== Métricas en TEST del mejor modelo entrenado manualmente ===")
print(f"MSE : {mse:,.2f}")
print(f"RMSE: {rmse:,.2f}")
print(f"MAE : {mae:,.2f}")
print(f"R²  : {r2:.4f}")


  return fit_method(estimator, *args, **kwargs)



=== Métricas en TEST del mejor modelo entrenado manualmente ===
MSE : 12,165,506.70
RMSE: 12,165,506.70
MAE : 2,630.52
R²  : 0.6108
