# In this notebook we would train the catboost model

In [None]:
drop_catboost_data_driven = [
    # Raw OHLC (massively correlated with close)
    'open', 'high', 'low',
    
    # ATR components
    'high_low', 'high_close', 'low_close',
    
    # Helper calculations  
    'typical_price',  # 99.99% corr with close
    'true_range',     # Captured in atr_14
    'volume_mean_20', # Used in ratio
    
    # Highly correlated MAs (keep EMA_7, drop others)
    'EMA_21',         # 99.99% corr with SMA_20
    'SMA_20',         # 99.99% corr with vwap_24h  
    'vwap_24h',       # Redundant with SMA_20
    'close_4h',       # 99.99% corr with close
    
    # Bollinger components (keep bb_position, width)
    'bollinger_upper', 'bollinger_lower',  # 99.98% corr with SMA_20
    
    # MACD components (keep histogram)
    'MACD_line', 'MACD_signal',  # 95% corr, histogram captures key info
    
    # Support/resistance (99.85% corr with each other)
    'resistance_level', 'support_level',  # Highly correlated
    
    # Zero-importance binary flags
    'vol_spike_1_5x',           # 0 LightGBM splits
    'near_upper_band', 'near_lower_band',  # 0 LightGBM splits
    'break_upper_band', 'break_lower_band', # 0 LightGBM splits
    'rsi_oversold',             # 0 LightGBM splits
    'above_sma20',              # 0 LightGBM splits, derivable
    'macd_positive',            # 0 LightGBM splits
    'volume_breakout', 'volume_breakdown',  # 0 LightGBM splits
    
    # Highly correlated position flags
    'above_sma50', 'ema7_above_ema21',  # 77% corr, derivable
    
    # Low-importance cross signals (keep best ones)
    'ema_cross_down',           # Lower importance than ema_cross_up
    
    # Some oscillator extremes (keep the reversals)
    'rsi_overbought', 'stoch_overbought', 'stoch_oversold',
    'cci_overbought', 'cci_oversold',
    
    # Trend flags vs continuous values
    'trending_market',          # Keep ADX value instead
    
    # All scenario features (very low importance)
    'bullish_scenario_1', 'bullish_scenario_2', 'bullish_scenario_3',
    'bullish_scenario_4', 'bullish_scenario_5', 'bullish_scenario_6',
    'bearish_scenario_1', 'bearish_scenario_2', 'bearish_scenario_3',
    'bearish_scenario_4', 'bearish_scenario_6',
]

In [None]:
# =============================================================
#  Stable CatBoost F0.5 Optimizer (Windows‑friendly, CPU default)
#  – avoids Bayesian‑bootstrap "subsample" crash
#  – safe pos_weight bounds when classes ~balanced
# =============================================================

import os, gc, time, random, warnings, joblib, platform
from pathlib import Path

import numpy as np
import pandas as pd
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score
)
from sklearn.model_selection import TimeSeriesSplit
import optuna

# ─── GLOBAL CONFIG ────────────────────────────────────────────
CSV_PATH     = Path(r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv")
VAL_FRAC     = 0.20              # fraction held out for final test
N_TRIALS     = 100               # hyper‑parameter trials
EARLY_STOP   = 150               # CatBoost early stopping
CV_FOLDS     = 3                 # chronological folds
FIXED_THRESH = 0.50              # threshold used inside CV
BETA         = 0.5               # F‑beta focus (precision‑heavy)
SEED         = 42

# ─── REPRODUCIBILITY ─────────────────────────────────────────
os.environ["PYTHONHASHSEED"] = str(SEED)
np.random.seed(SEED)
random.seed(SEED)
warnings.filterwarnings("ignore")

print(f"🖥️ Platform: {platform.system()} — running in CPU‑safe mode")

# ─── HELPERS ─────────────────────────────────────────────────

def fbeta_score_fixed(y_true, y_pred, beta: float = BETA, zero_division: float = 0.0):
    """Compute F‑beta where precision is weighted β<1 heavier (β=0.5)."""
    p = precision_score(y_true, y_pred, zero_division=0)
    r = recall_score(y_true, y_pred, zero_division=0)
    if p == 0 and r == 0:
        return zero_division
    beta2 = beta ** 2
    return (1 + beta2) * p * r / (beta2 * p + r + 1e-9)

def cleanup_memory():
    gc.collect()

# ─── LOAD & PREPARE DATA ─────────────────────────────────────
print("📂 Loading data …")

df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
df = df[df.index >= "2018-01-01"]
if "target" not in df.columns:
    raise ValueError("Column 'target' not found in dataset")

# feature leakage prevention — drop highly correlated / helper cols
DROP_COLS = [
    "open", "high", "low", "high_low", "high_close", "low_close",
    "typical_price", "true_range", "volume_mean_20", "EMA_21", "SMA_20",
    "vwap_24h", "close_4h", "bollinger_upper", "bollinger_lower",
    "MACD_line", "MACD_signal", "resistance_level", "support_level",
    "vol_spike_1_5x", "near_upper_band", "near_lower_band",
    "break_upper_band", "break_lower_band", "rsi_oversold", "above_sma20",
    "macd_positive", "volume_breakout", "volume_breakdown", "above_sma50",
    "ema7_above_ema21", "ema_cross_down", "rsi_overbought",
    "stoch_overbought", "stoch_oversold", "cci_overbought", "cci_oversold",
    "trending_market", "bullish_scenario_1", "bullish_scenario_2",
    "bullish_scenario_3", "bullish_scenario_4", "bullish_scenario_5",
    "bullish_scenario_6", "bearish_scenario_1", "bearish_scenario_2",
    "bearish_scenario_3", "bearish_scenario_4", "bearish_scenario_6",
    "close",
]
df.drop(columns=[c for c in DROP_COLS if c in df.columns], inplace=True)
df.dropna(inplace=True)
print(f"✅ Data: {len(df):,} rows, {df.shape[1]-1} features after cleaning")

X = df.drop(columns="target")
y = df["target"].astype(int)
cat_cols = X.select_dtypes(include=["object", "category"]).columns.tolist()
cat_indices = [X.columns.get_loc(c) for c in cat_cols]
print(f"✅ Categorical columns: {len(cat_cols)}")

class_counts = np.bincount(y)
print(f"📊 Class balance 0/1: {class_counts}")

# ─── OPTUNA OBJECTIVE ───────────────────────────────────────

def objective(trial: optuna.Trial) -> float:
    # ----- balanced class weight upper bound (never <1) -------
    raw_pw = len(y) / max(1, 2 * class_counts[1])
    upper_pw = min(8.0, max(1.0, raw_pw))  # always ≥1

    # sample pos_weight only if useful
    if upper_pw > 1.0:
        pos_w = trial.suggest_float("pos_weight", 1.0, upper_pw)
        class_w = [1.0, pos_w]
    else:
        class_w = [1.0, 1.0]

    params = {
        "iterations": trial.suggest_int("iterations", 200, 1000),
        "learning_rate": trial.suggest_float("lr", 0.02, 0.15),
        "depth": trial.suggest_int("depth", 4, 8),
        "l2_leaf_reg": trial.suggest_float("l2", 2.0, 15.0),
        "random_strength": trial.suggest_float("rand_str", 1.0, 3.0),
        "min_data_in_leaf": trial.suggest_int("min_data_leaf", 10, 30),
        "class_weights": class_w,
        "bootstrap_type": trial.suggest_categorical("bootstrap", ["Bernoulli", "MVS", "Bayesian"]),
        "eval_metric": "F1",
        "loss_function": "Logloss",
        "random_seed": SEED,
        "task_type": "CPU",
        "thread_count": 4,
        "early_stopping_rounds": EARLY_STOP,
        "verbose": False,
    }

    # conditional hyper‑params --------------------------------
    if params["bootstrap_type"] == "Bernoulli":
        params["subsample"] = trial.suggest_float("subsample", 0.6, 0.9)
    elif params["bootstrap_type"] == "Bayesian":
        params["bagging_temperature"] = trial.suggest_float("bagging_temp", 0.5, 3.0)
    # MVS does not need extra knobs and must not have "subsample"

    splitter = TimeSeriesSplit(n_splits=CV_FOLDS)
    scores = []

    for tr_idx, val_idx in splitter.split(X):
        X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
        y_tr, y_val = y.iloc[tr_idx], y.iloc[val_idx]

        if y_val.nunique() < 2:
            continue

        model = CatBoostClassifier(**params)
        model.fit(Pool(X_tr, y_tr, cat_features=cat_indices or None),
                  eval_set=Pool(X_val, y_val, cat_features=cat_indices or None))

        y_pred = (model.predict_proba(X_val)[:, 1] >= FIXED_THRESH).astype(int)
        scores.append(fbeta_score_fixed(y_val, y_pred))
        # ─── Optional metrics per trial fold ─────
        prec = precision_score(y_val, y_pred, zero_division=0)
        rec  = recall_score(y_val, y_pred, zero_division=0)
        f1   = f1_score(y_val, y_pred, zero_division=0)

        print(f"[Trial {trial.number}] Fold metrics – Precision: {prec:.3f} | Recall: {rec:.3f} | F1: {f1:.3f}")

        del model
        cleanup_memory()

    return float(np.mean(scores)) if scores else 0.0

# ─── RUN OPTIMISATION ───────────────────────────────────────
print("🚀 Hyper‑parameter search …")

study = optuna.create_study(direction="maximize",
                           sampler=optuna.samplers.TPESampler(seed=SEED))
study.optimize(objective, n_trials=N_TRIALS, timeout=None, show_progress_bar=True)

print("🏆 Best trial value:", study.best_value)

# ─── TRAIN FINAL MODEL ──────────────────────────────────────
print("🔧 Training final model …")

cut = int(len(df) * (1 - VAL_FRAC))
X_train, y_train = X.iloc[:cut], y.iloc[:cut]
X_test,  y_test  = X.iloc[cut:], y.iloc[cut:]

best_params = {
    "iterations": study.best_params.get("iterations", 500),
    "learning_rate": study.best_params.get("lr", 0.05),
    "depth": study.best_params.get("depth", 6),
    "l2_leaf_reg": study.best_params.get("l2", 5.0),
    "min_data_in_leaf": study.best_params.get("min_data_leaf", 15),
    "random_strength": study.best_params.get("rand_str", 1.5),
    "class_weights": [1.0, study.best_params.get("pos_weight", 1.0)],
    "bootstrap_type": study.best_params.get("bootstrap", "Bernoulli"),
    "eval_metric": "F1",
    "loss_function": "Logloss",
    "random_seed": SEED,
    "early_stopping_rounds": EARLY_STOP,
    "task_type": "CPU",
    "thread_count": 4,
    "verbose": 100,
}

if best_params["bootstrap_type"] == "Bernoulli":
    best_params["subsample"] = study.best_params.get("subsample", 0.8)
elif best_params["bootstrap_type"] == "Bayesian":
    best_params["bagging_temperature"] = study.best_params.get("bagging_temp", 1.0)

final_model = CatBoostClassifier(**best_params)
final_model.fit(Pool(X_train, y_train, cat_features=cat_indices or None),
                eval_set=Pool(X_test, y_test, cat_features=cat_indices or None))
print("✅ Final model trained")

# ─── EVALUATE ───────────────────────────────────────────────
print("📊 Evaluating …")

y_proba = final_model.predict_proba(X_test)[:, 1]
y_pred_fixed = (y_proba >= FIXED_THRESH).astype(int)

auc_fixed = roc_auc_score(y_test, y_proba)
f1_fixed  = f1_score(y_test, y_pred_fixed)
f05_fixed = fbeta_score_fixed(y_test, y_pred_fixed)

# Threshold sweep for best F0.5
thrs = np.linspace(0.1, 0.9, 17)
best_thr, best_f05 = FIXED_THRESH, f05_fixed
for thr in thrs:
    pred = (y_proba >= thr).astype(int)
    f05 = fbeta_score_fixed(y_test, pred)
    if f05 > best_f05:
        best_f05, best_thr = f05, thr

y_pred_opt = (y_proba >= best_thr).astype(int)

print("═"*60)
print(f"AUC‑ROC     : {auc_fixed:.4f}")
print(f"F1@0.5      : {f1_fixed:.4f}")
print(f"F0.5@0.5    : {f05_fixed:.4f}")
print(f"Best F0.5   : {best_f05:.4f}  (thr = {best_thr:.2f})")
print("═"*60)

# ─── SAVE ARTEFACTS ─────────────────────────────────────────
print("💾 Saving artefacts …")

out_dir = Path("catboost_f05_results")
out_dir.mkdir(exist_ok=True)

joblib.dump(study, out_dir / "optuna_study.pkl")
final_model.save_model(str(out_dir / "catboost_final.cbm"))

pd.DataFrame({
    "probability": y_proba,
    "pred_fixed": y_pred_fixed,
    "pred_optimal": y_pred_opt,
    "actual": y_test.values,
}).to_csv(out_dir / "predictions.csv")
]
print(f"✅ Files saved to {out_dir.resolve()}")


[I 2025-06-12 11:10:22,340] A new study created in memory with name: no-name-28b17c07-ed34-4a61-bac9-7eaf66c6d443


🖥️ Platform: Windows — running in CPU‑safe mode
📂 Loading data …
✅ Data: 15,855 rows, 31 features after cleaning
✅ Categorical columns: 0
📊 Class balance 0/1: [7758 8097]
🚀 Hyper‑parameter search …


  0%|          | 0/100 [00:00<?, ?it/s]

[Trial 0] Fold metrics – Precision: 0.545 | Recall: 0.581 | F1: 0.562
[Trial 0] Fold metrics – Precision: 0.514 | Recall: 0.718 | F1: 0.599


Best trial: 0. Best value: 0.551707:   1%|          | 1/100 [00:03<05:19,  3.23s/it]

[Trial 0] Fold metrics – Precision: 0.537 | Recall: 0.664 | F1: 0.594
[I 2025-06-12 11:10:25,568] Trial 0 finished with value: 0.5517073274365529 and parameters: {'iterations': 500, 'lr': 0.1435928598332891, 'depth': 7, 'l2': 9.782560294561476, 'rand_str': 1.312037280884873, 'min_data_leaf': 13, 'bootstrap': 'MVS'}. Best is trial 0 with value: 0.5517073274365529.
[Trial 1] Fold metrics – Precision: 0.549 | Recall: 0.599 | F1: 0.573
[Trial 1] Fold metrics – Precision: 0.526 | Recall: 0.713 | F1: 0.606


Best trial: 1. Best value: 0.554636:   2%|▏         | 2/100 [00:10<09:01,  5.52s/it]

[Trial 1] Fold metrics – Precision: 0.551 | Recall: 0.548 | F1: 0.549
[I 2025-06-12 11:10:32,695] Trial 1 finished with value: 0.5546356803960565 and parameters: {'iterations': 767, 'lr': 0.022675984258454317, 'depth': 8, 'l2': 12.821754330405483, 'rand_str': 1.4246782213565523, 'min_data_leaf': 13, 'bootstrap': 'Bayesian', 'bagging_temp': 1.5798625466052894}. Best is trial 1 with value: 0.5546356803960565.
[Trial 2] Fold metrics – Precision: 0.546 | Recall: 0.662 | F1: 0.599
[Trial 2] Fold metrics – Precision: 0.522 | Recall: 0.735 | F1: 0.610


Best trial: 2. Best value: 0.56195:   3%|▎         | 3/100 [00:11<05:45,  3.57s/it] 

[Trial 2] Fold metrics – Precision: 0.566 | Recall: 0.568 | F1: 0.567
[I 2025-06-12 11:10:33,934] Trial 2 finished with value: 0.5619501676922632 and parameters: {'iterations': 433, 'lr': 0.09954087631390934, 'depth': 4, 'l2': 5.797880430957836, 'rand_str': 1.7327236865873834, 'min_data_leaf': 19, 'bootstrap': 'Bernoulli', 'subsample': 0.7777243706586128}. Best is trial 2 with value: 0.5619501676922632.
[Trial 3] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 3] Fold metrics – Precision: 0.512 | Recall: 0.758 | F1: 0.611


Best trial: 2. Best value: 0.56195:   4%|▍         | 4/100 [00:12<04:13,  2.64s/it]

[Trial 3] Fold metrics – Precision: 0.563 | Recall: 0.561 | F1: 0.562
[I 2025-06-12 11:10:35,156] Trial 3 finished with value: 0.5588135822251431 and parameters: {'iterations': 237, 'lr': 0.098980830747187, 'depth': 4, 'l2': 2.8456707088086337, 'rand_str': 2.8977710745066663, 'min_data_leaf': 30, 'bootstrap': 'Bernoulli', 'subsample': 0.8052699079536471}. Best is trial 2 with value: 0.5619501676922632.
[Trial 4] Fold metrics – Precision: 0.543 | Recall: 0.663 | F1: 0.597
[Trial 4] Fold metrics – Precision: 0.513 | Recall: 0.768 | F1: 0.615


Best trial: 2. Best value: 0.56195:   5%|▌         | 5/100 [00:14<03:52,  2.45s/it]

[Trial 4] Fold metrics – Precision: 0.548 | Recall: 0.592 | F1: 0.569
[I 2025-06-12 11:10:37,272] Trial 4 finished with value: 0.5563963249718736 and parameters: {'iterations': 552, 'lr': 0.03586497052982125, 'depth': 6, 'l2': 2.447050774497839, 'rand_str': 2.818640804157564, 'min_data_leaf': 15, 'bootstrap': 'Bernoulli', 'subsample': 0.7640130838029839}. Best is trial 2 with value: 0.5619501676922632.
[Trial 5] Fold metrics – Precision: 0.547 | Recall: 0.574 | F1: 0.561
[Trial 5] Fold metrics – Precision: 0.516 | Recall: 0.778 | F1: 0.620


Best trial: 2. Best value: 0.56195:   6%|▌         | 6/100 [00:17<04:09,  2.65s/it]

[Trial 5] Fold metrics – Precision: 0.545 | Recall: 0.601 | F1: 0.572
[I 2025-06-12 11:10:40,319] Trial 5 finished with value: 0.5535276785355495 and parameters: {'iterations': 348, 'lr': 0.1460460016093926, 'depth': 7, 'l2': 14.213486240334458, 'rand_str': 2.789654700855298, 'min_data_leaf': 22, 'bootstrap': 'Bernoulli', 'subsample': 0.6135681866731614}. Best is trial 2 with value: 0.5619501676922632.
[Trial 6] Fold metrics – Precision: 0.553 | Recall: 0.578 | F1: 0.566
[Trial 6] Fold metrics – Precision: 0.503 | Recall: 0.784 | F1: 0.613


Best trial: 2. Best value: 0.56195:   7%|▋         | 7/100 [00:19<03:46,  2.43s/it]

[Trial 6] Fold metrics – Precision: 0.561 | Recall: 0.552 | F1: 0.557
[I 2025-06-12 11:10:42,302] Trial 6 finished with value: 0.5531240661086622 and parameters: {'iterations': 460, 'lr': 0.07052804765963266, 'depth': 5, 'l2': 12.773587618975082, 'rand_str': 1.7135066533871786, 'min_data_leaf': 15, 'bootstrap': 'Bayesian', 'bagging_temp': 0.686376609199427}. Best is trial 2 with value: 0.5619501676922632.
[Trial 7] Fold metrics – Precision: 0.538 | Recall: 0.623 | F1: 0.577
[Trial 7] Fold metrics – Precision: 0.530 | Recall: 0.718 | F1: 0.610


Best trial: 2. Best value: 0.56195:   8%|▊         | 8/100 [00:22<03:41,  2.40s/it]

[Trial 7] Fold metrics – Precision: 0.535 | Recall: 0.684 | F1: 0.601
[I 2025-06-12 11:10:44,641] Trial 7 finished with value: 0.5573015880447886 and parameters: {'iterations': 990, 'lr': 0.12039182000856548, 'depth': 4, 'l2': 2.071787522606831, 'rand_str': 2.6309228569096685, 'min_data_leaf': 24, 'bootstrap': 'MVS'}. Best is trial 2 with value: 0.5619501676922632.
[Trial 8] Fold metrics – Precision: 0.534 | Recall: 0.728 | F1: 0.616
[Trial 8] Fold metrics – Precision: 0.523 | Recall: 0.720 | F1: 0.606


Best trial: 2. Best value: 0.56195:   9%|▉         | 9/100 [00:32<07:11,  4.74s/it]

[Trial 8] Fold metrics – Precision: 0.566 | Recall: 0.505 | F1: 0.534
[I 2025-06-12 11:10:54,528] Trial 8 finished with value: 0.556724432365112 and parameters: {'iterations': 487, 'lr': 0.03506297773826686, 'depth': 8, 'l2': 10.102875648758253, 'rand_str': 1.6617960497052984, 'min_data_leaf': 11, 'bootstrap': 'Bayesian', 'bagging_temp': 2.093893678388033}. Best is trial 2 with value: 0.5619501676922632.
[Trial 9] Fold metrics – Precision: 0.548 | Recall: 0.668 | F1: 0.602
[Trial 9] Fold metrics – Precision: 0.525 | Recall: 0.751 | F1: 0.618


Best trial: 9. Best value: 0.562586:  10%|█         | 10/100 [00:34<05:54,  3.94s/it]

[Trial 9] Fold metrics – Precision: 0.559 | Recall: 0.568 | F1: 0.563
[I 2025-06-12 11:10:56,676] Trial 9 finished with value: 0.5625864041640566 and parameters: {'iterations': 910, 'lr': 0.08138794027105341, 'depth': 4, 'l2': 11.272182233898935, 'rand_str': 2.521570097233795, 'min_data_leaf': 21, 'bootstrap': 'Bernoulli', 'subsample': 0.7282623055075649}. Best is trial 9 with value: 0.5625864041640566.
[Trial 10] Fold metrics – Precision: 0.544 | Recall: 0.681 | F1: 0.605
[Trial 10] Fold metrics – Precision: 0.511 | Recall: 0.848 | F1: 0.638


Best trial: 9. Best value: 0.562586:  11%|█         | 11/100 [00:37<05:16,  3.56s/it]

[Trial 10] Fold metrics – Precision: 0.562 | Recall: 0.565 | F1: 0.563
[I 2025-06-12 11:10:59,370] Trial 10 finished with value: 0.5614455286164585 and parameters: {'iterations': 769, 'lr': 0.065807087285632, 'depth': 5, 'l2': 6.850306131534887, 'rand_str': 2.3037331015563574, 'min_data_leaf': 28, 'bootstrap': 'Bernoulli', 'subsample': 0.644494882491665}. Best is trial 9 with value: 0.5625864041640566.
[Trial 11] Fold metrics – Precision: 0.547 | Recall: 0.664 | F1: 0.599
[Trial 11] Fold metrics – Precision: 0.530 | Recall: 0.714 | F1: 0.608


Best trial: 9. Best value: 0.562586:  12%|█▏        | 12/100 [00:39<04:46,  3.25s/it]

[Trial 11] Fold metrics – Precision: 0.555 | Recall: 0.562 | F1: 0.558
[I 2025-06-12 11:11:01,917] Trial 11 finished with value: 0.5603646673108532 and parameters: {'iterations': 727, 'lr': 0.09498358995844303, 'depth': 4, 'l2': 6.385542042022537, 'rand_str': 2.170258639524419, 'min_data_leaf': 19, 'bootstrap': 'Bernoulli', 'subsample': 0.8806685628244275}. Best is trial 9 with value: 0.5625864041640566.
[Trial 12] Fold metrics – Precision: 0.544 | Recall: 0.681 | F1: 0.605
[Trial 12] Fold metrics – Precision: 0.506 | Recall: 0.828 | F1: 0.628


Best trial: 9. Best value: 0.562586:  13%|█▎        | 13/100 [00:42<04:29,  3.10s/it]

[Trial 12] Fold metrics – Precision: 0.559 | Recall: 0.574 | F1: 0.566
[I 2025-06-12 11:11:04,675] Trial 12 finished with value: 0.5589256570050339 and parameters: {'iterations': 993, 'lr': 0.07201322385084936, 'depth': 5, 'l2': 5.6968088641735175, 'rand_str': 2.3994566430841013, 'min_data_leaf': 19, 'bootstrap': 'Bernoulli', 'subsample': 0.7060188414980881}. Best is trial 9 with value: 0.5625864041640566.
[Trial 13] Fold metrics – Precision: 0.549 | Recall: 0.652 | F1: 0.596
[Trial 13] Fold metrics – Precision: 0.530 | Recall: 0.738 | F1: 0.617


Best trial: 9. Best value: 0.562586:  14%|█▍        | 14/100 [00:44<04:05,  2.85s/it]

[Trial 13] Fold metrics – Precision: 0.555 | Recall: 0.578 | F1: 0.566
[I 2025-06-12 11:11:06,952] Trial 13 finished with value: 0.5623889837435304 and parameters: {'iterations': 632, 'lr': 0.1179191712234414, 'depth': 4, 'l2': 8.553268982782875, 'rand_str': 1.878834381233934, 'min_data_leaf': 24, 'bootstrap': 'Bernoulli', 'subsample': 0.7250773824982502}. Best is trial 9 with value: 0.5625864041640566.
[Trial 14] Fold metrics – Precision: 0.551 | Recall: 0.637 | F1: 0.591
[Trial 14] Fold metrics – Precision: 0.522 | Recall: 0.748 | F1: 0.615


Best trial: 9. Best value: 0.562586:  15%|█▌        | 15/100 [00:47<03:59,  2.82s/it]

[Trial 14] Fold metrics – Precision: 0.550 | Recall: 0.576 | F1: 0.563
[I 2025-06-12 11:11:09,684] Trial 14 finished with value: 0.5589161654776199 and parameters: {'iterations': 667, 'lr': 0.12156246854518325, 'depth': 5, 'l2': 10.380757908485112, 'rand_str': 1.0262806314356168, 'min_data_leaf': 25, 'bootstrap': 'Bernoulli', 'subsample': 0.6999068077639297}. Best is trial 9 with value: 0.5625864041640566.
[Trial 15] Fold metrics – Precision: 0.548 | Recall: 0.557 | F1: 0.553
[Trial 15] Fold metrics – Precision: 0.513 | Recall: 0.741 | F1: 0.606


Best trial: 9. Best value: 0.562586:  16%|█▌        | 16/100 [00:51<04:31,  3.24s/it]

[Trial 15] Fold metrics – Precision: 0.547 | Recall: 0.619 | F1: 0.581
[I 2025-06-12 11:11:13,900] Trial 15 finished with value: 0.55235741649078 and parameters: {'iterations': 884, 'lr': 0.11875873081206059, 'depth': 6, 'l2': 8.295759945755048, 'rand_str': 1.9888895727723297, 'min_data_leaf': 26, 'bootstrap': 'MVS'}. Best is trial 9 with value: 0.5625864041640566.
[Trial 16] Fold metrics – Precision: 0.549 | Recall: 0.667 | F1: 0.602
[Trial 16] Fold metrics – Precision: 0.521 | Recall: 0.759 | F1: 0.618


Best trial: 9. Best value: 0.562586:  17%|█▋        | 17/100 [00:53<04:03,  2.94s/it]

[Trial 16] Fold metrics – Precision: 0.561 | Recall: 0.572 | F1: 0.566
[I 2025-06-12 11:11:16,136] Trial 16 finished with value: 0.5625702505842114 and parameters: {'iterations': 836, 'lr': 0.061791385133628564, 'depth': 4, 'l2': 11.498284706012234, 'rand_str': 2.0023976158682975, 'min_data_leaf': 22, 'bootstrap': 'Bernoulli', 'subsample': 0.7035800473184166}. Best is trial 9 with value: 0.5625864041640566.
[Trial 17] Fold metrics – Precision: 0.553 | Recall: 0.557 | F1: 0.555
[Trial 17] Fold metrics – Precision: 0.518 | Recall: 0.732 | F1: 0.607


Best trial: 9. Best value: 0.562586:  18%|█▊        | 18/100 [00:57<04:29,  3.28s/it]

[Trial 17] Fold metrics – Precision: 0.553 | Recall: 0.586 | F1: 0.569
[I 2025-06-12 11:11:20,222] Trial 17 finished with value: 0.5544660014844727 and parameters: {'iterations': 864, 'lr': 0.05453276301120475, 'depth': 6, 'l2': 11.927554701168951, 'rand_str': 2.503829906270678, 'min_data_leaf': 21, 'bootstrap': 'Bernoulli', 'subsample': 0.6738569345465305}. Best is trial 9 with value: 0.5625864041640566.
[Trial 18] Fold metrics – Precision: 0.543 | Recall: 0.641 | F1: 0.588
[Trial 18] Fold metrics – Precision: 0.507 | Recall: 0.828 | F1: 0.628


Best trial: 9. Best value: 0.562586:  19%|█▉        | 19/100 [01:00<04:14,  3.14s/it]

[Trial 18] Fold metrics – Precision: 0.563 | Recall: 0.557 | F1: 0.560
[I 2025-06-12 11:11:23,033] Trial 18 finished with value: 0.5569846793433383 and parameters: {'iterations': 878, 'lr': 0.0546175701768629, 'depth': 5, 'l2': 14.340964315369071, 'rand_str': 2.252957827103233, 'min_data_leaf': 17, 'bootstrap': 'Bayesian', 'bagging_temp': 2.9969897708839963}. Best is trial 9 with value: 0.5625864041640566.
[Trial 19] Fold metrics – Precision: 0.547 | Recall: 0.609 | F1: 0.576
[Trial 19] Fold metrics – Precision: 0.518 | Recall: 0.749 | F1: 0.613


Best trial: 9. Best value: 0.562586:  20%|██        | 20/100 [01:02<03:50,  2.88s/it]

[Trial 19] Fold metrics – Precision: 0.562 | Recall: 0.565 | F1: 0.564
[I 2025-06-12 11:11:25,304] Trial 19 finished with value: 0.5579859154394458 and parameters: {'iterations': 832, 'lr': 0.08839343092297679, 'depth': 4, 'l2': 11.945185321172795, 'rand_str': 2.0963792041577842, 'min_data_leaf': 22, 'bootstrap': 'MVS'}. Best is trial 9 with value: 0.5625864041640566.
[Trial 20] Fold metrics – Precision: 0.543 | Recall: 0.663 | F1: 0.597
[Trial 20] Fold metrics – Precision: 0.525 | Recall: 0.719 | F1: 0.607


Best trial: 9. Best value: 0.562586:  21%|██        | 21/100 [01:07<04:21,  3.31s/it]

[Trial 20] Fold metrics – Precision: 0.549 | Recall: 0.570 | F1: 0.559
[I 2025-06-12 11:11:29,623] Trial 20 finished with value: 0.5570623643050803 and parameters: {'iterations': 937, 'lr': 0.07989816355419879, 'depth': 6, 'l2': 11.099053633522672, 'rand_str': 2.5588564046233575, 'min_data_leaf': 27, 'bootstrap': 'Bernoulli', 'subsample': 0.8221551092136461}. Best is trial 9 with value: 0.5625864041640566.
[Trial 21] Fold metrics – Precision: 0.549 | Recall: 0.667 | F1: 0.602
[Trial 21] Fold metrics – Precision: 0.523 | Recall: 0.763 | F1: 0.621


Best trial: 21. Best value: 0.564796:  22%|██▏       | 22/100 [01:09<03:53,  3.00s/it]

[Trial 21] Fold metrics – Precision: 0.562 | Recall: 0.590 | F1: 0.576
[I 2025-06-12 11:11:31,893] Trial 21 finished with value: 0.5647956379516194 and parameters: {'iterations': 625, 'lr': 0.05874728888241147, 'depth': 4, 'l2': 8.594264745023331, 'rand_str': 1.925231957936014, 'min_data_leaf': 23, 'bootstrap': 'Bernoulli', 'subsample': 0.7275822825284761}. Best is trial 21 with value: 0.5647956379516194.
[Trial 22] Fold metrics – Precision: 0.549 | Recall: 0.667 | F1: 0.602
[Trial 22] Fold metrics – Precision: 0.528 | Recall: 0.745 | F1: 0.618


Best trial: 22. Best value: 0.565504:  23%|██▎       | 23/100 [01:11<03:32,  2.76s/it]

[Trial 22] Fold metrics – Precision: 0.562 | Recall: 0.590 | F1: 0.576
[I 2025-06-12 11:11:34,103] Trial 22 finished with value: 0.5655043606922715 and parameters: {'iterations': 697, 'lr': 0.05307592822927969, 'depth': 4, 'l2': 8.625798314956667, 'rand_str': 1.9161178234306955, 'min_data_leaf': 23, 'bootstrap': 'Bernoulli', 'subsample': 0.7365237906217537}. Best is trial 22 with value: 0.5655043606922715.
[Trial 23] Fold metrics – Precision: 0.544 | Recall: 0.681 | F1: 0.605
[Trial 23] Fold metrics – Precision: 0.510 | Recall: 0.823 | F1: 0.630


Best trial: 22. Best value: 0.565504:  24%|██▍       | 24/100 [01:14<03:29,  2.76s/it]

[Trial 23] Fold metrics – Precision: 0.553 | Recall: 0.592 | F1: 0.572
[I 2025-06-12 11:11:36,849] Trial 23 finished with value: 0.5596574710054881 and parameters: {'iterations': 592, 'lr': 0.045338558365569215, 'depth': 5, 'l2': 8.483880149164419, 'rand_str': 1.914834670069054, 'min_data_leaf': 24, 'bootstrap': 'Bernoulli', 'subsample': 0.7411892767254703}. Best is trial 22 with value: 0.5655043606922715.
[Trial 24] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 24] Fold metrics – Precision: 0.525 | Recall: 0.729 | F1: 0.611


Best trial: 22. Best value: 0.565504:  25%|██▌       | 25/100 [01:16<03:16,  2.62s/it]

[Trial 24] Fold metrics – Precision: 0.558 | Recall: 0.566 | F1: 0.562
[I 2025-06-12 11:11:39,150] Trial 24 finished with value: 0.560952897181201 and parameters: {'iterations': 719, 'lr': 0.04773517191424417, 'depth': 4, 'l2': 7.34762696343794, 'rand_str': 1.4814155222205103, 'min_data_leaf': 17, 'bootstrap': 'Bernoulli', 'subsample': 0.7563313076714752}. Best is trial 22 with value: 0.5655043606922715.
[Trial 25] Fold metrics – Precision: 0.550 | Recall: 0.639 | F1: 0.591
[Trial 25] Fold metrics – Precision: 0.512 | Recall: 0.827 | F1: 0.632


Best trial: 22. Best value: 0.565504:  26%|██▌       | 26/100 [01:19<03:18,  2.68s/it]

[Trial 25] Fold metrics – Precision: 0.562 | Recall: 0.565 | F1: 0.564
[I 2025-06-12 11:11:41,982] Trial 25 finished with value: 0.5609517822803681 and parameters: {'iterations': 653, 'lr': 0.07333356115699105, 'depth': 5, 'l2': 9.692260483049132, 'rand_str': 1.804018448559455, 'min_data_leaf': 20, 'bootstrap': 'Bernoulli', 'subsample': 0.6708045610921853}. Best is trial 22 with value: 0.5655043606922715.
[Trial 26] Fold metrics – Precision: 0.546 | Recall: 0.664 | F1: 0.600
[Trial 26] Fold metrics – Precision: 0.525 | Recall: 0.715 | F1: 0.605


Best trial: 22. Best value: 0.565504:  27%|██▋       | 27/100 [01:22<03:20,  2.74s/it]

[Trial 26] Fold metrics – Precision: 0.570 | Recall: 0.517 | F1: 0.542
[I 2025-06-12 11:11:44,853] Trial 26 finished with value: 0.5595878043377541 and parameters: {'iterations': 566, 'lr': 0.08410481345664976, 'depth': 4, 'l2': 4.092250697622816, 'rand_str': 1.5839399517476442, 'min_data_leaf': 29, 'bootstrap': 'Bernoulli', 'subsample': 0.7916608877826562}. Best is trial 22 with value: 0.5655043606922715.
[Trial 27] Fold metrics – Precision: 0.547 | Recall: 0.574 | F1: 0.561
[Trial 27] Fold metrics – Precision: 0.515 | Recall: 0.732 | F1: 0.605


Best trial: 22. Best value: 0.565504:  28%|██▊       | 28/100 [01:28<04:17,  3.57s/it]

[Trial 27] Fold metrics – Precision: 0.551 | Recall: 0.549 | F1: 0.550
[I 2025-06-12 11:11:50,371] Trial 27 finished with value: 0.5501920887574082 and parameters: {'iterations': 698, 'lr': 0.024078443412759776, 'depth': 7, 'l2': 7.711199141797209, 'rand_str': 2.1388748201585948, 'min_data_leaf': 23, 'bootstrap': 'Bernoulli', 'subsample': 0.8408451459752495}. Best is trial 22 with value: 0.5655043606922715.
[Trial 28] Fold metrics – Precision: 0.524 | Recall: 0.684 | F1: 0.593
[Trial 28] Fold metrics – Precision: 0.503 | Recall: 0.784 | F1: 0.613


Best trial: 22. Best value: 0.565504:  29%|██▉       | 29/100 [01:30<03:59,  3.37s/it]

[Trial 28] Fold metrics – Precision: 0.559 | Recall: 0.509 | F1: 0.533
[I 2025-06-12 11:11:53,257] Trial 28 finished with value: 0.5466175561171939 and parameters: {'iterations': 394, 'lr': 0.058564820165705515, 'depth': 5, 'l2': 9.14437667050203, 'rand_str': 2.386552444677471, 'min_data_leaf': 26, 'bootstrap': 'Bayesian', 'bagging_temp': 0.8453697931659703}. Best is trial 22 with value: 0.5655043606922715.
[Trial 29] Fold metrics – Precision: 0.541 | Recall: 0.601 | F1: 0.569
[Trial 29] Fold metrics – Precision: 0.523 | Recall: 0.712 | F1: 0.603


Best trial: 22. Best value: 0.565504:  30%|███       | 30/100 [01:36<04:47,  4.11s/it]

[Trial 29] Fold metrics – Precision: 0.537 | Recall: 0.664 | F1: 0.594
[I 2025-06-12 11:11:59,098] Trial 29 finished with value: 0.5543342896470836 and parameters: {'iterations': 787, 'lr': 0.10728620251224479, 'depth': 7, 'l2': 4.373720303872149, 'rand_str': 1.190320379089986, 'min_data_leaf': 21, 'bootstrap': 'MVS'}. Best is trial 22 with value: 0.5655043606922715.
[Trial 30] Fold metrics – Precision: 0.552 | Recall: 0.617 | F1: 0.583
[Trial 30] Fold metrics – Precision: 0.518 | Recall: 0.736 | F1: 0.608


Best trial: 22. Best value: 0.565504:  31%|███       | 31/100 [01:39<04:10,  3.64s/it]

[Trial 30] Fold metrics – Precision: 0.535 | Recall: 0.684 | F1: 0.601
[I 2025-06-12 11:12:01,635] Trial 30 finished with value: 0.5580514078406461 and parameters: {'iterations': 934, 'lr': 0.04142740643850309, 'depth': 4, 'l2': 10.415020641869793, 'rand_str': 2.6733672343901955, 'min_data_leaf': 17, 'bootstrap': 'MVS'}. Best is trial 22 with value: 0.5655043606922715.
[Trial 31] Fold metrics – Precision: 0.549 | Recall: 0.667 | F1: 0.602
[Trial 31] Fold metrics – Precision: 0.518 | Recall: 0.774 | F1: 0.620


Best trial: 22. Best value: 0.565504:  32%|███▏      | 32/100 [01:41<03:37,  3.20s/it]

[Trial 31] Fold metrics – Precision: 0.554 | Recall: 0.586 | F1: 0.569
[I 2025-06-12 11:12:03,822] Trial 31 finished with value: 0.5611305922028585 and parameters: {'iterations': 827, 'lr': 0.060162494621904254, 'depth': 4, 'l2': 11.325769604081383, 'rand_str': 1.9992674651634352, 'min_data_leaf': 22, 'bootstrap': 'Bernoulli', 'subsample': 0.7209951832056909}. Best is trial 22 with value: 0.5655043606922715.
[Trial 32] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 32] Fold metrics – Precision: 0.521 | Recall: 0.719 | F1: 0.604


Best trial: 22. Best value: 0.565504:  33%|███▎      | 33/100 [01:43<03:13,  2.89s/it]

[Trial 32] Fold metrics – Precision: 0.555 | Recall: 0.565 | F1: 0.560
[I 2025-06-12 11:12:05,980] Trial 32 finished with value: 0.5584235280097528 and parameters: {'iterations': 933, 'lr': 0.0777942794202706, 'depth': 4, 'l2': 13.259126736273638, 'rand_str': 1.3746090092642924, 'min_data_leaf': 23, 'bootstrap': 'Bernoulli', 'subsample': 0.6852951466343287}. Best is trial 22 with value: 0.5655043606922715.
[Trial 33] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 33] Fold metrics – Precision: 0.526 | Recall: 0.723 | F1: 0.609


Best trial: 22. Best value: 0.565504:  34%|███▍      | 34/100 [01:45<02:57,  2.69s/it]

[Trial 33] Fold metrics – Precision: 0.546 | Recall: 0.619 | F1: 0.580
[I 2025-06-12 11:12:08,218] Trial 33 finished with value: 0.5606511983603438 and parameters: {'iterations': 536, 'lr': 0.06386780511387181, 'depth': 4, 'l2': 9.42733674827222, 'rand_str': 1.5507116105090262, 'min_data_leaf': 20, 'bootstrap': 'Bernoulli', 'subsample': 0.7377188814708178}. Best is trial 22 with value: 0.5655043606922715.
[Trial 34] Fold metrics – Precision: 0.549 | Recall: 0.667 | F1: 0.602
[Trial 34] Fold metrics – Precision: 0.521 | Recall: 0.719 | F1: 0.604


Best trial: 22. Best value: 0.565504:  35%|███▌      | 35/100 [01:48<02:44,  2.53s/it]

[Trial 34] Fold metrics – Precision: 0.558 | Recall: 0.578 | F1: 0.568
[I 2025-06-12 11:12:10,381] Trial 34 finished with value: 0.5607202583697343 and parameters: {'iterations': 797, 'lr': 0.05044161514585033, 'depth': 4, 'l2': 11.015796167643524, 'rand_str': 1.7844225481169156, 'min_data_leaf': 25, 'bootstrap': 'Bernoulli', 'subsample': 0.709066427169277}. Best is trial 22 with value: 0.5655043606922715.
[Trial 35] Fold metrics – Precision: 0.538 | Recall: 0.707 | F1: 0.611
[Trial 35] Fold metrics – Precision: 0.524 | Recall: 0.747 | F1: 0.616


Best trial: 22. Best value: 0.565504:  36%|███▌      | 36/100 [01:50<02:37,  2.46s/it]

[Trial 35] Fold metrics – Precision: 0.562 | Recall: 0.587 | F1: 0.574
[I 2025-06-12 11:12:12,652] Trial 35 finished with value: 0.5629083633676235 and parameters: {'iterations': 621, 'lr': 0.033756991755374714, 'depth': 4, 'l2': 12.977478540926173, 'rand_str': 2.058560862981183, 'min_data_leaf': 20, 'bootstrap': 'Bernoulli', 'subsample': 0.750613817369306}. Best is trial 22 with value: 0.5655043606922715.
[Trial 36] Fold metrics – Precision: 0.548 | Recall: 0.564 | F1: 0.556
[Trial 36] Fold metrics – Precision: 0.520 | Recall: 0.731 | F1: 0.608


Best trial: 22. Best value: 0.565504:  37%|███▋      | 37/100 [01:59<04:34,  4.35s/it]

[Trial 36] Fold metrics – Precision: 0.559 | Recall: 0.566 | F1: 0.562
[I 2025-06-12 11:12:21,437] Trial 36 finished with value: 0.5544715780299129 and parameters: {'iterations': 611, 'lr': 0.028733964132530924, 'depth': 8, 'l2': 12.968339946595691, 'rand_str': 1.8893927706698521, 'min_data_leaf': 18, 'bootstrap': 'Bernoulli', 'subsample': 0.7537842631499281}. Best is trial 22 with value: 0.5655043606922715.
[Trial 37] Fold metrics – Precision: 0.544 | Recall: 0.681 | F1: 0.605
[Trial 37] Fold metrics – Precision: 0.511 | Recall: 0.833 | F1: 0.633


Best trial: 22. Best value: 0.565504:  38%|███▊      | 38/100 [02:01<03:59,  3.86s/it]

[Trial 37] Fold metrics – Precision: 0.560 | Recall: 0.561 | F1: 0.560
[I 2025-06-12 11:12:24,158] Trial 37 finished with value: 0.5600695551437941 and parameters: {'iterations': 277, 'lr': 0.03821337708499175, 'depth': 5, 'l2': 12.295894351505916, 'rand_str': 2.2285796390429016, 'min_data_leaf': 21, 'bootstrap': 'Bernoulli', 'subsample': 0.7718229936345373}. Best is trial 22 with value: 0.5655043606922715.
[Trial 38] Fold metrics – Precision: 0.548 | Recall: 0.601 | F1: 0.573
[Trial 38] Fold metrics – Precision: 0.517 | Recall: 0.756 | F1: 0.614


Best trial: 22. Best value: 0.565504:  39%|███▉      | 39/100 [02:04<03:28,  3.41s/it]

[Trial 38] Fold metrics – Precision: 0.543 | Recall: 0.594 | F1: 0.567
[I 2025-06-12 11:12:26,508] Trial 38 finished with value: 0.55395074322871 and parameters: {'iterations': 530, 'lr': 0.029650941538025454, 'depth': 4, 'l2': 13.661797708756673, 'rand_str': 2.935792120251455, 'min_data_leaf': 14, 'bootstrap': 'Bayesian', 'bagging_temp': 2.7462186019424157}. Best is trial 22 with value: 0.5655043606922715.
[Trial 39] Fold metrics – Precision: 0.549 | Recall: 0.669 | F1: 0.603
[Trial 39] Fold metrics – Precision: 0.523 | Recall: 0.752 | F1: 0.617


Best trial: 22. Best value: 0.565504:  40%|████      | 40/100 [02:06<03:05,  3.09s/it]

[Trial 39] Fold metrics – Precision: 0.555 | Recall: 0.599 | F1: 0.576
[I 2025-06-12 11:12:28,848] Trial 39 finished with value: 0.5631384526408266 and parameters: {'iterations': 679, 'lr': 0.020346834966046612, 'depth': 4, 'l2': 13.752352846901136, 'rand_str': 2.065197958624937, 'min_data_leaf': 19, 'bootstrap': 'Bernoulli', 'subsample': 0.7322966821882076}. Best is trial 22 with value: 0.5655043606922715.
[Trial 40] Fold metrics – Precision: 0.544 | Recall: 0.681 | F1: 0.605
[Trial 40] Fold metrics – Precision: 0.508 | Recall: 0.824 | F1: 0.629


Best trial: 22. Best value: 0.565504:  41%|████      | 41/100 [02:09<02:56,  3.00s/it]

[Trial 40] Fold metrics – Precision: 0.559 | Recall: 0.571 | F1: 0.565
[I 2025-06-12 11:12:31,635] Trial 40 finished with value: 0.5596383521076382 and parameters: {'iterations': 668, 'lr': 0.02406855431399353, 'depth': 5, 'l2': 14.758963146140276, 'rand_str': 2.0541010366912196, 'min_data_leaf': 16, 'bootstrap': 'Bernoulli', 'subsample': 0.7921360724696977}. Best is trial 22 with value: 0.5655043606922715.
[Trial 41] Fold metrics – Precision: 0.538 | Recall: 0.707 | F1: 0.611
[Trial 41] Fold metrics – Precision: 0.525 | Recall: 0.750 | F1: 0.618


Best trial: 22. Best value: 0.565504:  42%|████▏     | 42/100 [02:11<02:39,  2.75s/it]

[Trial 41] Fold metrics – Precision: 0.536 | Recall: 0.697 | F1: 0.606
[I 2025-06-12 11:12:33,811] Trial 41 finished with value: 0.5616936470958444 and parameters: {'iterations': 742, 'lr': 0.021222710046461576, 'depth': 4, 'l2': 13.83745245181025, 'rand_str': 2.4291625574001503, 'min_data_leaf': 19, 'bootstrap': 'Bernoulli', 'subsample': 0.7374984733191795}. Best is trial 22 with value: 0.5655043606922715.
[Trial 42] Fold metrics – Precision: 0.550 | Recall: 0.669 | F1: 0.604
[Trial 42] Fold metrics – Precision: 0.526 | Recall: 0.721 | F1: 0.608


Best trial: 22. Best value: 0.565504:  43%|████▎     | 43/100 [02:13<02:30,  2.64s/it]

[Trial 42] Fold metrics – Precision: 0.546 | Recall: 0.619 | F1: 0.580
[I 2025-06-12 11:12:36,175] Trial 42 finished with value: 0.5617064562777648 and parameters: {'iterations': 591, 'lr': 0.034444581345000745, 'depth': 4, 'l2': 14.927959938194618, 'rand_str': 1.6639252627063896, 'min_data_leaf': 23, 'bootstrap': 'Bernoulli', 'subsample': 0.7338572425171875}. Best is trial 22 with value: 0.5655043606922715.
[Trial 43] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 43] Fold metrics – Precision: 0.525 | Recall: 0.760 | F1: 0.621


Best trial: 22. Best value: 0.565504:  44%|████▍     | 44/100 [02:16<02:20,  2.51s/it]

[Trial 43] Fold metrics – Precision: 0.536 | Recall: 0.697 | F1: 0.606
[I 2025-06-12 11:12:38,376] Trial 43 finished with value: 0.5629363648239711 and parameters: {'iterations': 683, 'lr': 0.04304321773202563, 'depth': 4, 'l2': 12.555495688003719, 'rand_str': 2.778037803783163, 'min_data_leaf': 18, 'bootstrap': 'Bernoulli', 'subsample': 0.7774776609307122}. Best is trial 22 with value: 0.5655043606922715.
[Trial 44] Fold metrics – Precision: 0.538 | Recall: 0.659 | F1: 0.592
[Trial 44] Fold metrics – Precision: 0.515 | Recall: 0.754 | F1: 0.612


Best trial: 22. Best value: 0.565504:  45%|████▌     | 45/100 [02:18<02:13,  2.43s/it]

[Trial 44] Fold metrics – Precision: 0.536 | Recall: 0.697 | F1: 0.606
[I 2025-06-12 11:12:40,647] Trial 44 finished with value: 0.5569164210109132 and parameters: {'iterations': 700, 'lr': 0.04309317727460815, 'depth': 4, 'l2': 12.815838624888581, 'rand_str': 2.9948920240449737, 'min_data_leaf': 12, 'bootstrap': 'Bernoulli', 'subsample': 0.7763145114662607}. Best is trial 22 with value: 0.5655043606922715.
[Trial 45] Fold metrics – Precision: 0.549 | Recall: 0.639 | F1: 0.591
[Trial 45] Fold metrics – Precision: 0.512 | Recall: 0.795 | F1: 0.622


Best trial: 22. Best value: 0.565504:  46%|████▌     | 46/100 [02:19<01:57,  2.18s/it]

[Trial 45] Fold metrics – Precision: 0.555 | Recall: 0.571 | F1: 0.563
[I 2025-06-12 11:12:42,232] Trial 45 finished with value: 0.5581549657308279 and parameters: {'iterations': 632, 'lr': 0.032459766872748645, 'depth': 4, 'l2': 12.387992196308256, 'rand_str': 2.824193238617712, 'min_data_leaf': 18, 'bootstrap': 'Bayesian', 'bagging_temp': 1.5287909989622088}. Best is trial 22 with value: 0.5655043606922715.
[Trial 46] Fold metrics – Precision: 0.544 | Recall: 0.681 | F1: 0.605
[Trial 46] Fold metrics – Precision: 0.510 | Recall: 0.833 | F1: 0.632


Best trial: 22. Best value: 0.565504:  47%|████▋     | 47/100 [02:21<01:45,  1.99s/it]

[Trial 46] Fold metrics – Precision: 0.558 | Recall: 0.575 | F1: 0.566
[I 2025-06-12 11:12:43,768] Trial 46 finished with value: 0.5601726091985015 and parameters: {'iterations': 457, 'lr': 0.04059737378427843, 'depth': 5, 'l2': 13.46349354645074, 'rand_str': 2.2989248323989666, 'min_data_leaf': 15, 'bootstrap': 'Bernoulli', 'subsample': 0.7944470684274794}. Best is trial 22 with value: 0.5655043606922715.
[Trial 47] Fold metrics – Precision: 0.538 | Recall: 0.707 | F1: 0.611
[Trial 47] Fold metrics – Precision: 0.525 | Recall: 0.747 | F1: 0.616


Best trial: 22. Best value: 0.565504:  48%|████▊     | 48/100 [02:22<01:31,  1.77s/it]

[Trial 47] Fold metrics – Precision: 0.555 | Recall: 0.559 | F1: 0.557
[I 2025-06-12 11:12:45,027] Trial 47 finished with value: 0.5596376910538016 and parameters: {'iterations': 508, 'lr': 0.050320790526224256, 'depth': 4, 'l2': 5.850060935291788, 'rand_str': 2.183696582578306, 'min_data_leaf': 18, 'bootstrap': 'Bernoulli', 'subsample': 0.8186828777064658}. Best is trial 22 with value: 0.5655043606922715.
[Trial 48] Fold metrics – Precision: 0.541 | Recall: 0.623 | F1: 0.579
[Trial 48] Fold metrics – Precision: 0.521 | Recall: 0.733 | F1: 0.609


Best trial: 22. Best value: 0.565504:  49%|████▉     | 49/100 [02:23<01:23,  1.63s/it]

[Trial 48] Fold metrics – Precision: 0.554 | Recall: 0.553 | F1: 0.554
[I 2025-06-12 11:12:46,331] Trial 48 finished with value: 0.5543314397673761 and parameters: {'iterations': 745, 'lr': 0.028735838118295314, 'depth': 4, 'l2': 14.41082008701643, 'rand_str': 1.8020125431513607, 'min_data_leaf': 20, 'bootstrap': 'MVS'}. Best is trial 22 with value: 0.5655043606922715.
[Trial 49] Fold metrics – Precision: 0.544 | Recall: 0.681 | F1: 0.605
[Trial 49] Fold metrics – Precision: 0.510 | Recall: 0.834 | F1: 0.633


Best trial: 22. Best value: 0.565504:  50%|█████     | 50/100 [02:25<01:20,  1.61s/it]

[Trial 49] Fold metrics – Precision: 0.563 | Recall: 0.572 | F1: 0.568
[I 2025-06-12 11:12:47,890] Trial 49 finished with value: 0.5615382461993512 and parameters: {'iterations': 671, 'lr': 0.020284936704297298, 'depth': 5, 'l2': 10.618335981068658, 'rand_str': 2.6967920265095398, 'min_data_leaf': 20, 'bootstrap': 'Bernoulli', 'subsample': 0.7611287075485007}. Best is trial 22 with value: 0.5655043606922715.
[Trial 50] Fold metrics – Precision: 0.538 | Recall: 0.707 | F1: 0.611
[Trial 50] Fold metrics – Precision: 0.524 | Recall: 0.747 | F1: 0.616


Best trial: 22. Best value: 0.565504:  51%|█████     | 51/100 [02:26<01:13,  1.51s/it]

[Trial 50] Fold metrics – Precision: 0.561 | Recall: 0.585 | F1: 0.573
[I 2025-06-12 11:12:49,166] Trial 50 finished with value: 0.5627062056522516 and parameters: {'iterations': 576, 'lr': 0.05492884542289908, 'depth': 4, 'l2': 7.756068226840636, 'rand_str': 1.9082039254960201, 'min_data_leaf': 16, 'bootstrap': 'Bernoulli', 'subsample': 0.7532330570587863}. Best is trial 22 with value: 0.5655043606922715.
[Trial 51] Fold metrics – Precision: 0.539 | Recall: 0.707 | F1: 0.611
[Trial 51] Fold metrics – Precision: 0.520 | Recall: 0.745 | F1: 0.612


Best trial: 22. Best value: 0.565504:  52%|█████▏    | 52/100 [02:28<01:08,  1.43s/it]

[Trial 51] Fold metrics – Precision: 0.561 | Recall: 0.585 | F1: 0.573
[I 2025-06-12 11:12:50,402] Trial 51 finished with value: 0.5616278426042256 and parameters: {'iterations': 568, 'lr': 0.06965908603570983, 'depth': 4, 'l2': 8.8496678460271, 'rand_str': 1.9291803277374304, 'min_data_leaf': 10, 'bootstrap': 'Bernoulli', 'subsample': 0.7528952413743029}. Best is trial 22 with value: 0.5655043606922715.
[Trial 52] Fold metrics – Precision: 0.548 | Recall: 0.666 | F1: 0.601
[Trial 52] Fold metrics – Precision: 0.525 | Recall: 0.742 | F1: 0.615


Best trial: 22. Best value: 0.565504:  53%|█████▎    | 53/100 [02:29<01:04,  1.38s/it]

[Trial 52] Fold metrics – Precision: 0.561 | Recall: 0.564 | F1: 0.563
[I 2025-06-12 11:12:51,663] Trial 52 finished with value: 0.5624479710608274 and parameters: {'iterations': 606, 'lr': 0.05546261101338423, 'depth': 4, 'l2': 7.48852583139573, 'rand_str': 1.8492749143635356, 'min_data_leaf': 16, 'bootstrap': 'Bernoulli', 'subsample': 0.776958652714735}. Best is trial 22 with value: 0.5655043606922715.
[Trial 53] Fold metrics – Precision: 0.550 | Recall: 0.669 | F1: 0.604
[Trial 53] Fold metrics – Precision: 0.521 | Recall: 0.719 | F1: 0.604


Best trial: 22. Best value: 0.565504:  54%|█████▍    | 54/100 [02:30<01:01,  1.34s/it]

[Trial 53] Fold metrics – Precision: 0.565 | Recall: 0.594 | F1: 0.579
[I 2025-06-12 11:12:52,928] Trial 53 finished with value: 0.5638379836789992 and parameters: {'iterations': 637, 'lr': 0.035671612771177995, 'depth': 4, 'l2': 8.079433096935066, 'rand_str': 1.7339338804777007, 'min_data_leaf': 14, 'bootstrap': 'Bernoulli', 'subsample': 0.7223730517623818}. Best is trial 22 with value: 0.5655043606922715.
[Trial 54] Fold metrics – Precision: 0.550 | Recall: 0.669 | F1: 0.604
[Trial 54] Fold metrics – Precision: 0.528 | Recall: 0.737 | F1: 0.615


Best trial: 22. Best value: 0.565504:  55%|█████▌    | 55/100 [02:31<00:59,  1.31s/it]

[Trial 54] Fold metrics – Precision: 0.557 | Recall: 0.571 | F1: 0.564
[I 2025-06-12 11:12:54,172] Trial 54 finished with value: 0.5630966430879473 and parameters: {'iterations': 632, 'lr': 0.035144891267128345, 'depth': 4, 'l2': 6.489802684741546, 'rand_str': 1.684970724115755, 'min_data_leaf': 13, 'bootstrap': 'Bernoulli', 'subsample': 0.7147121803375743}. Best is trial 22 with value: 0.5655043606922715.
[Trial 55] Fold metrics – Precision: 0.544 | Recall: 0.681 | F1: 0.605
[Trial 55] Fold metrics – Precision: 0.510 | Recall: 0.802 | F1: 0.624


Best trial: 22. Best value: 0.565504:  56%|█████▌    | 56/100 [02:33<01:00,  1.37s/it]

[Trial 55] Fold metrics – Precision: 0.559 | Recall: 0.571 | F1: 0.565
[I 2025-06-12 11:12:55,686] Trial 55 finished with value: 0.5593774259983785 and parameters: {'iterations': 698, 'lr': 0.13649172003675045, 'depth': 5, 'l2': 6.757301555789935, 'rand_str': 1.7166067174200335, 'min_data_leaf': 14, 'bootstrap': 'Bernoulli', 'subsample': 0.7163876540459992}. Best is trial 22 with value: 0.5655043606922715.
[Trial 56] Fold metrics – Precision: 0.537 | Recall: 0.714 | F1: 0.613
[Trial 56] Fold metrics – Precision: 0.521 | Recall: 0.719 | F1: 0.604


Best trial: 22. Best value: 0.565504:  57%|█████▋    | 57/100 [02:34<00:57,  1.34s/it]

[Trial 56] Fold metrics – Precision: 0.557 | Recall: 0.576 | F1: 0.566
[I 2025-06-12 11:12:56,959] Trial 56 finished with value: 0.5589065091482179 and parameters: {'iterations': 654, 'lr': 0.03929921139379955, 'depth': 4, 'l2': 4.882114457484134, 'rand_str': 1.6053860518782437, 'min_data_leaf': 12, 'bootstrap': 'Bernoulli', 'subsample': 0.6867071253922443}. Best is trial 22 with value: 0.5655043606922715.
[Trial 57] Fold metrics – Precision: 0.543 | Recall: 0.715 | F1: 0.617
[Trial 57] Fold metrics – Precision: 0.516 | Recall: 0.807 | F1: 0.629


Best trial: 22. Best value: 0.565504:  58%|█████▊    | 58/100 [02:35<00:55,  1.32s/it]

[Trial 57] Fold metrics – Precision: 0.552 | Recall: 0.634 | F1: 0.590
[I 2025-06-12 11:12:58,227] Trial 57 finished with value: 0.5641785616333514 and parameters: {'iterations': 763, 'lr': 0.04563393175332064, 'depth': 4, 'l2': 5.629181221838124, 'rand_str': 1.2915802799684024, 'min_data_leaf': 14, 'bootstrap': 'Bayesian', 'bagging_temp': 2.2171824555502404}. Best is trial 22 with value: 0.5655043606922715.
[Trial 58] Fold metrics – Precision: 0.557 | Recall: 0.551 | F1: 0.554
[Trial 58] Fold metrics – Precision: 0.508 | Recall: 0.826 | F1: 0.629


Best trial: 22. Best value: 0.565504:  59%|█████▉    | 59/100 [02:38<01:04,  1.56s/it]

[Trial 58] Fold metrics – Precision: 0.545 | Recall: 0.571 | F1: 0.557
[I 2025-06-12 11:13:00,361] Trial 58 finished with value: 0.5518591115247702 and parameters: {'iterations': 754, 'lr': 0.026554601528461362, 'depth': 6, 'l2': 3.190655882526795, 'rand_str': 1.190988516068951, 'min_data_leaf': 14, 'bootstrap': 'Bayesian', 'bagging_temp': 2.2380798714514887}. Best is trial 22 with value: 0.5655043606922715.
[Trial 59] Fold metrics – Precision: 0.543 | Recall: 0.715 | F1: 0.617
[Trial 59] Fold metrics – Precision: 0.522 | Recall: 0.787 | F1: 0.628


Best trial: 22. Best value: 0.565504:  60%|██████    | 60/100 [02:39<00:58,  1.47s/it]

[Trial 59] Fold metrics – Precision: 0.547 | Recall: 0.598 | F1: 0.571
[I 2025-06-12 11:13:01,605] Trial 59 finished with value: 0.5620884986907934 and parameters: {'iterations': 722, 'lr': 0.04789956113115266, 'depth': 4, 'l2': 5.931290094908261, 'rand_str': 1.4721158750103314, 'min_data_leaf': 13, 'bootstrap': 'Bayesian', 'bagging_temp': 2.3587563127941213}. Best is trial 22 with value: 0.5655043606922715.
[Trial 60] Fold metrics – Precision: 0.541 | Recall: 0.761 | F1: 0.632
[Trial 60] Fold metrics – Precision: 0.514 | Recall: 0.739 | F1: 0.606


Best trial: 22. Best value: 0.565504:  61%|██████    | 61/100 [02:40<00:58,  1.50s/it]

[Trial 60] Fold metrics – Precision: 0.560 | Recall: 0.529 | F1: 0.544
[I 2025-06-12 11:13:03,174] Trial 60 finished with value: 0.5584324512407453 and parameters: {'iterations': 802, 'lr': 0.03722499677957938, 'depth': 5, 'l2': 6.976115666862765, 'rand_str': 1.2306265186293952, 'min_data_leaf': 11, 'bootstrap': 'Bayesian', 'bagging_temp': 1.2337867706181695}. Best is trial 22 with value: 0.5655043606922715.
[Trial 61] Fold metrics – Precision: 0.549 | Recall: 0.646 | F1: 0.594
[Trial 61] Fold metrics – Precision: 0.512 | Recall: 0.807 | F1: 0.627


Best trial: 22. Best value: 0.565504:  62%|██████▏   | 62/100 [02:42<00:54,  1.42s/it]

[Trial 61] Fold metrics – Precision: 0.556 | Recall: 0.591 | F1: 0.573
[I 2025-06-12 11:13:04,426] Trial 61 finished with value: 0.5602763344628626 and parameters: {'iterations': 693, 'lr': 0.04504455697701836, 'depth': 4, 'l2': 4.889228853190531, 'rand_str': 1.1084006906539197, 'min_data_leaf': 13, 'bootstrap': 'Bayesian', 'bagging_temp': 1.9272356803332187}. Best is trial 22 with value: 0.5655043606922715.
[Trial 62] Fold metrics – Precision: 0.543 | Recall: 0.715 | F1: 0.617
[Trial 62] Fold metrics – Precision: 0.510 | Recall: 0.809 | F1: 0.625


Best trial: 22. Best value: 0.565504:  63%|██████▎   | 63/100 [02:43<00:50,  1.37s/it]

[Trial 62] Fold metrics – Precision: 0.553 | Recall: 0.598 | F1: 0.575
[I 2025-06-12 11:13:05,679] Trial 62 finished with value: 0.5605692474454513 and parameters: {'iterations': 643, 'lr': 0.03166816781858435, 'depth': 4, 'l2': 6.375074448964654, 'rand_str': 1.339613751836066, 'min_data_leaf': 15, 'bootstrap': 'Bayesian', 'bagging_temp': 2.5519439371619823}. Best is trial 22 with value: 0.5655043606922715.
[Trial 63] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 63] Fold metrics – Precision: 0.518 | Recall: 0.776 | F1: 0.621


Best trial: 22. Best value: 0.565504:  64%|██████▍   | 64/100 [02:44<00:47,  1.33s/it]

[Trial 63] Fold metrics – Precision: 0.546 | Recall: 0.619 | F1: 0.580
[I 2025-06-12 11:13:06,904] Trial 63 finished with value: 0.5600363067354402 and parameters: {'iterations': 678, 'lr': 0.05148045262306782, 'depth': 4, 'l2': 8.304524654777083, 'rand_str': 1.772437371931168, 'min_data_leaf': 11, 'bootstrap': 'Bernoulli', 'subsample': 0.6909492081191618}. Best is trial 22 with value: 0.5655043606922715.
[Trial 64] Fold metrics – Precision: 0.541 | Recall: 0.602 | F1: 0.570
[Trial 64] Fold metrics – Precision: 0.514 | Recall: 0.792 | F1: 0.623


Best trial: 22. Best value: 0.565504:  65%|██████▌   | 65/100 [02:45<00:46,  1.32s/it]

[Trial 64] Fold metrics – Precision: 0.560 | Recall: 0.564 | F1: 0.562
[I 2025-06-12 11:13:08,191] Trial 64 finished with value: 0.5549432292122111 and parameters: {'iterations': 764, 'lr': 0.04401601346808335, 'depth': 4, 'l2': 5.189839989744668, 'rand_str': 1.6393524795495589, 'min_data_leaf': 12, 'bootstrap': 'MVS'}. Best is trial 22 with value: 0.5655043606922715.
[Trial 65] Fold metrics – Precision: 0.548 | Recall: 0.606 | F1: 0.575
[Trial 65] Fold metrics – Precision: 0.521 | Recall: 0.719 | F1: 0.604


Best trial: 22. Best value: 0.565504:  66%|██████▌   | 66/100 [02:47<00:43,  1.29s/it]

[Trial 65] Fold metrics – Precision: 0.561 | Recall: 0.558 | F1: 0.559
[I 2025-06-12 11:13:09,411] Trial 65 finished with value: 0.556721259644623 and parameters: {'iterations': 719, 'lr': 0.06641621555898616, 'depth': 4, 'l2': 7.98141852854611, 'rand_str': 1.498626546166936, 'min_data_leaf': 24, 'bootstrap': 'Bernoulli', 'subsample': 0.6520307465037648}. Best is trial 22 with value: 0.5655043606922715.
[Trial 66] Fold metrics – Precision: 0.550 | Recall: 0.669 | F1: 0.604
[Trial 66] Fold metrics – Precision: 0.519 | Recall: 0.732 | F1: 0.608


Best trial: 22. Best value: 0.565504:  67%|██████▋   | 67/100 [02:48<00:42,  1.29s/it]

[Trial 66] Fold metrics – Precision: 0.562 | Recall: 0.589 | F1: 0.575
[I 2025-06-12 11:13:10,708] Trial 66 finished with value: 0.5628084838656132 and parameters: {'iterations': 634, 'lr': 0.03590178982335925, 'depth': 4, 'l2': 9.860110251607157, 'rand_str': 1.9680368933611923, 'min_data_leaf': 14, 'bootstrap': 'Bernoulli', 'subsample': 0.7299161795768209}. Best is trial 22 with value: 0.5655043606922715.
[Trial 67] Fold metrics – Precision: 0.549 | Recall: 0.669 | F1: 0.603
[Trial 67] Fold metrics – Precision: 0.524 | Recall: 0.723 | F1: 0.608


Best trial: 22. Best value: 0.565504:  68%|██████▊   | 68/100 [02:49<00:41,  1.29s/it]

[Trial 67] Fold metrics – Precision: 0.556 | Recall: 0.573 | F1: 0.564
[I 2025-06-12 11:13:12,004] Trial 67 finished with value: 0.561056480028384 and parameters: {'iterations': 773, 'lr': 0.026649163933535416, 'depth': 4, 'l2': 8.945506769588143, 'rand_str': 1.7277088321721523, 'min_data_leaf': 19, 'bootstrap': 'Bernoulli', 'subsample': 0.7148852233518469}. Best is trial 22 with value: 0.5655043606922715.
[Trial 68] Fold metrics – Precision: 0.541 | Recall: 0.761 | F1: 0.632
[Trial 68] Fold metrics – Precision: 0.521 | Recall: 0.703 | F1: 0.598


Best trial: 22. Best value: 0.565504:  69%|██████▉   | 69/100 [02:51<00:44,  1.45s/it]

[Trial 68] Fold metrics – Precision: 0.564 | Recall: 0.528 | F1: 0.546
[I 2025-06-12 11:13:13,815] Trial 68 finished with value: 0.5599564512957335 and parameters: {'iterations': 604, 'lr': 0.058816501401748295, 'depth': 5, 'l2': 6.450366030323647, 'rand_str': 1.2900518453375411, 'min_data_leaf': 25, 'bootstrap': 'Bayesian', 'bagging_temp': 1.176983834791933}. Best is trial 22 with value: 0.5655043606922715.
[Trial 69] Fold metrics – Precision: 0.545 | Recall: 0.675 | F1: 0.603
[Trial 69] Fold metrics – Precision: 0.521 | Recall: 0.725 | F1: 0.606


Best trial: 22. Best value: 0.565504:  70%|███████   | 70/100 [02:52<00:41,  1.40s/it]

[Trial 69] Fold metrics – Precision: 0.563 | Recall: 0.591 | F1: 0.577
[I 2025-06-12 11:13:15,094] Trial 69 finished with value: 0.5624839367756086 and parameters: {'iterations': 544, 'lr': 0.047841414038309986, 'depth': 4, 'l2': 7.146425158021573, 'rand_str': 2.1112679044654383, 'min_data_leaf': 10, 'bootstrap': 'Bernoulli', 'subsample': 0.6148764062728892}. Best is trial 22 with value: 0.5655043606922715.
[Trial 70] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 70] Fold metrics – Precision: 0.521 | Recall: 0.719 | F1: 0.604


Best trial: 22. Best value: 0.565504:  71%|███████   | 71/100 [02:53<00:39,  1.35s/it]

[Trial 70] Fold metrics – Precision: 0.556 | Recall: 0.549 | F1: 0.553
[I 2025-06-12 11:13:16,326] Trial 70 finished with value: 0.5575743852488457 and parameters: {'iterations': 681, 'lr': 0.042171452856665174, 'depth': 4, 'l2': 4.046812910532715, 'rand_str': 1.0557251627750182, 'min_data_leaf': 13, 'bootstrap': 'Bernoulli', 'subsample': 0.6998210581070075}. Best is trial 22 with value: 0.5655043606922715.
[Trial 71] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 71] Fold metrics – Precision: 0.522 | Recall: 0.734 | F1: 0.610


Best trial: 22. Best value: 0.565504:  72%|███████▏  | 72/100 [02:55<00:37,  1.33s/it]

[Trial 71] Fold metrics – Precision: 0.562 | Recall: 0.587 | F1: 0.574
[I 2025-06-12 11:13:17,627] Trial 71 finished with value: 0.5624075627229413 and parameters: {'iterations': 623, 'lr': 0.03305518590007829, 'depth': 4, 'l2': 13.93257563108671, 'rand_str': 2.0266882872692156, 'min_data_leaf': 21, 'bootstrap': 'Bernoulli', 'subsample': 0.7400216933473563}. Best is trial 22 with value: 0.5655043606922715.
[Trial 72] Fold metrics – Precision: 0.548 | Recall: 0.667 | F1: 0.602
[Trial 72] Fold metrics – Precision: 0.528 | Recall: 0.743 | F1: 0.617


Best trial: 22. Best value: 0.565504:  73%|███████▎  | 73/100 [02:56<00:35,  1.32s/it]

[Trial 72] Fold metrics – Precision: 0.559 | Recall: 0.570 | F1: 0.565
[I 2025-06-12 11:13:18,930] Trial 72 finished with value: 0.5632782877284268 and parameters: {'iterations': 656, 'lr': 0.037103400209772416, 'depth': 4, 'l2': 11.823834745856564, 'rand_str': 1.843672735602565, 'min_data_leaf': 22, 'bootstrap': 'Bernoulli', 'subsample': 0.7664691307677706}. Best is trial 22 with value: 0.5655043606922715.
[Trial 73] Fold metrics – Precision: 0.549 | Recall: 0.669 | F1: 0.603
[Trial 73] Fold metrics – Precision: 0.529 | Recall: 0.736 | F1: 0.616


Best trial: 22. Best value: 0.565504:  74%|███████▍  | 74/100 [02:57<00:33,  1.31s/it]

[Trial 73] Fold metrics – Precision: 0.558 | Recall: 0.592 | F1: 0.574
[I 2025-06-12 11:13:20,197] Trial 73 finished with value: 0.5648918901619278 and parameters: {'iterations': 649, 'lr': 0.024421356483576424, 'depth': 4, 'l2': 11.788492530962955, 'rand_str': 1.8357264315461386, 'min_data_leaf': 22, 'bootstrap': 'Bernoulli', 'subsample': 0.7250562968464499}. Best is trial 22 with value: 0.5655043606922715.
[Trial 74] Fold metrics – Precision: 0.547 | Recall: 0.574 | F1: 0.561
[Trial 74] Fold metrics – Precision: 0.511 | Recall: 0.781 | F1: 0.618


Best trial: 22. Best value: 0.565504:  75%|███████▌  | 75/100 [03:00<00:45,  1.83s/it]

[Trial 74] Fold metrics – Precision: 0.545 | Recall: 0.601 | F1: 0.572
[I 2025-06-12 11:13:23,239] Trial 74 finished with value: 0.5522702962633894 and parameters: {'iterations': 586, 'lr': 0.02399231887031712, 'depth': 7, 'l2': 11.578514527330636, 'rand_str': 1.8476256298699163, 'min_data_leaf': 22, 'bootstrap': 'Bernoulli', 'subsample': 0.7229020128472113}. Best is trial 22 with value: 0.5655043606922715.
[Trial 75] Fold metrics – Precision: 0.549 | Recall: 0.669 | F1: 0.603
[Trial 75] Fold metrics – Precision: 0.520 | Recall: 0.727 | F1: 0.606


Best trial: 22. Best value: 0.565504:  76%|███████▌  | 76/100 [03:02<00:39,  1.67s/it]

[Trial 75] Fold metrics – Precision: 0.557 | Recall: 0.578 | F1: 0.568
[I 2025-06-12 11:13:24,525] Trial 75 finished with value: 0.5608477326361254 and parameters: {'iterations': 655, 'lr': 0.026361687605118693, 'depth': 4, 'l2': 9.36332291610559, 'rand_str': 1.959979355785722, 'min_data_leaf': 23, 'bootstrap': 'Bernoulli', 'subsample': 0.7028397310042447}. Best is trial 22 with value: 0.5655043606922715.
[Trial 76] Fold metrics – Precision: 0.542 | Recall: 0.603 | F1: 0.571
[Trial 76] Fold metrics – Precision: 0.509 | Recall: 0.798 | F1: 0.622


Best trial: 22. Best value: 0.565504:  77%|███████▋  | 77/100 [03:03<00:36,  1.57s/it]

[Trial 76] Fold metrics – Precision: 0.565 | Recall: 0.545 | F1: 0.554
[I 2025-06-12 11:13:25,888] Trial 76 finished with value: 0.5540794650209595 and parameters: {'iterations': 722, 'lr': 0.020055206239302913, 'depth': 4, 'l2': 5.388867909153998, 'rand_str': 1.6569144146363777, 'min_data_leaf': 23, 'bootstrap': 'MVS'}. Best is trial 22 with value: 0.5655043606922715.
[Trial 77] Fold metrics – Precision: 0.550 | Recall: 0.576 | F1: 0.563
[Trial 77] Fold metrics – Precision: 0.521 | Recall: 0.719 | F1: 0.604


Best trial: 22. Best value: 0.565504:  78%|███████▊  | 78/100 [03:04<00:32,  1.47s/it]

[Trial 77] Fold metrics – Precision: 0.557 | Recall: 0.584 | F1: 0.570
[I 2025-06-12 11:13:27,130] Trial 77 finished with value: 0.5562475761758203 and parameters: {'iterations': 202, 'lr': 0.03801650924561572, 'depth': 4, 'l2': 10.201443115381734, 'rand_str': 1.8590005343412916, 'min_data_leaf': 24, 'bootstrap': 'Bernoulli', 'subsample': 0.6676309077806724}. Best is trial 22 with value: 0.5655043606922715.
[Trial 78] Fold metrics – Precision: 0.547 | Recall: 0.601 | F1: 0.572
[Trial 78] Fold metrics – Precision: 0.526 | Recall: 0.676 | F1: 0.592


Best trial: 22. Best value: 0.565504:  79%|███████▉  | 79/100 [03:09<00:54,  2.57s/it]

[Trial 78] Fold metrics – Precision: 0.555 | Recall: 0.560 | F1: 0.558
[I 2025-06-12 11:13:32,267] Trial 78 finished with value: 0.5543810338084492 and parameters: {'iterations': 648, 'lr': 0.08936313678279152, 'depth': 8, 'l2': 6.1588282841143895, 'rand_str': 1.5377723868427973, 'min_data_leaf': 22, 'bootstrap': 'Bernoulli', 'subsample': 0.7215536026893545}. Best is trial 22 with value: 0.5655043606922715.
[Trial 79] Fold metrics – Precision: 0.543 | Recall: 0.663 | F1: 0.597
[Trial 79] Fold metrics – Precision: 0.513 | Recall: 0.768 | F1: 0.615


Best trial: 22. Best value: 0.565504:  80%|████████  | 80/100 [03:11<00:48,  2.42s/it]

[Trial 79] Fold metrics – Precision: 0.557 | Recall: 0.579 | F1: 0.568
[I 2025-06-12 11:13:34,334] Trial 79 finished with value: 0.558122364177957 and parameters: {'iterations': 817, 'lr': 0.03108928898150446, 'depth': 6, 'l2': 11.946238202161403, 'rand_str': 1.7594295667853324, 'min_data_leaf': 26, 'bootstrap': 'Bernoulli', 'subsample': 0.7674450373226456}. Best is trial 22 with value: 0.5655043606922715.
[Trial 80] Fold metrics – Precision: 0.538 | Recall: 0.707 | F1: 0.611
[Trial 80] Fold metrics – Precision: 0.524 | Recall: 0.740 | F1: 0.613


Best trial: 22. Best value: 0.565504:  81%|████████  | 81/100 [03:13<00:39,  2.07s/it]

[Trial 80] Fold metrics – Precision: 0.556 | Recall: 0.581 | F1: 0.568
[I 2025-06-12 11:13:35,597] Trial 80 finished with value: 0.560823337613186 and parameters: {'iterations': 553, 'lr': 0.05235172176337049, 'depth': 4, 'l2': 10.690775439223296, 'rand_str': 2.0774347986935044, 'min_data_leaf': 24, 'bootstrap': 'Bernoulli', 'subsample': 0.7437147708630916}. Best is trial 22 with value: 0.5655043606922715.
[Trial 81] Fold metrics – Precision: 0.548 | Recall: 0.667 | F1: 0.602
[Trial 81] Fold metrics – Precision: 0.528 | Recall: 0.713 | F1: 0.607


Best trial: 22. Best value: 0.565504:  82%|████████▏ | 82/100 [03:14<00:32,  1.83s/it]

[Trial 81] Fold metrics – Precision: 0.552 | Recall: 0.571 | F1: 0.561
[I 2025-06-12 11:13:36,848] Trial 81 finished with value: 0.5602643313757951 and parameters: {'iterations': 690, 'lr': 0.03556717681495053, 'depth': 4, 'l2': 12.439277844907247, 'rand_str': 1.8279829748870182, 'min_data_leaf': 23, 'bootstrap': 'Bernoulli', 'subsample': 0.8851311574854035}. Best is trial 22 with value: 0.5655043606922715.
[Trial 82] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 82] Fold metrics – Precision: 0.521 | Recall: 0.719 | F1: 0.604


Best trial: 22. Best value: 0.565504:  83%|████████▎ | 83/100 [03:15<00:28,  1.65s/it]

[Trial 82] Fold metrics – Precision: 0.560 | Recall: 0.549 | F1: 0.555
[I 2025-06-12 11:13:38,088] Trial 82 finished with value: 0.5588071663202023 and parameters: {'iterations': 737, 'lr': 0.04419197424548242, 'depth': 4, 'l2': 13.261152081909051, 'rand_str': 1.4130721923480707, 'min_data_leaf': 21, 'bootstrap': 'Bernoulli', 'subsample': 0.7638212345568647}. Best is trial 22 with value: 0.5655043606922715.
[Trial 83] Fold metrics – Precision: 0.548 | Recall: 0.666 | F1: 0.601
[Trial 83] Fold metrics – Precision: 0.517 | Recall: 0.776 | F1: 0.621


Best trial: 22. Best value: 0.565504:  84%|████████▍ | 84/100 [03:16<00:24,  1.52s/it]

[Trial 83] Fold metrics – Precision: 0.557 | Recall: 0.590 | F1: 0.573
[I 2025-06-12 11:13:39,315] Trial 83 finished with value: 0.5616607530492798 and parameters: {'iterations': 664, 'lr': 0.04713076843027249, 'depth': 4, 'l2': 11.86002003372992, 'rand_str': 1.7008501248255283, 'min_data_leaf': 22, 'bootstrap': 'Bernoulli', 'subsample': 0.7868073250172155}. Best is trial 22 with value: 0.5655043606922715.
[Trial 84] Fold metrics – Precision: 0.549 | Recall: 0.668 | F1: 0.603
[Trial 84] Fold metrics – Precision: 0.523 | Recall: 0.725 | F1: 0.607


Best trial: 22. Best value: 0.565504:  85%|████████▌ | 85/100 [03:18<00:21,  1.45s/it]

[Trial 84] Fold metrics – Precision: 0.560 | Recall: 0.571 | F1: 0.565
[I 2025-06-12 11:13:40,587] Trial 84 finished with value: 0.561614551824414 and parameters: {'iterations': 619, 'lr': 0.04030878817546291, 'depth': 4, 'l2': 8.123053720178586, 'rand_str': 1.9325424023798299, 'min_data_leaf': 18, 'bootstrap': 'Bernoulli', 'subsample': 0.8064321287511278}. Best is trial 22 with value: 0.5655043606922715.
[Trial 85] Fold metrics – Precision: 0.550 | Recall: 0.669 | F1: 0.604
[Trial 85] Fold metrics – Precision: 0.522 | Recall: 0.723 | F1: 0.606


Best trial: 22. Best value: 0.565504:  86%|████████▌ | 86/100 [03:19<00:19,  1.39s/it]

[Trial 85] Fold metrics – Precision: 0.546 | Recall: 0.619 | F1: 0.580
[I 2025-06-12 11:13:41,828] Trial 85 finished with value: 0.5605013171543094 and parameters: {'iterations': 708, 'lr': 0.028828046537198844, 'depth': 4, 'l2': 14.176059270479477, 'rand_str': 1.5881898951998021, 'min_data_leaf': 21, 'bootstrap': 'Bernoulli', 'subsample': 0.7299332687148482}. Best is trial 22 with value: 0.5655043606922715.
[Trial 86] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 86] Fold metrics – Precision: 0.521 | Recall: 0.764 | F1: 0.620


Best trial: 22. Best value: 0.565504:  87%|████████▋ | 87/100 [03:20<00:17,  1.36s/it]

[Trial 86] Fold metrics – Precision: 0.536 | Recall: 0.697 | F1: 0.606
[I 2025-06-12 11:13:43,111] Trial 86 finished with value: 0.5617593329048655 and parameters: {'iterations': 777, 'lr': 0.02447288135450005, 'depth': 4, 'l2': 8.73147125046105, 'rand_str': 2.6002332535945514, 'min_data_leaf': 17, 'bootstrap': 'Bernoulli', 'subsample': 0.7084148490394236}. Best is trial 22 with value: 0.5655043606922715.
[Trial 87] Fold metrics – Precision: 0.550 | Recall: 0.705 | F1: 0.618
[Trial 87] Fold metrics – Precision: 0.519 | Recall: 0.751 | F1: 0.614


Best trial: 22. Best value: 0.565504:  88%|████████▊ | 88/100 [03:22<00:17,  1.43s/it]

[Trial 87] Fold metrics – Precision: 0.562 | Recall: 0.540 | F1: 0.551
[I 2025-06-12 11:13:44,715] Trial 87 finished with value: 0.5620286771441642 and parameters: {'iterations': 508, 'lr': 0.03574058222830822, 'depth': 5, 'l2': 12.737432670974083, 'rand_str': 2.1955529468185455, 'min_data_leaf': 19, 'bootstrap': 'Bayesian', 'bagging_temp': 1.7957969859135476}. Best is trial 22 with value: 0.5655043606922715.
[Trial 88] Fold metrics – Precision: 0.539 | Recall: 0.707 | F1: 0.611
[Trial 88] Fold metrics – Precision: 0.521 | Recall: 0.740 | F1: 0.611


Best trial: 22. Best value: 0.565504:  89%|████████▉ | 89/100 [03:23<00:15,  1.37s/it]

[Trial 88] Fold metrics – Precision: 0.561 | Recall: 0.584 | F1: 0.572
[I 2025-06-12 11:13:45,959] Trial 88 finished with value: 0.5614482356323421 and parameters: {'iterations': 590, 'lr': 0.10685497281392778, 'depth': 4, 'l2': 9.578112418151914, 'rand_str': 2.001662516984461, 'min_data_leaf': 15, 'bootstrap': 'Bernoulli', 'subsample': 0.7456130510193696}. Best is trial 22 with value: 0.5655043606922715.
[Trial 89] Fold metrics – Precision: 0.539 | Recall: 0.614 | F1: 0.574
[Trial 89] Fold metrics – Precision: 0.515 | Recall: 0.805 | F1: 0.628


Best trial: 22. Best value: 0.565504:  90%|█████████ | 90/100 [03:24<00:13,  1.35s/it]

[Trial 89] Fold metrics – Precision: 0.560 | Recall: 0.553 | F1: 0.557
[I 2025-06-12 11:13:47,261] Trial 89 finished with value: 0.5554067197067495 and parameters: {'iterations': 633, 'lr': 0.06263057065863112, 'depth': 4, 'l2': 7.479148590429354, 'rand_str': 1.877012265717196, 'min_data_leaf': 20, 'bootstrap': 'MVS'}. Best is trial 22 with value: 0.5655043606922715.
[Trial 90] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 90] Fold metrics – Precision: 0.527 | Recall: 0.727 | F1: 0.611


Best trial: 22. Best value: 0.565504:  91%|█████████ | 91/100 [03:26<00:11,  1.31s/it]

[Trial 90] Fold metrics – Precision: 0.566 | Recall: 0.552 | F1: 0.559
[I 2025-06-12 11:13:48,458] Trial 90 finished with value: 0.5626291560135491 and parameters: {'iterations': 851, 'lr': 0.05589556359158724, 'depth': 4, 'l2': 11.116885429251306, 'rand_str': 2.4489985121801245, 'min_data_leaf': 25, 'bootstrap': 'Bernoulli', 'subsample': 0.8525508049455583}. Best is trial 22 with value: 0.5655043606922715.
[Trial 91] Fold metrics – Precision: 0.538 | Recall: 0.707 | F1: 0.611
[Trial 91] Fold metrics – Precision: 0.528 | Recall: 0.722 | F1: 0.610


Best trial: 22. Best value: 0.565504:  92%|█████████▏| 92/100 [03:27<00:10,  1.29s/it]

[Trial 91] Fold metrics – Precision: 0.559 | Recall: 0.595 | F1: 0.576
[I 2025-06-12 11:13:49,702] Trial 91 finished with value: 0.562815864469528 and parameters: {'iterations': 618, 'lr': 0.03300228003903208, 'depth': 4, 'l2': 13.239274311102369, 'rand_str': 2.0564383386193605, 'min_data_leaf': 21, 'bootstrap': 'Bernoulli', 'subsample': 0.7474475473468705}. Best is trial 22 with value: 0.5655043606922715.
[Trial 92] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 92] Fold metrics – Precision: 0.521 | Recall: 0.754 | F1: 0.616


Best trial: 22. Best value: 0.565504:  93%|█████████▎| 93/100 [03:28<00:08,  1.27s/it]

[Trial 92] Fold metrics – Precision: 0.536 | Recall: 0.697 | F1: 0.606
[I 2025-06-12 11:13:50,927] Trial 92 finished with value: 0.561316175021251 and parameters: {'iterations': 678, 'lr': 0.03932773102834559, 'depth': 4, 'l2': 12.468006172092728, 'rand_str': 2.331272968629163, 'min_data_leaf': 20, 'bootstrap': 'Bernoulli', 'subsample': 0.7311265272216669}. Best is trial 22 with value: 0.5655043606922715.
[Trial 93] Fold metrics – Precision: 0.539 | Recall: 0.717 | F1: 0.615
[Trial 93] Fold metrics – Precision: 0.521 | Recall: 0.747 | F1: 0.614


Best trial: 22. Best value: 0.565504:  94%|█████████▍| 94/100 [03:29<00:07,  1.28s/it]

[Trial 93] Fold metrics – Precision: 0.559 | Recall: 0.570 | F1: 0.564
[I 2025-06-12 11:13:52,223] Trial 93 finished with value: 0.5608594861444881 and parameters: {'iterations': 647, 'lr': 0.030260309173024698, 'depth': 4, 'l2': 13.037862622750469, 'rand_str': 2.123979884219749, 'min_data_leaf': 22, 'bootstrap': 'Bernoulli', 'subsample': 0.7833408272414075}. Best is trial 22 with value: 0.5655043606922715.
[Trial 94] Fold metrics – Precision: 0.538 | Recall: 0.707 | F1: 0.611
[Trial 94] Fold metrics – Precision: 0.521 | Recall: 0.786 | F1: 0.627


Best trial: 22. Best value: 0.565504:  95%|█████████▌| 95/100 [03:31<00:06,  1.26s/it]

[Trial 94] Fold metrics – Precision: 0.536 | Recall: 0.697 | F1: 0.606
[I 2025-06-12 11:13:53,460] Trial 94 finished with value: 0.5617876257324067 and parameters: {'iterations': 707, 'lr': 0.041726272734742706, 'depth': 4, 'l2': 12.164048231431373, 'rand_str': 2.770634772427937, 'min_data_leaf': 23, 'bootstrap': 'Bernoulli', 'subsample': 0.7584278690442804}. Best is trial 22 with value: 0.5655043606922715.
[Trial 95] Fold metrics – Precision: 0.549 | Recall: 0.669 | F1: 0.603
[Trial 95] Fold metrics – Precision: 0.523 | Recall: 0.718 | F1: 0.605


Best trial: 22. Best value: 0.565504:  96%|█████████▌| 96/100 [03:32<00:05,  1.31s/it]

[Trial 95] Fold metrics – Precision: 0.565 | Recall: 0.571 | F1: 0.568
[I 2025-06-12 11:13:54,877] Trial 95 finished with value: 0.5629811488777609 and parameters: {'iterations': 560, 'lr': 0.022978096868083022, 'depth': 4, 'l2': 13.566458465026738, 'rand_str': 1.8163396753801868, 'min_data_leaf': 19, 'bootstrap': 'Bernoulli', 'subsample': 0.7173125594290354}. Best is trial 22 with value: 0.5655043606922715.
[Trial 96] Fold metrics – Precision: 0.549 | Recall: 0.669 | F1: 0.603
[Trial 96] Fold metrics – Precision: 0.518 | Recall: 0.779 | F1: 0.622


Best trial: 22. Best value: 0.565504:  97%|█████████▋| 97/100 [03:33<00:03,  1.30s/it]

[Trial 96] Fold metrics – Precision: 0.556 | Recall: 0.573 | F1: 0.564
[I 2025-06-12 11:13:56,150] Trial 96 finished with value: 0.5612713224895142 and parameters: {'iterations': 562, 'lr': 0.02590344522217556, 'depth': 4, 'l2': 14.421910760708155, 'rand_str': 1.7913645169468206, 'min_data_leaf': 18, 'bootstrap': 'Bernoulli', 'subsample': 0.6944086333382221}. Best is trial 22 with value: 0.5655043606922715.
[Trial 97] Fold metrics – Precision: 0.544 | Recall: 0.613 | F1: 0.576
[Trial 97] Fold metrics – Precision: 0.520 | Recall: 0.781 | F1: 0.625


Best trial: 22. Best value: 0.565504:  98%|█████████▊| 98/100 [03:35<00:02,  1.38s/it]

[Trial 97] Fold metrics – Precision: 0.537 | Recall: 0.619 | F1: 0.575
[I 2025-06-12 11:13:57,733] Trial 97 finished with value: 0.5552397669248152 and parameters: {'iterations': 521, 'lr': 0.02827569211243937, 'depth': 5, 'l2': 14.02557036395605, 'rand_str': 1.6980364890928383, 'min_data_leaf': 19, 'bootstrap': 'Bayesian', 'bagging_temp': 2.6029017832621983}. Best is trial 22 with value: 0.5655043606922715.
[Trial 98] Fold metrics – Precision: 0.549 | Recall: 0.669 | F1: 0.603
[Trial 98] Fold metrics – Precision: 0.527 | Recall: 0.740 | F1: 0.615


Best trial: 98. Best value: 0.566487:  99%|█████████▉| 99/100 [03:36<00:01,  1.34s/it]

[Trial 98] Fold metrics – Precision: 0.565 | Recall: 0.596 | F1: 0.580
[I 2025-06-12 11:13:58,976] Trial 98 finished with value: 0.5664871124720557 and parameters: {'iterations': 752, 'lr': 0.020691023226819216, 'depth': 4, 'l2': 13.683470245201063, 'rand_str': 1.7369638634616382, 'min_data_leaf': 17, 'bootstrap': 'Bernoulli', 'subsample': 0.715299484815648}. Best is trial 98 with value: 0.5664871124720557.
[Trial 99] Fold metrics – Precision: 0.547 | Recall: 0.574 | F1: 0.561
[Trial 99] Fold metrics – Precision: 0.511 | Recall: 0.781 | F1: 0.618


Best trial: 98. Best value: 0.566487: 100%|██████████| 100/100 [03:39<00:00,  2.20s/it]

[Trial 99] Fold metrics – Precision: 0.545 | Recall: 0.601 | F1: 0.572
[I 2025-06-12 11:14:01,982] Trial 99 finished with value: 0.5522702962633894 and parameters: {'iterations': 737, 'lr': 0.021200075185411518, 'depth': 7, 'l2': 13.619680830900808, 'rand_str': 1.9388636570956166, 'min_data_leaf': 17, 'bootstrap': 'Bernoulli', 'subsample': 0.7127557257063355}. Best is trial 98 with value: 0.5664871124720557.
🏆 Best trial value: 0.5664871124720557
🔧 Training final model …
0:	learn: 0.5552593	test: 0.5676988	best: 0.5676988 (0)	total: 3.3ms	remaining: 2.48s





100:	learn: 0.5716047	test: 0.5072318	best: 0.5676988 (0)	total: 293ms	remaining: 1.89s
Stopped by overfitting detector  (150 iterations wait)

bestTest = 0.5676988464
bestIteration = 0

Shrink model to first 1 iterations.
✅ Final model trained
📊 Evaluating …
════════════════════════════════════════════════════════════
AUC‑ROC     : 0.5581
F1@0.5      : 0.5677
F0.5@0.5    : 0.5696
Best F0.5   : 0.5774  (thr = 0.10)
════════════════════════════════════════════════════════════
💾 Saving artefacts …
✅ Files saved to C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\src\Models\models\models\catboost_f05_results


In [15]:
# =============================================================
#  MULTI-TRIAL CATBOOST EVALUATOR - Top 10 Configurations
#  Tests all top performing parameter sets on validation data
# =============================================================
import os, random, warnings, joblib, time
from pathlib import Path

import numpy as np
import pandas as pd
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import (accuracy_score, precision_recall_fscore_support,
                             fbeta_score, roc_auc_score, precision_score, 
                             recall_score, f1_score)

# ─── GLOBAL SEED ─────────────────────────────────────────────
SEED = 42
os.environ["PYTHONHASHSEED"] = str(SEED)
np.random.seed(SEED)
random.seed(SEED)
warnings.filterwarnings("ignore")

# ─── CONFIG ─────────────────────────────────────────────────
CSV_PATH   = Path(r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv")
DROP_COLS  = [
    # Raw OHLC (massively correlated with close)
    'open', 'high', 'low',
    # ATR components
    'high_low', 'high_close', 'low_close',
    # Helper calculations  
    'typical_price', 'true_range', 'volume_mean_20',
    # Highly correlated MAs
    'EMA_21', 'SMA_20', 'vwap_24h', 'close_4h',
    # Bollinger components
    'bollinger_upper', 'bollinger_lower',
    # MACD components
    'MACD_line', 'MACD_signal',
    # Support/resistance
    'resistance_level', 'support_level',
    # Zero-importance binary flags
    'vol_spike_1_5x', 'near_upper_band', 'near_lower_band',
    'break_upper_band', 'break_lower_band', 'rsi_oversold',
    'above_sma20', 'macd_positive', 'volume_breakout', 'volume_breakdown',
    # Highly correlated position flags
    'above_sma50', 'ema7_above_ema21',
    # Low-importance cross signals
    'ema_cross_down',
    # Oscillator extremes
    'rsi_overbought', 'stoch_overbought', 'stoch_oversold',
    'cci_overbought', 'cci_oversold',
    # Trend flags
    'trending_market',
    # All scenario features
    'bullish_scenario_1', 'bullish_scenario_2', 'bullish_scenario_3',
    'bullish_scenario_4', 'bullish_scenario_5', 'bullish_scenario_6',
    'bearish_scenario_1', 'bearish_scenario_2', 'bearish_scenario_3',
    'bearish_scenario_4', 'bearish_scenario_6',
    'close'  # Target leakage prevention
]
VAL_FRAC   = 0.20
BETA       = 0.5  # F0.5 score (precision-weighted)
SAVE_DIR   = Path(r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\src\Models\models\models")

# ─── TOP 10 TRIAL CONFIGURATIONS FROM OPTIMIZATION RESULTS ──
TOP_TRIALS = [
    {
        "name": "Trial_98_Best_Overall",
        "trial": 98,
        "iterations": 752,
        "lr": 0.020691023226819216,
        "depth": 4,
        "l2": 13.683470245201063,
        "rand_str": 1.7369638634616382,
        "min_data_leaf": 17,
        "bootstrap": "Bernoulli",
        "subsample": 0.715299484815648,
        "cv_f05": 0.5665,
        "cv_precision": 0.565,
        "cv_recall": 0.596,
        "cv_f1": 0.580
    },
    {
        "name": "Trial_22_High_Precision", 
        "trial": 22,
        "iterations": 697,
        "lr": 0.05307592822927969,
        "depth": 4,
        "l2": 8.625798314956667,
        "rand_str": 1.9161178234306955,
        "min_data_leaf": 23,
        "bootstrap": "Bernoulli",
        "subsample": 0.7365237906217537,
        "cv_f05": 0.5655,
        "cv_precision": 0.562,
        "cv_recall": 0.590,
        "cv_f1": 0.576
    },
    {
        "name": "Trial_73_Balanced",
        "trial": 73,
        "iterations": 649,
        "lr": 0.024421356483576424,
        "depth": 4,
        "l2": 11.788492530962955,
        "rand_str": 1.8357264315461386,
        "min_data_leaf": 22,
        "bootstrap": "Bernoulli",
        "subsample": 0.7250562968464499,
        "cv_f05": 0.5649,
        "cv_precision": 0.558,
        "cv_recall": 0.592,
        "cv_f1": 0.574
    },
    {
        "name": "Trial_57_High_F1",
        "trial": 57,
        "iterations": 763,
        "lr": 0.04563393175332064,
        "depth": 4,
        "l2": 5.629181221838124,
        "rand_str": 1.2915802799684024,
        "min_data_leaf": 14,
        "bootstrap": "Bayesian",
        "bagging_temp": 2.2171824555502404,
        "cv_f05": 0.5642,
        "cv_precision": 0.552,
        "cv_recall": 0.634,
        "cv_f1": 0.590
    },
    {
        "name": "Trial_53_Top_Precision",
        "trial": 53,
        "iterations": 637,
        "lr": 0.035671612771177995,
        "depth": 4,
        "l2": 8.079433096935066,
        "rand_str": 1.7339338804777007,
        "min_data_leaf": 14,
        "bootstrap": "Bernoulli",
        "subsample": 0.7223730517623818,
        "cv_f05": 0.5638,
        "cv_precision": 0.565,
        "cv_recall": 0.594,
        "cv_f1": 0.579
    },
    {
        "name": "Trial_90_Max_Precision",
        "trial": 90,
        "iterations": 851,
        "lr": 0.05589556359158724,
        "depth": 4,
        "l2": 11.116885429251306,
        "rand_str": 2.4489985121801245,
        "min_data_leaf": 25,
        "bootstrap": "Bernoulli",
        "subsample": 0.8525508049455583,
        "cv_f05": 0.5626,
        "cv_precision": 0.566,
        "cv_recall": 0.552,
        "cv_f1": 0.559
    },
    {
        "name": "Trial_35_Conservative",
        "trial": 35,
        "iterations": 621,
        "lr": 0.033756991755374714,
        "depth": 4,
        "l2": 12.977478540926173,
        "rand_str": 2.058560862981183,
        "min_data_leaf": 20,
        "bootstrap": "Bernoulli",
        "subsample": 0.750613817369306,
        "cv_f05": 0.5629,
        "cv_precision": 0.562,
        "cv_recall": 0.587,
        "cv_f1": 0.574
    },
    {
        "name": "Trial_43_High_Recall",
        "trial": 43,
        "iterations": 683,
        "lr": 0.04304321773202563,
        "depth": 4,
        "l2": 12.555495688003719,
        "rand_str": 2.778037803783163,
        "min_data_leaf": 18,
        "bootstrap": "Bernoulli",
        "subsample": 0.7774776609307122,
        "cv_f05": 0.5629,
        "cv_precision": 0.536,
        "cv_recall": 0.697,
        "cv_f1": 0.606
    },
    {
        "name": "Trial_54_Stable",
        "trial": 54,
        "iterations": 632,
        "lr": 0.035144891267128345,
        "depth": 4,
        "l2": 6.489802684741546,
        "rand_str": 1.684970724115755,
        "min_data_leaf": 13,
        "bootstrap": "Bernoulli",
        "subsample": 0.7147121803375743,
        "cv_f05": 0.5631,
        "cv_precision": 0.557,
        "cv_recall": 0.571,
        "cv_f1": 0.564
    },
    {
        "name": "Trial_66_Alternative",
        "trial": 66,
        "iterations": 634,
        "lr": 0.03590178982335925,
        "depth": 4,
        "l2": 9.860110251607157,
        "rand_str": 1.9680368933611923,
        "min_data_leaf": 14,
        "bootstrap": "Bernoulli",
        "subsample": 0.7299161795768209,
        "cv_f05": 0.5628,
        "cv_precision": 0.562,
        "cv_recall": 0.589,
        "cv_f1": 0.575
    }
]

print("🚀 MULTI-TRIAL CATBOOST EVALUATION")
print(f"📊 Testing {len(TOP_TRIALS)} top configurations on validation data")
print("="*80)

# ─── DATA PREPARATION (IDENTICAL TO OPTIMIZATION) ───────────
df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
df = df[df.index >= "2018-01-01"]

if "target" not in df.columns:
    raise ValueError("❌ Missing 'target' column!")

categoricals = df.select_dtypes(include=["object", "category"]).columns.tolist()
categoricals = [c for c in categoricals if c != "target"]

df.drop(columns=[c for c in DROP_COLS if c in df.columns], inplace=True)
df.dropna(inplace=True)

X = df.drop(columns="target")
y = df["target"].astype(int)

cat_indices = [X.columns.get_loc(c) for c in categoricals if c in X.columns]

print(f"✅ Data shape: {X.shape}")
print(f"✅ Class distribution: {np.bincount(y)}")
print(f"✅ Categorical features: {len(cat_indices)}")

# ─── TRAIN/VALIDATION SPLIT ─────────────────────────────────
cut = int(len(df) * (1 - VAL_FRAC))
X_train, X_val = X.iloc[:cut], X.iloc[cut:]
y_train, y_val = y.iloc[:cut], y.iloc[cut:]

print(f"✅ Train size: {len(X_train):,}")
print(f"✅ Validation size: {len(X_val):,}")

# ─── EVALUATE EACH TRIAL CONFIGURATION ──────────────────────
results = []
best_model = None
best_score = 0
best_predictions = None

print(f"\n{'='*80}")
print("EVALUATING TOP TRIAL CONFIGURATIONS")
print(f"{'='*80}")

for i, trial_config in enumerate(TOP_TRIALS, 1):
    print(f"\n🔬 [{i}/{len(TOP_TRIALS)}] {trial_config['name']}")
    print(f"   Expected CV: P={trial_config['cv_precision']:.3f}, R={trial_config['cv_recall']:.3f}, F1={trial_config['cv_f1']:.3f}, F0.5={trial_config['cv_f05']:.3f}")
    
    start_time = time.time()
    
    try:
        # Build parameters for this trial
        params = {
            "iterations": trial_config["iterations"],
            "learning_rate": trial_config["lr"],
            "depth": trial_config["depth"],
            "l2_leaf_reg": trial_config["l2"],
            "random_strength": trial_config["rand_str"],
            "min_data_in_leaf": trial_config["min_data_leaf"],
            "bootstrap_type": trial_config["bootstrap"],
            "random_seed": SEED,
            "task_type": "GPU",
            "verbose": False,
            "use_best_model": True,
            "loss_function": "Logloss",
            "eval_metric": "F1",
            "early_stopping_rounds": min(100, trial_config["iterations"] // 4)
        }
        
        # Add bootstrap-specific parameters
        if trial_config["bootstrap"] == "Bernoulli":
            params["subsample"] = trial_config["subsample"]
        elif trial_config["bootstrap"] == "Bayesian":
            params["bagging_temperature"] = trial_config["bagging_temp"]
        
        # Train model
        model = CatBoostClassifier(**params)
        
        train_pool = Pool(X_train, y_train, cat_features=cat_indices if cat_indices else None)
        val_pool = Pool(X_val, y_val, cat_features=cat_indices if cat_indices else None)
        
        model.fit(train_pool, eval_set=val_pool)
        
        # Get predictions
        y_prob = model.predict_proba(X_val)[:, 1]
        y_pred_50 = (y_prob >= 0.5).astype(int)
        
        # Calculate metrics at 0.5 threshold
        precision_50 = precision_score(y_val, y_pred_50, zero_division=0)
        recall_50 = recall_score(y_val, y_pred_50, zero_division=0)
        f1_50 = f1_score(y_val, y_pred_50, zero_division=0)
        f05_50 = fbeta_score(y_val, y_pred_50, beta=BETA, zero_division=0)
        
        # Find optimal threshold for F0.5
        thresholds = np.arange(0.1, 0.9, 0.05)
        best_thresh = 0.5
        best_f05 = f05_50
        
        for thresh in thresholds:
            y_pred_thresh = (y_prob >= thresh).astype(int)
            f05_thresh = fbeta_score(y_val, y_pred_thresh, beta=BETA, zero_division=0)
            if f05_thresh > best_f05:
                best_f05 = f05_thresh
                best_thresh = thresh
        
        # Calculate metrics at optimal threshold
        y_pred_opt = (y_prob >= best_thresh).astype(int)
        precision_opt = precision_score(y_val, y_pred_opt, zero_division=0)
        recall_opt = recall_score(y_val, y_pred_opt, zero_division=0)
        f1_opt = f1_score(y_val, y_pred_opt, zero_division=0)
        f05_opt = fbeta_score(y_val, y_pred_opt, beta=BETA, zero_division=0)
        
        accuracy = accuracy_score(y_val, y_pred_opt)
        auc = roc_auc_score(y_val, y_prob)
        
        train_time = time.time() - start_time
        
        # Store results
        result = {
            "name": trial_config["name"],
            "trial": trial_config["trial"],
            "train_time": train_time,
            # CV metrics for comparison
            "cv_precision": trial_config["cv_precision"],
            "cv_recall": trial_config["cv_recall"],
            "cv_f1": trial_config["cv_f1"],
            "cv_f05": trial_config["cv_f05"],
            # Validation metrics at 0.5 threshold
            "val_precision_50": precision_50,
            "val_recall_50": recall_50,
            "val_f1_50": f1_50,
            "val_f05_50": f05_50,
            # Validation metrics at optimal threshold
            "optimal_threshold": best_thresh,
            "val_precision_opt": precision_opt,
            "val_recall_opt": recall_opt,
            "val_f1_opt": f1_opt,
            "val_f05_opt": f05_opt,
            "val_accuracy": accuracy,
            "val_auc": auc,
            "predictions": y_prob,
            "predictions_binary": y_pred_opt
        }
        
        results.append(result)
        
        print(f"   ⏱️ Training time: {train_time:.1f}s")
        print(f"   📊 Val @ 0.5   : P={precision_50:.3f}, R={recall_50:.3f}, F1={f1_50:.3f}, F0.5={f05_50:.3f}")
        print(f"   🎯 Val @ {best_thresh:.2f}  : P={precision_opt:.3f}, R={recall_opt:.3f}, F1={f1_opt:.3f}, F0.5={f05_opt:.3f}")
        print(f"   ✅ AUC: {auc:.3f}, Accuracy: {accuracy:.3f}")
        
        # Track best model
        if f05_opt > best_score:
            best_score = f05_opt
            best_model = result
            best_predictions = y_prob
            
    except Exception as e:
        print(f"   ❌ Training failed: {str(e)[:50]}...")
        continue

# ─── RESULTS SUMMARY ────────────────────────────────────────
print(f"\n{'='*80}")
print("FINAL RESULTS SUMMARY")
print(f"{'='*80}")

if results:
    # Sort by F0.5 score
    results_df = pd.DataFrame(results)
    results_df = results_df.sort_values('val_f05_opt', ascending=False)
    
    print(f"\n🏆 RANKING BY VALIDATION F0.5 SCORE:")
    print("-" * 120)
    print(f"{'Rank':<4} {'Trial':<25} {'Val F0.5':<8} {'Val Prec':<9} {'Val Rec':<8} {'Val F1':<7} {'Thresh':<7} {'CV F0.5':<7} {'Δ F0.5':<7}")
    print("-" * 120)
    
    for rank, (_, row) in enumerate(results_df.iterrows(), 1):
        delta_f05 = row['val_f05_opt'] - row['cv_f05']
        print(f"{rank:<4} {row['name']:<25} {row['val_f05_opt']:<8.3f} {row['val_precision_opt']:<9.3f} {row['val_recall_opt']:<8.3f} {row['val_f1_opt']:<7.3f} {row['optimal_threshold']:<7.2f} {row['cv_f05']:<7.3f} {delta_f05:+.3f}")
    
    print("-" * 120)
    
    # Best model details
    best_trial = results_df.iloc[0]
    print(f"\n🥇 BEST MODEL: {best_trial['name']}")
    print(f"   📈 Validation F0.5: {best_trial['val_f05_opt']:.4f}")
    print(f"   🎯 Optimal threshold: {best_trial['optimal_threshold']:.3f}")
    print(f"   📊 Precision: {best_trial['val_precision_opt']:.3f}")
    print(f"   📊 Recall: {best_trial['val_recall_opt']:.3f}")
    print(f"   📊 F1 Score: {best_trial['val_f1_opt']:.3f}")
    print(f"   📊 Accuracy: {best_trial['val_accuracy']:.3f}")
    print(f"   📊 AUC-ROC: {best_trial['val_auc']:.3f}")
    
    # Performance vs CV comparison
    print(f"\n📊 CV vs VALIDATION COMPARISON:")
    avg_cv_f05 = results_df['cv_f05'].mean()
    avg_val_f05 = results_df['val_f05_opt'].mean()
    print(f"   Average CV F0.5: {avg_cv_f05:.3f}")
    print(f"   Average Val F0.5: {avg_val_f05:.3f}")
    print(f"   Overall difference: {avg_val_f05 - avg_cv_f05:+.3f}")
    
    # Save best predictions
    best_pred_df = pd.DataFrame({
        "prob_up": best_trial['predictions'],
        f"pred_{best_trial['optimal_threshold']:.2f}": best_trial['predictions_binary'],
        "actual": y_val.values
    }, index=X_val.index)
    
    save_path = SAVE_DIR / "catboost_best_trial_predictions.csv"
    best_pred_df[["prob_up", f"pred_{best_trial['optimal_threshold']:.2f}"]].to_csv(save_path)
    print(f"\n💾 Best model predictions saved: {save_path}")
    
    # Save all results
    results_summary = {
        "best_trial": best_trial['name'],
        "best_f05": best_trial['val_f05_opt'],
        "all_results": results_df.to_dict('records'),
        "summary_stats": {
            "avg_cv_f05": avg_cv_f05,
            "avg_val_f05": avg_val_f05,
            "best_improvement": results_df['val_f05_opt'].max() - results_df['cv_f05'].max(),
            "trials_completed": len(results)
        }
    }
    
    joblib.dump(results_summary, SAVE_DIR / "multi_trial_results.pkl")
    print(f"📁 Detailed results saved: {SAVE_DIR / 'multi_trial_results.pkl'}")
    
else:
    print("❌ No trials completed successfully!")

print(f"\n🎉 EVALUATION COMPLETED!")
print(f"✅ {len(results)} out of {len(TOP_TRIALS)} trials completed successfully")

🚀 MULTI-TRIAL CATBOOST EVALUATION
📊 Testing 10 top configurations on validation data
✅ Data shape: (15855, 31)
✅ Class distribution: [7758 8097]
✅ Categorical features: 0
✅ Train size: 12,684
✅ Validation size: 3,171

EVALUATING TOP TRIAL CONFIGURATIONS

🔬 [1/10] Trial_98_Best_Overall
   Expected CV: P=0.565, R=0.596, F1=0.580, F0.5=0.567
   ⏱️ Training time: 2.0s
   📊 Val @ 0.5   : P=0.558, R=0.588, F1=0.572, F0.5=0.563
   🎯 Val @ 0.10  : P=0.522, R=1.000, F1=0.686, F0.5=0.577
   ✅ AUC: 0.549, Accuracy: 0.522

🔬 [2/10] Trial_22_High_Precision
   Expected CV: P=0.562, R=0.590, F1=0.576, F0.5=0.566
   ⏱️ Training time: 1.9s
   📊 Val @ 0.5   : P=0.546, R=0.612, F1=0.577, F0.5=0.558
   🎯 Val @ 0.10  : P=0.522, R=1.000, F1=0.686, F0.5=0.577
   ✅ AUC: 0.545, Accuracy: 0.522

🔬 [3/10] Trial_73_Balanced
   Expected CV: P=0.558, R=0.592, F1=0.574, F0.5=0.565
   ⏱️ Training time: 1.8s
   📊 Val @ 0.5   : P=0.546, R=0.612, F1=0.577, F0.5=0.558
   🎯 Val @ 0.10  : P=0.522, R=1.000, F1=0.686, F0.5=0

In [16]:
print(f"{'Rank':<4} {'Trial':<25} {'F0.5@0.5':<8} {'Prec@0.5':<9} {'Rec@0.5':<8} {'F1@0.5':<7} {'CV F0.5':<7} {'Δ F0.5':<7}")
...
for rank, (_, row) in enumerate(results_df.iterrows(), 1):
    delta_f05 = row['val_f05_50'] - row['cv_f05']
    print(f"{rank:<4} {row['name']:<25} {row['val_f05_50']:<8.3f} {row['val_precision_50']:<9.3f} {row['val_recall_50']:<8.3f} {row['val_f1_50']:<7.3f} {row['cv_f05']:<7.3f} {delta_f05:+.3f}")


Rank Trial                     F0.5@0.5 Prec@0.5  Rec@0.5  F1@0.5  CV F0.5 Δ F0.5 
1    Trial_98_Best_Overall     0.563    0.558     0.588    0.572   0.567   -0.003
2    Trial_22_High_Precision   0.558    0.546     0.612    0.577   0.566   -0.008
3    Trial_73_Balanced         0.558    0.546     0.612    0.577   0.565   -0.007
4    Trial_57_High_F1          0.557    0.575     0.495    0.532   0.564   -0.008
5    Trial_53_Top_Precision    0.558    0.546     0.612    0.577   0.564   -0.006
6    Trial_90_Max_Precision    0.558    0.546     0.612    0.577   0.563   -0.005
7    Trial_35_Conservative     0.558    0.546     0.612    0.577   0.563   -0.005
8    Trial_43_High_Recall      0.558    0.546     0.612    0.577   0.563   -0.005
9    Trial_54_Stable           0.558    0.546     0.612    0.577   0.563   -0.006
10   Trial_66_Alternative      0.558    0.546     0.612    0.577   0.563   -0.005


In [2]:
# =============================================================
#  BEST PRECISION MODEL TRAINER & PREDICTION CSV GENERATOR
# =============================================================
import numpy as np
import pandas as pd
import time
import sys
import warnings
from pathlib import Path
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    precision_score, recall_score, accuracy_score, 
    f1_score, roc_auc_score, classification_report, confusion_matrix
)
from sklearn.feature_selection import VarianceThreshold
import joblib

warnings.filterwarnings("ignore")
np.random.seed(42)

# ──────────────────────────────────────────────────────────────
# 1) CONFIGURATION
# ──────────────────────────────────────────────────────────────
CSV_FILE = Path(r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv")
TIME_COL = "timestamp"
TARGET_COL = "target"
START_DATE = "2018-01-01"
TEST_FRAC = 0.20
RANDOM_STATE = 42

# Output paths
OUTPUT_DIR = Path(r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\Predictions_folder")
OUTPUT_DIR.mkdir(exist_ok=True)
PREDICTIONS_CSV = OUTPUT_DIR / "validation_predictions.csv"
MODEL_PATH = OUTPUT_DIR / "best_precision_model.pkl"

# BEST PRECISION PARAMETERS (Original_Best_ElasticNet)
BEST_PRECISION_PARAMS = {
    'C': 0.0016351310838425184,
    'class_weight': None,
    'l1_ratio': 0.2636043819680166,
    'penalty': 'elasticnet',
    'solver': 'saga',
    'max_iter': 5000,
    'random_state': RANDOM_STATE
}

DROP_COLS = [
    'open', 'high', 'low', 'high_low', 'high_close', 'low_close', 'typical_price',
    'volume_breakout', 'volume_breakdown', 'break_upper_band', 'break_lower_band',
    'vol_spike_1_5x', 'overbought_reversal', 'oversold_reversal', 'macd_cross_up',
    'macd_cross_down', 'macd_rising', 'bollinger_upper', 'bollinger_lower',
    'MACD_line', 'MACD_signal', 'stoch_%D', 'momentum_alignment',
    'bullish_scenario_1', 'bullish_scenario_5', 'bearish_scenario_1'
]

# ──────────────────────────────────────────────────────────────
# 2) DATA LOADING & PREPROCESSING
# ──────────────────────────────────────────────────────────────
def load_and_prepare_data():
    """Load and prepare data for best precision model training."""
    print("🏆 BEST PRECISION MODEL TRAINING")
    print("=" * 50)
    
    if not CSV_FILE.exists():
        sys.exit(f"❌ File not found: {CSV_FILE}")
    
    print(f"📂 Loading data from: {CSV_FILE}")
    df = pd.read_csv(CSV_FILE, parse_dates=[TIME_COL]).set_index(TIME_COL).sort_index()
    df = df.loc[START_DATE:].copy()
    
    if TARGET_COL not in df.columns:
        sys.exit(f"❌ '{TARGET_COL}' column missing!")
    
    # Remove specified columns
    X = df.drop(columns=[c for c in DROP_COLS if c in df.columns] + [TARGET_COL], errors="ignore")
    y = df[TARGET_COL]
    
    print(f"📊 Dataset shape: {X.shape}")
    print(f"📈 Date range: {df.index.min()} to {df.index.max()}")
    print(f"🎯 Target distribution: {y.value_counts().to_dict()}")
    
    # Handle missing and infinite values
    original_size = len(X)
    
    # Check for missing values
    missing_vals = X.isnull().sum()
    if missing_vals.sum() > 0:
        print(f"⚠️ Handling {missing_vals.sum()} missing values...")
        mask = ~(X.isnull().any(axis=1) | y.isnull())
        X, y = X[mask], y[mask]
        print(f"📊 Shape after removing missing: {X.shape}")
    
    # Check for infinite values
    inf_mask = np.isinf(X.select_dtypes(include=[np.number])).any(axis=1)
    if inf_mask.sum() > 0:
        print(f"⚠️ Handling {inf_mask.sum()} infinite values...")
        X, y = X[~inf_mask], y[~inf_mask]
        print(f"📊 Final shape: {X.shape}")
    
    # Keep track of valid indices for timestamps
    if missing_vals.sum() > 0 or inf_mask.sum() > 0:
        if missing_vals.sum() > 0:
            valid_mask = ~(df.drop(columns=[c for c in DROP_COLS if c in df.columns] + [TARGET_COL], errors="ignore").isnull().any(axis=1) | df[TARGET_COL].isnull())
        else:
            valid_mask = slice(None)
        
        if inf_mask.sum() > 0:
            if isinstance(valid_mask, slice):
                valid_mask = ~inf_mask
            else:
                valid_mask = valid_mask & ~inf_mask
        
        timestamps = df.index[valid_mask]
    else:
        timestamps = df.index
    
    removed_samples = original_size - len(X)
    if removed_samples > 0:
        print(f"📉 Removed {removed_samples} samples due to missing/infinite values")
    
    return X, y, timestamps

def preprocess_features(X_train, X_test):
    """Preprocess features with variance filtering."""
    print("\n🔧 Preprocessing features...")
    
    # Remove constant features
    constant_cols = X_train.columns[X_train.std() == 0]
    if len(constant_cols) > 0:
        print(f"⚠️ Removing {len(constant_cols)} constant features")
        X_train = X_train.drop(columns=constant_cols)
        X_test = X_test.drop(columns=constant_cols)
    
    # Remove low-variance features
    variance_selector = VarianceThreshold(threshold=0.01)
    n_features_before = X_train.shape[1]
    
    X_train_selected = pd.DataFrame(
        variance_selector.fit_transform(X_train),
        index=X_train.index,
        columns=X_train.columns[variance_selector.get_support()]
    )
    X_test_selected = pd.DataFrame(
        variance_selector.transform(X_test),
        index=X_test.index,
        columns=X_train.columns[variance_selector.get_support()]
    )
    
    n_features_after = X_train_selected.shape[1]
    if n_features_before != n_features_after:
        print(f"⚠️ Removed {n_features_before - n_features_after} low-variance features")
    
    print(f"✅ Final feature count: {n_features_after}")
    return X_train_selected, X_test_selected, variance_selector

# ──────────────────────────────────────────────────────────────
# 3) MODEL TRAINING
# ──────────────────────────────────────────────────────────────
def create_best_precision_pipeline():
    """Create pipeline with best precision parameters."""
    return Pipeline([
        ('scaler', StandardScaler()),
        ('logreg', LogisticRegression(**BEST_PRECISION_PARAMS))
    ])

def train_best_precision_model(X_train, y_train):
    """Train the best precision model."""
    print("\n🎯 TRAINING BEST PRECISION MODEL")
    print("=" * 45)
    
    pipeline = create_best_precision_pipeline()
    
    print("🏆 Using best precision parameters:")
    for param, value in BEST_PRECISION_PARAMS.items():
        print(f"   {param:<15}: {value}")
    
    print("\n⏱️ Training model...")
    start_time = time.time()
    pipeline.fit(X_train, y_train)
    training_time = time.time() - start_time
    
    print(f"✅ Model trained in {training_time:.2f} seconds")
    
    # Check convergence
    logreg = pipeline.named_steps['logreg']
    if hasattr(logreg, 'n_iter_'):
        n_iter = logreg.n_iter_[0] if isinstance(logreg.n_iter_, np.ndarray) else logreg.n_iter_
        if n_iter >= logreg.max_iter:
            print(f"⚠️ Model may not have converged (used {n_iter}/{logreg.max_iter} iterations)")
        else:
            print(f"✅ Model converged in {n_iter} iterations")
    
    return pipeline, training_time

# ──────────────────────────────────────────────────────────────
# 4) MODEL EVALUATION
# ──────────────────────────────────────────────────────────────
def f_beta_half(y_true, y_pred):
    """Custom F-beta score with beta=0.5 (precision-weighted)."""
    p = precision_score(y_true, y_pred, zero_division=0)
    r = recall_score(y_true, y_pred, zero_division=0)
    beta = 0.5
    if (p + r) == 0:
        return 0.0
    return (1 + beta**2) * p * r / (beta**2 * p + r)

def evaluate_best_precision_model(model, X_train, y_train, X_test, y_test):
    """Evaluate the best precision model."""
    print("\n📊 MODEL EVALUATION")
    print("=" * 30)
    
    # Make predictions
    y_train_pred = model.predict(X_train)
    y_train_prob = model.predict_proba(X_train)
    
    y_test_pred = model.predict(X_test)
    y_test_prob = model.predict_proba(X_test)
    
    # Calculate metrics for test set
    test_metrics = {
        'accuracy': accuracy_score(y_test, y_test_pred),
        'precision': precision_score(y_test, y_test_pred, zero_division=0),
        'recall': recall_score(y_test, y_test_pred, zero_division=0),
        'f1': f1_score(y_test, y_test_pred, zero_division=0),
        'f_beta_0.5': f_beta_half(y_test, y_test_pred),
        'roc_auc': roc_auc_score(y_test, y_test_prob[:, 1]) if len(np.unique(y_test)) > 1 else 0.0
    }
    
    print("📈 TEST SET METRICS:")
    print("-" * 25)
    for metric_name, value in test_metrics.items():
        print(f"   {metric_name:<12}: {value:.4f}")
    
    # Confusion matrix
    cm = confusion_matrix(y_test, y_test_pred)
    print(f"\n🎯 CONFUSION MATRIX:")
    print("-" * 25)
    print(f"True Negatives:  {cm[0,0]:>6}")
    print(f"False Positives: {cm[0,1]:>6}")
    print(f"False Negatives: {cm[1,0]:>6}")
    print(f"True Positives:  {cm[1,1]:>6}")
    
    return {
        'test_metrics': test_metrics,
        'train_predictions': y_train_pred,
        'train_probabilities': y_train_prob,
        'test_predictions': y_test_pred,
        'test_probabilities': y_test_prob
    }

# ──────────────────────────────────────────────────────────────
# 5) PREDICTION CSV GENERATION
# ──────────────────────────────────────────────────────────────
def generate_predictions_csv(X_test, y_test, results, test_timestamps):
    """Generate CSV with ONLY validation (test) predictions in exact requested format."""
    print("\n💾 GENERATING VALIDATION PREDICTIONS CSV")
    print("=" * 45)
    
    # Get test predictions and probabilities
    test_predictions = results['test_predictions']
    test_probabilities = results['test_probabilities']
    
    # Create DataFrame in exact requested format
    predictions_df = pd.DataFrame({
        'timestamp': test_timestamps,
        'prob_up': test_probabilities[:, 1],      # Probability of class 1 (up)
        'prob_down': test_probabilities[:, 0],    # Probability of class 0 (down)
        'winning_prob': np.maximum(test_probabilities[:, 1], test_probabilities[:, 0]),  # Maximum probability
        'prediction': test_predictions,           # Final prediction (0 or 1)
        'actual': y_test.values                   # Actual target value (0 or 1)
    })
    
    # Sort by timestamp
    predictions_df = predictions_df.sort_values('timestamp').reset_index(drop=True)
    
    # Format timestamp to match your example (DD/MM/YYYY HH:MM)
    predictions_df['timestamp'] = predictions_df['timestamp'].dt.strftime('%d/%m/%Y %H:%M')
    
    # Save to CSV
    predictions_df.to_csv(PREDICTIONS_CSV, index=False)
    
    print(f"✅ Validation predictions saved to: {PREDICTIONS_CSV}")
    print(f"📊 Total validation predictions: {len(predictions_df):,}")
    
    # Display sample predictions in your exact format
    print(f"\n📋 SAMPLE PREDICTIONS (First 10 rows in your format):")
    print("-" * 80)
    print("timestamp            prob_up  prob_down winning_prob prediction actual")
    print("-" * 80)
    for idx, row in predictions_df.head(10).iterrows():
        print(f"{row['timestamp']:<20} {row['prob_up']:.6f} {row['prob_down']:.6f} {row['winning_prob']:.6f} {row['prediction']:<10} {row['actual']}")
    
    # Calculate validation accuracy
    accuracy = (predictions_df['prediction'] == predictions_df['actual']).mean()
    
    # Summary statistics
    print(f"\n📈 VALIDATION SUMMARY:")
    print("-" * 25)
    print(f"Validation accuracy: {accuracy:.4f}")
    print(f"Average winning probability: {predictions_df['winning_prob'].mean():.4f}")
    print(f"High confidence predictions (>0.7): {(predictions_df['winning_prob'] > 0.7).sum():,}")
    print(f"Correct high confidence predictions: {((predictions_df['winning_prob'] > 0.7) & (predictions_df['prediction'] == predictions_df['actual'])).sum():,}")
    
    return predictions_df

# ──────────────────────────────────────────────────────────────
# 6) MODEL SAVING
# ──────────────────────────────────────────────────────────────
def save_best_precision_model(model, variance_selector, feature_names, results, training_time):
    """Save the complete best precision model."""
    print(f"\n💾 SAVING BEST PRECISION MODEL")
    print("=" * 35)
    
    model_package = {
        'model': model,
        'variance_selector': variance_selector,
        'feature_names': feature_names,
        'parameters': BEST_PRECISION_PARAMS,
        'performance_metrics': results['test_metrics'],
        'training_info': {
            'training_date': pd.Timestamp.now(),
            'training_time_seconds': training_time,
            'algorithm': 'LogisticRegression ElasticNet (Best Precision)',
            'total_features': len(feature_names),
            'preprocessing_steps': ['StandardScaler', 'VarianceThreshold'],
            'model_description': 'Highest precision model from parameter comparison'
        }
    }
    
    joblib.dump(model_package, MODEL_PATH)
    print(f"✅ Model saved to: {MODEL_PATH}")
    
    return model_package

# ──────────────────────────────────────────────────────────────
# 7) MAIN EXECUTION
# ──────────────────────────────────────────────────────────────
def main():
    """Main execution function."""
    
    # Load and prepare data
    X, y, timestamps = load_and_prepare_data()
    
    # Chronological split
    split_idx = int(len(X) * (1 - TEST_FRAC))
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]
    train_timestamps = timestamps[:split_idx]
    test_timestamps = timestamps[split_idx:]
    
    print(f"\n📊 TRAIN/TEST SPLIT:")
    print("-" * 25)
    print(f"   Train: {X_train.shape[0]:,} samples")
    print(f"   Test:  {X_test.shape[0]:,} samples")
    print(f"   Train period: {train_timestamps.min()} to {train_timestamps.max()}")
    print(f"   Test period:  {test_timestamps.min()} to {test_timestamps.max()}")
    
    # Preprocess features
    X_train_processed, X_test_processed, variance_selector = preprocess_features(X_train, X_test)
    
    # Train best precision model
    best_model, training_time = train_best_precision_model(X_train_processed, y_train)
    
    # Evaluate model
    results = evaluate_best_precision_model(best_model, X_train_processed, y_train, X_test_processed, y_test)
    
    # Generate validation predictions CSV (test set only)
    predictions_df = generate_predictions_csv(
        X_test_processed, y_test, results, test_timestamps
    )
    
    # Save model
    model_package = save_best_precision_model(
        best_model, variance_selector, list(X_train_processed.columns), 
        results, training_time
    )
    
    print(f"\n🎉 BEST PRECISION MODEL TRAINING COMPLETED!")
    print("=" * 50)
    print(f"📁 Files Generated:")
    print(f"   🔸 Predictions: {PREDICTIONS_CSV.name}")
    print(f"   🔸 Model:       {MODEL_PATH.name}")
    print(f"\n🏆 Final Test Precision: {results['test_metrics']['precision']:.4f}")
    print(f"🎯 Final Test F1-Score:  {results['test_metrics']['f1']:.4f}")
    
    return {
        'model': best_model,
        'predictions_df': predictions_df,
        'results': results,
        'model_package': model_package
    }

# ──────────────────────────────────────────────────────────────
# 8) SCRIPT EXECUTION
# ──────────────────────────────────────────────────────────────
if __name__ == "__main__":
    final_results = main()

🏆 BEST PRECISION MODEL TRAINING
📂 Loading data from: C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv
📊 Dataset shape: (15855, 46)
📈 Date range: 2018-01-01 00:00:00 to 2025-03-28 00:00:00
🎯 Target distribution: {1: 8097, 0: 7758}

📊 TRAIN/TEST SPLIT:
-------------------------
   Train: 12,684 samples
   Test:  3,171 samples
   Train period: 2018-01-01 00:00:00 to 2023-10-16 12:00:00
   Test period:  2023-10-16 16:00:00 to 2025-03-28 00:00:00

🔧 Preprocessing features...
⚠️ Removed 2 low-variance features
✅ Final feature count: 44

🎯 TRAINING BEST PRECISION MODEL
🏆 Using best precision parameters:
   C              : 0.0016351310838425184
   class_weight   : None
   l1_ratio       : 0.2636043819680166
   penalty        : elasticnet
   solver         : saga
   max_iter       : 5000
   random_state   : 42

⏱️ Training model...
✅ Model trained in 0.10 seconds
✅ Model converged in 15 iterations

📊 MODEL EVA

In [24]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score

# Load the predictions CSV
csv_path = r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\src\Models\models\models\catboost_trial57_predictions.csv"
df = pd.read_csv(csv_path)

# Ensure column names are correct and lowercase
df.columns = df.columns.str.strip().str.lower()

# Extract actual and predicted values
y_true = df['actual']
y_pred = df['prediction']  # prediction at threshold 0.5

# Calculate metrics
precision = precision_score(y_true, y_pred, zero_division=0)
recall = recall_score(y_true, y_pred, zero_division=0)
f1 = f1_score(y_true, y_pred, zero_division=0)

# Print results
print("📊 Evaluation at threshold 0.5:")
print(f"Precision: {precision:.3f}")
print(f"Recall   : {recall:.3f}")
print(f"F1 Score : {f1:.3f}")


📊 Evaluation at threshold 0.5:
Precision: 0.575
Recall   : 0.495
F1 Score : 0.532
