In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import optuna
import shap
import mlflow
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import os
import sys

# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from config import PROCESSED_DATA_PATH, MLFLOW_TRACKING_URI, MODEL_NAME

# --- Set up MLflow CORRECTLY ---
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment("ETF_Trend_Prediction")

# --- Load Data ---
data = pd.read_parquet(PROCESSED_DATA_PATH)

# Separate features (X) and target (y)
X = data.drop('target', axis=1)
y = data['target']

  import pkg_resources  # noqa: TID251


2025/08/31 14:05:15 INFO mlflow.tracking.fluent: Experiment with name 'ETF_Trend_Prediction' does not exist. Creating a new experiment.


In [2]:
# Define the chronological split point
# For example, use data up to the end of 2021 for training, and 2022 onwards for testing.
train_end_date = '2021-12-31'
test_start_date = '2022-01-01'

X_train = X.loc[:train_end_date]
y_train = y.loc[:train_end_date]

X_test = X.loc[test_start_date:]
y_test = y.loc[test_start_date:]

print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

mlflow.set_experiment("ETF_Trend_Prediction")

Training set size: 2380
Test set size: 908


<Experiment: artifact_location='file:C:\\Users\\dawso\\Dev\\Personal\\AIGrind\\mlops-etf-forecasting\\mlruns/492371556750088850', creation_time=1756663515713, experiment_id='492371556750088850', last_update_time=1756663515713, lifecycle_stage='active', name='ETF_Trend_Prediction', tags={}>

In [3]:
# Train Logistic Regression
with mlflow.start_run(run_name="LogisticRegression_Baseline"):
    model_lr = LogisticRegression(max_iter=1000, random_state=42)
    model_lr.fit(X_train, y_train)
    y_pred_lr = model_lr.predict(X_test)
    
    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_lr))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_lr))
    print(f"Logistic Regression F1 Score: {f1_score(y_test, y_pred_lr):.4f}")

# Train Random Forest
with mlflow.start_run(run_name="RandomForest_Baseline"):
    model_rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
    model_rf.fit(X_train, y_train)
    y_pred_rf = model_rf.predict(X_test)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_rf))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_rf))
    print(f"Random Forest F1 Score: {f1_score(y_test, y_pred_rf):.4f}")

Logistic Regression F1 Score: 0.6968


Random Forest F1 Score: 0.6290


In [None]:
def objective(trial):
    # Define the search space for hyperparameters
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'random_state': 42
    }
    
    model = xgb.XGBClassifier(**params)
    
    # Use TimeSeriesSplit for robust cross-validation
    tscv = TimeSeriesSplit(n_splits=5)
    score = cross_val_score(model, X_train, y_train, cv=tscv, scoring='f1', n_jobs=1).mean()
    
    return score

In [5]:
# Run the study to find the best params
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=125) 

best_params = study.best_params
print("Best XGBoost Params:", best_params)

# Train the final XGBoost model with the best parameters and log to MLflow
with mlflow.start_run(run_name="XGBoost_Tuned_Champion") as run:
    print("--- Training and Logging Champion XGBoost Model ---")
    best_params = study.best_params
    
    # Log hyperparameters
    mlflow.log_params(best_params)
    
    # Train the model
    model_xgb = xgb.XGBClassifier(**best_params, random_state=42)
    model_xgb.fit(X_train, y_train)
    
    # Evaluate and log metrics
    y_pred_xgb = model_xgb.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred_xgb)
    f1 = f1_score(y_test, y_pred_xgb)
    roc_auc = roc_auc_score(y_test, model_xgb.predict_proba(X_test)[:, 1])
    
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("roc_auc", roc_auc)
    
    print(f"Champion XGBoost F1 Score: {f1:.4f}")

    # Log the model itself
    mlflow.xgboost.log_model(
        xgb_model=model_xgb,
        artifact_path="xgb-model",
        registered_model_name="etf-xgboost-predictor" # This also registers the model
    )
    print("Champion model logged and registered.")

    # --- NEW CODE TO ADD STARTS HERE ---
    
    print("--- Logging SHAP assets for the dashboard ---")
    import joblib # Make sure joblib is imported
    
    # 1. Create and save the SHAP explainer object
    explainer = shap.TreeExplainer(model_xgb)
    joblib.dump(explainer, "explainer.joblib")
    
    # 2. Save the X_test dataframe needed for plotting
    X_test.to_parquet("X_test.parquet")
    
    # 3. Log these files as MLflow artifacts in specific sub-folders
    mlflow.log_artifact("explainer.joblib", artifact_path="shap_explainer")
    mlflow.log_artifact("X_test.parquet", artifact_path="shap_xtest")
    
    print("SHAP explainer and X_test data successfully logged as artifacts.")

[I 2025-08-31 14:05:16,414] A new study created in memory with name: no-name-2c938972-51bc-48bc-906b-e0e29be072d8


[I 2025-08-31 14:05:19,127] Trial 0 finished with value: 0.5360648099528504 and parameters: {'n_estimators': 913, 'max_depth': 9, 'learning_rate': 0.268623433264274, 'subsample': 0.6970252053947417, 'colsample_bytree': 0.7860761957433005, 'gamma': 2.9927657451165164}. Best is trial 0 with value: 0.5360648099528504.


[I 2025-08-31 14:05:21,022] Trial 1 finished with value: 0.5273697647558737 and parameters: {'n_estimators': 969, 'max_depth': 6, 'learning_rate': 0.26896643435380124, 'subsample': 0.7911558053027973, 'colsample_bytree': 0.5234654285441125, 'gamma': 0.6665220179909237}. Best is trial 0 with value: 0.5360648099528504.


[I 2025-08-31 14:05:24,477] Trial 2 finished with value: 0.5136679077501333 and parameters: {'n_estimators': 501, 'max_depth': 8, 'learning_rate': 0.016080635025102645, 'subsample': 0.9000955317950565, 'colsample_bytree': 0.8984698273219339, 'gamma': 1.477836729875634}. Best is trial 0 with value: 0.5360648099528504.


[I 2025-08-31 14:05:26,526] Trial 3 finished with value: 0.499456117817623 and parameters: {'n_estimators': 359, 'max_depth': 5, 'learning_rate': 0.03974017879621874, 'subsample': 0.9432716782847458, 'colsample_bytree': 0.9646054177029721, 'gamma': 0.1120877927400421}. Best is trial 0 with value: 0.5360648099528504.


[I 2025-08-31 14:05:26,996] Trial 4 finished with value: 0.5057143477206046 and parameters: {'n_estimators': 964, 'max_depth': 9, 'learning_rate': 0.2508963256323556, 'subsample': 0.8807700280626858, 'colsample_bytree': 0.6226431487001718, 'gamma': 3.460606958331714}. Best is trial 0 with value: 0.5360648099528504.


[I 2025-08-31 14:05:27,682] Trial 5 finished with value: 0.5203959454643822 and parameters: {'n_estimators': 223, 'max_depth': 8, 'learning_rate': 0.05442227195771873, 'subsample': 0.7480233934653346, 'colsample_bytree': 0.7013344636278643, 'gamma': 1.823621235977178}. Best is trial 0 with value: 0.5360648099528504.


[I 2025-08-31 14:05:28,060] Trial 6 finished with value: 0.5238796479405996 and parameters: {'n_estimators': 885, 'max_depth': 5, 'learning_rate': 0.1327684448876689, 'subsample': 0.9466982093189397, 'colsample_bytree': 0.6734778043988818, 'gamma': 4.934053970531739}. Best is trial 0 with value: 0.5360648099528504.


[I 2025-08-31 14:05:29,520] Trial 7 finished with value: 0.5372222520648308 and parameters: {'n_estimators': 984, 'max_depth': 7, 'learning_rate': 0.05240033232034921, 'subsample': 0.5674339015165311, 'colsample_bytree': 0.6073584736850872, 'gamma': 0.9935685179165271}. Best is trial 7 with value: 0.5372222520648308.


[I 2025-08-31 14:05:29,731] Trial 8 finished with value: 0.5258468055539682 and parameters: {'n_estimators': 296, 'max_depth': 3, 'learning_rate': 0.26425407484003616, 'subsample': 0.6585395326168579, 'colsample_bytree': 0.6409647255789305, 'gamma': 2.905459421537024}. Best is trial 7 with value: 0.5372222520648308.


[I 2025-08-31 14:05:30,510] Trial 9 finished with value: 0.5164784220740913 and parameters: {'n_estimators': 333, 'max_depth': 7, 'learning_rate': 0.12825632092670297, 'subsample': 0.9053941715231755, 'colsample_bytree': 0.8273305909144275, 'gamma': 0.3935128017954437}. Best is trial 7 with value: 0.5372222520648308.


[I 2025-08-31 14:05:31,364] Trial 10 finished with value: 0.5464265908013557 and parameters: {'n_estimators': 714, 'max_depth': 10, 'learning_rate': 0.0918514937250572, 'subsample': 0.5064533717499109, 'colsample_bytree': 0.5348096371208361, 'gamma': 1.5787209658231873}. Best is trial 10 with value: 0.5464265908013557.


[I 2025-08-31 14:05:32,273] Trial 11 finished with value: 0.5485642323827269 and parameters: {'n_estimators': 734, 'max_depth': 10, 'learning_rate': 0.08326694983169317, 'subsample': 0.5076268950791095, 'colsample_bytree': 0.5044866771329994, 'gamma': 1.4830839781947214}. Best is trial 11 with value: 0.5485642323827269.


[I 2025-08-31 14:05:32,978] Trial 12 finished with value: 0.5448897914868012 and parameters: {'n_estimators': 740, 'max_depth': 10, 'learning_rate': 0.11471972921903531, 'subsample': 0.5008443236435659, 'colsample_bytree': 0.5314651351632818, 'gamma': 1.844390233197644}. Best is trial 11 with value: 0.5485642323827269.


[I 2025-08-31 14:05:33,525] Trial 13 finished with value: 0.5467984485019981 and parameters: {'n_estimators': 677, 'max_depth': 10, 'learning_rate': 0.1979876143962108, 'subsample': 0.502604014110098, 'colsample_bytree': 0.5125680290358565, 'gamma': 2.207854113082056}. Best is trial 11 with value: 0.5485642323827269.


[I 2025-08-31 14:05:33,899] Trial 14 finished with value: 0.5536592676490326 and parameters: {'n_estimators': 603, 'max_depth': 10, 'learning_rate': 0.21021729198380285, 'subsample': 0.5972672681202065, 'colsample_bytree': 0.500324721308941, 'gamma': 3.6955075966085786}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:34,249] Trial 15 finished with value: 0.526141007183565 and parameters: {'n_estimators': 539, 'max_depth': 9, 'learning_rate': 0.19156375282505383, 'subsample': 0.5985745557500997, 'colsample_bytree': 0.5894666688117478, 'gamma': 4.2288260844499455}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:34,660] Trial 16 finished with value: 0.5536499007067321 and parameters: {'n_estimators': 617, 'max_depth': 8, 'learning_rate': 0.18594622044381068, 'subsample': 0.6184657925317563, 'colsample_bytree': 0.7330019047903813, 'gamma': 3.6385856903681906}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:35,000] Trial 17 finished with value: 0.5433491490847082 and parameters: {'n_estimators': 469, 'max_depth': 8, 'learning_rate': 0.19578323739392858, 'subsample': 0.6218807864131453, 'colsample_bytree': 0.7271390049584345, 'gamma': 3.8925377982774463}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:35,366] Trial 18 finished with value: 0.5385559505735544 and parameters: {'n_estimators': 664, 'max_depth': 3, 'learning_rate': 0.2268836689958738, 'subsample': 0.7158788980885311, 'colsample_bytree': 0.8274415989106859, 'gamma': 4.628224731738687}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:35,551] Trial 19 finished with value: 0.535689076920873 and parameters: {'n_estimators': 114, 'max_depth': 6, 'learning_rate': 0.1752911821983416, 'subsample': 0.8084033213298775, 'colsample_bytree': 0.9884007516369249, 'gamma': 3.6248516415061154}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:36,026] Trial 20 finished with value: 0.535781091807394 and parameters: {'n_estimators': 604, 'max_depth': 9, 'learning_rate': 0.23042934225845096, 'subsample': 0.657869201276748, 'colsample_bytree': 0.9006068837255975, 'gamma': 2.917738625406516}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:36,606] Trial 21 finished with value: 0.5360021150084411 and parameters: {'n_estimators': 793, 'max_depth': 10, 'learning_rate': 0.1550498595674599, 'subsample': 0.5590709554800787, 'colsample_bytree': 0.5748331854007573, 'gamma': 2.3126956310804063}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:37,112] Trial 22 finished with value: 0.5127928947641723 and parameters: {'n_estimators': 811, 'max_depth': 8, 'learning_rate': 0.08837609925281807, 'subsample': 0.5612705777367494, 'colsample_bytree': 0.5610262274249078, 'gamma': 4.075762373341879}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:37,494] Trial 23 finished with value: 0.5373919342361416 and parameters: {'n_estimators': 591, 'max_depth': 9, 'learning_rate': 0.29170388293389776, 'subsample': 0.6117573742020497, 'colsample_bytree': 0.6764352675418699, 'gamma': 3.456471371929002}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:37,907] Trial 24 finished with value: 0.5177602043784224 and parameters: {'n_estimators': 434, 'max_depth': 10, 'learning_rate': 0.15919268961774688, 'subsample': 0.5485124760557046, 'colsample_bytree': 0.505333779390202, 'gamma': 2.657081892502126}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:38,486] Trial 25 finished with value: 0.5415937387298589 and parameters: {'n_estimators': 620, 'max_depth': 7, 'learning_rate': 0.22304934253203904, 'subsample': 0.6587060431322465, 'colsample_bytree': 0.7868053554141876, 'gamma': 1.1280024445596593}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:39,003] Trial 26 finished with value: 0.5369435705292922 and parameters: {'n_estimators': 792, 'max_depth': 9, 'learning_rate': 0.08739376752812547, 'subsample': 0.5987190773744127, 'colsample_bytree': 0.6417009347357006, 'gamma': 4.365865746408534}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:39,404] Trial 27 finished with value: 0.5376208178059787 and parameters: {'n_estimators': 536, 'max_depth': 10, 'learning_rate': 0.18491612038052846, 'subsample': 0.5351469209586472, 'colsample_bytree': 0.567873342669144, 'gamma': 3.2568925487054408}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:39,716] Trial 28 finished with value: 0.5394759629297391 and parameters: {'n_estimators': 418, 'max_depth': 8, 'learning_rate': 0.21443836576587483, 'subsample': 0.6329767383352107, 'colsample_bytree': 0.7371759929673809, 'gamma': 3.896152090083682}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:40,233] Trial 29 finished with value: 0.5153548941953299 and parameters: {'n_estimators': 846, 'max_depth': 9, 'learning_rate': 0.16890293711614274, 'subsample': 0.7096255730108105, 'colsample_bytree': 0.7836236481845692, 'gamma': 3.212916869902024}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:40,781] Trial 30 finished with value: 0.5401792110652276 and parameters: {'n_estimators': 730, 'max_depth': 4, 'learning_rate': 0.1396416814973801, 'subsample': 0.6822412939690892, 'colsample_bytree': 0.9079848569989009, 'gamma': 2.44514302895632}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:41,301] Trial 31 finished with value: 0.5273467313964483 and parameters: {'n_estimators': 702, 'max_depth': 10, 'learning_rate': 0.20806082661550077, 'subsample': 0.5227228172648697, 'colsample_bytree': 0.5040823170427163, 'gamma': 2.1565896396222013}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:41,736] Trial 32 finished with value: 0.5234264618053637 and parameters: {'n_estimators': 636, 'max_depth': 10, 'learning_rate': 0.24323610569005005, 'subsample': 0.5830176301375873, 'colsample_bytree': 0.548135147207582, 'gamma': 2.6464648678315004}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:42,321] Trial 33 finished with value: 0.5528012787743872 and parameters: {'n_estimators': 576, 'max_depth': 10, 'learning_rate': 0.1983833617305403, 'subsample': 0.5276157623311889, 'colsample_bytree': 0.512968451083533, 'gamma': 1.2536092494464197}. Best is trial 14 with value: 0.5536592676490326.


[I 2025-08-31 14:05:42,984] Trial 34 finished with value: 0.5565858410359867 and parameters: {'n_estimators': 542, 'max_depth': 8, 'learning_rate': 0.17548487405788513, 'subsample': 0.5364831979991835, 'colsample_bytree': 0.6030800219609134, 'gamma': 0.9964709025916985}. Best is trial 34 with value: 0.5565858410359867.


[I 2025-08-31 14:05:43,490] Trial 35 finished with value: 0.5373084905186081 and parameters: {'n_estimators': 571, 'max_depth': 6, 'learning_rate': 0.17471571154424537, 'subsample': 0.8165143005071821, 'colsample_bytree': 0.5979364040153996, 'gamma': 1.0452783593008195}. Best is trial 34 with value: 0.5565858410359867.


[I 2025-08-31 14:05:44,069] Trial 36 finished with value: 0.5620938573710326 and parameters: {'n_estimators': 472, 'max_depth': 7, 'learning_rate': 0.20605451688965276, 'subsample': 0.5783701379410066, 'colsample_bytree': 0.6393493006273717, 'gamma': 0.7599265769960273}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:44,462] Trial 37 finished with value: 0.5281968615570909 and parameters: {'n_estimators': 511, 'max_depth': 7, 'learning_rate': 0.2452512618985387, 'subsample': 0.9941852933036796, 'colsample_bytree': 0.6739297431603086, 'gamma': 0.7066668519500914}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:45,085] Trial 38 finished with value: 0.5598481341808357 and parameters: {'n_estimators': 389, 'max_depth': 6, 'learning_rate': 0.27890550884918097, 'subsample': 0.6286636697091086, 'colsample_bytree': 0.7029870459250794, 'gamma': 0.19659336168323982}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:45,759] Trial 39 finished with value: 0.521264939236594 and parameters: {'n_estimators': 390, 'max_depth': 5, 'learning_rate': 0.2899748521374151, 'subsample': 0.7451635928491338, 'colsample_bytree': 0.6336076950634969, 'gamma': 0.03335383068406239}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:46,271] Trial 40 finished with value: 0.5278424142768416 and parameters: {'n_estimators': 318, 'max_depth': 6, 'learning_rate': 0.27093513252752044, 'subsample': 0.5908254977883378, 'colsample_bytree': 0.7076385230421153, 'gamma': 0.48496891664219044}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:46,832] Trial 41 finished with value: 0.5503445270704285 and parameters: {'n_estimators': 474, 'max_depth': 7, 'learning_rate': 0.2633821367438682, 'subsample': 0.6485297170075612, 'colsample_bytree': 0.7708314417997252, 'gamma': 0.7525173992828874}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:47,758] Trial 42 finished with value: 0.5573211740072557 and parameters: {'n_estimators': 235, 'max_depth': 8, 'learning_rate': 0.14892427952704845, 'subsample': 0.5663782187393669, 'colsample_bytree': 0.7000678712062656, 'gamma': 0.21638557618725512}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:48,337] Trial 43 finished with value: 0.5465993700967882 and parameters: {'n_estimators': 240, 'max_depth': 5, 'learning_rate': 0.12132253429769202, 'subsample': 0.5787495293539187, 'colsample_bytree': 0.6567514090776262, 'gamma': 0.22156217795178673}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:49,002] Trial 44 finished with value: 0.539554629518645 and parameters: {'n_estimators': 167, 'max_depth': 7, 'learning_rate': 0.1525048788744365, 'subsample': 0.6853098158872835, 'colsample_bytree': 0.6985679834478944, 'gamma': 0.3571804113745275}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:49,643] Trial 45 finished with value: 0.5462511361068085 and parameters: {'n_estimators': 285, 'max_depth': 8, 'learning_rate': 0.14504448471842843, 'subsample': 0.5413897591666097, 'colsample_bytree': 0.6256551314670463, 'gamma': 0.8128315483009636}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:50,485] Trial 46 finished with value: 0.542715935192282 and parameters: {'n_estimators': 378, 'max_depth': 6, 'learning_rate': 0.11093716875850218, 'subsample': 0.5698586684921786, 'colsample_bytree': 0.6954718804689911, 'gamma': 0.5692322192404834}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:52,681] Trial 47 finished with value: 0.5446556668973022 and parameters: {'n_estimators': 455, 'max_depth': 7, 'learning_rate': 0.022680317761655217, 'subsample': 0.6396007948013657, 'colsample_bytree': 0.6022801066993646, 'gamma': 8.847015672291958e-05}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:53,138] Trial 48 finished with value: 0.5413267571661835 and parameters: {'n_estimators': 245, 'max_depth': 5, 'learning_rate': 0.27989487040181726, 'subsample': 0.6063379099191142, 'colsample_bytree': 0.6558183581113282, 'gamma': 0.29148374258922327}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:53,494] Trial 49 finished with value: 0.5303103363006952 and parameters: {'n_estimators': 353, 'max_depth': 8, 'learning_rate': 0.2100083693459026, 'subsample': 0.8554428216630784, 'colsample_bytree': 0.7564519161207913, 'gamma': 1.8322841273814214}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:53,914] Trial 50 finished with value: 0.5517493144944665 and parameters: {'n_estimators': 494, 'max_depth': 4, 'learning_rate': 0.23570490118267556, 'subsample': 0.523655433807569, 'colsample_bytree': 0.5442192882897577, 'gamma': 1.2889665818558134}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:54,557] Trial 51 finished with value: 0.5459150654939459 and parameters: {'n_estimators': 521, 'max_depth': 8, 'learning_rate': 0.18403913603076022, 'subsample': 0.6220168039041045, 'colsample_bytree': 0.7184214547517037, 'gamma': 0.8734386083852832}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:55,521] Trial 52 finished with value: 0.5506106543873257 and parameters: {'n_estimators': 652, 'max_depth': 8, 'learning_rate': 0.1632624885483046, 'subsample': 0.5795284979761987, 'colsample_bytree': 0.829500938784742, 'gamma': 0.5973936955703734}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:55,801] Trial 53 finished with value: 0.531818732933966 and parameters: {'n_estimators': 408, 'max_depth': 7, 'learning_rate': 0.2584007652650074, 'subsample': 0.5545554479399661, 'colsample_bytree': 0.6867417330525509, 'gamma': 4.675923747931013}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:56,178] Trial 54 finished with value: 0.5537472795959381 and parameters: {'n_estimators': 554, 'max_depth': 6, 'learning_rate': 0.18316921747458523, 'subsample': 0.6751709871519995, 'colsample_bytree': 0.7506927713802883, 'gamma': 3.651679306404686}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:56,617] Trial 55 finished with value: 0.5450140838098141 and parameters: {'n_estimators': 441, 'max_depth': 6, 'learning_rate': 0.20234178840516498, 'subsample': 0.6717376864160072, 'colsample_bytree': 0.7503093019625913, 'gamma': 1.6699733676664894}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:57,047] Trial 56 finished with value: 0.5368624781165059 and parameters: {'n_estimators': 105, 'max_depth': 6, 'learning_rate': 0.10394236598495836, 'subsample': 0.7210501097092898, 'colsample_bytree': 0.6574133256323316, 'gamma': 0.19509020460981194}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:57,814] Trial 57 finished with value: 0.5282613126000006 and parameters: {'n_estimators': 561, 'max_depth': 5, 'learning_rate': 0.13383083360217296, 'subsample': 0.6277146744335711, 'colsample_bytree': 0.5803405995840114, 'gamma': 0.4251437507529369}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:58,209] Trial 58 finished with value: 0.5358863590552317 and parameters: {'n_estimators': 199, 'max_depth': 6, 'learning_rate': 0.21938338703968482, 'subsample': 0.6059018009843592, 'colsample_bytree': 0.6129639127125939, 'gamma': 0.8918879625085145}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:58,585] Trial 59 finished with value: 0.5025152202603451 and parameters: {'n_estimators': 504, 'max_depth': 7, 'learning_rate': 0.1773944357556642, 'subsample': 0.7682836374201399, 'colsample_bytree': 0.8116328716550187, 'gamma': 3.101198625055364}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:58,988] Trial 60 finished with value: 0.5599991509072196 and parameters: {'n_estimators': 550, 'max_depth': 9, 'learning_rate': 0.14610428462673025, 'subsample': 0.563898197608592, 'colsample_bytree': 0.646482430470305, 'gamma': 3.5563763963273156}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:59,499] Trial 61 finished with value: 0.5219570728821374 and parameters: {'n_estimators': 552, 'max_depth': 9, 'learning_rate': 0.07244229201344829, 'subsample': 0.5669540448903962, 'colsample_bytree': 0.6526748048730984, 'gamma': 3.6817100623405175}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:05:59,939] Trial 62 finished with value: 0.5382841159849893 and parameters: {'n_estimators': 478, 'max_depth': 9, 'learning_rate': 0.14704403745119393, 'subsample': 0.5461091352804417, 'colsample_bytree': 0.6734039850866822, 'gamma': 3.4461543119200053}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:06:00,323] Trial 63 finished with value: 0.5195312541091827 and parameters: {'n_estimators': 603, 'max_depth': 9, 'learning_rate': 0.16467041633391108, 'subsample': 0.6416247334469636, 'colsample_bytree': 0.7145865247673924, 'gamma': 4.241211847599246}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:06:00,624] Trial 64 finished with value: 0.5400924397710615 and parameters: {'n_estimators': 280, 'max_depth': 8, 'learning_rate': 0.1280981897564756, 'subsample': 0.593436820097937, 'colsample_bytree': 0.7396893229122901, 'gamma': 3.831900706034939}. Best is trial 36 with value: 0.5620938573710326.


[I 2025-08-31 14:06:00,986] Trial 65 finished with value: 0.5660509029058814 and parameters: {'n_estimators': 548, 'max_depth': 9, 'learning_rate': 0.181947783642202, 'subsample': 0.5668416231995351, 'colsample_bytree': 0.6269684676243361, 'gamma': 4.552817163455614}. Best is trial 65 with value: 0.5660509029058814.


[I 2025-08-31 14:06:01,333] Trial 66 finished with value: 0.5487641304292361 and parameters: {'n_estimators': 531, 'max_depth': 9, 'learning_rate': 0.18635274711521446, 'subsample': 0.5128075143783778, 'colsample_bytree': 0.6201666784098113, 'gamma': 4.839277722620272}. Best is trial 65 with value: 0.5660509029058814.


[I 2025-08-31 14:06:01,535] Trial 67 finished with value: 0.5526328957951859 and parameters: {'n_estimators': 140, 'max_depth': 8, 'learning_rate': 0.15512489938358598, 'subsample': 0.5387236676938353, 'colsample_bytree': 0.6393156435744309, 'gamma': 4.592581660272667}. Best is trial 65 with value: 0.5660509029058814.


[I 2025-08-31 14:06:02,056] Trial 68 finished with value: 0.5551819317715221 and parameters: {'n_estimators': 695, 'max_depth': 9, 'learning_rate': 0.17473163198227107, 'subsample': 0.5623435200544554, 'colsample_bytree': 0.6800493040732503, 'gamma': 2.810698804786778}. Best is trial 65 with value: 0.5660509029058814.


[I 2025-08-31 14:06:02,637] Trial 69 finished with value: 0.5371009305219092 and parameters: {'n_estimators': 681, 'max_depth': 9, 'learning_rate': 0.1722578848230873, 'subsample': 0.559179237734192, 'colsample_bytree': 0.6846103113311519, 'gamma': 2.060169718684182}. Best is trial 65 with value: 0.5660509029058814.


[I 2025-08-31 14:06:03,192] Trial 70 finished with value: 0.5346854237176856 and parameters: {'n_estimators': 766, 'max_depth': 9, 'learning_rate': 0.1426444469161045, 'subsample': 0.5158536846351245, 'colsample_bytree': 0.5829668237747829, 'gamma': 2.8776487175945173}. Best is trial 65 with value: 0.5660509029058814.


[I 2025-08-31 14:06:03,579] Trial 71 finished with value: 0.5242534628319583 and parameters: {'n_estimators': 575, 'max_depth': 9, 'learning_rate': 0.19534429329234987, 'subsample': 0.5746217324147455, 'colsample_bytree': 0.6679038590244616, 'gamma': 4.034997554048882}. Best is trial 65 with value: 0.5660509029058814.


[I 2025-08-31 14:06:04,015] Trial 72 finished with value: 0.5345263990845288 and parameters: {'n_estimators': 637, 'max_depth': 8, 'learning_rate': 0.1815577167538053, 'subsample': 0.6108598358163603, 'colsample_bytree': 0.6106112537641089, 'gamma': 2.733486494500493}. Best is trial 65 with value: 0.5660509029058814.


[I 2025-08-31 14:06:04,432] Trial 73 finished with value: 0.5667282625753016 and parameters: {'n_estimators': 486, 'max_depth': 8, 'learning_rate': 0.16512939735640558, 'subsample': 0.5868476724491993, 'colsample_bytree': 0.6344942395348465, 'gamma': 3.2321893745042587}. Best is trial 73 with value: 0.5667282625753016.


[I 2025-08-31 14:06:04,817] Trial 74 finished with value: 0.5532398877073341 and parameters: {'n_estimators': 418, 'max_depth': 8, 'learning_rate': 0.16530380160461186, 'subsample': 0.5522898451068334, 'colsample_bytree': 0.6378413540615979, 'gamma': 3.315302107066526}. Best is trial 73 with value: 0.5667282625753016.


[I 2025-08-31 14:06:05,439] Trial 75 finished with value: 0.5564678311014608 and parameters: {'n_estimators': 956, 'max_depth': 8, 'learning_rate': 0.13589923099384227, 'subsample': 0.5022219999122578, 'colsample_bytree': 0.5968729444048784, 'gamma': 3.09241233798809}. Best is trial 73 with value: 0.5667282625753016.


[I 2025-08-31 14:06:06,067] Trial 76 finished with value: 0.5480650533165006 and parameters: {'n_estimators': 904, 'max_depth': 8, 'learning_rate': 0.12165530401962613, 'subsample': 0.5327128511289141, 'colsample_bytree': 0.559220111291697, 'gamma': 2.9894335910429706}. Best is trial 73 with value: 0.5667282625753016.


[I 2025-08-31 14:06:06,723] Trial 77 finished with value: 0.5605137691771982 and parameters: {'n_estimators': 998, 'max_depth': 7, 'learning_rate': 0.1346822336703239, 'subsample': 0.5877507054142339, 'colsample_bytree': 0.6003617543447219, 'gamma': 2.473725411984071}. Best is trial 73 with value: 0.5667282625753016.


[I 2025-08-31 14:06:07,145] Trial 78 finished with value: 0.5299646819833514 and parameters: {'n_estimators': 344, 'max_depth': 7, 'learning_rate': 0.10573402293655437, 'subsample': 0.5911972583371671, 'colsample_bytree': 0.6244137835778623, 'gamma': 2.516012310238123}. Best is trial 73 with value: 0.5667282625753016.


[I 2025-08-31 14:06:07,680] Trial 79 finished with value: 0.5211135283381678 and parameters: {'n_estimators': 379, 'max_depth': 7, 'learning_rate': 0.1467061676703219, 'subsample': 0.5917547129206764, 'colsample_bytree': 0.5911198499131503, 'gamma': 1.2134524150594581}. Best is trial 73 with value: 0.5667282625753016.


[I 2025-08-31 14:06:08,318] Trial 80 finished with value: 0.5678889260705875 and parameters: {'n_estimators': 318, 'max_depth': 7, 'learning_rate': 0.2981769403832059, 'subsample': 0.5781031834852014, 'colsample_bytree': 0.6453421327273385, 'gamma': 0.170415496595339}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:09,028] Trial 81 finished with value: 0.5450417680832637 and parameters: {'n_estimators': 450, 'max_depth': 7, 'learning_rate': 0.2819898474342588, 'subsample': 0.5760412813232815, 'colsample_bytree': 0.6473664195289379, 'gamma': 0.21932442235760352}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:09,747] Trial 82 finished with value: 0.5636704082680228 and parameters: {'n_estimators': 320, 'max_depth': 7, 'learning_rate': 0.2993667192189414, 'subsample': 0.5452327567837089, 'colsample_bytree': 0.6659298662159026, 'gamma': 0.08789458948944845}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:10,362] Trial 83 finished with value: 0.5570030670362477 and parameters: {'n_estimators': 266, 'max_depth': 7, 'learning_rate': 0.297896757425459, 'subsample': 0.6174275308848064, 'colsample_bytree': 0.6676548527931003, 'gamma': 0.1522124034966938}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:10,866] Trial 84 finished with value: 0.5672848798929374 and parameters: {'n_estimators': 338, 'max_depth': 7, 'learning_rate': 0.2847717550515545, 'subsample': 0.5879965253947455, 'colsample_bytree': 0.6308999060412991, 'gamma': 0.5494644671768598}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:11,392] Trial 85 finished with value: 0.56165003927726 and parameters: {'n_estimators': 317, 'max_depth': 7, 'learning_rate': 0.28407707077882366, 'subsample': 0.6028913091410829, 'colsample_bytree': 0.6292302229181407, 'gamma': 0.5547815226723054}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:11,879] Trial 86 finished with value: 0.5571581829043318 and parameters: {'n_estimators': 315, 'max_depth': 7, 'learning_rate': 0.2891144712634639, 'subsample': 0.5873733725357395, 'colsample_bytree': 0.6302827116522913, 'gamma': 0.48992744691591356}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:12,323] Trial 87 finished with value: 0.5388422562305779 and parameters: {'n_estimators': 315, 'max_depth': 7, 'learning_rate': 0.29945954033337613, 'subsample': 0.6050161088641378, 'colsample_bytree': 0.6181670436635114, 'gamma': 0.6761314367122488}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:12,896] Trial 88 finished with value: 0.5540565767263178 and parameters: {'n_estimators': 364, 'max_depth': 7, 'learning_rate': 0.2713664529236474, 'subsample': 0.5436255134340945, 'colsample_bytree': 0.5677690871455429, 'gamma': 0.35023794238593636}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:13,303] Trial 89 finished with value: 0.5457210063941274 and parameters: {'n_estimators': 331, 'max_depth': 7, 'learning_rate': 0.25583395749006677, 'subsample': 0.582575806204744, 'colsample_bytree': 0.6435153930213875, 'gamma': 1.3795487802809347}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:13,555] Trial 90 finished with value: 0.541260329159476 and parameters: {'n_estimators': 298, 'max_depth': 7, 'learning_rate': 0.27705968189444935, 'subsample': 0.5262549314455098, 'colsample_bytree': 0.6642753213236094, 'gamma': 4.444343832220373}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:14,260] Trial 91 finished with value: 0.5428671901613047 and parameters: {'n_estimators': 402, 'max_depth': 6, 'learning_rate': 0.2839804745720288, 'subsample': 0.6302297894559068, 'colsample_bytree': 0.6084000970472703, 'gamma': 0.08530231328977696}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:14,668] Trial 92 finished with value: 0.5406722699165387 and parameters: {'n_estimators': 212, 'max_depth': 6, 'learning_rate': 0.2932892579256254, 'subsample': 0.5996649495098146, 'colsample_bytree': 0.630552549430785, 'gamma': 0.5583621431294172}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:15,277] Trial 93 finished with value: 0.5315720436240358 and parameters: {'n_estimators': 486, 'max_depth': 7, 'learning_rate': 0.2774927440847223, 'subsample': 0.6538328998881066, 'colsample_bytree': 0.6527502342451561, 'gamma': 0.29695571560413336}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:15,852] Trial 94 finished with value: 0.5488462698630602 and parameters: {'n_estimators': 365, 'max_depth': 6, 'learning_rate': 0.2657196184620566, 'subsample': 0.5544751915741821, 'colsample_bytree': 0.6907851141709899, 'gamma': 0.42386996339953437}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:16,744] Trial 95 finished with value: 0.5461558049796043 and parameters: {'n_estimators': 429, 'max_depth': 7, 'learning_rate': 0.2725242572369994, 'subsample': 0.5699958104218119, 'colsample_bytree': 0.5908395804625127, 'gamma': 0.017988802297915107}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:17,155] Trial 96 finished with value: 0.5332961004089177 and parameters: {'n_estimators': 255, 'max_depth': 7, 'learning_rate': 0.29013743960461447, 'subsample': 0.6206155881016476, 'colsample_bytree': 0.6437654075350091, 'gamma': 0.7657294193504767}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:17,417] Trial 97 finished with value: 0.5378557295466739 and parameters: {'n_estimators': 391, 'max_depth': 6, 'learning_rate': 0.28669178757240144, 'subsample': 0.5810003922662718, 'colsample_bytree': 0.6615348131799641, 'gamma': 4.976187871551751}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:18,211] Trial 98 finished with value: 0.5408136841441322 and parameters: {'n_estimators': 467, 'max_depth': 7, 'learning_rate': 0.25947211947712684, 'subsample': 0.6392820603699133, 'colsample_bytree': 0.6263273308956734, 'gamma': 0.1202224495705052}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:18,910] Trial 99 finished with value: 0.5536482773694332 and parameters: {'n_estimators': 338, 'max_depth': 8, 'learning_rate': 0.15797974264504502, 'subsample': 0.5987892166302997, 'colsample_bytree': 0.6120760538021346, 'gamma': 0.6087760059582625}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:19,245] Trial 100 finished with value: 0.5619709362924763 and parameters: {'n_estimators': 294, 'max_depth': 6, 'learning_rate': 0.24984539937366412, 'subsample': 0.6142889048068676, 'colsample_bytree': 0.7057340984241922, 'gamma': 2.0714702072539968}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:19,559] Trial 101 finished with value: 0.5308929223124874 and parameters: {'n_estimators': 288, 'max_depth': 6, 'learning_rate': 0.240615655766864, 'subsample': 0.6138458323250836, 'colsample_bytree': 0.7035418890935007, 'gamma': 2.1689926684910446}. Best is trial 80 with value: 0.5678889260705875.


[I 2025-08-31 14:06:19,908] Trial 102 finished with value: 0.5699618406271464 and parameters: {'n_estimators': 267, 'max_depth': 6, 'learning_rate': 0.25385920773304743, 'subsample': 0.56658628874106, 'colsample_bytree': 0.7206254696580231, 'gamma': 1.9615514982318594}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:20,238] Trial 103 finished with value: 0.5539470801780666 and parameters: {'n_estimators': 269, 'max_depth': 6, 'learning_rate': 0.2503861034615346, 'subsample': 0.5652186142219667, 'colsample_bytree': 0.7244804844659137, 'gamma': 2.3440580102915405}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:20,645] Trial 104 finished with value: 0.5346935823226937 and parameters: {'n_estimators': 305, 'max_depth': 7, 'learning_rate': 0.23051868080969184, 'subsample': 0.5498702996059155, 'colsample_bytree': 0.680453283369758, 'gamma': 1.5886127611125058}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:21,025] Trial 105 finished with value: 0.5462452715890984 and parameters: {'n_estimators': 330, 'max_depth': 7, 'learning_rate': 0.25082512501636806, 'subsample': 0.5844648245020936, 'colsample_bytree': 0.6357116589064802, 'gamma': 1.9961134899468669}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:21,272] Trial 106 finished with value: 0.5277952999823805 and parameters: {'n_estimators': 191, 'max_depth': 6, 'learning_rate': 0.296553288952554, 'subsample': 0.5723108563066797, 'colsample_bytree': 0.6498644850472131, 'gamma': 2.536779676216831}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:21,771] Trial 107 finished with value: 0.5304014645869666 and parameters: {'n_estimators': 1000, 'max_depth': 5, 'learning_rate': 0.21739512284149762, 'subsample': 0.9186941329882417, 'colsample_bytree': 0.7137731378248058, 'gamma': 1.9113204126568526}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:22,160] Trial 108 finished with value: 0.5408306116292758 and parameters: {'n_estimators': 236, 'max_depth': 7, 'learning_rate': 0.2681165912857481, 'subsample': 0.5409501544355476, 'colsample_bytree': 0.6020577109453482, 'gamma': 1.0914874526469318}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:22,675] Trial 109 finished with value: 0.5560718087482214 and parameters: {'n_estimators': 585, 'max_depth': 6, 'learning_rate': 0.20262000062589647, 'subsample': 0.5561272544163036, 'colsample_bytree': 0.6926056691658227, 'gamma': 1.6932845394583762}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:23,333] Trial 110 finished with value: 0.5395849928330397 and parameters: {'n_estimators': 857, 'max_depth': 7, 'learning_rate': 0.12736287861998036, 'subsample': 0.5154157731904069, 'colsample_bytree': 0.617835729776931, 'gamma': 2.372989086310986}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:23,835] Trial 111 finished with value: 0.5607891145871285 and parameters: {'n_estimators': 509, 'max_depth': 6, 'learning_rate': 0.2741863270325561, 'subsample': 0.6057515624541405, 'colsample_bytree': 0.7292861182777612, 'gamma': 0.9011556991931111}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:24,242] Trial 112 finished with value: 0.5316695098527588 and parameters: {'n_estimators': 507, 'max_depth': 6, 'learning_rate': 0.2623143517630732, 'subsample': 0.6048101002780876, 'colsample_bytree': 0.7640956128554306, 'gamma': 2.2096832173341454}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:24,735] Trial 113 finished with value: 0.5539437046668755 and parameters: {'n_estimators': 530, 'max_depth': 6, 'learning_rate': 0.28526334408555826, 'subsample': 0.5965904899476212, 'colsample_bytree': 0.6720680412923401, 'gamma': 0.9217300119519579}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:25,211] Trial 114 finished with value: 0.5504535769984962 and parameters: {'n_estimators': 276, 'max_depth': 7, 'learning_rate': 0.29372144480286966, 'subsample': 0.5742601298819546, 'colsample_bytree': 0.7244334634237489, 'gamma': 0.6926185016947551}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:25,767] Trial 115 finished with value: 0.5316117888874263 and parameters: {'n_estimators': 461, 'max_depth': 6, 'learning_rate': 0.27401912975306747, 'subsample': 0.5644079388154254, 'colsample_bytree': 0.6345100911859614, 'gamma': 0.5224353959761154}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:26,182] Trial 116 finished with value: 0.5403541130670478 and parameters: {'n_estimators': 517, 'max_depth': 6, 'learning_rate': 0.27990244893644267, 'subsample': 0.5307362699376419, 'colsample_bytree': 0.731532021429239, 'gamma': 2.6572725846402694}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:26,552] Trial 117 finished with value: 0.5661910315100191 and parameters: {'n_estimators': 552, 'max_depth': 7, 'learning_rate': 0.25578663839012944, 'subsample': 0.5861079241308734, 'colsample_bytree': 0.7419539922257856, 'gamma': 3.828789283374143}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:26,799] Trial 118 finished with value: 0.5500702434659227 and parameters: {'n_estimators': 216, 'max_depth': 7, 'learning_rate': 0.2460636170444579, 'subsample': 0.5857540521510022, 'colsample_bytree': 0.785441773073189, 'gamma': 3.7811980022051235}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:27,034] Trial 119 finished with value: 0.5441807746036552 and parameters: {'n_estimators': 306, 'max_depth': 7, 'learning_rate': 0.2527462574638514, 'subsample': 0.619990796725469, 'colsample_bytree': 0.7380496666980723, 'gamma': 4.79432561668347}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:27,290] Trial 120 finished with value: 0.5279716674808579 and parameters: {'n_estimators': 261, 'max_depth': 5, 'learning_rate': 0.2372844346505655, 'subsample': 0.6087488477163199, 'colsample_bytree': 0.759947701618299, 'gamma': 4.011072900603731}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:27,716] Trial 121 finished with value: 0.56995344902062 and parameters: {'n_estimators': 619, 'max_depth': 7, 'learning_rate': 0.2655613519293515, 'subsample': 0.5867935626694073, 'colsample_bytree': 0.7717808687597444, 'gamma': 3.422673653837116}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:28,258] Trial 122 finished with value: 0.5608518879258815 and parameters: {'n_estimators': 957, 'max_depth': 7, 'learning_rate': 0.26574709412933545, 'subsample': 0.590264146485603, 'colsample_bytree': 0.7422193354544706, 'gamma': 3.491867438114267}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:28,555] Trial 123 finished with value: 0.5394932516873525 and parameters: {'n_estimators': 358, 'max_depth': 7, 'learning_rate': 0.26398433205047933, 'subsample': 0.6344762226514704, 'colsample_bytree': 0.7410671615610452, 'gamma': 3.324853404498667}. Best is trial 102 with value: 0.5699618406271464.


[I 2025-08-31 14:06:28,922] Trial 124 finished with value: 0.5675549111365958 and parameters: {'n_estimators': 497, 'max_depth': 7, 'learning_rate': 0.26738141695733236, 'subsample': 0.5931595488992469, 'colsample_bytree': 0.7499406933935214, 'gamma': 3.4151368953833}. Best is trial 102 with value: 0.5699618406271464.


Best XGBoost Params: {'n_estimators': 267, 'max_depth': 6, 'learning_rate': 0.25385920773304743, 'subsample': 0.56658628874106, 'colsample_bytree': 0.7206254696580231, 'gamma': 1.9615514982318594}
--- Training and Logging Champion XGBoost Model ---


Champion XGBoost F1 Score: 0.6112




Successfully registered model 'etf-xgboost-predictor'.
Created version '1' of model 'etf-xgboost-predictor'.


Champion model logged and registered.
--- Logging SHAP assets for the dashboard ---


SHAP explainer and X_test data successfully logged as artifacts.


In [6]:
# --- MLP Challenger Model ---
# Step 1: Imports and Data Scaling
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

# Deep learning models are sensitive to feature scale. We must standardize our data.
# We fit the scaler ONLY on the training data to prevent data leakage from the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data successfully scaled.")
print(f"Shape of scaled training data: {X_train_scaled.shape}")

Data successfully scaled.
Shape of scaled training data: (2380, 32)


In [7]:
# Convert numpy arrays to PyTorch Tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Create TensorDatasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create DataLoaders to handle batching
# We don't shuffle time-series data to preserve temporal order if needed, 
# but for a simple MLP, shuffling is often acceptable. Let's keep it False for rigor.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print("PyTorch Tensors and DataLoaders created.")

PyTorch Tensors and DataLoaders created.


In [8]:
# Step 2: Define the MLP Architecture
class ETF_MLP(nn.Module):
    def __init__(self, input_size, hidden_size_1=128, hidden_size_2=64, dropout_rate=0.5):
        """
        Initializes the MLP model.
        
        Args:
            input_size (int): The number of input features.
            hidden_size_1 (int): Number of neurons in the first hidden layer.
            hidden_size_2 (int): Number of neurons in the second hidden layer.
            dropout_rate (float): The dropout probability.
        """
        super(ETF_MLP, self).__init__()
        
        # --- Layer Definitions ---
        self.layer_1 = nn.Linear(input_size, hidden_size_1)
        self.bn_1 = nn.BatchNorm1d(hidden_size_1)
        
        self.layer_2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.bn_2 = nn.BatchNorm1d(hidden_size_2)
        
        self.output_layer = nn.Linear(hidden_size_2, 1)
        
        # --- Activation and Regularization ---
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)
        
    def forward(self, x):
        """ The forward pass of the model. """
        # First hidden layer
        x = self.layer_1(x)
        x = self.bn_1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Second hidden layer
        x = self.layer_2(x)
        x = self.bn_2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Output layer with sigmoid for binary classification
        x = torch.sigmoid(self.output_layer(x))
        return x

# Instantiate the model to test
input_features = X_train.shape[1]
model_mlp = ETF_MLP(input_size=input_features)
print("MLP Model Architecture:")
print(model_mlp)

MLP Model Architecture:
ETF_MLP(
  (layer_1): Linear(in_features=32, out_features=128, bias=True)
  (bn_1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer_2): Linear(in_features=128, out_features=64, bias=True)
  (bn_2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (output_layer): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
)


In [9]:
# Step 3: Manual MLP Training and Evaluation

# --- Configuration ---
INPUT_SIZE = X_train.shape[1]
LEARNING_RATE = 0.001
EPOCHS = 50

# --- Model, Loss, Optimizer (Demonstrates 5.2, 5.3) ---
model_mlp = ETF_MLP(input_size=INPUT_SIZE, dropout_rate=0.4)
criterion = nn.BCELoss() # Binary Cross Entropy Loss for binary classification
optimizer = torch.optim.Adam(model_mlp.parameters(), lr=LEARNING_RATE) # Adam Optimizer
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS) # LR Schedule

# --- MLflow Logging ---
with mlflow.start_run(run_name="MLP_Manual_Baseline") as run:
    mlflow.log_params({"learning_rate": LEARNING_RATE, "epochs": EPOCHS, "optimizer": "Adam"})
    
    # --- Training Loop ---
    for epoch in range(EPOCHS):
        model_mlp.train() # Set model to training mode
        for features, labels in train_loader:
            # Forward pass
            outputs = model_mlp(features)
            loss = criterion(outputs, labels)
            
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        # Update learning rate
        scheduler.step()
        
        # --- Evaluation on Test Set ---
        model_mlp.eval() # Set model to evaluation mode
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for features, labels in test_loader:
                outputs = model_mlp(features)
                predicted = (outputs > 0.5).float()
                all_preds.extend(predicted.numpy())
                all_labels.extend(labels.numpy())
        
        # Calculate and log F1 score for the epoch
        f1 = f1_score(all_labels, all_preds)
        mlflow.log_metric("test_f1_score", f1, step=epoch)

    print(f"Final MLP F1 Score from manual run: {f1:.4f}")
    # Log the final model
    mlflow.pytorch.log_model(model_mlp, "mlp-model")

Final MLP F1 Score from manual run: 0.1261


In [10]:
# In notebooks/04_model_training_and_evaluation.ipynb, THE FINAL CELL

from mlflow.tracking import MlflowClient
import joblib

# Train and log the final champion model
with mlflow.start_run(run_name="XGBoost_Tuned_Champion") as run:
    print("--- Training and Logging Champion XGBoost Model ---")
    best_params = study.best_params
    
    mlflow.log_params(best_params)
    
    # Train the model
    model_xgb = xgb.XGBClassifier(**best_params, random_state=42)
    model_xgb.fit(X_train, y_train)
    
    # Evaluate and log metrics
    y_pred_xgb = model_xgb.predict(X_test)
    f1 = f1_score(y_test, y_pred_xgb)
    mlflow.log_metric("f1_score", f1)
    print(f"Champion XGBoost F1 Score: {f1:.4f}")

    # --- Log the Model, SHAP Assets, and Promote ---
    
    # 1. Log the model itself. This will create a new version.
    model_info = mlflow.xgboost.log_model(
        xgb_model=model_xgb,
        artifact_path="xgb-model",
        registered_model_name=MODEL_NAME
    )

    # 2. Find the new model version using the run_id from the logged model
    client = MlflowClient()
    run_id = model_info.run_id
    model_versions = client.search_model_versions(f"run_id='{run_id}'")
    new_version = model_versions[0].version # The first result should be our new version
    print(f"Model registered as '{MODEL_NAME}' version {new_version}.")

    # 3. Log SHAP assets for the dashboard
    explainer = shap.TreeExplainer(model_xgb)
    joblib.dump(explainer, "explainer.joblib")
    X_test.to_parquet("X_test.parquet")
    mlflow.log_artifact("explainer.joblib", artifact_path="shap_assets")
    mlflow.log_artifact("X_test.parquet", artifact_path="shap_assets")
    print("SHAP assets logged.")
    
    # 4. Promote this new version to the "Production" stage
    print(f"\n--- Promoting Model Version {new_version} to Production ---")
    client.transition_model_version_stage(
        name=MODEL_NAME,
        version=new_version,
        stage="Production",
        archive_existing_versions=True  # Safely archive any old production model
    )
    print(f"Successfully promoted model version {new_version} to 'Production'.")

--- Training and Logging Champion XGBoost Model ---
Champion XGBoost F1 Score: 0.6112




Registered model 'etf-xgboost-predictor' already exists. Creating a new version of this model...
Created version '2' of model 'etf-xgboost-predictor'.
  client.transition_model_version_stage(


Model registered as 'etf-xgboost-predictor' version 2.
SHAP assets logged.

--- Promoting Model Version 2 to Production ---
Successfully promoted model version 2 to 'Production'.
