In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import optuna
import shap
import mlflow
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import os
import sys

# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from config import PROCESSED_DATA_PATH, MLFLOW_TRACKING_URI, MODEL_NAME

# --- Set up MLflow CORRECTLY ---
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment("ETF_Trend_Prediction")

# --- Load Data ---
data = pd.read_parquet(PROCESSED_DATA_PATH)

# Separate features (X) and target (y)
X = data.drop('target', axis=1)
y = data['target']

  import pkg_resources  # noqa: TID251


In [2]:
# Define the chronological split point
# For example, use data up to the end of 2021 for training, and 2022 onwards for testing.
train_end_date = '2021-12-31'
test_start_date = '2022-01-01'

X_train = X.loc[:train_end_date]
y_train = y.loc[:train_end_date]

X_test = X.loc[test_start_date:]
y_test = y.loc[test_start_date:]

print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

mlflow.set_experiment("ETF_Trend_Prediction")

Training set size: 2380
Test set size: 908


<Experiment: artifact_location='file:C:\\Users\\dawso\\Dev\\Personal\\AIGrind\\mlops-etf-forecasting\\mlruns/492371556750088850', creation_time=1756663515713, experiment_id='492371556750088850', last_update_time=1756663515713, lifecycle_stage='active', name='ETF_Trend_Prediction', tags={}>

In [3]:
# Train Logistic Regression
with mlflow.start_run(run_name="LogisticRegression_Baseline"):
    model_lr = LogisticRegression(max_iter=1000, random_state=42)
    model_lr.fit(X_train, y_train)
    y_pred_lr = model_lr.predict(X_test)
    
    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_lr))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_lr))
    
    # Log the model
    mlflow.sklearn.log_model(model_lr, "logistic-regression-model")
    
    print(f"Logistic Regression F1 Score: {f1_score(y_test, y_pred_lr):.4f}")

# Train Random Forest
with mlflow.start_run(run_name="RandomForest_Baseline"):
    model_rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
    model_rf.fit(X_train, y_train)
    y_pred_rf = model_rf.predict(X_test)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_rf))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_rf))
    
    # Log the model
    mlflow.sklearn.log_model(model_rf, "random-forest-model")
    
    print(f"Random Forest F1 Score: {f1_score(y_test, y_pred_rf):.4f}")

Logistic Regression F1 Score: 0.6968


Random Forest F1 Score: 0.6150


In [4]:
def objective(trial):
    # Define the search space for hyperparameters
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'random_state': 42
    }
    
    model = xgb.XGBClassifier(**params)
    
    # Use TimeSeriesSplit for robust cross-validation
    tscv = TimeSeriesSplit(n_splits=5)
    score = cross_val_score(model, X_train, y_train, cv=tscv, scoring='f1', n_jobs=1).mean()
    
    return score

In [5]:
# Run the study to find the best params
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=125) 

best_params = study.best_params
print("Best XGBoost Params:", best_params)

# Train the final XGBoost model with the best parameters and log to MLflow
with mlflow.start_run(run_name="XGBoost_Tuned_Champion") as run:
    print("--- Training and Logging Champion XGBoost Model ---")
    best_params = study.best_params
    
    # Log hyperparameters
    mlflow.log_params(best_params)
    
    # Train the model
    model_xgb = xgb.XGBClassifier(**best_params, random_state=42)
    model_xgb.fit(X_train, y_train)
    
    # Evaluate and log metrics
    y_pred_xgb = model_xgb.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred_xgb)
    f1 = f1_score(y_test, y_pred_xgb)
    roc_auc = roc_auc_score(y_test, model_xgb.predict_proba(X_test)[:, 1])
    
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("roc_auc", roc_auc)
    
    print(f"Champion XGBoost F1 Score: {f1:.4f}")

    # Log the model itself
    mlflow.xgboost.log_model(
        xgb_model=model_xgb,
        artifact_path="xgb-model",
        registered_model_name="etf-xgboost-predictor" # This also registers the model
    )
    print("Champion model logged and registered.")

    # --- NEW CODE TO ADD STARTS HERE ---
    
    print("--- Logging SHAP assets for the dashboard ---")
    import joblib # Make sure joblib is imported
    
    # 1. Create and save the SHAP explainer object
    explainer = shap.TreeExplainer(model_xgb)
    joblib.dump(explainer, "explainer.joblib")
    
    # 2. Save the X_test dataframe needed for plotting
    X_test.to_parquet("X_test.parquet")
    
    # 3. Log these files as MLflow artifacts in specific sub-folders
    mlflow.log_artifact("explainer.joblib", artifact_path="shap_explainer")
    mlflow.log_artifact("X_test.parquet", artifact_path="shap_xtest")
    
    print("SHAP explainer and X_test data successfully logged as artifacts.")

[I 2025-08-31 14:14:36,888] A new study created in memory with name: no-name-b1b799c3-be07-4c52-9f94-5b8872c19cd0


[I 2025-08-31 14:14:37,551] Trial 0 finished with value: 0.5699325032049504 and parameters: {'n_estimators': 228, 'max_depth': 6, 'learning_rate': 0.27510275619761154, 'subsample': 0.5927076736717027, 'colsample_bytree': 0.5294098136302359, 'gamma': 1.253941844064339}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:39,548] Trial 1 finished with value: 0.5381634766210224 and parameters: {'n_estimators': 626, 'max_depth': 9, 'learning_rate': 0.16844856325555027, 'subsample': 0.7848356651791066, 'colsample_bytree': 0.8614868388232892, 'gamma': 0.16947113218465149}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:40,547] Trial 2 finished with value: 0.5476173218853527 and parameters: {'n_estimators': 554, 'max_depth': 10, 'learning_rate': 0.21926859993548978, 'subsample': 0.5080156216522229, 'colsample_bytree': 0.7222682171069764, 'gamma': 4.238724597983925}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:41,222] Trial 3 finished with value: 0.5405376347540621 and parameters: {'n_estimators': 152, 'max_depth': 7, 'learning_rate': 0.17905008105851217, 'subsample': 0.6211649265528605, 'colsample_bytree': 0.77722328900958, 'gamma': 1.4913123810017914}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:42,714] Trial 4 finished with value: 0.5506539002875716 and parameters: {'n_estimators': 952, 'max_depth': 4, 'learning_rate': 0.08708048619077477, 'subsample': 0.5003494974331048, 'colsample_bytree': 0.688503784198395, 'gamma': 4.842209985508516}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:43,138] Trial 5 finished with value: 0.5277949695039141 and parameters: {'n_estimators': 177, 'max_depth': 9, 'learning_rate': 0.08878375342851594, 'subsample': 0.700857297594419, 'colsample_bytree': 0.6509625895041766, 'gamma': 4.478402061989276}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:43,830] Trial 6 finished with value: 0.5412798977337722 and parameters: {'n_estimators': 401, 'max_depth': 10, 'learning_rate': 0.18391979932825964, 'subsample': 0.514294705453458, 'colsample_bytree': 0.672418265728138, 'gamma': 4.072684086279362}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:45,024] Trial 7 finished with value: 0.5168293691621476 and parameters: {'n_estimators': 803, 'max_depth': 9, 'learning_rate': 0.09355725750045774, 'subsample': 0.7160114379966204, 'colsample_bytree': 0.8518789777289473, 'gamma': 3.3818496224560564}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:48,010] Trial 8 finished with value: 0.5241232468813474 and parameters: {'n_estimators': 906, 'max_depth': 6, 'learning_rate': 0.03977969990369104, 'subsample': 0.8140520033654783, 'colsample_bytree': 0.9695817343816178, 'gamma': 0.39115709892666384}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:49,218] Trial 9 finished with value: 0.5354303094300285 and parameters: {'n_estimators': 903, 'max_depth': 7, 'learning_rate': 0.17982901138262838, 'subsample': 0.6575154579498144, 'colsample_bytree': 0.794603044575634, 'gamma': 4.0676008267430515}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:49,764] Trial 10 finished with value: 0.47037776465322 and parameters: {'n_estimators': 332, 'max_depth': 3, 'learning_rate': 0.2913079151168004, 'subsample': 0.973991141108774, 'colsample_bytree': 0.5106367747218568, 'gamma': 2.1998692567780873}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:50,845] Trial 11 finished with value: 0.5493287749338589 and parameters: {'n_estimators': 686, 'max_depth': 4, 'learning_rate': 0.2986832274118393, 'subsample': 0.5716234821715022, 'colsample_bytree': 0.5226479620426315, 'gamma': 1.363515743408383}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:51,563] Trial 12 finished with value: 0.5184022884649371 and parameters: {'n_estimators': 375, 'max_depth': 5, 'learning_rate': 0.11033696913936758, 'subsample': 0.5995533547652643, 'colsample_bytree': 0.5989450785765714, 'gamma': 2.845567798833977}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:54,168] Trial 13 finished with value: 0.5480443597920955 and parameters: {'n_estimators': 746, 'max_depth': 5, 'learning_rate': 0.010121901550033965, 'subsample': 0.502190318880018, 'colsample_bytree': 0.5930758731402428, 'gamma': 1.2564669099425663}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:54,902] Trial 14 finished with value: 0.49291628107592045 and parameters: {'n_estimators': 475, 'max_depth': 3, 'learning_rate': 0.2504567355805112, 'subsample': 0.8651191329416661, 'colsample_bytree': 0.5758321452322066, 'gamma': 2.2699028240384687}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:56,248] Trial 15 finished with value: 0.536595539982857 and parameters: {'n_estimators': 983, 'max_depth': 6, 'learning_rate': 0.1274671265466979, 'subsample': 0.5679397200717453, 'colsample_bytree': 0.693500993062705, 'gamma': 4.856660359070823}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:56,850] Trial 16 finished with value: 0.5413391807172865 and parameters: {'n_estimators': 227, 'max_depth': 4, 'learning_rate': 0.06161283230627366, 'subsample': 0.6405476751227666, 'colsample_bytree': 0.9831540644792539, 'gamma': 3.2642243853459862}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:57,695] Trial 17 finished with value: 0.5629658683850354 and parameters: {'n_estimators': 283, 'max_depth': 5, 'learning_rate': 0.13478684163257965, 'subsample': 0.5684995812552311, 'colsample_bytree': 0.6448590008282474, 'gamma': 0.7638302055711893}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:58,405] Trial 18 finished with value: 0.5468307401413686 and parameters: {'n_estimators': 262, 'max_depth': 8, 'learning_rate': 0.23272120950605665, 'subsample': 0.6743247860815532, 'colsample_bytree': 0.6201810559764067, 'gamma': 0.79851712700275}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:58,816] Trial 19 finished with value: 0.5436265855048362 and parameters: {'n_estimators': 109, 'max_depth': 5, 'learning_rate': 0.13957760355329027, 'subsample': 0.5742994578396745, 'colsample_bytree': 0.5500185167271312, 'gamma': 1.8089958586912118}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:14:59,478] Trial 20 finished with value: 0.5189876531543998 and parameters: {'n_estimators': 295, 'max_depth': 6, 'learning_rate': 0.2682823068390183, 'subsample': 0.7519471181423611, 'colsample_bytree': 0.6318698196073671, 'gamma': 0.7421020928283744}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:00,781] Trial 21 finished with value: 0.5247704573330884 and parameters: {'n_estimators': 471, 'max_depth': 4, 'learning_rate': 0.06689076308680239, 'subsample': 0.5453408906200479, 'colsample_bytree': 0.7368906207568057, 'gamma': 0.8322371630159835}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:02,232] Trial 22 finished with value: 0.5446188500229169 and parameters: {'n_estimators': 470, 'max_depth': 5, 'learning_rate': 0.2129832348205342, 'subsample': 0.6170542559757944, 'colsample_bytree': 0.5535508231330197, 'gamma': 0.0025447475625327565}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:02,816] Trial 23 finished with value: 0.5423464619500045 and parameters: {'n_estimators': 225, 'max_depth': 4, 'learning_rate': 0.14542666094031673, 'subsample': 0.5466542322228575, 'colsample_bytree': 0.6939862422857243, 'gamma': 1.8853154096941762}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:03,607] Trial 24 finished with value: 0.5479461080010374 and parameters: {'n_estimators': 387, 'max_depth': 7, 'learning_rate': 0.11724703396859752, 'subsample': 0.6024866631983026, 'colsample_bytree': 0.8040063305280173, 'gamma': 2.8289747768716613}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:04,812] Trial 25 finished with value: 0.5186127660588594 and parameters: {'n_estimators': 581, 'max_depth': 3, 'learning_rate': 0.08576600926537092, 'subsample': 0.535513090015833, 'colsample_bytree': 0.6554486775146815, 'gamma': 1.0517549960900512}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:06,093] Trial 26 finished with value: 0.4919006107778726 and parameters: {'n_estimators': 796, 'max_depth': 6, 'learning_rate': 0.20593618927883475, 'subsample': 0.9784690251126198, 'colsample_bytree': 0.7211395266047046, 'gamma': 0.4785733406071775}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:06,948] Trial 27 finished with value: 0.5295456778962245 and parameters: {'n_estimators': 309, 'max_depth': 5, 'learning_rate': 0.03993461253022841, 'subsample': 0.6845890777081397, 'colsample_bytree': 0.5451192138329511, 'gamma': 3.478775425187059}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:07,422] Trial 28 finished with value: 0.5115173478655967 and parameters: {'n_estimators': 177, 'max_depth': 4, 'learning_rate': 0.15467306645349532, 'subsample': 0.890833603913016, 'colsample_bytree': 0.5026795394291874, 'gamma': 1.6074557044991364}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:09,140] Trial 29 finished with value: 0.5498833867428653 and parameters: {'n_estimators': 634, 'max_depth': 8, 'learning_rate': 0.16779777864342, 'subsample': 0.7371738223246134, 'colsample_bytree': 0.9320046270642479, 'gamma': 0.28937472952926235}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:10,512] Trial 30 finished with value: 0.5373648146507295 and parameters: {'n_estimators': 988, 'max_depth': 5, 'learning_rate': 0.12604566956524646, 'subsample': 0.5828236798968497, 'colsample_bytree': 0.6163990261430766, 'gamma': 4.9656320685977215}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:11,967] Trial 31 finished with value: 0.5461443570238214 and parameters: {'n_estimators': 634, 'max_depth': 8, 'learning_rate': 0.1595250406991854, 'subsample': 0.7596626497382732, 'colsample_bytree': 0.8779155582580318, 'gamma': 0.3442644500886876}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:13,055] Trial 32 finished with value: 0.5424617014885973 and parameters: {'n_estimators': 667, 'max_depth': 8, 'learning_rate': 0.1987322793933632, 'subsample': 0.8409620977760488, 'colsample_bytree': 0.8912626719552, 'gamma': 1.1228819880125553}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:14,523] Trial 33 finished with value: 0.5529785650477681 and parameters: {'n_estimators': 555, 'max_depth': 7, 'learning_rate': 0.27326591830539326, 'subsample': 0.920188360765469, 'colsample_bytree': 0.9474526190373421, 'gamma': 0.03229860355925618}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:15,454] Trial 34 finished with value: 0.5230140073978373 and parameters: {'n_estimators': 570, 'max_depth': 7, 'learning_rate': 0.27595514573043756, 'subsample': 0.9263545097033375, 'colsample_bytree': 0.7626355799946322, 'gamma': 0.6731287017210905}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:16,163] Trial 35 finished with value: 0.5322815674821378 and parameters: {'n_estimators': 105, 'max_depth': 6, 'learning_rate': 0.23716079762793674, 'subsample': 0.7877824184532872, 'colsample_bytree': 0.8347937834463892, 'gamma': 0.05114361186249994}. Best is trial 0 with value: 0.5699325032049504.


[I 2025-08-31 14:15:17,206] Trial 36 finished with value: 0.5711042484786059 and parameters: {'n_estimators': 446, 'max_depth': 7, 'learning_rate': 0.259924316426161, 'subsample': 0.5282578764895877, 'colsample_bytree': 0.710584461862011, 'gamma': 0.976393850577562}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:18,251] Trial 37 finished with value: 0.5520780639870988 and parameters: {'n_estimators': 505, 'max_depth': 7, 'learning_rate': 0.26754744521455004, 'subsample': 0.5313567328221265, 'colsample_bytree': 0.7238732969944977, 'gamma': 1.018153552416838}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:19,285] Trial 38 finished with value: 0.5599104371071963 and parameters: {'n_estimators': 424, 'max_depth': 7, 'learning_rate': 0.2526152352233069, 'subsample': 0.6478545532152983, 'colsample_bytree': 0.6656853927041354, 'gamma': 0.531015861148532}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:20,163] Trial 39 finished with value: 0.5506776085816056 and parameters: {'n_estimators': 423, 'max_depth': 9, 'learning_rate': 0.24300424405709592, 'subsample': 0.6355197234487765, 'colsample_bytree': 0.6603059916771605, 'gamma': 1.6920955264976891}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:20,961] Trial 40 finished with value: 0.5689989110705069 and parameters: {'n_estimators': 335, 'max_depth': 6, 'learning_rate': 0.2556385746351236, 'subsample': 0.6530316422843686, 'colsample_bytree': 0.7055049869336107, 'gamma': 0.5604418239329919}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:21,762] Trial 41 finished with value: 0.5603335375369309 and parameters: {'n_estimators': 359, 'max_depth': 6, 'learning_rate': 0.2570973273368593, 'subsample': 0.6635988986448411, 'colsample_bytree': 0.7054973313942071, 'gamma': 0.6242628088839194}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:22,473] Trial 42 finished with value: 0.5345087925479064 and parameters: {'n_estimators': 357, 'max_depth': 6, 'learning_rate': 0.22794072308682448, 'subsample': 0.712282575697607, 'colsample_bytree': 0.7557732578610287, 'gamma': 1.3832925199488324}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:23,113] Trial 43 finished with value: 0.5523220389417005 and parameters: {'n_estimators': 256, 'max_depth': 6, 'learning_rate': 0.28894244017262266, 'subsample': 0.6150073446631961, 'colsample_bytree': 0.7061244497453107, 'gamma': 0.8660675692288181}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:23,868] Trial 44 finished with value: 0.5105540006449517 and parameters: {'n_estimators': 334, 'max_depth': 6, 'learning_rate': 0.19296379812769865, 'subsample': 0.671441410068718, 'colsample_bytree': 0.6381730468227158, 'gamma': 1.2280969847279373}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:24,506] Trial 45 finished with value: 0.5646278019036609 and parameters: {'n_estimators': 195, 'max_depth': 5, 'learning_rate': 0.2577220546792138, 'subsample': 0.5899001290949478, 'colsample_bytree': 0.7898364078868859, 'gamma': 0.5820467227504418}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:25,012] Trial 46 finished with value: 0.5539855422150007 and parameters: {'n_estimators': 174, 'max_depth': 5, 'learning_rate': 0.22362441974368338, 'subsample': 0.5185874087577004, 'colsample_bytree': 0.7930237448117724, 'gamma': 2.0321481191477853}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:25,574] Trial 47 finished with value: 0.5709051108699701 and parameters: {'n_estimators': 212, 'max_depth': 5, 'learning_rate': 0.2867048986243989, 'subsample': 0.589483620561279, 'colsample_bytree': 0.7827076250088331, 'gamma': 1.499299189279345}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:26,117] Trial 48 finished with value: 0.5498877028330753 and parameters: {'n_estimators': 215, 'max_depth': 5, 'learning_rate': 0.28558378286122776, 'subsample': 0.5887113994175514, 'colsample_bytree': 0.8256249175086302, 'gamma': 1.5628885768641383}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:26,674] Trial 49 finished with value: 0.5556904657079815 and parameters: {'n_estimators': 162, 'max_depth': 6, 'learning_rate': 0.25802896416804166, 'subsample': 0.5511201228727496, 'colsample_bytree': 0.779197427198984, 'gamma': 1.0042112621095687}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:27,576] Trial 50 finished with value: 0.5429924053027757 and parameters: {'n_estimators': 216, 'max_depth': 10, 'learning_rate': 0.29913094955304464, 'subsample': 0.6305042348913676, 'colsample_bytree': 0.8205510358113615, 'gamma': 0.24880318869769535}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:28,210] Trial 51 finished with value: 0.5423273856739439 and parameters: {'n_estimators': 288, 'max_depth': 5, 'learning_rate': 0.2860938655648502, 'subsample': 0.5621730195612106, 'colsample_bytree': 0.7431219348519994, 'gamma': 1.3846737142528687}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:28,769] Trial 52 finished with value: 0.5455819105173482 and parameters: {'n_estimators': 139, 'max_depth': 5, 'learning_rate': 0.24536068128648528, 'subsample': 0.5993419463764824, 'colsample_bytree': 0.7632235676236558, 'gamma': 0.544198958546182}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:29,441] Trial 53 finished with value: 0.5565490371803253 and parameters: {'n_estimators': 271, 'max_depth': 4, 'learning_rate': 0.26241362271292124, 'subsample': 0.5017085766708984, 'colsample_bytree': 0.5784432437771505, 'gamma': 0.8455179206214563}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:30,209] Trial 54 finished with value: 0.5428805655315134 and parameters: {'n_estimators': 324, 'max_depth': 5, 'learning_rate': 0.2785088893757166, 'subsample': 0.5238770456078963, 'colsample_bytree': 0.681273131022597, 'gamma': 1.2028898047598822}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:30,801] Trial 55 finished with value: 0.5460473143397666 and parameters: {'n_estimators': 245, 'max_depth': 7, 'learning_rate': 0.23750920951717824, 'subsample': 0.5685490263433168, 'colsample_bytree': 0.852856954404247, 'gamma': 2.6402474079069154}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:31,396] Trial 56 finished with value: 0.5468689156238373 and parameters: {'n_estimators': 205, 'max_depth': 6, 'learning_rate': 0.26906998410044386, 'subsample': 0.5986830197327739, 'colsample_bytree': 0.7810689636955728, 'gamma': 0.979269716652031}. Best is trial 36 with value: 0.5711042484786059.


[I 2025-08-31 14:15:32,401] Trial 57 finished with value: 0.5746132542759954 and parameters: {'n_estimators': 416, 'max_depth': 5, 'learning_rate': 0.2967925848200456, 'subsample': 0.6209659554642839, 'colsample_bytree': 0.7248873343680917, 'gamma': 0.4423149403021891}. Best is trial 57 with value: 0.5746132542759954.


[I 2025-08-31 14:15:33,227] Trial 58 finished with value: 0.5534065568728281 and parameters: {'n_estimators': 418, 'max_depth': 4, 'learning_rate': 0.29618130727428094, 'subsample': 0.6187531982912906, 'colsample_bytree': 0.8050478862386737, 'gamma': 2.213449691149713}. Best is trial 57 with value: 0.5746132542759954.


[I 2025-08-31 14:15:34,335] Trial 59 finished with value: 0.5485506241863574 and parameters: {'n_estimators': 516, 'max_depth': 6, 'learning_rate': 0.2814900525175713, 'subsample': 0.6510777590965783, 'colsample_bytree': 0.7266630943124697, 'gamma': 0.35408037833639716}. Best is trial 57 with value: 0.5746132542759954.


[I 2025-08-31 14:15:35,579] Trial 60 finished with value: 0.5498996670677924 and parameters: {'n_estimators': 453, 'max_depth': 5, 'learning_rate': 0.21449977472113407, 'subsample': 0.6963347662950051, 'colsample_bytree': 0.7409826338242879, 'gamma': 0.21248943657589747}. Best is trial 57 with value: 0.5746132542759954.


[I 2025-08-31 14:15:36,404] Trial 61 finished with value: 0.5691490975096507 and parameters: {'n_estimators': 347, 'max_depth': 5, 'learning_rate': 0.2992569258032857, 'subsample': 0.5484219764005247, 'colsample_bytree': 0.7686634560651153, 'gamma': 0.6903646960376788}. Best is trial 57 with value: 0.5746132542759954.


[I 2025-08-31 14:15:37,255] Trial 62 finished with value: 0.5511826806360334 and parameters: {'n_estimators': 380, 'max_depth': 5, 'learning_rate': 0.2916029859900139, 'subsample': 0.583937119111945, 'colsample_bytree': 0.7061002370384455, 'gamma': 0.482649262244741}. Best is trial 57 with value: 0.5746132542759954.


[I 2025-08-31 14:15:38,014] Trial 63 finished with value: 0.5408218811376149 and parameters: {'n_estimators': 343, 'max_depth': 4, 'learning_rate': 0.29888393043825195, 'subsample': 0.5547992257757424, 'colsample_bytree': 0.7742371557843531, 'gamma': 0.6482494845829736}. Best is trial 57 with value: 0.5746132542759954.


[I 2025-08-31 14:15:38,719] Trial 64 finished with value: 0.5658233112116364 and parameters: {'n_estimators': 306, 'max_depth': 6, 'learning_rate': 0.26512860330690263, 'subsample': 0.5391928987092532, 'colsample_bytree': 0.7939835209636936, 'gamma': 1.3423280983626418}. Best is trial 57 with value: 0.5746132542759954.


[I 2025-08-31 14:15:39,406] Trial 65 finished with value: 0.5788421459710156 and parameters: {'n_estimators': 307, 'max_depth': 6, 'learning_rate': 0.27885671840132886, 'subsample': 0.529467163326835, 'colsample_bytree': 0.7498929565095431, 'gamma': 1.4672857075199708}. Best is trial 65 with value: 0.5788421459710156.


[I 2025-08-31 14:15:40,182] Trial 66 finished with value: 0.5728917339755606 and parameters: {'n_estimators': 407, 'max_depth': 7, 'learning_rate': 0.28121633795261575, 'subsample': 0.5142623691610133, 'colsample_bytree': 0.6750137490546799, 'gamma': 1.9720421124615748}. Best is trial 65 with value: 0.5788421459710156.


[I 2025-08-31 14:15:41,072] Trial 67 finished with value: 0.5522535046064918 and parameters: {'n_estimators': 503, 'max_depth': 7, 'learning_rate': 0.28073360895040683, 'subsample': 0.5145761492006315, 'colsample_bytree': 0.6831569982444546, 'gamma': 1.7089004561770405}. Best is trial 65 with value: 0.5788421459710156.


[I 2025-08-31 14:15:41,817] Trial 68 finished with value: 0.5496168599064808 and parameters: {'n_estimators': 412, 'max_depth': 8, 'learning_rate': 0.2734601397587655, 'subsample': 0.5303920343741091, 'colsample_bytree': 0.5258276553971722, 'gamma': 1.9812760736277197}. Best is trial 65 with value: 0.5788421459710156.


[I 2025-08-31 14:15:42,617] Trial 69 finished with value: 0.5514430813825048 and parameters: {'n_estimators': 451, 'max_depth': 7, 'learning_rate': 0.28918908180924563, 'subsample': 0.5035685873562149, 'colsample_bytree': 0.7470374570698842, 'gamma': 2.369795967730076}. Best is trial 65 with value: 0.5788421459710156.


[I 2025-08-31 14:15:43,384] Trial 70 finished with value: 0.5842173002780164 and parameters: {'n_estimators': 391, 'max_depth': 7, 'learning_rate': 0.29996663018752107, 'subsample': 0.5543429548673404, 'colsample_bytree': 0.7293463858251527, 'gamma': 1.4753357191987948}. Best is trial 70 with value: 0.5842173002780164.


[I 2025-08-31 14:15:44,148] Trial 71 finished with value: 0.5676670998932398 and parameters: {'n_estimators': 384, 'max_depth': 7, 'learning_rate': 0.2798229977522687, 'subsample': 0.5702622125285951, 'colsample_bytree': 0.7220606450043906, 'gamma': 1.464968155512514}. Best is trial 70 with value: 0.5842173002780164.


[I 2025-08-31 14:15:44,951] Trial 72 finished with value: 0.5701170695924912 and parameters: {'n_estimators': 438, 'max_depth': 8, 'learning_rate': 0.29915939588820567, 'subsample': 0.549671016333922, 'colsample_bytree': 0.7596201060152034, 'gamma': 1.7948605795407973}. Best is trial 70 with value: 0.5842173002780164.


[I 2025-08-31 14:15:45,888] Trial 73 finished with value: 0.5358910595852489 and parameters: {'n_estimators': 532, 'max_depth': 9, 'learning_rate': 0.29095575425368597, 'subsample': 0.5228190954033021, 'colsample_bytree': 0.673009197116496, 'gamma': 1.8131229821870127}. Best is trial 70 with value: 0.5842173002780164.


[I 2025-08-31 14:15:46,705] Trial 74 finished with value: 0.5534882919444146 and parameters: {'n_estimators': 456, 'max_depth': 8, 'learning_rate': 0.2725523289573576, 'subsample': 0.5379977088887526, 'colsample_bytree': 0.6063624562688603, 'gamma': 2.126040977112912}. Best is trial 70 with value: 0.5842173002780164.


[I 2025-08-31 14:15:47,594] Trial 75 finished with value: 0.5659680153719504 and parameters: {'n_estimators': 487, 'max_depth': 8, 'learning_rate': 0.2833846387785682, 'subsample': 0.5568928010531802, 'colsample_bytree': 0.7544081140004567, 'gamma': 1.6259775422394438}. Best is trial 70 with value: 0.5842173002780164.


[I 2025-08-31 14:15:48,367] Trial 76 finished with value: 0.548874624067025 and parameters: {'n_estimators': 399, 'max_depth': 7, 'learning_rate': 0.24824812375043032, 'subsample': 0.516572138736016, 'colsample_bytree': 0.7361909616027427, 'gamma': 1.9413036364733312}. Best is trial 70 with value: 0.5842173002780164.


[I 2025-08-31 14:15:49,430] Trial 77 finished with value: 0.5376652135598032 and parameters: {'n_estimators': 603, 'max_depth': 8, 'learning_rate': 0.26514090770591325, 'subsample': 0.6087631295926946, 'colsample_bytree': 0.8132450080161245, 'gamma': 1.1731501927263084}. Best is trial 70 with value: 0.5842173002780164.


[I 2025-08-31 14:15:50,015] Trial 78 finished with value: 0.5864757019312599 and parameters: {'n_estimators': 242, 'max_depth': 7, 'learning_rate': 0.29215807925149717, 'subsample': 0.5781374861927562, 'colsample_bytree': 0.6928123197653162, 'gamma': 1.7941511522346378}. Best is trial 78 with value: 0.5864757019312599.


[I 2025-08-31 14:15:50,846] Trial 79 finished with value: 0.5923633752861017 and parameters: {'n_estimators': 447, 'max_depth': 8, 'learning_rate': 0.2893240343339127, 'subsample': 0.5773485236030977, 'colsample_bytree': 0.6973766245107151, 'gamma': 2.3881849209885626}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:15:51,278] Trial 80 finished with value: 0.5477509388734695 and parameters: {'n_estimators': 137, 'max_depth': 7, 'learning_rate': 0.2918491673239651, 'subsample': 0.5755515251343379, 'colsample_bytree': 0.6935525846761047, 'gamma': 2.42298907974134}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:15:52,149] Trial 81 finished with value: 0.5689044463821002 and parameters: {'n_estimators': 436, 'max_depth': 9, 'learning_rate': 0.2757882609896741, 'subsample': 0.5419125815666358, 'colsample_bytree': 0.7130706648580363, 'gamma': 2.625157870973077}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:15:52,905] Trial 82 finished with value: 0.5305388769032365 and parameters: {'n_estimators': 387, 'max_depth': 8, 'learning_rate': 0.293944057132751, 'subsample': 0.5593204936785869, 'colsample_bytree': 0.7338214560316402, 'gamma': 1.789319863241937}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:15:53,447] Trial 83 finished with value: 0.5088040523087092 and parameters: {'n_estimators': 243, 'max_depth': 8, 'learning_rate': 0.2847810968288442, 'subsample': 0.5828299273476231, 'colsample_bytree': 0.6487332655128379, 'gamma': 2.291484929772331}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:15:54,253] Trial 84 finished with value: 0.5672285315969561 and parameters: {'n_estimators': 485, 'max_depth': 7, 'learning_rate': 0.2999130369275067, 'subsample': 0.6257810941810398, 'colsample_bytree': 0.6936362171913751, 'gamma': 2.0724037070976147}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:15:55,117] Trial 85 finished with value: 0.5840000564893975 and parameters: {'n_estimators': 362, 'max_depth': 9, 'learning_rate': 0.27242463469830486, 'subsample': 0.528678623698161, 'colsample_bytree': 0.6753573172653763, 'gamma': 1.5295014458545904}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:15:55,949] Trial 86 finished with value: 0.56940416691671 and parameters: {'n_estimators': 368, 'max_depth': 9, 'learning_rate': 0.2609614020691533, 'subsample': 0.531067409662578, 'colsample_bytree': 0.6649442668031806, 'gamma': 1.4867983066178923}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:15:56,708] Trial 87 finished with value: 0.5767028172035185 and parameters: {'n_estimators': 317, 'max_depth': 10, 'learning_rate': 0.27368977433205066, 'subsample': 0.509046065815787, 'colsample_bytree': 0.6864939581872808, 'gamma': 1.530085388332654}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:15:57,570] Trial 88 finished with value: 0.5581541697319743 and parameters: {'n_estimators': 403, 'max_depth': 10, 'learning_rate': 0.27001616058530553, 'subsample': 0.5099895958188047, 'colsample_bytree': 0.6246040408216776, 'gamma': 1.326084168209102}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:15:58,314] Trial 89 finished with value: 0.5339419020991698 and parameters: {'n_estimators': 320, 'max_depth': 10, 'learning_rate': 0.24936347628891326, 'subsample': 0.5234023734252145, 'colsample_bytree': 0.674201641858103, 'gamma': 1.677455908195201}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:15:58,987] Trial 90 finished with value: 0.5625498639031022 and parameters: {'n_estimators': 286, 'max_depth': 9, 'learning_rate': 0.2774528434349865, 'subsample': 0.5012672048040228, 'colsample_bytree': 0.7147972025373892, 'gamma': 1.9238581011774545}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:15:59,584] Trial 91 finished with value: 0.5458158701844198 and parameters: {'n_estimators': 307, 'max_depth': 10, 'learning_rate': 0.2868718592310879, 'subsample': 0.9973230088297802, 'colsample_bytree': 0.6841621745205811, 'gamma': 1.507307154568931}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:00,309] Trial 92 finished with value: 0.5542657031526984 and parameters: {'n_estimators': 268, 'max_depth': 9, 'learning_rate': 0.2675017583685499, 'subsample': 0.5775689013571621, 'colsample_bytree': 0.6565406627883307, 'gamma': 1.079421633434289}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:00,869] Trial 93 finished with value: 0.5591900193380315 and parameters: {'n_estimators': 192, 'max_depth': 7, 'learning_rate': 0.28397448981816326, 'subsample': 0.5619687242266769, 'colsample_bytree': 0.7022209777615311, 'gamma': 1.4260191213248719}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:01,523] Trial 94 finished with value: 0.5504710244166129 and parameters: {'n_estimators': 364, 'max_depth': 7, 'learning_rate': 0.275066652997734, 'subsample': 0.5334731338662119, 'colsample_bytree': 0.6368045250825047, 'gamma': 3.1018134150654095}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:02,352] Trial 95 finished with value: 0.5323832062681293 and parameters: {'n_estimators': 349, 'max_depth': 10, 'learning_rate': 0.24064348342241124, 'subsample': 0.5962165755639597, 'colsample_bytree': 0.7307733443157804, 'gamma': 1.2816275329092268}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:03,126] Trial 96 finished with value: 0.5512412213862126 and parameters: {'n_estimators': 403, 'max_depth': 7, 'learning_rate': 0.25293173109656025, 'subsample': 0.5161881408689606, 'colsample_bytree': 0.718936355435237, 'gamma': 2.1406557128179142}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:03,718] Trial 97 finished with value: 0.5398839966997955 and parameters: {'n_estimators': 231, 'max_depth': 7, 'learning_rate': 0.2615499255181563, 'subsample': 0.5443520019364999, 'colsample_bytree': 0.6951188015375981, 'gamma': 1.573100550988573}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:04,604] Trial 98 finished with value: 0.5363829552398343 and parameters: {'n_estimators': 466, 'max_depth': 6, 'learning_rate': 0.28111245522660505, 'subsample': 0.6072197308179319, 'colsample_bytree': 0.6754295048790633, 'gamma': 0.9473366952761236}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:05,370] Trial 99 finished with value: 0.5306658609246439 and parameters: {'n_estimators': 431, 'max_depth': 8, 'learning_rate': 0.29001023083840693, 'subsample': 0.5659560764925475, 'colsample_bytree': 0.6425530704035844, 'gamma': 2.535922429566625}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:06,047] Trial 100 finished with value: 0.5360135520831278 and parameters: {'n_estimators': 322, 'max_depth': 7, 'learning_rate': 0.29183154685407253, 'subsample': 0.5268701579241429, 'colsample_bytree': 0.665151121870759, 'gamma': 1.7353510423995095}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:07,345] Trial 101 finished with value: 0.5282226490688606 and parameters: {'n_estimators': 891, 'max_depth': 8, 'learning_rate': 0.29544294520453673, 'subsample': 0.5499787953210481, 'colsample_bytree': 0.7570650614609661, 'gamma': 1.8571358439115282}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:08,186] Trial 102 finished with value: 0.5449461937910095 and parameters: {'n_estimators': 440, 'max_depth': 9, 'learning_rate': 0.2768486051174256, 'subsample': 0.5516483380866587, 'colsample_bytree': 0.7468555710413805, 'gamma': 1.6141479965964038}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:08,875] Trial 103 finished with value: 0.5526609080868696 and parameters: {'n_estimators': 283, 'max_depth': 8, 'learning_rate': 0.26885298326337065, 'subsample': 0.5107351799520242, 'colsample_bytree': 0.7853129128097783, 'gamma': 1.8492933034602679}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:09,671] Trial 104 finished with value: 0.5634896124347959 and parameters: {'n_estimators': 394, 'max_depth': 10, 'learning_rate': 0.28700182294396015, 'subsample': 0.538226254661008, 'colsample_bytree': 0.7673506164660597, 'gamma': 2.0245155472934013}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:10,508] Trial 105 finished with value: 0.5465698625957156 and parameters: {'n_estimators': 369, 'max_depth': 8, 'learning_rate': 0.29451728766769475, 'subsample': 0.5909489988024722, 'colsample_bytree': 0.7012637225485503, 'gamma': 1.240908696680029}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:11,873] Trial 106 finished with value: 0.5611207251607329 and parameters: {'n_estimators': 545, 'max_depth': 7, 'learning_rate': 0.09863610936097036, 'subsample': 0.5776309748654316, 'colsample_bytree': 0.7320730017530325, 'gamma': 1.5229896818107127}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:14,082] Trial 107 finished with value: 0.5703239358490195 and parameters: {'n_estimators': 254, 'max_depth': 9, 'learning_rate': 0.01417889589130475, 'subsample': 0.5625744134429335, 'colsample_bytree': 0.7121638951504536, 'gamma': 1.7248453424959334}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:15,305] Trial 108 finished with value: 0.5584790039379106 and parameters: {'n_estimators': 259, 'max_depth': 9, 'learning_rate': 0.03877276220441542, 'subsample': 0.5232030761447259, 'colsample_bytree': 0.685603471008399, 'gamma': 2.2737551353927605}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:16,128] Trial 109 finished with value: 0.5385147252530085 and parameters: {'n_estimators': 306, 'max_depth': 9, 'learning_rate': 0.1506607301329136, 'subsample': 0.6391722502412713, 'colsample_bytree': 0.7133434950465964, 'gamma': 1.4286618577751398}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:16,681] Trial 110 finished with value: 0.5350597127677786 and parameters: {'n_estimators': 231, 'max_depth': 6, 'learning_rate': 0.06817156084224536, 'subsample': 0.5652429185361081, 'colsample_bytree': 0.6512049704119356, 'gamma': 4.333629473277626}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:17,702] Trial 111 finished with value: 0.5480470584366638 and parameters: {'n_estimators': 493, 'max_depth': 9, 'learning_rate': 0.1790407121081119, 'subsample': 0.5440520533653684, 'colsample_bytree': 0.7505740454951638, 'gamma': 1.7287822833662598}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:18,622] Trial 112 finished with value: 0.5577787406482222 and parameters: {'n_estimators': 413, 'max_depth': 7, 'learning_rate': 0.2801862226409646, 'subsample': 0.5101809659124631, 'colsample_bytree': 0.725731562618513, 'gamma': 1.1162467967327847}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:19,375] Trial 113 finished with value: 0.5516752387374778 and parameters: {'n_estimators': 341, 'max_depth': 8, 'learning_rate': 0.29993243980351225, 'subsample': 0.5546456200700853, 'colsample_bytree': 0.7096202507468181, 'gamma': 1.6328839125804806}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:21,102] Trial 114 finished with value: 0.536534754058112 and parameters: {'n_estimators': 472, 'max_depth': 10, 'learning_rate': 0.016496312777809316, 'subsample': 0.8122086956365027, 'colsample_bytree': 0.7413745089245563, 'gamma': 2.809383493299402}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:21,927] Trial 115 finished with value: 0.5457310117897869 and parameters: {'n_estimators': 441, 'max_depth': 7, 'learning_rate': 0.27156947307912993, 'subsample': 0.5340537821682003, 'colsample_bytree': 0.6969351593413188, 'gamma': 1.7837314583481718}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:22,487] Trial 116 finished with value: 0.5146508140908643 and parameters: {'n_estimators': 274, 'max_depth': 3, 'learning_rate': 0.2852415740425611, 'subsample': 0.5866548395230661, 'colsample_bytree': 0.6860945325649569, 'gamma': 1.934243289939073}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:23,131] Trial 117 finished with value: 0.5196747928175094 and parameters: {'n_estimators': 295, 'max_depth': 6, 'learning_rate': 0.2927473904297593, 'subsample': 0.7333911935234985, 'colsample_bytree': 0.7716578176590856, 'gamma': 0.9192771710155077}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:23,954] Trial 118 finished with value: 0.546569269696484 and parameters: {'n_estimators': 376, 'max_depth': 8, 'learning_rate': 0.26282528427124396, 'subsample': 0.5014680429471798, 'colsample_bytree': 0.6754127178288187, 'gamma': 1.3320243275305765}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:24,520] Trial 119 finished with value: 0.542689898489498 and parameters: {'n_estimators': 250, 'max_depth': 7, 'learning_rate': 0.25680016729810584, 'subsample': 0.6109817368010805, 'colsample_bytree': 0.7205266559891151, 'gamma': 2.0510243604724527}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:24,998] Trial 120 finished with value: 0.5630461821958549 and parameters: {'n_estimators': 188, 'max_depth': 5, 'learning_rate': 0.2743385244145994, 'subsample': 0.5726963077495979, 'colsample_bytree': 0.7602240888938269, 'gamma': 2.1736470622474213}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:25,576] Trial 121 finished with value: 0.5491846404176863 and parameters: {'n_estimators': 212, 'max_depth': 6, 'learning_rate': 0.2833699446792863, 'subsample': 0.5996372099982002, 'colsample_bytree': 0.8001070030555485, 'gamma': 1.5317009258816254}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:26,068] Trial 122 finished with value: 0.57398502862824 and parameters: {'n_estimators': 155, 'max_depth': 6, 'learning_rate': 0.2881240425247761, 'subsample': 0.5544340495316074, 'colsample_bytree': 0.5588581215512881, 'gamma': 1.1997045734639873}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:26,518] Trial 123 finished with value: 0.5739055088515354 and parameters: {'n_estimators': 133, 'max_depth': 6, 'learning_rate': 0.2891818317470053, 'subsample': 0.5584242786314517, 'colsample_bytree': 0.5163840173585238, 'gamma': 1.1704947243695125}. Best is trial 79 with value: 0.5923633752861017.


[I 2025-08-31 14:16:27,006] Trial 124 finished with value: 0.5339912929603658 and parameters: {'n_estimators': 151, 'max_depth': 6, 'learning_rate': 0.2885664886095217, 'subsample': 0.5604571238746316, 'colsample_bytree': 0.537647433397039, 'gamma': 1.1623372763067736}. Best is trial 79 with value: 0.5923633752861017.


Best XGBoost Params: {'n_estimators': 447, 'max_depth': 8, 'learning_rate': 0.2893240343339127, 'subsample': 0.5773485236030977, 'colsample_bytree': 0.6973766245107151, 'gamma': 2.3881849209885626}
--- Training and Logging Champion XGBoost Model ---


Champion XGBoost F1 Score: 0.5879




Registered model 'etf-xgboost-predictor' already exists. Creating a new version of this model...
Created version '3' of model 'etf-xgboost-predictor'.


Champion model logged and registered.
--- Logging SHAP assets for the dashboard ---


SHAP explainer and X_test data successfully logged as artifacts.


In [6]:
# --- MLP Challenger Model ---
# Step 1: Imports and Data Scaling
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

# Deep learning models are sensitive to feature scale. We must standardize our data.
# We fit the scaler ONLY on the training data to prevent data leakage from the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data successfully scaled.")
print(f"Shape of scaled training data: {X_train_scaled.shape}")

Data successfully scaled.
Shape of scaled training data: (2380, 32)


In [7]:
# Convert numpy arrays to PyTorch Tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Create TensorDatasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create DataLoaders to handle batching
# We don't shuffle time-series data to preserve temporal order if needed, 
# but for a simple MLP, shuffling is often acceptable. Let's keep it False for rigor.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print("PyTorch Tensors and DataLoaders created.")

PyTorch Tensors and DataLoaders created.


In [8]:
# Step 2: Define the MLP Architecture
class ETF_MLP(nn.Module):
    def __init__(self, input_size, hidden_size_1=128, hidden_size_2=64, dropout_rate=0.5):
        """
        Initializes the MLP model.
        
        Args:
            input_size (int): The number of input features.
            hidden_size_1 (int): Number of neurons in the first hidden layer.
            hidden_size_2 (int): Number of neurons in the second hidden layer.
            dropout_rate (float): The dropout probability.
        """
        super(ETF_MLP, self).__init__()
        
        # --- Layer Definitions ---
        self.layer_1 = nn.Linear(input_size, hidden_size_1)
        self.bn_1 = nn.BatchNorm1d(hidden_size_1)
        
        self.layer_2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.bn_2 = nn.BatchNorm1d(hidden_size_2)
        
        self.output_layer = nn.Linear(hidden_size_2, 1)
        
        # --- Activation and Regularization ---
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)
        
    def forward(self, x):
        """ The forward pass of the model. """
        # First hidden layer
        x = self.layer_1(x)
        x = self.bn_1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Second hidden layer
        x = self.layer_2(x)
        x = self.bn_2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Output layer with sigmoid for binary classification
        x = torch.sigmoid(self.output_layer(x))
        return x

# Instantiate the model to test
input_features = X_train.shape[1]
model_mlp = ETF_MLP(input_size=input_features)
print("MLP Model Architecture:")
print(model_mlp)

MLP Model Architecture:
ETF_MLP(
  (layer_1): Linear(in_features=32, out_features=128, bias=True)
  (bn_1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer_2): Linear(in_features=128, out_features=64, bias=True)
  (bn_2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (output_layer): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
)


In [9]:
# Step 3: Manual MLP Training and Evaluation

# --- Configuration ---
INPUT_SIZE = X_train.shape[1]
LEARNING_RATE = 0.001
EPOCHS = 50

# --- Model, Loss, Optimizer (Demonstrates 5.2, 5.3) ---
model_mlp = ETF_MLP(input_size=INPUT_SIZE, dropout_rate=0.4)
criterion = nn.BCELoss() # Binary Cross Entropy Loss for binary classification
optimizer = torch.optim.Adam(model_mlp.parameters(), lr=LEARNING_RATE) # Adam Optimizer
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS) # LR Schedule

# --- MLflow Logging ---
with mlflow.start_run(run_name="MLP_Manual_Baseline") as run:
    mlflow.log_params({"learning_rate": LEARNING_RATE, "epochs": EPOCHS, "optimizer": "Adam"})
    
    # --- Training Loop ---
    for epoch in range(EPOCHS):
        model_mlp.train() # Set model to training mode
        for features, labels in train_loader:
            # Forward pass
            outputs = model_mlp(features)
            loss = criterion(outputs, labels)
            
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        # Update learning rate
        scheduler.step()
        
        # --- Evaluation on Test Set ---
        model_mlp.eval() # Set model to evaluation mode
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for features, labels in test_loader:
                outputs = model_mlp(features)
                predicted = (outputs > 0.5).float()
                all_preds.extend(predicted.numpy())
                all_labels.extend(labels.numpy())
        
        # Calculate and log F1 score for the epoch
        f1 = f1_score(all_labels, all_preds)
        mlflow.log_metric("test_f1_score", f1, step=epoch)

    print(f"Final MLP F1 Score from manual run: {f1:.4f}")
    # Log the final model
    mlflow.pytorch.log_model(model_mlp, "mlp-model")

Final MLP F1 Score from manual run: 0.1692


In [None]:
# In notebooks/04_model_training_and_evaluation.ipynb, THE FINAL CELL

from mlflow.tracking import MlflowClient
import joblib

# Train and log the final champion model
with mlflow.start_run(run_name="XGBoost_Tuned_Champion") as run:
    print("--- Training and Logging Champion XGBoost Model ---")
    best_params = study.best_params
    
    mlflow.log_params(best_params)
    
    # Train the model
    model_xgb = xgb.XGBClassifier(**best_params, random_state=42)
    model_xgb.fit(X_train, y_train)
    
    # Evaluate and log metrics
    y_pred_xgb = model_xgb.predict(X_test)
    f1 = f1_score(y_test, y_pred_xgb)
    mlflow.log_metric("f1_score", f1)
    print(f"Champion XGBoost F1 Score: {f1:.4f}")

    # --- Log the Model, SHAP Assets, and Promote ---
    
    # 1. Log the model itself. This will create a new version.
    model_info = mlflow.xgboost.log_model(
        xgb_model=model_xgb,
        artifact_path="xgb-model",
        registered_model_name=MODEL_NAME
    )

    # 2. Find the new model version using the run_id from the logged model
    client = MlflowClient()
    run_id = model_info.run_id
    model_versions = client.search_model_versions(f"run_id='{run_id}'")
    new_version = model_versions[0].version # The first result should be our new version
    print(f"Model registered as '{MODEL_NAME}' version {new_version}.")

    # 3. Log SHAP assets and test data for the dashboard
    explainer = shap.TreeExplainer(model_xgb)
    joblib.dump(explainer, "explainer.joblib")
    X_test.to_parquet("X_test.parquet")
    y_test.to_frame().to_parquet("y_test.parquet") # Save y_test for historical comparison

    mlflow.log_artifact("explainer.joblib", artifact_path="shap_assets")
    mlflow.log_artifact("X_test.parquet", artifact_path="shap_assets")
    mlflow.log_artifact("y_test.parquet", artifact_path="shap_assets") # Log y_test artifact
    print("SHAP assets and test data logged.")
    
    # 4. Promote this new version to the "Production" stage
    print(f"\n--- Promoting Model Version {new_version} to Production ---")
    client.transition_model_version_stage(
        name=MODEL_NAME,
        version=new_version,
        stage="Production",
        archive_existing_versions=True  # Safely archive any old production model
    )
    print(f"Successfully promoted model version {new_version} to 'Production'.")

--- Training and Logging Champion XGBoost Model ---


Champion XGBoost F1 Score: 0.5879




Registered model 'etf-xgboost-predictor' already exists. Creating a new version of this model...
Created version '4' of model 'etf-xgboost-predictor'.


Model registered as 'etf-xgboost-predictor' version 4.


SHAP assets logged.

--- Promoting Model Version 4 to Production ---
Successfully promoted model version 4 to 'Production'.


  client.transition_model_version_stage(
