In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import optuna
import shap
import mlflow
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import os
import sys

# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from config import PROCESSED_DATA_PATH, MLFLOW_TRACKING_URI, MODEL_NAME

# --- Set up MLflow CORRECTLY ---
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment("ETF_Trend_Prediction")

# --- Load Data ---
data = pd.read_parquet(PROCESSED_DATA_PATH)

# Separate features (X) and target (y)
X = data.drop('target', axis=1)
y = data['target']

  import pkg_resources  # noqa: TID251


2025/08/31 14:33:15 INFO mlflow.tracking.fluent: Experiment with name 'ETF_Trend_Prediction' does not exist. Creating a new experiment.


In [2]:
# Define the chronological split point
# For example, use data up to the end of 2021 for training, and 2022 onwards for testing.
train_end_date = '2021-12-31'
test_start_date = '2022-01-01'

X_train = X.loc[:train_end_date]
y_train = y.loc[:train_end_date]

X_test = X.loc[test_start_date:]
y_test = y.loc[test_start_date:]

print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

mlflow.set_experiment("ETF_Trend_Prediction")

Training set size: 2380
Test set size: 908


<Experiment: artifact_location='file:C:\\Users\\dawso\\Dev\\Personal\\AIGrind\\mlops-etf-forecasting\\mlruns/729986291096297859', creation_time=1756665195382, experiment_id='729986291096297859', last_update_time=1756665195382, lifecycle_stage='active', name='ETF_Trend_Prediction', tags={}>

In [3]:
# Train Logistic Regression
with mlflow.start_run(run_name="LogisticRegression_Baseline"):
    model_lr = LogisticRegression(max_iter=1000, random_state=42)
    model_lr.fit(X_train, y_train)
    y_pred_lr = model_lr.predict(X_test)
    y_proba_lr = model_lr.predict_proba(X_test)[:, 1]
    
    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_lr))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_lr))
    mlflow.log_metric("roc_auc", roc_auc_score(y_test, y_proba_lr))
    
    # Log the model
    mlflow.sklearn.log_model(model_lr, "logistic-regression-model")
    
    print(f"Logistic Regression F1 Score: {f1_score(y_test, y_pred_lr):.4f}")

# Train Random Forest
with mlflow.start_run(run_name="RandomForest_Baseline"):
    model_rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
    model_rf.fit(X_train, y_train)
    y_pred_rf = model_rf.predict(X_test)
    y_proba_rf = model_rf.predict_proba(X_test)[:, 1]

    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_rf))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_rf))
    mlflow.log_metric("roc_auc", roc_auc_score(y_test, y_proba_rf))
    
    # Log the model
    mlflow.sklearn.log_model(model_rf, "random-forest-model")
    
    print(f"Random Forest F1 Score: {f1_score(y_test, y_pred_rf):.4f}")

Logistic Regression F1 Score: 0.6968


Random Forest F1 Score: 0.6254


In [4]:
def objective(trial):
    # Define the search space for hyperparameters
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'random_state': 42
    }
    
    model = xgb.XGBClassifier(**params)
    
    # Use TimeSeriesSplit for robust cross-validation
    tscv = TimeSeriesSplit(n_splits=5)
    score = cross_val_score(model, X_train, y_train, cv=tscv, scoring='f1', n_jobs=1).mean()
    
    return score

In [5]:
# Run the study to find the best params
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=125) 

best_params = study.best_params
print("Best XGBoost Params:", best_params)

# Train the final XGBoost model with the best parameters and log to MLflow
with mlflow.start_run(run_name="XGBoost_Tuned_Champion") as run:
    print("--- Training and Logging Champion XGBoost Model ---")
    best_params = study.best_params
    
    # Log hyperparameters
    mlflow.log_params(best_params)
    
    # Train the model
    model_xgb = xgb.XGBClassifier(**best_params, random_state=42)
    model_xgb.fit(X_train, y_train)
    
    # Evaluate and log metrics
    y_pred_xgb = model_xgb.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred_xgb)
    f1 = f1_score(y_test, y_pred_xgb)
    roc_auc = roc_auc_score(y_test, model_xgb.predict_proba(X_test)[:, 1])
    
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("roc_auc", roc_auc)
    
    print(f"Champion XGBoost F1 Score: {f1:.4f}")

    # Log the model itself
    mlflow.xgboost.log_model(
        xgb_model=model_xgb,
        artifact_path="xgb-model",
        registered_model_name="etf-xgboost-predictor" # This also registers the model
    )
    print("Champion model logged and registered.")

    # --- NEW CODE TO ADD STARTS HERE ---
    
    print("--- Logging SHAP assets for the dashboard ---")
    import joblib # Make sure joblib is imported
    
    # 1. Create and save the SHAP explainer object
    explainer = shap.TreeExplainer(model_xgb)
    joblib.dump(explainer, "explainer.joblib")
    
    # 2. Save the X_test dataframe needed for plotting
    X_test.to_parquet("X_test.parquet")
    
    # 3. Log these files as MLflow artifacts in specific sub-folders
    mlflow.log_artifact("explainer.joblib", artifact_path="shap_explainer")
    mlflow.log_artifact("X_test.parquet", artifact_path="shap_xtest")
    
    print("SHAP explainer and X_test data successfully logged as artifacts.")

[I 2025-08-31 14:33:24,294] A new study created in memory with name: no-name-a26425f3-6e7c-498f-8a28-007a4432c162


[I 2025-08-31 14:33:25,033] Trial 0 finished with value: 0.5357091247506927 and parameters: {'n_estimators': 400, 'max_depth': 3, 'learning_rate': 0.21713107759793857, 'subsample': 0.6567332315505667, 'colsample_bytree': 0.7314988246989431, 'gamma': 4.432191410301617}. Best is trial 0 with value: 0.5357091247506927.


[I 2025-08-31 14:33:25,648] Trial 1 finished with value: 0.530491046008968 and parameters: {'n_estimators': 222, 'max_depth': 7, 'learning_rate': 0.20508394778332936, 'subsample': 0.9248435268596653, 'colsample_bytree': 0.6172472777785829, 'gamma': 0.9899725517365721}. Best is trial 0 with value: 0.5357091247506927.


[I 2025-08-31 14:33:26,346] Trial 2 finished with value: 0.589976970498242 and parameters: {'n_estimators': 135, 'max_depth': 7, 'learning_rate': 0.047268597779540907, 'subsample': 0.505454818414049, 'colsample_bytree': 0.9413683382288129, 'gamma': 4.0928820211574575}. Best is trial 2 with value: 0.589976970498242.


[I 2025-08-31 14:33:28,109] Trial 3 finished with value: 0.5464863263552768 and parameters: {'n_estimators': 423, 'max_depth': 8, 'learning_rate': 0.11249670016234453, 'subsample': 0.7411614008222419, 'colsample_bytree': 0.7393543499071544, 'gamma': 0.30826151380250744}. Best is trial 2 with value: 0.589976970498242.


[I 2025-08-31 14:33:30,058] Trial 4 finished with value: 0.5416752751020715 and parameters: {'n_estimators': 985, 'max_depth': 8, 'learning_rate': 0.1066183997803127, 'subsample': 0.7218117645136921, 'colsample_bytree': 0.8737222793617024, 'gamma': 1.3475593234302758}. Best is trial 2 with value: 0.589976970498242.


[I 2025-08-31 14:33:30,831] Trial 5 finished with value: 0.5360139575181339 and parameters: {'n_estimators': 286, 'max_depth': 5, 'learning_rate': 0.15374367081965218, 'subsample': 0.5673611648008, 'colsample_bytree': 0.9990325853763318, 'gamma': 2.3193032893251235}. Best is trial 2 with value: 0.589976970498242.


[I 2025-08-31 14:33:31,767] Trial 6 finished with value: 0.5099827862090488 and parameters: {'n_estimators': 566, 'max_depth': 3, 'learning_rate': 0.20861526321852064, 'subsample': 0.669215436086404, 'colsample_bytree': 0.6120801483985484, 'gamma': 1.2037121570464704}. Best is trial 2 with value: 0.589976970498242.


[I 2025-08-31 14:33:33,295] Trial 7 finished with value: 0.5244856176407934 and parameters: {'n_estimators': 754, 'max_depth': 7, 'learning_rate': 0.1558668246233844, 'subsample': 0.6297889296617967, 'colsample_bytree': 0.709197541475892, 'gamma': 0.7068592184162231}. Best is trial 2 with value: 0.589976970498242.


[I 2025-08-31 14:33:34,350] Trial 8 finished with value: 0.5030284716670712 and parameters: {'n_estimators': 583, 'max_depth': 7, 'learning_rate': 0.23188277232722754, 'subsample': 0.9902901075690186, 'colsample_bytree': 0.5030534865919377, 'gamma': 0.43105031339482514}. Best is trial 2 with value: 0.589976970498242.


[I 2025-08-31 14:33:35,572] Trial 9 finished with value: 0.5179671235110782 and parameters: {'n_estimators': 889, 'max_depth': 3, 'learning_rate': 0.16133476260632204, 'subsample': 0.6132402514395074, 'colsample_bytree': 0.6800331443992167, 'gamma': 4.992785403771346}. Best is trial 2 with value: 0.589976970498242.


[I 2025-08-31 14:33:36,774] Trial 10 finished with value: 0.5731954877397062 and parameters: {'n_estimators': 133, 'max_depth': 10, 'learning_rate': 0.01307326590405719, 'subsample': 0.8558974615312555, 'colsample_bytree': 0.8910264448912246, 'gamma': 3.702096913768718}. Best is trial 2 with value: 0.589976970498242.


[I 2025-08-31 14:33:37,703] Trial 11 finished with value: 0.5974461964860355 and parameters: {'n_estimators': 101, 'max_depth': 10, 'learning_rate': 0.016268822893593646, 'subsample': 0.8434190217260844, 'colsample_bytree': 0.8876460274214883, 'gamma': 3.7350322880142532}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:38,585] Trial 12 finished with value: 0.5552515599503167 and parameters: {'n_estimators': 113, 'max_depth': 10, 'learning_rate': 0.024561169857478523, 'subsample': 0.8642611856292717, 'colsample_bytree': 0.8474343126874149, 'gamma': 3.4495138693425975}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:39,432] Trial 13 finished with value: 0.548742347697541 and parameters: {'n_estimators': 298, 'max_depth': 5, 'learning_rate': 0.06484266033310268, 'subsample': 0.5131822459861624, 'colsample_bytree': 0.9848531373309998, 'gamma': 3.178205253466055}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:40,225] Trial 14 finished with value: 0.5368762251399126 and parameters: {'n_estimators': 112, 'max_depth': 9, 'learning_rate': 0.058542285243438665, 'subsample': 0.8166850381607664, 'colsample_bytree': 0.9296179560524647, 'gamma': 2.5021126954820323}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:40,880] Trial 15 finished with value: 0.5417642201086249 and parameters: {'n_estimators': 436, 'max_depth': 5, 'learning_rate': 0.2874745516772526, 'subsample': 0.7933411672469846, 'colsample_bytree': 0.812159594696223, 'gamma': 4.314555618135875}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:42,952] Trial 16 finished with value: 0.5272303077359967 and parameters: {'n_estimators': 695, 'max_depth': 9, 'learning_rate': 0.05877071674146081, 'subsample': 0.5034837601227691, 'colsample_bytree': 0.8234385118949278, 'gamma': 2.7447891133776077}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:43,448] Trial 17 finished with value: 0.5317070313692843 and parameters: {'n_estimators': 237, 'max_depth': 6, 'learning_rate': 0.09672696251502186, 'subsample': 0.9240176635566206, 'colsample_bytree': 0.9157172772614169, 'gamma': 3.9848896449237934}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:45,055] Trial 18 finished with value: 0.5483494923271139 and parameters: {'n_estimators': 344, 'max_depth': 9, 'learning_rate': 0.03344176498711068, 'subsample': 0.7004081045261398, 'colsample_bytree': 0.7950271410717827, 'gamma': 1.8081535648186313}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:45,550] Trial 19 finished with value: 0.5657866178632829 and parameters: {'n_estimators': 197, 'max_depth': 6, 'learning_rate': 0.0807983775201861, 'subsample': 0.7715984555113338, 'colsample_bytree': 0.9507735496828709, 'gamma': 4.927181375330178}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:46,856] Trial 20 finished with value: 0.5437216365333172 and parameters: {'n_estimators': 491, 'max_depth': 8, 'learning_rate': 0.038977641941792554, 'subsample': 0.5671085248694443, 'colsample_bytree': 0.7758738402527232, 'gamma': 3.189490443368703}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:47,841] Trial 21 finished with value: 0.5724445265426052 and parameters: {'n_estimators': 103, 'max_depth': 10, 'learning_rate': 0.014460644241217593, 'subsample': 0.8610068268315301, 'colsample_bytree': 0.8877200146357935, 'gamma': 3.7781628714730173}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:48,929] Trial 22 finished with value: 0.5677635941499458 and parameters: {'n_estimators': 201, 'max_depth': 10, 'learning_rate': 0.013258094191128098, 'subsample': 0.845978332013764, 'colsample_bytree': 0.8858162215719317, 'gamma': 4.332934079912161}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:49,530] Trial 23 finished with value: 0.5345607306370206 and parameters: {'n_estimators': 158, 'max_depth': 9, 'learning_rate': 0.0466025001451447, 'subsample': 0.9293973675657247, 'colsample_bytree': 0.9515964852588242, 'gamma': 3.4995476106880417}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:50,222] Trial 24 finished with value: 0.5041428170740889 and parameters: {'n_estimators': 313, 'max_depth': 10, 'learning_rate': 0.08371588706230548, 'subsample': 0.8211316863830892, 'colsample_bytree': 0.8376025954549723, 'gamma': 3.9535504446649625}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:50,647] Trial 25 finished with value: 0.48356957881992546 and parameters: {'n_estimators': 194, 'max_depth': 4, 'learning_rate': 0.1293375938046452, 'subsample': 0.9017531099750041, 'colsample_bytree': 0.912992496851917, 'gamma': 2.9013623791790897}. Best is trial 11 with value: 0.5974461964860355.


[I 2025-08-31 14:33:51,325] Trial 26 finished with value: 0.6005563696166802 and parameters: {'n_estimators': 352, 'max_depth': 8, 'learning_rate': 0.038960984960156084, 'subsample': 0.9930012514878016, 'colsample_bytree': 0.9618830914059073, 'gamma': 4.559410560321896}. Best is trial 26 with value: 0.6005563696166802.


[I 2025-08-31 14:33:51,941] Trial 27 finished with value: 0.6076446716842059 and parameters: {'n_estimators': 362, 'max_depth': 8, 'learning_rate': 0.07512123435197395, 'subsample': 0.9966172148877556, 'colsample_bytree': 0.9614204712456429, 'gamma': 4.382137887253557}. Best is trial 27 with value: 0.6076446716842059.


[I 2025-08-31 14:33:52,755] Trial 28 finished with value: 0.5838944923139383 and parameters: {'n_estimators': 517, 'max_depth': 8, 'learning_rate': 0.07843277245414014, 'subsample': 0.9916199856172199, 'colsample_bytree': 0.9824536914237351, 'gamma': 4.657666752533939}. Best is trial 27 with value: 0.6076446716842059.


[I 2025-08-31 14:33:53,358] Trial 29 finished with value: 0.49379979079005076 and parameters: {'n_estimators': 362, 'max_depth': 9, 'learning_rate': 0.13584918673218255, 'subsample': 0.9599226633512068, 'colsample_bytree': 0.9677063825889893, 'gamma': 4.5812876508387985}. Best is trial 27 with value: 0.6076446716842059.


[I 2025-08-31 14:33:54,282] Trial 30 finished with value: 0.5461541135595909 and parameters: {'n_estimators': 670, 'max_depth': 8, 'learning_rate': 0.181940828946626, 'subsample': 0.9691814667881283, 'colsample_bytree': 0.8545853865234452, 'gamma': 4.571642747952431}. Best is trial 27 with value: 0.6076446716842059.


[I 2025-08-31 14:33:54,968] Trial 31 finished with value: 0.5336270190379294 and parameters: {'n_estimators': 270, 'max_depth': 7, 'learning_rate': 0.04624904694410806, 'subsample': 0.902619732342738, 'colsample_bytree': 0.9322839586669024, 'gamma': 4.158240881787364}. Best is trial 27 with value: 0.6076446716842059.


[I 2025-08-31 14:33:55,783] Trial 32 finished with value: 0.5026027885177322 and parameters: {'n_estimators': 389, 'max_depth': 6, 'learning_rate': 0.031434271013490694, 'subsample': 0.9597641000120887, 'colsample_bytree': 0.9491979327714831, 'gamma': 4.100934523363869}. Best is trial 27 with value: 0.6076446716842059.


[I 2025-08-31 14:33:56,562] Trial 33 finished with value: 0.5475815045950091 and parameters: {'n_estimators': 450, 'max_depth': 8, 'learning_rate': 0.06570940926086359, 'subsample': 0.8909272136926383, 'colsample_bytree': 0.9183412639601983, 'gamma': 4.763241412317683}. Best is trial 27 with value: 0.6076446716842059.


[I 2025-08-31 14:33:57,015] Trial 34 finished with value: 0.6136236439211635 and parameters: {'n_estimators': 253, 'max_depth': 7, 'learning_rate': 0.09363174314515903, 'subsample': 0.9992612916628323, 'colsample_bytree': 0.9639307064592133, 'gamma': 4.361467588700034}. Best is trial 34 with value: 0.6136236439211635.


[I 2025-08-31 14:33:57,628] Trial 35 finished with value: 0.5067816902473108 and parameters: {'n_estimators': 345, 'max_depth': 7, 'learning_rate': 0.09679658396319069, 'subsample': 0.9352785435520917, 'colsample_bytree': 0.9691975712548321, 'gamma': 4.4341926010698}. Best is trial 34 with value: 0.6136236439211635.


[I 2025-08-31 14:33:58,132] Trial 36 finished with value: 0.5002230134959295 and parameters: {'n_estimators': 252, 'max_depth': 8, 'learning_rate': 0.13331793928446461, 'subsample': 0.9713892587942066, 'colsample_bytree': 0.9982454979292221, 'gamma': 3.638788675557219}. Best is trial 34 with value: 0.6136236439211635.


[I 2025-08-31 14:33:58,532] Trial 37 finished with value: 0.5311420646804932 and parameters: {'n_estimators': 178, 'max_depth': 6, 'learning_rate': 0.11559301674527037, 'subsample': 0.9930585442515877, 'colsample_bytree': 0.8666495960285064, 'gamma': 3.320619033642613}. Best is trial 34 with value: 0.6136236439211635.


[I 2025-08-31 14:34:01,069] Trial 38 finished with value: 0.5318942694957911 and parameters: {'n_estimators': 398, 'max_depth': 7, 'learning_rate': 0.07211754950790922, 'subsample': 0.9428191487270586, 'colsample_bytree': 0.8957921012030003, 'gamma': 0.0011711037333022034}. Best is trial 34 with value: 0.6136236439211635.


[I 2025-08-31 14:34:02,032] Trial 39 finished with value: 0.5218258423238501 and parameters: {'n_estimators': 475, 'max_depth': 9, 'learning_rate': 0.0942080368057623, 'subsample': 0.8876247417731984, 'colsample_bytree': 0.6538739074430904, 'gamma': 2.0197330350671323}. Best is trial 34 with value: 0.6136236439211635.


[I 2025-08-31 14:34:02,481] Trial 40 finished with value: 0.6314819479985739 and parameters: {'n_estimators': 237, 'max_depth': 7, 'learning_rate': 0.05064296600151617, 'subsample': 0.999132162210802, 'colsample_bytree': 0.5719343071586109, 'gamma': 4.813111117772804}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:02,935] Trial 41 finished with value: 0.5991439401638615 and parameters: {'n_estimators': 235, 'max_depth': 7, 'learning_rate': 0.05436598746965672, 'subsample': 0.9932410233946536, 'colsample_bytree': 0.5266907580154893, 'gamma': 4.311389760454325}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:03,490] Trial 42 finished with value: 0.6106094566082753 and parameters: {'n_estimators': 312, 'max_depth': 8, 'learning_rate': 0.052180967354066334, 'subsample': 0.9953631180186114, 'colsample_bytree': 0.532159532719577, 'gamma': 4.811909195339644}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:04,103] Trial 43 finished with value: 0.5921982666150514 and parameters: {'n_estimators': 314, 'max_depth': 8, 'learning_rate': 0.03753304120276418, 'subsample': 0.9526387864322736, 'colsample_bytree': 0.5542778045901381, 'gamma': 4.8202847077886455}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:04,593] Trial 44 finished with value: 0.6072502159879104 and parameters: {'n_estimators': 283, 'max_depth': 7, 'learning_rate': 0.10913272305246657, 'subsample': 0.997982432759835, 'colsample_bytree': 0.5790068255313556, 'gamma': 4.916652911759929}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:05,072] Trial 45 finished with value: 0.5497622188295885 and parameters: {'n_estimators': 281, 'max_depth': 7, 'learning_rate': 0.11646241491133963, 'subsample': 0.9724476109144133, 'colsample_bytree': 0.5910576520020718, 'gamma': 4.987754555400468}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:05,706] Trial 46 finished with value: 0.5446089936166476 and parameters: {'n_estimators': 399, 'max_depth': 7, 'learning_rate': 0.0937490264880186, 'subsample': 0.9197865028622857, 'colsample_bytree': 0.5812969293010501, 'gamma': 4.77736687496593}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:06,118] Trial 47 finished with value: 0.5907031518902943 and parameters: {'n_estimators': 228, 'max_depth': 6, 'learning_rate': 0.10575307440820281, 'subsample': 0.9987147947104278, 'colsample_bytree': 0.6332982300276739, 'gamma': 4.166269464957577}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:06,449] Trial 48 finished with value: 0.538526203327631 and parameters: {'n_estimators': 154, 'max_depth': 7, 'learning_rate': 0.17061655454315322, 'subsample': 0.969244066912115, 'colsample_bytree': 0.7137338878847975, 'gamma': 4.495052004167719}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:06,958] Trial 49 finished with value: 0.5292599633066248 and parameters: {'n_estimators': 312, 'max_depth': 8, 'learning_rate': 0.25474913330607635, 'subsample': 0.9475166375032638, 'colsample_bytree': 0.5013450976680964, 'gamma': 3.9225793101594686}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:08,052] Trial 50 finished with value: 0.558909428090559 and parameters: {'n_estimators': 880, 'max_depth': 6, 'learning_rate': 0.12454209522845183, 'subsample': 0.9775344775583079, 'colsample_bytree': 0.5387536449709449, 'gamma': 4.712716861955217}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:08,623] Trial 51 finished with value: 0.6107768575667024 and parameters: {'n_estimators': 364, 'max_depth': 8, 'learning_rate': 0.07156818094281918, 'subsample': 0.9935336766937112, 'colsample_bytree': 0.567112363984084, 'gamma': 4.975626712490562}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:09,441] Trial 52 finished with value: 0.5931871261118487 and parameters: {'n_estimators': 600, 'max_depth': 8, 'learning_rate': 0.14540390315794272, 'subsample': 0.99989156769897, 'colsample_bytree': 0.5812865182301646, 'gamma': 4.341569937102517}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:09,953] Trial 53 finished with value: 0.5516875497507352 and parameters: {'n_estimators': 269, 'max_depth': 7, 'learning_rate': 0.08303991259077453, 'subsample': 0.9209995310175242, 'colsample_bytree': 0.6140204777814317, 'gamma': 4.974924530050756}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:10,587] Trial 54 finished with value: 0.5513192095587011 and parameters: {'n_estimators': 365, 'max_depth': 8, 'learning_rate': 0.0685530716527849, 'subsample': 0.9476918663335048, 'colsample_bytree': 0.5595043364282383, 'gamma': 4.782608095205514}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:11,274] Trial 55 finished with value: 0.5840662153480483 and parameters: {'n_estimators': 448, 'max_depth': 7, 'learning_rate': 0.05702251176740561, 'subsample': 0.9785373809541263, 'colsample_bytree': 0.5333511209958625, 'gamma': 4.455046770808178}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:11,869] Trial 56 finished with value: 0.5398307817131853 and parameters: {'n_estimators': 315, 'max_depth': 9, 'learning_rate': 0.1039218164407503, 'subsample': 0.8770487205784272, 'colsample_bytree': 0.6680161347014114, 'gamma': 4.965880137889323}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:12,322] Trial 57 finished with value: 0.5590355324734978 and parameters: {'n_estimators': 210, 'max_depth': 8, 'learning_rate': 0.08738935029423596, 'subsample': 0.9115821505668386, 'colsample_bytree': 0.5990572526269689, 'gamma': 4.250545878596379}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:13,234] Trial 58 finished with value: 0.5203033660802701 and parameters: {'n_estimators': 253, 'max_depth': 7, 'learning_rate': 0.07474617204880118, 'subsample': 0.937829467317501, 'colsample_bytree': 0.7602004524059094, 'gamma': 0.960841517486819}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:14,189] Trial 59 finished with value: 0.5343011457376566 and parameters: {'n_estimators': 419, 'max_depth': 5, 'learning_rate': 0.02422510195546175, 'subsample': 0.7304686556384092, 'colsample_bytree': 0.5542968782629257, 'gamma': 3.9257425115307494}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:14,989] Trial 60 finished with value: 0.5375856401282609 and parameters: {'n_estimators': 162, 'max_depth': 6, 'learning_rate': 0.04887638004520298, 'subsample': 0.6821700865969129, 'colsample_bytree': 0.6286358246757364, 'gamma': 1.586939300979672}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:15,602] Trial 61 finished with value: 0.5977649632921648 and parameters: {'n_estimators': 342, 'max_depth': 8, 'learning_rate': 0.0616688483693565, 'subsample': 0.9826551382918081, 'colsample_bytree': 0.5720154326319868, 'gamma': 4.605980110199377}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:16,236] Trial 62 finished with value: 0.6061920412882993 and parameters: {'n_estimators': 288, 'max_depth': 9, 'learning_rate': 0.026929845029936787, 'subsample': 0.960924030939069, 'colsample_bytree': 0.5164545342466343, 'gamma': 4.599998590019304}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:16,927] Trial 63 finished with value: 0.6142037462672019 and parameters: {'n_estimators': 290, 'max_depth': 9, 'learning_rate': 0.02153356929999556, 'subsample': 0.9602873707630252, 'colsample_bytree': 0.5206941125062926, 'gamma': 4.822445010815971}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:17,693] Trial 64 finished with value: 0.595802125913439 and parameters: {'n_estimators': 377, 'max_depth': 9, 'learning_rate': 0.021092850613441926, 'subsample': 0.9787479045525892, 'colsample_bytree': 0.540097128285622, 'gamma': 4.834168933405388}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:18,314] Trial 65 finished with value: 0.5798708495322374 and parameters: {'n_estimators': 328, 'max_depth': 9, 'learning_rate': 0.04834162015294732, 'subsample': 0.9562622660281486, 'colsample_bytree': 0.5173684942751176, 'gamma': 4.445189819757458}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:19,177] Trial 66 finished with value: 0.5974247746429596 and parameters: {'n_estimators': 544, 'max_depth': 8, 'learning_rate': 0.07184445916906898, 'subsample': 0.9824132987706639, 'colsample_bytree': 0.5569848231180239, 'gamma': 4.840802836561916}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:19,670] Trial 67 finished with value: 0.5090085365581636 and parameters: {'n_estimators': 218, 'max_depth': 7, 'learning_rate': 0.09066127435261429, 'subsample': 0.6289550025686811, 'colsample_bytree': 0.5997671249970842, 'gamma': 4.724460803481704}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:20,310] Trial 68 finished with value: 0.5263346197746329 and parameters: {'n_estimators': 259, 'max_depth': 8, 'learning_rate': 0.04201563973195942, 'subsample': 0.933870449031123, 'colsample_bytree': 0.5697225102549365, 'gamma': 4.056330428785354}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:20,829] Trial 69 finished with value: 0.6065093641120255 and parameters: {'n_estimators': 290, 'max_depth': 8, 'learning_rate': 0.05478698476383791, 'subsample': 0.9967053517756125, 'colsample_bytree': 0.6480898519679154, 'gamma': 4.990517051277045}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:21,357] Trial 70 finished with value: 0.510238976168722 and parameters: {'n_estimators': 235, 'max_depth': 9, 'learning_rate': 0.10249017079088754, 'subsample': 0.9640094442821313, 'colsample_bytree': 0.6948426998195552, 'gamma': 3.826783831012275}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:21,878] Trial 71 finished with value: 0.6103319536355075 and parameters: {'n_estimators': 308, 'max_depth': 7, 'learning_rate': 0.06197868362803069, 'subsample': 0.9869617410186798, 'colsample_bytree': 0.6320495025141336, 'gamma': 4.974303838748999}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:22,548] Trial 72 finished with value: 0.5573547016514303 and parameters: {'n_estimators': 419, 'max_depth': 7, 'learning_rate': 0.06589419781305195, 'subsample': 0.9539426474791278, 'colsample_bytree': 0.5166713465387094, 'gamma': 4.667849020611843}. Best is trial 40 with value: 0.6314819479985739.


[I 2025-08-31 14:34:23,131] Trial 73 finished with value: 0.6471724306249695 and parameters: {'n_estimators': 186, 'max_depth': 7, 'learning_rate': 0.010268238856722296, 'subsample': 0.9999353380305052, 'colsample_bytree': 0.5451409070666482, 'gamma': 4.84611117381448}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:23,624] Trial 74 finished with value: 0.5571749240526127 and parameters: {'n_estimators': 131, 'max_depth': 6, 'learning_rate': 0.031974578276450144, 'subsample': 0.7667333643129903, 'colsample_bytree': 0.5425294134399726, 'gamma': 4.242273218063674}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:24,298] Trial 75 finished with value: 0.6305569686729796 and parameters: {'n_estimators': 181, 'max_depth': 7, 'learning_rate': 0.010901931917253693, 'subsample': 0.9856310381413446, 'colsample_bytree': 0.5034094998476492, 'gamma': 4.3618289950305416}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:24,927] Trial 76 finished with value: 0.6271663122487452 and parameters: {'n_estimators': 175, 'max_depth': 7, 'learning_rate': 0.012235212536299167, 'subsample': 0.978553617115344, 'colsample_bytree': 0.5029614112137978, 'gamma': 4.5594829419637115}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:25,462] Trial 77 finished with value: 0.6085745013922541 and parameters: {'n_estimators': 172, 'max_depth': 6, 'learning_rate': 0.01570191049825373, 'subsample': 0.9669848984008896, 'colsample_bytree': 0.5022167453646028, 'gamma': 4.494083117099495}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:26,369] Trial 78 finished with value: 0.5972434000945742 and parameters: {'n_estimators': 139, 'max_depth': 7, 'learning_rate': 0.011987907429401186, 'subsample': 0.9430814550865644, 'colsample_bytree': 0.5218258947110626, 'gamma': 2.3873406620321216}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:27,008] Trial 79 finished with value: 0.5745485226604096 and parameters: {'n_estimators': 190, 'max_depth': 7, 'learning_rate': 0.021021395112838064, 'subsample': 0.5668958653219822, 'colsample_bytree': 0.5114849134949815, 'gamma': 4.641406464101105}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:27,539] Trial 80 finished with value: 0.5489085810653309 and parameters: {'n_estimators': 133, 'max_depth': 6, 'learning_rate': 0.034507034002824656, 'subsample': 0.9046586572928097, 'colsample_bytree': 0.5426476606213442, 'gamma': 3.6246289732954495}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:28,296] Trial 81 finished with value: 0.634570634580428 and parameters: {'n_estimators': 209, 'max_depth': 7, 'learning_rate': 0.010629218309672126, 'subsample': 0.9848329181907227, 'colsample_bytree': 0.5263743081150095, 'gamma': 4.823997448727939}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:28,844] Trial 82 finished with value: 0.6060380497480259 and parameters: {'n_estimators': 208, 'max_depth': 7, 'learning_rate': 0.027634964413353343, 'subsample': 0.9809514946782447, 'colsample_bytree': 0.5334406544247591, 'gamma': 4.828588364006291}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:29,599] Trial 83 finished with value: 0.6288855380930135 and parameters: {'n_estimators': 179, 'max_depth': 7, 'learning_rate': 0.01034109237656543, 'subsample': 0.9841367117431749, 'colsample_bytree': 0.5494355683785463, 'gamma': 4.530777095444976}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:30,243] Trial 84 finished with value: 0.607084669100985 and parameters: {'n_estimators': 182, 'max_depth': 7, 'learning_rate': 0.017823069472997596, 'subsample': 0.9700652801674307, 'colsample_bytree': 0.5616959180968113, 'gamma': 4.373058917340321}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:31,093] Trial 85 finished with value: 0.6149820886956225 and parameters: {'n_estimators': 152, 'max_depth': 7, 'learning_rate': 0.010254779018876156, 'subsample': 0.9302027883950756, 'colsample_bytree': 0.5497296685880699, 'gamma': 4.218760735926822}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:31,443] Trial 86 finished with value: 0.6412433546528735 and parameters: {'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.014138378694462156, 'subsample': 0.955254828620919, 'colsample_bytree': 0.5470500754844664, 'gamma': 4.175317031690105}. Best is trial 73 with value: 0.6471724306249695.


[I 2025-08-31 14:34:31,763] Trial 87 finished with value: 0.6680990408894647 and parameters: {'n_estimators': 104, 'max_depth': 3, 'learning_rate': 0.010909140523280215, 'subsample': 0.9264252280906553, 'colsample_bytree': 0.5486215158647755, 'gamma': 4.119150863545741}. Best is trial 87 with value: 0.6680990408894647.


[I 2025-08-31 14:34:32,093] Trial 88 finished with value: 0.671907552666141 and parameters: {'n_estimators': 103, 'max_depth': 3, 'learning_rate': 0.010021582894384306, 'subsample': 0.9234005106744587, 'colsample_bytree': 0.5523794772670825, 'gamma': 4.090322685581977}. Best is trial 88 with value: 0.671907552666141.


[I 2025-08-31 14:34:32,403] Trial 89 finished with value: 0.5692577226433173 and parameters: {'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.028393617890750157, 'subsample': 0.8261219454597061, 'colsample_bytree': 0.5003377899519391, 'gamma': 4.0552161724678415}. Best is trial 88 with value: 0.671907552666141.


[I 2025-08-31 14:34:32,747] Trial 90 finished with value: 0.5427023932234649 and parameters: {'n_estimators': 122, 'max_depth': 4, 'learning_rate': 0.04214134019085402, 'subsample': 0.9177960081390767, 'colsample_bytree': 0.5963950342489586, 'gamma': 4.108722597727027}. Best is trial 88 with value: 0.671907552666141.


[I 2025-08-31 14:34:33,168] Trial 91 finished with value: 0.6296243852676144 and parameters: {'n_estimators': 147, 'max_depth': 3, 'learning_rate': 0.012083489060028093, 'subsample': 0.9316693100798554, 'colsample_bytree': 0.5502647520012812, 'gamma': 3.7973133769078324}. Best is trial 88 with value: 0.671907552666141.


[I 2025-08-31 14:34:33,490] Trial 92 finished with value: 0.675193632188239 and parameters: {'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.010143340955317942, 'subsample': 0.8765499631775118, 'colsample_bytree': 0.5478898604572036, 'gamma': 3.8113068209322676}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:33,808] Trial 93 finished with value: 0.6078965922238485 and parameters: {'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.01908584048447475, 'subsample': 0.886116270739312, 'colsample_bytree': 0.5867904265505639, 'gamma': 3.386537893098887}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:34,251] Trial 94 finished with value: 0.6291876568987514 and parameters: {'n_estimators': 123, 'max_depth': 4, 'learning_rate': 0.010280351026373011, 'subsample': 0.9083433827042001, 'colsample_bytree': 0.5464375227132838, 'gamma': 3.5303264756775548}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:34,677] Trial 95 finished with value: 0.5286103377864181 and parameters: {'n_estimators': 118, 'max_depth': 4, 'learning_rate': 0.028908701719630444, 'subsample': 0.8659973865748092, 'colsample_bytree': 0.6071987048957984, 'gamma': 3.571990150615899}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:35,046] Trial 96 finished with value: 0.5249776912363173 and parameters: {'n_estimators': 144, 'max_depth': 3, 'learning_rate': 0.036153976114564286, 'subsample': 0.9070233547064954, 'colsample_bytree': 0.5730894695339493, 'gamma': 3.8556379596773485}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:35,399] Trial 97 finished with value: 0.5898329333254669 and parameters: {'n_estimators': 121, 'max_depth': 3, 'learning_rate': 0.0178469459335397, 'subsample': 0.8929269489526404, 'colsample_bytree': 0.5292259128385374, 'gamma': 2.9011009382199227}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:35,872] Trial 98 finished with value: 0.5360971929914782 and parameters: {'n_estimators': 159, 'max_depth': 4, 'learning_rate': 0.024149560988473307, 'subsample': 0.9246943158721312, 'colsample_bytree': 0.5461636402296941, 'gamma': 3.7619436178677472}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:36,243] Trial 99 finished with value: 0.5118299784389129 and parameters: {'n_estimators': 147, 'max_depth': 3, 'learning_rate': 0.034600598515180436, 'subsample': 0.947907426012142, 'colsample_bytree': 0.5627943528345303, 'gamma': 3.502710669144937}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:36,776] Trial 100 finished with value: 0.5429125240086654 and parameters: {'n_estimators': 200, 'max_depth': 3, 'learning_rate': 0.017538012856607575, 'subsample': 0.8418314353807794, 'colsample_bytree': 0.52803229944987, 'gamma': 3.303062699525788}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:37,158] Trial 101 finished with value: 0.6500196123693153 and parameters: {'n_estimators': 120, 'max_depth': 3, 'learning_rate': 0.011236841323578172, 'subsample': 0.8743496671312012, 'colsample_bytree': 0.5522319190296294, 'gamma': 3.9892699130294704}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:37,543] Trial 102 finished with value: 0.652054214015839 and parameters: {'n_estimators': 124, 'max_depth': 3, 'learning_rate': 0.01066190441772095, 'subsample': 0.8736503209939116, 'colsample_bytree': 0.5767804429393546, 'gamma': 3.946978634976309}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:37,904] Trial 103 finished with value: 0.5537780066139754 and parameters: {'n_estimators': 115, 'max_depth': 3, 'learning_rate': 0.025569029162351722, 'subsample': 0.8753298870390164, 'colsample_bytree': 0.577984099634115, 'gamma': 3.7373524097652435}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:38,302] Trial 104 finished with value: 0.5152938752388077 and parameters: {'n_estimators': 164, 'max_depth': 3, 'learning_rate': 0.04042374728970041, 'subsample': 0.8376107738660367, 'colsample_bytree': 0.5117761875921749, 'gamma': 4.004604202689191}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:38,614] Trial 105 finished with value: 0.5170783008730673 and parameters: {'n_estimators': 142, 'max_depth': 3, 'learning_rate': 0.1956987982108476, 'subsample': 0.8960455691005276, 'colsample_bytree': 0.5852954001873611, 'gamma': 3.908655809675287}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:38,951] Trial 106 finished with value: 0.6368402116411631 and parameters: {'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.016773924767160168, 'subsample': 0.7937255789620246, 'colsample_bytree': 0.5639341644559275, 'gamma': 4.175968091545236}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:39,442] Trial 107 finished with value: 0.5415265643174755 and parameters: {'n_estimators': 113, 'max_depth': 4, 'learning_rate': 0.021249958356328952, 'subsample': 0.8582886914202086, 'colsample_bytree': 0.5637780941322793, 'gamma': 4.263443621418062}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:39,963] Trial 108 finished with value: 0.50470420898025 and parameters: {'n_estimators': 223, 'max_depth': 3, 'learning_rate': 0.02978073493482137, 'subsample': 0.8750371356303192, 'colsample_bytree': 0.6180061955787873, 'gamma': 4.136213830686307}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:40,475] Trial 109 finished with value: 0.5726965652837018 and parameters: {'n_estimators': 190, 'max_depth': 3, 'learning_rate': 0.016933148994752262, 'subsample': 0.7722812378580622, 'colsample_bytree': 0.5315644866664889, 'gamma': 4.010871216988186}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:41,851] Trial 110 finished with value: 0.4724795423178774 and parameters: {'n_estimators': 956, 'max_depth': 3, 'learning_rate': 0.04260386153057139, 'subsample': 0.7888267011943995, 'colsample_bytree': 0.5716094488493778, 'gamma': 4.380363683945512}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:42,256] Trial 111 finished with value: 0.6515295850927043 and parameters: {'n_estimators': 134, 'max_depth': 3, 'learning_rate': 0.010508368148843188, 'subsample': 0.9337121232852155, 'colsample_bytree': 0.556544969871797, 'gamma': 3.698449363903912}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:42,585] Trial 112 finished with value: 0.6172921937241871 and parameters: {'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.021604223803200963, 'subsample': 0.8079879353322373, 'colsample_bytree': 0.5384584231052052, 'gamma': 4.1837952045886455}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:43,060] Trial 113 finished with value: 0.5958846235959372 and parameters: {'n_estimators': 131, 'max_depth': 4, 'learning_rate': 0.014703461588827436, 'subsample': 0.8477076532448267, 'colsample_bytree': 0.5573538578562581, 'gamma': 3.660452881035773}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:43,448] Trial 114 finished with value: 0.5176175488185335 and parameters: {'n_estimators': 166, 'max_depth': 3, 'learning_rate': 0.033738663981396466, 'subsample': 0.9396677666617369, 'colsample_bytree': 0.5114865995072957, 'gamma': 3.9457247163764064}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:43,970] Trial 115 finished with value: 0.5235349765260284 and parameters: {'n_estimators': 201, 'max_depth': 3, 'learning_rate': 0.025159491497334895, 'subsample': 0.7153032417178955, 'colsample_bytree': 0.5946984361498059, 'gamma': 4.066903141349571}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:44,264] Trial 116 finished with value: 0.5670749073271665 and parameters: {'n_estimators': 132, 'max_depth': 3, 'learning_rate': 0.29703240744317394, 'subsample': 0.9145761709553216, 'colsample_bytree': 0.5232304323430852, 'gamma': 3.8827134233285956}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:44,802] Trial 117 finished with value: 0.5781914372518512 and parameters: {'n_estimators': 163, 'max_depth': 4, 'learning_rate': 0.015896209078294286, 'subsample': 0.8045161396803507, 'colsample_bytree': 0.6069745260779801, 'gamma': 4.31753416512592}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:46,360] Trial 118 finished with value: 0.48429970063860484 and parameters: {'n_estimators': 752, 'max_depth': 3, 'learning_rate': 0.010195014511907607, 'subsample': 0.8663287544462726, 'colsample_bytree': 0.5675401268040946, 'gamma': 3.1818085808982506}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:46,685] Trial 119 finished with value: 0.5668137401782822 and parameters: {'n_estimators': 103, 'max_depth': 3, 'learning_rate': 0.030032657382390027, 'subsample': 0.8850792351655892, 'colsample_bytree': 0.5375383662383668, 'gamma': 4.463725512997621}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:47,172] Trial 120 finished with value: 0.5387461204457448 and parameters: {'n_estimators': 182, 'max_depth': 3, 'learning_rate': 0.02166595847875047, 'subsample': 0.830009325025983, 'colsample_bytree': 0.5800894746024013, 'gamma': 3.713767459344436}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:47,607] Trial 121 finished with value: 0.5991532061318499 and parameters: {'n_estimators': 147, 'max_depth': 3, 'learning_rate': 0.01566030457139358, 'subsample': 0.9283427981649714, 'colsample_bytree': 0.5525082138308652, 'gamma': 3.7809952480937943}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:47,914] Trial 122 finished with value: 0.49674602724785 and parameters: {'n_estimators': 149, 'max_depth': 3, 'learning_rate': 0.2250692718287089, 'subsample': 0.9390249448001546, 'colsample_bytree': 0.5536486919592734, 'gamma': 4.1660831117335375}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:48,318] Trial 123 finished with value: 0.614240271419433 and parameters: {'n_estimators': 125, 'max_depth': 3, 'learning_rate': 0.014487782282315367, 'subsample': 0.9541788875903626, 'colsample_bytree': 0.5444069467627538, 'gamma': 3.8423448258391972}. Best is trial 92 with value: 0.675193632188239.


[I 2025-08-31 14:34:48,883] Trial 124 finished with value: 0.5724344372347433 and parameters: {'n_estimators': 241, 'max_depth': 3, 'learning_rate': 0.01006213887162525, 'subsample': 0.9726574157149791, 'colsample_bytree': 0.5210716608978188, 'gamma': 3.995870115793903}. Best is trial 92 with value: 0.675193632188239.




Best XGBoost Params: {'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.010143340955317942, 'subsample': 0.8765499631775118, 'colsample_bytree': 0.5478898604572036, 'gamma': 3.8113068209322676}
--- Training and Logging Champion XGBoost Model ---
Champion XGBoost F1 Score: 0.6963


Champion model logged and registered.
--- Logging SHAP assets for the dashboard ---
SHAP explainer and X_test data successfully logged as artifacts.


Successfully registered model 'etf-xgboost-predictor'.
Created version '1' of model 'etf-xgboost-predictor'.


In [6]:
# --- MLP Challenger Model ---
# Step 1: Imports and Data Scaling
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

# Deep learning models are sensitive to feature scale. We must standardize our data.
# We fit the scaler ONLY on the training data to prevent data leakage from the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data successfully scaled.")
print(f"Shape of scaled training data: {X_train_scaled.shape}")

Data successfully scaled.
Shape of scaled training data: (2380, 32)


In [7]:
# Convert numpy arrays to PyTorch Tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Create TensorDatasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create DataLoaders to handle batching
# We don't shuffle time-series data to preserve temporal order if needed, 
# but for a simple MLP, shuffling is often acceptable. Let's keep it False for rigor.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print("PyTorch Tensors and DataLoaders created.")

PyTorch Tensors and DataLoaders created.


In [8]:
# Step 2: Define the MLP Architecture
class ETF_MLP(nn.Module):
    def __init__(self, input_size, hidden_size_1=128, hidden_size_2=64, dropout_rate=0.5):
        """
        Initializes the MLP model.
        
        Args:
            input_size (int): The number of input features.
            hidden_size_1 (int): Number of neurons in the first hidden layer.
            hidden_size_2 (int): Number of neurons in the second hidden layer.
            dropout_rate (float): The dropout probability.
        """
        super(ETF_MLP, self).__init__()
        
        # --- Layer Definitions ---
        self.layer_1 = nn.Linear(input_size, hidden_size_1)
        self.bn_1 = nn.BatchNorm1d(hidden_size_1)
        
        self.layer_2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.bn_2 = nn.BatchNorm1d(hidden_size_2)
        
        self.output_layer = nn.Linear(hidden_size_2, 1)
        
        # --- Activation and Regularization ---
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)
        
    def forward(self, x):
        """ The forward pass of the model. """
        # First hidden layer
        x = self.layer_1(x)
        x = self.bn_1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Second hidden layer
        x = self.layer_2(x)
        x = self.bn_2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Output layer with sigmoid for binary classification
        x = torch.sigmoid(self.output_layer(x))
        return x

# Instantiate the model to test
input_features = X_train.shape[1]
model_mlp = ETF_MLP(input_size=input_features)
print("MLP Model Architecture:")
print(model_mlp)

MLP Model Architecture:
ETF_MLP(
  (layer_1): Linear(in_features=32, out_features=128, bias=True)
  (bn_1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer_2): Linear(in_features=128, out_features=64, bias=True)
  (bn_2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (output_layer): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
)


In [9]:
# Step 3: Manual MLP Training and Evaluation

# --- Configuration ---
INPUT_SIZE = X_train.shape[1]
LEARNING_RATE = 0.001
EPOCHS = 50

# --- Model, Loss, Optimizer (Demonstrates 5.2, 5.3) ---
model_mlp = ETF_MLP(input_size=INPUT_SIZE, dropout_rate=0.4)
criterion = nn.BCELoss() # Binary Cross Entropy Loss for binary classification
optimizer = torch.optim.Adam(model_mlp.parameters(), lr=LEARNING_RATE) # Adam Optimizer
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS) # LR Schedule

# --- MLflow Logging ---
with mlflow.start_run(run_name="MLP_Manual_Baseline") as run:
    mlflow.log_params({"learning_rate": LEARNING_RATE, "epochs": EPOCHS, "optimizer": "Adam"})
    
    # --- Training Loop ---
    for epoch in range(EPOCHS):
        model_mlp.train() # Set model to training mode
        for features, labels in train_loader:
            # Forward pass
            outputs = model_mlp(features)
            loss = criterion(outputs, labels)
            
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        # Update learning rate
        scheduler.step()
        
    # --- Final Evaluation on Test Set ---
    model_mlp.eval() # Set model to evaluation mode
    all_preds = []
    all_probas = []
    all_labels = []
    with torch.no_grad():
        for features, labels in test_loader:
            outputs = model_mlp(features)
            predicted_classes = (outputs > 0.5).float()
            all_preds.extend(predicted_classes.numpy())
            all_probas.extend(outputs.numpy())
            all_labels.extend(labels.numpy())
    
    # Calculate and log all final metrics
    f1 = f1_score(all_labels, all_preds)
    accuracy = accuracy_score(all_labels, all_preds)
    roc_auc = roc_auc_score(all_labels, all_probas)
    
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("roc_auc", roc_auc)

    print(f"Final MLP F1 Score from manual run: {f1:.4f}")
    print(f"Final MLP Accuracy from manual run: {accuracy:.4f}")
    print(f"Final MLP ROC AUC from manual run: {roc_auc:.4f}")

    # Log the final model
    mlflow.pytorch.log_model(model_mlp, "mlp-model")

Final MLP F1 Score from manual run: 0.2013
Final MLP Accuracy from manual run: 0.4670
Final MLP ROC AUC from manual run: 0.4638


In [10]:
# In notebooks/04_model_training_and_evaluation.ipynb, THE FINAL CELL

from mlflow.tracking import MlflowClient
import joblib

# Train and log the final champion model
with mlflow.start_run(run_name="XGBoost_Tuned_Champion") as run:
    print("--- Training and Logging Champion XGBoost Model ---")
    best_params = study.best_params
    
    mlflow.log_params(best_params)
    
    # Train the model
    model_xgb = xgb.XGBClassifier(**best_params, random_state=42)
    model_xgb.fit(X_train, y_train)
    
    # Evaluate and log metrics
    y_pred_xgb = model_xgb.predict(X_test)
    f1 = f1_score(y_test, y_pred_xgb)
    mlflow.log_metric("f1_score", f1)
    print(f"Champion XGBoost F1 Score: {f1:.4f}")

    # --- Log the Model, SHAP Assets, and Promote ---
    
    # 1. Log the model itself. This will create a new version.
    model_info = mlflow.xgboost.log_model(
        xgb_model=model_xgb,
        artifact_path="xgb-model",
        registered_model_name=MODEL_NAME
    )

    # 2. Find the new model version using the run_id from the logged model
    client = MlflowClient()
    run_id = model_info.run_id
    model_versions = client.search_model_versions(f"run_id='{run_id}'")
    new_version = model_versions[0].version # The first result should be our new version
    print(f"Model registered as '{MODEL_NAME}' version {new_version}.")

    # 3. Log SHAP assets and test data for the dashboard
    explainer = shap.TreeExplainer(model_xgb)
    joblib.dump(explainer, "explainer.joblib")
    X_test.to_parquet("X_test.parquet")
    y_test.to_frame().to_parquet("y_test.parquet") # Save y_test for historical comparison

    mlflow.log_artifact("explainer.joblib", artifact_path="shap_assets")
    mlflow.log_artifact("X_test.parquet", artifact_path="shap_assets")
    mlflow.log_artifact("y_test.parquet", artifact_path="shap_assets") # Log y_test artifact
    print("SHAP assets and test data logged.")
    
    # 4. Promote this new version to the "Production" stage
    print(f"\n--- Promoting Model Version {new_version} to Production ---")
    client.transition_model_version_stage(
        name=MODEL_NAME,
        version=new_version,
        stage="Production",
        archive_existing_versions=True  # Safely archive any old production model
    )
    print(f"Successfully promoted model version {new_version} to 'Production'.")

--- Training and Logging Champion XGBoost Model ---
Champion XGBoost F1 Score: 0.6963




Model registered as 'etf-xgboost-predictor' version 2.
SHAP assets and test data logged.

--- Promoting Model Version 2 to Production ---
Successfully promoted model version 2 to 'Production'.


Registered model 'etf-xgboost-predictor' already exists. Creating a new version of this model...
Created version '2' of model 'etf-xgboost-predictor'.
  client.transition_model_version_stage(
