In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import optuna
import shap
import mlflow
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import os
import sys

# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from config import PROCESSED_DATA_PATH, MLFLOW_TRACKING_URI, MODEL_NAME

# --- Set up MLflow CORRECTLY ---
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment("ETF_Trend_Prediction")

# --- Load Data ---
data = pd.read_parquet(PROCESSED_DATA_PATH)

# Separate features (X) and target (y)
X = data.drop('target', axis=1)
y = data['target']

  import pkg_resources  # noqa: TID251


In [2]:
# Define the chronological split point
# For example, use data up to the end of 2021 for training, and 2022 onwards for testing.
train_end_date = '2021-12-31'
test_start_date = '2022-01-01'

X_train = X.loc[:train_end_date]
y_train = y.loc[:train_end_date]

X_test = X.loc[test_start_date:]
y_test = y.loc[test_start_date:]

print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

mlflow.set_experiment("ETF_Trend_Prediction")

Training set size: 2380
Test set size: 908


<Experiment: artifact_location='file:C:\\Users\\dawso\\Dev\\Personal\\AIGrind\\mlops-etf-forecasting\\mlruns/729986291096297859', creation_time=1756665195382, experiment_id='729986291096297859', last_update_time=1756665195382, lifecycle_stage='active', name='ETF_Trend_Prediction', tags={}>

In [3]:
# Train Logistic Regression
with mlflow.start_run(run_name="LogisticRegression_Baseline"):
    model_lr = LogisticRegression(max_iter=1000, random_state=42)
    model_lr.fit(X_train, y_train)
    y_pred_lr = model_lr.predict(X_test)
    y_proba_lr = model_lr.predict_proba(X_test)[:, 1]
    
    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_lr))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_lr))
    mlflow.log_metric("roc_auc", roc_auc_score(y_test, y_proba_lr))
    
    # Log the model
    mlflow.sklearn.log_model(model_lr, "logistic-regression-model")
    
    print(f"Logistic Regression F1 Score: {f1_score(y_test, y_pred_lr):.4f}")

# Train Random Forest
with mlflow.start_run(run_name="RandomForest_Baseline"):
    model_rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
    model_rf.fit(X_train, y_train)
    y_pred_rf = model_rf.predict(X_test)
    y_proba_rf = model_rf.predict_proba(X_test)[:, 1]

    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_rf))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_rf))
    mlflow.log_metric("roc_auc", roc_auc_score(y_test, y_proba_rf))
    
    # Log the model
    mlflow.sklearn.log_model(model_rf, "random-forest-model")
    
    print(f"Random Forest F1 Score: {f1_score(y_test, y_pred_rf):.4f}")

Logistic Regression F1 Score: 0.6968


Random Forest F1 Score: 0.6254


In [4]:
def objective(trial):
    # Define the search space for hyperparameters
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'random_state': 42
    }
    
    model = xgb.XGBClassifier(**params)
    
    # Use TimeSeriesSplit for robust cross-validation
    tscv = TimeSeriesSplit(n_splits=5)
    score = cross_val_score(model, X_train, y_train, cv=tscv, scoring='f1', n_jobs=1).mean()
    
    return score

In [5]:
# Run the study to find the best params
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=125) 

best_params = study.best_params
print("Best XGBoost Params:", best_params)

# Train the final XGBoost model with the best parameters and log to MLflow
with mlflow.start_run(run_name="XGBoost_Tuned_Champion") as run:
    print("--- Training and Logging Champion XGBoost Model ---")
    best_params = study.best_params
    
    # Log hyperparameters
    mlflow.log_params(best_params)
    
    # Train the model
    model_xgb = xgb.XGBClassifier(**best_params, random_state=42)
    model_xgb.fit(X_train, y_train)
    
    # Evaluate and log metrics
    y_pred_xgb = model_xgb.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred_xgb)
    f1 = f1_score(y_test, y_pred_xgb)
    roc_auc = roc_auc_score(y_test, model_xgb.predict_proba(X_test)[:, 1])
    
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("roc_auc", roc_auc)
    
    print(f"Champion XGBoost F1 Score: {f1:.4f}")

    # Log the model itself
    mlflow.xgboost.log_model(
        xgb_model=model_xgb,
        artifact_path="xgb-model",
        registered_model_name="etf-xgboost-predictor" # This also registers the model
    )
    print("Champion model logged and registered.")

    # --- NEW CODE TO ADD STARTS HERE ---
    
    print("--- Logging SHAP assets for the dashboard ---")
    import joblib # Make sure joblib is imported
    
    # 1. Create and save the SHAP explainer object
    explainer = shap.TreeExplainer(model_xgb)
    joblib.dump(explainer, "explainer.joblib")
    
    # 2. Save the X_test dataframe needed for plotting
    X_test.to_parquet("X_test.parquet")
    
    # 3. Log these files as MLflow artifacts in specific sub-folders
    mlflow.log_artifact("explainer.joblib", artifact_path="shap_explainer")
    mlflow.log_artifact("X_test.parquet", artifact_path="shap_xtest")
    
    print("SHAP explainer and X_test data successfully logged as artifacts.")

[I 2025-08-31 14:52:13,815] A new study created in memory with name: no-name-73f0d188-d0bf-4095-8383-fde0635dc217


[I 2025-08-31 14:52:15,313] Trial 0 finished with value: 0.5231379015168509 and parameters: {'n_estimators': 508, 'max_depth': 10, 'learning_rate': 0.21171668792034823, 'subsample': 0.8259072113675471, 'colsample_bytree': 0.6546347740150822, 'gamma': 0.7751211293375365}. Best is trial 0 with value: 0.5231379015168509.


[I 2025-08-31 14:52:16,377] Trial 1 finished with value: 0.5060869677560338 and parameters: {'n_estimators': 438, 'max_depth': 3, 'learning_rate': 0.19583280143560905, 'subsample': 0.5049277145345092, 'colsample_bytree': 0.804663940092224, 'gamma': 0.8830889627919053}. Best is trial 0 with value: 0.5231379015168509.


[I 2025-08-31 14:52:17,603] Trial 2 finished with value: 0.5210572483757051 and parameters: {'n_estimators': 367, 'max_depth': 10, 'learning_rate': 0.10894142976165791, 'subsample': 0.8607017403472224, 'colsample_bytree': 0.6538572187023242, 'gamma': 0.915742579010333}. Best is trial 0 with value: 0.5231379015168509.


[I 2025-08-31 14:52:18,939] Trial 3 finished with value: 0.5427917685552213 and parameters: {'n_estimators': 773, 'max_depth': 6, 'learning_rate': 0.08069536637570925, 'subsample': 0.9558384562085709, 'colsample_bytree': 0.7844044351974371, 'gamma': 4.413940418618982}. Best is trial 3 with value: 0.5427917685552213.


[I 2025-08-31 14:52:20,036] Trial 4 finished with value: 0.5166734745941992 and parameters: {'n_estimators': 284, 'max_depth': 9, 'learning_rate': 0.09129606145810543, 'subsample': 0.6915017168921138, 'colsample_bytree': 0.8828624371626187, 'gamma': 1.662032008528087}. Best is trial 3 with value: 0.5427917685552213.


[I 2025-08-31 14:52:21,306] Trial 5 finished with value: 0.4957458822197829 and parameters: {'n_estimators': 712, 'max_depth': 4, 'learning_rate': 0.12370675488960654, 'subsample': 0.8851194794223292, 'colsample_bytree': 0.6069358693405393, 'gamma': 2.408954523213471}. Best is trial 3 with value: 0.5427917685552213.


[I 2025-08-31 14:52:22,682] Trial 6 finished with value: 0.5304718197504278 and parameters: {'n_estimators': 690, 'max_depth': 8, 'learning_rate': 0.10164638990640426, 'subsample': 0.724643179836615, 'colsample_bytree': 0.9395610599230554, 'gamma': 3.989729385579893}. Best is trial 3 with value: 0.5427917685552213.


[I 2025-08-31 14:52:23,077] Trial 7 finished with value: 0.5410991963434935 and parameters: {'n_estimators': 200, 'max_depth': 5, 'learning_rate': 0.26726234226230344, 'subsample': 0.9940035118842961, 'colsample_bytree': 0.9806267270307893, 'gamma': 4.088393585009204}. Best is trial 3 with value: 0.5427917685552213.


[I 2025-08-31 14:52:24,021] Trial 8 finished with value: 0.5338923310949906 and parameters: {'n_estimators': 390, 'max_depth': 9, 'learning_rate': 0.20504569517560214, 'subsample': 0.8331553008208867, 'colsample_bytree': 0.8984916477455885, 'gamma': 1.8134605790168368}. Best is trial 3 with value: 0.5427917685552213.


[I 2025-08-31 14:52:24,787] Trial 9 finished with value: 0.49746442369878824 and parameters: {'n_estimators': 492, 'max_depth': 4, 'learning_rate': 0.25046936247969875, 'subsample': 0.8742983840754235, 'colsample_bytree': 0.5189626029706885, 'gamma': 4.982472637302412}. Best is trial 3 with value: 0.5427917685552213.


[I 2025-08-31 14:52:26,635] Trial 10 finished with value: 0.5525803029057835 and parameters: {'n_estimators': 991, 'max_depth': 6, 'learning_rate': 0.016573023422498334, 'subsample': 0.9976427949732856, 'colsample_bytree': 0.7780754101147677, 'gamma': 3.44034107121122}. Best is trial 10 with value: 0.5525803029057835.


[I 2025-08-31 14:52:28,342] Trial 11 finished with value: 0.5654964406829268 and parameters: {'n_estimators': 995, 'max_depth': 6, 'learning_rate': 0.03510779450368051, 'subsample': 0.9998335849126858, 'colsample_bytree': 0.77309851535282, 'gamma': 3.3660091298985977}. Best is trial 11 with value: 0.5654964406829268.


[I 2025-08-31 14:52:30,497] Trial 12 finished with value: 0.552750480287851 and parameters: {'n_estimators': 995, 'max_depth': 7, 'learning_rate': 0.018919625839655414, 'subsample': 0.9981530728348922, 'colsample_bytree': 0.7249651160470216, 'gamma': 3.2292134713106266}. Best is trial 11 with value: 0.5654964406829268.


[I 2025-08-31 14:52:34,095] Trial 13 finished with value: 0.5275239123994448 and parameters: {'n_estimators': 954, 'max_depth': 7, 'learning_rate': 0.01157299725414157, 'subsample': 0.6402237189743234, 'colsample_bytree': 0.6984628901023557, 'gamma': 3.159204442024192}. Best is trial 11 with value: 0.5654964406829268.


[I 2025-08-31 14:52:35,683] Trial 14 finished with value: 0.5257838600943654 and parameters: {'n_estimators': 852, 'max_depth': 7, 'learning_rate': 0.05649610598147406, 'subsample': 0.9301598133839111, 'colsample_bytree': 0.8393818203974762, 'gamma': 2.9326464751507477}. Best is trial 11 with value: 0.5654964406829268.


[I 2025-08-31 14:52:37,633] Trial 15 finished with value: 0.5043950783406161 and parameters: {'n_estimators': 860, 'max_depth': 5, 'learning_rate': 0.045853383703619646, 'subsample': 0.786742624878825, 'colsample_bytree': 0.7246542303724923, 'gamma': 2.429671128573503}. Best is trial 11 with value: 0.5654964406829268.


[I 2025-08-31 14:52:38,610] Trial 16 finished with value: 0.5252059697767095 and parameters: {'n_estimators': 609, 'max_depth': 8, 'learning_rate': 0.1578815159103131, 'subsample': 0.9308290275549422, 'colsample_bytree': 0.5550756052285545, 'gamma': 3.573364767030586}. Best is trial 11 with value: 0.5654964406829268.


[I 2025-08-31 14:52:42,179] Trial 17 finished with value: 0.5400472007687876 and parameters: {'n_estimators': 867, 'max_depth': 5, 'learning_rate': 0.04784774681166922, 'subsample': 0.5875439826606856, 'colsample_bytree': 0.7102443245980057, 'gamma': 0.08759125322378303}. Best is trial 11 with value: 0.5654964406829268.


[I 2025-08-31 14:52:42,734] Trial 18 finished with value: 0.559403466117372 and parameters: {'n_estimators': 109, 'max_depth': 8, 'learning_rate': 0.14557903158577482, 'subsample': 0.7692584599532645, 'colsample_bytree': 0.8581126604023379, 'gamma': 2.8218567034810214}. Best is trial 11 with value: 0.5654964406829268.


[I 2025-08-31 14:52:43,278] Trial 19 finished with value: 0.5215230995055002 and parameters: {'n_estimators': 152, 'max_depth': 8, 'learning_rate': 0.29928294725406696, 'subsample': 0.7642124286788088, 'colsample_bytree': 0.8506414078484391, 'gamma': 2.762564361322318}. Best is trial 11 with value: 0.5654964406829268.


[I 2025-08-31 14:52:44,877] Trial 20 finished with value: 0.5407385213563002 and parameters: {'n_estimators': 616, 'max_depth': 9, 'learning_rate': 0.15228469667654906, 'subsample': 0.6486746582753253, 'colsample_bytree': 0.9966357476799504, 'gamma': 2.0987422624978818}. Best is trial 11 with value: 0.5654964406829268.


[I 2025-08-31 14:52:46,593] Trial 21 finished with value: 0.5664049707828308 and parameters: {'n_estimators': 939, 'max_depth': 7, 'learning_rate': 0.06680042484823065, 'subsample': 0.9502713000407859, 'colsample_bytree': 0.7501858400733917, 'gamma': 3.4830106597549033}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:52:48,180] Trial 22 finished with value: 0.5246170201995677 and parameters: {'n_estimators': 912, 'max_depth': 6, 'learning_rate': 0.1393931836822036, 'subsample': 0.9211470137070696, 'colsample_bytree': 0.8165095035940374, 'gamma': 3.700064363170893}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:52:48,698] Trial 23 finished with value: 0.5500199101546706 and parameters: {'n_estimators': 104, 'max_depth': 8, 'learning_rate': 0.07209118221587996, 'subsample': 0.8127667139465754, 'colsample_bytree': 0.756586900405484, 'gamma': 4.535877273895888}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:52:50,071] Trial 24 finished with value: 0.5073700731964322 and parameters: {'n_estimators': 796, 'max_depth': 7, 'learning_rate': 0.17527280125542333, 'subsample': 0.9484952814545513, 'colsample_bytree': 0.8787078301919373, 'gamma': 2.8205389035205357}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:52:50,950] Trial 25 finished with value: 0.5190406645105395 and parameters: {'n_estimators': 265, 'max_depth': 7, 'learning_rate': 0.038568045767645276, 'subsample': 0.907301995886391, 'colsample_bytree': 0.9256911722767569, 'gamma': 3.7760310185619366}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:52:52,701] Trial 26 finished with value: 0.5534897011143576 and parameters: {'n_estimators': 922, 'max_depth': 6, 'learning_rate': 0.12433250013863448, 'subsample': 0.5408051732517088, 'colsample_bytree': 0.6495858003235608, 'gamma': 3.2682096152102815}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:52:54,078] Trial 27 finished with value: 0.5536793479002308 and parameters: {'n_estimators': 768, 'max_depth': 8, 'learning_rate': 0.07300817322094752, 'subsample': 0.9690384432350343, 'colsample_bytree': 0.8394619143267714, 'gamma': 4.315512012628497}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:52:55,996] Trial 28 finished with value: 0.5197943067898445 and parameters: {'n_estimators': 621, 'max_depth': 9, 'learning_rate': 0.06493470236820931, 'subsample': 0.7138380659992406, 'colsample_bytree': 0.752286054990106, 'gamma': 2.039505603183882}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:52:58,657] Trial 29 finished with value: 0.5192400486506614 and parameters: {'n_estimators': 528, 'max_depth': 10, 'learning_rate': 0.030487270661470223, 'subsample': 0.8190588434201789, 'colsample_bytree': 0.6006961847074026, 'gamma': 1.3151825450443848}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:52:59,853] Trial 30 finished with value: 0.505317358540618 and parameters: {'n_estimators': 679, 'max_depth': 5, 'learning_rate': 0.18375044076595087, 'subsample': 0.8553747690699472, 'colsample_bytree': 0.6935398511076913, 'gamma': 2.663622572196989}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:01,257] Trial 31 finished with value: 0.548949770720235 and parameters: {'n_estimators': 791, 'max_depth': 8, 'learning_rate': 0.08991464325865516, 'subsample': 0.9704564348589723, 'colsample_bytree': 0.8366315095698554, 'gamma': 4.27050016700203}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:02,714] Trial 32 finished with value: 0.5391147515697122 and parameters: {'n_estimators': 913, 'max_depth': 8, 'learning_rate': 0.22444369723513377, 'subsample': 0.9676965683534171, 'colsample_bytree': 0.7966926203333594, 'gamma': 4.977463302101603}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:04,059] Trial 33 finished with value: 0.5207673527109462 and parameters: {'n_estimators': 743, 'max_depth': 7, 'learning_rate': 0.11732564846891308, 'subsample': 0.9040097542957805, 'colsample_bytree': 0.8583027636153592, 'gamma': 3.993630214194887}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:05,516] Trial 34 finished with value: 0.530063963414882 and parameters: {'n_estimators': 860, 'max_depth': 6, 'learning_rate': 0.07245680276422196, 'subsample': 0.9662092018180031, 'colsample_bytree': 0.8218026629710129, 'gamma': 4.672674670915577}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:06,955] Trial 35 finished with value: 0.5042692284920329 and parameters: {'n_estimators': 805, 'max_depth': 10, 'learning_rate': 0.09259962260039165, 'subsample': 0.892421095773993, 'colsample_bytree': 0.7753558993420451, 'gamma': 2.9691447724829088}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:07,941] Trial 36 finished with value: 0.5526688912774721 and parameters: {'n_estimators': 460, 'max_depth': 9, 'learning_rate': 0.060636644207182185, 'subsample': 0.942362999781592, 'colsample_bytree': 0.9323110568121505, 'gamma': 3.7477077136026415}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:09,314] Trial 37 finished with value: 0.48612014392638747 and parameters: {'n_estimators': 951, 'max_depth': 3, 'learning_rate': 0.035447482196052246, 'subsample': 0.8605359438033798, 'colsample_bytree': 0.672769712739624, 'gamma': 3.456315456261698}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:09,952] Trial 38 finished with value: 0.5586816917310606 and parameters: {'n_estimators': 285, 'max_depth': 8, 'learning_rate': 0.10642689519096558, 'subsample': 0.6433293796025483, 'colsample_bytree': 0.9000094242453075, 'gamma': 4.258981793538594}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:10,746] Trial 39 finished with value: 0.5541937261089723 and parameters: {'n_estimators': 276, 'max_depth': 6, 'learning_rate': 0.10618778484538274, 'subsample': 0.6638617824561047, 'colsample_bytree': 0.955638263352094, 'gamma': 2.209828392601842}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:11,466] Trial 40 finished with value: 0.5362921006296151 and parameters: {'n_estimators': 350, 'max_depth': 7, 'learning_rate': 0.13457233253815234, 'subsample': 0.5732358875409023, 'colsample_bytree': 0.9091607706849291, 'gamma': 3.8722259813613107}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:12,200] Trial 41 finished with value: 0.5445516390231026 and parameters: {'n_estimators': 264, 'max_depth': 6, 'learning_rate': 0.10398650188256611, 'subsample': 0.6661458502768055, 'colsample_bytree': 0.965930131894398, 'gamma': 2.2066486797692457}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:12,914] Trial 42 finished with value: 0.5583394383753743 and parameters: {'n_estimators': 204, 'max_depth': 5, 'learning_rate': 0.11206969104857495, 'subsample': 0.6046406548480221, 'colsample_bytree': 0.8786872686105189, 'gamma': 1.5798879697150374}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:13,547] Trial 43 finished with value: 0.5535463827172163 and parameters: {'n_estimators': 191, 'max_depth': 4, 'learning_rate': 0.08743173269629798, 'subsample': 0.6119572955167393, 'colsample_bytree': 0.8823680079847503, 'gamma': 1.554842223650695}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:14,405] Trial 44 finished with value: 0.5370306835167875 and parameters: {'n_estimators': 214, 'max_depth': 5, 'learning_rate': 0.15408118199758053, 'subsample': 0.5327026836460851, 'colsample_bytree': 0.9032304432085678, 'gamma': 0.6120541952543004}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:15,266] Trial 45 finished with value: 0.5457521516666064 and parameters: {'n_estimators': 325, 'max_depth': 7, 'learning_rate': 0.13815045813327995, 'subsample': 0.7009731217814112, 'colsample_bytree': 0.873182638344092, 'gamma': 1.3175587867838552}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:15,613] Trial 46 finished with value: 0.5149884971541507 and parameters: {'n_estimators': 104, 'max_depth': 4, 'learning_rate': 0.11353903394759098, 'subsample': 0.751684697134013, 'colsample_bytree': 0.8061407493665588, 'gamma': 3.1264566727319085}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:16,417] Trial 47 finished with value: 0.5167877919843067 and parameters: {'n_estimators': 415, 'max_depth': 5, 'learning_rate': 0.1699613527835466, 'subsample': 0.5001820960330638, 'colsample_bytree': 0.7740872931798133, 'gamma': 2.658137883412445}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:16,872] Trial 48 finished with value: 0.5484707366555344 and parameters: {'n_estimators': 156, 'max_depth': 8, 'learning_rate': 0.12546210232484556, 'subsample': 0.6234889056482305, 'colsample_bytree': 0.7410582065349979, 'gamma': 4.0663635060189325}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:17,828] Trial 49 finished with value: 0.5577483097834259 and parameters: {'n_estimators': 224, 'max_depth': 6, 'learning_rate': 0.023984890175251856, 'subsample': 0.555633178763803, 'colsample_bytree': 0.8624712573729879, 'gamma': 3.3916265071240015}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:18,933] Trial 50 finished with value: 0.545137202768295 and parameters: {'n_estimators': 153, 'max_depth': 9, 'learning_rate': 0.05469311821798056, 'subsample': 0.6002635743987094, 'colsample_bytree': 0.9148822745572694, 'gamma': 1.8153707561755452}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:20,084] Trial 51 finished with value: 0.5661467536267484 and parameters: {'n_estimators': 237, 'max_depth': 6, 'learning_rate': 0.014358349119753276, 'subsample': 0.5654887521850014, 'colsample_bytree': 0.863928083061371, 'gamma': 3.3590742452335958}. Best is trial 21 with value: 0.5664049707828308.


[I 2025-08-31 14:53:21,588] Trial 52 finished with value: 0.5761415641137786 and parameters: {'n_estimators': 306, 'max_depth': 6, 'learning_rate': 0.011199519637680764, 'subsample': 0.6785648730821521, 'colsample_bytree': 0.8958494764170057, 'gamma': 3.017357827444835}. Best is trial 52 with value: 0.5761415641137786.


[I 2025-08-31 14:53:23,280] Trial 53 finished with value: 0.5679807963448404 and parameters: {'n_estimators': 352, 'max_depth': 7, 'learning_rate': 0.011605838344222469, 'subsample': 0.6714945807284182, 'colsample_bytree': 0.7914886659238841, 'gamma': 3.1341520512417915}. Best is trial 52 with value: 0.5761415641137786.


[I 2025-08-31 14:53:24,705] Trial 54 finished with value: 0.5412759073835389 and parameters: {'n_estimators': 319, 'max_depth': 7, 'learning_rate': 0.014326999293977245, 'subsample': 0.780322485087509, 'colsample_bytree': 0.7848849314535016, 'gamma': 3.030984570850751}. Best is trial 52 with value: 0.5761415641137786.


[I 2025-08-31 14:53:25,556] Trial 55 finished with value: 0.5516046908580874 and parameters: {'n_estimators': 369, 'max_depth': 6, 'learning_rate': 0.0446970286096838, 'subsample': 0.7320751259365263, 'colsample_bytree': 0.8094923831370872, 'gamma': 3.6077845367488437}. Best is trial 52 with value: 0.5761415641137786.


[I 2025-08-31 14:53:26,993] Trial 56 finished with value: 0.5426568116952601 and parameters: {'n_estimators': 474, 'max_depth': 7, 'learning_rate': 0.028231765309090613, 'subsample': 0.6947047732283987, 'colsample_bytree': 0.7325700081785564, 'gamma': 2.514468217816644}. Best is trial 52 with value: 0.5761415641137786.


[I 2025-08-31 14:53:28,352] Trial 57 finished with value: 0.5460686410331923 and parameters: {'n_estimators': 410, 'max_depth': 6, 'learning_rate': 0.015476615523676863, 'subsample': 0.6841353409443368, 'colsample_bytree': 0.7636966527554925, 'gamma': 3.310825274679616}. Best is trial 52 with value: 0.5761415641137786.


[I 2025-08-31 14:53:29,376] Trial 58 finished with value: 0.5622978315910271 and parameters: {'n_estimators': 231, 'max_depth': 7, 'learning_rate': 0.024077485271221687, 'subsample': 0.734147027332809, 'colsample_bytree': 0.8237089845556094, 'gamma': 2.9294865289357115}. Best is trial 52 with value: 0.5761415641137786.


[I 2025-08-31 14:53:31,919] Trial 59 finished with value: 0.545063263690879 and parameters: {'n_estimators': 964, 'max_depth': 7, 'learning_rate': 0.010965529041533839, 'subsample': 0.7367293626062577, 'colsample_bytree': 0.8229887524316115, 'gamma': 3.4642026550346072}. Best is trial 52 with value: 0.5761415641137786.


[I 2025-08-31 14:53:32,698] Trial 60 finished with value: 0.5549267899622526 and parameters: {'n_estimators': 239, 'max_depth': 6, 'learning_rate': 0.03747075597995324, 'subsample': 0.7139228981479245, 'colsample_bytree': 0.7898037875741084, 'gamma': 3.1287589775687645}. Best is trial 52 with value: 0.5761415641137786.


[I 2025-08-31 14:53:33,869] Trial 61 finished with value: 0.5310004288955088 and parameters: {'n_estimators': 309, 'max_depth': 7, 'learning_rate': 0.022761634606246056, 'subsample': 0.7948142320200694, 'colsample_bytree': 0.8456134666562215, 'gamma': 2.7996062847113548}. Best is trial 52 with value: 0.5761415641137786.


[I 2025-08-31 14:53:34,462] Trial 62 finished with value: 0.5464404634836937 and parameters: {'n_estimators': 133, 'max_depth': 7, 'learning_rate': 0.028119548074440427, 'subsample': 0.9843567407594506, 'colsample_bytree': 0.8296661440628162, 'gamma': 2.9169659113942297}. Best is trial 52 with value: 0.5761415641137786.


[I 2025-08-31 14:53:35,465] Trial 63 finished with value: 0.5309650918070203 and parameters: {'n_estimators': 350, 'max_depth': 6, 'learning_rate': 0.043142573869732095, 'subsample': 0.6724595365097601, 'colsample_bytree': 0.7063780975289635, 'gamma': 2.5373752347678096}. Best is trial 52 with value: 0.5761415641137786.


[I 2025-08-31 14:53:36,458] Trial 64 finished with value: 0.578660450276016 and parameters: {'n_estimators': 178, 'max_depth': 6, 'learning_rate': 0.010534300325728628, 'subsample': 0.7635374882208043, 'colsample_bytree': 0.8578560260780848, 'gamma': 3.203081819395229}. Best is trial 64 with value: 0.578660450276016.


[I 2025-08-31 14:53:37,061] Trial 65 finished with value: 0.5278885457450891 and parameters: {'n_estimators': 183, 'max_depth': 6, 'learning_rate': 0.051338745871742725, 'subsample': 0.7481022594988724, 'colsample_bytree': 0.801381584284948, 'gamma': 3.2500600899981547}. Best is trial 64 with value: 0.578660450276016.


[I 2025-08-31 14:53:37,914] Trial 66 finished with value: 0.6142658371931382 and parameters: {'n_estimators': 171, 'max_depth': 6, 'learning_rate': 0.010318620065060497, 'subsample': 0.5275534536321043, 'colsample_bytree': 0.7444384024595965, 'gamma': 3.5803148199765724}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:38,523] Trial 67 finished with value: 0.5609789951381213 and parameters: {'n_estimators': 168, 'max_depth': 5, 'learning_rate': 0.03213326699975808, 'subsample': 0.5277369665304037, 'colsample_bytree': 0.7457378866078077, 'gamma': 3.6484572671917905}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:40,149] Trial 68 finished with value: 0.5531294566276523 and parameters: {'n_estimators': 581, 'max_depth': 5, 'learning_rate': 0.010194381108896804, 'subsample': 0.5661287641413497, 'colsample_bytree': 0.6801938753414764, 'gamma': 3.8784374008916407}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:42,300] Trial 69 finished with value: 0.5329292193892267 and parameters: {'n_estimators': 997, 'max_depth': 6, 'learning_rate': 0.01979768084222683, 'subsample': 0.5146271876408068, 'colsample_bytree': 0.7677320495389943, 'gamma': 3.5085999170035276}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:43,021] Trial 70 finished with value: 0.5055127432888844 and parameters: {'n_estimators': 257, 'max_depth': 6, 'learning_rate': 0.03679767182430509, 'subsample': 0.84150491973746, 'colsample_bytree': 0.7220891530231156, 'gamma': 3.0964777855532652}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:44,100] Trial 71 finished with value: 0.5564259235880998 and parameters: {'n_estimators': 243, 'max_depth': 7, 'learning_rate': 0.02322968397259511, 'subsample': 0.5452190062109997, 'colsample_bytree': 0.7930277282607163, 'gamma': 3.2678535466221423}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:44,926] Trial 72 finished with value: 0.5380882826110798 and parameters: {'n_estimators': 293, 'max_depth': 6, 'learning_rate': 0.04853382700273346, 'subsample': 0.6272995162162814, 'colsample_bytree': 0.8911356058899352, 'gamma': 3.323818668956417}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:46,130] Trial 73 finished with value: 0.5550900292855048 and parameters: {'n_estimators': 219, 'max_depth': 7, 'learning_rate': 0.018277982870882635, 'subsample': 0.7131127033843945, 'colsample_bytree': 0.8626360402006124, 'gamma': 2.9288368393740374}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:46,591] Trial 74 finished with value: 0.5490829467210099 and parameters: {'n_estimators': 186, 'max_depth': 6, 'learning_rate': 0.06457128182136068, 'subsample': 0.9901376887615786, 'colsample_bytree': 0.818432322479444, 'gamma': 2.6393663323491663}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:47,227] Trial 75 finished with value: 0.5796714842302049 and parameters: {'n_estimators': 137, 'max_depth': 7, 'learning_rate': 0.030925651271980388, 'subsample': 0.5858355377734066, 'colsample_bytree': 0.7592370268695536, 'gamma': 3.823913330158783}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:47,786] Trial 76 finished with value: 0.5611145019976752 and parameters: {'n_estimators': 118, 'max_depth': 6, 'learning_rate': 0.03194979668360952, 'subsample': 0.5723500324525805, 'colsample_bytree': 0.7552680116074687, 'gamma': 3.8716147785789374}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:48,259] Trial 77 finished with value: 0.5667035432372306 and parameters: {'n_estimators': 124, 'max_depth': 5, 'learning_rate': 0.042760443827994925, 'subsample': 0.5929304338049762, 'colsample_bytree': 0.7210986540307041, 'gamma': 3.648535621461082}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:48,682] Trial 78 finished with value: 0.5368268565016278 and parameters: {'n_estimators': 142, 'max_depth': 4, 'learning_rate': 0.03991398856534212, 'subsample': 0.5874385024881866, 'colsample_bytree': 0.6461611295962353, 'gamma': 4.187768135279326}. Best is trial 66 with value: 0.6142658371931382.


[I 2025-08-31 14:53:49,331] Trial 79 finished with value: 0.6329228918989988 and parameters: {'n_estimators': 132, 'max_depth': 5, 'learning_rate': 0.010532047151395241, 'subsample': 0.5880291966013534, 'colsample_bytree': 0.7259614339462355, 'gamma': 3.648215343330096}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:49,846] Trial 80 finished with value: 0.5733253802848755 and parameters: {'n_estimators': 128, 'max_depth': 5, 'learning_rate': 0.058175039274445256, 'subsample': 0.5878947937322189, 'colsample_bytree': 0.7196021184185638, 'gamma': 3.6633310919921014}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:50,300] Trial 81 finished with value: 0.5451585741187469 and parameters: {'n_estimators': 121, 'max_depth': 5, 'learning_rate': 0.07736455996035456, 'subsample': 0.5875280154712776, 'colsample_bytree': 0.7118448098412573, 'gamma': 3.7602148244530342}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:50,931] Trial 82 finished with value: 0.5496886431387115 and parameters: {'n_estimators': 182, 'max_depth': 5, 'learning_rate': 0.061642254742125945, 'subsample': 0.6254438358673027, 'colsample_bytree': 0.7332327596174787, 'gamma': 3.6148413762347205}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:51,338] Trial 83 finished with value: 0.5617601111373973 and parameters: {'n_estimators': 101, 'max_depth': 4, 'learning_rate': 0.05233176068044932, 'subsample': 0.5167917185501291, 'colsample_bytree': 0.6875171146727197, 'gamma': 3.9140769423536046}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:51,966] Trial 84 finished with value: 0.5649884296729217 and parameters: {'n_estimators': 133, 'max_depth': 5, 'learning_rate': 0.02735605896094606, 'subsample': 0.653596072316837, 'colsample_bytree': 0.7183985341573945, 'gamma': 3.5278683623772493}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:52,518] Trial 85 finished with value: 0.5558484805099665 and parameters: {'n_estimators': 166, 'max_depth': 5, 'learning_rate': 0.042409869376328194, 'subsample': 0.5538682584770367, 'colsample_bytree': 0.6697130365000324, 'gamma': 4.466754074527722}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:53,759] Trial 86 finished with value: 0.5431917910918888 and parameters: {'n_estimators': 510, 'max_depth': 4, 'learning_rate': 0.018496926461456377, 'subsample': 0.5984415779044776, 'colsample_bytree': 0.7026714691742788, 'gamma': 4.088044999881028}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:54,223] Trial 87 finished with value: 0.5417079340975839 and parameters: {'n_estimators': 201, 'max_depth': 5, 'learning_rate': 0.25118651449394286, 'subsample': 0.6343119522371995, 'colsample_bytree': 0.7468215431201626, 'gamma': 3.717495657145806}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:54,796] Trial 88 finished with value: 0.5562257388558266 and parameters: {'n_estimators': 137, 'max_depth': 7, 'learning_rate': 0.03296821096564396, 'subsample': 0.6119113225787126, 'colsample_bytree': 0.6290922674853292, 'gamma': 4.004864000011324}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:55,480] Trial 89 finished with value: 0.5412100819537404 and parameters: {'n_estimators': 169, 'max_depth': 5, 'learning_rate': 0.05855125816561668, 'subsample': 0.5864238416655877, 'colsample_bytree': 0.7382542814588837, 'gamma': 3.168428266951905}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:57,084] Trial 90 finished with value: 0.5339237746406941 and parameters: {'n_estimators': 648, 'max_depth': 4, 'learning_rate': 0.011041823210531005, 'subsample': 0.6536293837935905, 'colsample_bytree': 0.7577879203788939, 'gamma': 3.5155986975535014}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:58,135] Trial 91 finished with value: 0.5570863172848599 and parameters: {'n_estimators': 253, 'max_depth': 6, 'learning_rate': 0.019242738260140606, 'subsample': 0.5570241396917825, 'colsample_bytree': 0.7749328916279237, 'gamma': 3.338702200459763}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:58,971] Trial 92 finished with value: 0.6011114822604682 and parameters: {'n_estimators': 205, 'max_depth': 5, 'learning_rate': 0.010159955465394782, 'subsample': 0.5421933576812594, 'colsample_bytree': 0.7311636949589105, 'gamma': 3.414348537395812}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:53:59,559] Trial 93 finished with value: 0.5725277924479346 and parameters: {'n_estimators': 154, 'max_depth': 5, 'learning_rate': 0.02739537491084497, 'subsample': 0.5408348439546925, 'colsample_bytree': 0.7302487320507997, 'gamma': 3.8271218563370275}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:00,216] Trial 94 finished with value: 0.6190623018827759 and parameters: {'n_estimators': 155, 'max_depth': 5, 'learning_rate': 0.010385754248268481, 'subsample': 0.5364457550295227, 'colsample_bytree': 0.7283978960730476, 'gamma': 3.782232650614342}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:00,905] Trial 95 finished with value: 0.5726324947765523 and parameters: {'n_estimators': 200, 'max_depth': 5, 'learning_rate': 0.02531625192939813, 'subsample': 0.5309183328232547, 'colsample_bytree': 0.6921579272678061, 'gamma': 3.7885658468866117}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:01,580] Trial 96 finished with value: 0.5724565675449222 and parameters: {'n_estimators': 208, 'max_depth': 5, 'learning_rate': 0.025360801063614026, 'subsample': 0.5305879001396506, 'colsample_bytree': 0.6626636937730894, 'gamma': 4.141924666902002}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:02,197] Trial 97 finished with value: 0.6308100896410942 and parameters: {'n_estimators': 147, 'max_depth': 5, 'learning_rate': 0.010115936555635759, 'subsample': 0.5429984297296405, 'colsample_bytree': 0.6931362581894677, 'gamma': 3.8035143530751747}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:02,643] Trial 98 finished with value: 0.5932521795244459 and parameters: {'n_estimators': 169, 'max_depth': 3, 'learning_rate': 0.018676153993503055, 'subsample': 0.5133156208414689, 'colsample_bytree': 0.6942809634256605, 'gamma': 4.3507602237766765}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:03,075] Trial 99 finished with value: 0.5796620964163061 and parameters: {'n_estimators': 171, 'max_depth': 3, 'learning_rate': 0.016194122239951536, 'subsample': 0.5766777115294797, 'colsample_bytree': 0.629928031385465, 'gamma': 4.69927338644265}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:03,504] Trial 100 finished with value: 0.5935076252008027 and parameters: {'n_estimators': 170, 'max_depth': 3, 'learning_rate': 0.016803958549943078, 'subsample': 0.522985535117302, 'colsample_bytree': 0.6046898940622567, 'gamma': 4.677945053255172}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:03,939] Trial 101 finished with value: 0.5969674445207009 and parameters: {'n_estimators': 168, 'max_depth': 3, 'learning_rate': 0.016375979479413445, 'subsample': 0.5137205497892722, 'colsample_bytree': 0.602034409120023, 'gamma': 4.7181197744543235}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:04,382] Trial 102 finished with value: 0.5875662706965528 and parameters: {'n_estimators': 173, 'max_depth': 3, 'learning_rate': 0.019183305690135474, 'subsample': 0.5152800951025192, 'colsample_bytree': 0.5997229575060903, 'gamma': 4.76899833820069}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:04,889] Trial 103 finished with value: 0.6050648352185714 and parameters: {'n_estimators': 157, 'max_depth': 3, 'learning_rate': 0.017000915622469446, 'subsample': 0.5111128868127648, 'colsample_bytree': 0.5845706331073324, 'gamma': 4.785002206238856}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:05,313] Trial 104 finished with value: 0.5948899903866456 and parameters: {'n_estimators': 154, 'max_depth': 3, 'learning_rate': 0.01880255860743643, 'subsample': 0.5095681059807631, 'colsample_bytree': 0.5723464026886615, 'gamma': 4.85334475054045}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:05,716] Trial 105 finished with value: 0.6032674498732892 and parameters: {'n_estimators': 153, 'max_depth': 3, 'learning_rate': 0.018124977969389295, 'subsample': 0.5102068563740352, 'colsample_bytree': 0.5670854776757016, 'gamma': 4.814056394623108}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:06,097] Trial 106 finished with value: 0.5610510555783268 and parameters: {'n_estimators': 151, 'max_depth': 3, 'learning_rate': 0.03736510745566424, 'subsample': 0.5077157791707139, 'colsample_bytree': 0.5669058903541044, 'gamma': 4.883999839560383}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:06,618] Trial 107 finished with value: 0.5519144915322355 and parameters: {'n_estimators': 222, 'max_depth': 3, 'learning_rate': 0.020705240000124456, 'subsample': 0.5216376899859326, 'colsample_bytree': 0.5587427513622751, 'gamma': 4.582039950516268}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:06,939] Trial 108 finished with value: 0.6328791612694384 and parameters: {'n_estimators': 106, 'max_depth': 3, 'learning_rate': 0.016862102065671604, 'subsample': 0.507935305336951, 'colsample_bytree': 0.5335654017543212, 'gamma': 4.391650783801768}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:07,217] Trial 109 finished with value: 0.5650302082225984 and parameters: {'n_estimators': 115, 'max_depth': 3, 'learning_rate': 0.20225267678367187, 'subsample': 0.5055290787578628, 'colsample_bytree': 0.5006453088424998, 'gamma': 4.8286391213906015}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:07,513] Trial 110 finished with value: 0.602989896289056 and parameters: {'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.03531506050858557, 'subsample': 0.5456624192417311, 'colsample_bytree': 0.5848301529126346, 'gamma': 4.992572098985286}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:07,916] Trial 111 finished with value: 0.6287781365090007 and parameters: {'n_estimators': 149, 'max_depth': 3, 'learning_rate': 0.014986619771297378, 'subsample': 0.5439767295653967, 'colsample_bytree': 0.5815920017183267, 'gamma': 4.5986273511737625}. Best is trial 79 with value: 0.6329228918989988.


[I 2025-08-31 14:54:08,220] Trial 112 finished with value: 0.6922990132358218 and parameters: {'n_estimators': 103, 'max_depth': 3, 'learning_rate': 0.010138038374462326, 'subsample': 0.5485065480459534, 'colsample_bytree': 0.5373300726066178, 'gamma': 4.9888960913180265}. Best is trial 112 with value: 0.6922990132358218.


[I 2025-08-31 14:54:08,532] Trial 113 finished with value: 0.6002669760123 and parameters: {'n_estimators': 110, 'max_depth': 3, 'learning_rate': 0.0343437041973997, 'subsample': 0.5466994425532014, 'colsample_bytree': 0.5838508526949504, 'gamma': 4.9790401332910434}. Best is trial 112 with value: 0.6922990132358218.


[I 2025-08-31 14:54:08,819] Trial 114 finished with value: 0.5987888138185676 and parameters: {'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.03355570574649383, 'subsample': 0.5480989853520895, 'colsample_bytree': 0.5381845787869726, 'gamma': 4.969411910590678}. Best is trial 112 with value: 0.6922990132358218.


[I 2025-08-31 14:54:09,184] Trial 115 finished with value: 0.6731030896933792 and parameters: {'n_estimators': 101, 'max_depth': 4, 'learning_rate': 0.010654044169843576, 'subsample': 0.5372263317405955, 'colsample_bytree': 0.5324887776991117, 'gamma': 4.43851935341674}. Best is trial 112 with value: 0.6922990132358218.


[I 2025-08-31 14:54:09,644] Trial 116 finished with value: 0.6450111310593862 and parameters: {'n_estimators': 144, 'max_depth': 4, 'learning_rate': 0.011037288136255088, 'subsample': 0.5360981758457339, 'colsample_bytree': 0.5406855079289393, 'gamma': 4.605061141446392}. Best is trial 112 with value: 0.6922990132358218.


[I 2025-08-31 14:54:10,086] Trial 117 finished with value: 0.5650266497333798 and parameters: {'n_estimators': 142, 'max_depth': 4, 'learning_rate': 0.024370453333227994, 'subsample': 0.5636956794653254, 'colsample_bytree': 0.5438390039152875, 'gamma': 4.455514223558394}. Best is trial 112 with value: 0.6922990132358218.


[I 2025-08-31 14:54:10,450] Trial 118 finished with value: 0.5762823906898984 and parameters: {'n_estimators': 117, 'max_depth': 4, 'learning_rate': 0.029091920724633437, 'subsample': 0.5357806783236044, 'colsample_bytree': 0.5283599089718722, 'gamma': 4.529525636640284}. Best is trial 112 with value: 0.6922990132358218.


[I 2025-08-31 14:54:10,799] Trial 119 finished with value: 0.6703292476464862 and parameters: {'n_estimators': 125, 'max_depth': 3, 'learning_rate': 0.01042229042764088, 'subsample': 0.5006701531818415, 'colsample_bytree': 0.5147196129496291, 'gamma': 4.637534119205575}. Best is trial 112 with value: 0.6922990132358218.


[I 2025-08-31 14:54:11,234] Trial 120 finished with value: 0.6306658066444664 and parameters: {'n_estimators': 142, 'max_depth': 4, 'learning_rate': 0.012935783015329726, 'subsample': 0.5033308847145969, 'colsample_bytree': 0.5111491580013983, 'gamma': 4.617722295359587}. Best is trial 112 with value: 0.6922990132358218.


[I 2025-08-31 14:54:11,675] Trial 121 finished with value: 0.6288418235543057 and parameters: {'n_estimators': 144, 'max_depth': 4, 'learning_rate': 0.013289381225360643, 'subsample': 0.5008895055003302, 'colsample_bytree': 0.5146714244822778, 'gamma': 4.604927628203383}. Best is trial 112 with value: 0.6922990132358218.


[I 2025-08-31 14:54:12,090] Trial 122 finished with value: 0.6339162435543543 and parameters: {'n_estimators': 132, 'max_depth': 4, 'learning_rate': 0.01377322522506328, 'subsample': 0.5003235144102405, 'colsample_bytree': 0.513679499424257, 'gamma': 4.606677316500686}. Best is trial 112 with value: 0.6922990132358218.


[I 2025-08-31 14:54:12,504] Trial 123 finished with value: 0.650530342676279 and parameters: {'n_estimators': 130, 'max_depth': 4, 'learning_rate': 0.011104734230870546, 'subsample': 0.5006326166264272, 'colsample_bytree': 0.5161721351930932, 'gamma': 4.596003537190471}. Best is trial 112 with value: 0.6922990132358218.


[I 2025-08-31 14:54:12,897] Trial 124 finished with value: 0.605705964928562 and parameters: {'n_estimators': 126, 'max_depth': 4, 'learning_rate': 0.022512554541976065, 'subsample': 0.5000300908259429, 'colsample_bytree': 0.5120115260318976, 'gamma': 4.3544030602122525}. Best is trial 112 with value: 0.6922990132358218.




Best XGBoost Params: {'n_estimators': 103, 'max_depth': 3, 'learning_rate': 0.010138038374462326, 'subsample': 0.5485065480459534, 'colsample_bytree': 0.5373300726066178, 'gamma': 4.9888960913180265}
--- Training and Logging Champion XGBoost Model ---
Champion XGBoost F1 Score: 0.6963


Champion model logged and registered.
--- Logging SHAP assets for the dashboard ---
SHAP explainer and X_test data successfully logged as artifacts.


Registered model 'etf-xgboost-predictor' already exists. Creating a new version of this model...
Created version '3' of model 'etf-xgboost-predictor'.


In [6]:
# --- MLP Challenger Model ---
# Step 1: Imports and Data Scaling
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

# Deep learning models are sensitive to feature scale. We must standardize our data.
# We fit the scaler ONLY on the training data to prevent data leakage from the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data successfully scaled.")
print(f"Shape of scaled training data: {X_train_scaled.shape}")

Data successfully scaled.
Shape of scaled training data: (2380, 32)


In [7]:
# Convert numpy arrays to PyTorch Tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Create TensorDatasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create DataLoaders to handle batching
# We don't shuffle time-series data to preserve temporal order if needed, 
# but for a simple MLP, shuffling is often acceptable. Let's keep it False for rigor.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print("PyTorch Tensors and DataLoaders created.")

PyTorch Tensors and DataLoaders created.


In [8]:
# Step 2: Define the MLP Architecture
class ETF_MLP(nn.Module):
    def __init__(self, input_size, hidden_size_1=128, hidden_size_2=64, dropout_rate=0.5):
        """
        Initializes the MLP model.
        
        Args:
            input_size (int): The number of input features.
            hidden_size_1 (int): Number of neurons in the first hidden layer.
            hidden_size_2 (int): Number of neurons in the second hidden layer.
            dropout_rate (float): The dropout probability.
        """
        super(ETF_MLP, self).__init__()
        
        # --- Layer Definitions ---
        self.layer_1 = nn.Linear(input_size, hidden_size_1)
        self.bn_1 = nn.BatchNorm1d(hidden_size_1)
        
        self.layer_2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.bn_2 = nn.BatchNorm1d(hidden_size_2)
        
        self.output_layer = nn.Linear(hidden_size_2, 1)
        
        # --- Activation and Regularization ---
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)
        
    def forward(self, x):
        """ The forward pass of the model. """
        # First hidden layer
        x = self.layer_1(x)
        x = self.bn_1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Second hidden layer
        x = self.layer_2(x)
        x = self.bn_2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Output layer with sigmoid for binary classification
        x = torch.sigmoid(self.output_layer(x))
        return x

# Instantiate the model to test
input_features = X_train.shape[1]
model_mlp = ETF_MLP(input_size=input_features)
print("MLP Model Architecture:")
print(model_mlp)

MLP Model Architecture:
ETF_MLP(
  (layer_1): Linear(in_features=32, out_features=128, bias=True)
  (bn_1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer_2): Linear(in_features=128, out_features=64, bias=True)
  (bn_2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (output_layer): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
)


In [9]:
# Step 3: Manual MLP Training and Evaluation

# --- Configuration ---
INPUT_SIZE = X_train.shape[1]
LEARNING_RATE = 0.001
EPOCHS = 50

# --- Model, Loss, Optimizer (Demonstrates 5.2, 5.3) ---
model_mlp = ETF_MLP(input_size=INPUT_SIZE, dropout_rate=0.4)
criterion = nn.BCELoss() # Binary Cross Entropy Loss for binary classification
optimizer = torch.optim.Adam(model_mlp.parameters(), lr=LEARNING_RATE) # Adam Optimizer
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS) # LR Schedule

# --- MLflow Logging ---
with mlflow.start_run(run_name="MLP_Manual_Baseline") as run:
    mlflow.log_params({"learning_rate": LEARNING_RATE, "epochs": EPOCHS, "optimizer": "Adam"})
    
    # --- Training Loop ---
    for epoch in range(EPOCHS):
        model_mlp.train() # Set model to training mode
        for features, labels in train_loader:
            # Forward pass
            outputs = model_mlp(features)
            loss = criterion(outputs, labels)
            
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        # Update learning rate
        scheduler.step()
        
    # --- Final Evaluation on Test Set ---
    model_mlp.eval() # Set model to evaluation mode
    all_preds = []
    all_probas = []
    all_labels = []
    with torch.no_grad():
        for features, labels in test_loader:
            outputs = model_mlp(features)
            predicted_classes = (outputs > 0.5).float()
            all_preds.extend(predicted_classes.numpy())
            all_probas.extend(outputs.numpy())
            all_labels.extend(labels.numpy())
    
    # Calculate and log all final metrics
    f1 = f1_score(all_labels, all_preds)
    accuracy = accuracy_score(all_labels, all_preds)
    roc_auc = roc_auc_score(all_labels, all_probas)
    
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("roc_auc", roc_auc)

    print(f"Final MLP F1 Score from manual run: {f1:.4f}")
    print(f"Final MLP Accuracy from manual run: {accuracy:.4f}")
    print(f"Final MLP ROC AUC from manual run: {roc_auc:.4f}")

    # Log the final model
    mlflow.pytorch.log_model(model_mlp, "mlp-model")

Final MLP F1 Score from manual run: 0.2036
Final MLP Accuracy from manual run: 0.4659
Final MLP ROC AUC from manual run: 0.4658


In [10]:
# In notebooks/04_model_training_and_evaluation.ipynb, THE FINAL CELL

from mlflow.tracking import MlflowClient
import joblib

# Train and log the final champion model
with mlflow.start_run(run_name="XGBoost_Tuned_Champion") as run:
    print("--- Training and Logging Champion XGBoost Model ---")
    best_params = study.best_params
    
    mlflow.log_params(best_params)
    
    # Train the model
    model_xgb = xgb.XGBClassifier(**best_params, random_state=42)
    model_xgb.fit(X_train, y_train)
    
    # Evaluate and log metrics
    y_pred_xgb = model_xgb.predict(X_test)
    f1 = f1_score(y_test, y_pred_xgb)
    mlflow.log_metric("f1_score", f1)
    print(f"Champion XGBoost F1 Score: {f1:.4f}")

    # --- Log the Model, SHAP Assets, and Promote ---
    
    # 1. Log the model itself. This will create a new version.
    model_info = mlflow.xgboost.log_model(
        xgb_model=model_xgb,
        artifact_path="xgb-model",
        registered_model_name=MODEL_NAME
    )

    # 2. Find the new model version using the run_id from the logged model
    client = MlflowClient()
    run_id = model_info.run_id
    model_versions = client.search_model_versions(f"run_id='{run_id}'")
    new_version = model_versions[0].version # The first result should be our new version
    print(f"Model registered as '{MODEL_NAME}' version {new_version}.")

    # 3. Log SHAP assets and test data for the dashboard
    explainer = shap.TreeExplainer(model_xgb)
    joblib.dump(explainer, "explainer.joblib")
    X_test.to_parquet("X_test.parquet")
    y_test.to_frame().to_parquet("y_test.parquet") # Save y_test for historical comparison

    mlflow.log_artifact("explainer.joblib", artifact_path="shap_assets")
    mlflow.log_artifact("X_test.parquet", artifact_path="shap_assets")
    mlflow.log_artifact("y_test.parquet", artifact_path="shap_assets") # Log y_test artifact
    print("SHAP assets and test data logged.")
    
    # 4. Promote this new version to the "Production" stage
    print(f"\n--- Promoting Model Version {new_version} to Production ---")
    client.transition_model_version_stage(
        name=MODEL_NAME,
        version=new_version,
        stage="Production",
        archive_existing_versions=True  # Safely archive any old production model
    )
    print(f"Successfully promoted model version {new_version} to 'Production'.")

--- Training and Logging Champion XGBoost Model ---
Champion XGBoost F1 Score: 0.6963




Model registered as 'etf-xgboost-predictor' version 4.
SHAP assets and test data logged.

--- Promoting Model Version 4 to Production ---
Successfully promoted model version 4 to 'Production'.


Registered model 'etf-xgboost-predictor' already exists. Creating a new version of this model...
Created version '4' of model 'etf-xgboost-predictor'.
  client.transition_model_version_stage(
