In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import optuna
import shap
import mlflow
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

mlflow.set_tracking_uri("../mlruns")

# Load your processed data
DATA_PATH = '../data/processed/etf_features.parquet'
data = pd.read_parquet(DATA_PATH)

# Separate features (X) and target (y)
X = data.drop('target', axis=1)
y = data['target']

  import pkg_resources  # noqa: TID251


In [2]:
# Define the chronological split point
# For example, use data up to the end of 2021 for training, and 2022 onwards for testing.
train_end_date = '2021-12-31'
test_start_date = '2022-01-01'

X_train = X.loc[:train_end_date]
y_train = y.loc[:train_end_date]

X_test = X.loc[test_start_date:]
y_test = y.loc[test_start_date:]

print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

mlflow.set_experiment("ETF_Trend_Prediction")

2025/08/31 12:18:31 INFO mlflow.tracking.fluent: Experiment with name 'ETF_Trend_Prediction' does not exist. Creating a new experiment.


Training set size: 2380
Test set size: 908


<Experiment: artifact_location='file:C:/Users/dawso/Dev/Personal/AIGrind/mlops-etf-forecasting/notebooks/../mlruns/625716881864412338', creation_time=1756657111359, experiment_id='625716881864412338', last_update_time=1756657111359, lifecycle_stage='active', name='ETF_Trend_Prediction', tags={}>

In [3]:
# Train Logistic Regression
with mlflow.start_run(run_name="LogisticRegression_Baseline"):
    model_lr = LogisticRegression(max_iter=1000, random_state=42)
    model_lr.fit(X_train, y_train)
    y_pred_lr = model_lr.predict(X_test)
    
    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_lr))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_lr))
    print(f"Logistic Regression F1 Score: {f1_score(y_test, y_pred_lr):.4f}")

# Train Random Forest
with mlflow.start_run(run_name="RandomForest_Baseline"):
    model_rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
    model_rf.fit(X_train, y_train)
    y_pred_rf = model_rf.predict(X_test)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_rf))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_rf))
    print(f"Random Forest F1 Score: {f1_score(y_test, y_pred_rf):.4f}")

Logistic Regression F1 Score: 0.6968


Random Forest F1 Score: 0.6318


In [4]:
def objective(trial):
    # Define the search space for hyperparameters
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'random_state': 42
    }
    
    model = xgb.XGBClassifier(**params)
    
    # Use TimeSeriesSplit for robust cross-validation
    tscv = TimeSeriesSplit(n_splits=5)
    score = cross_val_score(model, X_train, y_train, cv=tscv, scoring='f1', n_jobs=-1).mean()
    
    return score

In [5]:
# Run the study to find the best params
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=125) 

best_params = study.best_params
print("Best XGBoost Params:", best_params)

# Train the final XGBoost model with the best parameters and log to MLflow
with mlflow.start_run(run_name="XGBoost_Tuned_Champion") as run:
    final_xgb_model = xgb.XGBClassifier(**best_params, random_state=42)
    final_xgb_model.fit(X_train, y_train)
    y_pred_xgb = final_xgb_model.predict(X_test)
    y_pred_proba_xgb = final_xgb_model.predict_proba(X_test)[:, 1]

    f1 = f1_score(y_test, y_pred_xgb)
    print(f"Final Tuned XGBoost F1 Score: {f1:.4f}")

    mlflow.log_params(best_params)
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_xgb))
    mlflow.log_metric("roc_auc", roc_auc_score(y_test, y_pred_proba_xgb))

    mlflow.xgboost.log_model(final_xgb_model, "xgb-model")
    champion_run_id = run.info.run_id # Capture run ID

    # --- SHAP Plot Generation and Logging (Move these lines here) ---
    print("\nSHAP analysis complete and plot logged to MLflow.")

    # 1. Create a SHAP Explainer
    explainer = shap.TreeExplainer(final_xgb_model)
    shap_values = explainer.shap_values(X_test) # Or X_train, depending on what you want to explain

    # 2. Generate and save the SHAP summary plot to a temporary file
    # Ensure you import matplotlib.pyplot as plt
    fig, ax = plt.subplots(figsize=(10, 8)) # You might want to specify figure size
    shap.summary_plot(shap_values, X_test, show=False, plot_size=(8, 6)) # show=False prevents immediate display
    plt.title("SHAP Feature Importance for XGBoost Model") # Add a title
    plot_filename = "shap_summary_champion.png" # Give it a more descriptive name
    plt.savefig(plot_filename, bbox_inches='tight', dpi=300) # Save the plot to a file
    plt.close() # Close the plot to free memory

    # 3. Log the saved plot as an MLflow artifact to the *current* active run
    mlflow.log_artifact(plot_filename)

[I 2025-08-31 12:18:32,137] A new study created in memory with name: no-name-fc6fc975-8bd9-46f8-8d85-786464eea616


[I 2025-08-31 12:18:35,728] Trial 0 finished with value: 0.5285851562753703 and parameters: {'n_estimators': 974, 'max_depth': 6, 'learning_rate': 0.13505351251303205, 'subsample': 0.7138811369636299, 'colsample_bytree': 0.7731763970136045, 'gamma': 4.855402932806989}. Best is trial 0 with value: 0.5285851562753703.


[I 2025-08-31 12:18:38,013] Trial 1 finished with value: 0.5354766451821651 and parameters: {'n_estimators': 397, 'max_depth': 9, 'learning_rate': 0.0912202138729572, 'subsample': 0.8821000944448848, 'colsample_bytree': 0.6818146973420138, 'gamma': 3.6278112376818945}. Best is trial 1 with value: 0.5354766451821651.


[I 2025-08-31 12:18:39,592] Trial 2 finished with value: 0.49844744245268907 and parameters: {'n_estimators': 185, 'max_depth': 3, 'learning_rate': 0.14690619026360235, 'subsample': 0.971445200128919, 'colsample_bytree': 0.5953488613265214, 'gamma': 0.11948767021399687}. Best is trial 1 with value: 0.5354766451821651.


[I 2025-08-31 12:18:41,361] Trial 3 finished with value: 0.5498723559942421 and parameters: {'n_estimators': 652, 'max_depth': 10, 'learning_rate': 0.13224795233245287, 'subsample': 0.6316351318142617, 'colsample_bytree': 0.5160512894185337, 'gamma': 1.1275932347684474}. Best is trial 3 with value: 0.5498723559942421.


[I 2025-08-31 12:18:41,628] Trial 4 finished with value: 0.5263832848739725 and parameters: {'n_estimators': 149, 'max_depth': 10, 'learning_rate': 0.26680305566822704, 'subsample': 0.8050527437625995, 'colsample_bytree': 0.9406790353487042, 'gamma': 2.6834049332414844}. Best is trial 3 with value: 0.5498723559942421.


[I 2025-08-31 12:18:41,924] Trial 5 finished with value: 0.5203316196591578 and parameters: {'n_estimators': 524, 'max_depth': 4, 'learning_rate': 0.28814304637364596, 'subsample': 0.7563207339256661, 'colsample_bytree': 0.9176419018590172, 'gamma': 4.298635329894258}. Best is trial 3 with value: 0.5498723559942421.


[I 2025-08-31 12:18:42,170] Trial 6 finished with value: 0.508636047492228 and parameters: {'n_estimators': 267, 'max_depth': 3, 'learning_rate': 0.23915935983641423, 'subsample': 0.7458879866263377, 'colsample_bytree': 0.7061629307453472, 'gamma': 3.2449879188501547}. Best is trial 3 with value: 0.5498723559942421.


[I 2025-08-31 12:18:42,839] Trial 7 finished with value: 0.5024233825832134 and parameters: {'n_estimators': 814, 'max_depth': 7, 'learning_rate': 0.11019414861521018, 'subsample': 0.8776405907785619, 'colsample_bytree': 0.6650042997556952, 'gamma': 3.1252979173130715}. Best is trial 3 with value: 0.5498723559942421.


[I 2025-08-31 12:18:44,800] Trial 8 finished with value: 0.5055592777807346 and parameters: {'n_estimators': 843, 'max_depth': 8, 'learning_rate': 0.023422604455120344, 'subsample': 0.8651613005065782, 'colsample_bytree': 0.7292325716998393, 'gamma': 1.6968669961236715}. Best is trial 3 with value: 0.5498723559942421.


[I 2025-08-31 12:18:45,539] Trial 9 finished with value: 0.5235505701259184 and parameters: {'n_estimators': 439, 'max_depth': 5, 'learning_rate': 0.07194956977602547, 'subsample': 0.6220080198469076, 'colsample_bytree': 0.5124841345611693, 'gamma': 1.8592572706008526}. Best is trial 3 with value: 0.5498723559942421.


[I 2025-08-31 12:18:47,399] Trial 10 finished with value: 0.5494025024151874 and parameters: {'n_estimators': 691, 'max_depth': 10, 'learning_rate': 0.18932261129140412, 'subsample': 0.5179595998644159, 'colsample_bytree': 0.5212585133990313, 'gamma': 0.044403480311077104}. Best is trial 3 with value: 0.5498723559942421.


[I 2025-08-31 12:18:48,333] Trial 11 finished with value: 0.5427593114762996 and parameters: {'n_estimators': 651, 'max_depth': 10, 'learning_rate': 0.20971257855026462, 'subsample': 0.505964651893153, 'colsample_bytree': 0.5164406638206638, 'gamma': 0.624769028251356}. Best is trial 3 with value: 0.5498723559942421.


[I 2025-08-31 12:18:49,233] Trial 12 finished with value: 0.5599690264864577 and parameters: {'n_estimators': 656, 'max_depth': 8, 'learning_rate': 0.18752501539170668, 'subsample': 0.5197020178916544, 'colsample_bytree': 0.5835873742993448, 'gamma': 1.0272601403814297}. Best is trial 12 with value: 0.5599690264864577.


[I 2025-08-31 12:18:49,992] Trial 13 finished with value: 0.5316510734975339 and parameters: {'n_estimators': 658, 'max_depth': 8, 'learning_rate': 0.18433915497568454, 'subsample': 0.6122298117379151, 'colsample_bytree': 0.596547862384102, 'gamma': 1.1615278244377063}. Best is trial 12 with value: 0.5599690264864577.


[I 2025-08-31 12:18:50,928] Trial 14 finished with value: 0.5303728841514672 and parameters: {'n_estimators': 784, 'max_depth': 8, 'learning_rate': 0.1631626506753226, 'subsample': 0.6063850770720022, 'colsample_bytree': 0.8147992415831851, 'gamma': 1.7641675925382332}. Best is trial 12 with value: 0.5599690264864577.


[I 2025-08-31 12:18:52,596] Trial 15 finished with value: 0.5376481903199005 and parameters: {'n_estimators': 542, 'max_depth': 9, 'learning_rate': 0.04938252271616589, 'subsample': 0.5695069801895624, 'colsample_bytree': 0.5989209893650079, 'gamma': 1.098267495616096}. Best is trial 12 with value: 0.5599690264864577.


[I 2025-08-31 12:18:53,521] Trial 16 finished with value: 0.5419264769337034 and parameters: {'n_estimators': 975, 'max_depth': 7, 'learning_rate': 0.23094345231068475, 'subsample': 0.6811590907302529, 'colsample_bytree': 0.5776629465645211, 'gamma': 0.9288397405660419}. Best is trial 12 with value: 0.5599690264864577.


[I 2025-08-31 12:18:54,100] Trial 17 finished with value: 0.5430104473448035 and parameters: {'n_estimators': 381, 'max_depth': 9, 'learning_rate': 0.14305279724346137, 'subsample': 0.6569157071897743, 'colsample_bytree': 0.6345887195613581, 'gamma': 2.0833896868420045}. Best is trial 12 with value: 0.5599690264864577.


[I 2025-08-31 12:18:55,631] Trial 18 finished with value: 0.5412192262841876 and parameters: {'n_estimators': 722, 'max_depth': 6, 'learning_rate': 0.11179858946591854, 'subsample': 0.5695746401019681, 'colsample_bytree': 0.8589581693693696, 'gamma': 0.6138278497458207}. Best is trial 12 with value: 0.5599690264864577.


[I 2025-08-31 12:18:56,249] Trial 19 finished with value: 0.5520281386106334 and parameters: {'n_estimators': 585, 'max_depth': 9, 'learning_rate': 0.17723268172184853, 'subsample': 0.5441519313490879, 'colsample_bytree': 0.5548832988906376, 'gamma': 2.388233041183374}. Best is trial 12 with value: 0.5599690264864577.


[I 2025-08-31 12:18:56,861] Trial 20 finished with value: 0.5223807660304561 and parameters: {'n_estimators': 473, 'max_depth': 7, 'learning_rate': 0.17550595625724513, 'subsample': 0.54150285670368, 'colsample_bytree': 0.7761619354332125, 'gamma': 2.4678340059362243}. Best is trial 12 with value: 0.5599690264864577.


[I 2025-08-31 12:18:57,575] Trial 21 finished with value: 0.5622184855320789 and parameters: {'n_estimators': 618, 'max_depth': 9, 'learning_rate': 0.20949662280570058, 'subsample': 0.5477182343550975, 'colsample_bytree': 0.5555056537676865, 'gamma': 1.4473878510916105}. Best is trial 21 with value: 0.5622184855320789.


[I 2025-08-31 12:18:58,128] Trial 22 finished with value: 0.5500267964792303 and parameters: {'n_estimators': 592, 'max_depth': 9, 'learning_rate': 0.2130032478603545, 'subsample': 0.5594892438065092, 'colsample_bytree': 0.5633054477468259, 'gamma': 2.3023714250397616}. Best is trial 21 with value: 0.5622184855320789.


[I 2025-08-31 12:18:58,803] Trial 23 finished with value: 0.5239588074885265 and parameters: {'n_estimators': 579, 'max_depth': 8, 'learning_rate': 0.26026430730536415, 'subsample': 0.5262173324873787, 'colsample_bytree': 0.637657394370716, 'gamma': 1.432268993884707}. Best is trial 21 with value: 0.5622184855320789.


[I 2025-08-31 12:18:59,486] Trial 24 finished with value: 0.5336652396826531 and parameters: {'n_estimators': 762, 'max_depth': 9, 'learning_rate': 0.20959403924157172, 'subsample': 0.5001922889429704, 'colsample_bytree': 0.5592284298129607, 'gamma': 2.702048508028109}. Best is trial 21 with value: 0.5622184855320789.


[I 2025-08-31 12:19:00,907] Trial 25 finished with value: 0.5564385707083016 and parameters: {'n_estimators': 885, 'max_depth': 8, 'learning_rate': 0.1981947295252467, 'subsample': 0.5889205981311892, 'colsample_bytree': 0.9955743843763993, 'gamma': 0.6006102546602199}. Best is trial 21 with value: 0.5622184855320789.


[I 2025-08-31 12:19:02,153] Trial 26 finished with value: 0.5717070066659942 and parameters: {'n_estimators': 895, 'max_depth': 8, 'learning_rate': 0.23673482516245875, 'subsample': 0.5843026841149371, 'colsample_bytree': 0.9790618021260659, 'gamma': 0.6247599175118006}. Best is trial 26 with value: 0.5717070066659942.


[I 2025-08-31 12:19:03,293] Trial 27 finished with value: 0.5590145217201098 and parameters: {'n_estimators': 876, 'max_depth': 7, 'learning_rate': 0.23551530523216363, 'subsample': 0.6637900432371107, 'colsample_bytree': 0.8340637373757702, 'gamma': 0.3575235054716621}. Best is trial 26 with value: 0.5717070066659942.


[I 2025-08-31 12:19:03,826] Trial 28 finished with value: 0.5278944754477577 and parameters: {'n_estimators': 320, 'max_depth': 6, 'learning_rate': 0.29134842321027266, 'subsample': 0.7055954808170762, 'colsample_bytree': 0.8832181086745251, 'gamma': 1.3738611649192682}. Best is trial 26 with value: 0.5717070066659942.


[I 2025-08-31 12:19:04,864] Trial 29 finished with value: 0.5634192693692672 and parameters: {'n_estimators': 911, 'max_depth': 8, 'learning_rate': 0.2671788001843813, 'subsample': 0.5820831828687014, 'colsample_bytree': 0.7677419495647513, 'gamma': 0.8509237895854773}. Best is trial 26 with value: 0.5717070066659942.


[I 2025-08-31 12:19:05,931] Trial 30 finished with value: 0.5338175153651157 and parameters: {'n_estimators': 927, 'max_depth': 5, 'learning_rate': 0.25980277371859856, 'subsample': 0.6400097226273784, 'colsample_bytree': 0.762428629175466, 'gamma': 0.4048617535418113}. Best is trial 26 with value: 0.5717070066659942.


[I 2025-08-31 12:19:07,039] Trial 31 finished with value: 0.5722564036004956 and parameters: {'n_estimators': 924, 'max_depth': 8, 'learning_rate': 0.2462067311759621, 'subsample': 0.5897337440300942, 'colsample_bytree': 0.9928854307888846, 'gamma': 0.845856059089068}. Best is trial 31 with value: 0.5722564036004956.


[I 2025-08-31 12:19:08,206] Trial 32 finished with value: 0.5377130159346952 and parameters: {'n_estimators': 999, 'max_depth': 7, 'learning_rate': 0.27911720407297985, 'subsample': 0.6032598316069486, 'colsample_bytree': 0.9968753705761388, 'gamma': 0.7914237702091843}. Best is trial 31 with value: 0.5722564036004956.


[I 2025-08-31 12:19:09,256] Trial 33 finished with value: 0.5421605607373245 and parameters: {'n_estimators': 942, 'max_depth': 9, 'learning_rate': 0.23471789359890996, 'subsample': 0.5821919057179217, 'colsample_bytree': 0.9502555915012025, 'gamma': 1.5244582503789625}. Best is trial 31 with value: 0.5722564036004956.


[I 2025-08-31 12:19:09,775] Trial 34 finished with value: 0.507258868875426 and parameters: {'n_estimators': 890, 'max_depth': 8, 'learning_rate': 0.25361285425665553, 'subsample': 0.9766591269952789, 'colsample_bytree': 0.957916121342439, 'gamma': 4.943298626775251}. Best is trial 31 with value: 0.5722564036004956.


[I 2025-08-31 12:19:11,120] Trial 35 finished with value: 0.5506498591393731 and parameters: {'n_estimators': 746, 'max_depth': 9, 'learning_rate': 0.2235558543930774, 'subsample': 0.689739161976747, 'colsample_bytree': 0.8955623911509787, 'gamma': 0.2246202048486886}. Best is trial 31 with value: 0.5722564036004956.


[I 2025-08-31 12:19:12,012] Trial 36 finished with value: 0.523891206872852 and parameters: {'n_estimators': 925, 'max_depth': 10, 'learning_rate': 0.29774141404603893, 'subsample': 0.7556799957711725, 'colsample_bytree': 0.8025961900147686, 'gamma': 1.2664827093884095}. Best is trial 31 with value: 0.5722564036004956.


[I 2025-08-31 12:19:14,293] Trial 37 finished with value: 0.567811990559714 and parameters: {'n_estimators': 843, 'max_depth': 8, 'learning_rate': 0.24242710767828732, 'subsample': 0.6378866305954765, 'colsample_bytree': 0.9610456991842692, 'gamma': 0.004817675692756707}. Best is trial 31 with value: 0.5722564036004956.


[I 2025-08-31 12:19:15,955] Trial 38 finished with value: 0.5582334737213636 and parameters: {'n_estimators': 824, 'max_depth': 7, 'learning_rate': 0.27382554924736435, 'subsample': 0.7946821977250543, 'colsample_bytree': 0.9705660516899882, 'gamma': 0.05379392803802607}. Best is trial 31 with value: 0.5722564036004956.


[I 2025-08-31 12:19:16,687] Trial 39 finished with value: 0.5417909974405309 and parameters: {'n_estimators': 853, 'max_depth': 8, 'learning_rate': 0.24476543302210088, 'subsample': 0.6441243784059433, 'colsample_bytree': 0.9350156163035419, 'gamma': 3.9066884385660443}. Best is trial 31 with value: 0.5722564036004956.


[I 2025-08-31 12:19:17,800] Trial 40 finished with value: 0.5440018586069819 and parameters: {'n_estimators': 940, 'max_depth': 6, 'learning_rate': 0.27951258031895254, 'subsample': 0.7281708297969569, 'colsample_bytree': 0.912575027019813, 'gamma': 0.36644750607244303}. Best is trial 31 with value: 0.5722564036004956.


[I 2025-08-31 12:19:18,459] Trial 41 finished with value: 0.5524732051756092 and parameters: {'n_estimators': 100, 'max_depth': 8, 'learning_rate': 0.25050652335583773, 'subsample': 0.5467535389596619, 'colsample_bytree': 0.9838385744650815, 'gamma': 0.6955610417810203}. Best is trial 31 with value: 0.5722564036004956.


[I 2025-08-31 12:19:19,637] Trial 42 finished with value: 0.5813960149220408 and parameters: {'n_estimators': 797, 'max_depth': 8, 'learning_rate': 0.22381670790466257, 'subsample': 0.5843290482720296, 'colsample_bytree': 0.725145513558952, 'gamma': 0.49961633397232363}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:20,325] Trial 43 finished with value: 0.5275600292096393 and parameters: {'n_estimators': 800, 'max_depth': 7, 'learning_rate': 0.2698433182145877, 'subsample': 0.9200374994020951, 'colsample_bytree': 0.7044664311519389, 'gamma': 0.9256948689578987}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:21,549] Trial 44 finished with value: 0.5482503734431576 and parameters: {'n_estimators': 908, 'max_depth': 8, 'learning_rate': 0.228052594708003, 'subsample': 0.6279364916343527, 'colsample_bytree': 0.9259558185155049, 'gamma': 0.46809403555848683}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:22,864] Trial 45 finished with value: 0.5625640004969252 and parameters: {'n_estimators': 858, 'max_depth': 7, 'learning_rate': 0.24909483234056493, 'subsample': 0.5918096148769331, 'colsample_bytree': 0.7291748424563657, 'gamma': 0.17136301459843895}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:25,193] Trial 46 finished with value: 0.5708622863787232 and parameters: {'n_estimators': 997, 'max_depth': 8, 'learning_rate': 0.22381551104078026, 'subsample': 0.6701958043104981, 'colsample_bytree': 0.8688750514819766, 'gamma': 0.005873489504604368}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:27,606] Trial 47 finished with value: 0.5453299950318868 and parameters: {'n_estimators': 997, 'max_depth': 8, 'learning_rate': 0.21978066906071866, 'subsample': 0.6664538859438224, 'colsample_bytree': 0.863128578521615, 'gamma': 0.008893368539336088}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:29,163] Trial 48 finished with value: 0.5243724797043268 and parameters: {'n_estimators': 968, 'max_depth': 5, 'learning_rate': 0.16284055541666945, 'subsample': 0.6264184234296243, 'colsample_bytree': 0.9682738543934912, 'gamma': 0.2920066254484927}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:30,051] Trial 49 finished with value: 0.5280154165042653 and parameters: {'n_estimators': 712, 'max_depth': 3, 'learning_rate': 0.1970079504235935, 'subsample': 0.7831215087675176, 'colsample_bytree': 0.9036440573559066, 'gamma': 0.4983291303238834}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:32,272] Trial 50 finished with value: 0.5568903052064627 and parameters: {'n_estimators': 817, 'max_depth': 7, 'learning_rate': 0.1978854209040316, 'subsample': 0.7121067672330875, 'colsample_bytree': 0.9435016678881921, 'gamma': 0.015619294995628419}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:33,304] Trial 51 finished with value: 0.5546249487454201 and parameters: {'n_estimators': 961, 'max_depth': 8, 'learning_rate': 0.26486215956895937, 'subsample': 0.5961999086513518, 'colsample_bytree': 0.7850077789768287, 'gamma': 0.913717393918724}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:34,358] Trial 52 finished with value: 0.5423784551361703 and parameters: {'n_estimators': 846, 'max_depth': 8, 'learning_rate': 0.23488484709373839, 'subsample': 0.5700091667023196, 'colsample_bytree': 0.747572554452631, 'gamma': 0.5982205966626415}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:34,888] Trial 53 finished with value: 0.5321696453428826 and parameters: {'n_estimators': 782, 'max_depth': 9, 'learning_rate': 0.2852311727887022, 'subsample': 0.6159061563557203, 'colsample_bytree': 0.6681644917455603, 'gamma': 4.657890660765508}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:35,977] Trial 54 finished with value: 0.5628035711423636 and parameters: {'n_estimators': 902, 'max_depth': 8, 'learning_rate': 0.2438350201157058, 'subsample': 0.6561742601191631, 'colsample_bytree': 0.8505827593805653, 'gamma': 0.7836096422601313}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:37,489] Trial 55 finished with value: 0.554506903773895 and parameters: {'n_estimators': 955, 'max_depth': 10, 'learning_rate': 0.21888524634020423, 'subsample': 0.8251858726576204, 'colsample_bytree': 0.8825522192382501, 'gamma': 0.15744328695758958}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:38,803] Trial 56 finished with value: 0.5296574902105069 and parameters: {'n_estimators': 907, 'max_depth': 9, 'learning_rate': 0.1256239872158148, 'subsample': 0.6805955953592842, 'colsample_bytree': 0.9790423516321778, 'gamma': 1.1521040102163653}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:39,861] Trial 57 finished with value: 0.5435910875344983 and parameters: {'n_estimators': 1000, 'max_depth': 4, 'learning_rate': 0.25923566983498836, 'subsample': 0.5757373166350159, 'colsample_bytree': 0.7010881229936943, 'gamma': 0.25449788362149217}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:41,427] Trial 58 finished with value: 0.5355432919818364 and parameters: {'n_estimators': 830, 'max_depth': 8, 'learning_rate': 0.07586387503707873, 'subsample': 0.5293725589402136, 'colsample_bytree': 0.8151480514909669, 'gamma': 1.6505862277830055}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:45,131] Trial 59 finished with value: 0.5493456234456644 and parameters: {'n_estimators': 874, 'max_depth': 7, 'learning_rate': 0.022966194188126787, 'subsample': 0.6099037143905214, 'colsample_bytree': 0.7261314079673599, 'gamma': 0.7905644090610799}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:45,877] Trial 60 finished with value: 0.5115363750545023 and parameters: {'n_estimators': 758, 'max_depth': 8, 'learning_rate': 0.20303929327084297, 'subsample': 0.5611732724462994, 'colsample_bytree': 0.9279438554504122, 'gamma': 3.034503894649159}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:46,957] Trial 61 finished with value: 0.5304155358570662 and parameters: {'n_estimators': 916, 'max_depth': 8, 'learning_rate': 0.24205072782595402, 'subsample': 0.6491583546262654, 'colsample_bytree': 0.861868060814381, 'gamma': 0.7584348617212151}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:47,758] Trial 62 finished with value: 0.5478722248958183 and parameters: {'n_estimators': 901, 'max_depth': 9, 'learning_rate': 0.24716039660125927, 'subsample': 0.7314888827287113, 'colsample_bytree': 0.8324162900718287, 'gamma': 1.9133572023088854}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:48,753] Trial 63 finished with value: 0.5614921103039751 and parameters: {'n_estimators': 968, 'max_depth': 8, 'learning_rate': 0.2282726242694734, 'subsample': 0.6706148686230137, 'colsample_bytree': 0.7930337105006284, 'gamma': 0.9757558331934775}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:49,707] Trial 64 finished with value: 0.5460517603665684 and parameters: {'n_estimators': 864, 'max_depth': 7, 'learning_rate': 0.2666424716397781, 'subsample': 0.6942630464528367, 'colsample_bytree': 0.7559520546461101, 'gamma': 0.5055140359539755}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:50,683] Trial 65 finished with value: 0.5755576122544278 and parameters: {'n_estimators': 793, 'max_depth': 8, 'learning_rate': 0.2178279259680424, 'subsample': 0.6346847709737702, 'colsample_bytree': 0.8448193786837412, 'gamma': 1.0800520978720518}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:51,640] Trial 66 finished with value: 0.5356896874212272 and parameters: {'n_estimators': 796, 'max_depth': 9, 'learning_rate': 0.21667684371063706, 'subsample': 0.6320526462642256, 'colsample_bytree': 0.9563854967667255, 'gamma': 1.2786903099418192}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:52,790] Trial 67 finished with value: 0.5516901981571495 and parameters: {'n_estimators': 685, 'max_depth': 8, 'learning_rate': 0.1791143951442692, 'subsample': 0.588698514558501, 'colsample_bytree': 0.998147456624453, 'gamma': 1.0443508998141886}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:53,684] Trial 68 finished with value: 0.562832768365902 and parameters: {'n_estimators': 221, 'max_depth': 7, 'learning_rate': 0.2066428624510507, 'subsample': 0.6141912462548308, 'colsample_bytree': 0.8796777140013086, 'gamma': 0.3338183441264979}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:54,624] Trial 69 finished with value: 0.5568993058702327 and parameters: {'n_estimators': 731, 'max_depth': 8, 'learning_rate': 0.25470350266061, 'subsample': 0.559491750215789, 'colsample_bytree': 0.6906818046192185, 'gamma': 0.5851030221562323}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:56,392] Trial 70 finished with value: 0.5449130407264307 and parameters: {'n_estimators': 936, 'max_depth': 10, 'learning_rate': 0.18987041179743053, 'subsample': 0.5330350114576736, 'colsample_bytree': 0.8169609897495606, 'gamma': 0.1767519372033528}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:57,312] Trial 71 finished with value: 0.5397723426749 and parameters: {'n_estimators': 220, 'max_depth': 7, 'learning_rate': 0.20786952857975136, 'subsample': 0.6206006408376545, 'colsample_bytree': 0.8857867304820591, 'gamma': 0.3725624854856962}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:58,310] Trial 72 finished with value: 0.5596857646400392 and parameters: {'n_estimators': 512, 'max_depth': 8, 'learning_rate': 0.23710690129853015, 'subsample': 0.5983738503447422, 'colsample_bytree': 0.9154068140966346, 'gamma': 0.44127281188894873}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:59,083] Trial 73 finished with value: 0.5303910011688602 and parameters: {'n_estimators': 269, 'max_depth': 7, 'learning_rate': 0.22751475046362124, 'subsample': 0.6407920971281309, 'colsample_bytree': 0.7399220765521776, 'gamma': 0.2790393525428163}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:19:59,966] Trial 74 finished with value: 0.5222984846507457 and parameters: {'n_estimators': 439, 'max_depth': 8, 'learning_rate': 0.21608472321864974, 'subsample': 0.6049467991438267, 'colsample_bytree': 0.7717007025296434, 'gamma': 0.6672307575514468}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:00,852] Trial 75 finished with value: 0.5135081611093308 and parameters: {'n_estimators': 376, 'max_depth': 9, 'learning_rate': 0.22354252503863833, 'subsample': 0.9995136645651277, 'colsample_bytree': 0.8325233977948364, 'gamma': 0.1297903195982969}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:01,928] Trial 76 finished with value: 0.5466621642026824 and parameters: {'n_estimators': 772, 'max_depth': 6, 'learning_rate': 0.2774800295171334, 'subsample': 0.5127718774286333, 'colsample_bytree': 0.9708046164723404, 'gamma': 0.8626170002182454}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:03,320] Trial 77 finished with value: 0.5591083352596017 and parameters: {'n_estimators': 833, 'max_depth': 7, 'learning_rate': 0.1680021204752256, 'subsample': 0.5836956635878952, 'colsample_bytree': 0.9822191750865534, 'gamma': 0.553112122820348}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:04,298] Trial 78 finished with value: 0.5367912346971849 and parameters: {'n_estimators': 618, 'max_depth': 9, 'learning_rate': 0.14985914868528824, 'subsample': 0.5507141311835019, 'colsample_bytree': 0.8475402833345314, 'gamma': 1.572718023187032}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:05,343] Trial 79 finished with value: 0.5631473936497842 and parameters: {'n_estimators': 881, 'max_depth': 8, 'learning_rate': 0.20615887469786742, 'subsample': 0.6360848569383483, 'colsample_bytree': 0.8808404021919988, 'gamma': 1.2264830766962986}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:06,336] Trial 80 finished with value: 0.5664618267275148 and parameters: {'n_estimators': 878, 'max_depth': 8, 'learning_rate': 0.23824632156340647, 'subsample': 0.6334968467620629, 'colsample_bytree': 0.9012159772575683, 'gamma': 1.2625496621967098}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:07,352] Trial 81 finished with value: 0.5430861198156334 and parameters: {'n_estimators': 878, 'max_depth': 8, 'learning_rate': 0.2393364526136708, 'subsample': 0.6301749748063418, 'colsample_bytree': 0.9480815935696174, 'gamma': 1.228347818833918}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:08,230] Trial 82 finished with value: 0.5473615096406956 and parameters: {'n_estimators': 807, 'max_depth': 8, 'learning_rate': 0.25851779726625557, 'subsample': 0.6524544245557309, 'colsample_bytree': 0.8978993791314348, 'gamma': 1.0676089408126368}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:09,347] Trial 83 finished with value: 0.5557592412751154 and parameters: {'n_estimators': 980, 'max_depth': 8, 'learning_rate': 0.23300100451193087, 'subsample': 0.5743279169393879, 'colsample_bytree': 0.8742459530162467, 'gamma': 1.3523806222352104}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:14,433] Trial 84 finished with value: 0.5412099914436863 and parameters: {'n_estimators': 848, 'max_depth': 9, 'learning_rate': 0.010986274682152852, 'subsample': 0.6740105817973087, 'colsample_bytree': 0.9322817319190684, 'gamma': 1.8590107694101554}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:15,484] Trial 85 finished with value: 0.5348102733109257 and parameters: {'n_estimators': 941, 'max_depth': 8, 'learning_rate': 0.24998060560190105, 'subsample': 0.6412917004009682, 'colsample_bytree': 0.9116244564111151, 'gamma': 1.1271631627221617}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:16,532] Trial 86 finished with value: 0.5452627308026968 and parameters: {'n_estimators': 896, 'max_depth': 8, 'learning_rate': 0.1942809981272395, 'subsample': 0.6910951593742082, 'colsample_bytree': 0.9613006234031855, 'gamma': 1.4621475523329104}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:17,479] Trial 87 finished with value: 0.5656922780477356 and parameters: {'n_estimators': 922, 'max_depth': 8, 'learning_rate': 0.2233108089571035, 'subsample': 0.6008809435351317, 'colsample_bytree': 0.8672158927311938, 'gamma': 2.1675832894977054}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:18,433] Trial 88 finished with value: 0.5606660421418123 and parameters: {'n_estimators': 924, 'max_depth': 8, 'learning_rate': 0.2706546983346971, 'subsample': 0.6066357667020706, 'colsample_bytree': 0.7150768757421793, 'gamma': 0.8877441414526037}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:19,316] Trial 89 finished with value: 0.5569919265840683 and parameters: {'n_estimators': 954, 'max_depth': 9, 'learning_rate': 0.2987582617145922, 'subsample': 0.5931016487975644, 'colsample_bytree': 0.8444656735043663, 'gamma': 2.3311095858922237}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:20,528] Trial 90 finished with value: 0.5508060825884169 and parameters: {'n_estimators': 983, 'max_depth': 7, 'learning_rate': 0.23046416441011477, 'subsample': 0.618921459453854, 'colsample_bytree': 0.9854293083260848, 'gamma': 0.6886383065522483}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:21,251] Trial 91 finished with value: 0.5305542105311444 and parameters: {'n_estimators': 874, 'max_depth': 8, 'learning_rate': 0.21239041019259247, 'subsample': 0.581152133057835, 'colsample_bytree': 0.866309189718907, 'gamma': 3.647233116761872}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:22,005] Trial 92 finished with value: 0.5193385935068443 and parameters: {'n_estimators': 835, 'max_depth': 8, 'learning_rate': 0.22224975634313718, 'subsample': 0.6597936814390551, 'colsample_bytree': 0.8906947578612661, 'gamma': 2.0711887468262153}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:22,824] Trial 93 finished with value: 0.5569540187137949 and parameters: {'n_estimators': 891, 'max_depth': 8, 'learning_rate': 0.20336618627852127, 'subsample': 0.558896996192157, 'colsample_bytree': 0.906593273581996, 'gamma': 2.5633629442678285}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:23,570] Trial 94 finished with value: 0.5365127057353638 and parameters: {'n_estimators': 918, 'max_depth': 8, 'learning_rate': 0.2410311564696046, 'subsample': 0.6369210639514281, 'colsample_bytree': 0.6382478819505605, 'gamma': 2.1379514862367976}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:24,526] Trial 95 finished with value: 0.5420730153099247 and parameters: {'n_estimators': 858, 'max_depth': 8, 'learning_rate': 0.2528130106902858, 'subsample': 0.6229081919706345, 'colsample_bytree': 0.8693050153050905, 'gamma': 1.0004692032944722}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:25,295] Trial 96 finished with value: 0.5465988556691401 and parameters: {'n_estimators': 804, 'max_depth': 9, 'learning_rate': 0.22712671491927136, 'subsample': 0.6010639539506174, 'colsample_bytree': 0.9261166580540869, 'gamma': 2.9969600085826786}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:26,196] Trial 97 finished with value: 0.5427979896426097 and parameters: {'n_estimators': 950, 'max_depth': 8, 'learning_rate': 0.2861889425697228, 'subsample': 0.5677091164115032, 'colsample_bytree': 0.8019547456143283, 'gamma': 1.218891245142245}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:27,182] Trial 98 finished with value: 0.562401109937027 and parameters: {'n_estimators': 789, 'max_depth': 7, 'learning_rate': 0.21423421332484968, 'subsample': 0.7019070511227965, 'colsample_bytree': 0.9444430461049506, 'gamma': 0.8234249177890457}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:28,928] Trial 99 finished with value: 0.5665687649001586 and parameters: {'n_estimators': 817, 'max_depth': 9, 'learning_rate': 0.2631529440135178, 'subsample': 0.536268556289086, 'colsample_bytree': 0.8251731368308248, 'gamma': 0.034298674407618815}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:30,525] Trial 100 finished with value: 0.5429784151180449 and parameters: {'n_estimators': 748, 'max_depth': 9, 'learning_rate': 0.26414814259724245, 'subsample': 0.5473117768058376, 'colsample_bytree': 0.8242368410409492, 'gamma': 0.0660526464461116}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:32,573] Trial 101 finished with value: 0.5582718016034756 and parameters: {'n_estimators': 881, 'max_depth': 8, 'learning_rate': 0.24512364977641551, 'subsample': 0.5845261918317238, 'colsample_bytree': 0.8544137182377892, 'gamma': 0.0028444502878541555}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:33,849] Trial 102 finished with value: 0.5636814414526929 and parameters: {'n_estimators': 815, 'max_depth': 9, 'learning_rate': 0.2585681657396808, 'subsample': 0.5381772985187344, 'colsample_bytree': 0.8448913370201946, 'gamma': 0.23307295051941535}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:35,229] Trial 103 finished with value: 0.5678305785472876 and parameters: {'n_estimators': 823, 'max_depth': 10, 'learning_rate': 0.2585021134849181, 'subsample': 0.5198375480876526, 'colsample_bytree': 0.8458507272463457, 'gamma': 0.20757135042366304}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:36,696] Trial 104 finished with value: 0.5668290487340987 and parameters: {'n_estimators': 820, 'max_depth': 10, 'learning_rate': 0.25777127854229326, 'subsample': 0.5181568368556861, 'colsample_bytree': 0.8426233715183743, 'gamma': 0.20265382665936743}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:37,877] Trial 105 finished with value: 0.5749326305047996 and parameters: {'n_estimators': 689, 'max_depth': 10, 'learning_rate': 0.2762734746583363, 'subsample': 0.5197282688141505, 'colsample_bytree': 0.8256425594179755, 'gamma': 0.4278915963307106}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:38,894] Trial 106 finished with value: 0.5717761192731572 and parameters: {'n_estimators': 703, 'max_depth': 10, 'learning_rate': 0.2919253027944219, 'subsample': 0.5188629745521064, 'colsample_bytree': 0.8008785734700479, 'gamma': 0.443761437395401}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:39,947] Trial 107 finished with value: 0.5513823005284271 and parameters: {'n_estimators': 679, 'max_depth': 10, 'learning_rate': 0.2895827605051015, 'subsample': 0.5203401538742661, 'colsample_bytree': 0.7815284071406817, 'gamma': 0.4085045376261086}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:41,348] Trial 108 finished with value: 0.5656491901584869 and parameters: {'n_estimators': 724, 'max_depth': 10, 'learning_rate': 0.27515852724629075, 'subsample': 0.5107543410443587, 'colsample_bytree': 0.8037474850932713, 'gamma': 0.12467956180686725}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:42,558] Trial 109 finished with value: 0.5630938940803393 and parameters: {'n_estimators': 699, 'max_depth': 10, 'learning_rate': 0.2710835533512443, 'subsample': 0.5007444818005462, 'colsample_bytree': 0.7923273518777991, 'gamma': 0.3046894142348198}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:43,604] Trial 110 finished with value: 0.5494831139085508 and parameters: {'n_estimators': 761, 'max_depth': 10, 'learning_rate': 0.2805946194240122, 'subsample': 0.5209654991617679, 'colsample_bytree': 0.8280455352845726, 'gamma': 0.5191891907039865}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:44,997] Trial 111 finished with value: 0.5562879874347992 and parameters: {'n_estimators': 779, 'max_depth': 10, 'learning_rate': 0.2551585580171692, 'subsample': 0.5285441317731734, 'colsample_bytree': 0.8382922056536104, 'gamma': 0.18783537223785535}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:45,971] Trial 112 finished with value: 0.5455484663305072 and parameters: {'n_estimators': 644, 'max_depth': 10, 'learning_rate': 0.2617869527806178, 'subsample': 0.5377368460457094, 'colsample_bytree': 0.8208010023888791, 'gamma': 0.45712351529140033}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:47,150] Trial 113 finished with value: 0.5536958635490438 and parameters: {'n_estimators': 831, 'max_depth': 10, 'learning_rate': 0.24678398389329104, 'subsample': 0.5119605087833504, 'colsample_bytree': 0.8061003441647819, 'gamma': 0.6519104224028507}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:48,816] Trial 114 finished with value: 0.5656266531058957 and parameters: {'n_estimators': 819, 'max_depth': 10, 'learning_rate': 0.2822973877352939, 'subsample': 0.5502404912653549, 'colsample_bytree': 0.8593246367589846, 'gamma': 0.10343076113227995}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:50,113] Trial 115 finished with value: 0.5716335661022612 and parameters: {'n_estimators': 735, 'max_depth': 10, 'learning_rate': 0.23248763101296915, 'subsample': 0.5258929262633268, 'colsample_bytree': 0.8102685093823966, 'gamma': 0.2761023508222376}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:51,188] Trial 116 finished with value: 0.5487282085993757 and parameters: {'n_estimators': 748, 'max_depth': 10, 'learning_rate': 0.26769462131134153, 'subsample': 0.5227917198759261, 'colsample_bytree': 0.7908190539047399, 'gamma': 0.3590608274139211}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:52,309] Trial 117 finished with value: 0.5700193132924719 and parameters: {'n_estimators': 717, 'max_depth': 10, 'learning_rate': 0.2916397170798898, 'subsample': 0.5397515230492692, 'colsample_bytree': 0.8100260566091898, 'gamma': 0.263617290086422}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:53,495] Trial 118 finished with value: 0.5395115085331375 and parameters: {'n_estimators': 734, 'max_depth': 10, 'learning_rate': 0.29688378177457, 'subsample': 0.5081594191001738, 'colsample_bytree': 0.7548340005792473, 'gamma': 0.2748698774534161}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:54,418] Trial 119 finished with value: 0.5661073771702213 and parameters: {'n_estimators': 672, 'max_depth': 10, 'learning_rate': 0.2947446159212594, 'subsample': 0.5583788059514173, 'colsample_bytree': 0.8142685749239961, 'gamma': 0.469078908338198}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:55,362] Trial 120 finished with value: 0.5745941861928324 and parameters: {'n_estimators': 641, 'max_depth': 10, 'learning_rate': 0.28924350173738556, 'subsample': 0.5295275037144597, 'colsample_bytree': 0.7654219961806225, 'gamma': 0.5779007992536065}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:56,403] Trial 121 finished with value: 0.5500625630730165 and parameters: {'n_estimators': 706, 'max_depth': 10, 'learning_rate': 0.2906498767361729, 'subsample': 0.5282371200777018, 'colsample_bytree': 0.8392090534802009, 'gamma': 0.5570203853442586}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:57,361] Trial 122 finished with value: 0.5582041907761098 and parameters: {'n_estimators': 652, 'max_depth': 10, 'learning_rate': 0.283714369302298, 'subsample': 0.5048430483957772, 'colsample_bytree': 0.7666308247237448, 'gamma': 0.7252883595300197}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:58,392] Trial 123 finished with value: 0.5551989192840256 and parameters: {'n_estimators': 628, 'max_depth': 10, 'learning_rate': 0.2782002520636024, 'subsample': 0.5417901404707448, 'colsample_bytree': 0.7378726512010612, 'gamma': 0.36356451924817207}. Best is trial 42 with value: 0.5813960149220408.


[I 2025-08-31 12:20:59,643] Trial 124 finished with value: 0.5851872383327192 and parameters: {'n_estimators': 667, 'max_depth': 10, 'learning_rate': 0.2920140478253734, 'subsample': 0.5166437467927958, 'colsample_bytree': 0.7762638403831928, 'gamma': 0.2118731109844641}. Best is trial 124 with value: 0.5851872383327192.


Best XGBoost Params: {'n_estimators': 667, 'max_depth': 10, 'learning_rate': 0.2920140478253734, 'subsample': 0.5166437467927958, 'colsample_bytree': 0.7762638403831928, 'gamma': 0.2118731109844641}


Final Tuned XGBoost F1 Score: 0.6057





SHAP analysis complete and plot logged to MLflow.


In [6]:
# --- MLP Challenger Model ---
# Step 1: Imports and Data Scaling
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

# Deep learning models are sensitive to feature scale. We must standardize our data.
# We fit the scaler ONLY on the training data to prevent data leakage from the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data successfully scaled.")
print(f"Shape of scaled training data: {X_train_scaled.shape}")

Data successfully scaled.
Shape of scaled training data: (2380, 32)


In [7]:
# Convert numpy arrays to PyTorch Tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Create TensorDatasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create DataLoaders to handle batching
# We don't shuffle time-series data to preserve temporal order if needed, 
# but for a simple MLP, shuffling is often acceptable. Let's keep it False for rigor.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print("PyTorch Tensors and DataLoaders created.")

PyTorch Tensors and DataLoaders created.


In [8]:
# Step 2: Define the MLP Architecture
class ETF_MLP(nn.Module):
    def __init__(self, input_size, hidden_size_1=128, hidden_size_2=64, dropout_rate=0.5):
        """
        Initializes the MLP model.
        
        Args:
            input_size (int): The number of input features.
            hidden_size_1 (int): Number of neurons in the first hidden layer.
            hidden_size_2 (int): Number of neurons in the second hidden layer.
            dropout_rate (float): The dropout probability.
        """
        super(ETF_MLP, self).__init__()
        
        # --- Layer Definitions ---
        self.layer_1 = nn.Linear(input_size, hidden_size_1)
        self.bn_1 = nn.BatchNorm1d(hidden_size_1)
        
        self.layer_2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.bn_2 = nn.BatchNorm1d(hidden_size_2)
        
        self.output_layer = nn.Linear(hidden_size_2, 1)
        
        # --- Activation and Regularization ---
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)
        
    def forward(self, x):
        """ The forward pass of the model. """
        # First hidden layer
        x = self.layer_1(x)
        x = self.bn_1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Second hidden layer
        x = self.layer_2(x)
        x = self.bn_2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Output layer with sigmoid for binary classification
        x = torch.sigmoid(self.output_layer(x))
        return x

# Instantiate the model to test
input_features = X_train.shape[1]
model_mlp = ETF_MLP(input_size=input_features)
print("MLP Model Architecture:")
print(model_mlp)

MLP Model Architecture:
ETF_MLP(
  (layer_1): Linear(in_features=32, out_features=128, bias=True)
  (bn_1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer_2): Linear(in_features=128, out_features=64, bias=True)
  (bn_2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (output_layer): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
)


In [9]:
# Step 3: Manual MLP Training and Evaluation

# --- Configuration ---
INPUT_SIZE = X_train.shape[1]
LEARNING_RATE = 0.001
EPOCHS = 50

# --- Model, Loss, Optimizer (Demonstrates 5.2, 5.3) ---
model_mlp = ETF_MLP(input_size=INPUT_SIZE, dropout_rate=0.4)
criterion = nn.BCELoss() # Binary Cross Entropy Loss for binary classification
optimizer = torch.optim.Adam(model_mlp.parameters(), lr=LEARNING_RATE) # Adam Optimizer
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS) # LR Schedule

# --- MLflow Logging ---
with mlflow.start_run(run_name="MLP_Manual_Baseline") as run:
    mlflow.log_params({"learning_rate": LEARNING_RATE, "epochs": EPOCHS, "optimizer": "Adam"})
    
    # --- Training Loop ---
    for epoch in range(EPOCHS):
        model_mlp.train() # Set model to training mode
        for features, labels in train_loader:
            # Forward pass
            outputs = model_mlp(features)
            loss = criterion(outputs, labels)
            
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        # Update learning rate
        scheduler.step()
        
        # --- Evaluation on Test Set ---
        model_mlp.eval() # Set model to evaluation mode
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for features, labels in test_loader:
                outputs = model_mlp(features)
                predicted = (outputs > 0.5).float()
                all_preds.extend(predicted.numpy())
                all_labels.extend(labels.numpy())
        
        # Calculate and log F1 score for the epoch
        f1 = f1_score(all_labels, all_preds)
        mlflow.log_metric("test_f1_score", f1, step=epoch)

    print(f"Final MLP F1 Score from manual run: {f1:.4f}")
    # Log the final model
    mlflow.pytorch.log_model(model_mlp, "mlp-model")

Final MLP F1 Score from manual run: 0.1130


In [10]:
import mlflow
import pandas as pd

# Ensure MLflow is pointing to your tracking server/directory
# mlflow.set_tracking_uri("../mlruns") # Uncomment if running in a new session/script

# Get the experiment by its name
experiment = mlflow.get_experiment_by_name("ETF_Trend_Prediction")

if experiment:
    # Search for all runs within this experiment
    runs_df = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        # Order by F1 score (desc) and then by start time (desc)
        order_by=["metrics.f1_score DESC", "start_time DESC"],
        output_format="pandas"
    )

    # --- New Logic to Extract All Metrics and Parameters ---
    # Identify all metric and parameter columns
    metric_cols = [col for col in runs_df.columns if col.startswith("metrics.")]
    param_cols = [col for col in runs_df.columns if col.startswith("params.")]

    # Select core run info, all metrics, and all parameters
    # The 'tags.mlflow.runName' contains the run name
    selected_cols = [
        "tags.mlflow.runName", "start_time", "run_id"
    ] + metric_cols + param_cols

    metrics_and_params = runs_df[selected_cols].copy()

    # Rename columns for better readability (optional, you can keep original for params if many)
    # This example renames just the core and metric columns
    metrics_and_params.rename(columns={
        "tags.mlflow.runName": "Run Name",
        "metrics.f1_score": "F1 Score",
        "metrics.accuracy": "Accuracy",
        "metrics.roc_auc": "ROC AUC"
        # Add more renames for specific metrics/params if you want,
        # but for ALL params, it might be too many to rename individually.
        # Keeping 'params.param_name' is often fine.
    }, inplace=True)

    print("Metrics and Parameters for 'ETF_Trend_Prediction' Experiment:")
    display(metrics_and_params)

else:
    print(f"Experiment 'ETF_Trend_Prediction' not found.")


Metrics and Parameters for 'ETF_Trend_Prediction' Experiment:


Unnamed: 0,Run Name,start_time,run_id,F1 Score,Accuracy,ROC AUC,metrics.test_f1_score,params.n_estimators,params.max_depth,params.subsample,params.gamma,params.colsample_bytree,params.learning_rate,params.epochs,params.optimizer
0,LogisticRegression_Baseline,2025-08-31 16:18:31.633000+00:00,4352ce839c3d48608b6b3e065c2449c7,0.696839,0.535242,,,,,,,,,,
1,RandomForest_Baseline,2025-08-31 16:18:31.785000+00:00,d3ae22e8ac3f4358b0ab831096f9ffa0,0.63176,0.527533,,,,,,,,,,
2,XGBoost_Tuned_Champion,2025-08-31 16:20:59.650000+00:00,ba2c37ba35b3434abeea8f5dff1ce1af,0.605672,0.52533,0.506446,,667.0,10.0,0.5166437467927958,0.2118731109844641,0.7762638403831928,0.2920140478253734,,
3,MLP_Manual_Baseline,2025-08-31 16:21:14.796000+00:00,c858ab1ba57b43f28e7d98986a372466,,,,0.112994,,,,,,0.001,50.0,Adam
