In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import optuna
import shap
import mlflow
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

mlflow.set_tracking_uri("../mlruns")

# Load your processed data
DATA_PATH = '../data/processed/etf_features.parquet'
data = pd.read_parquet(DATA_PATH)

# Separate features (X) and target (y)
X = data.drop('target', axis=1)
y = data['target']

  import pkg_resources  # noqa: TID251


In [2]:
# Define the chronological split point
# For example, use data up to the end of 2021 for training, and 2022 onwards for testing.
train_end_date = '2021-12-31'
test_start_date = '2022-01-01'

X_train = X.loc[:train_end_date]
y_train = y.loc[:train_end_date]

X_test = X.loc[test_start_date:]
y_test = y.loc[test_start_date:]

print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

mlflow.set_experiment("ETF_Trend_Prediction")

Training set size: 2380
Test set size: 908


<Experiment: artifact_location='file:C:/Users/dawso/Dev/Personal/AIGrind/mlops-etf-forecasting/notebooks/../mlruns/922455140467852123', creation_time=1756652513785, experiment_id='922455140467852123', last_update_time=1756652513785, lifecycle_stage='active', name='ETF_Trend_Prediction', tags={}>

In [3]:
# Train Logistic Regression
with mlflow.start_run(run_name="LogisticRegression_Baseline"):
    model_lr = LogisticRegression(max_iter=1000, random_state=42)
    model_lr.fit(X_train, y_train)
    y_pred_lr = model_lr.predict(X_test)
    
    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_lr))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_lr))
    print(f"Logistic Regression F1 Score: {f1_score(y_test, y_pred_lr):.4f}")

# Train Random Forest
with mlflow.start_run(run_name="RandomForest_Baseline"):
    model_rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
    model_rf.fit(X_train, y_train)
    y_pred_rf = model_rf.predict(X_test)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_rf))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_rf))
    print(f"Random Forest F1 Score: {f1_score(y_test, y_pred_rf):.4f}")

Logistic Regression F1 Score: 0.6968


Random Forest F1 Score: 0.6207


In [4]:
def objective(trial):
    # Define the search space for hyperparameters
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'random_state': 42
    }
    
    model = xgb.XGBClassifier(**params)
    
    # Use TimeSeriesSplit for robust cross-validation
    tscv = TimeSeriesSplit(n_splits=5)
    score = cross_val_score(model, X_train, y_train, cv=tscv, scoring='f1', n_jobs=-1).mean()
    
    return score

In [5]:
# Run the study to find the best params
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=125) 

best_params = study.best_params
print("Best XGBoost Params:", best_params)

# Train the final XGBoost model with the best parameters and log to MLflow
with mlflow.start_run(run_name="XGBoost_Tuned_Champion") as run:
    final_xgb_model = xgb.XGBClassifier(**best_params, random_state=42)
    final_xgb_model.fit(X_train, y_train)
    y_pred_xgb = final_xgb_model.predict(X_test)
    y_pred_proba_xgb = final_xgb_model.predict_proba(X_test)[:, 1]

    f1 = f1_score(y_test, y_pred_xgb)
    print(f"Final Tuned XGBoost F1 Score: {f1:.4f}")

    mlflow.log_params(best_params)
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_xgb))
    mlflow.log_metric("roc_auc", roc_auc_score(y_test, y_pred_proba_xgb))

    mlflow.xgboost.log_model(final_xgb_model, "xgb-model")
    champion_run_id = run.info.run_id # Capture run ID

    # --- SHAP Plot Generation and Logging (Move these lines here) ---
    print("\nSHAP analysis complete and plot logged to MLflow.")

    # 1. Create a SHAP Explainer
    explainer = shap.TreeExplainer(final_xgb_model)
    shap_values = explainer.shap_values(X_test) # Or X_train, depending on what you want to explain

    # 2. Generate and save the SHAP summary plot to a temporary file
    # Ensure you import matplotlib.pyplot as plt
    fig, ax = plt.subplots(figsize=(10, 8)) # You might want to specify figure size
    shap.summary_plot(shap_values, X_test, show=False, plot_size=(8, 6)) # show=False prevents immediate display
    plt.title("SHAP Feature Importance for XGBoost Model") # Add a title
    plot_filename = "shap_summary_champion.png" # Give it a more descriptive name
    plt.savefig(plot_filename, bbox_inches='tight', dpi=300) # Save the plot to a file
    plt.close() # Close the plot to free memory

    # 3. Log the saved plot as an MLflow artifact to the *current* active run
    mlflow.log_artifact(plot_filename)

[I 2025-08-31 11:07:34,820] A new study created in memory with name: no-name-9eb9e75e-1b67-456a-b638-89b86386134d


[I 2025-08-31 11:07:37,342] Trial 0 finished with value: 0.5183294910584063 and parameters: {'n_estimators': 170, 'max_depth': 8, 'learning_rate': 0.2843689379422614, 'subsample': 0.9683826547347845, 'colsample_bytree': 0.5249242303662738, 'gamma': 0.13528909162490554}. Best is trial 0 with value: 0.5183294910584063.


[I 2025-08-31 11:07:38,984] Trial 1 finished with value: 0.5290552246368015 and parameters: {'n_estimators': 255, 'max_depth': 9, 'learning_rate': 0.11802071659667386, 'subsample': 0.662474426108264, 'colsample_bytree': 0.9054477104658698, 'gamma': 2.7799817946090433}. Best is trial 1 with value: 0.5290552246368015.


[I 2025-08-31 11:07:40,673] Trial 2 finished with value: 0.5276169231562648 and parameters: {'n_estimators': 707, 'max_depth': 6, 'learning_rate': 0.05099179831998214, 'subsample': 0.8912545248420545, 'colsample_bytree': 0.6746522148732453, 'gamma': 4.652267591675001}. Best is trial 1 with value: 0.5290552246368015.


[I 2025-08-31 11:07:42,027] Trial 3 finished with value: 0.5120345992487118 and parameters: {'n_estimators': 722, 'max_depth': 5, 'learning_rate': 0.252649199959682, 'subsample': 0.9885403471116336, 'colsample_bytree': 0.8051227666151421, 'gamma': 2.449413335908339}. Best is trial 1 with value: 0.5290552246368015.


[I 2025-08-31 11:07:42,197] Trial 4 finished with value: 0.5099700688498598 and parameters: {'n_estimators': 231, 'max_depth': 4, 'learning_rate': 0.16417123574348186, 'subsample': 0.9248469778640015, 'colsample_bytree': 0.5202296432668109, 'gamma': 2.9032250157718313}. Best is trial 1 with value: 0.5290552246368015.


[I 2025-08-31 11:07:42,449] Trial 5 finished with value: 0.5032643036681945 and parameters: {'n_estimators': 397, 'max_depth': 3, 'learning_rate': 0.2469991769302138, 'subsample': 0.83682377898158, 'colsample_bytree': 0.9617375832494193, 'gamma': 3.3027284383079705}. Best is trial 1 with value: 0.5290552246368015.


[I 2025-08-31 11:07:43,342] Trial 6 finished with value: 0.4929534191821726 and parameters: {'n_estimators': 689, 'max_depth': 6, 'learning_rate': 0.1329504312179235, 'subsample': 0.9863796294303391, 'colsample_bytree': 0.9740507496772188, 'gamma': 0.2594823114163475}. Best is trial 1 with value: 0.5290552246368015.


[I 2025-08-31 11:07:43,792] Trial 7 finished with value: 0.5266058019591217 and parameters: {'n_estimators': 134, 'max_depth': 7, 'learning_rate': 0.17145924221149134, 'subsample': 0.9407826536206202, 'colsample_bytree': 0.7801844442222204, 'gamma': 0.44636935672762945}. Best is trial 1 with value: 0.5290552246368015.


[I 2025-08-31 11:07:44,513] Trial 8 finished with value: 0.5413518924140857 and parameters: {'n_estimators': 611, 'max_depth': 10, 'learning_rate': 0.290039141459265, 'subsample': 0.5401945669403496, 'colsample_bytree': 0.9896689709166715, 'gamma': 0.9750627176267196}. Best is trial 8 with value: 0.5413518924140857.


[I 2025-08-31 11:07:44,828] Trial 9 finished with value: 0.5236388273115603 and parameters: {'n_estimators': 282, 'max_depth': 3, 'learning_rate': 0.29819957618405263, 'subsample': 0.8413804023589402, 'colsample_bytree': 0.8748183762383108, 'gamma': 4.1186198212472824e-05}. Best is trial 8 with value: 0.5413518924140857.


[I 2025-08-31 11:07:48,020] Trial 10 finished with value: 0.5479000791592712 and parameters: {'n_estimators': 971, 'max_depth': 10, 'learning_rate': 0.015344496157063375, 'subsample': 0.5051288214277462, 'colsample_bytree': 0.6637973365731413, 'gamma': 1.4604806274746809}. Best is trial 10 with value: 0.5479000791592712.


[I 2025-08-31 11:07:50,813] Trial 11 finished with value: 0.5450717959205342 and parameters: {'n_estimators': 991, 'max_depth': 10, 'learning_rate': 0.016213679885287047, 'subsample': 0.5212697710296429, 'colsample_bytree': 0.648577692516734, 'gamma': 1.670671531079441}. Best is trial 10 with value: 0.5479000791592712.


[I 2025-08-31 11:07:52,707] Trial 12 finished with value: 0.5286164318638751 and parameters: {'n_estimators': 986, 'max_depth': 10, 'learning_rate': 0.02934766451444573, 'subsample': 0.503753873682862, 'colsample_bytree': 0.6453837522518445, 'gamma': 1.5704926318965815}. Best is trial 10 with value: 0.5479000791592712.


[I 2025-08-31 11:07:53,689] Trial 13 finished with value: 0.539100300653115 and parameters: {'n_estimators': 999, 'max_depth': 8, 'learning_rate': 0.07141325026955775, 'subsample': 0.6080826869278517, 'colsample_bytree': 0.6455639110412146, 'gamma': 1.6815797758661617}. Best is trial 10 with value: 0.5479000791592712.


[I 2025-08-31 11:07:56,042] Trial 14 finished with value: 0.5252256341204687 and parameters: {'n_estimators': 875, 'max_depth': 9, 'learning_rate': 0.016015876402361455, 'subsample': 0.7189386842418385, 'colsample_bytree': 0.7020750894401403, 'gamma': 1.7735790680060775}. Best is trial 10 with value: 0.5479000791592712.


[I 2025-08-31 11:07:57,112] Trial 15 finished with value: 0.5414549649743744 and parameters: {'n_estimators': 845, 'max_depth': 10, 'learning_rate': 0.08969296830659582, 'subsample': 0.5847306688531752, 'colsample_bytree': 0.5928217936172021, 'gamma': 1.1314334101148797}. Best is trial 10 with value: 0.5479000791592712.


[I 2025-08-31 11:07:58,462] Trial 16 finished with value: 0.5588022474366003 and parameters: {'n_estimators': 464, 'max_depth': 8, 'learning_rate': 0.010152790543467967, 'subsample': 0.6447789130457507, 'colsample_bytree': 0.737174136693942, 'gamma': 3.730230813455881}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:07:58,847] Trial 17 finished with value: 0.5283729716442748 and parameters: {'n_estimators': 449, 'max_depth': 8, 'learning_rate': 0.09265609833327121, 'subsample': 0.7068440630860573, 'colsample_bytree': 0.7269488137572108, 'gamma': 3.9243612566294392}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:07:59,301] Trial 18 finished with value: 0.5346470120142325 and parameters: {'n_estimators': 491, 'max_depth': 9, 'learning_rate': 0.05088600220824871, 'subsample': 0.6347958105204546, 'colsample_bytree': 0.7977916847000034, 'gamma': 4.992440608934327}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:07:59,610] Trial 19 finished with value: 0.5265801336917045 and parameters: {'n_estimators': 573, 'max_depth': 7, 'learning_rate': 0.2044146086923254, 'subsample': 0.7654259841704345, 'colsample_bytree': 0.5862528218759064, 'gamma': 3.8857206856858726}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:08:00,789] Trial 20 finished with value: 0.5309206804430365 and parameters: {'n_estimators': 819, 'max_depth': 9, 'learning_rate': 0.04911655775810261, 'subsample': 0.5460733477774105, 'colsample_bytree': 0.8487816724152909, 'gamma': 2.226564920820014}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:08:06,729] Trial 21 finished with value: 0.549798572118696 and parameters: {'n_estimators': 926, 'max_depth': 10, 'learning_rate': 0.010100836907784287, 'subsample': 0.5023334473617143, 'colsample_bytree': 0.741079308956033, 'gamma': 1.2037974012223862}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:08:08,324] Trial 22 finished with value: 0.5475451427927207 and parameters: {'n_estimators': 355, 'max_depth': 8, 'learning_rate': 0.03737718528271611, 'subsample': 0.57689775265342, 'colsample_bytree': 0.7463902919413599, 'gamma': 1.0479638221380514}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:08:10,085] Trial 23 finished with value: 0.5520547783271866 and parameters: {'n_estimators': 885, 'max_depth': 9, 'learning_rate': 0.011793303701472538, 'subsample': 0.6605723307130492, 'colsample_bytree': 0.7040273380515647, 'gamma': 3.3889873732305738}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:08:10,765] Trial 24 finished with value: 0.5303495137002209 and parameters: {'n_estimators': 780, 'max_depth': 9, 'learning_rate': 0.08051531433125347, 'subsample': 0.661074741404213, 'colsample_bytree': 0.7115372454379985, 'gamma': 3.676380011661221}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:08:11,321] Trial 25 finished with value: 0.5166916278053187 and parameters: {'n_estimators': 636, 'max_depth': 7, 'learning_rate': 0.06024740012669846, 'subsample': 0.768329467625904, 'colsample_bytree': 0.7467751209976352, 'gamma': 4.283374957837605}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:08:12,058] Trial 26 finished with value: 0.5125349469299041 and parameters: {'n_estimators': 903, 'max_depth': 8, 'learning_rate': 0.10353775680440067, 'subsample': 0.7091382517079227, 'colsample_bytree': 0.6050532526164429, 'gamma': 3.2891300822537417}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:08:12,949] Trial 27 finished with value: 0.5518973582742133 and parameters: {'n_estimators': 520, 'max_depth': 9, 'learning_rate': 0.03280357700274464, 'subsample': 0.6579962191990004, 'colsample_bytree': 0.7854756353687129, 'gamma': 3.44530997261062}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:08:13,832] Trial 28 finished with value: 0.5283089359636952 and parameters: {'n_estimators': 474, 'max_depth': 9, 'learning_rate': 0.0340818663254441, 'subsample': 0.6618456056226742, 'colsample_bytree': 0.8260954437350769, 'gamma': 3.3139815954221357}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:08:14,227] Trial 29 finished with value: 0.5120181254761609 and parameters: {'n_estimators': 549, 'max_depth': 8, 'learning_rate': 0.12748319711125372, 'subsample': 0.805443531213523, 'colsample_bytree': 0.7823090222716815, 'gamma': 4.313821901128712}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:08:14,918] Trial 30 finished with value: 0.5439776508606909 and parameters: {'n_estimators': 360, 'max_depth': 7, 'learning_rate': 0.07137547985618828, 'subsample': 0.6220774005406282, 'colsample_bytree': 0.9131522152860325, 'gamma': 2.992258577659336}. Best is trial 16 with value: 0.5588022474366003.


[I 2025-08-31 11:08:16,557] Trial 31 finished with value: 0.56009725103881 and parameters: {'n_estimators': 532, 'max_depth': 9, 'learning_rate': 0.01000642991984412, 'subsample': 0.5822150986846799, 'colsample_bytree': 0.6936773819493027, 'gamma': 3.607615966225764}. Best is trial 31 with value: 0.56009725103881.


[I 2025-08-31 11:08:17,334] Trial 32 finished with value: 0.5620014187902266 and parameters: {'n_estimators': 431, 'max_depth': 9, 'learning_rate': 0.03295738757416512, 'subsample': 0.5805589390970479, 'colsample_bytree': 0.6955075785097713, 'gamma': 3.6323112553137262}. Best is trial 32 with value: 0.5620014187902266.


[I 2025-08-31 11:08:17,946] Trial 33 finished with value: 0.5579737897805804 and parameters: {'n_estimators': 412, 'max_depth': 8, 'learning_rate': 0.03904078020075327, 'subsample': 0.5811752527222849, 'colsample_bytree': 0.6939085618199834, 'gamma': 4.204469171226471}. Best is trial 32 with value: 0.5620014187902266.


[I 2025-08-31 11:08:18,456] Trial 34 finished with value: 0.5253791997557784 and parameters: {'n_estimators': 426, 'max_depth': 8, 'learning_rate': 0.057589800486016064, 'subsample': 0.5797759949430793, 'colsample_bytree': 0.6846777919600237, 'gamma': 4.275831448204894}. Best is trial 32 with value: 0.5620014187902266.


[I 2025-08-31 11:08:18,838] Trial 35 finished with value: 0.5472695375310324 and parameters: {'n_estimators': 291, 'max_depth': 6, 'learning_rate': 0.04027342117607599, 'subsample': 0.5518258521382187, 'colsample_bytree': 0.609942576691828, 'gamma': 4.671588116513232}. Best is trial 32 with value: 0.5620014187902266.


[I 2025-08-31 11:08:19,090] Trial 36 finished with value: 0.5557997467739162 and parameters: {'n_estimators': 199, 'max_depth': 8, 'learning_rate': 0.1975004030937031, 'subsample': 0.5977705716565509, 'colsample_bytree': 0.6816364694178908, 'gamma': 3.948544397944959}. Best is trial 32 with value: 0.5620014187902266.


[I 2025-08-31 11:08:19,468] Trial 37 finished with value: 0.5176889252447195 and parameters: {'n_estimators': 342, 'max_depth': 6, 'learning_rate': 0.10971065153099333, 'subsample': 0.6343610235867441, 'colsample_bytree': 0.6235854651361811, 'gamma': 2.734525005111073}. Best is trial 32 with value: 0.5620014187902266.


[I 2025-08-31 11:08:19,794] Trial 38 finished with value: 0.5190918753496027 and parameters: {'n_estimators': 411, 'max_depth': 5, 'learning_rate': 0.14951225002101043, 'subsample': 0.5618430096297837, 'colsample_bytree': 0.5707839122116708, 'gamma': 3.6766912482892495}. Best is trial 32 with value: 0.5620014187902266.


[I 2025-08-31 11:08:20,276] Trial 39 finished with value: 0.5134011604770998 and parameters: {'n_estimators': 653, 'max_depth': 7, 'learning_rate': 0.06611051150468776, 'subsample': 0.6950763588676045, 'colsample_bytree': 0.7639349264386439, 'gamma': 4.572888842422206}. Best is trial 32 with value: 0.5620014187902266.


[I 2025-08-31 11:08:21,115] Trial 40 finished with value: 0.5471373877376722 and parameters: {'n_estimators': 558, 'max_depth': 5, 'learning_rate': 0.027458775488443415, 'subsample': 0.6194583081746428, 'colsample_bytree': 0.5373086371213744, 'gamma': 2.497084497882266}. Best is trial 32 with value: 0.5620014187902266.


[I 2025-08-31 11:08:21,356] Trial 41 finished with value: 0.5436671801235478 and parameters: {'n_estimators': 196, 'max_depth': 8, 'learning_rate': 0.20148460515101818, 'subsample': 0.6018652890110783, 'colsample_bytree': 0.6899402416100493, 'gamma': 4.043278111021564}. Best is trial 32 with value: 0.5620014187902266.


[I 2025-08-31 11:08:21,628] Trial 42 finished with value: 0.5599995348104699 and parameters: {'n_estimators': 135, 'max_depth': 8, 'learning_rate': 0.19255432323521213, 'subsample': 0.5930824465030431, 'colsample_bytree': 0.6724754601948683, 'gamma': 3.039263973328003}. Best is trial 32 with value: 0.5620014187902266.


[I 2025-08-31 11:08:21,882] Trial 43 finished with value: 0.5635529110906663 and parameters: {'n_estimators': 102, 'max_depth': 9, 'learning_rate': 0.2260022583560479, 'subsample': 0.5292487384290698, 'colsample_bytree': 0.6669316672726233, 'gamma': 3.0077856887855887}. Best is trial 43 with value: 0.5635529110906663.


[I 2025-08-31 11:08:22,112] Trial 44 finished with value: 0.5445832929679831 and parameters: {'n_estimators': 111, 'max_depth': 9, 'learning_rate': 0.24933075398531754, 'subsample': 0.5345558871786144, 'colsample_bytree': 0.659404113114108, 'gamma': 3.079892898507885}. Best is trial 43 with value: 0.5635529110906663.


[I 2025-08-31 11:08:22,434] Trial 45 finished with value: 0.561959488656195 and parameters: {'n_estimators': 157, 'max_depth': 10, 'learning_rate': 0.233934862500223, 'subsample': 0.5284474391363077, 'colsample_bytree': 0.6283197014961882, 'gamma': 2.2994388909372088}. Best is trial 43 with value: 0.5635529110906663.


[I 2025-08-31 11:08:22,768] Trial 46 finished with value: 0.5313654452585936 and parameters: {'n_estimators': 152, 'max_depth': 10, 'learning_rate': 0.22596373566409753, 'subsample': 0.5259729951598227, 'colsample_bytree': 0.6301558714648505, 'gamma': 2.116530757344162}. Best is trial 43 with value: 0.5635529110906663.


[I 2025-08-31 11:08:23,069] Trial 47 finished with value: 0.5668252529203356 and parameters: {'n_estimators': 247, 'max_depth': 10, 'learning_rate': 0.22378733537245477, 'subsample': 0.5668642792025815, 'colsample_bytree': 0.5659506006966533, 'gamma': 2.6453131318601875}. Best is trial 47 with value: 0.5668252529203356.


[I 2025-08-31 11:08:23,393] Trial 48 finished with value: 0.5492104836445345 and parameters: {'n_estimators': 257, 'max_depth': 10, 'learning_rate': 0.2625819368989781, 'subsample': 0.5556140267501041, 'colsample_bytree': 0.5025957541189945, 'gamma': 2.0339298395461283}. Best is trial 47 with value: 0.5668252529203356.


[I 2025-08-31 11:08:23,683] Trial 49 finished with value: 0.5405416631523517 and parameters: {'n_estimators': 214, 'max_depth': 10, 'learning_rate': 0.2710177811132415, 'subsample': 0.5208255424730754, 'colsample_bytree': 0.5546838261306686, 'gamma': 2.710612450974624}. Best is trial 47 with value: 0.5668252529203356.


[I 2025-08-31 11:08:23,975] Trial 50 finished with value: 0.5425011990904823 and parameters: {'n_estimators': 168, 'max_depth': 10, 'learning_rate': 0.23117673146512974, 'subsample': 0.5278057796384048, 'colsample_bytree': 0.6315077416647377, 'gamma': 2.606765493069268}. Best is trial 47 with value: 0.5668252529203356.


[I 2025-08-31 11:08:24,237] Trial 51 finished with value: 0.5602799347869764 and parameters: {'n_estimators': 104, 'max_depth': 9, 'learning_rate': 0.22484615948110542, 'subsample': 0.5649363455401334, 'colsample_bytree': 0.659150607547639, 'gamma': 2.330690204319265}. Best is trial 47 with value: 0.5668252529203356.


[I 2025-08-31 11:08:24,499] Trial 52 finished with value: 0.534910110585973 and parameters: {'n_estimators': 107, 'max_depth': 9, 'learning_rate': 0.23290165419255804, 'subsample': 0.5588277909852009, 'colsample_bytree': 0.5789228341868349, 'gamma': 2.270097543587767}. Best is trial 47 with value: 0.5668252529203356.


[I 2025-08-31 11:08:24,924] Trial 53 finished with value: 0.5679480275540774 and parameters: {'n_estimators': 293, 'max_depth': 9, 'learning_rate': 0.2160468809340274, 'subsample': 0.5683308455626912, 'colsample_bytree': 0.7176109191272263, 'gamma': 1.933893335904846}. Best is trial 53 with value: 0.5679480275540774.


[I 2025-08-31 11:08:25,226] Trial 54 finished with value: 0.5325954725613313 and parameters: {'n_estimators': 259, 'max_depth': 10, 'learning_rate': 0.21633115149228438, 'subsample': 0.8900019761650351, 'colsample_bytree': 0.7159304416053455, 'gamma': 2.354627584548302}. Best is trial 53 with value: 0.5679480275540774.


[I 2025-08-31 11:08:25,662] Trial 55 finished with value: 0.5363071775392685 and parameters: {'n_estimators': 322, 'max_depth': 9, 'learning_rate': 0.18712107432591776, 'subsample': 0.5006846138806629, 'colsample_bytree': 0.6523237975968764, 'gamma': 1.9605680537781605}. Best is trial 53 with value: 0.5679480275540774.


[I 2025-08-31 11:08:26,233] Trial 56 finished with value: 0.552003114217259 and parameters: {'n_estimators': 233, 'max_depth': 10, 'learning_rate': 0.17233206976967808, 'subsample': 0.5674801280826706, 'colsample_bytree': 0.560170814098767, 'gamma': 0.7606211405809455}. Best is trial 53 with value: 0.5679480275540774.


[I 2025-08-31 11:08:26,637] Trial 57 finished with value: 0.5454694144170861 and parameters: {'n_estimators': 181, 'max_depth': 10, 'learning_rate': 0.2169761740720721, 'subsample': 0.5451035419563489, 'colsample_bytree': 0.6024155817410808, 'gamma': 1.4032032248737185}. Best is trial 53 with value: 0.5679480275540774.


[I 2025-08-31 11:08:26,971] Trial 58 finished with value: 0.573035941001639 and parameters: {'n_estimators': 291, 'max_depth': 9, 'learning_rate': 0.24536038874601837, 'subsample': 0.6849162154456322, 'colsample_bytree': 0.7219405078194433, 'gamma': 1.8891657316086192}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:27,346] Trial 59 finished with value: 0.5293900047985611 and parameters: {'n_estimators': 296, 'max_depth': 9, 'learning_rate': 0.24225752884107582, 'subsample': 0.6156179803101943, 'colsample_bytree': 0.7242532174143241, 'gamma': 1.8444577251727754}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:27,637] Trial 60 finished with value: 0.5726230637558196 and parameters: {'n_estimators': 376, 'max_depth': 4, 'learning_rate': 0.2791146358835144, 'subsample': 0.5225825428025711, 'colsample_bytree': 0.6226486521199261, 'gamma': 2.8808219571779334}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:27,929] Trial 61 finished with value: 0.541243466522543 and parameters: {'n_estimators': 328, 'max_depth': 4, 'learning_rate': 0.2793114639456152, 'subsample': 0.5201628092641253, 'colsample_bytree': 0.7612974441474298, 'gamma': 2.9219538845063484}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:28,189] Trial 62 finished with value: 0.5192885305531774 and parameters: {'n_estimators': 378, 'max_depth': 3, 'learning_rate': 0.29869296554017793, 'subsample': 0.5402692207780079, 'colsample_bytree': 0.6191478557359339, 'gamma': 3.174028957725219}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:28,399] Trial 63 finished with value: 0.5108308037230156 and parameters: {'n_estimators': 232, 'max_depth': 4, 'learning_rate': 0.2610814121826619, 'subsample': 0.7398997890933996, 'colsample_bytree': 0.6384184425620885, 'gamma': 2.5637219752958478}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:28,775] Trial 64 finished with value: 0.5535217749994691 and parameters: {'n_estimators': 310, 'max_depth': 5, 'learning_rate': 0.23968937823589462, 'subsample': 0.5154888789483277, 'colsample_bytree': 0.6678541419555747, 'gamma': 1.4631139413194438}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:29,109] Trial 65 finished with value: 0.5537465283764812 and parameters: {'n_estimators': 379, 'max_depth': 4, 'learning_rate': 0.2126231446118099, 'subsample': 0.5356389546815928, 'colsample_bytree': 0.5395740169086061, 'gamma': 1.8996249995759484}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:29,433] Trial 66 finished with value: 0.5420056785910294 and parameters: {'n_estimators': 256, 'max_depth': 10, 'learning_rate': 0.28217811779452245, 'subsample': 0.6404827470323567, 'colsample_bytree': 0.592839381294203, 'gamma': 2.121158940498405}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:29,717] Trial 67 finished with value: 0.5239616926233974 and parameters: {'n_estimators': 276, 'max_depth': 9, 'learning_rate': 0.25850422169507237, 'subsample': 0.6742286524995678, 'colsample_bytree': 0.7099451706769186, 'gamma': 2.8624212716814617}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:29,980] Trial 68 finished with value: 0.5590424472172433 and parameters: {'n_estimators': 134, 'max_depth': 9, 'learning_rate': 0.18122896387965987, 'subsample': 0.5117905995197894, 'colsample_bytree': 0.7291983241977672, 'gamma': 3.523198532105025}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:30,294] Trial 69 finished with value: 0.5318799128266195 and parameters: {'n_estimators': 216, 'max_depth': 10, 'learning_rate': 0.2730944115009384, 'subsample': 0.7906111276200706, 'colsample_bytree': 0.7572560750388105, 'gamma': 1.6302785021801827}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:30,472] Trial 70 finished with value: 0.534660238746748 and parameters: {'n_estimators': 162, 'max_depth': 3, 'learning_rate': 0.2881679283457839, 'subsample': 0.682067057046334, 'colsample_bytree': 0.8137766080783693, 'gamma': 3.1852573507136643}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:30,712] Trial 71 finished with value: 0.5527036631636055 and parameters: {'n_estimators': 112, 'max_depth': 9, 'learning_rate': 0.2236350724593705, 'subsample': 0.5710779679814658, 'colsample_bytree': 0.6517932399181787, 'gamma': 2.4100618893907355}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:31,100] Trial 72 finished with value: 0.5661395472883799 and parameters: {'n_estimators': 447, 'max_depth': 9, 'learning_rate': 0.237799428574393, 'subsample': 0.5942677235276463, 'colsample_bytree': 0.6661210606501948, 'gamma': 2.5599497208053115}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:31,467] Trial 73 finished with value: 0.5210882154420385 and parameters: {'n_estimators': 446, 'max_depth': 10, 'learning_rate': 0.2516057453872012, 'subsample': 0.5965231892511075, 'colsample_bytree': 0.6797384751082725, 'gamma': 2.718634645879716}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:31,872] Trial 74 finished with value: 0.5483232101036277 and parameters: {'n_estimators': 491, 'max_depth': 9, 'learning_rate': 0.23855672033583433, 'subsample': 0.5473770509083434, 'colsample_bytree': 0.6161154647313172, 'gamma': 2.574414838910181}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:32,258] Trial 75 finished with value: 0.5573699395742269 and parameters: {'n_estimators': 390, 'max_depth': 9, 'learning_rate': 0.20840880295847203, 'subsample': 0.6098451654742862, 'colsample_bytree': 0.6387995509793146, 'gamma': 2.195511835274573}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:32,779] Trial 76 finished with value: 0.5210066564577035 and parameters: {'n_estimators': 354, 'max_depth': 10, 'learning_rate': 0.15086036190722935, 'subsample': 0.5838519280877719, 'colsample_bytree': 0.700143453590327, 'gamma': 1.7351582034263209}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:33,124] Trial 77 finished with value: 0.558376332570548 and parameters: {'n_estimators': 435, 'max_depth': 6, 'learning_rate': 0.2459392341417789, 'subsample': 0.6242674608269042, 'colsample_bytree': 0.7340631609886075, 'gamma': 2.858022326935618}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:33,448] Trial 78 finished with value: 0.5393726154488722 and parameters: {'n_estimators': 310, 'max_depth': 8, 'learning_rate': 0.2652650343644003, 'subsample': 0.5722505489049607, 'colsample_bytree': 0.6666955482729962, 'gamma': 2.462656740296432}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:33,804] Trial 79 finished with value: 0.5509757663572847 and parameters: {'n_estimators': 505, 'max_depth': 9, 'learning_rate': 0.23151253584677078, 'subsample': 0.7376795943575406, 'colsample_bytree': 0.5973586279789647, 'gamma': 2.6526097898994356}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:34,231] Trial 80 finished with value: 0.5233841357534205 and parameters: {'n_estimators': 458, 'max_depth': 9, 'learning_rate': 0.2563827359545333, 'subsample': 0.5510617797339327, 'colsample_bytree': 0.7213123065389443, 'gamma': 2.0492940738104024}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:34,515] Trial 81 finished with value: 0.5544987494802212 and parameters: {'n_estimators': 136, 'max_depth': 9, 'learning_rate': 0.22497435413516292, 'subsample': 0.5658910445873475, 'colsample_bytree': 0.6594540101992692, 'gamma': 2.2451689254770253}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:34,809] Trial 82 finished with value: 0.540211275675375 and parameters: {'n_estimators': 179, 'max_depth': 9, 'learning_rate': 0.22097643460278568, 'subsample': 0.5913321359878047, 'colsample_bytree': 0.6837489883354854, 'gamma': 2.357617487502637}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:35,114] Trial 83 finished with value: 0.5161519279595156 and parameters: {'n_estimators': 200, 'max_depth': 10, 'learning_rate': 0.20831582160836312, 'subsample': 0.5340972161002479, 'colsample_bytree': 0.7041861838348003, 'gamma': 2.8138706899978865}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:35,562] Trial 84 finished with value: 0.550817117392824 and parameters: {'n_estimators': 413, 'max_depth': 7, 'learning_rate': 0.13640040815588694, 'subsample': 0.5080642199006442, 'colsample_bytree': 0.6512683734113507, 'gamma': 2.322070772084525}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:35,876] Trial 85 finished with value: 0.5593640924431591 and parameters: {'n_estimators': 102, 'max_depth': 8, 'learning_rate': 0.19569254673640815, 'subsample': 0.5551957335199933, 'colsample_bytree': 0.6721918351321124, 'gamma': 1.8297067159432368}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:36,150] Trial 86 finished with value: 0.5312494117751478 and parameters: {'n_estimators': 276, 'max_depth': 9, 'learning_rate': 0.2347770351896672, 'subsample': 0.6035905319415193, 'colsample_bytree': 0.5818050338314635, 'gamma': 2.9857720980394036}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:36,465] Trial 87 finished with value: 0.5434362387458934 and parameters: {'n_estimators': 237, 'max_depth': 10, 'learning_rate': 0.24586526928975613, 'subsample': 0.5302481404541168, 'colsample_bytree': 0.6408190962312108, 'gamma': 2.5131257522846036}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:36,666] Trial 88 finished with value: 0.5176825077342291 and parameters: {'n_estimators': 125, 'max_depth': 9, 'learning_rate': 0.2529692756078234, 'subsample': 0.6503695224478384, 'colsample_bytree': 0.6245541575137651, 'gamma': 3.1298133867243516}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:37,031] Trial 89 finished with value: 0.5468858183194999 and parameters: {'n_estimators': 146, 'max_depth': 8, 'learning_rate': 0.21381624029817553, 'subsample': 0.5750261101660201, 'colsample_bytree': 0.696374920219427, 'gamma': 1.2867479106539834}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:37,441] Trial 90 finished with value: 0.531769226901929 and parameters: {'n_estimators': 338, 'max_depth': 10, 'learning_rate': 0.16426726471141367, 'subsample': 0.5882548720540063, 'colsample_bytree': 0.5615248991256895, 'gamma': 2.0029502188738726}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:38,440] Trial 91 finished with value: 0.5536285341225281 and parameters: {'n_estimators': 597, 'max_depth': 9, 'learning_rate': 0.021872769700322273, 'subsample': 0.5574010256778049, 'colsample_bytree': 0.7168636352132631, 'gamma': 3.5422582018769586}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:39,328] Trial 92 finished with value: 0.5710597298280355 and parameters: {'n_estimators': 522, 'max_depth': 9, 'learning_rate': 0.02128430291130823, 'subsample': 0.53913478789793, 'colsample_bytree': 0.689196757798857, 'gamma': 3.857768742020319}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:39,706] Trial 93 finished with value: 0.566746991215132 and parameters: {'n_estimators': 521, 'max_depth': 8, 'learning_rate': 0.2036977629114936, 'subsample': 0.544500603523168, 'colsample_bytree': 0.7409713254040948, 'gamma': 3.772788068383885}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:40,133] Trial 94 finished with value: 0.5582596952300459 and parameters: {'n_estimators': 533, 'max_depth': 8, 'learning_rate': 0.20285531760222403, 'subsample': 0.5411183151833948, 'colsample_bytree': 0.9333139889957942, 'gamma': 3.7362207504056775}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:40,738] Trial 95 finished with value: 0.5429202955414698 and parameters: {'n_estimators': 469, 'max_depth': 8, 'learning_rate': 0.04715686998434, 'subsample': 0.517527157202249, 'colsample_bytree': 0.7744938353107202, 'gamma': 3.799194919242667}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:41,145] Trial 96 finished with value: 0.5497096613390785 and parameters: {'n_estimators': 573, 'max_depth': 9, 'learning_rate': 0.18515825699519148, 'subsample': 0.5246214622166091, 'colsample_bytree': 0.7455271699131233, 'gamma': 4.043615945458551}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:41,461] Trial 97 finished with value: 0.5255033079294135 and parameters: {'n_estimators': 367, 'max_depth': 10, 'learning_rate': 0.22913077965750842, 'subsample': 0.6281860818583771, 'colsample_bytree': 0.6882408408015243, 'gamma': 3.3559599917244545}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:41,972] Trial 98 finished with value: 0.5474370759591276 and parameters: {'n_estimators': 410, 'max_depth': 7, 'learning_rate': 0.021136568325879568, 'subsample': 0.8660345678131862, 'colsample_bytree': 0.736513379430533, 'gamma': 4.457880684338488}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:42,392] Trial 99 finished with value: 0.5617968995367353 and parameters: {'n_estimators': 506, 'max_depth': 9, 'learning_rate': 0.27100902000631744, 'subsample': 0.5091663492987963, 'colsample_bytree': 0.7706270446259114, 'gamma': 3.268326667219846}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:42,779] Trial 100 finished with value: 0.5091595966862988 and parameters: {'n_estimators': 630, 'max_depth': 8, 'learning_rate': 0.23786583250405852, 'subsample': 0.5461279277989007, 'colsample_bytree': 0.7060648734596547, 'gamma': 4.130529809259327}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:43,198] Trial 101 finished with value: 0.5362992057489211 and parameters: {'n_estimators': 517, 'max_depth': 9, 'learning_rate': 0.2705920081693019, 'subsample': 0.5037295224247219, 'colsample_bytree': 0.7737522419812487, 'gamma': 3.2730572247837326}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:43,474] Trial 102 finished with value: 0.5229027369274101 and parameters: {'n_estimators': 486, 'max_depth': 9, 'learning_rate': 0.2943592003411035, 'subsample': 0.969843828560272, 'colsample_bytree': 0.7267303102772564, 'gamma': 3.4880335680144214}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:43,873] Trial 103 finished with value: 0.5266121498381392 and parameters: {'n_estimators': 581, 'max_depth': 9, 'learning_rate': 0.27649824734011486, 'subsample': 0.5128397831528343, 'colsample_bytree': 0.7534924628839577, 'gamma': 3.8016655015944916}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:44,283] Trial 104 finished with value: 0.5364951901056063 and parameters: {'n_estimators': 544, 'max_depth': 9, 'learning_rate': 0.24163661940048506, 'subsample': 0.5412550579977873, 'colsample_bytree': 0.6751489965051016, 'gamma': 3.2174724238587062}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:44,709] Trial 105 finished with value: 0.5331259649768891 and parameters: {'n_estimators': 504, 'max_depth': 10, 'learning_rate': 0.2649523190621784, 'subsample': 0.5271644858226443, 'colsample_bytree': 0.8470985605445518, 'gamma': 3.644279454436431}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:45,045] Trial 106 finished with value: 0.5386815320382414 and parameters: {'n_estimators': 433, 'max_depth': 9, 'learning_rate': 0.28539752803178997, 'subsample': 0.5632739440079558, 'colsample_bytree': 0.60924428863239, 'gamma': 3.4141847307763094}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:45,455] Trial 107 finished with value: 0.5610048045577992 and parameters: {'n_estimators': 476, 'max_depth': 9, 'learning_rate': 0.2180872964667153, 'subsample': 0.5369752709613705, 'colsample_bytree': 0.741489352382588, 'gamma': 2.9406706269168397}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:45,812] Trial 108 finished with value: 0.5508138359853925 and parameters: {'n_estimators': 390, 'max_depth': 10, 'learning_rate': 0.2559197590863612, 'subsample': 0.5790225835326803, 'colsample_bytree': 0.79195906986315, 'gamma': 3.043743658152432}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:46,170] Trial 109 finished with value: 0.5553118843015445 and parameters: {'n_estimators': 450, 'max_depth': 8, 'learning_rate': 0.24813640851427163, 'subsample': 0.5101629692768191, 'colsample_bytree': 0.8061719247095153, 'gamma': 3.9971419867920925}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:46,568] Trial 110 finished with value: 0.5514634669662896 and parameters: {'n_estimators': 673, 'max_depth': 9, 'learning_rate': 0.20994888266798753, 'subsample': 0.5547309091655196, 'colsample_bytree': 0.5151018766003419, 'gamma': 3.878640220157738}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:47,009] Trial 111 finished with value: 0.5625157550197926 and parameters: {'n_estimators': 475, 'max_depth': 9, 'learning_rate': 0.21865776774520473, 'subsample': 0.5272145302105531, 'colsample_bytree': 0.7677580162717371, 'gamma': 2.6691676291280158}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:47,469] Trial 112 finished with value: 0.5712036026416628 and parameters: {'n_estimators': 524, 'max_depth': 9, 'learning_rate': 0.22679876410380603, 'subsample': 0.5232238724637048, 'colsample_bytree': 0.7596609314333208, 'gamma': 2.699893048056515}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:47,931] Trial 113 finished with value: 0.5553100106508186 and parameters: {'n_estimators': 527, 'max_depth': 9, 'learning_rate': 0.22801748626021406, 'subsample': 0.5216757442761568, 'colsample_bytree': 0.7889540824342023, 'gamma': 2.7314153278066895}. Best is trial 58 with value: 0.573035941001639.


[I 2025-08-31 11:08:48,330] Trial 114 finished with value: 0.5831164453731658 and parameters: {'n_estimators': 424, 'max_depth': 5, 'learning_rate': 0.22226400175451194, 'subsample': 0.531449623541663, 'colsample_bytree': 0.7129322245155306, 'gamma': 2.1414651108117377}. Best is trial 114 with value: 0.5831164453731658.


[I 2025-08-31 11:08:48,688] Trial 115 finished with value: 0.546577144491126 and parameters: {'n_estimators': 425, 'max_depth': 5, 'learning_rate': 0.21959397606379205, 'subsample': 0.546060393356094, 'colsample_bytree': 0.7113222102678434, 'gamma': 2.616732813485637}. Best is trial 114 with value: 0.5831164453731658.


[I 2025-08-31 11:08:49,086] Trial 116 finished with value: 0.5553707421471594 and parameters: {'n_estimators': 457, 'max_depth': 4, 'learning_rate': 0.1990273036590866, 'subsample': 0.565150247047425, 'colsample_bytree': 0.7541320114665389, 'gamma': 2.4440252143624477}. Best is trial 114 with value: 0.5831164453731658.


[I 2025-08-31 11:08:49,577] Trial 117 finished with value: 0.5567291361922297 and parameters: {'n_estimators': 490, 'max_depth': 5, 'learning_rate': 0.17762916350235902, 'subsample': 0.5318390890786195, 'colsample_bytree': 0.6950006611012086, 'gamma': 1.5257874443389343}. Best is trial 114 with value: 0.5831164453731658.


[I 2025-08-31 11:08:49,986] Trial 118 finished with value: 0.5071687000912675 and parameters: {'n_estimators': 562, 'max_depth': 4, 'learning_rate': 0.1901487765953508, 'subsample': 0.5748439873549785, 'colsample_bytree': 0.7277131953687516, 'gamma': 2.8315652186648412}. Best is trial 114 with value: 0.5831164453731658.


[I 2025-08-31 11:08:50,324] Trial 119 finished with value: 0.539841447172948 and parameters: {'n_estimators': 295, 'max_depth': 6, 'learning_rate': 0.2216926356549407, 'subsample': 0.6139466039421506, 'colsample_bytree': 0.7176756948179519, 'gamma': 2.104394686500625}. Best is trial 114 with value: 0.5831164453731658.


[I 2025-08-31 11:08:50,796] Trial 120 finished with value: 0.5502903884362932 and parameters: {'n_estimators': 351, 'max_depth': 9, 'learning_rate': 0.2047219399577381, 'subsample': 0.5517746405091888, 'colsample_bytree': 0.7608114110206258, 'gamma': 1.746202769308709}. Best is trial 114 with value: 0.5831164453731658.


[I 2025-08-31 11:08:51,207] Trial 121 finished with value: 0.5416652104569725 and parameters: {'n_estimators': 323, 'max_depth': 10, 'learning_rate': 0.23542570107531724, 'subsample': 0.52138216016535, 'colsample_bytree': 0.6822775835656698, 'gamma': 1.898639910736096}. Best is trial 114 with value: 0.5831164453731658.


[I 2025-08-31 11:08:51,553] Trial 122 finished with value: 0.5245148573553753 and parameters: {'n_estimators': 399, 'max_depth': 3, 'learning_rate': 0.23100304554509912, 'subsample': 0.5326641429230948, 'colsample_bytree': 0.7380582968181728, 'gamma': 2.1994651118592645}. Best is trial 114 with value: 0.5831164453731658.


[I 2025-08-31 11:08:51,972] Trial 123 finished with value: 0.5577571249146424 and parameters: {'n_estimators': 440, 'max_depth': 9, 'learning_rate': 0.21355676646656294, 'subsample': 0.5390846196495609, 'colsample_bytree': 0.6629421627673552, 'gamma': 2.5142787684806795}. Best is trial 114 with value: 0.5831164453731658.


[I 2025-08-31 11:08:52,453] Trial 124 finished with value: 0.549445873050343 and parameters: {'n_estimators': 542, 'max_depth': 9, 'learning_rate': 0.24311820976592657, 'subsample': 0.5892697744741007, 'colsample_bytree': 0.7016706035813555, 'gamma': 1.6283646765003086}. Best is trial 114 with value: 0.5831164453731658.


Best XGBoost Params: {'n_estimators': 424, 'max_depth': 5, 'learning_rate': 0.22226400175451194, 'subsample': 0.531449623541663, 'colsample_bytree': 0.7129322245155306, 'gamma': 2.1414651108117377}


Final Tuned XGBoost F1 Score: 0.5985





SHAP analysis complete and plot logged to MLflow.


In [6]:
# --- MLP Challenger Model ---
# Step 1: Imports and Data Scaling
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

# Deep learning models are sensitive to feature scale. We must standardize our data.
# We fit the scaler ONLY on the training data to prevent data leakage from the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data successfully scaled.")
print(f"Shape of scaled training data: {X_train_scaled.shape}")

Data successfully scaled.
Shape of scaled training data: (2380, 32)


In [7]:
# Convert numpy arrays to PyTorch Tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Create TensorDatasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create DataLoaders to handle batching
# We don't shuffle time-series data to preserve temporal order if needed, 
# but for a simple MLP, shuffling is often acceptable. Let's keep it False for rigor.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print("PyTorch Tensors and DataLoaders created.")

PyTorch Tensors and DataLoaders created.


In [8]:
# Step 2: Define the MLP Architecture
class ETF_MLP(nn.Module):
    def __init__(self, input_size, hidden_size_1=128, hidden_size_2=64, dropout_rate=0.5):
        """
        Initializes the MLP model.
        
        Args:
            input_size (int): The number of input features.
            hidden_size_1 (int): Number of neurons in the first hidden layer.
            hidden_size_2 (int): Number of neurons in the second hidden layer.
            dropout_rate (float): The dropout probability.
        """
        super(ETF_MLP, self).__init__()
        
        # --- Layer Definitions ---
        self.layer_1 = nn.Linear(input_size, hidden_size_1)
        self.bn_1 = nn.BatchNorm1d(hidden_size_1)
        
        self.layer_2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.bn_2 = nn.BatchNorm1d(hidden_size_2)
        
        self.output_layer = nn.Linear(hidden_size_2, 1)
        
        # --- Activation and Regularization ---
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)
        
    def forward(self, x):
        """ The forward pass of the model. """
        # First hidden layer
        x = self.layer_1(x)
        x = self.bn_1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Second hidden layer
        x = self.layer_2(x)
        x = self.bn_2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Output layer with sigmoid for binary classification
        x = torch.sigmoid(self.output_layer(x))
        return x

# Instantiate the model to test
input_features = X_train.shape[1]
model_mlp = ETF_MLP(input_size=input_features)
print("MLP Model Architecture:")
print(model_mlp)

MLP Model Architecture:
ETF_MLP(
  (layer_1): Linear(in_features=32, out_features=128, bias=True)
  (bn_1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer_2): Linear(in_features=128, out_features=64, bias=True)
  (bn_2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (output_layer): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
)


In [9]:
# Step 3: Manual MLP Training and Evaluation

# --- Configuration ---
INPUT_SIZE = X_train.shape[1]
LEARNING_RATE = 0.001
EPOCHS = 50

# --- Model, Loss, Optimizer (Demonstrates 5.2, 5.3) ---
model_mlp = ETF_MLP(input_size=INPUT_SIZE, dropout_rate=0.4)
criterion = nn.BCELoss() # Binary Cross Entropy Loss for binary classification
optimizer = torch.optim.Adam(model_mlp.parameters(), lr=LEARNING_RATE) # Adam Optimizer
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS) # LR Schedule

# --- MLflow Logging ---
with mlflow.start_run(run_name="MLP_Manual_Baseline") as run:
    mlflow.log_params({"learning_rate": LEARNING_RATE, "epochs": EPOCHS, "optimizer": "Adam"})
    
    # --- Training Loop ---
    for epoch in range(EPOCHS):
        model_mlp.train() # Set model to training mode
        for features, labels in train_loader:
            # Forward pass
            outputs = model_mlp(features)
            loss = criterion(outputs, labels)
            
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        # Update learning rate
        scheduler.step()
        
        # --- Evaluation on Test Set ---
        model_mlp.eval() # Set model to evaluation mode
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for features, labels in test_loader:
                outputs = model_mlp(features)
                predicted = (outputs > 0.5).float()
                all_preds.extend(predicted.numpy())
                all_labels.extend(labels.numpy())
        
        # Calculate and log F1 score for the epoch
        f1 = f1_score(all_labels, all_preds)
        mlflow.log_metric("test_f1_score", f1, step=epoch)

    print(f"Final MLP F1 Score from manual run: {f1:.4f}")
    # Log the final model
    mlflow.pytorch.log_model(model_mlp, "mlp-model")

Final MLP F1 Score from manual run: 0.1568


In [10]:
import mlflow
import pandas as pd

# Ensure MLflow is pointing to your tracking server/directory
# mlflow.set_tracking_uri("../mlruns") # Uncomment if running in a new session/script

# Get the experiment by its name
experiment = mlflow.get_experiment_by_name("ETF_Trend_Prediction")

if experiment:
    # Search for all runs within this experiment
    runs_df = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        # Order by F1 score (desc) and then by start time (desc)
        order_by=["metrics.f1_score DESC", "start_time DESC"],
        output_format="pandas"
    )

    # --- New Logic to Extract All Metrics and Parameters ---
    # Identify all metric and parameter columns
    metric_cols = [col for col in runs_df.columns if col.startswith("metrics.")]
    param_cols = [col for col in runs_df.columns if col.startswith("params.")]

    # Select core run info, all metrics, and all parameters
    # The 'tags.mlflow.runName' contains the run name
    selected_cols = [
        "tags.mlflow.runName", "start_time", "run_id"
    ] + metric_cols + param_cols

    metrics_and_params = runs_df[selected_cols].copy()

    # Rename columns for better readability (optional, you can keep original for params if many)
    # This example renames just the core and metric columns
    metrics_and_params.rename(columns={
        "tags.mlflow.runName": "Run Name",
        "metrics.f1_score": "F1 Score",
        "metrics.accuracy": "Accuracy",
        "metrics.roc_auc": "ROC AUC"
        # Add more renames for specific metrics/params if you want,
        # but for ALL params, it might be too many to rename individually.
        # Keeping 'params.param_name' is often fine.
    }, inplace=True)

    print("Metrics and Parameters for 'ETF_Trend_Prediction' Experiment:")
    display(metrics_and_params)

else:
    print(f"Experiment 'ETF_Trend_Prediction' not found.")


Metrics and Parameters for 'ETF_Trend_Prediction' Experiment:


Unnamed: 0,Run Name,start_time,run_id,F1 Score,Accuracy,ROC AUC,metrics.test_f1_score,params.max_depth,params.gamma,params.colsample_bytree,params.n_estimators,params.learning_rate,params.subsample,params.optimizer,params.epochs
0,LogisticRegression_Baseline,2025-08-31 15:07:34.419000+00:00,7034c380912143aaa2cf06421c423db9,0.696839,0.535242,,,,,,,,,,
1,LogisticRegression_Baseline,2025-08-31 15:01:53.999000+00:00,6bf309a40e994d72a9fee76d58a666cc,0.696839,0.535242,,,,,,,,,,
2,RandomForest_Baseline,2025-08-31 15:01:54.150000+00:00,098f8cd1870f4859b0cee1abb6d87a93,0.635976,0.529736,,,,,,,,,,
3,RandomForest_Baseline,2025-08-31 15:07:34.552000+00:00,8756a967e1a84589acd8b86056b80f3e,0.62069,0.515419,,,,,,,,,,
4,XGBoost_Tuned_Champion,2025-08-31 15:03:07.673000+00:00,38d868687ee041bd96d32f3a5dc68451,0.607735,0.530837,0.518111,,7.0,0.9178064491217854,0.6246378761517877,507.0,0.2554495345997615,0.5179475127627672,,
5,XGBoost_Tuned_Champion,2025-08-31 15:08:52.459000+00:00,b8c1a62575e04e448fdb3b40f3963226,0.59854,0.515419,0.503375,,5.0,2.1414651108117377,0.7129322245155306,424.0,0.2222640017545119,0.531449623541663,,
6,MLP_Manual_Baseline,2025-08-31 15:09:01.650000+00:00,a723e1cb7adc4741838664b12864d91a,,,,0.156794,,,,,0.001,,Adam,50.0
7,MLP_Manual_Baseline,2025-08-31 15:03:20.064000+00:00,1175aa4059b44cdcbcd1d51e9c86485d,,,,0.144404,,,,,0.001,,Adam,50.0
