In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import optuna
import shap
import mlflow
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

mlflow.set_tracking_uri("file:../mlruns")

# Load your processed data
DATA_PATH = '../data/processed/etf_features.parquet'
data = pd.read_parquet(DATA_PATH)

# Separate features (X) and target (y)
X = data.drop('target', axis=1)
y = data['target']

  import pkg_resources  # noqa: TID251


In [2]:
# Define the chronological split point
# For example, use data up to the end of 2021 for training, and 2022 onwards for testing.
train_end_date = '2021-12-31'
test_start_date = '2022-01-01'

X_train = X.loc[:train_end_date]
y_train = y.loc[:train_end_date]

X_test = X.loc[test_start_date:]
y_test = y.loc[test_start_date:]

print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

mlflow.set_experiment("ETF_Trend_Prediction")

Training set size: 2380
Test set size: 908


<Experiment: artifact_location='file:C:/Users/dawso/Dev/Personal/AIGrind/mlops-etf-forecasting/notebooks/../mlruns/922455140467852123', creation_time=1756652513785, experiment_id='922455140467852123', last_update_time=1756652513785, lifecycle_stage='active', name='ETF_Trend_Prediction', tags={}>

In [3]:
# Train Logistic Regression
with mlflow.start_run(run_name="LogisticRegression_Baseline"):
    model_lr = LogisticRegression(max_iter=1000, random_state=42)
    model_lr.fit(X_train, y_train)
    y_pred_lr = model_lr.predict(X_test)
    
    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_lr))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_lr))
    print(f"Logistic Regression F1 Score: {f1_score(y_test, y_pred_lr):.4f}")

# Train Random Forest
with mlflow.start_run(run_name="RandomForest_Baseline"):
    model_rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
    model_rf.fit(X_train, y_train)
    y_pred_rf = model_rf.predict(X_test)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_rf))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_rf))
    print(f"Random Forest F1 Score: {f1_score(y_test, y_pred_rf):.4f}")

Logistic Regression F1 Score: 0.6968


Random Forest F1 Score: 0.6207


In [4]:
def objective(trial):
    # Define the search space for hyperparameters
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'random_state': 42
    }
    
    model = xgb.XGBClassifier(**params)
    
    # Use TimeSeriesSplit for robust cross-validation
    tscv = TimeSeriesSplit(n_splits=5)
    score = cross_val_score(model, X_train, y_train, cv=tscv, scoring='f1', n_jobs=-1).mean()
    
    return score

In [5]:
# Run the study to find the best params
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=125) 

best_params = study.best_params
print("Best XGBoost Params:", best_params)

# Train the final XGBoost model with the best parameters and log to MLflow
with mlflow.start_run(run_name="XGBoost_Tuned_Champion") as run:
    final_xgb_model = xgb.XGBClassifier(**best_params, random_state=42)
    final_xgb_model.fit(X_train, y_train)
    y_pred_xgb = final_xgb_model.predict(X_test)
    y_pred_proba_xgb = final_xgb_model.predict_proba(X_test)[:, 1]

    f1 = f1_score(y_test, y_pred_xgb)
    print(f"Final Tuned XGBoost F1 Score: {f1:.4f}")

    mlflow.log_params(best_params)
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_xgb))
    mlflow.log_metric("roc_auc", roc_auc_score(y_test, y_pred_proba_xgb))

    mlflow.xgboost.log_model(final_xgb_model, "xgb-model")
    champion_run_id = run.info.run_id # Capture run ID

    # --- SHAP Plot Generation and Logging (Move these lines here) ---
    print("\nSHAP analysis complete and plot logged to MLflow.")

    # 1. Create a SHAP Explainer
    explainer = shap.TreeExplainer(final_xgb_model)
    shap_values = explainer.shap_values(X_test) # Or X_train, depending on what you want to explain

    # 2. Generate and save the SHAP summary plot to a temporary file
    # Ensure you import matplotlib.pyplot as plt
    fig, ax = plt.subplots(figsize=(10, 8)) # You might want to specify figure size
    shap.summary_plot(shap_values, X_test, show=False, plot_size=(8, 6)) # show=False prevents immediate display
    plt.title("SHAP Feature Importance for XGBoost Model") # Add a title
    plot_filename = "shap_summary_champion.png" # Give it a more descriptive name
    plt.savefig(plot_filename, bbox_inches='tight', dpi=300) # Save the plot to a file
    plt.close() # Close the plot to free memory

    # 3. Log the saved plot as an MLflow artifact to the *current* active run
    mlflow.log_artifact(plot_filename)

[I 2025-08-31 11:38:48,041] A new study created in memory with name: no-name-4833b5c2-96f9-4d7f-9c63-55230c73e030


[I 2025-08-31 11:38:52,164] Trial 0 finished with value: 0.5367294595445853 and parameters: {'n_estimators': 654, 'max_depth': 9, 'learning_rate': 0.22063741553126265, 'subsample': 0.5929194706252953, 'colsample_bytree': 0.5753972234365798, 'gamma': 2.528647489110017}. Best is trial 0 with value: 0.5367294595445853.


[I 2025-08-31 11:38:54,378] Trial 1 finished with value: 0.5233466477041834 and parameters: {'n_estimators': 962, 'max_depth': 7, 'learning_rate': 0.18185169588462125, 'subsample': 0.8669125287545385, 'colsample_bytree': 0.5441224553940079, 'gamma': 4.9239352035342465}. Best is trial 0 with value: 0.5367294595445853.


[I 2025-08-31 11:38:56,387] Trial 2 finished with value: 0.5473642902408437 and parameters: {'n_estimators': 967, 'max_depth': 10, 'learning_rate': 0.21777477099361903, 'subsample': 0.7695933590438239, 'colsample_bytree': 0.8537233825239301, 'gamma': 4.562311111170631}. Best is trial 2 with value: 0.5473642902408437.


[I 2025-08-31 11:38:58,736] Trial 3 finished with value: 0.5286999240632709 and parameters: {'n_estimators': 307, 'max_depth': 7, 'learning_rate': 0.18148818191772997, 'subsample': 0.780359401809388, 'colsample_bytree': 0.9467265535601892, 'gamma': 4.843819819950034}. Best is trial 2 with value: 0.5473642902408437.


[I 2025-08-31 11:38:59,257] Trial 4 finished with value: 0.5156125855484335 and parameters: {'n_estimators': 413, 'max_depth': 3, 'learning_rate': 0.18795922261446502, 'subsample': 0.7433208129511246, 'colsample_bytree': 0.9936050859920664, 'gamma': 0.367311016157732}. Best is trial 2 with value: 0.5473642902408437.


[I 2025-08-31 11:38:59,790] Trial 5 finished with value: 0.4861191030410771 and parameters: {'n_estimators': 944, 'max_depth': 6, 'learning_rate': 0.10738370171091734, 'subsample': 0.9385829699908453, 'colsample_bytree': 0.8283738265043137, 'gamma': 2.473310436823377}. Best is trial 2 with value: 0.5473642902408437.


[I 2025-08-31 11:39:01,353] Trial 6 finished with value: 0.5419291280639806 and parameters: {'n_estimators': 666, 'max_depth': 4, 'learning_rate': 0.019986976101368223, 'subsample': 0.5449941650495996, 'colsample_bytree': 0.8715288663624801, 'gamma': 2.4655329687717513}. Best is trial 2 with value: 0.5473642902408437.


[I 2025-08-31 11:39:01,834] Trial 7 finished with value: 0.5392801683363693 and parameters: {'n_estimators': 701, 'max_depth': 7, 'learning_rate': 0.24427712041798186, 'subsample': 0.7213766684678138, 'colsample_bytree': 0.6373157144642194, 'gamma': 2.206393168359917}. Best is trial 2 with value: 0.5473642902408437.


[I 2025-08-31 11:39:02,315] Trial 8 finished with value: 0.5194408269815958 and parameters: {'n_estimators': 652, 'max_depth': 10, 'learning_rate': 0.13698244226279496, 'subsample': 0.9644195155094755, 'colsample_bytree': 0.8076519791263153, 'gamma': 2.4299118417319994}. Best is trial 2 with value: 0.5473642902408437.


[I 2025-08-31 11:39:02,830] Trial 9 finished with value: 0.5426874596843201 and parameters: {'n_estimators': 680, 'max_depth': 6, 'learning_rate': 0.2676086688385686, 'subsample': 0.6007672576802792, 'colsample_bytree': 0.6200004791784353, 'gamma': 1.7458894745726679}. Best is trial 2 with value: 0.5473642902408437.


[I 2025-08-31 11:39:03,111] Trial 10 finished with value: 0.5648884722235483 and parameters: {'n_estimators': 114, 'max_depth': 9, 'learning_rate': 0.07861697857781712, 'subsample': 0.6732376577602959, 'colsample_bytree': 0.7182733272821284, 'gamma': 3.848333412040248}. Best is trial 10 with value: 0.5648884722235483.


[I 2025-08-31 11:39:03,462] Trial 11 finished with value: 0.556285951066698 and parameters: {'n_estimators': 139, 'max_depth': 10, 'learning_rate': 0.06736082037099601, 'subsample': 0.6717309777473468, 'colsample_bytree': 0.7073227669818221, 'gamma': 3.797993981763323}. Best is trial 10 with value: 0.5648884722235483.


[I 2025-08-31 11:39:03,772] Trial 12 finished with value: 0.5371032795659434 and parameters: {'n_estimators': 101, 'max_depth': 9, 'learning_rate': 0.06518767050176417, 'subsample': 0.6754153699592534, 'colsample_bytree': 0.7123111205906801, 'gamma': 3.715733964195393}. Best is trial 10 with value: 0.5648884722235483.


[I 2025-08-31 11:39:04,075] Trial 13 finished with value: 0.5508748415999898 and parameters: {'n_estimators': 129, 'max_depth': 9, 'learning_rate': 0.07674857581813958, 'subsample': 0.6686290214841778, 'colsample_bytree': 0.7237112772741574, 'gamma': 3.7316502520089214}. Best is trial 10 with value: 0.5648884722235483.


[I 2025-08-31 11:39:04,965] Trial 14 finished with value: 0.5606657194583129 and parameters: {'n_estimators': 266, 'max_depth': 8, 'learning_rate': 0.01844940903789051, 'subsample': 0.5087860962636136, 'colsample_bytree': 0.6820447346988322, 'gamma': 3.572648888691023}. Best is trial 10 with value: 0.5648884722235483.


[I 2025-08-31 11:39:06,210] Trial 15 finished with value: 0.5743371247933721 and parameters: {'n_estimators': 284, 'max_depth': 8, 'learning_rate': 0.01033894615843793, 'subsample': 0.5131077571878689, 'colsample_bytree': 0.6562251470085492, 'gamma': 3.0792570152444902}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:06,975] Trial 16 finished with value: 0.553068751216083 and parameters: {'n_estimators': 405, 'max_depth': 8, 'learning_rate': 0.04154711549432404, 'subsample': 0.5918770943307963, 'colsample_bytree': 0.7720452039003565, 'gamma': 3.1056918875642197}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:07,432] Trial 17 finished with value: 0.5238923615570716 and parameters: {'n_estimators': 262, 'max_depth': 8, 'learning_rate': 0.11134345591519888, 'subsample': 0.8688623889602654, 'colsample_bytree': 0.5091329131615033, 'gamma': 1.2630794497360234}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:07,825] Trial 18 finished with value: 0.5321260773345827 and parameters: {'n_estimators': 473, 'max_depth': 5, 'learning_rate': 0.10073361526844957, 'subsample': 0.5121287131268277, 'colsample_bytree': 0.6412817513140383, 'gamma': 4.223251297743707}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:08,313] Trial 19 finished with value: 0.53065058264043 and parameters: {'n_estimators': 206, 'max_depth': 8, 'learning_rate': 0.04594228252451549, 'subsample': 0.839112610502035, 'colsample_bytree': 0.7741341595730483, 'gamma': 3.0272621199068355}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:09,628] Trial 20 finished with value: 0.5647402523800599 and parameters: {'n_estimators': 354, 'max_depth': 9, 'learning_rate': 0.01179801470545748, 'subsample': 0.6314541551839645, 'colsample_bytree': 0.6030897427521527, 'gamma': 3.144549794868871}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:10,761] Trial 21 finished with value: 0.5431047625083129 and parameters: {'n_estimators': 362, 'max_depth': 9, 'learning_rate': 0.013980910720331528, 'subsample': 0.637094535969231, 'colsample_bytree': 0.593361945692322, 'gamma': 3.1385071875675625}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:11,351] Trial 22 finished with value: 0.5719777790041201 and parameters: {'n_estimators': 519, 'max_depth': 9, 'learning_rate': 0.04320320979528335, 'subsample': 0.580963403659646, 'colsample_bytree': 0.6618663635739236, 'gamma': 4.193814314649366}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:11,899] Trial 23 finished with value: 0.5449141632637755 and parameters: {'n_estimators': 510, 'max_depth': 8, 'learning_rate': 0.05275798480597501, 'subsample': 0.5354365882284551, 'colsample_bytree': 0.6561673843990296, 'gamma': 4.167367368640509}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:12,399] Trial 24 finished with value: 0.542028548559798 and parameters: {'n_estimators': 563, 'max_depth': 10, 'learning_rate': 0.08573200021461308, 'subsample': 0.5684875208490536, 'colsample_bytree': 0.673454302040637, 'gamma': 4.071772979620892}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:12,680] Trial 25 finished with value: 0.5519497558003337 and parameters: {'n_estimators': 200, 'max_depth': 9, 'learning_rate': 0.13938861243174866, 'subsample': 0.5575371725482994, 'colsample_bytree': 0.7423834042049922, 'gamma': 4.2972833371480155}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:13,457] Trial 26 finished with value: 0.5324638025041464 and parameters: {'n_estimators': 800, 'max_depth': 7, 'learning_rate': 0.03227575404552674, 'subsample': 0.7004352377957224, 'colsample_bytree': 0.5533989399306172, 'gamma': 3.451353608603409}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:13,700] Trial 27 finished with value: 0.5673332445712986 and parameters: {'n_estimators': 190, 'max_depth': 8, 'learning_rate': 0.2968964385542996, 'subsample': 0.6231915101935996, 'colsample_bytree': 0.7538167239224163, 'gamma': 4.639994579566775}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:13,941] Trial 28 finished with value: 0.5636133011356885 and parameters: {'n_estimators': 210, 'max_depth': 6, 'learning_rate': 0.28525557678848756, 'subsample': 0.6280750647391621, 'colsample_bytree': 0.9051836191760516, 'gamma': 4.6196200232935904}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:14,276] Trial 29 finished with value: 0.5577828565071916 and parameters: {'n_estimators': 459, 'max_depth': 8, 'learning_rate': 0.22309011067955414, 'subsample': 0.5872792857800074, 'colsample_bytree': 0.7768584536324094, 'gamma': 4.511424379026541}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:14,808] Trial 30 finished with value: 0.5604813005896945 and parameters: {'n_estimators': 851, 'max_depth': 5, 'learning_rate': 0.29361992749688015, 'subsample': 0.525419427883147, 'colsample_bytree': 0.5615836431876051, 'gamma': 1.7465945359684358}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:15,112] Trial 31 finished with value: 0.5292922544953855 and parameters: {'n_estimators': 180, 'max_depth': 9, 'learning_rate': 0.08927880417035888, 'subsample': 0.6195442604007302, 'colsample_bytree': 0.691820065524965, 'gamma': 4.0194385606012455}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:15,436] Trial 32 finished with value: 0.5477624071498097 and parameters: {'n_estimators': 256, 'max_depth': 8, 'learning_rate': 0.12962347889120585, 'subsample': 0.5863007993067447, 'colsample_bytree': 0.7320259499453946, 'gamma': 3.438015913589876}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:15,809] Trial 33 finished with value: 0.5003008530078855 and parameters: {'n_estimators': 584, 'max_depth': 10, 'learning_rate': 0.16106133491510877, 'subsample': 0.6521529551793093, 'colsample_bytree': 0.6696294739485477, 'gamma': 4.60148032739131}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:16,144] Trial 34 finished with value: 0.5281423247695587 and parameters: {'n_estimators': 299, 'max_depth': 7, 'learning_rate': 0.057562261935671814, 'subsample': 0.7058615764438215, 'colsample_bytree': 0.8029918927068105, 'gamma': 4.999789692148908}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:16,851] Trial 35 finished with value: 0.5476600914671954 and parameters: {'n_estimators': 166, 'max_depth': 9, 'learning_rate': 0.03326657508647203, 'subsample': 0.7929498202569429, 'colsample_bytree': 0.7486384779589305, 'gamma': 2.8177374358803453}. Best is trial 15 with value: 0.5743371247933721.


[I 2025-08-31 11:39:17,143] Trial 36 finished with value: 0.5746307404305865 and parameters: {'n_estimators': 335, 'max_depth': 7, 'learning_rate': 0.1991999011840774, 'subsample': 0.5590105430632688, 'colsample_bytree': 0.6150115837270196, 'gamma': 3.878740655726583}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:17,393] Trial 37 finished with value: 0.5519013029637444 and parameters: {'n_estimators': 348, 'max_depth': 7, 'learning_rate': 0.2084873788227615, 'subsample': 0.5609951073000885, 'colsample_bytree': 0.5259080231287356, 'gamma': 4.755086250572279}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:17,672] Trial 38 finished with value: 0.5624105195085219 and parameters: {'n_estimators': 407, 'max_depth': 7, 'learning_rate': 0.26127115171088955, 'subsample': 0.5069040648932245, 'colsample_bytree': 0.6215047558437528, 'gamma': 4.415166149304422}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:18,128] Trial 39 finished with value: 0.5480261501605919 and parameters: {'n_estimators': 617, 'max_depth': 6, 'learning_rate': 0.1710230033718606, 'subsample': 0.542203354719707, 'colsample_bytree': 0.6502580745357632, 'gamma': 2.79551131929462}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:18,378] Trial 40 finished with value: 0.5270631189240392 and parameters: {'n_estimators': 310, 'max_depth': 3, 'learning_rate': 0.20176388985784638, 'subsample': 0.6079979222459063, 'colsample_bytree': 0.5837459305755008, 'gamma': 3.417032796321337}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:18,638] Trial 41 finished with value: 0.5352165324553897 and parameters: {'n_estimators': 222, 'max_depth': 8, 'learning_rate': 0.23363961041802336, 'subsample': 0.5595720769453897, 'colsample_bytree': 0.6996969472159037, 'gamma': 3.9577023152910407}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:18,816] Trial 42 finished with value: 0.5441094080468704 and parameters: {'n_estimators': 146, 'max_depth': 9, 'learning_rate': 0.18867217050856944, 'subsample': 0.7432274237943403, 'colsample_bytree': 0.6253939470522084, 'gamma': 4.832711452268549}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:19,288] Trial 43 finished with value: 0.5719434813055166 and parameters: {'n_estimators': 104, 'max_depth': 8, 'learning_rate': 0.0305825786364635, 'subsample': 0.5768004920686753, 'colsample_bytree': 0.8281361395137744, 'gamma': 3.900970760207173}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:21,353] Trial 44 finished with value: 0.5376309380551996 and parameters: {'n_estimators': 305, 'max_depth': 7, 'learning_rate': 0.03247607322122759, 'subsample': 0.5773918132544923, 'colsample_bytree': 0.9559253990932364, 'gamma': 0.05000872098251241}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:21,746] Trial 45 finished with value: 0.5477145604038396 and parameters: {'n_estimators': 524, 'max_depth': 8, 'learning_rate': 0.2593020306207562, 'subsample': 0.5386931683930889, 'colsample_bytree': 0.8277813755352619, 'gamma': 4.329485383328033}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:22,224] Trial 46 finished with value: 0.555787992977135 and parameters: {'n_estimators': 741, 'max_depth': 7, 'learning_rate': 0.2454074927333035, 'subsample': 0.6010218961237874, 'colsample_bytree': 0.8728232545574798, 'gamma': 3.645113510045986}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:22,579] Trial 47 finished with value: 0.5431478264285531 and parameters: {'n_estimators': 247, 'max_depth': 6, 'learning_rate': 0.12043487138930115, 'subsample': 0.5282016040635986, 'colsample_bytree': 0.8421667985706428, 'gamma': 3.9596918470592533}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:23,698] Trial 48 finished with value: 0.547717679313256 and parameters: {'n_estimators': 443, 'max_depth': 8, 'learning_rate': 0.025197919032453366, 'subsample': 0.6449737086142475, 'colsample_bytree': 0.8907549577209055, 'gamma': 3.3172920588862906}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:24,014] Trial 49 finished with value: 0.5574201679513792 and parameters: {'n_estimators': 106, 'max_depth': 7, 'learning_rate': 0.1455050145511394, 'subsample': 0.6124165121143335, 'colsample_bytree': 0.8048291524623439, 'gamma': 2.8711280887300754}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:25,063] Trial 50 finished with value: 0.5647241134538625 and parameters: {'n_estimators': 158, 'max_depth': 10, 'learning_rate': 0.06823009877287266, 'subsample': 0.5036731528719777, 'colsample_bytree': 0.6053189722052426, 'gamma': 0.7591833342309089}. Best is trial 36 with value: 0.5746307404305865.


[I 2025-08-31 11:39:25,502] Trial 51 finished with value: 0.5766432615172978 and parameters: {'n_estimators': 120, 'max_depth': 9, 'learning_rate': 0.04049854748851962, 'subsample': 0.6657862418562019, 'colsample_bytree': 0.7265728956867821, 'gamma': 3.8578441694534535}. Best is trial 51 with value: 0.5766432615172978.


[I 2025-08-31 11:39:25,982] Trial 52 finished with value: 0.5530017727479323 and parameters: {'n_estimators': 179, 'max_depth': 9, 'learning_rate': 0.040556212481137735, 'subsample': 0.6571518622819812, 'colsample_bytree': 0.7623965896747928, 'gamma': 3.757338816179923}. Best is trial 51 with value: 0.5766432615172978.


[I 2025-08-31 11:39:26,474] Trial 53 finished with value: 0.5776664591429612 and parameters: {'n_estimators': 100, 'max_depth': 8, 'learning_rate': 0.024166167032474796, 'subsample': 0.5726805476481979, 'colsample_bytree': 0.6669104044167563, 'gamma': 4.142406975495074}. Best is trial 53 with value: 0.5776664591429612.


[I 2025-08-31 11:39:27,173] Trial 54 finished with value: 0.6314529136889517 and parameters: {'n_estimators': 128, 'max_depth': 8, 'learning_rate': 0.010320004612748498, 'subsample': 0.5548455797049894, 'colsample_bytree': 0.6778186113571638, 'gamma': 3.9051743680773257}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:27,697] Trial 55 finished with value: 0.6207140230477071 and parameters: {'n_estimators': 231, 'max_depth': 9, 'learning_rate': 0.011294548500076246, 'subsample': 0.9985205500387807, 'colsample_bytree': 0.6741641573704, 'gamma': 4.18234055574494}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:29,019] Trial 56 finished with value: 0.5738569930329347 and parameters: {'n_estimators': 137, 'max_depth': 10, 'learning_rate': 0.010752464021365173, 'subsample': 0.9945165232509404, 'colsample_bytree': 0.6346527846037007, 'gamma': 2.192095809233116}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:29,563] Trial 57 finished with value: 0.5480657988734282 and parameters: {'n_estimators': 223, 'max_depth': 8, 'learning_rate': 0.02399252844595448, 'subsample': 0.9193870479365225, 'colsample_bytree': 0.6869188971099526, 'gamma': 3.6006559745236544}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:29,938] Trial 58 finished with value: 0.5404816261437164 and parameters: {'n_estimators': 282, 'max_depth': 9, 'learning_rate': 0.05595777465727552, 'subsample': 0.901459748937802, 'colsample_bytree': 0.7181393864467871, 'gamma': 4.158370936406708}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:30,449] Trial 59 finished with value: 0.5568494042145173 and parameters: {'n_estimators': 239, 'max_depth': 7, 'learning_rate': 0.019500194472681743, 'subsample': 0.8488905908173676, 'colsample_bytree': 0.5735021722053729, 'gamma': 4.366988520544967}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:31,718] Trial 60 finished with value: 0.5574650939377799 and parameters: {'n_estimators': 340, 'max_depth': 9, 'learning_rate': 0.010768323801154342, 'subsample': 0.8085953687361174, 'colsample_bytree': 0.6488636604011471, 'gamma': 3.1961617725742384}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:32,459] Trial 61 finished with value: 0.5726068632365399 and parameters: {'n_estimators': 123, 'max_depth': 10, 'learning_rate': 0.016847675556822456, 'subsample': 0.9866049306904523, 'colsample_bytree': 0.638949315882114, 'gamma': 2.9631739420064296}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:32,937] Trial 62 finished with value: 0.5475435348538882 and parameters: {'n_estimators': 141, 'max_depth': 10, 'learning_rate': 0.04824255078415976, 'subsample': 0.9997810637098039, 'colsample_bytree': 0.6098579993613711, 'gamma': 1.9349158339543522}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:34,133] Trial 63 finished with value: 0.5844805730173966 and parameters: {'n_estimators': 141, 'max_depth': 10, 'learning_rate': 0.01025191044633525, 'subsample': 0.9540111142166773, 'colsample_bytree': 0.6790444739195194, 'gamma': 2.653553406107809}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:34,659] Trial 64 finished with value: 0.5195761090459939 and parameters: {'n_estimators': 173, 'max_depth': 10, 'learning_rate': 0.039380448996495586, 'subsample': 0.9326418791779548, 'colsample_bytree': 0.6751110629235274, 'gamma': 2.6168027452188567}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:35,347] Trial 65 finished with value: 0.5338997877699292 and parameters: {'n_estimators': 197, 'max_depth': 9, 'learning_rate': 0.026758552281716134, 'subsample': 0.9694878271430982, 'colsample_bytree': 0.7109369408403972, 'gamma': 2.3670153808806327}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:36,127] Trial 66 finished with value: 0.5802219151182181 and parameters: {'n_estimators': 271, 'max_depth': 8, 'learning_rate': 0.010136057067594425, 'subsample': 0.9666850403117883, 'colsample_bytree': 0.6963094347803812, 'gamma': 3.786020918467114}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:36,618] Trial 67 finished with value: 0.5537371826654051 and parameters: {'n_estimators': 376, 'max_depth': 8, 'learning_rate': 0.020630408931636642, 'subsample': 0.960504560136589, 'colsample_bytree': 0.7324927348714677, 'gamma': 3.85515972494836}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:36,901] Trial 68 finished with value: 0.5666350105605034 and parameters: {'n_estimators': 228, 'max_depth': 9, 'learning_rate': 0.06238111511473648, 'subsample': 0.893912447774405, 'colsample_bytree': 0.6995728483588315, 'gamma': 4.1239975485042635}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:37,360] Trial 69 finished with value: 0.5345566787404957 and parameters: {'n_estimators': 156, 'max_depth': 8, 'learning_rate': 0.07487225398186499, 'subsample': 0.9462540387909717, 'colsample_bytree': 0.6844915572130904, 'gamma': 1.4063688720148018}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:37,602] Trial 70 finished with value: 0.5795951238546927 and parameters: {'n_estimators': 125, 'max_depth': 9, 'learning_rate': 0.03823971382329926, 'subsample': 0.9683133556253923, 'colsample_bytree': 0.6674008926054545, 'gamma': 4.460282840486499}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:37,823] Trial 71 finished with value: 0.6003243383723217 and parameters: {'n_estimators': 103, 'max_depth': 9, 'learning_rate': 0.03922212366435525, 'subsample': 0.9804509492080086, 'colsample_bytree': 0.6633572127689102, 'gamma': 4.456704310113248}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:38,024] Trial 72 finished with value: 0.5779465432631845 and parameters: {'n_estimators': 120, 'max_depth': 9, 'learning_rate': 0.048911587542152836, 'subsample': 0.9775780884286257, 'colsample_bytree': 0.6661026064631805, 'gamma': 4.455558301021194}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:38,234] Trial 73 finished with value: 0.5685552340425493 and parameters: {'n_estimators': 126, 'max_depth': 10, 'learning_rate': 0.04971804800155459, 'subsample': 0.9778774103983657, 'colsample_bytree': 0.6689010886193612, 'gamma': 4.487281989711725}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:38,516] Trial 74 finished with value: 0.5848893662802074 and parameters: {'n_estimators': 193, 'max_depth': 9, 'learning_rate': 0.03702896747744358, 'subsample': 0.9512536886486114, 'colsample_bytree': 0.6608337215304161, 'gamma': 4.752895686999288}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:38,813] Trial 75 finished with value: 0.5853654006025948 and parameters: {'n_estimators': 201, 'max_depth': 9, 'learning_rate': 0.034956456127681015, 'subsample': 0.9436813765035839, 'colsample_bytree': 0.6534437907102363, 'gamma': 4.744834617416184}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:39,118] Trial 76 finished with value: 0.5710452925314363 and parameters: {'n_estimators': 271, 'max_depth': 9, 'learning_rate': 0.03809631220269049, 'subsample': 0.9534577966617629, 'colsample_bytree': 0.6937377429765998, 'gamma': 4.684205667185655}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:39,402] Trial 77 finished with value: 0.5639986746046135 and parameters: {'n_estimators': 206, 'max_depth': 10, 'learning_rate': 0.031952506191894624, 'subsample': 0.9226770951972283, 'colsample_bytree': 0.6298686769464231, 'gamma': 4.916766324005186}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:39,581] Trial 78 finished with value: 0.5622244771185644 and parameters: {'n_estimators': 184, 'max_depth': 9, 'learning_rate': 0.09455274159271586, 'subsample': 0.9380096224610915, 'colsample_bytree': 0.6488368597082953, 'gamma': 4.814408964487535}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:40,113] Trial 79 finished with value: 0.5832593454271898 and parameters: {'n_estimators': 163, 'max_depth': 10, 'learning_rate': 0.018504641758879226, 'subsample': 0.8985510840185168, 'colsample_bytree': 0.7090435525781871, 'gamma': 4.29205183328228}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:40,593] Trial 80 finished with value: 0.5840569257515813 and parameters: {'n_estimators': 241, 'max_depth': 10, 'learning_rate': 0.01955447063204535, 'subsample': 0.884667251730096, 'colsample_bytree': 0.7041777832942346, 'gamma': 4.706304862451374}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:41,093] Trial 81 finished with value: 0.5862398504919355 and parameters: {'n_estimators': 246, 'max_depth': 10, 'learning_rate': 0.01745559504634375, 'subsample': 0.901064857133726, 'colsample_bytree': 0.7133851696111169, 'gamma': 4.71357808991867}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:41,521] Trial 82 finished with value: 0.5945804320750279 and parameters: {'n_estimators': 163, 'max_depth': 10, 'learning_rate': 0.018531931168969454, 'subsample': 0.8850417239227514, 'colsample_bytree': 0.706043729930995, 'gamma': 4.7470685685800085}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:41,908] Trial 83 finished with value: 0.5705676230247845 and parameters: {'n_estimators': 249, 'max_depth': 10, 'learning_rate': 0.02892217075297167, 'subsample': 0.8746796986555858, 'colsample_bytree': 0.7331100574913431, 'gamma': 4.726446595831362}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:42,303] Trial 84 finished with value: 0.603556638534295 and parameters: {'n_estimators': 204, 'max_depth': 10, 'learning_rate': 0.01794135833004449, 'subsample': 0.9096214367896257, 'colsample_bytree': 0.6772328257876377, 'gamma': 4.998805813315343}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:42,714] Trial 85 finished with value: 0.6002065686000894 and parameters: {'n_estimators': 206, 'max_depth': 10, 'learning_rate': 0.017625875107390727, 'subsample': 0.9104783789912024, 'colsample_bytree': 0.6820399543675402, 'gamma': 4.961820887412022}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:42,934] Trial 86 finished with value: 0.5698255904686261 and parameters: {'n_estimators': 211, 'max_depth': 10, 'learning_rate': 0.05899148710052893, 'subsample': 0.9110079890067364, 'colsample_bytree': 0.6487896622004556, 'gamma': 4.9733185003226215}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:43,280] Trial 87 finished with value: 0.5522537329537547 and parameters: {'n_estimators': 196, 'max_depth': 10, 'learning_rate': 0.03388495318766031, 'subsample': 0.8539775585911408, 'colsample_bytree': 0.6855077027600857, 'gamma': 4.584834099274949}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:43,542] Trial 88 finished with value: 0.5430337689904864 and parameters: {'n_estimators': 322, 'max_depth': 10, 'learning_rate': 0.07054805431082767, 'subsample': 0.9285440675163574, 'colsample_bytree': 0.6583983723260898, 'gamma': 4.834408732044882}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:44,076] Trial 89 finished with value: 0.5496610347037365 and parameters: {'n_estimators': 985, 'max_depth': 9, 'learning_rate': 0.028486402003857398, 'subsample': 0.940868654806172, 'colsample_bytree': 0.5973886280621521, 'gamma': 4.999089385528737}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:44,640] Trial 90 finished with value: 0.5490227004288034 and parameters: {'n_estimators': 931, 'max_depth': 10, 'learning_rate': 0.045974219980266004, 'subsample': 0.9088043424570195, 'colsample_bytree': 0.7464706024445125, 'gamma': 4.574407062507573}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:45,040] Trial 91 finished with value: 0.6071128258594471 and parameters: {'n_estimators': 150, 'max_depth': 10, 'learning_rate': 0.015435816325709997, 'subsample': 0.9481558715737191, 'colsample_bytree': 0.6759718527967207, 'gamma': 4.80070233210677}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:45,376] Trial 92 finished with value: 0.6267085779646291 and parameters: {'n_estimators': 168, 'max_depth': 10, 'learning_rate': 0.016680762961864038, 'subsample': 0.9862764948468694, 'colsample_bytree': 0.6789846323708775, 'gamma': 4.8573953285200835}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:45,681] Trial 93 finished with value: 0.624799912428033 and parameters: {'n_estimators': 163, 'max_depth': 10, 'learning_rate': 0.01765037345601382, 'subsample': 0.9886028144764534, 'colsample_bytree': 0.6840383290578509, 'gamma': 4.909534467198847}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:46,038] Trial 94 finished with value: 0.6312056790922339 and parameters: {'n_estimators': 155, 'max_depth': 10, 'learning_rate': 0.016460172482036385, 'subsample': 0.9857347806257422, 'colsample_bytree': 0.7123107590431962, 'gamma': 4.889424216554083}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:46,293] Trial 95 finished with value: 0.6119731322828885 and parameters: {'n_estimators': 156, 'max_depth': 10, 'learning_rate': 0.02599077734038853, 'subsample': 0.9879493934795244, 'colsample_bytree': 0.693663039595854, 'gamma': 4.878966797094104}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:46,566] Trial 96 finished with value: 0.6297557599091161 and parameters: {'n_estimators': 150, 'max_depth': 10, 'learning_rate': 0.024322242988505993, 'subsample': 0.9870204536045214, 'colsample_bytree': 0.6870067691968071, 'gamma': 4.860403906693503}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:46,817] Trial 97 finished with value: 0.6228096551467821 and parameters: {'n_estimators': 149, 'max_depth': 10, 'learning_rate': 0.028023396739774248, 'subsample': 0.9866147920366509, 'colsample_bytree': 0.6908402921902075, 'gamma': 4.8721358915777815}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:47,091] Trial 98 finished with value: 0.6149188018858593 and parameters: {'n_estimators': 147, 'max_depth': 10, 'learning_rate': 0.023512874547594577, 'subsample': 0.9901900880865001, 'colsample_bytree': 0.7184781959935447, 'gamma': 4.848842803391345}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:47,397] Trial 99 finished with value: 0.6111204166723061 and parameters: {'n_estimators': 151, 'max_depth': 10, 'learning_rate': 0.02441749239192641, 'subsample': 0.990217044533206, 'colsample_bytree': 0.7652543579139849, 'gamma': 4.853623597940336}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:47,717] Trial 100 finished with value: 0.6061789145659403 and parameters: {'n_estimators': 175, 'max_depth': 10, 'learning_rate': 0.025225108256509768, 'subsample': 0.9913762936554543, 'colsample_bytree': 0.7607320698774493, 'gamma': 4.5926680137782805}. Best is trial 54 with value: 0.6314529136889517.


[I 2025-08-31 11:39:47,961] Trial 101 finished with value: 0.6374445645195799 and parameters: {'n_estimators': 150, 'max_depth': 10, 'learning_rate': 0.024960663814806147, 'subsample': 0.9994765394716952, 'colsample_bytree': 0.7207526561266685, 'gamma': 4.883779147238871}. Best is trial 101 with value: 0.6374445645195799.


[I 2025-08-31 11:39:48,265] Trial 102 finished with value: 0.6164269810381205 and parameters: {'n_estimators': 145, 'max_depth': 10, 'learning_rate': 0.027201862413089667, 'subsample': 0.9859112992833414, 'colsample_bytree': 0.7804627060676812, 'gamma': 4.877822163935748}. Best is trial 101 with value: 0.6374445645195799.


[I 2025-08-31 11:39:48,444] Trial 103 finished with value: 0.6159095157274712 and parameters: {'n_estimators': 141, 'max_depth': 10, 'learning_rate': 0.05453957564651747, 'subsample': 0.9978192348531965, 'colsample_bytree': 0.7819239719914958, 'gamma': 4.873684577521966}. Best is trial 101 with value: 0.6374445645195799.


[I 2025-08-31 11:39:48,635] Trial 104 finished with value: 0.624624544050509 and parameters: {'n_estimators': 135, 'max_depth': 10, 'learning_rate': 0.043590876526209646, 'subsample': 0.9979137330720946, 'colsample_bytree': 0.7854573018374496, 'gamma': 4.902345770162286}. Best is trial 101 with value: 0.6374445645195799.


[I 2025-08-31 11:39:48,877] Trial 105 finished with value: 0.5783118075991478 and parameters: {'n_estimators': 136, 'max_depth': 10, 'learning_rate': 0.05298685927119935, 'subsample': 0.9737890996680588, 'colsample_bytree': 0.8030442314618708, 'gamma': 4.261403128462117}. Best is trial 101 with value: 0.6374445645195799.


[I 2025-08-31 11:39:49,015] Trial 106 finished with value: 0.599052124193317 and parameters: {'n_estimators': 177, 'max_depth': 4, 'learning_rate': 0.04395499413413451, 'subsample': 0.998149067807168, 'colsample_bytree': 0.7974269945727742, 'gamma': 4.5422996991025695}. Best is trial 101 with value: 0.6374445645195799.


[I 2025-08-31 11:39:49,289] Trial 107 finished with value: 0.6073132173359361 and parameters: {'n_estimators': 114, 'max_depth': 10, 'learning_rate': 0.032819130010190045, 'subsample': 0.9594053388494945, 'colsample_bytree': 0.742747565473634, 'gamma': 4.656134613713702}. Best is trial 101 with value: 0.6374445645195799.


[I 2025-08-31 11:39:49,679] Trial 108 finished with value: 0.6374535113352955 and parameters: {'n_estimators': 132, 'max_depth': 10, 'learning_rate': 0.013821617946014013, 'subsample': 0.9824785431369629, 'colsample_bytree': 0.7831623972804225, 'gamma': 4.907372460430418}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:50,151] Trial 109 finished with value: 0.6233168777418829 and parameters: {'n_estimators': 101, 'max_depth': 10, 'learning_rate': 0.014343608704221005, 'subsample': 0.9818391079292537, 'colsample_bytree': 0.7387580489876201, 'gamma': 4.392170568060974}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:50,677] Trial 110 finished with value: 0.620805294102867 and parameters: {'n_estimators': 100, 'max_depth': 10, 'learning_rate': 0.013644262695799513, 'subsample': 0.9649816672155386, 'colsample_bytree': 0.7901339284786065, 'gamma': 4.320812353175621}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:51,333] Trial 111 finished with value: 0.6170508720875661 and parameters: {'n_estimators': 109, 'max_depth': 10, 'learning_rate': 0.010051885928909765, 'subsample': 0.9769040910868989, 'colsample_bytree': 0.8163198458296389, 'gamma': 4.35925343425753}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:51,898] Trial 112 finished with value: 0.6147171377215239 and parameters: {'n_estimators': 121, 'max_depth': 10, 'learning_rate': 0.014031514092307538, 'subsample': 0.964416135270707, 'colsample_bytree': 0.7241838664379283, 'gamma': 4.0650726844845835}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:52,348] Trial 113 finished with value: 0.6156786902005622 and parameters: {'n_estimators': 101, 'max_depth': 10, 'learning_rate': 0.014124281300358893, 'subsample': 0.9811355672870141, 'colsample_bytree': 0.7372613091513661, 'gamma': 4.394199101675568}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:52,729] Trial 114 finished with value: 0.6026489250805139 and parameters: {'n_estimators': 175, 'max_depth': 10, 'learning_rate': 0.02164323756519929, 'subsample': 0.9700453368144354, 'colsample_bytree': 0.789706254646218, 'gamma': 4.604560150988447}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:53,106] Trial 115 finished with value: 0.5852570159845503 and parameters: {'n_estimators': 223, 'max_depth': 10, 'learning_rate': 0.030153546335508666, 'subsample': 0.9608529290821475, 'colsample_bytree': 0.8155909200930599, 'gamma': 4.509490572635463}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:53,670] Trial 116 finished with value: 0.6282937577280684 and parameters: {'n_estimators': 131, 'max_depth': 10, 'learning_rate': 0.010138313056222675, 'subsample': 0.9826156650947903, 'colsample_bytree': 0.7901179142254101, 'gamma': 4.64083910142301}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:53,902] Trial 117 finished with value: 0.5470202959363253 and parameters: {'n_estimators': 128, 'max_depth': 5, 'learning_rate': 0.04422557701721193, 'subsample': 0.7517383543865298, 'colsample_bytree': 0.7695612618702729, 'gamma': 4.644290640190611}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:54,221] Trial 118 finished with value: 0.6175899578131976 and parameters: {'n_estimators': 168, 'max_depth': 10, 'learning_rate': 0.022461154090245503, 'subsample': 0.98286038983689, 'colsample_bytree': 0.8467002567332379, 'gamma': 4.902947129101695}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:54,505] Trial 119 finished with value: 0.611298996451326 and parameters: {'n_estimators': 123, 'max_depth': 10, 'learning_rate': 0.03411034967218572, 'subsample': 0.9578098305534716, 'colsample_bytree': 0.7580274927621743, 'gamma': 4.668185413186922}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:55,100] Trial 120 finished with value: 0.6238390716618935 and parameters: {'n_estimators': 185, 'max_depth': 10, 'learning_rate': 0.010041673791458473, 'subsample': 0.9734888213536619, 'colsample_bytree': 0.7908578791101891, 'gamma': 4.8111901569623186}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:55,686] Trial 121 finished with value: 0.6202818019698578 and parameters: {'n_estimators': 185, 'max_depth': 10, 'learning_rate': 0.010123393075644315, 'subsample': 0.9716239338922052, 'colsample_bytree': 0.7912440343278403, 'gamma': 4.752684790240631}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:56,115] Trial 122 finished with value: 0.6184955734653275 and parameters: {'n_estimators': 132, 'max_depth': 10, 'learning_rate': 0.016330583213649986, 'subsample': 0.9821536580930275, 'colsample_bytree': 0.753400773278389, 'gamma': 4.524056558612331}. Best is trial 108 with value: 0.6374535113352955.


[I 2025-08-31 11:39:56,329] Trial 123 finished with value: 0.643962155011446 and parameters: {'n_estimators': 100, 'max_depth': 10, 'learning_rate': 0.02834251091212154, 'subsample': 0.9983242866996357, 'colsample_bytree': 0.7783564313161607, 'gamma': 4.934044703071168}. Best is trial 123 with value: 0.643962155011446.


[I 2025-08-31 11:39:56,562] Trial 124 finished with value: 0.6352901993033493 and parameters: {'n_estimators': 160, 'max_depth': 10, 'learning_rate': 0.03091116140353318, 'subsample': 0.9999457216517469, 'colsample_bytree': 0.8120475499221901, 'gamma': 4.979250299808254}. Best is trial 123 with value: 0.643962155011446.


Best XGBoost Params: {'n_estimators': 100, 'max_depth': 10, 'learning_rate': 0.02834251091212154, 'subsample': 0.9983242866996357, 'colsample_bytree': 0.7783564313161607, 'gamma': 4.934044703071168}
Final Tuned XGBoost F1 Score: 0.6866





SHAP analysis complete and plot logged to MLflow.


In [6]:
# --- MLP Challenger Model ---
# Step 1: Imports and Data Scaling
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

# Deep learning models are sensitive to feature scale. We must standardize our data.
# We fit the scaler ONLY on the training data to prevent data leakage from the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data successfully scaled.")
print(f"Shape of scaled training data: {X_train_scaled.shape}")

Data successfully scaled.
Shape of scaled training data: (2380, 32)


In [7]:
# Convert numpy arrays to PyTorch Tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Create TensorDatasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create DataLoaders to handle batching
# We don't shuffle time-series data to preserve temporal order if needed, 
# but for a simple MLP, shuffling is often acceptable. Let's keep it False for rigor.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print("PyTorch Tensors and DataLoaders created.")

PyTorch Tensors and DataLoaders created.


In [8]:
# Step 2: Define the MLP Architecture
class ETF_MLP(nn.Module):
    def __init__(self, input_size, hidden_size_1=128, hidden_size_2=64, dropout_rate=0.5):
        """
        Initializes the MLP model.
        
        Args:
            input_size (int): The number of input features.
            hidden_size_1 (int): Number of neurons in the first hidden layer.
            hidden_size_2 (int): Number of neurons in the second hidden layer.
            dropout_rate (float): The dropout probability.
        """
        super(ETF_MLP, self).__init__()
        
        # --- Layer Definitions ---
        self.layer_1 = nn.Linear(input_size, hidden_size_1)
        self.bn_1 = nn.BatchNorm1d(hidden_size_1)
        
        self.layer_2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.bn_2 = nn.BatchNorm1d(hidden_size_2)
        
        self.output_layer = nn.Linear(hidden_size_2, 1)
        
        # --- Activation and Regularization ---
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)
        
    def forward(self, x):
        """ The forward pass of the model. """
        # First hidden layer
        x = self.layer_1(x)
        x = self.bn_1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Second hidden layer
        x = self.layer_2(x)
        x = self.bn_2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Output layer with sigmoid for binary classification
        x = torch.sigmoid(self.output_layer(x))
        return x

# Instantiate the model to test
input_features = X_train.shape[1]
model_mlp = ETF_MLP(input_size=input_features)
print("MLP Model Architecture:")
print(model_mlp)

MLP Model Architecture:
ETF_MLP(
  (layer_1): Linear(in_features=32, out_features=128, bias=True)
  (bn_1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer_2): Linear(in_features=128, out_features=64, bias=True)
  (bn_2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (output_layer): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
)


In [9]:
# Step 3: Manual MLP Training and Evaluation

# --- Configuration ---
INPUT_SIZE = X_train.shape[1]
LEARNING_RATE = 0.001
EPOCHS = 50

# --- Model, Loss, Optimizer (Demonstrates 5.2, 5.3) ---
model_mlp = ETF_MLP(input_size=INPUT_SIZE, dropout_rate=0.4)
criterion = nn.BCELoss() # Binary Cross Entropy Loss for binary classification
optimizer = torch.optim.Adam(model_mlp.parameters(), lr=LEARNING_RATE) # Adam Optimizer
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS) # LR Schedule

# --- MLflow Logging ---
with mlflow.start_run(run_name="MLP_Manual_Baseline") as run:
    mlflow.log_params({"learning_rate": LEARNING_RATE, "epochs": EPOCHS, "optimizer": "Adam"})
    
    # --- Training Loop ---
    for epoch in range(EPOCHS):
        model_mlp.train() # Set model to training mode
        for features, labels in train_loader:
            # Forward pass
            outputs = model_mlp(features)
            loss = criterion(outputs, labels)
            
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        # Update learning rate
        scheduler.step()
        
        # --- Evaluation on Test Set ---
        model_mlp.eval() # Set model to evaluation mode
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for features, labels in test_loader:
                outputs = model_mlp(features)
                predicted = (outputs > 0.5).float()
                all_preds.extend(predicted.numpy())
                all_labels.extend(labels.numpy())
        
        # Calculate and log F1 score for the epoch
        f1 = f1_score(all_labels, all_preds)
        mlflow.log_metric("test_f1_score", f1, step=epoch)

    print(f"Final MLP F1 Score from manual run: {f1:.4f}")
    # Log the final model
    mlflow.pytorch.log_model(model_mlp, "mlp-model")

Final MLP F1 Score from manual run: 0.1919


In [10]:
import mlflow
import pandas as pd

# Ensure MLflow is pointing to your tracking server/directory
# mlflow.set_tracking_uri("../mlruns") # Uncomment if running in a new session/script

# Get the experiment by its name
experiment = mlflow.get_experiment_by_name("ETF_Trend_Prediction")

if experiment:
    # Search for all runs within this experiment
    runs_df = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        # Order by F1 score (desc) and then by start time (desc)
        order_by=["metrics.f1_score DESC", "start_time DESC"],
        output_format="pandas"
    )

    # --- New Logic to Extract All Metrics and Parameters ---
    # Identify all metric and parameter columns
    metric_cols = [col for col in runs_df.columns if col.startswith("metrics.")]
    param_cols = [col for col in runs_df.columns if col.startswith("params.")]

    # Select core run info, all metrics, and all parameters
    # The 'tags.mlflow.runName' contains the run name
    selected_cols = [
        "tags.mlflow.runName", "start_time", "run_id"
    ] + metric_cols + param_cols

    metrics_and_params = runs_df[selected_cols].copy()

    # Rename columns for better readability (optional, you can keep original for params if many)
    # This example renames just the core and metric columns
    metrics_and_params.rename(columns={
        "tags.mlflow.runName": "Run Name",
        "metrics.f1_score": "F1 Score",
        "metrics.accuracy": "Accuracy",
        "metrics.roc_auc": "ROC AUC"
        # Add more renames for specific metrics/params if you want,
        # but for ALL params, it might be too many to rename individually.
        # Keeping 'params.param_name' is often fine.
    }, inplace=True)

    print("Metrics and Parameters for 'ETF_Trend_Prediction' Experiment:")
    display(metrics_and_params)

else:
    print(f"Experiment 'ETF_Trend_Prediction' not found.")


Metrics and Parameters for 'ETF_Trend_Prediction' Experiment:


Unnamed: 0,Run Name,start_time,run_id,Accuracy,F1 Score,ROC AUC,metrics.test_f1_score,params.learning_rate,params.max_depth,params.colsample_bytree,params.n_estimators,params.subsample,params.gamma,params.optimizer,params.epochs
0,LogisticRegression_Baseline,2025-08-31 15:38:47.588000+00:00,8b121febd9d847ba8458e9eefbef7985,0.535242,0.696839,,,,,,,,,,
1,LogisticRegression_Baseline,2025-08-31 15:07:34.419000+00:00,7034c380912143aaa2cf06421c423db9,0.535242,0.696839,,,,,,,,,,
2,LogisticRegression_Baseline,2025-08-31 15:01:53.999000+00:00,6bf309a40e994d72a9fee76d58a666cc,0.535242,0.696839,,,,,,,,,,
3,XGBoost_Tuned_Champion,2025-08-31 15:39:56.568000+00:00,34ed825b740448a8a1e819c91e60a089,0.537445,0.686567,0.505608,,0.0283425109121215,10.0,0.7783564313161607,100.0,0.9983242866996356,4.934044703071168,,
4,RandomForest_Baseline,2025-08-31 15:01:54.150000+00:00,098f8cd1870f4859b0cee1abb6d87a93,0.529736,0.635976,,,,,,,,,,
5,RandomForest_Baseline,2025-08-31 15:38:47.732000+00:00,630ae3d8504a413bbdb0e4b7dbfab06f,0.515419,0.62069,,,,,,,,,,
6,RandomForest_Baseline,2025-08-31 15:07:34.552000+00:00,8756a967e1a84589acd8b86056b80f3e,0.515419,0.62069,,,,,,,,,,
7,XGBoost_Tuned_Champion,2025-08-31 15:03:07.673000+00:00,38d868687ee041bd96d32f3a5dc68451,0.530837,0.607735,0.518111,,0.2554495345997615,7.0,0.6246378761517877,507.0,0.5179475127627672,0.9178064491217854,,
8,XGBoost_Tuned_Champion,2025-08-31 15:08:52.459000+00:00,b8c1a62575e04e448fdb3b40f3963226,0.515419,0.59854,0.503375,,0.2222640017545119,5.0,0.7129322245155306,424.0,0.531449623541663,2.1414651108117377,,
9,MLP_Manual_Baseline,2025-08-31 15:40:09.664000+00:00,8b775a938f4847858ba9b9422c0b297d,,,,0.191919,0.001,,,,,,Adam,50.0
