In [1]:
import pandas as pd
import optuna
import joblib
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score


In [2]:
df = pd.read_csv('../data/XAU_1d_signals_combo_all.csv', index_col='Date', parse_dates=True)
df = df[df['Signal_Combo3'].isin([-1, 1])].copy()
df.dropna(inplace=True)

features = [
    'Close', 'Volume',
    'SMA_20', 'SMA_50', 'SMA_100', 'SMA_200',
    'EMA_20', 'EMA_50', 'EMA_100', 'EMA_200',
    'RSI_14', 'MACD', 'MACD_Signal', 'ATR_14'
]
target = 'Signal_Combo3'

X = df[features]
y = df[target].replace({-1: 0, 1: 1})

X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, shuffle=False)


In [3]:
def objective(trial):
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'use_label_encoder': False,
        'random_state': 42,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'scale_pos_weight': (y_train == 0).sum() / (y_train == 1).sum()
    }

    model = XGBClassifier(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_valid)
    return f1_score(y_valid, y_pred, average='weighted')

In [4]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[I 2025-06-22 12:52:24,539] A new study created in memory with name: no-name-0be95df4-0af8-453b-af3c-e52bcda3799e
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-06-22 12:52:25,121] Trial 0 finished with value: 0.8208517669540931 and parameters: {'learning_rate': 0.2800847526752777, 'max_depth': 8, 'n_estimators': 453, 'subsample': 0.6888574404176093, 'colsample_bytree': 0.5447038120774736}. Best is trial 0 with value: 0.8208517669540931.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-06-22 12:52:25,687] Trial 1 finished with value: 0.7825809579081967 and parameters: {'learning_rate': 0.2673022275884253, 'max_depth': 10, 'n_estimators': 846, 'subsample': 0.8603043588876944, 'colsample_bytree': 0.9778925315923946}. Best is trial 0 with value: 0.8208517669540931.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-06-22 12:52:26

In [5]:
print("\nBest Trial:")
print(study.best_trial)


Best Trial:
FrozenTrial(number=34, state=TrialState.COMPLETE, values=[0.8292329708392818], datetime_start=datetime.datetime(2025, 6, 22, 12, 52, 39, 554079), datetime_complete=datetime.datetime(2025, 6, 22, 12, 52, 40, 6598), params={'learning_rate': 0.22944250288333945, 'max_depth': 9, 'n_estimators': 599, 'subsample': 0.8662429870139322, 'colsample_bytree': 0.5124957021433034}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'learning_rate': FloatDistribution(high=0.3, log=False, low=0.01, step=None), 'max_depth': IntDistribution(high=10, log=False, low=3, step=1), 'n_estimators': IntDistribution(high=1000, log=False, low=100, step=1), 'subsample': FloatDistribution(high=1.0, log=False, low=0.5, step=None), 'colsample_bytree': FloatDistribution(high=1.0, log=False, low=0.5, step=None)}, trial_id=34, value=None)


In [6]:
import optuna.visualization
optuna.visualization.plot_optimization_history(study).show()
optuna.visualization.plot_param_importances(study).show()

In [7]:
best_params = study.best_trial.params
best_params.update({
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'use_label_encoder': False,
    'random_state': 42,
    'scale_pos_weight': (y_train == 0).sum() / (y_train == 1).sum()
})

best_model = XGBClassifier(**best_params)
best_model.fit(X_train, y_train)




Parameters: { "use_label_encoder" } are not used.




In [8]:
joblib.dump(best_model, '../models/xgboost_combo3_best1.pkl')
joblib.dump(study, '../models/xgboost_combo3_optuna_study.pkl')

['../models/xgboost_combo3_optuna_study.pkl']