In [1]:
import pandas as pd
import joblib
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [2]:
# --- 1. Data ---
# Column roles are declared up front so the row filter and the X / y
# selection below all refer to the same names.
target = 'Signal_Combo3'
features = [
    'Close', 'Volume',
    'SMA_20', 'SMA_50', 'SMA_100', 'SMA_200',
    'EMA_20', 'EMA_50', 'EMA_100', 'EMA_200',
    'RSI_14', 'MACD', 'MACD_Signal', 'ATR_14',
]

# Load the signal file indexed by Date, keep only the rows whose signal is
# -1 or 1, then drop every row that has a missing value in any column.
df = (
    pd.read_csv('../data/XAU_1d_signals_combo_all.csv', index_col='Date', parse_dates=True)
    .loc[lambda frame: frame[target].isin([-1, 1])]
    .dropna()
)

X = df[features]
# Recode the labels to {0, 1}: -1 becomes 0, 1 stays 1.
y = df[target].replace({-1: 0, 1: 1})

# shuffle=False preserves row order: the leading ~80% of rows are used for
# training and the trailing ~20% for validation.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, shuffle=False
)


In [3]:
# --- 2. Best parameters from trial 29 ---
# Label counts in the training split; their ratio (class 0 / class 1) is
# passed to the model as the weight applied to the positive class.
n_class0 = (y_train == 0).sum()
n_class1 = (y_train == 1).sum()

best_params = dict(
    learning_rate=0.2944510377633659,
    max_depth=6,
    n_estimators=860,
    subsample=0.9453135116071003,
    colsample_bytree=0.5256532494481772,
    scale_pos_weight=n_class0 / n_class1,
    objective='binary:logistic',
    eval_metric='logloss',
    random_state=42,
)

# Build the classifier from the tuned parameters and train it on the
# training split only.
model = XGBClassifier(**best_params)
model.fit(X_train, y_train)


In [4]:
# --- 3. Evaluation ---
# Predict on the held-out validation split, compute both summaries, then
# print them under their headings.
y_pred = model.predict(X_valid)

validation_confusion = confusion_matrix(y_valid, y_pred)
validation_report = classification_report(y_valid, y_pred)

print("\nConfusion Matrix:\n", validation_confusion)
print("\nClassification Report:\n", validation_report)


Confusion Matrix:
 [[148 130]
 [ 27 734]]

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.53      0.65       278
           1       0.85      0.96      0.90       761

    accuracy                           0.85      1039
   macro avg       0.85      0.75      0.78      1039
weighted avg       0.85      0.85      0.84      1039

