# Setup iniziale, import e caricamento dati

In [None]:
# IMPORT LIBRERIE
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import xgboost as xgb

# LOAD DATA
# The export is tab-separated; the 'Time' column is parsed into datetimes
# at load time so it can be used directly on plot axes.
filename = "XAG_H1_exp.csv"
df = pd.read_csv(filename, sep='\t', parse_dates=['Time'])

# INITIAL EXPLORATION
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()
print(df.describe())
print(df.isnull().sum())

# PLOT: pairwise correlation of the numeric columns, saved to PNG and shown.
corr_matrix = df.corr(numeric_only=True)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
ax.set_title("Heatmap delle Correlazioni")
fig.tight_layout()
fig.savefig("correlation_heatmap.png")
plt.show()

# PLOT: close price together with the fast and slow EMA columns over time.
# Each entry is (DataFrame column, legend label).
price_series = [
    ('Close', 'Close'),
    ('EMA_Fast', 'EMA Fast (7)'),
    ('EMA_Slow', 'EMA Slow (21)'),
]

fig, ax = plt.subplots(figsize=(14, 6))
for column, legend_label in price_series:
    ax.plot(df['Time'], df[column], label=legend_label)
ax.set_title("Close & EMA nel Tempo")
ax.set_xlabel("Time")
ax.set_ylabel("Price")
ax.legend()
fig.tight_layout()
fig.savefig("price_ema_plot.png")
plt.show()


# Creazione Target + Feature Engineering + Scaler

In [None]:
# BINARY TARGET: uptrend = 1 when the close `n_shift` bars ahead exceeds the
# current close by more than `threshold`, else 0.
n_shift = 3
threshold = 0.0003

df['Close_future'] = df['Close'].shift(-n_shift)
# The last `n_shift` rows have no future close (NaN). A comparison against
# NaN evaluates to False, so those rows used to be labelled 0 and, because the
# helper column is dropped below, they survived the later dropna() as bogus
# "no uptrend" samples. Remove them before labelling.
df.dropna(subset=['Close_future'], inplace=True)
df['target'] = (df['Close_future'] > df['Close'] + threshold).astype(int)
df.drop(columns=['Close_future'], inplace=True)

# FEATURE ENGINEERING
# All features only look at the current and previous bars.
df['EMA_diff'] = df['EMA_Fast'] - df['EMA_Slow']
df['Close_pct_change'] = df['Close'].pct_change()
df['Volume_pct_change'] = df['Volume'].pct_change()
df['ADX_delta'] = df['ADX'].diff()
df['Volume_delta'] = df['Volume'].diff()
df['ADX_roll_std3'] = df['ADX'].rolling(window=3).std()
df['Volume_roll_std3'] = df['Volume'].rolling(window=3).std()

# pct_change() yields +/-inf when the previous value is 0 (e.g. a bar with
# zero volume). dropna() does not treat inf as missing, and infinite values
# would make the downstream scaler fail, so turn them into NaN first.
pct_cols = ['Close_pct_change', 'Volume_pct_change']
df[pct_cols] = df[pct_cols].replace([np.inf, -np.inf], np.nan)

# DROP ROWS WITH NaN (warm-up rows of diff/rolling plus any inf replaced above)
df.dropna(inplace=True)
print("Dati pronti. Dimensione finale:", df.shape)

# Columns to scale: every column except the label and the timestamp.
excluded = ('target', 'Time')
feature_cols = [name for name in df.columns if name not in excluded]

# Standardise the feature columns in place.
# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split performed later in this file, so test rows contribute to the fitted
# statistics. Confirm this is acceptable for the intended use of the scaler.
scaler = StandardScaler()
scaler.fit(df[feature_cols])
df[feature_cols] = scaler.transform(df[feature_cols])

# Persist the fitted scaler so the same transform can be applied at inference.
joblib.dump(scaler, "scaler_xgboost.save")
print("Scaler salvato come scaler_xgboost.save")

# Persist the prepared dataset for training or as a backup.
df.to_csv("xgboost_ready_dataset.csv", index=False)


# Train/test split, training modello XGBoost

In [None]:
# FEATURES & TARGET
target_col = 'target'
feature_cols = [col for col in df.columns if col not in [target_col, 'Time']]

# Order rows by timestamp so the split below is strictly chronological
# (stable sort keeps the original order of rows sharing a timestamp).
df_sorted = df.sort_values('Time', kind='stable')
X = df_sorted[feature_cols]
y = df_sorted[target_col]

# TRAIN/TEST SPLIT
# The label is built from a close several bars in the future and the features
# are differences / rolling statistics of adjacent bars, so neighbouring rows
# share information. A shuffled split scatters those neighbours across train
# and test and lets the model see the test period during training, inflating
# the reported metrics. Train on the first 75% of bars and test on the last
# 25% instead. `stratify` is dropped because scikit-learn does not allow it
# together with shuffle=False.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.25,
    shuffle=False,
)

# CLASS BALANCING: weight of the positive class (1) = negatives / positives.
# Series.sum() is vectorised; the builtin sum() iterated the Series element by
# element in Python, and did so twice.
n_pos = int(y_train.sum())
n_neg = len(y_train) - n_pos
if n_pos == 0:
    # Fail with an explicit message instead of an opaque division by zero.
    raise ValueError(
        "y_train contains no positive samples; cannot compute scale_pos_weight"
    )
pos_weight = n_neg / n_pos
print(f"Peso classe positiva per bilanciamento: {pos_weight:.2f}")

# XGBOOST MODEL
# Hyper-parameters are collected in one mapping and unpacked into the
# classifier; `scale_pos_weight` carries the class-balancing weight.
# NOTE(review): `use_label_encoder` appears to be deprecated in recent
# xgboost releases — confirm against the installed version.
xgb_params = {
    'n_estimators': 100,
    'use_label_encoder': False,
    'eval_metric': 'logloss',
    'scale_pos_weight': pos_weight,
    'random_state': 42,
}
model = xgb.XGBClassifier(**xgb_params)

# TRAINING
model.fit(X_train, y_train)
print("Training completato.")


# Valutazione modello + salvataggio

In [None]:
# EVALUATION: predict on the held-out set and print the standard reports.
y_pred = model.predict(X_test)

test_confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", test_confusion, sep="\n")

test_report = classification_report(y_test, y_pred, digits=4)
print("\nClassification Report:")
print(test_report)

# PERSIST THE TRAINED MODEL
model_path = "xgboost_trend_model.pkl"
joblib.dump(model, model_path)
print("Modello salvato come xgboost_trend_model.pkl")


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test predictions rendered as an annotated heatmap
# (rows: true label, columns: predicted label), saved to PNG and shown.
class_names = ['No Uptrend', 'Uptrend']
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix - XGBoost")
fig.tight_layout()
fig.savefig("xgb_confusion_matrix.png")
plt.show()
