# Import

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix

# EDA & VISUALIZATION

In [None]:
# Load the exported dataset: the file is tab-separated and the 'Time' column
# is parsed as datetimes.
filename = "XAG_H1_exp.csv"
df = pd.read_csv(filename, sep='\t', parse_dates=['Time'])

# Quick structural overview of the loaded frame.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the report.
df.info()
print(df.describe())
# Count of missing values per column.
print(df.isnull().sum())

# Correlation heatmap over the numeric columns; written to disk, then shown.
corr_matrix = df.corr(numeric_only=True)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
ax.set_title("Heatmap Correlazioni tra feature")
fig.tight_layout()
fig.savefig("correlation_heatmap.png")
plt.show()

# Close price and both EMAs over time; written to disk, then shown.
fig, ax = plt.subplots(figsize=(14, 6))
series_to_plot = (
    ('Close', 'Close Price'),
    ('EMA_Fast', 'EMA Fast (7)'),
    ('EMA_Slow', 'EMA Slow (21)'),
)
for column, legend_label in series_to_plot:
    ax.plot(df['Time'], df[column], label=legend_label)
ax.set_title("Prezzo Close e EMA nel tempo")
ax.set_xlabel("Time")
ax.set_ylabel("Price")
ax.legend()
fig.tight_layout()
fig.savefig("price_ema_plot.png")
plt.show()

# LABEL CREATION

In [None]:
# Labelling parameters: how many rows ahead to look, and the minimum absolute
# price move required for a row to count as an up or a down move.
n_shift, threshold_up, threshold_down = 1, 0.0001, 0.0001

# Close value n_shift rows later, and its difference from the current close.
close_now = df['Close']
df['Close_future'] = close_now.shift(periods=-n_shift)
df['diff'] = df['Close_future'].sub(close_now)

def label_trend_binary(x, up=None, down=None):
    """Map a price difference to a trend label.

    Args:
        x: Price difference to classify.
        up: Positive threshold for an up move. Defaults to the module-level
            ``threshold_up`` when omitted.
        down: Positive threshold for a down move (compared as ``-down``).
            Defaults to the module-level ``threshold_down`` when omitted.

    Returns:
        ``1`` if ``x > up``, ``-1`` if ``x < -down``, otherwise ``np.nan``
        (moves inside the dead band, and NaN inputs, get no label).
    """
    # Resolve the defaults at call time so the function still follows the
    # module-level thresholds, while callers can override them explicitly.
    up = threshold_up if up is None else up
    down = threshold_down if down is None else down

    if x > up:
        return 1
    if x < -down:
        return -1
    return np.nan

# Label every row from its price difference; rows inside the dead band get NaN.
df['trend'] = df['diff'].map(label_trend_binary)

# Discard rows without a future close or without a label.
unlabeled_markers = ['Close_future', 'trend']
df.dropna(subset=unlabeled_markers, inplace=True)

# Class balance of the remaining rows, in percent.
class_share_pct = df['trend'].value_counts(normalize=True) * 100
print("Distribuzione classi trend:")
print(class_share_pct)

# Feature engineering: preparing the data for the LSTM model (sequencing and scaling follow below)

In [None]:
# Derived features computed from the existing columns.
# NOTE(review): rows without a trend label were already dropped before this
# point, so the row-to-row operations below (pct_change, diff, rolling) may
# span bars that were not adjacent in the original series -- confirm intended.
df['EMA_diff'] = df['EMA_Fast'] - df['EMA_Slow']
df['Close_pct_change'] = df['Close'].pct_change()
df['Volume_pct_change'] = df['Volume'].pct_change()
df['ADX_delta'] = df['ADX'].diff()
df['Volume_delta'] = df['Volume'].diff()

# Rolling standard deviation over a 3-row window.
df['ADX_roll_std3'] = df['ADX'].rolling(window=3).std()
df['Volume_roll_std3'] = df['Volume'].rolling(window=3).std()

# pct_change() yields +/-inf when the previous value is 0. dropna() does not
# remove infinities and StandardScaler rejects them, so turn them into NaN
# first and let the dropna() below discard those rows.
pct_cols = ['Close_pct_change', 'Volume_pct_change']
df[pct_cols] = df[pct_cols].replace([np.inf, -np.inf], np.nan)

# The first rows have no previous value / incomplete rolling window.
df.dropna(inplace=True)
df.to_csv("lstm_ready_dataset.csv", index=False)

# SEQUENCE BUILDING

In [None]:
# Number of consecutive rows per input window, and the hold-out fractions.
sequence_length = 20
test_size = 0.15
val_size = 0.15

# Columns fed to the model; the list order defines the feature order.
feature_cols = [
    # raw columns
    'Open', 'High', 'Low', 'Close', 'Volume',
    # indicators and their spread
    'EMA_Fast', 'EMA_Slow', 'ADX', 'EMA_diff',
    # relative changes
    'Close_pct_change', 'Volume_pct_change',
    # absolute changes
    'ADX_delta', 'Volume_delta',
    # short-window volatility
    'ADX_roll_std3', 'Volume_roll_std3',
]

# Standardise every feature column.
# NOTE(review): the scaler is fitted on all rows here, including rows that
# later end up in the validation/test sets.
scaler = StandardScaler()
scaler.fit(df[feature_cols])
df_scaled = scaler.transform(df[feature_cols])

# Labels as a plain array, aligned row-by-row with df_scaled.
y_raw = df['trend'].to_numpy()

def create_sequences(X, y, seq_length):
    """Slice X into overlapping windows and pair each with a target from y.

    Window ``k`` is ``X[k:k + seq_length]`` and its target is
    ``y[k + seq_length]``, i.e. the element of ``y`` at the index right after
    the window's last row. ``len(X) - seq_length`` windows are produced; when
    that number is not positive, two empty arrays are returned.

    NOTE(review): the labels in this file are built from a forward-shifted
    close, so using the label of the row *after* the window may look one step
    further ahead than intended -- confirm.

    Returns:
        A tuple ``(windows, targets)`` of NumPy arrays.
    """
    window_starts = range(len(X) - seq_length)
    windows = [X[start:start + seq_length] for start in window_starts]
    targets = [y[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(targets)

# Chronological train / validation / test split.
#
# Consecutive windows share all but one row, so a shuffled split places
# near-duplicates of every test window in the training set and inflates the
# scores. Splitting by position keeps the hold-out windows later than the
# training ones. It also gives validation and test exactly the val_size /
# test_size fractions (deriving the second split from a single ratio swaps
# the two whenever they differ).
n_sequences = len(df) - sequence_length
if n_sequences <= 0:
    raise ValueError(
        f"Need more than {sequence_length} rows to build sequences, got {len(df)}."
    )
n_test = int(round(n_sequences * test_size))
n_val = int(round(n_sequences * val_size))
n_train = n_sequences - n_val - n_test
if n_train <= 0:
    raise ValueError("test_size + val_size leave no sequences for training.")

# Re-fit the scaler on the rows covered by training windows only: window k
# spans rows k .. k + sequence_length - 1, so the training windows cover rows
# 0 .. n_train + sequence_length - 2. Fitting on every row would leak
# validation/test statistics into the scaled training inputs.
feature_frame = df[feature_cols]
scaler.fit(feature_frame.iloc[:n_train + sequence_length - 1])
df_scaled = scaler.transform(feature_frame)

X, y = create_sequences(df_scaled, y_raw, sequence_length)

val_end = n_train + n_val
X_train, y_train = X[:n_train], y[:n_train]
X_val, y_val = X[n_train:val_end], y[n_train:val_end]
X_test, y_test = X[val_end:], y[val_end:]

print(f"Train: {len(X_train)} - Val: {len(X_val)} - Test: {len(X_test)}")

# Persist the scaler that was fitted on the training rows.
joblib.dump(scaler, 'scaler_lstm.save')

# LSTM MODEL TRAINING

In [None]:
# (timesteps, features) of a single input window.
input_shape = (X_train.shape[1], X_train.shape[2])


def sign_accuracy(y_true, y_pred):
    """Per-sample accuracy for -1/+1 labels and a signed score output.

    A score > 0 is read as class +1, anything else as class -1 (the same rule
    used when predictions are thresholded for the evaluation report).

    The built-in 'accuracy' metric is not suitable here: for a single-unit
    output it resolves to binary accuracy, which thresholds the prediction at
    0.5 and compares against 0/1 targets, so samples labelled -1 can never be
    counted as correct.
    """
    y_pred = tf.reshape(y_pred, [-1])
    y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])
    predicted_class = tf.where(
        y_pred > 0, tf.ones_like(y_pred), -tf.ones_like(y_pred)
    )
    return tf.cast(tf.equal(y_true, predicted_class), tf.float32)


model = Sequential([
    LSTM(64, return_sequences=True, input_shape=input_shape),
    Dropout(0.2),
    LSTM(32),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1, activation='tanh')  # Single score in (-1, 1); its sign is the predicted class
])

# Hinge loss expects -1/+1 targets, which matches the trend labels.
model.compile(optimizer='adam', loss='hinge', metrics=[sign_accuracy])
model.summary()

# Stop when the validation loss has not improved for 5 epochs and roll back
# to the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                    epochs=50, batch_size=64, callbacks=[early_stop])

test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"\nTest accuracy: {test_acc:.4f}")

# NOTE: the compiled model references the custom `sign_accuracy` metric; when
# reloading, pass `compile=False` or
# `custom_objects={'sign_accuracy': sign_accuracy}` to `load_model`.
model.save('lstm_trend_model.h5')
print("Modello salvato.")

# Evaluation Report

In [None]:
# Raw model scores for the test windows, collapsed to a 1-D array.
y_pred_raw = model.predict(X_test).reshape(-1)

# Turn each score into a class: strictly positive -> 1, everything else -> -1.
y_pred = np.where(y_pred_raw > 0, 1, -1)

# Text summary of the test-set performance.
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred, digits=4)
print(test_confusion)
print(test_report)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Build the confusion matrix from the test labels and predictions.
cm = confusion_matrix(y_test, y_pred)

# Draw it as an annotated heatmap; the same tick labels are used on both axes.
class_names = ['No Uptrend', 'Uptrend']
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix - LSTM")

# Save the figure, then display it.
fig.tight_layout()
fig.savefig("lstm_confusion_matrix.png")
plt.show()
