# 🚀 Multi-Currency Forex Trading Model

**Training:** All 6 currency pairs from `data/train/`
**Testing:** All 6 currency pairs from `data/test/`

Pairs:
- EUR_USD
- GBP_USD
- USD_JPY
- USD_CAD
- USD_CHF
- XAU_USD (Gold)

---

## 📦 1. Setup & Imports

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from glob import glob

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Reproducibility: seed NumPy and TensorFlow with the same fixed value
np.random.seed(42)
tf.random.set_seed(42)

# Plot appearance (palette is applied after the style sheet so it is not overwritten)
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

# Report the runtime environment
print(
    f"✅ TensorFlow: {tf.__version__}",
    f"✅ GPU: {tf.config.list_physical_devices('GPU')}",
    sep="\n",
)

✅ TensorFlow: 2.20.0
✅ GPU: []


: 

## 📥 2. Load All Training Data

In [None]:
# Get all CSV files from the train folder. glob() yields paths in arbitrary,
# filesystem-dependent order; sorting keeps the row order of the combined
# frame (and everything derived from it) reproducible across machines.
train_files = sorted(glob('../data/train/*_1min.csv'))
print(f"📂 Found {len(train_files)} training files:")
for f in train_files:
    print(f"   • {Path(f).name}")

# Fail early with a clear message instead of letting pd.concat() raise
# "No objects to concatenate" further down.
if not train_files:
    raise FileNotFoundError("No training CSVs matched '../data/train/*_1min.csv'")

# Load every file into its own frame, tagged with the pair it belongs to
train_dfs = []
for file in train_files:
    # The file stem is "<PAIR>_1min"; strip the suffix to recover the pair name
    pair_name = Path(file).stem.replace('_1min', '')
    df = pd.read_csv(file)

    # Standardize column names: lower-case, and expose tick volume as 'volume'
    df.columns = df.columns.str.lower()
    df = df.rename(columns={'tick_volume': 'volume'})

    df['pair'] = pair_name
    train_dfs.append(df)
    print(f"✅ Loaded {pair_name}: {len(df):,} rows")

# Combine all training data into one frame with a fresh 0..n-1 index
df_train_all = pd.concat(train_dfs, ignore_index=True)
print(f"\n📊 Total training data: {len(df_train_all):,} rows")
print(f"   Columns: {list(df_train_all.columns)}")

📂 Found 6 training files:
   • EUR_USD_1min.csv
   • GBP_USD_1min.csv
   • USD_CAD_1min.csv
   • USD_CHF_1min.csv
   • USD_JPY_1min.csv
   • XAU_USD_1min.csv
✅ Loaded EUR_USD: 1,859,492 rows
✅ Loaded GBP_USD: 1,861,292 rows
✅ Loaded USD_CAD: 1,855,369 rows


## 📥 3. Load All Test Data

In [None]:
# Get all CSV files from the test folder. glob() yields paths in arbitrary,
# filesystem-dependent order; sorting keeps the row order of the combined
# frame reproducible across machines.
test_files = sorted(glob('../data/test/*_test.csv'))
print(f"📂 Found {len(test_files)} test files:")
for f in test_files:
    print(f"   • {Path(f).name}")

# Fail early with a clear message instead of letting pd.concat() raise
# "No objects to concatenate" further down.
if not test_files:
    raise FileNotFoundError("No test CSVs matched '../data/test/*_test.csv'")

# Load every file into its own frame, tagged with the pair it belongs to
test_dfs = []
for file in test_files:
    # The file stem is "<PAIR>_test"; strip the suffix to recover the pair name
    pair_name = Path(file).stem.replace('_test', '')
    df = pd.read_csv(file)

    # Standardize column names: lower-case, and expose tick volume as 'volume'
    df.columns = df.columns.str.lower()
    df = df.rename(columns={'tick_volume': 'volume'})

    df['pair'] = pair_name
    test_dfs.append(df)
    print(f"✅ Loaded {pair_name}: {len(df):,} rows")

# Combine all test data into one frame with a fresh 0..n-1 index
df_test_all = pd.concat(test_dfs, ignore_index=True)
print(f"\n📊 Total test data: {len(df_test_all):,} rows")
print(f"   Columns: {list(df_test_all.columns)}")

## 🔧 4. Feature Engineering

In [None]:
def _add_indicators(df):
    """Append the indicator columns to ``df`` in place, treating it as ONE price series."""
    # Price features (candle geometry)
    df['price_range'] = df['high'] - df['low']
    df['body'] = df['close'] - df['open']
    df['upper_wick'] = df['high'] - df[['open', 'close']].max(axis=1)
    df['lower_wick'] = df[['open', 'close']].min(axis=1) - df['low']

    # Moving Averages (simple and exponential) over several windows
    for period in [5, 10, 20, 50]:
        df[f'sma_{period}'] = df['close'].rolling(period).mean()
        df[f'ema_{period}'] = df['close'].ewm(span=period).mean()

    # RSI from 14-bar simple averages of gains and losses
    delta = df['close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
    rs = gain / loss
    df['rsi'] = 100 - (100 / (1 + rs))

    # MACD (12/26 EMA difference, 9-span signal line, histogram)
    ema12 = df['close'].ewm(span=12).mean()
    ema26 = df['close'].ewm(span=26).mean()
    df['macd'] = ema12 - ema26
    df['macd_signal'] = df['macd'].ewm(span=9).mean()
    df['macd_hist'] = df['macd'] - df['macd_signal']

    # Bollinger Bands (20-bar mean +/- 2 standard deviations)
    sma20 = df['close'].rolling(20).mean()
    std20 = df['close'].rolling(20).std()
    df['bb_upper'] = sma20 + (std20 * 2)
    df['bb_lower'] = sma20 - (std20 * 2)
    df['bb_width'] = df['bb_upper'] - df['bb_lower']

    # ATR: 14-bar mean of the true range
    high_low = df['high'] - df['low']
    high_close = (df['high'] - df['close'].shift()).abs()
    low_close = (df['low'] - df['close'].shift()).abs()
    true_range = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
    df['atr'] = true_range.rolling(14).mean()

    # Volume relative to its 20-bar average
    df['volume_sma'] = df['volume'].rolling(20).mean()
    df['volume_ratio'] = df['volume'] / df['volume_sma']

    # Returns over 1, 5 and 10 bars
    df['return_1'] = df['close'].pct_change(1)
    df['return_5'] = df['close'].pct_change(5)
    df['return_10'] = df['close'].pct_change(10)

    return df


def create_features(df):
    """
    Create technical indicators.

    When ``df`` has a 'pair' column holding more than one value, every
    indicator is computed separately for each pair. Rolling windows, EMAs,
    diffs and percentage changes therefore never mix prices of two different
    instruments; the first rows of each pair get the usual warm-up NaNs
    instead of values contaminated by the previous pair's tail.

    Args:
        df: Frame with 'open', 'high', 'low', 'close' and 'volume' columns and
            optionally a 'pair' column. It is not modified.

    Returns:
        A new DataFrame with the same rows, in the same order and with the
        same index, plus the indicator columns.
    """
    # Single series (no pair column, or at most one distinct pair): one pass
    if 'pair' not in df.columns or df['pair'].nunique(dropna=False) <= 1:
        return _add_indicators(df.copy())

    # Work on row positions rather than index labels so that a non-unique
    # index cannot break the split/re-assembly.
    codes, _ = pd.factorize(df['pair'])
    positions = np.arange(len(df))

    parts = []
    row_order = []
    for code in pd.unique(codes):
        rows = positions[codes == code]
        parts.append(_add_indicators(df.iloc[rows].copy()))
        row_order.append(rows)

    combined = pd.concat(parts)
    # Undo the grouping so rows come back in their original order even when
    # the pairs were interleaved in the input.
    restore = np.argsort(np.concatenate(row_order), kind='stable')
    return combined.iloc[restore]

# Add the indicator columns to the training frame
print("🔧 Creating features for training data...")
df_train_all = create_features(df_train_all)

# Same transformation for the test frame
print("🔧 Creating features for test data...")
df_test_all = create_features(df_test_all)

# Report the resulting (rows, columns) of both frames
print(
    f"✅ Training features: {df_train_all.shape}",
    f"✅ Test features: {df_test_all.shape}",
    sep="\n",
)

## 🏷️ 5. Create Labels

**Strategy:** Predict whether the price will move UP (BUY), move DOWN (SELL), or stay NEUTRAL (change within ±0.05%) over the next 15 minutes

In [None]:
def create_labels(df, horizon=15, threshold=0.0005):
    """
    Create labels: 0=SELL, 1=NEUTRAL, 2=BUY

    The label compares each row's close with the close ``horizon`` rows later
    within the same pair. Rows that have no such future close (the last
    ``horizon`` rows of every pair, or rows whose close is missing) get a
    missing label (<NA>) rather than NEUTRAL, so a later ``dropna`` removes
    them instead of training on a made-up class.

    Args:
        df: Frame with 'pair' and 'close' columns. It is not modified.
        horizon: Number of rows (one-minute bars) to look ahead
        threshold: Minimum relative price change for BUY/SELL
            (default 0.0005 = 0.05%)

    Returns:
        A copy of ``df`` with 'future_price', 'price_change' and a nullable
        integer ('Int64') 'label' column added.
    """
    df = df.copy()

    # Future price change, looked up per pair so one pair never reads another's prices
    df['future_price'] = df.groupby('pair')['close'].shift(-horizon)
    df['price_change'] = (df['future_price'] - df['close']) / df['close']

    # Create labels. The nullable Int64 dtype keeps the classes as integers
    # while still allowing "unknown" for rows without a future price.
    df['label'] = pd.array(np.ones(len(df), dtype='int64'), dtype='Int64')  # Default: NEUTRAL
    df.loc[df['price_change'] > threshold, 'label'] = 2   # BUY
    df.loc[df['price_change'] < -threshold, 'label'] = 0  # SELL
    df.loc[df['price_change'].isna(), 'label'] = pd.NA    # outcome unknown

    return df

HORIZON = 15
THRESHOLD = 0.0005  # 0.05%

print(f"🏷️ Creating labels (horizon={HORIZON}min, threshold={THRESHOLD*100:.2f}%)...")
df_train_all = create_labels(df_train_all, HORIZON, THRESHOLD)
df_test_all = create_labels(df_test_all, HORIZON, THRESHOLD)

# Per-class row counts, ordered by class id
train_label_dist = df_train_all['label'].value_counts().sort_index()
test_label_dist = df_test_all['label'].value_counts().sort_index()

# Print both distributions; percentages are relative to the full frame length
for heading, dist, total_rows in (
    ("\n📊 Training Label Distribution:", train_label_dist, len(df_train_all)),
    ("\n📊 Test Label Distribution:", test_label_dist, len(df_test_all)),
):
    print(heading)
    for label, count in dist.items():
        label_name = ['SELL', 'NEUTRAL', 'BUY'][int(label)]
        pct = count / total_rows * 100
        print(f"   {label_name:8}: {count:,} ({pct:.1f}%)")

## 📦 6. Prepare Sequences

Create time-series sequences (lookback window = 60 one-minute bars)

In [None]:
def prepare_sequences(df, sequence_length=60):
    """
    Create sequences for LSTM

    For every usable row ``i`` the sample is the ``sequence_length`` rows
    immediately before it (``i - sequence_length`` .. ``i - 1``) and the target
    is the label of row ``i``. When ``df`` has a 'pair' column, windows are
    built separately per pair so no window mixes rows of two instruments.

    Args:
        df: Frame with a 'label' column and numeric feature columns. Columns
            'time', 'pair', 'label', 'future_price' and 'price_change' are
            never used as features; neither are columns whose dtype is not
            float64 or int64.
        sequence_length: Number of rows in each window (must be >= 1).

    Returns:
        (X, y, feature_cols) where X has shape
        (n_samples, sequence_length, n_features), y has shape (n_samples,)
        and feature_cols lists the feature column names in X's last-axis
        order. With too few rows X is empty but keeps its three dimensions.

    Raises:
        ValueError: if ``sequence_length`` is smaller than 1.
    """
    # Local import: only this function needs it
    from numpy.lib.stride_tricks import sliding_window_view

    if sequence_length < 1:
        raise ValueError("sequence_length must be >= 1")

    # Select feature columns (exclude non-numeric and target)
    excluded = {'time', 'pair', 'label', 'future_price', 'price_change'}
    feature_cols = [col for col in df.columns
                    if col not in excluded and df[col].dtype in ['float64', 'int64']]

    # Drop rows with a missing feature or label (indicator warm-up rows, rows
    # without a future price). 'pair' is kept only to split the frame below.
    # NOTE(review): a NaN in the middle of a series leaves a gap, so windows
    # around it span non-adjacent bars - same as before this rewrite.
    df_clean = df.dropna(subset=feature_cols + ['label'])

    if 'pair' in df_clean.columns:
        groups = [g for _, g in df_clean.groupby('pair', sort=False, dropna=False)]
    else:
        groups = [df_clean]

    X_parts = []
    y_parts = []
    for group in groups:
        values = group[feature_cols].to_numpy()
        if len(values) <= sequence_length:
            continue  # not enough history for even one (window, label) pair

        # sliding_window_view puts the window axis last: (n_windows, n_features, L).
        # The final window has no following row to take a label from, so drop it.
        windows = sliding_window_view(values, sequence_length, axis=0)[:-1]
        X_parts.append(windows.transpose(0, 2, 1))
        # tolist() yields plain Python scalars for both numpy and nullable dtypes
        y_parts.append(np.asarray(group['label'].iloc[sequence_length:].tolist()))

    if X_parts:
        # concatenate() materialises the strided views into one contiguous array
        X = np.concatenate(X_parts)
        y = np.concatenate(y_parts)
    else:
        X = np.empty((0, sequence_length, len(feature_cols)))
        y = np.empty((0,), dtype='int64')

    return X, y, feature_cols

SEQUENCE_LENGTH = 60

print(f"📦 Creating sequences (lookback={SEQUENCE_LENGTH} minutes)...\n")

# Window the training frame; the feature-name list comes from this call
X_train, y_train, feature_cols = prepare_sequences(df_train_all, SEQUENCE_LENGTH)
# Window the test frame; its feature-name list is discarded
X_test, y_test, _ = prepare_sequences(df_test_all, SEQUENCE_LENGTH)

print(
    f"✅ Training data: X={X_train.shape}, y={y_train.shape}",
    f"✅ Test data: X={X_test.shape}, y={y_test.shape}",
    f"\n📋 Features ({len(feature_cols)}): {feature_cols[:10]}...",
    sep="\n",
)

## 🔄 7. Scale Features

In [None]:
# StandardScaler works on 2-D input, so flatten
# (samples, timesteps, features) -> (samples * timesteps, features)
n_samples_train, n_timesteps, n_features = X_train.shape
n_samples_test = len(X_test)

X_train_reshaped = X_train.reshape(-1, n_features)
X_test_reshaped = X_test.reshape(-1, n_features)

# Learn per-feature mean/std from the training rows only, then apply the
# same statistics to both sets
scaler = StandardScaler().fit(X_train_reshaped)

# Transform and restore the 3-D layout in one step
X_train_scaled = scaler.transform(X_train_reshaped).reshape(
    n_samples_train, n_timesteps, n_features
)
X_test_scaled = scaler.transform(X_test_reshaped).reshape(
    n_samples_test, n_timesteps, n_features
)

print(f"✅ Data scaled: X_train={X_train_scaled.shape}, X_test={X_test_scaled.shape}")

## 🏗️ 8. Build LSTM Model

In [None]:
def build_lstm_model(input_shape, n_classes=3):
    """
    Build LSTM model for multi-class classification

    Architecture: LSTM(128, returns the full sequence) -> LSTM(64, returns the
    last step) -> Dense(32, relu) -> Dense(n_classes, softmax), with Dropout
    and BatchNormalization after each LSTM and Dropout after the hidden Dense.

    Args:
        input_shape: Shape of one sample, (timesteps, n_features).
        n_classes: Width of the softmax output layer.

    Returns:
        A compiled Sequential model (Adam, lr=0.001, sparse categorical
        cross-entropy, accuracy metric). Targets are integer class ids.
    """
    model = Sequential([
        # Declare the input explicitly: passing `input_shape=` to the first
        # layer is deprecated in Keras 3 and emits a warning there.
        keras.Input(shape=input_shape),

        LSTM(128, return_sequences=True),
        Dropout(0.3),
        BatchNormalization(),

        LSTM(64, return_sequences=False),
        Dropout(0.3),
        BatchNormalization(),

        Dense(32, activation='relu'),
        Dropout(0.2),

        Dense(n_classes, activation='softmax')
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# One sample is (timesteps, features): everything after the batch axis
input_shape = tuple(X_train_scaled.shape[1:])
model = build_lstm_model(input_shape)

# Show the layer-by-layer summary
print("🏗️ Model Architecture:")
model.summary()

## 🎯 9. Setup Callbacks

In [None]:
# Make sure the output directory exists before any callback writes into it
Path('../models/multi_currency').mkdir(parents=True, exist_ok=True)

callbacks = []

# Keep only the weights of the epoch with the highest validation accuracy
callbacks.append(ModelCheckpoint(
    filepath='../models/multi_currency/best_model.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
))

# Stop after 10 epochs without a validation-accuracy improvement and roll
# the model back to its best weights
callbacks.append(EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
    verbose=1,
))

# Halve the learning rate after 5 epochs without a validation-loss
# improvement, never going below 1e-6
callbacks.append(ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    verbose=1,
))

print("✅ Callbacks configured")

## 🚀 10. Train Model

Training on **all currency pairs** from the `data/train/` folder

In [None]:
BATCH_SIZE, EPOCHS, VALIDATION_SPLIT = 64, 50, 0.2

# Banner with the run configuration
print(
    "🚀 Starting training...\n",
    f"⚙️  Configuration:",
    f"   • Training samples: {len(X_train_scaled):,}",
    f"   • Validation split: {VALIDATION_SPLIT*100:.0f}%",
    f"   • Batch size: {BATCH_SIZE}",
    f"   • Max epochs: {EPOCHS}",
    f"   • Early stopping: patience=10\n",
    "=" * 70 + "\n",
    sep="\n",
)

# NOTE(review): Keras takes the `validation_split` fraction from the END of
# the arrays, before shuffling. The samples follow the order in which the
# per-pair files were concatenated, so the held-out 20% presumably consists
# mostly of the last-loaded pair(s) - confirm, and consider a per-pair
# chronological split instead.
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
    callbacks=callbacks,
    verbose=1,
)

# Summary: best validation accuracy seen and number of epochs actually run
print(
    "\n" + "=" * 70,
    "✅ Training complete!",
    f"   • Best val accuracy: {max(history.history['val_accuracy'])*100:.2f}%",
    f"   • Total epochs: {len(history.history['loss'])}",
    sep="\n",
)

## 📈 11. Training Visualization

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# One panel per metric: left = accuracy, right = loss; each shows the
# training curve and the matching `val_*` validation curve
for ax, (metric, title, ylabel) in zip(
    axes,
    [
        ('accuracy', 'Model Accuracy', 'Accuracy'),
        ('loss', 'Model Loss', 'Loss'),
    ],
):
    ax.plot(history.history[metric], label='Train')
    ax.plot(history.history[f'val_{metric}'], label='Validation')
    ax.set_title(title, fontweight='bold', fontsize=14)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)

# Write the figure next to the model artifacts, then display it
plt.tight_layout()
plt.savefig('../models/multi_currency/training_history.png', dpi=150, bbox_inches='tight')
plt.show()

print("📊 Training history saved")

## 🧪 12. Test on All Currency Pairs

Testing on **all test data** from test folder

In [None]:
# Load the checkpoint written by ModelCheckpoint during training
best_model = keras.models.load_model('../models/multi_currency/best_model.keras')

print("🧪 Testing on all currency pairs...\n")

# Predict class probabilities, then take the most likely class per sample
y_pred_proba = best_model.predict(X_test_scaled)
y_pred = np.argmax(y_pred_proba, axis=1)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"🎯 Overall Test Accuracy: {accuracy*100:.2f}%\n")

# Pin the class ids explicitly. Without `labels=`, scikit-learn infers the
# classes from the values present in y_test / y_pred; if one class never
# occurs, classification_report rejects the three target names and the
# confusion matrix shrinks below 3x3, mislabeling the heatmap ticks.
class_ids = [0, 1, 2]
class_names = ['SELL', 'NEUTRAL', 'BUY']

# Classification report
print("📊 Classification Report:")
print(classification_report(
    y_test,
    y_pred,
    labels=class_ids,
    target_names=class_names,
    digits=4
))

# Confusion matrix (rows = true class, columns = predicted class)
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.title('Confusion Matrix - All Currency Pairs', fontweight='bold', fontsize=14)
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.savefig('../models/multi_currency/confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.show()

## 🔍 13. Signal Distribution Analysis

In [None]:
print("🔍 SIGNAL DISTRIBUTION ANALYSIS")
print("="*70)

# Occurrences of each class id among the predictions and the true labels
pred_counts = pd.Series(y_pred).value_counts().sort_index()
true_counts = pd.Series(y_test).value_counts().sort_index()

# Same table twice: first the ground truth, then the model output.
# A class that never occurs is reported with a count of 0.
for heading, counts, total in (
    ("\n📊 TRUE LABELS:", true_counts, len(y_test)),
    ("\n🎯 PREDICTED LABELS:", pred_counts, len(y_pred)),
):
    print(heading)
    for label, label_name in enumerate(['SELL', 'NEUTRAL', 'BUY']):
        count = counts.get(label, 0)
        pct = count / total * 100
        print(f"   {label_name:8}: {count:6,} ({pct:5.1f}%)")

# Share of predictions that are actionable, i.e. SELL (0) or BUY (2)
signal_pct = sum(pred_counts.get(label, 0) for label in (0, 2)) / len(y_pred) * 100
print(f"\n📡 ACTIONABLE SIGNALS: {signal_pct:.1f}%")

# Below 20%: too few; above 40%: good; anything in between: moderate
if signal_pct < 20:
    print("   ⚠️  Too few signals - mostly predicts NEUTRAL")
elif signal_pct > 40:
    print("   ✅ Good signal generation!")
else:
    print("   ⚠️  Moderate signal generation")

print("="*70)

## 💾 14. Save Results

In [None]:
import json
from datetime import datetime
import joblib

# Persist the fitted scaler so inference can apply the same normalization
joblib.dump(scaler, '../models/multi_currency/scaler.pkl')
print("✅ Scaler saved")

# Everything needed to reproduce the preprocessing, plus headline metrics.
# NumPy scalars are converted to built-in int/float so json can serialize them.
metadata = dict(
    model_name='Multi_Currency_LSTM',
    currency_pairs=['EUR_USD', 'GBP_USD', 'USD_JPY', 'USD_CAD', 'USD_CHF', 'XAU_USD'],
    horizon=HORIZON,
    threshold=THRESHOLD,
    sequence_length=SEQUENCE_LENGTH,
    n_features=len(feature_cols),
    feature_columns=feature_cols,
    training_samples=int(len(X_train)),
    test_samples=int(len(X_test)),
    test_accuracy=float(accuracy),
    best_val_accuracy=float(max(history.history['val_accuracy'])),
    signal_percentage=float(signal_pct),
    trained_at=datetime.now().isoformat(),
)

# Write the metadata as pretty-printed JSON
with open('../models/multi_currency/model_metadata.json', 'w') as f:
    f.write(json.dumps(metadata, indent=2))

# Final report: saved artifacts followed by the headline numbers
print(
    "✅ Metadata saved",
    "\n" + "="*70,
    "🎉 ALL DONE!",
    "="*70,
    f"\n📁 Model saved to: ../models/multi_currency/",
    f"   • best_model.keras",
    f"   • scaler.pkl",
    f"   • model_metadata.json",
    f"   • training_history.png",
    f"   • confusion_matrix.png",
    f"\n🎯 Final Results:",
    f"   • Test Accuracy: {accuracy*100:.2f}%",
    f"   • Signal Coverage: {signal_pct:.1f}%",
    f"   • Training samples: {len(X_train):,}",
    f"   • Test samples: {len(X_test):,}",
    sep="\n",
)

## 🎊 Summary

✅ **Training Data:** All 6 currency pairs from `data/train/`
✅ **Test Data:** All 6 currency pairs from `data/test/`
✅ **Model:** LSTM with 128→64 units
✅ **Features:** ~30 input features (raw OHLCV plus technical indicators)
✅ **Prediction:** 15-minute ahead (BUY/SELL/NEUTRAL)

---

**Next Steps:**
1. Deploy model to Flask API
2. Integrate with mobile app
3. Add real-time MT5 data feed
4. Implement signal filtering (confidence > 80%)