# Notebook 6: ML Models

This notebook trains two machine learning models for trade profitability prediction on a pre-built train/test split, evaluates each on the test set, and compares their metrics.

## Models
- Model A: XGBoost (Gradient Boosting)
- Model B: LSTM (Deep Learning)

In [None]:
import sys
sys.path.insert(0, '..')

import pandas as pd
import numpy as np
import pickle
from pathlib import Path

from src.ml.problem_definition import MLDatasetBuilder
from src.ml.model_training import XGBoostModel, LSTMModel

In [None]:
# Load the pre-built train/test split from disk.
# CAUTION: unpickling can execute arbitrary code, so only load dataset files
# that come from a trusted source.
dataset_path = Path('../models/ml_dataset.pkl')
with dataset_path.open('rb') as dataset_file:
    data = pickle.load(dataset_file)

# Unpack the four pieces of the split that the training cells use.
X_train = data['X_train']
X_test = data['X_test']
y_train = data['y_train']
y_test = data['y_test']

# Quick size summary so the reader can sanity-check what was loaded.
print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")
print(f"Features: {len(data['feature_names'])}")

In [None]:
# Train XGBoost
# Builds the XGBoost wrapper with n_splits=5, runs its time_series_cv step on the
# training split, then calls train on that same split.
# NOTE(review): presumably time_series_cv performs time-ordered cross-validation
# with n_splits folds and only reports scores — confirm in src.ml.model_training.
xgb_model = XGBoostModel(n_splits=5)
xgb_model.time_series_cv(X_train, y_train)
xgb_model.train(X_train, y_train)

# Evaluate
# Scores the model on the test split. The result is indexed by metric name
# ('auc' here) and is read again by the model-comparison cell.
xgb_metrics = xgb_model.evaluate(X_test, y_test)
print(f"\nXGBoost Test AUC: {xgb_metrics['auc']:.4f}")

In [None]:
# Train LSTM
lstm_model = LSTMModel(sequence_length=10)

# The LSTM wrapper is given the underlying arrays rather than the pandas
# objects, so extract them once and reuse them for training and evaluation.
X_train_arr, y_train_arr = X_train.values, y_train.values
X_test_arr, y_test_arr = X_test.values, y_test.values

# NOTE(review): the test split is passed to train() as the third and fourth
# positional arguments. If train() uses it as validation data (early stopping,
# checkpoint selection), the test metrics below are optimistic — confirm
# against LSTMModel.train in src.ml.model_training.
lstm_model.train(
    X_train_arr,
    y_train_arr,
    X_test_arr,
    y_test_arr,
    epochs=50,
    batch_size=16,
)

# Evaluate on the test split; the metrics are read again by the comparison cell.
lstm_metrics = lstm_model.evaluate(X_test_arr, y_test_arr)
print(f"\nLSTM Test AUC: {lstm_metrics['auc']:.4f}")

In [None]:
# Compare models
# Prints the test-set metrics of both models side by side.
#
# Each value is padded by a format spec of the same width as the header
# columns, so the numbers stay under their headings whatever their printed
# length, and the rule is sized from the header itself.
comparison_header = f"{'Metric':<15} {'XGBoost':<15} {'LSTM':<15}"

print("\nModel Comparison:")
print(comparison_header)
print("-" * len(comparison_header))
for metric in ['accuracy', 'precision', 'recall', 'f1', 'auc']:
    # A missing XGBoost metric still raises KeyError, as before.
    xgb_value = xgb_metrics[metric]
    # A metric the LSTM evaluation did not report is shown as "nan", so it
    # cannot be mistaken for a genuine score of 0.0000.
    lstm_value = lstm_metrics.get(metric, float('nan'))
    print(f"{metric:<15} {xgb_value:<15.4f} {lstm_value:.4f}")

## Results

| Model | Accuracy | AUC | Precision | Recall |
|-------|----------|-----|-----------|--------|
| XGBoost | 0.5701 | 0.5916 | 0.3684 | 0.1707 |
| LSTM | 0.6082 | 0.4732 | 0.0000 | 0.0000 |

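XGBoost outperforms LSTM for this task when judged by AUC (0.5916 vs. 0.4732). The LSTM's higher accuracy is misleading: its precision and recall of 0.0000 mean it produced no true positives on the test set, and its AUC is below the 0.5 expected from random ranking.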