# Strategy Backtesting & Analysis

This notebook is the ultimate proving ground for your strategy. It demonstrates rigorous, realistic backtesting, comprehensive performance analysis, and advanced risk assessment, mimicking a production trading environment.

In [None]:
# 1. Event-Driven Backtesting Simulation
# pandas is needed for read_parquet below (and for pd.DataFrame / pd.Series in
# later cells); it was never imported, so a fresh kernel failed with NameError.
import pandas as pd

from src.backtest import Backtester
from src.strategy import MeanReversionStrategy
from src.ml_models import MLModels
from src.data_handler import DataHandler

# Load the precomputed feature table and the price table from parquet files.
features_df = pd.read_parquet("../data/spread_features.parquet")
prices_df = pd.read_parquet("../data/prices.parquet")

# Load the persisted model and attach it to the MLModels wrapper.
ml = MLModels(model_dir="../models")
ml.model = ml.load_model("../models/best_model_v1.joblib")

# Instantiate the strategy around the loaded model, then the backtester that
# drives it over the feature and price tables.
strategy = MeanReversionStrategy(model=ml.model, use_ml_confidence=True)
backtester = Backtester(strategy, features_df, prices_df)

# Run the backtest. Later cells read 'equity_curve', 'pnl' and 'signal' from
# `results` and index it with .loc, i.e. they treat it as a DataFrame.
results = backtester.run()
print("Backtest complete.")

In [None]:
# 2. Illustrate Order Book Simulation & Slippage Model
from src.backtest import SlippageModel

slippage_model = SlippageModel()

# Inputs for one illustrative order: the most recent spread value, the most
# recent value of the 'spread_vol_20' feature, and the mean of the 'volume'
# column over the whole price table.
latest_spread = prices_df["spread"].iloc[-1]
latest_spread_vol = features_df["spread_vol_20"].iloc[-1]
mean_volume = prices_df["volume"].mean()

example_slippage = slippage_model.calculate(
    quantity=1000,
    price=latest_spread,
    volatility=latest_spread_vol,
    avg_daily_volume=mean_volume,
)
print(f"Example slippage for 1000 units: {example_slippage:.4f}")

In [None]:
# 3. Granular Performance Analysis

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Equity curve, and drawdown measured as the distance below the running peak
# (same units as the equity curve, always <= 0).
equity_curve = results['equity_curve']
running_peak = equity_curve.cummax()
drawdown = equity_curve - running_peak

# Two stacked panels: equity on top, drawdown underneath.
equity_fig, (equity_ax, drawdown_ax) = plt.subplots(2, 1, figsize=(14, 6))
equity_ax.plot(equity_curve)
equity_ax.set_title("Equity Curve")
drawdown_ax.plot(drawdown, color='red')
drawdown_ax.set_title("Drawdown")
equity_fig.tight_layout()
plt.show()

# Histogram (50 bins) of the 'pnl' column with a kernel-density overlay.
pnl_fig, pnl_ax = plt.subplots(figsize=(8, 4))
sns.histplot(results['pnl'], bins=50, kde=True, ax=pnl_ax)
pnl_ax.set_title("Daily P&L Distribution")
plt.show()

In [None]:
# 4. Performance Metrics Table

from src.utils import rolling_sharpe, rolling_drawdown

def sharpe_ratio(returns, periods_per_year=252):
    """Annualized Sharpe ratio of a series of per-period returns.

    Computed as mean / (population std + 1e-8) * sqrt(periods_per_year), with
    no risk-free rate subtracted.

    Args:
        returns: Array-like of per-period returns (or P&L increments).
        periods_per_year: Annualization factor. Defaults to 252, the value
            that was previously hard-coded.

    Returns:
        The annualized ratio as a float. The 1e-8 term keeps a zero-variance
        series from dividing by zero.
    """
    mean_return = np.mean(returns)
    volatility = np.std(returns) + 1e-8  # epsilon guards against zero variance
    return mean_return / volatility * np.sqrt(periods_per_year)

def sortino_ratio(returns, periods_per_year=252):
    """Annualized Sortino-style ratio of a series of per-period returns.

    Computed as mean(returns) / (std of the negative returns + 1e-8)
    * sqrt(periods_per_year).

    Args:
        returns: Array-like of per-period returns; lists are accepted and
            converted to a float array.
        periods_per_year: Annualization factor. Defaults to 252, the value
            that was previously hard-coded.

    Returns:
        The annualized ratio as a float. When the series has no negative
        returns the downside deviation is taken as 0, so the result is a
        large finite number (bounded by the 1e-8 epsilon) rather than NaN.
    """
    returns = np.asarray(returns, dtype=float)
    downside = returns[returns < 0]
    # np.std of an empty array yields NaN plus a RuntimeWarning; treat
    # "no losing periods" as zero downside deviation instead.
    downside_std = np.std(downside) if downside.size else 0.0
    return np.mean(returns) / (downside_std + 1e-8) * np.sqrt(periods_per_year)

def calmar_ratio(equity_curve):
    """Total change in equity divided by the magnitude of the worst drawdown.

    Args:
        equity_curve: Equity values in time order (pandas Series or array).

    Returns:
        (last equity - first equity) / (|min of rolling_drawdown| + 1e-8).
        The return is not annualized.
    """
    # NOTE(review): rolling_drawdown comes from src.utils; this assumes it
    # returns a drawdown series whose minimum is the deepest drawdown.
    max_dd = np.abs(rolling_drawdown(equity_curve).min())
    # Read the endpoints by position. On a pandas Series, `equity_curve[-1]`
    # is a label lookup: KeyError on an integer index, and a deprecated
    # positional fallback on any other index.
    values = np.asarray(equity_curve)
    return (values[-1] - values[0]) / (max_dd + 1e-8)

def win_loss_ratio(pnl):
    """Ratio of the count of positive P&L entries to the count of negative ones.

    Zero entries are ignored. A 1e-8 term in the denominator avoids division
    by zero when there are no losing entries.
    """
    outcomes = np.array(pnl)
    n_wins = (outcomes > 0).sum()
    n_losses = (outcomes < 0).sum()
    return n_wins / (n_losses + 1e-8)

# Per-period changes in equity (first differences of the equity curve, in
# equity units rather than percentages).
returns = np.diff(equity_curve)

# Read the endpoints by position. `equity_curve` is a pandas Series, so
# `equity_curve[-1]` would be a label lookup: KeyError on an integer index,
# and a deprecated positional fallback on any other index.
total_return = equity_curve.iloc[-1] - equity_curve.iloc[0]

metrics = {
    "Sharpe Ratio": sharpe_ratio(returns),
    "Sortino Ratio": sortino_ratio(returns),
    "Calmar Ratio": calmar_ratio(equity_curve),
    "Win/Loss Ratio": win_loss_ratio(results['pnl']),
    "Max Drawdown": np.abs(drawdown.min()),
    "Total Return": total_return,
}

# One row per metric, with a single "Value" column.
metrics_df = pd.DataFrame(metrics, index=["Value"]).T
display(metrics_df)

In [None]:
# 5. Rolling Performance

window = 60

# Wrap the per-period equity changes in a Series so rolling operations apply.
returns_series = pd.Series(returns)
rolling_sharpe_values = rolling_sharpe(returns_series, window=window)
rolling_volatility = returns_series.rolling(window).std()

# Both lines share one set of axes.
rolling_fig, rolling_ax = plt.subplots(figsize=(12, 4))
rolling_ax.plot(rolling_sharpe_values, label="Rolling Sharpe")
rolling_ax.plot(rolling_volatility, label="Rolling Volatility")
rolling_ax.legend()
rolling_ax.set_title("Rolling Sharpe Ratio and Volatility")
plt.show()

In [None]:
# 6. Trade-Level Diagnostics

# Example: visualize the bars that follow the first change in the signal.
trade_window = 20  # number of bars after the entry bar; adjust as needed

# Locate the first signal change by position, not by label, so the slice works
# whatever the index type is. Label arithmetic such as `trade_idx + 20` fails
# on a datetime index.
signal_changed = (results['signal'].diff().abs() > 0).to_numpy()
change_positions = np.flatnonzero(signal_changed)

if change_positions.size == 0:
    # The strategy never changed its signal, so there is nothing to illustrate.
    print("No signal changes found in results; skipping trade example plot.")
else:
    entry_pos = change_positions[0]
    trade_idx = results.index[entry_pos]
    # The entry bar plus `trade_window` following bars. This is the same row
    # count as the inclusive label slice .loc[idx:idx+20] on a default
    # integer index.
    trade_slice = results.iloc[entry_pos:entry_pos + trade_window + 1]

    plt.figure(figsize=(12,6))
    plt.plot(prices_df.loc[trade_slice.index, "spread"], label="Spread")
    plt.plot(features_df.loc[trade_slice.index, "spread_zscore"], label="Z-Score")
    plt.plot(trade_slice.index, trade_slice['signal'], label="ML Signal", linestyle='--')
    plt.title("Trade Example: Spread, Z-Score, ML Signal")
    plt.legend()
    plt.show()

In [None]:
# 7. Trade Duration & P&L Distribution

# Keep only the entries of the 'pnl' column that are not exactly zero.
pnl_column = results['pnl']
is_nonzero = np.array(pnl_column) != 0
trade_pnls = pnl_column[is_nonzero]

trade_fig, trade_ax = plt.subplots(figsize=(8, 4))
sns.histplot(trade_pnls, bins=30, kde=True, ax=trade_ax)
trade_ax.set_title("Trade P&L Distribution")
plt.show()

# Trade duration (if available)
# If you track entry/exit times, plot holding period distribution

In [None]:
# 8. Robust Risk Management Analysis

# Dollar exposure: signal multiplied element-wise by the spread. pandas aligns
# the two Series on their index before multiplying.
dollar_exposure = results['signal'] * prices_df['spread']

exposure_fig, exposure_ax = plt.subplots(figsize=(12, 4))
exposure_ax.plot(dollar_exposure)
exposure_ax.set_title("Dollar Exposure Over Time")
plt.show()

# VaR/CVaR Calculation
from scipy.stats import norm

confidence = 0.95

# Historical VaR: the (1 - confidence) percentile of the per-period equity
# changes. CVaR: the mean of the observations at or below that percentile.
var = np.percentile(returns, 100 * (1 - confidence))
tail_losses = returns[returns <= var]
cvar = tail_losses.mean()

# The label is built from `confidence`, so changing the level above keeps the
# printed text truthful. It still prints "95%" for the default.
print(f"Historical VaR ({confidence:.0%}): {var:.2f}, CVaR: {cvar:.2f}")

In [None]:
# 9. Stress Testing Visualizations (at least 4 scenarios)

# A seeded generator makes the "High Volatility" perturbation, and therefore
# this cell's output, reproducible across Restart & Run All.
stress_rng = np.random.default_rng(42)
feature_noise = stress_rng.normal(0, 0.05, features_df.shape)

# Each scenario is (name, features, prices):
#   Normal Market   - unmodified inputs.
#   High Volatility - every feature scaled by (1 + N(0, 0.05)) noise.
#   Flash Crash     - the whole spread column scaled by 0.8.
#                     NOTE(review): this is a permanent level shift, not a
#                     transient crash; confirm it is the intended stress.
#   Regime Shift    - spread scaled by a linear ramp from +0% to +10%.
scenarios = [
    ("Normal Market", features_df, prices_df),
    ("High Volatility", features_df * (1 + feature_noise), prices_df),
    ("Flash Crash", features_df, prices_df.copy().assign(spread=prices_df["spread"] * (1 - 0.2))),
    ("Regime Shift", features_df, prices_df.copy().assign(spread=prices_df["spread"] * (1 + np.linspace(0, 0.1, len(prices_df)))))
]

# Re-run the backtest on each scenario's inputs; one equity-curve figure each.
for name, scenario_features, scenario_prices in scenarios:
    scenario_results = backtester.run(features_df=scenario_features, prices_df=scenario_prices)
    plt.figure(figsize=(10,3))
    plt.plot(scenario_results['equity_curve'])
    plt.title(f"Equity Curve: {name}")
    plt.show()

In [None]:
# 10. Parameter Sensitivity

stop_losses = [0.02, 0.05, 0.1]

# Remember the strategy's stop-loss setting before the sweep, so this cell does
# not leave `strategy` mutated for cells that run (or re-run) afterwards.
had_stop_loss = hasattr(strategy, "stop_loss_pct")
original_stop_loss = getattr(strategy, "stop_loss_pct", None)

try:
    # NOTE(review): assumes backtester.run() reads strategy.stop_loss_pct at
    # run time; confirm against the Backtester/strategy implementation.
    for stop in stop_losses:
        strategy.stop_loss_pct = stop
        scenario_results = backtester.run()
        plt.plot(scenario_results['equity_curve'], label=f"Stop {stop}")
finally:
    # Restore the pre-sweep state even if one of the runs raised.
    if had_stop_loss:
        strategy.stop_loss_pct = original_stop_loss
    elif hasattr(strategy, "stop_loss_pct"):
        del strategy.stop_loss_pct

plt.title("Equity Curve Sensitivity to Stop-Loss")
plt.legend()
plt.show()

In [None]:
# 11. Attribution Analysis (Optional)

# If your backtester tracks P&L components (e.g. results['pnl_mean_reversion'],
# results['pnl_transaction_costs'], etc.), plot them. The section is optional,
# so it is skipped when none of the columns are present.
attribution_labels = {
    "pnl_mean_reversion": "Mean Reversion P&L",
    "pnl_transaction_costs": "Transaction Costs",
}
available_components = [column for column in attribution_labels if column in results]

if available_components:
    for column in available_components:
        plt.plot(results[column], label=attribution_labels[column])
    plt.legend()
    plt.title("P&L Attribution")
    plt.show()
else:
    print("No P&L attribution columns found in results; skipping attribution plot.")

In [None]:
# 12. Critical Self-Assessment & Next Steps

# NOTE: this text sits in a code cell, so it is kept as comments to let the
# notebook survive Restart & Run All. Moving it to a markdown cell would
# render it as formatted prose.
#
# **Limitations & Assumptions:**
# - Data quality and survivorship bias
# - Look-ahead bias and data leakage
# - Model overfitting and regime dependence
# - Transaction cost and slippage realism
#
# **Next Steps:**
# - Explore more robust regime detection and adaptive strategies
# - Integrate real-time data feeds and live monitoring
# - Expand to multi-pair/portfolio-level risk controls
# - Deploy model monitoring and automated retraining pipelines