# Strategy Backtesting
## Signal Generation, Risk Management, and P&L Analysis

This notebook focuses on:
- Converting model predictions to trading signals
- Risk control mechanisms (kill-switch, position limits, price collars)
- Order execution simulation
- P&L tracking and attribution
- Strategy performance analysis

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.config import *
from src.five_strategy_engine import StrategyEngine, RiskManager, Signal
from utils.io_utils import read_parquet
from utils.metrics_utils import calculate_performance_summary

# Render matplotlib figures inline in the notebook output and apply the
# 'seaborn-v0_8-darkgrid' style sheet to every figure created below.
%matplotlib inline
plt.style.use('seaborn-v0_8-darkgrid')

## 1. Load Features and Model Predictions

In [None]:
# Load feature data for one instrument / trading day.
date = "2025-09-15"
instrument_id = "AAPL.P.XNAS"

# Fixed seed so that Restart Kernel -> Run All reproduces the same synthetic
# data and the same simulated predictions on every run.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

feature_file = FEATURES_PATH / f"date={date}" / f"{instrument_id}.parquet"

# For demonstration, generate synthetic data if file doesn't exist
if feature_file.exists():
    df = read_parquet(feature_file)
else:
    print("Generating synthetic feature data...")
    n_samples = 2000
    half_spread = 0.01

    # Build one mid-price path and derive bid/ask from it. Drawing the three
    # prices independently produces crossed books (bid > ask) and a mid that
    # does not lie between the quotes.
    synthetic_mid = 150.01 + np.cumsum(rng.standard_normal(n_samples) * 0.01)

    df = pd.DataFrame({
        'ts_event': pd.date_range('2025-09-15 09:30:00', periods=n_samples, freq='100ms'),
        'bid_px_1': synthetic_mid - half_spread,
        'ask_px_1': synthetic_mid + half_spread,
        'mid_px': synthetic_mid,
        'imbalance_L1': rng.standard_normal(n_samples) * 0.2,
        'ofi_100': rng.standard_normal(n_samples) * 100,
        'label': rng.choice([0, 1, 2], n_samples)
    })

# Generate model predictions (in practice, load from trained model).
# The simulated probabilities tilt towards "up" when L1 imbalance is positive
# and towards "down" when it is negative, plus Gaussian noise.
prob_down = np.clip(0.3 + df['imbalance_L1'] * -0.3 + rng.standard_normal(len(df)) * 0.1, 0, 1)
prob_up = np.clip(0.3 + df['imbalance_L1'] * 0.3 + rng.standard_normal(len(df)) * 0.1, 0, 1)
prob_neutral = (1 - prob_down - prob_up).clip(0, 1)

# When prob_down + prob_up exceeds 1 the neutral class is clipped to 0 and the
# three values no longer sum to 1, so renormalise. The total is always >= 1,
# so the division is safe.
prob_total = prob_down + prob_up + prob_neutral
df['pred_prob_down'] = prob_down / prob_total
df['pred_prob_up'] = prob_up / prob_total
df['pred_prob_neutral'] = prob_neutral / prob_total

prob_columns = ['pred_prob_down', 'pred_prob_neutral', 'pred_prob_up']
assert np.allclose(df[prob_columns].sum(axis=1), 1.0), "class probabilities must sum to 1"

print(f"Loaded {len(df)} samples")
print(f"Time range: {df['ts_event'].min()} to {df['ts_event'].max()}")

## 2. Initialize Strategy Engine

In [None]:
# Build the strategy engine for the instrument selected above, then echo the
# position and risk limits it will be run against.
strategy = StrategyEngine(instrument_id)

startup_report = [
    f"Strategy initialized for {instrument_id}",
    f"\nRisk Parameters:",
    f"  Max position size: {POSITION_CONFIG['max_position_size']}",
    f"  Max order size: {POSITION_CONFIG['max_order_size']}",
    f"  Max daily loss: ${RISK_CONFIG['max_daily_loss']}",
    f"  Kill switch enabled: {RISK_CONFIG['enable_kill_switch']}",
]
for report_line in startup_report:
    print(report_line)

## 3. Run Backtest

In [None]:
# Backtest loop: replay every row through the strategy and record the signals
# it emits, the simulated fills, the orders that were not produced, and a
# per-row snapshot of position and P&L.
trades = []
positions = []
signals_generated = []
orders_rejected = []

print("Running backtest...")

# itertuples() yields one lightweight namedtuple per row; indexing with
# df.iloc[idx] builds a full Series on every iteration and is far slower.
for row in df.itertuples(index=False):
    ts_event = row.ts_event
    mid_px = row.mid_px

    # Prediction probability array, ordered [down, neutral, up]
    pred_proba = np.array([
        row.pred_prob_down,
        row.pred_prob_neutral,
        row.pred_prob_up
    ])

    # Generate signal
    signal = strategy.generate_signal(ts_event, pred_proba)

    if signal is not None:
        signals_generated.append({
            'timestamp': ts_event,
            'direction': signal.direction,
            'probability': signal.probability,
            'strength': signal.strength
        })

        if signal.direction != "NEUTRAL":
            # Convert to order
            order = strategy.signal_to_order(
                signal,
                current_bid=row.bid_px_1,
                current_ask=row.ask_px_1,
                mid_price=mid_px
            )

            if order is not None:
                # Simulated execution: the full order quantity is filled at
                # the order's own price.
                fill_price = order.price
                strategy.process_fill(order, fill_price, order.quantity)

                trades.append({
                    'timestamp': ts_event,
                    'side': order.side.value,
                    'price': fill_price,
                    'quantity': order.quantity,
                    'signal_direction': signal.direction,
                    'signal_strength': signal.strength
                })
            else:
                # A directional signal that produced no order is logged as a
                # rejection; the reason label is fixed because
                # signal_to_order() only returns None here.
                orders_rejected.append({
                    'timestamp': ts_event,
                    'direction': signal.direction,
                    'reason': 'Risk control'
                })

    # Update unrealized P&L
    strategy.calculate_unrealized_pnl(mid_px)

    # Record position snapshot (one per input row, whether or not a trade happened)
    positions.append({
        'timestamp': ts_event,
        'mid_price': mid_px,
        'position': strategy.position.quantity,
        'avg_price': strategy.position.avg_price,
        'realized_pnl': strategy.position.realized_pnl,
        'unrealized_pnl': strategy.position.unrealized_pnl,
        'total_pnl': strategy.position.realized_pnl + strategy.position.unrealized_pnl
    })

# Convert to DataFrames
trades_df = pd.DataFrame(trades)
positions_df = pd.DataFrame(positions)
signals_df = pd.DataFrame(signals_generated)
rejected_df = pd.DataFrame(orders_rejected)

# With no input rows positions_df has no columns, so fall back to a zero P&L
# instead of raising on the missing 'total_pnl' column.
final_pnl = positions_df['total_pnl'].iloc[-1] if len(positions_df) > 0 else 0.0

print(f"\nBacktest complete!")
print(f"  Signals generated: {len(signals_df)}")
print(f"  Trades executed: {len(trades_df)}")
print(f"  Orders rejected: {len(rejected_df)}")
print(f"  Final position: {strategy.position.quantity}")
print(f"  Final P&L: ${final_pnl:.2f}")

## 4. Signal Analysis

In [None]:
if len(signals_df) > 0:
    # Tally signals per direction once; the table and the bar chart share it.
    direction_counts = signals_df['direction'].value_counts()
    strength = signals_df['strength']
    mean_strength = strength.mean()

    print("Signal Direction Distribution:")
    print(direction_counts)

    fig, (direction_ax, strength_ax) = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: number of signals per direction
    direction_counts.plot(kind='bar', ax=direction_ax, color='steelblue')
    direction_ax.set_title('Signal Direction Distribution')
    direction_ax.set_xlabel('Direction')
    direction_ax.set_ylabel('Count')

    # Right panel: histogram of signal strength with the mean marked
    strength_ax.hist(strength, bins=30, alpha=0.7, edgecolor='black')
    strength_ax.axvline(mean_strength, color='red', linestyle='--',
                        label=f"Mean: {mean_strength:.3f}")
    strength_ax.set_xlabel('Signal Strength')
    strength_ax.set_ylabel('Frequency')
    strength_ax.set_title('Signal Strength Distribution')
    strength_ax.legend()

    for panel in (direction_ax, strength_ax):
        panel.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## 5. P&L Evolution

In [None]:
# Three stacked panels on a shared timeline: cumulative P&L, position size
# against the configured limit, and mid price against average entry price.
fig, (pnl_ax, position_ax, price_ax) = plt.subplots(3, 1, figsize=(14, 10))

timeline = positions_df['timestamp']
position_limit = POSITION_CONFIG['max_position_size']

# Panel 1: total vs realized P&L, with a zero reference line
pnl_ax.plot(timeline, positions_df['total_pnl'], linewidth=2, label='Total P&L')
pnl_ax.plot(timeline, positions_df['realized_pnl'], linewidth=1,
            alpha=0.7, label='Realized P&L')
pnl_ax.axhline(0, color='black', linestyle='--', alpha=0.5)
pnl_ax.set_ylabel('P&L ($)')
pnl_ax.set_title('Cumulative P&L')

# Panel 2: signed position with the +/- position limit drawn in red
position_ax.plot(timeline, positions_df['position'], linewidth=1, color='green')
position_ax.axhline(0, color='black', linestyle='-', linewidth=0.8)
position_ax.axhline(position_limit, color='red',
                    linestyle='--', alpha=0.5, label='Max Position')
position_ax.axhline(-position_limit, color='red',
                    linestyle='--', alpha=0.5)
position_ax.set_ylabel('Position Size')
position_ax.set_title('Position Evolution')

# Panel 3: market mid price vs the position's average entry price
price_ax.plot(timeline, positions_df['mid_price'],
              linewidth=1, label='Mid Price', alpha=0.7)
price_ax.plot(timeline, positions_df['avg_price'],
              linewidth=1, label='Avg Entry Price', alpha=0.7)
price_ax.set_ylabel('Price')
price_ax.set_xlabel('Time')
price_ax.set_title('Price vs Entry Price')

# Legend and grid styling are identical on every panel
for panel in (pnl_ax, position_ax, price_ax):
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 6. Trade Analysis

In [None]:
if len(trades_df) > 0:
    # Side codes 'B' and 'S' are matched as buys and sells respectively.
    is_buy = trades_df['side'] == 'B'
    is_sell = trades_df['side'] == 'S'
    trade_sizes = trades_df['quantity']

    print("Trade Statistics:")
    print(f"  Total trades: {len(trades_df)}")
    print(f"  Buy trades: {is_buy.sum()}")
    print(f"  Sell trades: {is_sell.sum()}")
    print(f"  Avg trade size: {trade_sizes.mean():.1f}")
    print(f"  Total volume: {trade_sizes.sum():.0f}")

    # Overlay fills on the mid-price path: green up-triangles for buys,
    # red down-triangles for sells.
    fig, ax = plt.subplots(figsize=(14, 6))

    ax.plot(positions_df['timestamp'], positions_df['mid_price'],
            linewidth=1, alpha=0.5, label='Mid Price')

    marker_specs = (
        (is_buy, 'green', '^', 'Buy'),
        (is_sell, 'red', 'v', 'Sell'),
    )
    for side_mask, colour, marker, legend_label in marker_specs:
        side_fills = trades_df[side_mask]
        ax.scatter(side_fills['timestamp'], side_fills['price'],
                   c=colour, marker=marker, s=100, label=legend_label, alpha=0.7)

    ax.set_xlabel('Time')
    ax.set_ylabel('Price')
    ax.set_title('Trade Execution Points')
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## 7. Performance Metrics

In [None]:
# Calculate returns
#
# Cumulative P&L starts at (and can cross) zero, so pct_change() on it divides
# by zero or by a sign-flipping base and yields inf/NaN or meaningless values
# that fillna(0) does not remove. Instead, express each step's change in P&L
# as a fraction of a fixed capital base.
#
# NOTE(review): the capital base is taken as the maximum position size times
# the first observed mid price (the largest notional the strategy may hold at
# the opening price) - confirm this is the intended denominator.
capital_base = POSITION_CONFIG['max_position_size'] * positions_df['mid_price'].iloc[0]
if not capital_base > 0:
    raise ValueError(f"Capital base must be positive to compute returns, got {capital_base}")

pnl_steps = positions_df['total_pnl'].diff()
# The first snapshot has no predecessor; its P&L is measured from a zero start.
pnl_steps.iloc[0] = positions_df['total_pnl'].iloc[0]
positions_df['returns'] = pnl_steps / capital_base

# Performance summary
perf_summary = calculate_performance_summary(
    positions_df['returns'],
    trades_df if len(trades_df) > 0 else None
)

print("\nPerformance Summary:")
print("=" * 50)
for key, value in perf_summary.items():
    # np.number also covers numpy integer scalars, which are not instances of
    # the built-in int and would otherwise skip the fixed-precision format.
    if isinstance(value, (int, float, np.number)):
        print(f"{key:.<40} {value:.4f}")
    else:
        print(f"{key:.<40} {value}")

## 8. Risk Control Analysis

In [None]:
# Report the risk manager's end-of-run state next to the observed peak
# exposure and the number of orders that were rejected.
risk_manager = strategy.risk_manager
peak_exposure = positions_df['position'].abs().max()
rejected_count = len(rejected_df)

risk_report = [
    "\nRisk Control Summary:",
    "=" * 50,
    f"Kill switch triggered: {risk_manager.kill_switch_active}",
    f"Daily P&L: ${risk_manager.daily_pnl:.2f}",
    f"Max position reached: {peak_exposure:.0f}",
    f"Position limit: {POSITION_CONFIG['max_position_size']}",
    f"Orders rejected: {rejected_count}",
]
for report_line in risk_report:
    print(report_line)

# Break rejections down by reason only when there is something to show
if rejected_count > 0:
    print(f"\nRejection reasons:")
    print(rejected_df['reason'].value_counts())

## 9. Conclusions

### Key Findings:

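- **Total P&L**: The final P&L is reported at the end of Section 3. When the synthetic fallback data is used, this figure only demonstrates that the pipeline runs; it is not evidence of profitability
- **Risk Management**: Section 8 reports whether the kill-switch fired, the largest position reached relative to the position limit, and how many orders were rejected
- **Signal Quality**: Section 4 shows the distribution of signal directions and strengths. Whether signal strength correlates with profitability is not measured in this notebook and remains to be verified
- **Execution**: Every order is assumed to fill in full at its own order price, so the execution results are optimistic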

### Next Steps:

- Optimize signal thresholds
- Refine position sizing based on signal strength
- Test across multiple time periods
- Analyze transaction costs in detail
- Run capacity analysis