# Strategy Validation Template

This notebook helps you rigorously validate a strategy before production deployment.

## Validation Checklist:
- [ ] Out-of-sample testing
- [ ] Walk-forward analysis
- [ ] Multiple symbols testing
- [ ] Regime analysis
- [ ] Robustness testing (parameter sensitivity)
- [ ] Statistical significance testing
- [ ] Transaction cost sensitivity
- [ ] Risk analysis

In [None]:
# Setup
import os
import sys

# Make the project package importable. The location can be overridden with the
# TRADING_BOT_ROOT environment variable so the notebook also runs on machines
# other than the original author's; the previous hard-coded path remains the
# fallback. The membership check keeps sys.path free of duplicates when this
# cell is re-run.
PROJECT_ROOT = os.environ.get('TRADING_BOT_ROOT', '/Users/jacobliu/repos/projects/trading-bot')
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

from research.utils import (
    QuickBacktest,
    load_sample_data,
    plot_equity_curve,
    plot_drawdown,
    calculate_metrics,
    rolling_metrics,
)
from research.utils.data_loaders import add_features

%matplotlib inline
%load_ext autoreload
%autoreload 2

print("Setup complete!")

## 1. Define Strategy

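Implement your strategy as a function that takes the feature DataFrame, the current bar index, and a parameter dict, and returns the target position for that bar.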

In [None]:
def my_strategy(data, i, params):
    """Return the target position for bar ``i`` (example: RSI mean reversion).

    Args:
        data: DataFrame providing an 'rsi_14' column and a 'signal' column
            holding the positions already decided for earlier bars.
        i: Positional index of the bar being evaluated.
        params: Dict with 'oversold' and 'overbought' RSI thresholds.

    Returns:
        1.0 (long) when RSI is below the oversold threshold, 0.0 (flat) when
        it is above the overbought threshold; otherwise the previous bar's
        signal, or 0.0 on the very first bar.
    """
    current_rsi = data['rsi_14'].iloc[i]

    # Oversold -> go long
    if current_rsi < params['oversold']:
        return 1.0

    # Overbought -> go flat
    if current_rsi > params['overbought']:
        return 0.0

    # Neutral zone -> keep whatever position the previous bar had
    if i > 0:
        return data['signal'].iloc[i - 1]
    return 0.0

# Baseline RSI thresholds used by every test that does not sweep parameters
default_params = dict(oversold=30, overbought=70)

print(f"Strategy defined with parameters: {default_params}")

## 2. Load and Prepare Data

In [None]:
# Pull a long price history so the robustness tests below have enough bars
data = load_sample_data('SPY', days=1000)

# When the loader returns a frame with a 'symbol' column, keep only the SPY
# rows and index them chronologically by timestamp
if 'symbol' in data.columns:
    is_spy = data['symbol'] == 'SPY'
    data = data.loc[is_spy].set_index('timestamp').sort_index()

# Attach the feature columns, then drop every row that still has a missing
# value (presumably the indicator warm-up bars - confirm against add_features)
data = add_features(data, features=['returns', 'sma', 'ema', 'rsi']).dropna()

# Placeholder signal column; the strategy loops fill it in bar by bar
data['signal'] = 0.0

first_bar, last_bar = data.index[0], data.index[-1]
print(f"Data: {len(data)} bars from {first_bar.date()} to {last_bar.date()}")

## 3. In-Sample vs Out-of-Sample Split

In [None]:
# Split: 70% in-sample, 30% out-of-sample
split_idx = int(len(data) * 0.7)

# Take independent copies rather than bare slices: later cells write signals
# into these frames row by row, and assigning into a slice of `data` raises
# SettingWithCopyWarning and may or may not write through to `data` depending
# on the pandas version. Copies make the result deterministic and keep this
# cell safe to re-run.
data_in_sample = data.iloc[:split_idx].copy()
data_out_sample = data.iloc[split_idx:].copy()

print(f"In-sample:  {len(data_in_sample)} bars ({data_in_sample.index[0].date()} to {data_in_sample.index[-1].date()})")
print(f"Out-sample: {len(data_out_sample)} bars ({data_out_sample.index[0].date()} to {data_out_sample.index[-1].date()})")

## 4. In-Sample Performance

In [None]:
# Backtest in-sample
bt = QuickBacktest(initial_capital=100000)

# Fill the signal column strictly in bar order: the strategy's "hold" branch
# reads the previous bar's signal, so earlier rows must be written first
signal_pos = data_in_sample.columns.get_loc('signal')
for bar in range(len(data_in_sample)):
    data_in_sample.iloc[bar, signal_pos] = my_strategy(data_in_sample, bar, default_params)

in_sample_signals = data_in_sample['signal']
result_in = bt.run_signals(data_in_sample, in_sample_signals)
print("In-Sample Results:")
print(result_in)

In [None]:
# Visualise the in-sample backtest using the result object's own plot() method
result_in.plot()

## 5. Out-of-Sample Performance

In [None]:
# Backtest out-of-sample with SAME parameters
# (signals are written in bar order because the "hold" branch of the strategy
# looks at the previous bar's signal)
signal_pos = data_out_sample.columns.get_loc('signal')
for bar in range(len(data_out_sample)):
    data_out_sample.iloc[bar, signal_pos] = my_strategy(data_out_sample, bar, default_params)

out_sample_signals = data_out_sample['signal']
result_out = bt.run_signals(data_out_sample, out_sample_signals)
print("Out-of-Sample Results:")
print(result_out)

In [None]:
# Visualise the out-of-sample backtest using the result object's own plot() method
result_out.plot()

In [None]:
# Compare in-sample vs out-of-sample
comparison = pd.DataFrame({
    'In-Sample': result_in.metrics,
    'Out-of-Sample': result_out.metrics,
}).T

print("\nIn-Sample vs Out-of-Sample Comparison:")
print(comparison[['total_return', 'sharpe_ratio', 'max_drawdown', 'win_rate', 'num_trades']])

# Check for overfitting.
# NOTE: despite its name, `sharpe_degradation` is the fraction of the
# in-sample Sharpe that is *retained* out-of-sample (out / in), so higher is
# better. It is forced to 0 when the in-sample Sharpe is not positive, because
# the ratio is meaningless in that case.
in_sharpe = result_in.metrics['sharpe_ratio']
out_sharpe = result_out.metrics['sharpe_ratio']
sharpe_degradation = (out_sharpe / in_sharpe) if in_sharpe > 0 else 0
print(f"\nSharpe degradation: {sharpe_degradation:.2%}")
if not in_sharpe > 0:
    # No in-sample edge to begin with: report that instead of blaming overfitting
    print("WARNING: In-sample Sharpe is not positive; the out-of-sample comparison is not meaningful.")
elif sharpe_degradation < 0.7:
    print("WARNING: Significant performance degradation suggests overfitting!")
elif sharpe_degradation > 0.85:
    print("GOOD: Strategy appears robust to out-of-sample data.")
else:
    # Previously this middle band produced no verdict at all
    print("CAUTION: Moderate performance degradation; investigate before relying on this strategy.")

## 6. Walk-Forward Analysis

In [None]:
# Walk-forward with 6-month windows
# NOTE(review): every window uses the fixed `default_params` (no per-window
# re-fitting), so this measures performance stability across rolling windows.
window_size = 126  # ~6 months
step_size = 21     # ~1 month

wf_results = []

# Column position is the same for every window copy, so look it up once
signal_col = data.columns.get_loc('signal')

# The "+ 1" keeps the final window when it ends exactly on the last bar;
# without it that window was silently skipped.
for start in range(0, len(data) - window_size + 1, step_size):
    end = start + window_size
    window_data = data.iloc[start:end].copy()

    # Generate signals in bar order (the strategy's hold branch reads the
    # previous bar's signal); each window starts flat
    for i in range(len(window_data)):
        window_data.iloc[i, signal_col] = my_strategy(window_data, i, default_params)

    # Backtest
    result = bt.run_signals(window_data, window_data['signal'])

    wf_results.append({
        'start_date': window_data.index[0],
        'end_date': window_data.index[-1],
        'sharpe': result.metrics['sharpe_ratio'],
        'return': result.metrics['total_return'],
        'max_dd': result.metrics['max_drawdown'],
    })

wf_df = pd.DataFrame(wf_results)
print(f"Walk-forward analysis: {len(wf_df)} windows")
wf_df.head()

In [None]:
# Plot walk-forward results: Sharpe per window on top, return per window below
fig, axes = plt.subplots(2, 1, figsize=(15, 8))
ax_sharpe, ax_returns = axes

wf_sharpe = wf_df['sharpe']
mean_sharpe = wf_sharpe.mean()

# Sharpe ratio over time, with its mean and the zero line for reference
ax_sharpe.plot(wf_df['end_date'], wf_sharpe, marker='o', linewidth=2)
ax_sharpe.axhline(y=mean_sharpe, color='r', linestyle='--', label=f'Mean: {mean_sharpe:.2f}')
ax_sharpe.axhline(y=0, color='black', linestyle='-', alpha=0.3)
ax_sharpe.set_title('Walk-Forward Sharpe Ratio', fontweight='bold')
ax_sharpe.set_ylabel('Sharpe Ratio')
ax_sharpe.legend()
ax_sharpe.grid(True, alpha=0.3)

# Returns over time, shown in percent
ax_returns.bar(wf_df['end_date'], wf_df['return'] * 100, alpha=0.7)
ax_returns.axhline(y=0, color='black', linestyle='-', alpha=0.5)
ax_returns.set_title('Walk-Forward Returns', fontweight='bold')
ax_returns.set_ylabel('Return (%)')
ax_returns.set_xlabel('Period End Date')
ax_returns.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

positive_share = (wf_sharpe > 0).mean()
print("\nWalk-Forward Statistics:")
print(f"Average Sharpe: {mean_sharpe:.2f}")
print(f"Sharpe Std Dev: {wf_sharpe.std():.2f}")
print(f"% Positive Sharpe: {positive_share:.1%}")

## 7. Parameter Sensitivity Analysis

In [None]:
# Test parameter robustness by sweeping both RSI thresholds over a grid
oversold_values = range(20, 41, 2)
overbought_values = range(60, 81, 2)

param_results = []

# Every (oversold, overbought) pair, with the oversold threshold varying slowest
threshold_grid = [
    (lower, upper)
    for lower in oversold_values
    for upper in overbought_values
]

# Each copy of `data` has the same column layout, so resolve the position once
signal_col = data.columns.get_loc('signal')

for os_val, ob_val in threshold_grid:
    params = {'oversold': os_val, 'overbought': ob_val}

    # Fresh copy per combination so signals from one run never leak into the next
    test_data = data.copy()
    for i in range(len(test_data)):
        test_data.iloc[i, signal_col] = my_strategy(test_data, i, params)

    result = bt.run_signals(test_data, test_data['signal'])
    run_metrics = result.metrics

    param_results.append({
        'oversold': os_val,
        'overbought': ob_val,
        'sharpe': run_metrics['sharpe_ratio'],
        'return': run_metrics['total_return'],
        'max_dd': run_metrics['max_drawdown'],
    })

param_df = pd.DataFrame(param_results)
print(f"Tested {len(param_df)} parameter combinations")

In [None]:
# Heatmap of Sharpe ratios: rows are oversold thresholds, columns overbought
pivot_sharpe = param_df.pivot(index='oversold', columns='overbought', values='sharpe')

plt.figure(figsize=(12, 8))
sns.heatmap(pivot_sharpe, annot=True, fmt='.2f', cmap='RdYlGn', center=0, vmin=-1, vmax=3)
plt.title('Sharpe Ratio Heatmap (Oversold vs Overbought)', fontweight='bold')
plt.ylabel('Oversold Threshold')
plt.xlabel('Overbought Threshold')
plt.show()

# Find best parameters (row with the highest Sharpe)
best_params = param_df.loc[param_df['sharpe'].idxmax()]
print(f"\nBest parameters: Oversold={best_params['oversold']:.0f}, Overbought={best_params['overbought']:.0f}")
print(f"Best Sharpe: {best_params['sharpe']:.2f}")

# Check robustness: how widely spread are the best-performing combinations?
# max(1, ...) guarantees at least one row even when the grid has fewer than
# ten combinations; otherwise the selection is empty and the ranges print NaN.
top_n = max(1, int(len(param_df) * 0.1))
top_10_pct = param_df.nlargest(top_n, 'sharpe')
print(f"\nTop 10% parameter ranges:")
print(f"Oversold: {top_10_pct['oversold'].min():.0f} - {top_10_pct['oversold'].max():.0f}")
print(f"Overbought: {top_10_pct['overbought'].min():.0f} - {top_10_pct['overbought'].max():.0f}")

## 8. Transaction Cost Sensitivity

In [None]:
# Test with different commission/slippage levels
cost_scenarios = [
    {'name': 'Low Cost', 'commission': 0.0005, 'slippage': 0.0002},
    {'name': 'Medium Cost', 'commission': 0.001, 'slippage': 0.0005},
    {'name': 'High Cost', 'commission': 0.002, 'slippage': 0.001},
    {'name': 'Very High Cost', 'commission': 0.005, 'slippage': 0.002},
]

cost_results = []

# `data['signal']` starts as an all-zero placeholder and nothing above reliably
# writes strategy signals back into `data` itself (they go into slices or
# copies), so backtesting it directly would cost out a flat or stale position.
# Build the default-parameter signals on a private copy of the full history.
cost_data = data.copy()
signal_col = cost_data.columns.get_loc('signal')
for i in range(len(cost_data)):
    cost_data.iloc[i, signal_col] = my_strategy(cost_data, i, default_params)

for scenario in cost_scenarios:
    bt_cost = QuickBacktest(
        initial_capital=100000,
        commission=scenario['commission'],
        slippage=scenario['slippage']
    )

    # Same signals in every scenario; only the cost assumptions change
    result = bt_cost.run_signals(cost_data, cost_data['signal'])

    cost_results.append({
        'scenario': scenario['name'],
        'sharpe': result.metrics['sharpe_ratio'],
        'return': result.metrics['total_return'],
        'num_trades': result.metrics['num_trades'],
    })

cost_df = pd.DataFrame(cost_results)
print("Transaction Cost Sensitivity:")
print(cost_df)

## 9. Rolling Performance Metrics

In [None]:
# Calculate rolling metrics
# `data['signal']` starts as an all-zero placeholder and nothing above reliably
# writes strategy signals back into `data` itself (they go into slices or
# copies), so generate the default-parameter signals on a copy of the full
# history before backtesting it.
full_data = data.copy()
signal_col = full_data.columns.get_loc('signal')
for i in range(len(full_data)):
    full_data.iloc[i, signal_col] = my_strategy(full_data, i, default_params)

full_result = bt.run_signals(full_data, full_data['signal'])
rolling = rolling_metrics(full_result.returns, window=63)  # ~3 months

# Plot rolling Sharpe (top) and rolling volatility (bottom)
fig, axes = plt.subplots(2, 1, figsize=(15, 8))

axes[0].plot(rolling.index, rolling['sharpe'], linewidth=2)
axes[0].axhline(y=0, color='black', linestyle='-', alpha=0.3)
axes[0].set_title('Rolling Sharpe Ratio (63-day)', fontweight='bold')
axes[0].set_ylabel('Sharpe Ratio')
axes[0].grid(True, alpha=0.3)

# Volatility is scaled by 100 for display as a percentage
axes[1].plot(rolling.index, rolling['volatility'] * 100, linewidth=2, color='orange')
axes[1].set_title('Rolling Volatility (63-day)', fontweight='bold')
axes[1].set_ylabel('Volatility (%)')
axes[1].set_xlabel('Date')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 10. Final Validation Summary

In [None]:
# Consolidated report of the checks run above, followed by a go/no-go verdict
banner = "=" * 60
print(banner)
print("STRATEGY VALIDATION SUMMARY")
print(banner)

# --- 1. Out-of-sample metrics and the out/in Sharpe ratio ---
print("\n1. Out-of-Sample Performance:")
oos_metrics = result_out.metrics
print(f"   Sharpe Ratio: {oos_metrics['sharpe_ratio']:.2f}")
print(f"   Total Return: {oos_metrics['total_return']:.2%}")
print(f"   Max Drawdown: {oos_metrics['max_drawdown']:.2%}")
print(f"   Degradation: {sharpe_degradation:.1%}")

# --- 2. Distribution of Sharpe across the walk-forward windows ---
print("\n2. Walk-Forward Consistency:")
wf_sharpe = wf_df['sharpe']
print(f"   Average Sharpe: {wf_sharpe.mean():.2f}")
print(f"   Sharpe Stability: {wf_sharpe.std():.2f}")
print(f"   % Positive Periods: {(wf_sharpe > 0).mean():.1%}")

# --- 3. Best grid point and the spread of the top decile ---
print("\n3. Parameter Robustness:")
print(f"   Best Sharpe: {param_df['sharpe'].max():.2f}")
top_sharpe = top_10_pct['sharpe']
print(f"   Top 10% Sharpe Range: {top_sharpe.min():.2f} - {top_sharpe.max():.2f}")

# --- 4. Sharpe under the 'High Cost' scenario ---
print("\n4. Cost Sensitivity:")
is_high_cost = cost_df['scenario'] == 'High Cost'
sharpe_at_high_cost = cost_df.loc[is_high_cost, 'sharpe'].values[0]
print(f"   Sharpe at High Cost: {sharpe_at_high_cost:.2f}")
survives_high_cost = sharpe_at_high_cost > 0.5
print(f"   Still Profitable: {'YES' if survives_high_cost else 'NO'}")

# Verdict: all three gates must pass
print("\n" + banner)
print("RECOMMENDATION:")
approved = (
    sharpe_degradation > 0.8
    and wf_sharpe.mean() > 1.0
    and survives_high_cost
)
if not approved:
    print("NEEDS MORE WORK before paper trading")
else:
    print("APPROVED for paper trading")
print(banner)