# Backtest Analysis

This notebook runs a backtest with an in-sample/out-of-sample split, reports performance metrics for both periods, and checks the out-of-sample results for statistical significance.

In [None]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
from datetime import datetime
from scipy import stats

from data_acquisition import DataAcquisition
from signal_generator import SignalGenerator
from backtester import Backtester

# Apply seaborn's 'darkgrid' style to the plots drawn below.
sns.set_style('darkgrid')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

## 1. Setup

In [None]:
# Load the run configuration; safe_load builds only plain Python objects.
with open('../config.yaml', 'r') as f:
    config = yaml.safe_load(f)

# Every pipeline component is constructed from the same config mapping.
data_acq = DataAcquisition(config)
signal_gen = SignalGenerator(config)
backtester = Backtester(config)

# Backtest window and train/test boundary, stored in the config as
# 'YYYY-MM-DD' strings and parsed here into datetime objects.
backtest_cfg = config['backtest']
date_format = '%Y-%m-%d'
start_date = datetime.strptime(backtest_cfg['start_date'], date_format)
end_date = datetime.strptime(backtest_cfg['end_date'], date_format)
train_end = datetime.strptime(backtest_cfg['train_end'], date_format)

## 2. Run Backtest

In [None]:
# Fetch the BTCUSDT dataset for the configured window and generate signals from it.
df = data_acq.prepare_dataset('BTCUSDT', start_date, end_date)
signals = signal_gen.generate_signals(df)

# Chronological split at train_end: rows strictly before it are in-sample,
# rows at or after it are out-of-sample. Signals are split on their own index.
train_df = df[df.index < train_end]
test_df = df[df.index >= train_end]
train_signals = signals[signals.index < train_end]
test_signals = signals[signals.index >= train_end]

# Fail with an explicit message when train_end lies outside the fetched data;
# otherwise the index[0] lookups below raise a bare IndexError.
if len(train_df) == 0 or len(test_df) == 0:
    raise ValueError(
        f"train_end={train_end:%Y-%m-%d} produces an empty split "
        f"(train rows: {len(train_df)}, test rows: {len(test_df)}); "
        "it must fall strictly inside the fetched data range."
    )

print(f"Train period: {train_df.index[0]} to {train_df.index[-1]}")
print(f"Test period: {test_df.index[0]} to {test_df.index[-1]}")

## 3. In-Sample Results

In [None]:
# Run the strategy over the training window and summarise it.
train_results, train_trades = backtester.run_backtest(train_df, train_signals)
train_metrics = backtester.calculate_metrics(train_results, train_trades)

# Report banner followed by one dot-padded line per metric.
rule = "=" * 60
print("\n" + rule, "IN-SAMPLE PERFORMANCE", rule, sep="\n")
for metric, value in train_metrics.items():
    # Metric name left-aligned and dot-filled to 40 chars, value right-aligned
    # in 15 chars with two decimals.
    print(f"{metric:.<40} {value:>15.2f}")
print(rule)

## 4. Out-of-Sample Results

In [None]:
# Run the strategy over the held-out window and summarise it.
test_results, test_trades = backtester.run_backtest(test_df, test_signals)
test_metrics = backtester.calculate_metrics(test_results, test_trades)

rule = "=" * 60
print("\n" + rule, "OUT-OF-SAMPLE PERFORMANCE", rule, sep="\n")
for metric, value in test_metrics.items():
    print(f"{metric:.<40} {value:>15.2f}")
print(rule)

# Selection criteria: each entry pairs a human-readable label with the
# boolean outcome of that check on the out-of-sample metrics.
sharpe_ok = test_metrics['Sharpe Ratio'] > 1.5
drawdown_ok = test_metrics['Max Drawdown (%)'] > -10
win_rate_ok = test_metrics['Win Rate (%)'] > 60
criteria = [
    ('Sharpe Ratio > 1.5', sharpe_ok),
    ('Max Drawdown > -10%', drawdown_ok),
    ('Win Rate > 60%', win_rate_ok),
]

print("\n" + rule, "STRATEGY SELECTION CRITERIA", rule, sep="\n")
for criterion, passed in criteria:
    if passed:
        status = '✓ PASS'
    else:
        status = '✗ FAIL'
    print(f"{criterion:.<40} {status:>15}")
print(rule)

# The strategy is selected only when every criterion passes.
all_passed = all(passed for _, passed in criteria)
verdict = '✓ STRATEGY SELECTED' if all_passed else '✗ STRATEGY REJECTED'
print(f"\nOverall: {verdict}")

## 5. Equity Curve

In [None]:
# Two stacked panels: equity on top, drawdown underneath.
fig, axes = plt.subplots(2, 1, figsize=(14, 10))
equity_ax, drawdown_ax = axes

# --- Equity curve -----------------------------------------------------------
# In-sample is drawn first so both panels use the same colour order.
for label, results in (('In-Sample', train_results), ('Out-of-Sample', test_results)):
    equity_ax.plot(results.index, results['equity'], label=label, linewidth=2)
# Dashed reference line at the configured starting capital.
equity_ax.axhline(y=config['backtest']['initial_capital'], color='black', linestyle='--', alpha=0.5)
equity_ax.set_title('Equity Curve', fontsize=14, fontweight='bold')
equity_ax.set_ylabel('Portfolio Value ($)')
equity_ax.legend()
equity_ax.grid(True, alpha=0.3)

# --- Drawdown ---------------------------------------------------------------
# Drawdown is the percentage distance of equity below its running peak,
# computed independently for each period.
train_cummax = train_results['equity'].cummax()
test_cummax = test_results['equity'].cummax()
train_dd = train_results['equity'].sub(train_cummax).div(train_cummax).mul(100)
test_dd = test_results['equity'].sub(test_cummax).div(test_cummax).mul(100)

for label, results, drawdown in (
    ('In-Sample', train_results, train_dd),
    ('Out-of-Sample', test_results, test_dd),
):
    drawdown_ax.fill_between(results.index, drawdown, 0, alpha=0.5, label=label)
# Red dashed line marking the -10% drawdown threshold.
drawdown_ax.axhline(y=-10, color='red', linestyle='--', label='Max DD Threshold')
drawdown_ax.set_title('Drawdown', fontsize=14, fontweight='bold')
drawdown_ax.set_ylabel('Drawdown (%)')
drawdown_ax.set_xlabel('Date')
drawdown_ax.legend()
drawdown_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 6. Trade Analysis

In [None]:
# Everything below needs at least one out-of-sample trade.
if len(test_trades) > 0:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    (pnl_ax, cum_ax), (ret_ax, dur_ax) = axes

    # Top-left: histogram of per-trade P&L with a marker at zero.
    pnl_ax.hist(test_trades['pnl'], bins=30, alpha=0.7, edgecolor='black')
    pnl_ax.axvline(x=0, color='red', linestyle='--')
    pnl_ax.set(title='P&L Distribution', xlabel='P&L ($)', ylabel='Frequency')
    pnl_ax.grid(True, alpha=0.3)

    # Top-right: running total of P&L by trade number.
    # The running total is stored on test_trades as a new column.
    test_trades['cumulative_pnl'] = test_trades['pnl'].cumsum()
    trade_numbers = range(len(test_trades))
    cum_ax.plot(trade_numbers, test_trades['cumulative_pnl'], linewidth=2)
    cum_ax.set(title='Cumulative P&L', xlabel='Trade Number', ylabel='Cumulative P&L ($)')
    cum_ax.grid(True, alpha=0.3)

    # Bottom-left: histogram of per-trade returns, scaled by 100 for display as percent.
    ret_ax.hist(test_trades['return'] * 100, bins=30, alpha=0.7, edgecolor='black', color='green')
    ret_ax.axvline(x=0, color='red', linestyle='--')
    ret_ax.set(title='Return Distribution', xlabel='Return (%)', ylabel='Frequency')
    ret_ax.grid(True, alpha=0.3)

    # Bottom-right: holding time per trade, converted from seconds to hours.
    # The duration is stored on test_trades as a new column.
    holding_time = test_trades['exit_time'] - test_trades['entry_time']
    test_trades['duration'] = holding_time.dt.total_seconds() / 3600
    dur_ax.hist(test_trades['duration'], bins=30, alpha=0.7, edgecolor='black', color='orange')
    dur_ax.set(title='Trade Duration', xlabel='Duration (hours)', ylabel='Frequency')
    dur_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Headline per-trade statistics.
    n_trades = len(test_trades)
    n_winners = (test_trades['pnl'] > 0).sum()
    print("\nTrade Statistics:")
    print(f"Avg P&L: ${test_trades['pnl'].mean():.2f}")
    print(f"Avg Return: {test_trades['return'].mean() * 100:.4f}%")
    print(f"Avg Duration: {test_trades['duration'].mean():.2f} hours")
    print(f"Win Rate: {n_winners / n_trades * 100:.2f}%")

## 7. Statistical Significance

In [None]:
# Significance checks on the out-of-sample per-trade returns.
if len(test_trades) > 0:
    trade_returns = test_trades['return']

    # One-sided one-sample t-test of H0: mean return = 0 against H1: mean return > 0.
    # ttest_1samp is two-sided by default, which would also flag a significantly
    # *negative* mean as "significant"; alternative='greater' matches the
    # hypothesis printed below (requires SciPy >= 1.6).
    t_stat, p_value = stats.ttest_1samp(trade_returns, 0, alternative='greater')

    print("\n" + "="*60)
    print("STATISTICAL SIGNIFICANCE TEST")
    print("="*60)
    print("Null Hypothesis: Mean return = 0")
    print("Alternative: Mean return > 0")
    print(f"\nT-statistic: {t_stat:.4f}")
    print(f"P-value: {p_value:.6f}")
    print(f"\nResult: {'✓ Statistically significant (p < 0.05)' if p_value < 0.05 else '✗ Not significant'}")
    print("="*60)

    # Bootstrap a 95% confidence interval for the Sharpe ratio by resampling
    # trades with replacement. A fixed seed keeps the reported interval
    # reproducible across notebook runs.
    n_bootstrap = 1000
    rng = np.random.RandomState(42)
    # NOTE(review): sqrt(252 * 24) annualises as if every observation were one
    # hourly period, but these are per-trade returns - confirm this factor
    # matches the convention used by Backtester.calculate_metrics.
    annualisation = np.sqrt(252 * 24)
    sharpe_samples = []

    for _ in range(n_bootstrap):
        sample = trade_returns.sample(n=len(trade_returns), replace=True, random_state=rng)
        sample_std = sample.std()
        # A zero or NaN std (e.g. a single trade) fails the comparison, so the
        # Sharpe ratio falls back to 0 instead of dividing by zero.
        sharpe = sample.mean() / sample_std * annualisation if sample_std > 0 else 0
        sharpe_samples.append(sharpe)

    ci_lower = np.percentile(sharpe_samples, 2.5)
    ci_upper = np.percentile(sharpe_samples, 97.5)

    print(f"\nSharpe Ratio 95% Confidence Interval: [{ci_lower:.2f}, {ci_upper:.2f}]")

## 8. Comparison Table

In [None]:
# Side-by-side table of in-sample vs out-of-sample metrics.
# Values are looked up by metric name instead of relying on both dicts having
# the same keys in the same order, so a metric present in only one period
# shows NaN for the other rather than misaligning rows or raising on
# mismatched column lengths.
metric_names = list(train_metrics.keys())
metric_names += [name for name in test_metrics.keys() if name not in train_metrics]

comparison = pd.DataFrame({
    'Metric': metric_names,
    'In-Sample': [train_metrics.get(name, np.nan) for name in metric_names],
    'Out-of-Sample': [test_metrics.get(name, np.nan) for name in metric_names],
})

print("\n" + "="*80)
print("PERFORMANCE COMPARISON")
print("="*80)
print(comparison.to_string(index=False))
print("="*80)
print("="*80)