# Backtest Results Analysis

This notebook analyzes the performance of the Canadian Bond Day Count Arbitrage strategy
using historical backtest results.

## Objectives
1. Run complete backtest
2. Analyze performance metrics
3. Validate control test results
4. Assess strategy viability

In [None]:
import sys
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

# The project modules live one directory above this notebook.
sys.path.append('..')
from backtester import CanadianBondArbitrageBacktester

# Plot defaults applied to every figure created in this notebook:
# seaborn's white-grid theme and a wide default canvas.
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

## 1. Run Backtest

In [None]:
# Build the backtester from the configuration file in the parent directory.
backtester = CanadianBondArbitrageBacktester('../config.yaml')

# Run the backtest over calendar year 2023.
# run_control=True presumably also produces the control-test results that the
# summary cell reads from results['control_test'] — TODO confirm in backtester.
print("Running backtest...")
results = backtester.run(
    start_date='2023-01-01',
    end_date='2023-12-31',
    run_control=True
)

# The status marker was mis-encoded (mojibake); restored to the intended emoji.
print("\n✅ Backtest complete!")

## 2. Performance Summary

In [None]:
# Print headline metrics for the main strategy and, when present, the control test.
# Section markers were mis-encoded (mojibake); restored to the intended emoji.
main = results['main_backtest']
control = results['control_test']  # may be None; checked before use below

print("\n" + "="*80)
print("BACKTEST PERFORMANCE SUMMARY")
print("="*80)

print("\n📊 MAIN STRATEGY (181/182-day periods):")
print(f"  Total Trades: {main['num_trades']}")
print(f"  Winning Trades: {main['num_wins']}")
print(f"  Win Rate: {main['win_rate']:.2%}")
print(f"  Average Return: {main['avg_return_bps']:.2f} bps")
print(f"  Median Return: {main['median_return_bps']:.2f} bps")
print(f"  Std Dev: {main['std_return_bps']:.2f} bps")
print(f"  Sharpe Ratio: {main['sharpe_ratio']:.2f}")
print(f"  Total P&L: ${main['total_pnl']:,.2f}")
print(f"  Max Drawdown: ${main['max_drawdown']:,.2f}")
print(f"  Total Return: {main['total_return_pct']:.2f}%")

# Control metrics and the alpha figure are only reported when a control test exists.
if control is not None:
    print("\n📊 CONTROL TEST (Non-target periods):")
    print(f"  Total Trades: {control['num_trades']}")
    print(f"  Win Rate: {control['win_rate']:.2%}")
    print(f"  Average Return: {control['avg_return_bps']:.2f} bps")
    print(f"  Total P&L: ${control['total_pnl']:,.2f}")

    print("\n🎯 STRATEGY ALPHA:")
    alpha = results['strategy_alpha']
    print(f"  Alpha: {alpha:.2f} bps")
    print(f"  Interpretation: Strategy outperforms by {alpha:.2f} bps on average")

print("\n💰 COST ANALYSIS:")
print(f"  Avg Transaction Costs: ${main['avg_transaction_costs']:,.2f}")
print(f"  Avg Slippage: ${main['avg_slippage']:,.2f}")
print(f"  Avg Days Held: {main['avg_days_held']:.1f}")

print("\n" + "="*80)

## 3. Trade Analysis

In [None]:
# Load the per-trade log (presumably written by the backtest run — TODO confirm).
# On failure fall back to an empty frame so the later `len(trades) > 0` guards
# skip their analysis instead of crashing.
try:
    # Only the read is guarded, so errors in the preview below are not masked.
    trades = pd.read_csv(
        '../data/backtest_trades.csv',
        parse_dates=['entry_date', 'exit_date']
    )
except (FileNotFoundError, pd.errors.EmptyDataError):
    # Missing file, or a zero-byte file with no header row to parse.
    print("\n⚠️ No trade log found. Backtest may not have generated trades.")
    trades = pd.DataFrame()
else:
    print(f"\nAnalyzing {len(trades)} trades...")

    # Show the first few trades, restricted to the most informative columns.
    print("\nSample Trades:")
    display_cols = [
        'bond_identifier', 'entry_date', 'exit_date', 'coupon_period_length',
        'return_bps', 'net_pnl', 'success'
    ]
    print(trades[display_cols].head(10))

## 4. Performance Visualizations

In [None]:
# Four-panel overview: cumulative P&L, return histogram, returns grouped by
# coupon period length, and the win/loss split.
if len(trades) > 0:
    fig, axes = plt.subplots(2, 2, figsize=(16, 10))
    ax_pnl, ax_hist = axes[0]
    ax_box, ax_pie = axes[1]

    # 1. Cumulative P&L, in the row order of the trade log.
    trades['cumulative_pnl'] = trades['net_pnl'].cumsum()
    ax_pnl.plot(trades.index, trades['cumulative_pnl'], linewidth=2, color='green')
    ax_pnl.fill_between(trades.index, 0, trades['cumulative_pnl'], alpha=0.3, color='green')
    ax_pnl.set_xlabel('Trade Number')
    ax_pnl.set_ylabel('Cumulative P&L ($)')
    ax_pnl.set_title('Cumulative Profit & Loss')
    ax_pnl.grid(True, alpha=0.3)
    ax_pnl.axhline(0, color='black', linestyle='--', linewidth=1)

    # 2. Return distribution with the mean marked.
    mean_return_bps = trades['return_bps'].mean()
    ax_hist.hist(trades['return_bps'], bins=30, edgecolor='black', alpha=0.7, color='steelblue')
    ax_hist.axvline(mean_return_bps, color='red', linestyle='--',
                    label=f"Mean: {mean_return_bps:.2f} bps")
    ax_hist.set_xlabel('Return (bps)')
    ax_hist.set_ylabel('Frequency')
    ax_hist.set_title('Distribution of Returns')
    ax_hist.legend()
    ax_hist.grid(True, alpha=0.3, axis='y')

    # 3. Returns by coupon period
    trades.boxplot(column='return_bps', by='coupon_period_length', ax=ax_box)
    # pandas adds a figure-level "Boxplot grouped by ..." suptitle; clear it so
    # it does not sit on top of the other panels' titles.
    fig.suptitle('')
    ax_box.set_xlabel('Coupon Period Length (days)')
    ax_box.set_ylabel('Return (bps)')
    ax_box.set_title('Returns by Coupon Period')
    ax_box.tick_params(axis='x', labelrotation=0)

    # 4. Win/Loss breakdown.
    # value_counts() orders by frequency, so pairing its output with a fixed
    # ['Loss', 'Win'] list mislabels the wedges whenever wins outnumber losses.
    # Count each outcome explicitly and keep label/colour attached to it.
    # Assumes 'success' is boolean-like (True/False or 1/0) — TODO confirm.
    is_win = trades['success'].astype(bool)
    n_wins = int(is_win.sum())
    n_losses = len(is_win) - n_wins
    outcomes = [('Loss', 'red', n_losses), ('Win', 'green', n_wins)]
    # Drop empty outcomes so an all-win (or all-loss) log still renders cleanly.
    outcomes = [outcome for outcome in outcomes if outcome[2] > 0]
    pie_labels, pie_colors, pie_sizes = zip(*outcomes)
    ax_pie.pie(pie_sizes, labels=pie_labels, autopct='%1.1f%%',
               colors=pie_colors, startangle=90)
    ax_pie.set_title('Win/Loss Distribution')

    plt.tight_layout()
    plt.show()
else:
    print("⚠️ No trades to visualize")

## 5. Statistical Validation

In [None]:
# Significance and normality checks on per-trade returns (in bps).
# `stats` is scipy.stats, imported in the notebook's top import cell.
if len(trades) > 0:
    print("\n" + "="*80)
    print("STATISTICAL VALIDATION")
    print("="*80)

    returns_bps = trades['return_bps']

    # T-test: are returns significantly different from zero?
    # scipy's default is two-sided, so the sign of t_stat gives the direction.
    t_stat, p_value = stats.ttest_1samp(returns_bps, 0)

    print("\nT-Test (Returns vs Zero):")
    print(f"  T-Statistic: {t_stat:.4f}")
    print(f"  P-Value: {p_value:.6f}")

    if p_value < 0.05:
        print("  ✅ Returns are statistically significant (p < 0.05)")
    else:
        print("  ❌ Returns are NOT statistically significant (p >= 0.05)")

    # Normality test. Shapiro-Wilk needs at least three observations, so skip
    # it for tiny samples rather than letting the cell fail.
    print("\nNormality Test (Shapiro-Wilk):")
    if len(returns_bps) >= 3:
        _, p_norm = stats.shapiro(returns_bps)
        print(f"  P-Value: {p_norm:.6f}")

        if p_norm > 0.05:
            print("  Returns appear normally distributed")
        else:
            print("  Returns do NOT appear normally distributed")
    else:
        p_norm = float('nan')
        print("  Skipped: at least 3 trades are required")

    # Compare 181 vs 182 day periods (only when both are present in the log).
    period_lengths = trades['coupon_period_length']
    if 181 in period_lengths.values and 182 in period_lengths.values:
        returns_181 = trades.loc[period_lengths == 181, 'return_bps']
        returns_182 = trades.loc[period_lengths == 182, 'return_bps']

        # Independent two-sample t-test with scipy's defaults.
        t_stat_comp, p_comp = stats.ttest_ind(returns_181, returns_182)

        print("\nComparison: 181-day vs 182-day periods:")
        print(f"  181-day avg return: {returns_181.mean():.2f} bps")
        print(f"  182-day avg return: {returns_182.mean():.2f} bps")
        print(f"  T-Statistic: {t_stat_comp:.4f}")
        print(f"  P-Value: {p_comp:.6f}")

        if p_comp < 0.05:
            print("  ✅ Significant difference between period types")
        else:
            print("  No significant difference between period types")

    print("\n" + "="*80)

## 6. Risk Metrics

In [None]:
# Tail-risk and drawdown statistics computed from the trade log.
if len(trades) > 0:
    print("\n" + "="*80)
    print("RISK ANALYSIS")
    print("="*80)

    # Per-trade returns in basis points as a plain NumPy array.
    returns = trades['return_bps'].values

    # Value at Risk (VaR): empirical 5th / 1st percentile of trade returns.
    var_95 = np.percentile(returns, 5)
    var_99 = np.percentile(returns, 1)

    # Conditional Value at Risk (CVaR): mean of the returns at or below each VaR.
    cvar_95 = returns[returns <= var_95].mean()
    cvar_99 = returns[returns <= var_99].mean()

    # Worst and best single-trade returns.
    max_loss = returns.min()
    max_win = returns.max()

    print("\nValue at Risk (VaR):")
    print(f"  95% VaR: {var_95:.2f} bps")
    print(f"  99% VaR: {var_99:.2f} bps")

    print("\nConditional VaR (Expected Shortfall):")
    print(f"  95% CVaR: {cvar_95:.2f} bps")
    print(f"  99% CVaR: {cvar_99:.2f} bps")

    print("\nExtreme Returns:")
    print(f"  Maximum Loss: {max_loss:.2f} bps")
    print(f"  Maximum Win: {max_win:.2f} bps")
    # The ratio only makes sense when the worst trade actually lost money;
    # otherwise the division is by zero or by a gain.
    if max_loss < 0:
        print(f"  Gain/Loss Ratio: {abs(max_win / max_loss):.2f}")
    else:
        print("  Gain/Loss Ratio: n/a (no losing trades)")

    # Drawdown analysis
    cumulative = trades['net_pnl'].cumsum()
    # The peak must include the starting equity (cumulative P&L of 0).
    # Without the clip, a run that opens with losses measures drawdown from
    # the first (negative) value and under-reports the true decline.
    running_max = cumulative.cummax().clip(lower=0)
    drawdown = cumulative - running_max
    max_dd = drawdown.min()

    print("\nDrawdown:")
    print(f"  Maximum Drawdown: ${max_dd:,.2f}")
    # NOTE(review): assumes the main results dict exposes 'initial_capital' — confirm.
    print(f"  Max DD %: {(max_dd / main['initial_capital']) * 100:.2f}%")

    print("\n" + "="*80)

## 7. Strategy Viability Assessment

In [None]:
# Score the strategy against a fixed checklist and print a verdict.
print("\n" + "="*80)
print("STRATEGY VIABILITY ASSESSMENT")
print("="*80)

# The t-test from the statistical-validation cell is two-sided, so a small
# p-value alone would also pass for significantly NEGATIVE returns.
# Require a positive t-statistic as well. With no trades the test never ran.
if len(trades) > 0:
    significant_edge = bool(p_value < 0.05 and t_stat > 0)
else:
    significant_edge = False

# Criteria for viability
criteria = {
    'Win Rate > 85%': main['win_rate'] > 0.85,
    'Avg Return > 0.5 bps': main['avg_return_bps'] > 0.5,
    'Sharpe Ratio > 1.0': main['sharpe_ratio'] > 1.0,
    'Positive Total P&L': main['total_pnl'] > 0,
    'Statistical Significance': significant_edge,
    'Sufficient Sample Size': main['num_trades'] >= 20
}

print("\nViability Criteria:")
passed = 0
for criterion, result in criteria.items():
    status = "✅ PASS" if result else "❌ FAIL"
    print(f"  {criterion:30s}: {status}")
    if result:
        passed += 1

# Share of criteria met, as a percentage.
viability_score = (passed / len(criteria)) * 100

print(f"\nViability Score: {viability_score:.1f}%")

if viability_score >= 80:
    print("\n🎯 VERDICT: Strategy appears VIABLE for implementation")
elif viability_score >= 60:
    print("\n⚠️ VERDICT: Strategy shows POTENTIAL but needs improvement")
else:
    print("\n❌ VERDICT: Strategy does NOT appear viable in current form")

print("\n⚠️ IMPORTANT NOTES:")
print("  - Results depend heavily on data quality")
print("  - Actual execution may differ from backtest")
print("  - Strategy capacity is extremely limited")
print("  - Publication risk: now public knowledge")

print("\n" + "="*80)

## Conclusions

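This notebook backtests the Canadian Bond Day Count Arbitrage strategy over 2023 and scores it against the viability criteria in Section 7. Whether the strategy is validated or invalidated depends on the results produced above.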

### Key Findings
- Review the win rate, average returns, and Sharpe ratio above
- Compare main strategy vs control test results
- Assess whether returns are statistically significant
- Evaluate if transaction costs are manageable

### Next Steps
1. If viable: Develop live execution infrastructure
2. If marginal: Optimize cost structure or refine entry/exit timing
3. If not viable: Consider abandoning or waiting for better market conditions