# No-Arbitrage Bound Validation

This notebook validates the clamp-adjusted no-arbitrage bounds and replicates the key finding that >95% of price ratios stay within bounds.

In [None]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
from datetime import datetime

from data_acquisition import DataAcquisition
from signal_generator import SignalGenerator

# Apply seaborn's 'darkgrid' style to the figures drawn below.
sns.set_style('darkgrid')
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

## 1. Load Data

In [None]:
# Read the YAML run configuration; the same dict is handed to both the
# data-acquisition and signal-generation components.
with open('../config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

data_acq = DataAcquisition(config)
signal_gen = SignalGenerator(config)

# The backtest window is parsed from the config using the '%Y-%m-%d' format.
date_format = '%Y-%m-%d'
start_date = datetime.strptime(config['backtest']['start_date'], date_format)
end_date = datetime.strptime(config['backtest']['end_date'], date_format)

# Build the BTCUSDT dataset for the configured window and report its size.
df = data_acq.prepare_dataset('BTCUSDT', start_date, end_date)
print(f"Data loaded: {len(df)} rows")

## 2. Calculate Bounds

In [None]:
# Generate the signal frame; its 'upper_bound' / 'lower_bound' columns are
# summarised below.
signals = signal_gen.generate_signals(df)

print("\nBound Statistics:")

# Averages are multiplied by 10,000 so they are reported in basis points.
avg_upper_bound_bps = signals['upper_bound'].mean() * 10000
avg_lower_bound_bps = signals['lower_bound'].mean() * 10000
avg_bound_width_bps = (signals['upper_bound'] - signals['lower_bound']).mean() * 10000

print(f"Avg Upper Bound: {avg_upper_bound_bps:.4f} bps")
print(f"Avg Lower Bound: {avg_lower_bound_bps:.4f} bps")
print(f"Avg Bound Width: {avg_bound_width_bps:.4f} bps")

## 3. Visualize Bounds vs Premium Index

In [None]:
# One figure with a single Axes; everything below is drawn on `ax`.
fig, ax = plt.subplots(figsize=(16, 8))

# Scale each series by 10,000 so the y-axis reads in basis points.
premium_bps = signals['premium_index'] * 10000
upper_bps = signals['upper_bound'] * 10000
lower_bps = signals['lower_bound'] * 10000

# Premium index as a thin solid line; bounds as dashed red / green lines.
ax.plot(signals.index, premium_bps, label='Premium Index', alpha=0.6, linewidth=0.8)
ax.plot(signals.index, upper_bps, 'r--', label='Upper Bound', linewidth=1.5)
ax.plot(signals.index, lower_bps, 'g--', label='Lower Bound', linewidth=1.5)

# Shade the band between the lower and upper bound.
ax.fill_between(signals.index, lower_bps, upper_bps, alpha=0.2, color='gray', label='No-Arbitrage Zone')

# Overlay the rows flagged with signal_active == 1 (skipped when there are none).
breaches = signals[signals['signal_active'] == 1]
if len(breaches):
    ax.scatter(breaches.index, breaches['premium_index'] * 10000,
               color='red', s=20, alpha=0.7, label='Arbitrage Opportunities', zorder=5)

ax.set_title('Premium Index vs No-Arbitrage Bounds (Figure 5 Replication)', fontsize=14)
ax.set_xlabel('Date')
ax.set_ylabel('Premium (bps)')
ax.legend(loc='best')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 4. Validate Model (>95% Within Bounds)

In [None]:
# Share of observations inside the bounds, as reported by the signal generator.
# NOTE(review): assumed to be on a 0-100 percentage scale, since it is printed
# with a '%' suffix and compared against 95 - confirm against validate_bounds.
pct_within = signal_gen.validate_bounds(df, signals)

# Single source of truth for the acceptance threshold, used by both the
# printed target and the pass/fail check so the two cannot drift apart.
TARGET_PCT_WITHIN = 95

# The stated target is ">95%", so the comparison is strict: exactly 95% fails.
status = '✓ PASS' if pct_within > TARGET_PCT_WITHIN else '✗ FAIL'
banner = '=' * 60

print(f"\n{banner}")
print("MODEL VALIDATION RESULT")
print(banner)
print(f"Percentage within bounds: {pct_within:.2f}%")
print(f"Target: >{TARGET_PCT_WITHIN}%")
print(f"Status: {status}")
print(banner)

## 5. Breach Analysis

In [None]:
# Rows where the generator flagged an active signal.
breaches = signals[signals['signal_active'] == 1]

# Guard the share calculation: an empty `signals` frame would otherwise raise
# ZeroDivisionError here; report 0.00% in that case.
total_rows = len(signals)
breach_share_pct = len(breaches) / total_rows * 100 if total_rows else 0.0

print(f"\nBreach Statistics:")
print(f"Total breaches: {len(breaches)} ({breach_share_pct:.2f}%)")
# Direction is read from the 'signal' column: +1 is counted as an upper-bound
# breach and -1 as a lower-bound breach.
print(f"Upper bound breaches: {(signals['signal'] == 1).sum()}")
print(f"Lower bound breaches: {(signals['signal'] == -1).sum()}")

# Distance statistics are only meaningful when at least one breach exists.
if len(breaches) > 0:
    print(f"\nAvg distance from bound: {breaches['distance_from_bound'].mean() * 10000:.4f} bps")
    print(f"Max distance from bound: {breaches['distance_from_bound'].max() * 10000:.4f} bps")

## 6. Bound Sensitivity Analysis

In [None]:
# Test different clamp values
clamp_values = [0.0003, 0.0005, 0.0007, 0.001]  # 3, 5, 7, 10 bps

results = []
for clamp in clamp_values:
    # Build a per-run config with its OWN 'parameters' dict. dict.copy() is
    # shallow, so assigning into config_test['parameters'] on a plain copy
    # would also rewrite config['parameters'] - the baseline configuration
    # that `signal_gen` was constructed from.
    config_test = {**config, 'parameters': {**config['parameters'], 'delta': clamp}}

    signal_gen_test = SignalGenerator(config_test)
    signals_test = signal_gen_test.generate_signals(df)
    # Dedicated name so the baseline `pct_within` computed by the validation
    # cell is not overwritten by the last sensitivity run.
    pct_within_test = signal_gen_test.validate_bounds(df, signals_test)

    results.append({
        'Clamp (bps)': clamp * 10000,
        'Pct Within Bounds': pct_within_test,
        'Num Breaches': signals_test['signal_active'].sum()
    })

# One row per clamp value: coverage and number of flagged rows.
results_df = pd.DataFrame(results)
print("\nSensitivity Analysis:")
print(results_df.to_string(index=False))

# Plot coverage (left) and breach count (right) against the clamp size.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

axes[0].plot(results_df['Clamp (bps)'], results_df['Pct Within Bounds'], marker='o')
axes[0].axhline(y=95, color='red', linestyle='--', label='95% Target')
axes[0].set_xlabel('Clamp (bps)')
axes[0].set_ylabel('% Within Bounds')
axes[0].set_title('Bound Coverage vs Clamp Size')
axes[0].legend()
axes[0].grid(True)

axes[1].plot(results_df['Clamp (bps)'], results_df['Num Breaches'], marker='o', color='orange')
axes[1].set_xlabel('Clamp (bps)')
axes[1].set_ylabel('Number of Breaches')
axes[1].set_title('Arbitrage Opportunities vs Clamp Size')
axes[1].grid(True)

plt.tight_layout()
plt.show()

## 7. Comparison with Traditional Model

In [None]:
# Traditional model (no clamp): same configuration with delta forced to 0.0.
# The 'parameters' dict is rebuilt rather than edited through config.copy(),
# because a shallow copy shares the nested dict and the assignment would also
# change the baseline `config`.
config_traditional = {**config, 'parameters': {**config['parameters'], 'delta': 0.0}}

signal_gen_trad = SignalGenerator(config_traditional)
signals_trad = signal_gen_trad.generate_signals(df)

plt.figure(figsize=(16, 8))

# Plot premium (series in bps computed in the visualisation cell above)
plt.plot(signals.index, premium_bps, label='Premium Index', alpha=0.6, linewidth=0.8)

# Clamp-adjusted bounds
plt.plot(signals.index, upper_bps, 'r--', label='Upper Bound (Clamp-Adjusted)', linewidth=1.5)
plt.plot(signals.index, lower_bps, 'g--', label='Lower Bound (Clamp-Adjusted)', linewidth=1.5)

# Traditional bounds, dotted so they can be told apart from the dashed
# clamp-adjusted bounds of the same colour.
plt.plot(signals_trad.index, signals_trad['upper_bound'] * 10000, 'r:',
         label='Upper Bound (Traditional)', linewidth=1.5, alpha=0.7)
plt.plot(signals_trad.index, signals_trad['lower_bound'] * 10000, 'g:',
         label='Lower Bound (Traditional)', linewidth=1.5, alpha=0.7)

plt.title('Clamp-Adjusted vs Traditional No-Arbitrage Bounds', fontsize=14)
plt.xlabel('Date')
plt.ylabel('Premium (bps)')
plt.legend(loc='best')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Recompute the baseline coverage from the baseline generator and signals
# instead of relying on a `pct_within` value left over from an earlier cell,
# which may have been reassigned since it was first computed.
pct_within_clamp = signal_gen.validate_bounds(df, signals)
pct_within_trad = signal_gen_trad.validate_bounds(df, signals_trad)

print(f"\nModel Comparison:")
print(f"Clamp-Adjusted: {pct_within_clamp:.2f}% within bounds")
print(f"Traditional: {pct_within_trad:.2f}% within bounds")