# Kiak — Cointegration & Adaptive β Demo

This notebook illustrates how Engle–Granger cointegration filtering and an exponentially smoothed (EMA) hedge ratio β improve the stability of the lead–lag strategy.

## 1. Generate synthetic price data
We simulate four assets (two crypto, two indices) with shared stochastic trends so that some pairs are cointegrated and others are only loosely correlated.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from correlation_analyzer import CorrelationAnalyzer
from crossasset_leadlag_model import CrossAssetLeadLagModel, ModelConfig
from backtester import Backtester, BacktestConfig

# Seed NumPy's legacy global RNG so the simulated series below are reproducible,
# then switch matplotlib to the 'seaborn-v0_8' style sheet for all later figures.
np.random.seed(1)
plt.style.use('seaborn-v0_8')

# Simulate prices
# One-minute bars. 'min' is the supported alias for minute frequency; the
# single-letter 'T' alias is deprecated (FutureWarning since pandas 2.2).
n_periods = 800
time_index = pd.date_range('2024-01-01', periods=n_periods, freq='min')

# Two independent random walks act as the shared stochastic trends.
# NOTE: the order of the draws determines the simulated values for a given
# seed, so keep the statements below in this order.
base_crypto = np.cumsum(np.random.normal(0, 1.0, n_periods))
macro_trend = np.cumsum(np.random.normal(0, 0.3, n_periods))

# Asset-specific white noise layered on top of the trends.
noise_btc = np.random.normal(0, 3, n_periods)
noise_eth = np.random.normal(0, 2, n_periods)
noise_sp = np.random.normal(0, 0.6, n_periods)
noise_ndq = np.random.normal(0, 0.9, n_periods)

# Both crypto series load on base_crypto and macro_trend;
# both index series load on macro_trend only.
prices = pd.DataFrame({
    'BTCUSDT': 20000 + base_crypto + 8 * macro_trend + noise_btc,
    'ETHUSDT': 1200 + 0.55 * base_crypto + 5 * macro_trend + noise_eth,
    'SP500': 4000 + macro_trend + noise_sp,
    'NASDAQ': 12000 + 2.0 * macro_trend + noise_ndq,
}, index=time_index)

prices.head()

## 2. Correlation scan and Engle–Granger filtering
We rank all pairs by correlation and then retain only those with Engle–Granger p-values below 0.05.

In [None]:
# Build the analyzer on the simulated prices and scan every column for
# candidate pairs, using a correlation floor of 0.0.
analyzer = CorrelationAnalyzer(prices)
asset_names = list(prices.columns)
candidate_pairs = analyzer.find_pairs(asset_names, min_correlation=0.0)

# Preview only the first three candidates to keep the output short.
top_three = candidate_pairs[:3]
print(f"Top correlated pairs: {top_three}")

# Apply the cointegration filter at the 5% significance level and display
# the surviving pairs as the cell output.
cointegrated_pairs = analyzer.filter_cointegrated_pairs(
    candidate_pairs,
    significance_level=0.05,
)
cointegrated_pairs

## 3. Run lead–lag model with rolling vs adaptive β
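We compare a pair chosen on correlation alone (`BTCUSDT`/`SP500`) with two pairs treated as cointegrated: `BTCUSDT`/`ETHUSDT` (primary) and `SP500`/`NASDAQ` (secondary). These pairs are hard-coded in the cell below, so check them against the `cointegrated_pairs` output from Section 2. For each pair we run the legacy rolling β and the new EMA-based β.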

In [None]:
# One backtester shared by every run, so costs and sizing are identical
# across pairs and β modes.
backtester = Backtester(BacktestConfig(transaction_cost=0.0002, position_size=0.3))

# Zero-argument factories: each run below calls one to get its own ModelConfig.
model_configs = {
    'rolling': lambda: ModelConfig(window=60, beta_mode='rolling'),
    'ema': lambda: ModelConfig(window=60, beta_mode='ema', beta_span=12),
}

# Selection label -> (leader, lagger) symbols to evaluate.
pairs_to_evaluate = {
    'Correlation-only': ('BTCUSDT', 'SP500'),
    'Cointegrated (primary)': ('BTCUSDT', 'ETHUSDT'),
    'Cointegrated (secondary)': ('SP500', 'NASDAQ'),
}

# results[label][mode] keeps the strategy output for the plots further down;
# metrics_rows collects one summary record per (pair, mode) run.
results = {}
metrics_rows = []

for label, (leader, lagger) in pairs_to_evaluate.items():
    signals_by_mode = {}
    results[label] = signals_by_mode
    for mode_name, make_config in model_configs.items():
        model = CrossAssetLeadLagModel(make_config())
        signals = model.run_strategy(prices, leader, lagger)
        signals_by_mode[mode_name] = signals

        # A missing 'metrics' entry falls back to {} so the row holds None values.
        metrics = backtester.run_backtest(signals, prices, leader, lagger).get('metrics', {})

        if mode_name == 'ema':
            mode_label = 'EMA β (span=12)'
        else:
            mode_label = 'Rolling β'

        metrics_rows.append(dict(
            pair=f"{leader}-{lagger}",
            selection=label,
            mode=mode_label,
            sharpe=metrics.get('sharpe_ratio'),
            num_trades=metrics.get('num_trades'),
            total_return_pct=metrics.get('total_return_pct'),
        ))

# Build the summary frame once from the collected records and display it.
metrics_df = pd.DataFrame(metrics_rows)
metrics_df

In [None]:
# Pick the two Sharpe ratios being compared: the correlation-only pair with the
# rolling β (baseline) versus the primary cointegrated pair with the EMA β.
baseline_mask = (metrics_df['selection'] == 'Correlation-only') & (metrics_df['mode'] == 'Rolling β')
adaptive_mask = (metrics_df['selection'] == 'Cointegrated (primary)') & (metrics_df['mode'] == 'EMA β (span=12)')
baseline_sharpe = metrics_df.loc[baseline_mask, 'sharpe'].iloc[0]
adaptive_sharpe = metrics_df.loc[adaptive_mask, 'sharpe'].iloc[0]

# A percentage change is only defined against a finite, non-zero baseline.
# The Sharpe values can be None/NaN when the backtest reports no metric, and a
# zero baseline would otherwise print a misleading inf/nan percentage.
if pd.isna(baseline_sharpe) or pd.isna(adaptive_sharpe) or baseline_sharpe == 0:
    improvement = float('nan')
    print("Sharpe improvement from correlation-only to cointegration+EMA: "
          "undefined (baseline Sharpe is zero or a Sharpe value is missing)")
else:
    # abs() keeps the sign of the improvement meaningful for a negative baseline.
    improvement = (adaptive_sharpe - baseline_sharpe) / abs(baseline_sharpe) * 100
    print(f"Sharpe improvement from correlation-only to cointegration+EMA: {improvement:.2f}%")

## 4. Spread stability before/after smoothing
For each pair, the plots below overlay the spread computed with the rolling β and the spread computed with the EMA-smoothed β.

In [None]:
# One stacked panel per evaluated pair, sharing the time axis.
# squeeze=False makes plt.subplots always return a 2-D Axes array, so this cell
# keeps working if pairs_to_evaluate is reduced to a single pair (the default
# would return a bare Axes object that cannot be zipped or indexed).
n_pairs = len(pairs_to_evaluate)
fig, axes = plt.subplots(n_pairs, 1, figsize=(12, 4 * n_pairs), sharex=True, squeeze=False)
axes = axes[:, 0]  # single column -> flat 1-D array of Axes

for ax, (label, (leader, lagger)) in zip(axes, pairs_to_evaluate.items()):
    # Drop rows where the spread is NaN before plotting.
    roll_spread = results[label]['rolling']['spread'].dropna()
    ema_spread = results[label]['ema']['spread'].dropna()
    ax.plot(roll_spread.index, roll_spread, label='Rolling β spread', alpha=0.5)
    ax.plot(ema_spread.index, ema_spread, label='EMA β spread', alpha=0.8)
    ax.set_title(f"{leader} vs {lagger} — {label}")
    ax.set_ylabel('Spread')
    ax.grid(True, alpha=0.3)

# Shared x-axis: label only the bottom panel; one legend on the top panel.
axes[-1].set_xlabel('Timestamp')
axes[0].legend(loc='upper right')
plt.tight_layout()

## 5. Adaptive β trace
The EMA hedge ratio dampens noise while following the long-term trend.

In [None]:
# Raw versus smoothed hedge ratio for the primary cointegrated pair (EMA run),
# restricted to rows where both columns are present.
primary_ema_run = results['Cointegrated (primary)']['ema']
beta_view = primary_ema_run[['beta_raw', 'beta']].dropna()

# Explicit Figure/Axes interface; draws the same single-panel chart.
beta_fig, beta_ax = plt.subplots(figsize=(12, 4))
beta_ax.plot(beta_view.index, beta_view['beta_raw'], label='Raw rolling β', alpha=0.5)
beta_ax.plot(beta_view.index, beta_view['beta'], label='EMA β (span=12)', linewidth=2)
beta_ax.set_title('BTCUSDT vs ETHUSDT — Hedge ratio comparison')
beta_ax.set_ylabel('β')
beta_ax.set_xlabel('Timestamp')
beta_ax.legend()
beta_ax.grid(True, alpha=0.3)
beta_fig.tight_layout()

## 6. Recent signals snapshot
The table highlights the latest signals after applying the adaptive β.

In [None]:
# Display the last ten rows of the EMA-β run for the primary cointegrated pair.
primary_ema_signals = results['Cointegrated (primary)']['ema']
primary_ema_signals.tail(10)