## Regime Switching for Pairs Trading

When trading a spread between two correlated assets (like BTC and ETH), the key question is:
**when should I enter and exit positions?**

Let's look at real data to see why this is tricky...

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fetch_data import fetch_btc_eth_history

# Dark theme for the figures drawn below
plt.style.use("dark_background")

# Request 17 days of BTC/ETH history at a 1h interval
df = fetch_btc_eth_history(days=17, interval="1h")

# Single panel: the log ratio column plotted against the open_time column
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(df["open_time"], df["log_ratio"], linewidth=1.2, color="cyan")

# Titles and axis labels
ax.set_title(
    "BTC/ETH Spread: When Should We Enter and Exit?",
    fontweight="bold",
    fontsize=14,
)
ax.set_ylabel("log(BTC/ETH)", fontsize=11)
ax.set_xlabel("Date", fontsize=11)

# Cosmetics: slanted x tick labels and a faint grid
ax.tick_params(axis="x", rotation=30)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
from fetch_data import estimate_ou_parameters

# Time step and training-window length: 24 observations per day, 14 days of training
dt = 1 / 24  # one hourly step expressed in days
train_hours = 14 * 24

spread = df["log_ratio"].values
times = df["open_time"]

# First `train_hours` observations are the training window; everything after
# that is the test window (nominally the last 3 days of the 17 fetched)
train_spread, test_spread = spread[:train_hours], spread[train_hours:]
train_times, test_times = times[:train_hours], times[train_hours:]

# Only the training window is used for estimation
params = estimate_ou_parameters(train_spread, dt)
mu = train_spread.mean()  # empirical mean of the training spread
sigma = train_spread.std()  # empirical std dev of the training spread (not the OU sigma)

print(f"Parameters estimated from first 14 days:")
print(f"  Mean (μ): {mu:.4f}")
print(f"  Std dev (σ): {sigma:.4f}")
print(f"  OU half-life: {params['half_life']*24:.1f} hours")

# Band edges are the same for both panels, so compute them once
inner_lo, inner_hi = mu - sigma, mu + sigma
outer_lo, outer_hi = mu - 2*sigma, mu + 2*sigma

# One panel per window, sharing the y axis so the bands line up visually
fig, axes = plt.subplots(1, 2, figsize=(14, 5), sharey=True)
panels = [
    (train_spread, train_times, "Training Period: 14 days (parameters estimated here)"),
    (test_spread, test_times, "Test Period: 3 days (how would it perform?)"),
]

for ax, (s, t, title) in zip(axes, panels):
    # Spread and the training mean
    ax.plot(t, s, color="cyan", linewidth=1.2)
    ax.axhline(mu, color="white", linestyle="--", linewidth=1.5, label=f"μ = {mu:.3f}")

    # Green band within one sigma of the mean; red bands between one and two sigma.
    # Only the lower red band carries a label so the legend shows it once.
    ax.fill_between(t, inner_lo, inner_hi, color="green", alpha=0.25, label="±1σ")
    ax.fill_between(t, outer_lo, inner_lo, color="red", alpha=0.25, label="±2σ")
    ax.fill_between(t, inner_hi, outer_hi, color="red", alpha=0.25)

    ax.set_title(title, fontsize=12, fontweight="bold")
    ax.set_xlabel("Date", fontsize=11)
    ax.tick_params(axis="x", rotation=30)
    ax.grid(True, alpha=0.3)

# The y axis is shared, so only the left panel gets the label and the legend
left_panel = axes[0]
left_panel.set_ylabel("log(BTC/ETH)", fontsize=11)
left_panel.legend(loc="upper right", fontsize=9)

plt.tight_layout()
plt.show()

In [None]:
def backtest_threshold(spread: np.ndarray, mu: float, threshold: float, fee: float = 0.001) -> float:
    """
    Backtest a simple threshold strategy on spread data.

    Strategy:
    - Go long when spread < mu - threshold (expect reversion up)
    - Go short when spread > mu + threshold (expect reversion down)
    - Exit (go flat) when spread reaches or crosses mu
    - Close any position still open at the last observation, at that
      observation's value

    The first observation is never traded on: scanning starts at the second
    element of ``spread``. At most one unit position is held at a time.

    Args:
        spread: Sequence of spread values in time order.
        mu: Mean level used as the exit line and as the centre of the
            entry bands.
        threshold: Distance from ``mu`` beyond which a position is opened.
        fee: Cost charged on every entry and every exit, including the
            forced close at the end of the data. It is subtracted directly
            from PnL, so it is expressed in the same units as ``spread``.

    Returns:
        Total PnL after fees. ``0.0`` when no trade was ever opened
        (including empty or single-element input).
    """
    position = 0  # -1 short, 0 flat, +1 long
    pnl = 0.0
    entry_price = 0.0

    for price in spread[1:]:
        # Exits are evaluated before entries so that a position closed on this
        # bar can, in principle, be followed by a fresh entry on the same bar.
        if position == 1 and price >= mu:  # Long position, back at/above mean
            pnl += (price - entry_price) - fee
            position = 0
        elif position == -1 and price <= mu:  # Short position, back at/below mean
            pnl += (entry_price - price) - fee
            position = 0

        if position == 0:
            if price < mu - threshold:  # Enter long
                position = 1
                entry_price = price
                pnl -= fee
            elif price > mu + threshold:  # Enter short
                position = -1
                entry_price = price
                pnl -= fee

    # Liquidate whatever is still open at the final observation. This exit is
    # a trade like any other, so it pays the fee too; otherwise runs that end
    # with an open position would report inflated PnL.
    if position != 0:
        pnl += position * (spread[-1] - entry_price) - fee

    return pnl

# Grid-search the entry threshold on the training window (overfit on purpose!)
thresholds = np.linspace(0.001, 3 * sigma, 50)
train_pnls = [
    backtest_threshold(train_spread, mu, candidate) for candidate in thresholds
]
best_idx = np.argmax(train_pnls)
best_threshold, best_train_pnl = thresholds[best_idx], train_pnls[best_idx]

# Apply the threshold picked in-sample to the held-out window
test_pnl = backtest_threshold(test_spread, mu, best_threshold)

print(f"Best threshold (overfit to training): {best_threshold:.4f} ({best_threshold/sigma:.2f}σ)")
print(f"Training PnL: {best_train_pnl:.4f}")
print(f"Test PnL:     {test_pnl:.4f}")

# Side-by-side view of both windows with the chosen entry lines
fig, axes = plt.subplots(1, 2, figsize=(14, 5), sharey=True)

# Each row: axis, spread values, timestamps, panel title, PnL.
# The PnL entry is unpacked by the loop but not used in its body.
panel_specs = [
    (axes[0], train_spread, train_times, f"Training: 14 days (PnL: {best_train_pnl:.4f})", best_train_pnl),
    (axes[1], test_spread, test_times, f"Test: 3 days (PnL: {test_pnl:.4f})", test_pnl),
]

for ax, s, t, title, pnl in panel_specs:
    ax.plot(t, s, color="cyan", linewidth=1.2)

    # Mean line, then the upper and lower entry lines (only the upper one is
    # labelled so the legend lists the threshold once)
    ax.axhline(mu, color="white", linestyle="--", linewidth=1.5, label=f"μ = {mu:.3f}")
    ax.axhline(mu + best_threshold, color="red", linestyle=":", linewidth=1.5, label=f"±{best_threshold/sigma:.2f}σ threshold")
    ax.axhline(mu - best_threshold, color="red", linestyle=":", linewidth=1.5)

    # Shade the region between the two entry lines
    ax.fill_between(t, mu - best_threshold, mu + best_threshold, color="gray", alpha=0.15)

    ax.set_title(title, fontsize=12, fontweight="bold")
    ax.set_xlabel("Date", fontsize=11)
    ax.tick_params(axis="x", rotation=30)
    ax.grid(True, alpha=0.3)

# Shared y axis: label and legend go on the left panel only
left_panel = axes[0]
left_panel.set_ylabel("log(BTC/ETH)", fontsize=11)
left_panel.legend(loc="upper right", fontsize=9)

plt.suptitle("Overfit Strategy: Best Threshold on Training Data", fontsize=13, fontweight="bold", y=1.02)
plt.tight_layout()
plt.show()

## Paper Simplified
https://arxiv.org/abs/2512.04697v1

## Model training data

## Run simulations

## Train on simulated Data

## Coming back to pairs trading

## Backtesting