## Setup: Finnhub API Key

This notebook uses real market data from [Finnhub](https://finnhub.io/) (free tier: 60 API calls/min).

**To use real data:**
1. Get a free API key at https://finnhub.io/register
2. Set environment variable: `export FINNHUB_API_KEY=your_key_here`
3. Or enter the key when prompted

**Without a key** (or if the `python.finnhub_helper` module cannot be imported)**:** The notebook falls back to synthetic data generation.


# Pairs Trading
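OLS hedge: $y_t \approx \beta x_t + c$, spread $s_t = y_t - (\beta x_t + c)$, z-score $z_t=\frac{s_t-\mu_t}{\sigma_t}$, where $\mu_t$ and $\sigma_t$ are the rolling 200-period mean and standard deviation of the spread.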

In [None]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt

# Try to load the project's Finnhub helper. Any failure while importing it
# (missing module, error inside the helper) disables the real-data path.
try:
    from python.finnhub_helper import fetch_historical_simulation, get_finnhub_api_key
except Exception as e:
    FINNHUB_AVAILABLE = False
    print(f"Finnhub helper not available: {e}")
else:
    FINNHUB_AVAILABLE = True

# Only ask the helper for a key when it was imported successfully.
if FINNHUB_AVAILABLE:
    api_key = get_finnhub_api_key()
else:
    api_key = None

if not (FINNHUB_AVAILABLE and api_key):
    # Synthetic fallback: x is a random walk around 100, and y is 0.8 * x plus
    # a much smaller independent random walk, so the two series move together.
    print("Using synthetic data")
    np.random.seed(1)  # fixed seed keeps the fallback reproducible
    N = 4000
    x_steps = np.random.randn(N) * 0.3
    x = np.cumsum(x_steps) + 100
    y_noise = np.random.randn(N) * 0.05
    y = 0.8 * x + np.cumsum(y_noise)
else:
    print("Fetching real market data from Finnhub...")
    # Two crypto pairs used as the x / y legs.
    # NOTE(review): the pair is presumed to co-move; nothing here tests cointegration.
    symbols = ['BINANCE:ETHUSDT', 'BINANCE:BNBUSDT']
    df = fetch_historical_simulation(symbols, periods=4000, api_key=api_key)

    # The helper is expected to expose one "<symbol>_mid" column per symbol.
    x = df['BINANCE:ETHUSDT_mid'].values
    y = df['BINANCE:BNBUSDT_mid'].values

    print(f"Fetched {len(x)} data points")
    print(f"ETH price range: ${x.min():.2f} - ${x.max():.2f}")
    print(f"BNB price range: ${y.min():.2f} - ${y.max():.2f}")

# --- OLS hedge regression -------------------------------------------------
# Fit y ≈ beta * x + c by least squares; the design matrix is [x, 1].
# NOTE: the fit uses the full sample, so beta and c are in-sample estimates.
X = np.vstack([x, np.ones_like(x)]).T
beta, c = np.linalg.lstsq(X, y, rcond=None)[0]
print(f"Hedge ratio (beta): {beta:.4f}, Intercept: {c:.4f}")

# --- Spread and rolling z-score -------------------------------------------
WINDOW = 200   # look-back (in bars) for the rolling mean / std of the spread
ENTRY_Z = 2    # enter when |z| exceeds this many rolling standard deviations

spread = y - (beta * x + c)          # regression residual
spread_series = pd.Series(spread)
mu = spread_series.rolling(WINDOW).mean()
# A zero rolling std would divide by zero; substitute a tiny positive value.
sig = spread_series.rolling(WINDOW).std().replace(0, 1e-9)
z = (spread_series - mu) / sig

# --- Signals --------------------------------------------------------------
# Long the spread (+1) when z < -ENTRY_Z, short (-1) when z > ENTRY_Z, else flat.
# During the first WINDOW-1 bars z is NaN, both comparisons are False, so pos is 0.
pos = (z < -ENTRY_Z).astype(int) - (z > ENTRY_Z).astype(int)

# Per-bar change of the spread; the first bar has no predecessor, so its change is 0.
ret = np.diff(spread, prepend=spread[0])

# --- PnL ------------------------------------------------------------------
# The signal at bar t is computed from spread[t], so it can only earn the change
# from t to t+1. Lagging the position by one bar removes the look-ahead bias of
# multiplying pos[t] by the change that ended at t.
held_pos = pos.shift(1, fill_value=0)
step_pnl = held_pos * ret
pnl = step_pnl.cumsum()

# Number of bars on which the target position changed (entries and exits).
n_position_changes = int(pos.diff().fillna(0).ne(0).sum())

print(f"Total trades: {n_position_changes}")
# pnl is a Series with an integer index, so pnl[-1] would be a label lookup and
# raise KeyError; .iloc[-1] is the positional "last element".
print(f"Final PnL: {pnl.iloc[-1]:.2f}")
# Sharpe is mean / std of the per-bar PnL, not of the cumulative curve.
# NOTE(review): sqrt(252) annualises daily bars; confirm the bar frequency of the data.
print(f"Sharpe ratio: {step_pnl.mean() / (step_pnl.std() + 1e-9) * np.sqrt(252):.2f}")

# Three stacked panels: price series, z-score with entry bands, cumulative PnL.
fig, (ax_price, ax_z, ax_pnl) = plt.subplots(3, 1, figsize=(12, 8))

# Ticker-specific legend labels are used only when real Finnhub data was loaded.
if FINNHUB_AVAILABLE and api_key:
    x_label, y_label = 'X (ETH)', 'Y (BNB)'
else:
    x_label, y_label = 'X', 'Y'

ax_price.plot(x, label=x_label)
ax_price.plot(y, label=y_label)
ax_price.legend()
ax_price.set_title('Price Series')

# Dashed lines mark the z-score levels at which positions are opened.
ax_z.plot(z, label='Z-score')
ax_z.axhline(-2, color='g', linestyle='--', label='Entry threshold')
ax_z.axhline(2, color='r', linestyle='--', label='Entry threshold')
ax_z.legend()
ax_z.set_title('Z-score of Spread')

ax_pnl.plot(pnl, label='Cumulative PnL')
ax_pnl.legend()
ax_pnl.set_title('Strategy PnL')

fig.tight_layout()
plt.show()
