# Pair Trading Strategy Research

Armaan Gandhara | agandhara243@gmail.com | armaangandhara.me

09/2025

## Config & Utils

*Purpose*: Centralized parameters, imports, styles, and small helpers reused across the notebook. This keeps later sections focused on research and backtesting logic, not boilerplate.

What's inside:
- Project config (Config dataclass): dates, universe, paths, risk-free, frequency
- Reproducibility: seed setter
- Plot style: consistent figures 
- Helpers: annualization factor, returns, rolling z-score, drawdown and risk metrics, alignment utilities
- Lightweight disk cache utility for later data ingest

### Code

In [2]:
# =======================
# Config & Utils
# =======================


from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import os
import json
import hashlib
import warnings
from typing import Iterable, Tuple, Optional, Dict
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from ibfetcher import IBFetcher
from utils import save_to_csv

# Suppress every warning category for the rest of the session.
# NOTE(review): with no category/module filter this also hides numpy runtime
# warnings and pandas deprecation warnings — consider narrowing the filter.
warnings.filterwarnings("ignore")

# ---------- Config ----------

@dataclass
class Config:
    """Project-wide settings shared by the research sections.

    `start`, `end` and `tickers` are required; the remaining fields carry
    defaults.
    """

    # Required fields.
    start: str
    end: str
    tickers: Iterable[str]
    # Optional fields.
    data_dir: str = "data"
    freq: str = "D"
    trading_days: int = 252
    rf_annual: float = 0.00

    def path(self) -> Path:
        """Return `data_dir` as a Path, creating it and its `cache` subfolder."""
        root = Path(self.data_dir)
        for folder in (root, root / "cache"):
            folder.mkdir(parents=True, exist_ok=True)
        return root
    
# ---------- Repro/Style ----------

def set_seed(seed: int = 42) -> None:
    """Seed both Python's `random` module and NumPy's global RNG."""
    random.seed(seed)
    np.random.seed(seed)

def set_plot_style() -> None:
    """Apply the notebook's shared matplotlib rcParams overrides."""
    overrides = {
        # Figure and text.
        "figure.figsize": (10, 5),
        "font.size": 11,
        # Light grid on every axes.
        "axes.grid": True,
        "grid.alpha": 0.3,
        # Hide the top and right spines.
        "axes.spines.top": False,
        "axes.spines.right": False,
    }
    plt.rcParams.update(overrides)

# ---------- Frequencies/Annualization ----------

# Periods per year for each supported pandas-style frequency alias.
# Keys are upper-case and carry no anchor suffix ("W-FRI" is looked up as "W").
_ANNUALIZE = {
    # Daily / business-daily bars: trading days per year.
    "D": 252,
    "B": 252,
    # Weekly.
    "W": 52,
    # Monthly (legacy and month-start/month-end aliases).
    "M": 12,
    "MS": 12,
    "ME": 12,
    "BM": 12,
    "BMS": 12,
    "BME": 12,
    # Quarterly.
    "Q": 4,
    "QS": 4,
    "QE": 4,
    "BQ": 4,
    "BQS": 4,
    "BQE": 4,
    # Annual.
    "A": 1,
    "AS": 1,
    "Y": 1,
    "YS": 1,
    "YE": 1,
}


def annualization_factor(freq: str) -> int:
    """Return the number of periods per year for a frequency alias.

    The lookup is case-insensitive and ignores an anchor suffix, so
    ``"w"``, ``"W"`` and ``"W-FRI"`` all map to 52.

    Parameters
    ----------
    freq : str
        Frequency alias such as ``"D"``, ``"W"``, ``"M"``, ``"Q"`` or ``"Y"``.

    Returns
    -------
    int
        Periods per year; 252 when the alias is not in the table.
    """
    # Drop the anchor part ("W-FRI" -> "W", "Q-DEC" -> "Q") before the lookup.
    key = freq.strip().upper().split("-", 1)[0]
    # Unknown aliases keep the historical daily fallback.
    return _ANNUALIZE.get(key, 252)

# ---------- Returns & Z-Score ----------

def compute_returns(prices: pd.DataFrame, method: str = "log") -> pd.DataFrame:
    """
    Compute log or simple returns from price levels.

    Parameters
    ----------
    prices : pd.DataFrame
        Price levels; rows are sorted by index before differencing.
    method : str
        ``"log"`` for ``log(p_t) - log(p_{t-1})`` or ``"simple"`` for
        ``p_t / p_{t-1} - 1``.

    Returns
    -------
    pd.DataFrame
        Returns with the same columns; infinite values are replaced by NaN.

    Raises
    ------
    ValueError
        If ``method`` is neither ``"log"`` nor ``"simple"``.
    """
    if method not in {"log", "simple"}:
        raise ValueError("method must be 'log' or 'simple'")
    px = prices.sort_index()
    if method == "log":
        rets = np.log(px).diff()
    else:
        # fill_method=None: do not forward-fill missing prices before taking
        # the ratio. A gap then yields NaN (as in the log branch) instead of
        # a spurious 0% return followed by a multi-period jump.
        rets = px.pct_change(fill_method=None)
    # Division by a zero price / log(0) produce +/-inf; treat them as missing.
    return rets.replace([np.inf, -np.inf], np.nan)

def zscore_rolling(x: pd.Series, window: int) -> pd.Series:
    """Return the rolling z-score of `x`: (x - rolling mean) / rolling std (ddof=0)."""
    roll = x.rolling(window)
    centered = x - roll.mean()
    return centered / roll.std(ddof=0)
    

# ---------- Drawdowns & Risk Metrics ----------

def equity_to_drawdown(equity: pd.Series) -> pd.Series:
    """Return the drawdown series: equity relative to its running peak, minus 1."""
    running_peak = equity.cummax()
    return equity / running_peak - 1.0

def sharpe_ratio(returns: pd.Series, freq: str = "D", rf_annual: float = 0.0):
    """Return the annualized Sharpe ratio of per-period `returns`.

    The annual risk-free rate is converted to a per-period rate by geometric
    de-compounding and subtracted from each return. Volatility uses ddof=0.
    Returns NaN when the volatility is zero or NaN.
    """
    periods = annualization_factor(freq)
    excess = returns - ((1 + rf_annual) ** (1 / periods) - 1)
    ann_vol = excess.std(ddof=0) * np.sqrt(periods)
    if ann_vol == 0 or np.isnan(ann_vol):
        return np.nan
    ann_mean = excess.mean() * periods
    return ann_mean / ann_vol

def sortino_ratio(returns: pd.Series, freq: str = "D", rf_annual: float = 0.0):
    """Return the annualized Sortino ratio of per-period `returns`.

    Parameters
    ----------
    returns : pd.Series
        Per-period returns.
    freq : str
        Frequency alias passed to ``annualization_factor``.
    rf_annual : float
        Annual risk-free rate; converted to a per-period rate by geometric
        de-compounding and used as the target return.

    Returns
    -------
    float
        Annualized mean excess return divided by annualized downside
        deviation, or NaN when the downside deviation is zero or NaN.
    """
    af = annualization_factor(freq)
    rf_per_step = (1 + rf_annual) ** (1 / af) - 1
    ex = returns - rf_per_step
    # Keep only the shortfall below the target; gains count as zero.
    downside = ex.clip(upper=0)
    # Downside deviation is the root-mean-square shortfall measured from the
    # target (zero excess return), not the dispersion of the clipped values
    # around their own mean, which std() would compute.
    dd_sigma = np.sqrt((downside ** 2).mean()) * np.sqrt(af)
    mu = ex.mean() * af
    if dd_sigma == 0 or np.isnan(dd_sigma):
        return np.nan
    return mu / dd_sigma
    
def calmar_ratio(equity: pd.Series, freq: str = "D"):
    """Return the Calmar ratio: CAGR divided by maximum drawdown.

    Parameters
    ----------
    equity : pd.Series
        Equity curve levels; missing values are dropped before use.
    freq : str
        Frequency alias passed to ``annualization_factor``.

    Returns
    -------
    float
        ``cagr / max_drawdown``, or NaN when fewer than two valid points are
        available or the maximum drawdown is zero or NaN.
    """
    af = annualization_factor(freq)
    eq = equity.dropna()
    # Number of return periods spanned by the valid observations.
    n_periods = len(eq) - 1
    if n_periods < 1:
        # Not enough data to annualize; avoids IndexError / ZeroDivisionError.
        return np.nan
    cagr = (eq.iloc[-1] / eq.iloc[0]) ** (af / n_periods) - 1
    max_dd = abs(equity_to_drawdown(eq).min())
    if pd.isna(max_dd) or max_dd == 0:
        return np.nan
    return cagr / max_dd

def max_drawdown(equity: pd.Series):
    """Return the magnitude of the deepest drawdown of `equity`."""
    deepest = equity_to_drawdown(equity).min()
    return abs(deepest)

# ---------- Alignment/Cleaning ----------

def ensure_datetime_index(df: pd.DataFrame) -> pd.DataFrame:
    """Return `df` sorted by index, converting the index to datetimes if needed.

    The input frame is not modified: conversion happens on a copy.
    """
    if isinstance(df.index, pd.DatetimeIndex):
        return df.sort_index()
    converted = df.copy()
    converted.index = pd.to_datetime(converted.index)
    return converted.sort_index()

def align_panels(*dfs: pd.DataFrame, dropna: bool = True):
    """
    Restrict several DataFrames to their shared columns and shared index.

    Columns common to every frame are kept in sorted order; each frame's
    index is converted to datetimes and sorted, then all frames are cut down
    to the index intersection. With `dropna=True`, any row that has a missing
    value in at least one frame is removed from all of them.

    Returns a tuple of aligned frames in the same order as the inputs.
    """
    # Columns present in every input frame.
    shared = set(dfs[0].columns)
    shared.intersection_update(*(frame.columns for frame in dfs[1:]))
    cols = sorted(shared)

    panels = [ensure_datetime_index(frame)[cols] for frame in dfs]

    # Index labels present in every frame.
    common = panels[0].index
    for panel in panels[1:]:
        common = common.intersection(panel.index)
    panels = [panel.loc[common] for panel in panels]

    if dropna:
        # One boolean column per frame: does the row contain a NaN there?
        row_has_gap = pd.concat(
            [panel.isna().any(axis=1) for panel in panels], axis=1
        ).any(axis=1)
        keep = ~row_has_gap
        panels = [panel.loc[keep] for panel in panels]
    return tuple(panels)


### Test

In [3]:
# Smoke test: pull two days of 5-minute AAPL bars from IB and save them to CSV.
ib = IBFetcher()
ib.connect_app()

try:
    df = ib.fetch_stock_data("AAPL", duration="2 D", bar_size="5 mins")
finally:
    # Always release the IB connection, even when the fetch raises.
    ib.disconnect_app()

df.head()

save_to_csv(df, "AAPL", "2 D", "5 mins")

Connecting to IB...
Connected.
Error 2104: Market data farm connection is OK:usfarm.nj
Error 2104: Market data farm connection is OK:usfuture
Error 2104: Market data farm connection is OK:cashfarm
Error 2104: Market data farm connection is OK:usfarm
Error 2106: HMDS data farm connection is OK:euhmds
Error 2106: HMDS data farm connection is OK:fundfarm
Error 2106: HMDS data farm connection is OK:ushmds
Error 2158: Sec-def data farm connection is OK:secdefil
Data collection finished for request 1
Saved AAPL to ./data/raw\AAPL_2 D_5 mins_20250923.csv
Disconnected from IB.
Saved to data\cache\prices\AAPL_2 D_5 mins_20250923.csv


## Data Ingest

## Pair Selection

## Hedge Ratio & Spread

## OU Check

## Signals & Sizing

## Cost and Execution

## Walk-Forward Backtest

## Results

## Factor Neutrality

## Sensitivity Sweeps

## Regime Splits

## OOS Holdout

## Beta Stability

## Structural Breaks

## Cost Stress Test

## ML Ranker for Pairs

## Intraday Extensions