In [2]:
# run_top4_pairs.py
# Purpose: fetch 5-min bars directly from IBKR and backtest ONLY the top-4 pairs
# Output: a table of results (Sharpe/MaxDD/Trades/Corr/Bars) + best params per pair (optional grid)
#
# Requirements (important):
# 1) data_layer_v2.IBKRDataClient must have get_bars_multi(...)
# 2) backtest_v2.Backtester.run() must return (results, trade_count)

import time
import pandas as pd
import numpy as np

from data_layer_v2 import IBKRDataClient
from backtest_v2 import Backtester


# Pairs to backtest, as (A, B) symbol tuples. main() fetches every symbol that
# appears here and processes the pairs in this order.
TOP4_PAIRS = [
    ("VTI", "XLI"),
    ("SPY", "XLI"),
    ("VTI", "XLF"),
    ("QQQ", "XLY"),
]

# ====== you can tune these ======
# DURATION / BAR_SIZE / USE_RTH are forwarded by fetch_data_5m() to
# IBKRDataClient.get_bars_multi() as duration / bar_size / use_rth.
DURATION = "60 D"         # try "180 D" later
BAR_SIZE = "5 mins"
USE_RTH = True
# NOTE(review): SLEEP_SEC is not referenced anywhere in the visible code
# (main() sleeps a hard-coded 0.2 s) — confirm whether it is still needed.
SLEEP_SEC = 2

# Strategy params (single run)
# Z_ENTRY and SPREAD_WINDOW are used only when DO_GRID is False;
# Z_EXIT and HEDGE_WINDOW are used in both the single run and the grid search.
Z_ENTRY = 1.2
Z_EXIT = 0.2
SPREAD_WINDOW = 300
HEDGE_WINDOW = 300

# Filters
# A pair with fewer shared timestamps than MIN_OVERLAP_BARS is reported as
# "too_few_overlap_bars" and not backtested.
MIN_OVERLAP_BARS = 800
# Grid combinations with fewer trades than MIN_TRADES are skipped; a single-run
# result below the threshold is still reported but flagged in the Err column.
MIN_TRADES = 8

# Optional: small grid (set to True if you want parameter search)
# When enabled, main() tries every (z_entry, spread_window) combination from the
# two lists below and reports one row per pair.
DO_GRID = False
GRID_Z_ENTRY = [0.8, 1.0, 1.2]
GRID_SPREAD_WINDOW = [200, 300, 400]


def clean_5m_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Reduce a raw bar frame to a time-indexed, close-only frame.

    The input is expected to carry at least a ``date`` and a ``close`` column
    (IBKR util.df(bars) typically also provides open, high, low, volume,
    barCount, average — not always all of them). Everything except ``close``
    is dropped.

    Returns an empty frame with a single ``close`` column when *df* is None,
    has no rows, or lacks either required column. Otherwise returns a new
    frame indexed by the parsed ``date`` values, sorted ascending, with rows
    whose close is missing removed. The input frame is not modified.
    """
    required_cols = ("date", "close")
    has_rows = df is not None and len(df) > 0
    if not has_rows or any(col not in df.columns for col in required_cols):
        return pd.DataFrame(columns=["close"])

    # assign() works on a copy, so the caller's frame keeps its raw dates.
    parsed = df.assign(date=pd.to_datetime(df["date"]))
    closes = parsed.set_index("date").sort_index()[["close"]]
    return closes.dropna()


def fetch_data_5m(symbols, port=7497, client_id=12, market_data_type=3):
    """
    Download bars for *symbols* and return the cleaned frames keyed by symbol.

    A new IBKRDataClient is created with the given ``port``, ``client_id`` and
    ``market_data_type``; the symbols are qualified and then requested in one
    get_bars_multi() call using the module-level DURATION, BAR_SIZE and
    USE_RTH settings.

    Returns a dict mapping symbol -> close-only DataFrame (see clean_5m_df).
    Symbols whose cleaned frame is empty are left out of the result.
    """
    ib_client = IBKRDataClient(
        port=port,
        client_id=client_id,
        market_data_type=market_data_type,
    )
    qualified = ib_client.qualify(symbols)

    request_kwargs = {
        "duration": DURATION,
        "bar_size": BAR_SIZE,
        "use_rth": USE_RTH,
        "pause_sec": 0.3,
        "max_retries": 2,
        "retry_sleep_sec": 3,
    }
    raw_by_symbol = ib_client.get_bars_multi(qualified, **request_kwargs)

    # The result is looked up by symbol with .get(), so a symbol absent from
    # the response is cleaned as None and dropped by the emptiness filter.
    cleaned = ((sym, clean_5m_df(raw_by_symbol.get(sym))) for sym in symbols)
    return {sym: bars for sym, bars in cleaned if len(bars) > 0}


def corr_5m(df_a: pd.DataFrame, df_b: pd.DataFrame) -> float:
    """
    Correlation between the bar-to-bar percentage changes of two close series.

    Each frame's ``close`` column is converted to percentage changes, with the
    leading NaN dropped, before Series.corr is applied. Returns ``np.nan``
    when the correlation is undefined.
    """
    returns_a, returns_b = (
        frame["close"].pct_change().dropna() for frame in (df_a, df_b)
    )
    value = returns_a.corr(returns_b)
    if value is None or np.isnan(value):
        return np.nan
    return float(value)


def backtest_one_pair(df_a, df_b, z_entry, z_exit, spread_window, hedge_window):
    """
    Run one Backtester configuration on a pair of frames.

    All arguments are forwarded to Backtester under the same keyword names.
    Returns ``(metrics, trades)``: the value of
    Backtester.performance_metrics() applied to the run results, and the trade
    count reported by Backtester.run() converted to ``int``.
    """
    settings = {
        "z_entry": z_entry,
        "z_exit": z_exit,
        "spread_window": spread_window,
        "hedge_window": hedge_window,
    }
    engine = Backtester(df_a=df_a, df_b=df_b, **settings)

    # run() is expected to return (results, trade_count) — see file header.
    run_results, trade_count = engine.run()
    return engine.performance_metrics(run_results), int(trade_count)


def _sharpe_improves(candidate, incumbent):
    """Return True when *candidate* should replace *incumbent* as the best Sharpe.

    ``incumbent`` is None when nothing has been recorded yet. Any ordering
    comparison against NaN is False, so NaN is handled explicitly: a NaN
    candidate is accepted only when there is no incumbent at all, and a NaN
    incumbent is replaced by any real-valued candidate.
    """
    if incumbent is None:
        return True
    if np.isnan(candidate):
        return False
    return bool(np.isnan(incumbent) or candidate > incumbent)


def main():
    """
    Fetch bars for every symbol in TOP4_PAIRS, backtest each pair, print a table.

    For each pair the two close series are restricted to their shared
    timestamps. Pairs with missing data or fewer than MIN_OVERLAP_BARS shared
    bars get a row with only an ``Err`` value. Otherwise:

    * DO_GRID is True: every (z_entry, spread_window) combination from
      GRID_Z_ENTRY x GRID_SPREAD_WINDOW is backtested; combinations that raise
      are reported with a warning and skipped, combinations with fewer than
      MIN_TRADES trades are skipped, and the combination with the highest
      Sharpe is kept (a NaN Sharpe is kept only if no real-valued one exists).
    * DO_GRID is False: a single backtest with Z_ENTRY / SPREAD_WINDOW is run;
      a result below MIN_TRADES is still listed but flagged in ``Err``.

    The rows are sorted by Sharpe (descending, missing values last) when that
    column exists, and printed to stdout. Returns None.
    """
    # collect needed symbols from the pairs
    symbols = sorted({s for pair in TOP4_PAIRS for s in pair})

    print(f"[FETCH] symbols={symbols} duration={DURATION} bar_size={BAR_SIZE} use_rth={USE_RTH}")
    data_5m = fetch_data_5m(symbols)

    missing = [s for s in symbols if s not in data_5m]
    if missing:
        print(f"[FETCH][WARN] missing data for: {missing}")

    rows = []

    for a, b in TOP4_PAIRS:
        if a not in data_5m or b not in data_5m:
            rows.append({"A": a, "B": b, "Err": "missing_data"})
            continue

        # align times: keep only timestamps present in both series
        df_a = data_5m[a]
        df_b = data_5m[b]
        idx = df_a.index.intersection(df_b.index)

        if len(idx) < MIN_OVERLAP_BARS:
            rows.append({"A": a, "B": b, "Bars": len(idx), "Err": "too_few_overlap_bars"})
            continue

        a2 = df_a.loc[idx]
        b2 = df_b.loc[idx]
        c = corr_5m(a2, b2)
        n_bars = int(len(idx))

        if DO_GRID:
            best = None  # best row dict so far, or None
            for ze in GRID_Z_ENTRY:
                for sw in GRID_SPREAD_WINDOW:
                    try:
                        metrics, trades = backtest_one_pair(
                            a2, b2,
                            z_entry=ze, z_exit=Z_EXIT,
                            spread_window=sw, hedge_window=HEDGE_WINDOW
                        )
                        if trades < MIN_TRADES:
                            continue
                        r = {
                            "A": a, "B": b, "Corr": c, "Bars": n_bars,
                            "z_entry": ze, "z_exit": Z_EXIT,
                            "spread_window": sw, "hedge_window": HEDGE_WINDOW,
                            "Sharpe": float(metrics["Sharpe"]),
                            "MaxDD": float(metrics["Max_Drawdown"]),
                            "Trades": int(trades),
                            "Err": ""
                        }
                    except Exception as exc:
                        # Best-effort: one failing combination must not abort
                        # the search, but it should not vanish silently either.
                        print(
                            f"[GRID][WARN] {a}/{b} z_entry={ze} "
                            f"spread_window={sw} failed: {exc!r}"
                        )
                        continue

                    incumbent = None if best is None else best["Sharpe"]
                    if _sharpe_improves(r["Sharpe"], incumbent):
                        best = r

            if best is None:
                rows.append({"A": a, "B": b, "Corr": c, "Bars": n_bars, "Err": "no_valid_grid_result"})
            else:
                rows.append(best)

        else:
            try:
                metrics, trades = backtest_one_pair(
                    a2, b2,
                    z_entry=Z_ENTRY, z_exit=Z_EXIT,
                    spread_window=SPREAD_WINDOW, hedge_window=HEDGE_WINDOW
                )

                if trades < MIN_TRADES:
                    # Too few trades: still report the metrics, but flag the row.
                    rows.append({
                        "A": a, "B": b, "Corr": c, "Bars": n_bars,
                        "Sharpe": float(metrics["Sharpe"]),
                        "MaxDD": float(metrics["Max_Drawdown"]),
                        "Trades": int(trades),
                        "Err": f"trades<{MIN_TRADES}"
                    })
                else:
                    rows.append({
                        "A": a, "B": b, "Corr": c, "Bars": n_bars,
                        "z_entry": Z_ENTRY, "z_exit": Z_EXIT,
                        "spread_window": SPREAD_WINDOW, "hedge_window": HEDGE_WINDOW,
                        "Sharpe": float(metrics["Sharpe"]),
                        "MaxDD": float(metrics["Max_Drawdown"]),
                        "Trades": int(trades),
                        "Err": ""
                    })
            except Exception as e:
                rows.append({"A": a, "B": b, "Corr": c, "Bars": n_bars, "Err": str(e)})

        time.sleep(0.2)

    out = pd.DataFrame(rows)
    if "Sharpe" in out.columns:
        out = out.sort_values("Sharpe", ascending=False, na_position="last").reset_index(drop=True)

    print("\n=== RESULTS (TOP 4 PAIRS) ===")
    print(out.to_string(index=False))


# Run the backtest only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

[FETCH] symbols=['QQQ', 'SPY', 'VTI', 'XLF', 'XLI', 'XLY'] duration=60 D bar_size=5 mins use_rth=True
Total trades: 25
Total trades: 28
Total trades: 28
Total trades: 30

=== RESULTS (TOP 4 PAIRS) ===
  A   B     Corr  Bars  z_entry  z_exit  spread_window  hedge_window   Sharpe     MaxDD  Trades Err
VTI XLI 0.731085  4608      1.2     0.2            300           300 6.087426 -0.058014      25    
SPY XLI 0.699643  4608      1.2     0.2            300           300 5.577379 -0.062306      28    
VTI XLF 0.627763  4608      1.2     0.2            300           300 5.082179 -0.056459      28    
QQQ XLY 0.669246  4608      1.2     0.2            300           300 4.611589 -0.049992      30    
