In [1]:
# pair_screener (core parts)
# Updated: request 5-minute bars directly (no 1-min fetch, no resample)

import itertools
import time
import pandas as pd
import numpy as np

from data_layer_v2 import IBKRDataClient
from backtest_v2 import Backtester


def _evaluate_pair(
    a,
    b,
    df_a,
    df_b,
    *,
    corr_min,
    min_overlap_bars,
    min_trades,
    z_entry,
    z_exit,
    spread_window,
    hedge_window,
):
    """Screen and backtest one candidate pair.

    Returns a tuple ``(outcome, row)``:

    * ``outcome`` is one of ``"overlap"``, ``"corr"``, ``"trades"``
      (the pair was filtered out at that stage), ``"error"`` (the backtest
      raised) or ``"kept"``.
    * ``row`` is the result dict for ``"kept"`` / ``"error"`` outcomes and
      ``None`` for filtered pairs.
    """
    # Only timestamps present in both series are comparable.
    idx = df_a.index.intersection(df_b.index)
    if len(idx) < min_overlap_bars:
        return "overlap", None

    a2 = df_a.loc[idx]
    b2 = df_b.loc[idx]

    # Correlation of simple returns on the shared timestamps.
    ra = a2["close"].pct_change().dropna()
    rb = b2["close"].pct_change().dropna()
    corr = ra.corr(rb)
    if corr is None or pd.isna(corr) or corr < corr_min:
        return "corr", None

    # Kept outside the try so the error row can report whatever count was
    # known at the moment the backtest failed.
    trades = 0
    try:
        bt = Backtester(
            df_a=a2, df_b=b2,
            z_entry=z_entry, z_exit=z_exit,
            spread_window=spread_window, hedge_window=hedge_window
        )

        # IMPORTANT: Backtester.run() must return (results, trade_count)
        results, trades = bt.run()
        metrics = bt.performance_metrics(results)

        if trades < min_trades:
            return "trades", None

        return "kept", {
            "A": a, "B": b,
            "Corr": float(corr),
            "Sharpe": float(metrics["Sharpe"]),
            "MaxDD": float(metrics["Max_Drawdown"]),
            "Bars": int(len(idx)),
            "Trades": int(trades),
            "Err": ""
        }
    except Exception as e:
        # Best effort: a failing pair is recorded with NaN metrics and the
        # error text instead of aborting the whole scan.
        return "error", {
            "A": a, "B": b, "Corr": float(corr), "Sharpe": np.nan, "MaxDD": np.nan,
            "Bars": int(len(idx)), "Trades": int(trades), "Err": str(e)
        }


def pair_screener_verbose(
    symbols,
    duration="60 D",
    use_rth=True,
    batch_size=8,
    sleep_sec=2,
    corr_min=0.6,
    z_entry=1.2,
    z_exit=0.2,
    spread_window=300,
    hedge_window=300,
    min_overlap_bars=800,
    min_trades=8,
    progress_every_pairs=50,
    save_csv_path="pair_screener_progress.csv",
    ib_port=7497,
    ib_client_id=11,
    market_data_type=3,
):
    """Fetch 5-minute bars for ``symbols`` and backtest every unordered pair.

    The run has three stages, each logged to stdout:

    1. Fetch 5-minute bars in batches through ``IBKRDataClient`` and keep the
       cleaned ``close`` series of every symbol that returned usable data.
    2. Optionally (re)create ``save_csv_path`` with a header row.
    3. For each pair: require ``min_overlap_bars`` shared timestamps, require
       a return correlation of at least ``corr_min``, run ``Backtester`` and
       require at least ``min_trades`` trades.

    Parameters
    ----------
    symbols : iterable
        Symbols to screen. Duplicates are dropped (first occurrence wins).
    duration, use_rth :
        Passed through to ``client.get_bars_multi``.
    batch_size : int
        Symbols requested per fetch batch; must be >= 1.
    sleep_sec : float
        Pause between consecutive fetch batches.
    corr_min, min_overlap_bars, min_trades :
        Filter thresholds described above.
    z_entry, z_exit, spread_window, hedge_window :
        Passed through to ``Backtester``.
    progress_every_pairs : int or None
        Print a progress line every this many scanned pairs. ``0``/``None``
        (or a negative value) disables the periodic lines; the final summary
        line is always printed.
    save_csv_path : str or None
        If truthy, the file is overwritten with a header and every kept or
        errored pair is appended as soon as it is produced.
    ib_port, ib_client_id, market_data_type :
        Passed through to the ``IBKRDataClient`` constructor.

    Returns
    -------
    pandas.DataFrame
        Columns ``A, B, Corr, Sharpe, MaxDD, Bars, Trades, Err``. Rows with a
        Sharpe value come first, sorted descending; rows whose Sharpe is NaN
        (backtest errors) follow. Empty (columns only) if fewer than two
        symbols had data or no pair survived.

    Raises
    ------
    ValueError
        If ``batch_size`` is less than 1.
    """
    result_columns = ["A", "B", "Corr", "Sharpe", "MaxDD", "Bars", "Trades", "Err"]

    if batch_size is None or batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

    # Drop duplicate symbols (order preserved) so nothing is fetched or
    # counted twice.
    symbols = list(dict.fromkeys(symbols))

    # NOTE(review): the client is never explicitly closed in this function;
    # confirm whether IBKRDataClient needs a disconnect/close call.
    client = IBKRDataClient(port=ib_port, client_id=ib_client_id, market_data_type=market_data_type)

    # -------------------------
    # 1) Fetch 5-min data directly
    # -------------------------
    data_5m = {}
    fetch_ok = 0
    fetch_fail = 0

    total_batches = (len(symbols) + batch_size - 1) // batch_size
    print(f"[FETCH] symbols={len(symbols)} duration={duration} use_rth={use_rth} batch_size={batch_size} => batches={total_batches}")

    for bi in range(total_batches):
        batch = symbols[bi * batch_size : (bi + 1) * batch_size]
        print(f"[FETCH] batch {bi+1}/{total_batches}: {batch}")

        try:
            contracts = client.qualify(batch)

            # KEY CHANGE: 5-minute bars directly
            data_dict_5m = client.get_bars_multi(
                contracts,
                duration=duration,
                bar_size="5 mins",
                use_rth=use_rth,
                pause_sec=0.3,
                max_retries=2,
                retry_sleep_sec=3,
            )
        except Exception as e:
            # A failed batch costs only its own symbols; later batches still run.
            print(f"[FETCH][ERROR] batch failed: {e}")
            fetch_fail += len(batch)
        else:
            for s in batch:
                # Tolerate a client that returns None instead of an empty dict.
                df = (data_dict_5m or {}).get(s)
                if df is None or len(df) == 0:
                    print(f"  - {s}: NO DATA")
                    fetch_fail += 1
                    continue

                # Ensure we have expected columns and datetime index
                if "date" not in df.columns or "close" not in df.columns:
                    print(f"  - {s}: BAD FORMAT (missing date/close)")
                    fetch_fail += 1
                    continue

                df = df.copy()
                df["date"] = pd.to_datetime(df["date"])
                df = df.set_index("date").sort_index()

                df = df[["close"]].dropna()
                # Duplicate timestamps would make .loc[shared_index] return
                # frames of different lengths for the two legs of a pair.
                df = df[~df.index.duplicated(keep="last")]
                if len(df) == 0:
                    print(f"  - {s}: EMPTY after cleaning")
                    fetch_fail += 1
                    continue

                data_5m[s] = df
                fetch_ok += 1
                print(f"  + {s}: bars_5m={len(df)}")

        # Pause only between batches; there is nothing to wait for after the last one.
        if bi < total_batches - 1:
            time.sleep(sleep_sec)

    symbols_ok = list(data_5m.keys())
    print(f"[FETCH DONE] ok={fetch_ok} fail={fetch_fail} usable_symbols={len(symbols_ok)}")

    if len(symbols_ok) < 2:
        print("[STOP] Not enough symbols with data.")
        return pd.DataFrame(columns=result_columns)

    # -------------------------
    # 2) CSV header (with Trades)
    # -------------------------
    if save_csv_path:
        # Overwrites any previous progress file.
        pd.DataFrame(columns=result_columns).to_csv(save_csv_path, index=False)

    # -------------------------
    # 3) Backtest pairs
    # -------------------------
    pairs = list(itertools.combinations(symbols_ok, 2))
    total_pairs = len(pairs)
    print(f"[BT] total_pairs={total_pairs} corr_min={corr_min} min_overlap_bars={min_overlap_bars} min_trades={min_trades}")

    t0 = time.time()
    rows = []
    counts = {"overlap": 0, "corr": 0, "trades": 0, "error": 0, "kept": 0}
    periodic_progress = bool(progress_every_pairs) and progress_every_pairs > 0

    for i, (a, b) in enumerate(pairs, start=1):
        outcome, row = _evaluate_pair(
            a, b, data_5m[a], data_5m[b],
            corr_min=corr_min,
            min_overlap_bars=min_overlap_bars,
            min_trades=min_trades,
            z_entry=z_entry,
            z_exit=z_exit,
            spread_window=spread_window,
            hedge_window=hedge_window,
        )
        counts[outcome] += 1

        if row is not None:
            rows.append(row)
            if save_csv_path:
                # Append immediately so partial results survive an interrupted run.
                pd.DataFrame([row]).to_csv(save_csv_path, mode="a", header=False, index=False)

        # Progress is evaluated for every pair, including filtered ones, so
        # checkpoints and the final summary are never skipped.
        at_checkpoint = periodic_progress and (i % progress_every_pairs == 0)
        if at_checkpoint or (i == total_pairs):
            elapsed = time.time() - t0
            rate = i / elapsed if elapsed > 0 else np.nan
            eta_sec = (total_pairs - i) / rate if rate and rate > 0 else np.nan

            print(
                f"[BT] {i}/{total_pairs} scanned | kept={counts['kept']} | "
                f"filtered_overlap={counts['overlap']} filtered_corr={counts['corr']} filtered_trades={counts['trades']} errors={counts['error']} | "
                f"elapsed={elapsed:.1f}s eta={eta_sec:.1f}s"
            )

            if rows:
                top_so_far = (
                    pd.DataFrame(rows, columns=result_columns)
                    .dropna(subset=["Sharpe"])
                    .sort_values("Sharpe", ascending=False)
                    .head(5)
                )
                if len(top_so_far) > 0:
                    print(top_so_far[["A","B","Sharpe","MaxDD","Corr","Bars","Trades"]].to_string(index=False))

    out = pd.DataFrame(rows, columns=result_columns)
    if out.empty:
        return pd.DataFrame(columns=result_columns)

    # Ranked results first, then pairs without a Sharpe (backtest errors).
    out_valid = out.dropna(subset=["Sharpe"]).sort_values("Sharpe", ascending=False)
    out_invalid = out[out["Sharpe"].isna()]
    out = pd.concat([out_valid, out_invalid], axis=0).reset_index(drop=True)
    return out

In [2]:
# Ticker universe handed to the screener; every unordered pair is a candidate.
symbols = [
    "SPY", "QQQ", "DIA", "IWM", "MDY", "RSP", "VTI",
    "XLB", "XLE", "XLF", "XLI", "XLK", "XLP", "XLU", "XLV", "XLY",
    "GLD", "SLV", "USO", "UUP",
]

# Settings are spelled out for readability; they match the function defaults.
ranked = pair_screener_verbose(
    symbols=symbols,
    duration="60 D",
    use_rth=True,
    batch_size=8,
    sleep_sec=2,
    corr_min=0.6,
    min_trades=8,
)

# Show the 20 best-ranked pairs.
print(ranked.head(20))

[FETCH] symbols=20 duration=60 D use_rth=True batch_size=8 => batches=3
[FETCH] batch 1/3: ['SPY', 'QQQ', 'DIA', 'IWM', 'MDY', 'RSP', 'VTI', 'XLB']
  + SPY: bars_5m=4608
  + QQQ: bars_5m=4608
  + DIA: bars_5m=4608
  + IWM: bars_5m=4608
  + MDY: bars_5m=4608
  + RSP: bars_5m=4608
  + VTI: bars_5m=4608
  + XLB: bars_5m=4608
[FETCH] batch 2/3: ['XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLU', 'XLV', 'XLY']
  + XLE: bars_5m=4608
  + XLF: bars_5m=4608
  + XLI: bars_5m=4608
  + XLK: bars_5m=4608
  + XLP: bars_5m=4608
  + XLU: bars_5m=4608
  + XLV: bars_5m=4608
  + XLY: bars_5m=4608
[FETCH] batch 3/3: ['GLD', 'SLV', 'USO', 'UUP']
  + GLD: bars_5m=4608
  + SLV: bars_5m=4608
  + USO: bars_5m=4608
  + UUP: bars_5m=4608
[FETCH DONE] ok=20 fail=0 usable_symbols=20
[BT] total_pairs=190 corr_min=0.6 min_overlap_bars=800 min_trades=8
Total trades: 34
Total trades: 31
Total trades: 30
Total trades: 22
Total trades: 27
Total trades: 39
Total trades: 34
Total trades: 28
Total trades: 38
Total trades: 28
Total 