# Preflop Heads-Up Explorer

Analyse hero performance in true heads-up lineups (only one opponent seated). Use this notebook to profile profitability by position, street progression, and preflop line selection.

> Heads-up hands are selected by filtering the warehouse `hands` table on `seat_count_start = 2`.

## Study Goals

- Quantify overall winrate and bb/100 in heads-up pots.
- Break down profit, VPIP/PFR, and showdown rates by position (BTN vs BB).
- Track results by deepest street reached (preflop, flop, turn, river).
- Compare profitability across preflop line families (open raise, 3-bet, call vs open, walks, etc.).
- Provide scaffolding for deeper slices such as stack depth, villain aggression, and board texture buckets.

In [None]:
from pathlib import Path
import os

def _locate_project_root() -> Path:
    """Return the closest directory, starting at the working directory, holding ``AGENTS.md``.

    The working directory is resolved to an absolute path and checked first,
    followed by each of its ancestors from nearest to farthest.

    Raises:
        FileNotFoundError: if neither the working directory nor any of its
            ancestors contains an entry named ``AGENTS.md``.
    """
    here = Path().resolve()
    root = next(
        (folder for folder in [here, *here.parents] if (folder / "AGENTS.md").exists()),
        None,
    )
    if root is None:
        raise FileNotFoundError("Repository root not found from notebook location.")
    return root

# Resolve the repository root once; the helper is removed from the notebook
# namespace afterwards because nothing else needs it.
PROJECT_ROOT = _locate_project_root()
del _locate_project_root

# Candidate database files, in the order they are tried.
DB_CANDIDATES = [
    PROJECT_ROOT.joinpath("data", "warehouse", "drivehud.sqlite"),
    PROJECT_ROOT.joinpath("data", "warehouse", "ignition.sqlite"),
    PROJECT_ROOT.joinpath("drivehud", "drivehud.db"),
]

# The first candidate that exists on disk wins; if none exists, fail with a
# message listing every path that was checked.
try:
    DB_PATH = next(path for path in DB_CANDIDATES if path.exists())
except StopIteration:
    report = ["Database not found. Checked:", *(str(path) for path in DB_CANDIDATES)]
    raise FileNotFoundError(os.linesep.join(report)) from None

# Directory for cached analysis artefacts; created if it does not exist yet.
CACHE_DIR = PROJECT_ROOT.joinpath("analysis", "cache")
CACHE_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
import sys

# Put the repository root at the front of the import path (once) so that the
# project-local `analysis` package imported below can be resolved.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

import sqlite3
from typing import Sequence

import numpy as np
import pandas as pd
from IPython.display import display

from analysis.sqlite_utils import connect_readonly

In [None]:
# --- Configuration ---
# Passed to the loader as `force`: when True the parquet cache is bypassed and
# the database is queried again.
FORCE_RELOAD = False
# Passed to the loader as `heads_up_only`: when True the query is restricted to
# hands with `seat_count_start = 2`.
HEADS_UP_ONLY = True  # Toggle to reuse the loader for short-handed slices later
MIN_HAND_SAMPLE = 25  # Warn when aggregates are based on very small samples

# Parquet file in CACHE_DIR where the loader stores the prepared hand frame.
CACHE_PATH = CACHE_DIR / "preflop_heads_up_hands.parquet"

In [None]:
# Ordered categorical dtype for the deepest street a hand reached
# (Preflop < Flop < Turn < River); applied to the `deepest_street` column.
STREET_ORDER = pd.CategoricalDtype(["Preflop", "Flop", "Turn", "River"], ordered=True)
# Labels produced by `assign_preflop_line`, in the order used for the ordered
# `preflop_line` categorical.
PREFLOP_LINE_ORDER = [
    "Open Raise",
    "ISO Raise",
    "3-Bet",
    "4-Bet+",
    "Call vs Open",
    "Limp/Complete",
    "BB Walk / Fold",
    "BTN Folded",
    "Uncategorised",
]
# Preflop flag columns selected from `v_hero_preflop_ext`; the loader fills
# missing values with 0 and casts each of them to int.
FLAG_COLUMNS = [
    "vpip", "pfr", "three_bet", "four_bet", "call_open", "cold_call", "iso_raise",
    "squeeze", "steal_attempt", "steal_success", "faced_3b", "fold_to_3b", "three_bet_vs_steal",
]

def assign_preflop_line(row: pd.Series) -> str:
    """Classify one hand into a preflop line family.

    Flags are read with ``row.get`` so a missing key counts as "not set".
    The checks run from most to least specific:

    1. No VPIP: ``"BB Walk / Fold"`` when ``position_pre`` is ``"BB"``,
       otherwise ``"BTN Folded"``.
    2. ``four_bet`` then ``three_bet``.
    3. ``pfr``: ``"ISO Raise"`` when ``iso_raise`` is set, else ``"Open Raise"``.
    4. ``call_open`` or ``cold_call``: ``"Call vs Open"``.
    5. Any remaining VPIP hand: ``"Limp/Complete"``.

    ``"Uncategorised"`` is kept as a defensive fallback.
    """
    if not row.get("vpip", 0):
        if row.get("position_pre") == "BB":
            return "BB Walk / Fold"
        return "BTN Folded"

    # Re-raises outrank everything else, deepest raise first.
    for flag, label in (("four_bet", "4-Bet+"), ("three_bet", "3-Bet")):
        if row.get(flag):
            return label

    if row.get("pfr"):
        if row.get("iso_raise"):
            return "ISO Raise"
        return "Open Raise"

    if row.get("call_open") or row.get("cold_call"):
        return "Call vs Open"

    return "Limp/Complete" if row.get("vpip") else "Uncategorised"

def _bb_per_100(series: pd.Series) -> float:
    """Return the winrate in big blinds per 100 hands for *series*.

    Every entry counts as one hand; missing results are treated as 0 bb, so
    they still contribute to the hand count. An empty series yields NaN.
    """
    hand_count = len(series)
    if not hand_count:
        return float("nan")
    total_bb = float(series.fillna(0.0).sum())
    return 100.0 * total_bb / hand_count

def summarise_groups(
    frame: pd.DataFrame,
    group_cols: Sequence[str],
    *,
    observed: bool = False,
) -> pd.DataFrame:
    """Aggregate hero results per group.

    Args:
        frame: Hand-level frame. Must provide ``hand_id``, ``net_bb``,
            ``net_dollars``, ``vpip``, ``pfr``, ``three_bet``, ``call_open``,
            ``showdown``, ``saw_flop``, ``saw_turn``, ``saw_river`` and
            ``total_pot_bb`` in addition to the grouping columns.
        group_cols: Column names to group by. Missing keys form their own
            group (``dropna=False``).
        observed: Forwarded to ``DataFrame.groupby``. With the default
            ``False`` every category of a categorical key gets a row, even
            when no hand falls into it; pass ``True`` to keep only the
            categories that actually occur.

    Returns:
        One row per group, sorted by ``net_bb`` descending, with hand count,
        net results, bb/100, percentage columns (0-100 scale) and the average
        pot in big blinds, all rounded for display. An empty *frame* yields
        an empty ``DataFrame``.
    """
    if frame.empty:
        return pd.DataFrame()
    grouped = (
        # `observed` is passed explicitly: relying on the implicit default
        # emits a FutureWarning for categorical keys on pandas 2.1+ and would
        # silently drop empty categories once the default becomes True.
        frame.groupby(list(group_cols), dropna=False, observed=observed)
        .agg(
            hands=("hand_id", "nunique"),
            net_bb=("net_bb", lambda s: float(s.fillna(0.0).sum())),
            net_dollars=("net_dollars", lambda s: float(s.sum())),
            bb_per_100=("net_bb", _bb_per_100),
            vpip_pct=("vpip", "mean"),
            pfr_pct=("pfr", "mean"),
            three_bet_pct=("three_bet", "mean"),
            call_open_pct=("call_open", "mean"),
            showdown_pct=("showdown", "mean"),
            went_to_flop=("saw_flop", "mean"),
            went_to_turn=("saw_turn", "mean"),
            went_to_river=("saw_river", "mean"),
            avg_pot_bb=("total_pot_bb", "mean"),
        )
        .sort_values("net_bb", ascending=False)
    )
    # The means above are fractions; present them on a 0-100 scale.
    pct_cols = [
        "vpip_pct", "pfr_pct", "three_bet_pct", "call_open_pct", "showdown_pct",
        "went_to_flop", "went_to_turn", "went_to_river",
    ]
    grouped[pct_cols] = grouped[pct_cols] * 100.0
    return grouped.round({
        "net_bb": 1,
        "net_dollars": 2,
        "bb_per_100": 2,
        "avg_pot_bb": 2,
        "vpip_pct": 1,
        "pfr_pct": 1,
        "three_bet_pct": 1,
        "call_open_pct": 1,
        "showdown_pct": 1,
        "went_to_flop": 1,
        "went_to_turn": 1,
        "went_to_river": 1,
    })

def _cache_matches_filter(cached: pd.DataFrame, heads_up_only: bool) -> bool:
    """Report whether a cached frame is consistent with the requested seat filter.

    A cached frame counts as heads-up-only when every row has
    ``seat_count_start == 2``. It is accepted only when that matches
    *heads_up_only*, so a parquet file written under one setting is never
    returned for the other. A frame without the column is rejected.
    """
    if "seat_count_start" not in cached.columns:
        return False
    cached_is_heads_up = bool((cached["seat_count_start"] == 2).all())
    return cached_is_heads_up == heads_up_only


def load_heads_up_hero_hands(
    db_path: Path,
    cache_path: Path | None = None,
    force: bool = False,
    heads_up_only: bool = True,
) -> pd.DataFrame:
    """Load one row per hero hand with results, street flags and preflop flags.

    Args:
        db_path: Database file, opened through ``connect_readonly``.
        cache_path: Optional parquet file. When it exists, *force* is False
            and its contents match the requested seat filter, it is returned
            instead of querying the database. After a query the prepared
            frame is written back to it.
        force: Ignore an existing cache and query the database.
        heads_up_only: Restrict the query to hands with
            ``seat_count_start = 2``.

    Returns:
        A frame ordered by ``started_at_utc`` holding the selected columns
        plus derived ones: ``net_dollars``/``won_dollars`` (cents / 100),
        ``net_bb``/``won_bb``/``total_pot_bb``/``rake_bb`` (NaN when the big
        blind is unknown or zero), boolean street/showdown flags, the ordered
        categorical ``deepest_street`` and the ordered categorical
        ``preflop_line``.
    """
    if cache_path and cache_path.exists() and not force:
        cached = pd.read_parquet(cache_path)
        # The cache file name does not encode the seat filter, so verify the
        # contents before trusting them; otherwise fall through and rebuild.
        if _cache_matches_filter(cached, heads_up_only):
            return cached

    where_clause = "WHERE 1=1"
    if heads_up_only:
        where_clause += " AND h.seat_count_start = 2"
    query = f"""
    SELECT
        h.hand_id,
        h.started_at_local,
        h.started_at_utc,
        h.seat_count_start,
        h.total_pot_c,
        h.rake_c,
        s.position_pre,
        s.role_pre,
        r.net_c,
        r.won_c,
        r.showdown,
        r.hand_class,
        COALESCE(hb.bb_c, 0) AS bb_c,
        CASE WHEN h.board_flop IS NOT NULL THEN 1 ELSE 0 END AS saw_flop,
        CASE WHEN h.board_turn IS NOT NULL THEN 1 ELSE 0 END AS saw_turn,
        CASE WHEN h.board_river IS NOT NULL THEN 1 ELSE 0 END AS saw_river,
        CASE
            WHEN h.board_river IS NOT NULL THEN 'River'
            WHEN h.board_turn IS NOT NULL THEN 'Turn'
            WHEN h.board_flop IS NOT NULL THEN 'Flop'
            ELSE 'Preflop'
        END AS deepest_street,
        v.vpip,
        v.pfr,
        v.three_bet,
        v.four_bet,
        v.call_open,
        v.cold_call,
        v.iso_raise,
        v.squeeze,
        v.steal_attempt,
        v.steal_success,
        v.faced_3b,
        v.fold_to_3b,
        v.three_bet_vs_steal
    FROM hands h
    JOIN seats s ON s.hand_id = h.hand_id AND s.is_hero = 1
    JOIN results r ON r.hand_id = h.hand_id AND r.seat_no = s.seat_no
    LEFT JOIN v_hand_bb hb ON hb.hand_id = h.hand_id
    LEFT JOIN v_hero_preflop_ext v ON v.hand_id = h.hand_id
    {where_clause}
    ORDER BY h.started_at_utc
    """
    with connect_readonly(db_path) as conn:
        frame = pd.read_sql_query(query, conn)

    # Unparseable timestamps become NaT instead of raising.
    for col in ("started_at_local", "started_at_utc"):
        if col in frame.columns:
            frame[col] = pd.to_datetime(frame[col], errors="coerce")

    # Flags come from a LEFT JOIN, so hands without a preflop row are NULL;
    # treat those as "flag not set".
    for col in FLAG_COLUMNS:
        if col not in frame.columns:
            frame[col] = 0
        frame[col] = frame[col].fillna(0).astype(int)

    frame["bb_c"] = frame["bb_c"].astype(float)
    frame["net_dollars"] = frame["net_c"] / 100.0
    frame["won_dollars"] = frame["won_c"] / 100.0

    # Express cent amounts in big blinds; a missing big blind was coalesced to
    # 0 by the query and yields NaN here rather than a division by zero.
    has_big_blind = frame["bb_c"] > 0
    for cents_col, bb_col in (
        ("net_c", "net_bb"),
        ("won_c", "won_bb"),
        ("total_pot_c", "total_pot_bb"),
        ("rake_c", "rake_bb"),
    ):
        frame[bb_col] = np.where(has_big_blind, frame[cents_col] / frame["bb_c"], np.nan)

    # NaN is truthy, so a NULL value must be filled before the bool cast or it
    # would be counted as True (e.g. inflating the showdown rate).
    bool_cols = ["showdown", "saw_flop", "saw_turn", "saw_river"]
    for col in bool_cols:
        frame[col] = frame[col].fillna(0).astype(bool)

    frame["deepest_street"] = pd.Categorical(frame["deepest_street"], dtype=STREET_ORDER)
    frame["preflop_line"] = frame.apply(assign_preflop_line, axis=1)
    frame["preflop_line"] = pd.Categorical(frame["preflop_line"], categories=PREFLOP_LINE_ORDER, ordered=True)

    if cache_path:
        # Make sure the target directory exists so the write cannot fail on a
        # fresh checkout.
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        frame.to_parquet(cache_path, index=False)
    return frame

In [None]:
# Load the hero hand frame, honouring the cache and filter settings from the
# configuration cell.
heads_up_df = load_heads_up_hero_hands(
    DB_PATH,
    cache_path=CACHE_PATH,
    force=FORCE_RELOAD,
    heads_up_only=HEADS_UP_ONLY,
)

# Report the row count and preview the first rows when there is any data.
print(f"Loaded {len(heads_up_df):,} heads-up hero hands from {DB_PATH.name}.")
if not heads_up_df.empty:
    display(heads_up_df.head())

In [None]:
# Headline metrics across every loaded hand, shown as a two-column table.
if not heads_up_df.empty:
    total_hands = len(heads_up_df)
    summary_rows = [
        ("Hands", total_hands),
        ("Net (bb)", round(heads_up_df["net_bb"].fillna(0).sum(), 1)),
        ("Net ($)", round(heads_up_df["net_dollars"].sum(), 2)),
        ("bb/100", round(_bb_per_100(heads_up_df["net_bb"]), 2)),
    ]
    # Frequency metrics share one formula: mean of the flag on a 0-100 scale.
    summary_rows += [
        (label, round(heads_up_df[column].mean() * 100, 1))
        for label, column in (
            ("VPIP %", "vpip"),
            ("PFR %", "pfr"),
            ("3-Bet %", "three_bet"),
            ("Showdown %", "showdown"),
        )
    ]
    summary_rows.append(("Avg Pot (bb)", round(heads_up_df["total_pot_bb"].mean(), 2)))

    overall_summary = pd.DataFrame(summary_rows, columns=["Metric", "Value"])
    display(overall_summary)
    # Flag samples too small for per-100 figures to be meaningful.
    if total_hands < MIN_HAND_SAMPLE:
        print(f"Warning: only {total_hands} hands — treat all per-100 stats with caution.")
else:
    print("No heads-up hands available for analysis.")

In [None]:
# Results split by hero's preflop position.
if not heads_up_df.empty:
    position_summary = summarise_groups(heads_up_df, ["position_pre"])
    display(position_summary)
else:
    print("No position breakdown available.")

In [None]:
# Results split by the deepest street reached, shown in street order; streets
# missing from the summary appear as NaN rows.
if not heads_up_df.empty:
    street_summary = summarise_groups(heads_up_df, ["deepest_street"])
    ordered = street_summary.reindex(index=STREET_ORDER.categories, fill_value=np.nan)
    display(ordered)
else:
    print("No street progression breakdown available.")

In [None]:
# Results split by preflop line family.
if not heads_up_df.empty:
    line_summary = summarise_groups(heads_up_df, ["preflop_line"])
    display(line_summary)
else:
    print("No preflop line breakdown available.")

In [None]:
# Two-way breakdowns: preflop line within each position, then deepest street
# within each preflop line.
if not heads_up_df.empty:
    line_position = summarise_groups(heads_up_df, ["position_pre", "preflop_line"])
    display(line_position)

    line_street = summarise_groups(heads_up_df, ["preflop_line", "deepest_street"])
    display(line_street)
else:
    print("No combined breakdown available.")

## Next Analysis Ideas

- Slice results by stake and local session to spot when the heads-up lobby is softest.
- Compare villain action frequencies (fold/call/3-bet) by preflop line using `actions` joins.
- Add flop texture buckets (paired, monotone, high-card) to understand post-flop leaks.
- Track stack depth in big blinds to separate deep ante battles from short-stack push/fold spots.
- Build quick visualisations (line charts, heatmaps) once the aggregates stabilise.
- Extend to near-heads-up (3 players) to contrast adjustments as another player sits in.