# Best Response Correctness Harness
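Compare the baseline `best_response_exact` against a candidate efficient implementation; the comparison and the benchmark are skipped when the candidate module cannot be imported.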


In [None]:
import os, sys
from pathlib import Path

def find_repo_root(start_dir: str) -> str:
    """Locate the repository root at or above ``start_dir``.

    A directory counts as the root when it contains a ``liars_poker``
    directory or a ``pyproject.toml`` entry. Only ``start_dir`` and its
    nearest ancestors are inspected, six directories in total.

    Returns the matching directory as a string, or the resolved
    ``start_dir`` itself when none of the inspected directories match.
    """
    origin = Path(start_dir).resolve()
    # The starting directory first, then its ancestors, capped at six entries.
    candidates = [origin, *origin.parents][:6]
    for candidate in candidates:
        has_package = (candidate / "liars_poker").is_dir()
        if has_package or (candidate / "pyproject.toml").exists():
            return str(candidate)
    return str(origin)

# Resolve the repository root from the current working directory and put it at
# the front of sys.path (unless already listed) so the imports below resolve.
NB_DIR = Path.cwd()
REPO_ROOT = Path(find_repo_root(NB_DIR))
if all(entry != str(REPO_ROOT) for entry in sys.path):
    sys.path.insert(0, str(REPO_ROOT))

from liars_poker import GameSpec, Rules
from liars_poker.policies.random import RandomPolicy
from liars_poker.policies.tabular import TabularPolicy
from liars_poker.policies.commit_once import CommitOnceMixture
from liars_poker.algo.br_exact import best_response_exact as br_baseline

try:
    from liars_poker.algo.br_exact_efficient_old import best_response_exact as br_candidate
    HAVE_CANDIDATE = True
except Exception as exc:  # noqa: BLE001
    print("Candidate implementation not available yet:", exc)
    br_candidate = None
    HAVE_CANDIDATE = False

from liars_poker.eval.match import exact_eval_tabular_both_seats
from liars_poker.infoset import InfoSet


## Comparison Helper
Run both implementations and assert that their exploitability, state values, and strategies agree within a tolerance.


In [None]:
import math
from typing import Dict, Tuple


def extract_state_values(tab_policy: TabularPolicy) -> Dict[Tuple[int, Tuple[int, ...], Tuple[int, ...]], float]:
    """Flatten ``tab_policy.values()`` into a dict keyed by plain tuples.

    Each infoset key is replaced by ``(pid, hand, history)`` taken from its
    attributes; the associated value is carried over unchanged.
    """
    return {
        (infoset.pid, infoset.hand, infoset.history): value
        for infoset, value in tab_policy.values().items()
    }


def extract_probs(tab_policy: TabularPolicy) -> Dict[Tuple[int, Tuple[int, ...], Tuple[int, ...]], Dict[int, float]]:
    """Copy ``tab_policy.probs`` into a dict keyed by plain tuples.

    Each infoset key is replaced by ``(pid, hand, history)`` taken from its
    attributes, and each action distribution is copied into a fresh ``dict``
    so the result does not alias the policy's own table entries.
    """
    return {
        (infoset.pid, infoset.hand, infoset.history): dict(action_dist)
        for infoset, action_dist in tab_policy.probs.items()
    }


def compare_implementations(spec: GameSpec, opp_policy, *, tol=1e-9):
    """Assert that the candidate best response matches the baseline.

    Runs ``br_baseline`` and, when ``HAVE_CANDIDATE`` is true, ``br_candidate``
    on ``(spec, opp_policy)`` and checks, in order:

    1. the exploitability results have the same length and agree pairwise;
    2. every ``state_card_values[history][hand]`` entry produced by the
       baseline exists in the candidate with the same value;
    3. every infoset in the baseline policy's ``probs`` table exists in the
       candidate policy with the same action set and the same normalised
       probabilities.

    Checks 2 and 3 are one-directional: entries that appear only in the
    candidate's tables are not inspected.

    Args:
        spec: Game specification handed to both implementations.
        opp_policy: Opponent policy handed to both implementations.
        tol: Absolute tolerance used for every float comparison.

    Raises:
        AssertionError: On the first mismatch found.
    """

    def _normalized(dist):
        # Rescale so the probabilities sum to 1; a zero-sum distribution maps
        # every action to 0.0 instead of dividing by zero.
        total = sum(dist.values())
        return {a: (p / total if total else 0.0) for a, p in dist.items()}

    # The baseline is evaluated before the availability check, so it still
    # runs when there is no candidate to compare against.
    base_policy, base_br = br_baseline(spec, opp_policy)
    base_policy.bind_rules(base_br.rules)
    base_vals = base_br.state_card_values
    base_exp = tuple(base_br.exploitability())

    if not HAVE_CANDIDATE:
        print("Candidate implementation not available; skipping compare.")
        return

    cand_policy, cand_br = br_candidate(spec, opp_policy)
    cand_policy.bind_rules(cand_br.rules)
    cand_vals = cand_br.state_card_values
    cand_exp = tuple(cand_br.exploitability())

    # Exploitability check. zip() stops at the shorter input, so the lengths
    # are compared first; otherwise a truncated or empty candidate result
    # would pass without any element being compared.
    assert len(base_exp) == len(cand_exp), f"Exploitability mismatch: {base_exp} vs {cand_exp}"
    assert all(abs(a - b) <= tol for a, b in zip(base_exp, cand_exp)), f"Exploitability mismatch: {base_exp} vs {cand_exp}"

    # State values check (all histories seen by baseline)
    for history, hand_map in base_vals.items():
        for hand, val in hand_map.items():
            cand_val = cand_vals.get(history, {}).get(hand)
            assert cand_val is not None, f"Missing state {history} for hand {hand}"
            assert math.isclose(val, cand_val, rel_tol=0, abs_tol=tol), f"Value mismatch at {history}, hand {hand}: {val} vs {cand_val}"

    # Strategy check: compare action dists at all infosets present in baseline policy
    base_probs = extract_probs(base_policy)
    cand_probs = extract_probs(cand_policy)
    for key, dist in base_probs.items():
        other = cand_probs.get(key)
        assert other is not None, f"Missing infoset {key} in candidate policy"
        # Normalize both to avoid tiny numeric differences
        d1 = _normalized(dist)
        d2 = _normalized(other)
        assert set(d1.keys()) == set(d2.keys()), f"Action set mismatch at {key}: {d1.keys()} vs {d2.keys()}"
        for a in d1:
            assert math.isclose(d1[a], d2[a], rel_tol=0, abs_tol=tol), f"Prob mismatch at {key}, action {a}: {d1[a]} vs {d2[a]}"

    print("All checks passed for spec:", spec)


## Test Scenarios
A suite of specs and opponent policies to stress branches and card removal.


In [None]:
# Opponent policy helpers
class AlwaysCall(TabularPolicy):
    """Opponent that puts all probability on action ``-1`` at every infoset.

    NOTE(review): ``-1`` is presumably the "call" action, going by the class
    name; confirm against the rules module.
    """

    POLICY_KIND = "AlwaysCall"

    # Construction is inherited unchanged from TabularPolicy.

    def action_probs(self, infoset: InfoSet):
        """Return ``{-1: 1.0}`` regardless of ``infoset``."""
        return {-1: 1.0}


def make_deterministic_raise(rules):
    """Build a policy that always plays the lowest legal action other than ``-1``.

    ``rules.legal_actions_for(infoset)`` supplies the legal actions. When no
    action besides ``-1`` is legal, the policy plays ``-1`` instead.
    """

    class AlwaysRaise(TabularPolicy):
        """Deterministic policy closed over the ``rules`` given to the factory."""

        POLICY_KIND = "AlwaysRaise"

        def action_probs(self, infoset: InfoSet):
            """Return a one-action distribution with probability 1.0."""
            non_call = [act for act in rules.legal_actions_for(infoset) if act != -1]
            chosen = min(non_call) if non_call else -1
            return {chosen: 1.0}

    return AlwaysRaise()

# Game specs to run the comparison on.
specs = [
    GameSpec(ranks=2, suits=1, hand_size=1, claim_kinds=("RankHigh",)),
    GameSpec(ranks=3, suits=1, hand_size=1, claim_kinds=("RankHigh",)),
    GameSpec(ranks=3, suits=2, hand_size=1, claim_kinds=("RankHigh", "Pair")),
    GameSpec(ranks=3, suits=2, hand_size=2, claim_kinds=("RankHigh", "Pair")),
    GameSpec(ranks=4, suits=4, hand_size=1, claim_kinds=("RankHigh", "Pair"), suit_symmetry=True),
    GameSpec(ranks=4, suits=4, hand_size=2, claim_kinds=("RankHigh", "Pair"), suit_symmetry=True),
]

# For each spec, build four opponents, each bound to its own Rules(spec):
# random, always-call, always-raise, and a 70/30 mixture of random and
# always-raise via CommitOnceMixture.
opp_policies = []
for spec in specs:
    random_opp = RandomPolicy()
    random_opp.bind_rules(Rules(spec))

    caller = AlwaysCall()
    caller.bind_rules(Rules(spec))

    raiser = make_deterministic_raise(Rules(spec))
    raiser.bind_rules(Rules(spec))

    mixture = CommitOnceMixture([random_opp, raiser], [0.7, 0.3])
    mixture.bind_rules(Rules(spec))

    opp_policies.append((spec, [random_opp, caller, raiser, mixture]))

# Compare the two implementations against every opponent of every spec.
for spec, policies in opp_policies:
    print("Running spec", spec)
    for opp in policies:
        opp.begin_episode()
        compare_implementations(spec, opp, tol=1e-9)


In [10]:
# base_policy, base_br = br_baseline(spec, opp)
# base_policy.bind_rules(base_br.rules)
# base_vals = base_br.state_card_values
# base_exp = base_br.exploitability()
# base_br.state_card_values

In [11]:
# cand_policy, cand_br = br_candidate(spec, opp)
# cand_policy.bind_rules(cand_br.rules)
# cand_vals = cand_br.state_card_values
# cand_exp = cand_br.exploitability()
# cand_br.state_card_values

In [12]:
# base_probs = extract_probs(base_policy)
# cand_probs = extract_probs(cand_policy)

## Performance Benchmark
Time one run each of the baseline and the candidate on a non-trivial spec and report the speedup.


In [None]:
import time

# Benchmark fixture: one spec and a random opponent bound to its rules.
spec_bench = GameSpec(
    ranks=6,
    suits=2,
    hand_size=2,
    claim_kinds=("RankHigh", "Pair"),
    suit_symmetry=True,
)
rules_bench = Rules(spec_bench)
opp_bench = RandomPolicy()
opp_bench.bind_rules(rules_bench)

if not HAVE_CANDIDATE:
    print("Candidate implementation not available; benchmark skipped.")
else:
    import timeit

    def run_baseline():
        """Run the baseline best response once on the benchmark fixture."""
        br_baseline(spec_bench, opp_bench)

    def run_candidate():
        """Run the candidate best response once on the benchmark fixture."""
        br_candidate(spec_bench, opp_bench)

    # One timed call each; the baseline is timed first.
    base_time = timeit.timeit(run_baseline, number=1)
    cand_time = timeit.timeit(run_candidate, number=1)
    print(f"Baseline time: {base_time:.4f}s, Candidate time: {cand_time:.4f}s, speedup: {base_time / cand_time if cand_time else float('inf'):.2f}x")
