# Best Response Correctness Harness
Compare the baseline `best_response_exact` against a more efficient candidate implementation (`br_exact_dense_to_dense`).


## What best response to use
 - If the opponent is a complete black box, you probably shouldn't use an exact best response at all. If you need one anyway, use `br_exact`.
 - If the opponent is Tabular, `br_exact` will do OK.
 - If the opponent is DenseTabular, use `br_exact_dense_to_dense`.

In [12]:
import os, sys
from pathlib import Path
from typing import Iterable, Tuple

def find_repo_root(start_dir: str) -> str:
    """Locate the repository root by walking upward from ``start_dir``.

    A directory counts as the root when it contains a ``liars_poker``
    directory or a ``pyproject.toml`` file.  At most six directories are
    inspected (``start_dir`` itself plus up to five ancestors).  If none of
    them qualifies, the resolved ``start_dir`` is returned instead.
    """
    origin = Path(start_dir).resolve()
    # The start directory followed by its ancestors, capped at six candidates.
    candidates = [origin, *origin.parents][:6]
    for candidate in candidates:
        has_package = (candidate / "liars_poker").is_dir()
        if has_package or (candidate / "pyproject.toml").exists():
            return str(candidate)
    return str(origin)

# Put the repository root on sys.path (once) so `liars_poker` can be imported
# from wherever the notebook was started.
NB_DIR = Path.cwd()
REPO_ROOT = Path(find_repo_root(NB_DIR))
if sys.path.count(str(REPO_ROOT)) == 0:
    sys.path.insert(0, str(REPO_ROOT))

from liars_poker import GameSpec, Rules, Policy
from liars_poker.core import possible_starting_hands
from liars_poker.infoset import CALL, InfoSet
from liars_poker.policies.random import RandomPolicy
from liars_poker.policies.tabular import TabularPolicy
from liars_poker.policies.commit_once import CommitOnceMixture
from liars_poker.algo.br_exact import best_response_exact as br_baseline


from liars_poker.algo.br_exact_dense_to_dense import best_response_exact as br_candidate


from liars_poker.eval.match import eval_seats_split
from liars_poker.infoset import InfoSet
from liars_poker.serialization import load_policy


## Comparison Helper
Run both implementations and assert that their predicted exploitability values agree within a tolerance. (State-value and strategy-equality checks are not performed by this helper.)


In [13]:
import math
from typing import Dict, Tuple

def compare_predicted_exploitability(spec: GameSpec, opp_policy, *, tol=1e-9):
    """Check that both best-response solvers predict the same exploitability.

    Runs ``br_baseline`` and ``br_candidate`` against ``opp_policy`` on
    ``spec``, reads ``meta['computer'].exploitability()`` from each result and
    compares the two sequences element by element.

    Args:
        spec: Game specification passed to both solvers.
        opp_policy: Opponent policy to best-respond to.
        tol: Maximum allowed absolute difference for each pair of values.

    Raises:
        AssertionError: If any pair of values differs by more than ``tol``.
        ValueError: If the two exploitability sequences have different
            lengths (raised by ``zip(..., strict=True)``).
    """
    _, base_meta = br_baseline(spec, opp_policy)
    base_exp = base_meta['computer'].exploitability()

    _, cand_meta = br_candidate(spec, opp_policy)
    cand_exp = cand_meta['computer'].exploitability()

    # strict=True: a plain zip would silently drop trailing values when the
    # sequences differ in length, letting a mismatch pass unnoticed.
    assert all(
        abs(a - b) <= tol for a, b in zip(base_exp, cand_exp, strict=True)
    ), f"Exploitability mismatch: {base_exp} vs {cand_exp}"
    print(f"Exploitability check passed for {spec}\n{base_exp} vs {cand_exp}, against policy {opp_policy}.")



## Test Scenarios
A suite of specs and opponent policies to stress branches and card removal.


Helper functions that generate some basic opponent policies.

In [14]:
import numpy as np
from typing import Callable, Tuple
from liars_poker.infoset import CALL
from liars_poker.policies.tabular_dense import DenseTabularPolicy 

def _action_to_col(action: int) -> int:
    """Map an action id to its column: ``CALL`` -> 0, any other action -> ``action + 1``."""
    if action == CALL:
        return 0
    return action + 1

def _min_raise_action(legal: Tuple[int, ...]) -> int | None:
    """Return the smallest action in ``legal`` other than ``CALL``, or ``None`` if there is none."""
    return min((act for act in legal if act != CALL), default=None)

def _make_dense_from_rule(
    spec,
    choose_action: Callable[[DenseTabularPolicy, int, Tuple[int, ...]], int],
) -> DenseTabularPolicy:
    """Build a deterministic ``DenseTabularPolicy`` from a per-``hid`` action rule.

    ``choose_action(policy, hid, legal_actions_tuple)`` must return an action
    id that is legal for that ``hid``.  For every first-axis index ``hid`` of
    ``policy.S`` whose ``legal_actions`` entry is non-empty, the chosen
    action's column is set to 1.0 and every other column to 0.0 across the
    whole second axis.  Likelihoods are recomputed before returning.

    Raises:
        ValueError: If the chosen action's column is not set in ``legal_mask``.
    """
    policy = DenseTabularPolicy(spec)
    # S is unpacked as a 3-D array; only the first axis is iterated here.
    num_hids, _, _ = policy.S.shape

    for hid in range(num_hids):
        legal = policy.legal_actions[hid]
        if legal:
            chosen = choose_action(policy, hid, legal)
            chosen_col = _action_to_col(chosen)
            if not policy.legal_mask[hid, chosen_col]:
                raise ValueError(f"Chosen action {chosen} not legal at hid={hid}.")

            # One-hot over the action axis, identical for every second-axis entry.
            policy.S[hid, :, :] = 0.0
            policy.S[hid, :, chosen_col] = 1.0

    policy.recompute_likelihoods()
    return policy


In [31]:
def AlwaysCallDense(spec) -> DenseTabularPolicy:
    """Dense policy that plays ``CALL`` wherever it is legal, otherwise the minimum legal action."""
    def chooser(p: DenseTabularPolicy, hid: int, legal: Tuple[int, ...]) -> int:
        return CALL if CALL in legal else min(legal)

    return _make_dense_from_rule(spec, chooser)

def AlwaysRaiseDense(spec) -> DenseTabularPolicy:
    """Dense policy that plays the smallest non-``CALL`` action, or ``CALL`` when none exists."""
    def chooser(p: DenseTabularPolicy, hid: int, legal: Tuple[int, ...]) -> int:
        lowest_raise = _min_raise_action(legal)
        if lowest_raise is None:
            return CALL
        return lowest_raise

    return _make_dense_from_rule(spec, chooser)

def SeededStochasticDense(spec, *, seed: int = 0, temperature: float = 1.0) -> DenseTabularPolicy:
    """Build a randomly mixed ``DenseTabularPolicy`` that is reproducible for a given ``seed``.

    For every first-axis index of ``policy.S`` with at least one legal column,
    uniform random weights are drawn over the legal columns, raised to the
    power ``1 / temperature`` (lower temperature => peakier) and normalized so
    each row sums to 1.  Rows whose weights sum to zero fall back to a uniform
    distribution.  Illegal columns are set to 0.

    Args:
        spec: Game specification passed to ``DenseTabularPolicy``.
        seed: Seed for ``numpy.random.default_rng``.
        temperature: Shaping temperature, clamped below at ``1e-8``.
    """
    policy = DenseTabularPolicy(spec)
    num_hids, num_rows, _ = policy.S.shape
    generator = np.random.default_rng(seed)

    # Clamp the temperature so the exponent stays finite.
    exponent = 1.0 / max(float(temperature), 1e-8)

    for hid in range(num_hids):
        legal_cols = np.flatnonzero(policy.legal_mask[hid])
        n_legal = int(legal_cols.size)
        if n_legal == 0:
            continue

        # Random positive weights over the legal columns, shaped by temperature.
        weights = generator.random((num_rows, n_legal), dtype=np.float32)
        weights = np.power(weights, exponent, dtype=np.float32)

        # Normalize each row; rows with a zero total keep the uniform fallback.
        totals = weights.sum(axis=1, keepdims=True)
        uniform = np.full_like(weights, 1.0 / n_legal)
        weights = np.divide(weights, totals, out=uniform, where=totals > 0)

        policy.S[hid, :, :] = 0.0
        # Index in two steps so the (rows, legal columns) layout of `weights` is kept.
        policy.S[hid][:, legal_cols] = weights

    policy.recompute_likelihoods()
    return policy


In [None]:
from liars_poker.serialization import load_policy
# Load a serialized policy together with its spec.
# NOTE(review): the empty path looks like a placeholder — supply a real policy
# directory before running this cell.
pol, temp_spec = load_policy('')

In [44]:
# Game specs to exercise, from tiny single-suit games up to multi-suit games
# with suit symmetry and up to three claim kinds.
specs = [
    GameSpec(ranks=2, suits=1, hand_size=1, claim_kinds=("RankHigh",)),
    GameSpec(ranks=3, suits=1, hand_size=1, claim_kinds=("RankHigh",)),
    GameSpec(ranks=3, suits=2, hand_size=1, claim_kinds=("RankHigh", "Pair")),
    GameSpec(ranks=3, suits=2, hand_size=2, claim_kinds=("RankHigh", "Pair")),
    GameSpec(ranks=4, suits=4, hand_size=1, claim_kinds=("RankHigh", "Pair"), suit_symmetry=True),
    GameSpec(ranks=4, suits=4, hand_size=2, claim_kinds=("RankHigh", "Pair"), suit_symmetry=True),
    GameSpec(ranks=2, suits=4, hand_size=2, claim_kinds=("RankHigh", "Pair", "Trips"), suit_symmetry=True),
    GameSpec(ranks=3, suits=4, hand_size=2, claim_kinds=("RankHigh", "Pair", "Trips"), suit_symmetry=True),
]

# List of (spec, [opponent policies]) pairs consumed by the comparison loops.
opp_policies = []

# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
preloaded_policy_dirs = ['C:/Users/adidh/Documents/liars_poker/artifacts/my_dense_policy']

# Named `policy_dir` rather than `dir` so the builtin `dir()` is not shadowed
# for the rest of the notebook session.
for policy_dir in preloaded_policy_dirs:
    _pol, _spec = load_policy(policy_dir)
    opp_policies.append((_spec, [_pol]))

# Three synthetic opponents per spec, each bound to its own Rules instance.
for spec in specs:
    c = AlwaysCallDense(spec)
    c.bind_rules(Rules(spec))
    r = AlwaysRaiseDense(spec)
    r.bind_rules(Rules(spec))
    s = SeededStochasticDense(spec)
    s.bind_rules(Rules(spec))

    opp_policies.append((spec, [c, r, s]))

# Run the exploitability comparison for every (spec, opponent) pair.
for spec, policies in opp_policies:
    print("Running spec", spec)
    for opp in policies:
        compare_predicted_exploitability(spec, opp, tol=1e-6)
        print()

Running spec GameSpec(ranks=7, suits=4, hand_size=2, claim_kinds=('RankHigh', 'Pair'), suit_symmetry=True)


KeyboardInterrupt: 


## Empirical Seat-by-Seat Comparison
Compare sampled win rates of baseline vs candidate best responses across seats for each spec/opponent, and check if differences are statistically insignificant.


In [22]:
import math
from collections import namedtuple

def chi2_two_props(x1, n1, x2, n2):
    """Pearson chi-square test (df=1) for a difference between two proportions.

    Args:
        x1: Number of successes in the first sample.
        n1: Size of the first sample.
        x2: Number of successes in the second sample.
        n2: Size of the second sample.

    Returns:
        The p-value as a float.  ``nan`` when either sample size is
        non-positive; ``1.0`` when the pooled proportion is exactly 0 or 1
        (both samples are then all-failure or all-success and the expected
        counts would be zero).
    """
    if n1 <= 0 or n2 <= 0:
        return float('nan')
    p_pool = (x1 + x2) / (n1 + n2)
    if p_pool in (0, 1):
        return 1.0

    # Sum (observed - expected)^2 / expected over the success and failure
    # cells of each sample.
    chi2 = 0.0
    for successes, size in ((x1, n1), (x2, n2)):
        exp_s = size * p_pool
        exp_f = size * (1 - p_pool)
        chi2 += (successes - exp_s) ** 2 / exp_s
        chi2 += (size - successes - exp_f) ** 2 / exp_f

    # For one degree of freedom the chi-square survival function has the
    # closed form erfc(sqrt(chi2 / 2)).  Using it avoids the optional scipy
    # dependency (and its silent NaN fallback) and keeps precision for small
    # p-values, which `1 - cdf` loses.
    return math.erfc(math.sqrt(chi2 / 2.0))


In [33]:

# Empirical seat-by-seat comparison: for every (spec, opponent) pair, sample
# games with the baseline and the candidate best response, then test whether
# their per-seat win rates differ.
EPISODES = 100
results = []
for spec, policies in opp_policies:
    for opp in policies:
        opp.begin_episode()

        # Baseline best response, evaluated with seed 1.
        base_policy, _ = br_baseline(spec, opp)
        base_policy.bind_rules(Rules(spec))
        base_results = eval_seats_split(spec, base_policy, opp, episodes=EPISODES, seed=1)

        # Candidate best response, evaluated with seed 2.
        cand_policy, _ = br_candidate(spec, opp)
        cand_policy.bind_rules(Rules(spec))
        cand_results = eval_seats_split(spec, cand_policy, opp, episodes=EPISODES, seed=2)

        # Convert win rates back to win counts for the chi-square test.
        # NOTE(review): assumes eval_seats_split plays EPISODES // 2 games in
        # seat 1 and the remainder in seat 2 — confirm against its implementation.
        half = EPISODES // 2
        rem = EPISODES - half
        seat_sizes = {'A_seat1': half, 'A_seat2': rem}
        p1_p, p2_p = (
            chi2_two_props(base_results[seat] * size, size, cand_results[seat] * size, size)
            for seat, size in seat_sizes.items()
        )

        row_entry = (spec, opp.__class__.__name__, base_results, cand_results, p1_p, p2_p)
        results.append(row_entry)


def fmt_p(p):
    """Format a p-value to four decimals, appending `` **LOW**`` when it is below 0.05.

    Returns ``"n/a"`` when ``p`` is ``None`` or NaN.
    """
    if p is None or math.isnan(p):
        return "n/a"
    text = f"{p:.4f}"
    if p < 0.05:
        text += " **LOW**"
    return text

# Print one summary per (spec, opponent) pair: win rates per seat for both
# best responses, followed by the chi-square p-values and a blank line.
for row in results:
    spec, opp_name, base_res, cand_res, p1_p, p2_p = row
    print(
        f"Spec={spec}, Opp={opp_name}",
        f"  Base: P1 win={base_res['A_seat1']:.3f}, P2 win={base_res['A_seat2']:.3f}",
        f"  Cand: P1 win={cand_res['A_seat1']:.3f}, P2 win={cand_res['A_seat2']:.3f}",
        f"  chi2 p-values: P1 seat={fmt_p(p1_p)}, P2 seat={fmt_p(p2_p)}",
        sep="\n",
        end="\n\n",
    )


Spec=GameSpec(ranks=2, suits=1, hand_size=1, claim_kinds=('RankHigh',), suit_symmetry=False), Opp=DenseTabularPolicy
  Base: P1 win=1.000, P2 win=1.000
  Cand: P1 win=1.000, P2 win=1.000
  chi2 p-values: P1 seat=1.0000, P2 seat=1.0000

Spec=GameSpec(ranks=2, suits=1, hand_size=1, claim_kinds=('RankHigh',), suit_symmetry=False), Opp=DenseTabularPolicy
  Base: P1 win=1.000, P2 win=1.000
  Cand: P1 win=1.000, P2 win=1.000
  chi2 p-values: P1 seat=1.0000, P2 seat=1.0000

Spec=GameSpec(ranks=3, suits=1, hand_size=1, claim_kinds=('RankHigh',), suit_symmetry=False), Opp=DenseTabularPolicy
  Base: P1 win=1.000, P2 win=0.840
  Cand: P1 win=1.000, P2 win=0.780
  chi2 p-values: P1 seat=1.0000, P2 seat=0.4444

Spec=GameSpec(ranks=3, suits=1, hand_size=1, claim_kinds=('RankHigh',), suit_symmetry=False), Opp=DenseTabularPolicy
  Base: P1 win=0.740, P2 win=0.580
  Cand: P1 win=0.800, P2 win=0.640
  chi2 p-values: P1 seat=0.4759, P2 seat=0.5385

Spec=GameSpec(ranks=3, suits=2, hand_size=1, claim_kinds

## Performance Benchmark
Compare baseline vs candidate on a non-trivial spec.


In [35]:
import time
import timeit

# Benchmark inputs: one larger spec and a seeded stochastic opponent bound to its rules.
spec_bench = GameSpec(ranks=6, suits=4, hand_size=2, claim_kinds=("RankHigh", "Pair"), suit_symmetry=True)
rules_bench = Rules(spec_bench)
opp_bench = SeededStochasticDense(spec_bench)
opp_bench.bind_rules(rules_bench)


def run_baseline():
    """Run the baseline best-response solver once on the benchmark inputs."""
    br_baseline(spec_bench, opp_bench)


def run_candidate():
    """Run the candidate best-response solver once on the benchmark inputs."""
    br_candidate(spec_bench, opp_bench)


# Single-shot timings; the candidate is timed first, then the baseline.
cand_time = timeit.Timer(run_candidate).timeit(number=1)
base_time = timeit.Timer(run_baseline).timeit(number=1)

print(f"Baseline time: {base_time:.4f}s, Candidate time: {cand_time:.4f}s, speedup: {base_time / cand_time if cand_time else float('inf'):.2f}x")



Baseline time: 26.2910s, Candidate time: 1.1681s, speedup: 22.51x
