# Sampling & Env Performance Checks
Guardrails to run before refactoring sampling / Env internals: correctness checks first, then baseline timings to compare against afterwards.


In [12]:
import os, sys, time, statistics, random
from collections import Counter
from pathlib import Path

def find_repo_root(start_dir: "str | os.PathLike[str]", max_depth: int = 6) -> str:
    """Walk upward from ``start_dir`` looking for the repository root.

    A directory is treated as the root when it contains a ``liars_poker``
    directory or a ``pyproject.toml`` file.

    Args:
        start_dir: Directory to start from. Anything ``pathlib.Path`` accepts
            (a string or a path-like object) works.
        max_depth: Maximum number of directories to inspect, counting
            ``start_dir`` itself. Defaults to 6.

    Returns:
        The resolved root directory as a string. Falls back to the resolved
        ``start_dir`` when no marker is found within ``max_depth`` levels or
        before the filesystem root is reached.
    """
    start = Path(start_dir).resolve()
    cur = start
    for _ in range(max_depth):
        if (cur / 'liars_poker').is_dir() or (cur / 'pyproject.toml').exists():
            return str(cur)
        if cur.parent == cur:
            # A path that is its own parent is the filesystem root: stop climbing.
            break
        cur = cur.parent
    return str(start)

# Put the repository root at the front of sys.path so the `liars_poker`
# imports that follow resolve regardless of the notebook's working directory.
NB_DIR = Path.cwd()
REPO_ROOT = Path(find_repo_root(NB_DIR))
if not any(entry == str(REPO_ROOT) for entry in sys.path):
    sys.path.insert(0, str(REPO_ROOT))
print('Repo root:', REPO_ROOT)

from liars_poker import GameSpec, Env, Rules
from liars_poker.infoset import InfoSet, CALL
from liars_poker.policies import RandomPolicy, TabularPolicy
from liars_poker.policies.tabular_dense import DenseTabularPolicy
from liars_poker.eval.match import play_match, eval_seats_split
from liars_poker.algo.br_exact import best_response_exact

# Module-level RNG with a fixed seed.
# NOTE(review): the later cells visible in this notebook rebind `rng` with their
# own seeds before sampling, so this instance looks unused — confirm before removing.
rng = random.Random(123)

def freq_from_samples(samples):
    """Return the relative frequency of each distinct item in ``samples``.

    The result maps each item to ``count / total``, where ``total`` is the
    number of samples. An empty input yields an empty dict.
    """
    counts = Counter(samples)
    n_total = sum(counts.values())
    freqs = {}
    for item, n_seen in counts.items():
        freqs[item] = n_seen / n_total
    return freqs

def assert_close_dist(got, expected, tol=0.08):
    """Assert that two action -> probability mappings agree within ``tol``.

    Every action present in *either* mapping is compared; an action missing
    from one side counts as probability 0.0 there. Comparing the union of keys
    stops unexpected mass in ``got`` (on an action ``expected`` does not list)
    from passing silently.

    Args:
        got: Observed mapping of action -> probability / frequency.
        expected: Reference mapping of action -> probability.
        tol: Maximum allowed absolute difference per action.

    Raises:
        AssertionError: If any action's probabilities differ by more than ``tol``.
    """
    # Expected actions first (in their own order), then actions only in `got`.
    actions = list(expected) + [a for a in got if a not in expected]
    for action in actions:
        g = got.get(action, 0.0)
        exp = expected.get(action, 0.0)
        assert abs(g - exp) <= tol, f"{action}: {g:.3f} vs {exp:.3f}"

def bench(fn, runs=5, warmup=1):
    """Time repeated calls of the zero-argument callable ``fn``.

    ``fn`` is first called ``warmup`` times untimed, then ``runs`` times with
    each call measured via ``time.perf_counter``.

    Returns:
        A list of ``runs`` elapsed wall-clock durations in seconds.
    """
    def _timed_call():
        started = time.perf_counter()
        fn()
        return time.perf_counter() - started

    for _ in range(warmup):
        fn()
    return [_timed_call() for _ in range(runs)]

def summarize(times):
    """Compute summary statistics for a sequence of timings.

    Returns:
        ``None`` for empty input; otherwise a dict with keys ``mean``,
        ``stdev``, ``median``, ``min`` and ``max``. ``stdev`` is 0.0 when
        there is only one measurement.
    """
    if not times:
        return None
    avg = statistics.mean(times)
    # statistics.stdev needs at least two data points.
    spread = 0.0
    if len(times) > 1:
        spread = statistics.stdev(times)
    return dict(
        mean=avg,
        stdev=spread,
        median=statistics.median(times),
        min=min(times),
        max=max(times),
    )

def report(label, times, units='s', per=None):
    """Print a one-line summary (mean, std, median) of ``times``.

    Args:
        label: Prefix printed before the statistics.
        times: Sequence of durations; nothing is printed when it is empty.
        units: Unit suffix appended to each number.
        per: Optional positive divisor; when given, the mean divided by
            ``per`` is appended as a per-item figure.
    """
    stats = summarize(times)
    if stats is None:
        return
    mean = stats['mean']
    pieces = [
        f"mean={mean:.6f}{units}",
        f"std={stats['stdev']:.6f}{units}",
        f"median={stats['median']:.6f}{units}",
    ]
    line = f"{label}: " + ' '.join(pieces)
    if per is not None and per > 0:
        line += f", per={mean / per:.6f} {units}"
    print(line)


Repo root: C:\Users\adidh\Documents\liars_poker


## Correctness: Sampling determinism
Ensure sampling is reproducible under a fixed RNG seed and only ever returns legal actions.


In [13]:
# Small single-suit game with one opening infoset to sample from.
spec = GameSpec(ranks=3, suits=1, hand_size=1, claim_kinds=('RankHigh',))
rules = Rules(spec)
pol = RandomPolicy()
pol.bind_rules(rules)
iset = InfoSet(pid=0, hand=(1,), history=())
legal = rules.legal_actions_for(iset)
print('Legal actions:', legal)

# Two independent generators with the same seed must produce the same draws.
rng1 = random.Random(1)
rng2 = random.Random(1)
samples1 = []
for _ in range(8):
    samples1.append(pol.sample(iset, rng1))
samples2 = []
for _ in range(8):
    samples2.append(pol.sample(iset, rng2))
print('Samples1 == Samples2:', samples1 == samples2, samples1)

# Every sampled action has to be one of the legal actions.
illegal = [a for a in samples1 if a not in legal]
assert not illegal
assert samples1 == samples2
print('Deterministic sampling OK.')


Legal actions: (0, 1, 2)
Samples1 == Samples2: True [0, 2, 2, 0, 1, 1, 1, 2]
Deterministic sampling OK.


## Correctness: Sampling distribution vs action_probs
Empirically check that sampling matches intended probabilities.


In [14]:
spec_dist = GameSpec(ranks=3, suits=1, hand_size=1, claim_kinds=('RankHigh',))
rules_dist = Rules(spec_dist)
iset = InfoSet(pid=0, hand=(1,), history=())
legal = rules_dist.legal_actions_for(iset)

# --- TabularPolicy: explicit 0.7 / 0.2 / 0.1 split over the first three legal actions.
tab = TabularPolicy()
tab.bind_rules(rules_dist)
tab.set(iset, {legal[0]: 0.7, legal[1]: 0.2, legal[2]: 0.1})
expected = tab.action_probs(iset)

rng = random.Random(5)
samples = []
for _ in range(5000):
    samples.append(tab.sample(iset, rng))
freq = freq_from_samples(samples)
print('Tabular expected:', expected)
print('Tabular sampled:', {k: round(v,3) for k,v in freq.items()})
assert_close_dist(freq, expected, tol=0.07)

# --- DenseTabularPolicy: same split written directly into the S array.
# NOTE(review): slots 1..3 of S appear to map to actions 0..2 (see the printed
# `Dense expected` output) — confirm the offset against DenseTabularPolicy.
dense = DenseTabularPolicy(spec_dist)
hand_idx = dense.hand_to_idx[iset.hand]
dense.S[0, hand_idx, :] = 0.0
for slot, prob in enumerate((0.7, 0.2, 0.1), start=1):
    dense.S[0, hand_idx, slot] = prob
dense.recompute_likelihoods()
expected_dense = dense.action_probs(iset)

# Re-seed so the dense draws use the same random stream as the tabular draws.
rng = random.Random(5)
samples = []
for _ in range(5000):
    samples.append(dense.sample(iset, rng))
freq = freq_from_samples(samples)
print('Dense expected:', expected_dense)
print('Dense sampled:', {k: round(v,3) for k,v in freq.items()})
assert_close_dist(freq, expected_dense, tol=0.07)
print('Distribution checks OK.')


Tabular expected: {0: 0.7, 1: 0.2, 2: 0.1}
Tabular sampled: {0: 0.703, 1: 0.194, 2: 0.103}
Dense expected: {0: 0.699999988079071, 1: 0.20000000298023224, 2: 0.10000000149011612}
Dense sampled: {0: 0.703, 1: 0.194, 2: 0.103}
Distribution checks OK.


## Correctness: Dense vs Tabular defaults
An untouched `TabularPolicy` and `DenseTabularPolicy` should fall back to the same uniform default for the same infoset/history.


In [15]:
spec_def = GameSpec(
    ranks=3,
    suits=2,
    hand_size=1,
    claim_kinds=('RankHigh', 'Pair'),
    suit_symmetry=True,
)
rules_def = Rules(spec_def)
# Neither policy gets any explicit probabilities, so both use their defaults.
tab = TabularPolicy()
tab.bind_rules(rules_def)
dense = DenseTabularPolicy(spec_def)

# One infoset with empty history and one after a single action (0).
iset_root = InfoSet(pid=0, hand=(1,), history=())
iset_after = InfoSet(pid=1, hand=(1,), history=(0,))

tab_root, dense_root = tab.action_probs(iset_root), dense.action_probs(iset_root)
tab_after, dense_after = tab.action_probs(iset_after), dense.action_probs(iset_after)

assert_close_dist(tab_root, dense_root, tol=1e-6)
assert_close_dist(tab_after, dense_after, tol=1e-6)
print('Defaults match for root and post-claim states.')


Defaults match for root and post-claim states.


## Correctness: Eval helpers invariants
Sanity-check `play_match` and `eval_seats_split`: for each seating, the two policies' win rates must sum to 1.


In [16]:
spec_sm = GameSpec(ranks=2, suits=1, hand_size=1, claim_kinds=('RankHigh',))
rules_sm = Rules(spec_sm)
p_rand = RandomPolicy()
p_rand.bind_rules(rules_sm)
# Only the first element returned by best_response_exact is used here.
p_br, _ = best_response_exact(spec_sm, p_rand)
p_br.bind_rules(rules_sm)

wins = play_match(Env(spec_sm), p_rand, p_br, episodes=40, seed=7)
print('Sampled wins (rand vs BR):', wins)

seats = eval_seats_split(spec_sm, p_rand, p_br, episodes=200, seed=9)
print('Seat-split win rates:', seats)
# Each pair of complementary entries must sum to exactly one.
for key_a, key_b in (('A_seat1', 'B_seat2'), ('A_seat2', 'B_seat1')):
    assert abs((seats[key_a] + seats[key_b]) - 1.0) < 1e-9
print('Seat-split invariants OK.')


Sampled wins (rand vs BR): {'P1': 18, 'P2': 22}
Seat-split win rates: {'A_seat1': 0.5, 'A_seat2': 0.0, 'B_seat1': 1.0, 'B_seat2': 0.5}
Seat-split invariants OK.


## Performance: policy.sample microbench
Measure per-sample runtime for common policy types.


In [17]:
spec_bench = GameSpec(
    ranks=3,
    suits=2,
    hand_size=1,
    claim_kinds=('RankHigh', 'Pair'),
    suit_symmetry=True,
)
rules_bench = Rules(spec_bench)
iset = InfoSet(pid=0, hand=(1,), history=())

# One policy of each type, all queried at the same infoset.
rand_pol = RandomPolicy()
rand_pol.bind_rules(rules_bench)
tab_pol = TabularPolicy()
tab_pol.bind_rules(rules_bench)
tab_pol.set(iset, {0: 0.6, 1: 0.2, 2: 0.2})
dense_pol = DenseTabularPolicy(spec_bench)
hand_idx = dense_pol.hand_to_idx[iset.hand]
dense_pol.S[0, hand_idx, :] = 0.0
for slot, prob in enumerate((0.6, 0.2, 0.2), start=1):
    dense_pol.S[0, hand_idx, slot] = prob
dense_pol.recompute_likelihoods()

# Number of samples drawn per timed run.
N = 50000

def bench_sample(policy):
    """Draw ``N`` samples from ``policy`` at ``iset`` with a freshly seeded RNG."""
    sample_rng = random.Random(1)
    for _ in range(N):
        policy.sample(iset, sample_rng)

policies = {'Random': rand_pol, 'Tabular': tab_pol, 'Dense': dense_pol}
for label, policy in policies.items():
    # Bind the policy as a default argument so each lambda times its own policy.
    times = bench(lambda p=policy: bench_sample(p), runs=5, warmup=1)
    report(f'{label} sample', times, units='s', per=N)


Random sample: mean=0.433116s std=0.013585s median=0.437546s, per=0.000009 s
Tabular sample: mean=0.679143s std=0.016440s median=0.679458s, per=0.000014 s
Dense sample: mean=0.378680s std=0.007704s median=0.379014s, per=0.000008 s


## Performance: play_match microbench
Measure end-to-end match runtime for common policy pairings.


In [18]:
spec_match = GameSpec(
    ranks=3,
    suits=2,
    hand_size=1,
    claim_kinds=('RankHigh', 'Pair'),
    suit_symmetry=True,
)
rules_match = Rules(spec_match)
rand_a = RandomPolicy()
rand_a.bind_rules(rules_match)
rand_b = RandomPolicy()
rand_b.bind_rules(rules_match)
dense_a = DenseTabularPolicy(spec_match)
dense_b = DenseTabularPolicy(spec_match)

# Episodes played per timed run.
EP = 200

def bench_play(p1, p2):
    """Play ``EP`` episodes of ``p1`` vs ``p2`` on a fresh Env with a fixed seed."""
    # The Env is built inside the timed call, so its construction is part of the measurement.
    play_match(Env(spec_match), p1, p2, episodes=EP, seed=77)

pairs = [
    ('Rand vs Rand', rand_a, rand_b),
    ('Rand vs Dense', rand_a, dense_a),
    ('Dense vs Dense', dense_a, dense_b),
]

for label, p1, p2 in pairs:
    # Default arguments pin this iteration's pairing inside the lambda.
    times = bench(lambda a=p1, b=p2: bench_play(a, b), runs=5, warmup=1)
    report(label, times, units='s', per=EP)


Rand vs Rand: mean=0.025737s std=0.001849s median=0.026309s, per=0.000129 s
Rand vs Dense: mean=0.023667s std=0.002806s median=0.022484s, per=0.000118 s
Dense vs Dense: mean=0.024343s std=0.002414s median=0.024383s, per=0.000122 s
