# Dense CFR Sanity Checks

Quick correctness and blocker-aware checks for the dense CFR implementation.

In [2]:
import os, sys
from pathlib import Path

# Add repo root to sys.path

def find_repo_root(start_dir: "str | Path", max_levels: int = 6) -> str:
    """Locate the repository root by walking upward from ``start_dir``.

    A directory counts as the root when it contains a ``liars_poker``
    directory or a ``pyproject.toml`` file. The search examines ``start_dir``
    itself first and then its ancestors, nearest first, stopping at the
    filesystem root.

    Args:
        start_dir: Directory to start from, as a string or ``Path``. It is
            resolved to an absolute path before searching.
        max_levels: Maximum number of directories to examine, counting
            ``start_dir`` itself. Defaults to 6; values below 1 skip the
            search entirely.

    Returns:
        The first matching directory as a string, or the resolved
        ``start_dir`` when no marker is found within ``max_levels``.
    """
    origin = Path(start_dir).resolve()
    # ``origin.parents`` runs from the immediate parent up to the filesystem
    # root, so slicing caps the depth. Clamp at 0 because a negative slice
    # bound would drop entries from the end instead of limiting the search.
    candidates = [origin, *origin.parents][: max(max_levels, 0)]
    for candidate in candidates:
        has_package = (candidate / "liars_poker").is_dir()
        if has_package or (candidate / "pyproject.toml").exists():
            return str(candidate)
    # No marker found: fall back to the starting directory itself.
    return str(origin)

# Resolve the repo root from the notebook's working directory and put it at
# the front of sys.path (only once) so the project imports below resolve.
NB_DIR = Path.cwd()
REPO_ROOT = Path(find_repo_root(NB_DIR))
_repo_root_entry = str(REPO_ROOT)
if _repo_root_entry not in sys.path:
    sys.path.insert(0, _repo_root_entry)

from liars_poker.core import GameSpec
from liars_poker.infoset import CALL
from liars_poker.policies.tabular_dense import DenseTabularPolicy
from liars_poker.algo.cfr_exact_dense import CFRExactDense
from liars_poker.algo.br_exact_dense_to_dense import best_response_dense


## Test 1: Strategy validity (legal actions only, rows sum to 1)

In [3]:
# Tiny game: 3 ranks x 2 suits, one-card hands, RankHigh and Pair claims.
spec = GameSpec(ranks=3, suits=2, hand_size=1, claim_kinds=("RankHigh", "Pair"))

# Run a handful of CFR iterations and take the resulting average policy.
cfr = CFRExactDense(spec)
for _ in range(5):
    cfr.iterate()
avg_policy = cfr.average_policy()

# S is unpacked as a 3-D array; the loop below indexes it as
# S[history id, hand index, action column].
H, N, A = avg_policy.S.shape

# For history ids 0..5 (hand index 0 only), the probabilities over the legal
# actions must sum to 1. Histories with no legal actions are skipped.
for hid in range(6):
    legal = avg_policy.legal_actions[hid]
    if legal:
        # CALL is stored in column 0; any other action a is stored in column a + 1.
        cols = [a + 1 if a != CALL else 0 for a in legal]
        row = avg_policy.S[hid, 0, cols]
        assert abs(float(row.sum()) - 1.0) < 1e-6, (hid, row.sum())

# CALL should be illegal at root (hid=0), so column 0 should be zero there
root_call_prob = float(avg_policy.S[0, 0, 0])
assert root_call_prob == 0.0
print('Test 1 passed')


Test 1 passed


## Test 2: Exploitability trend (small spec, few iterations)

In [4]:
# Small game: 2 ranks x 4 suits, two-card hands, RankHigh and Pair claims.
spec = GameSpec(ranks=2, suits=4, hand_size=2, claim_kinds=("RankHigh", "Pair"))
cfr = CFRExactDense(spec)

episodes = 100
results = {}  # loop index -> (p_first, p_second, average of the two)

# After each CFR iteration, best-respond to the current average policy and
# record the two exploitability values it reports. range(episodes + 1) runs
# episodes + 1 iterations, so results[0] is taken after the first iterate().
for t in range(episodes + 1):
    cfr.iterate()
    avg_policy = cfr.average_policy()

    br_pol, meta = best_response_dense(spec, avg_policy, debug=False)
    p1, p2 = meta["computer"].exploitability()
    results[t] = (p1, p2, 0.5 * (p1 + p2))

    print(f"iter={t}: p_first={p1:.6f}, p_second={p2:.6f}, avg={0.5 * (p1 + p2):.6f}")

# Qualitative check: the last average exploitability must not exceed the
# first one (with a small tolerance) in this tiny game.
first_avg = results[0][2]
last_avg = results[episodes][2]
assert last_avg <= first_avg + 1e-6
print('Test 2 completed')


iter=0: p_first=0.842857, p_second=0.671429, avg=0.757143
iter=1: p_first=0.821429, p_second=0.438187, avg=0.629808
iter=2: p_first=0.814286, p_second=0.358791, avg=0.586538
iter=3: p_first=0.810714, p_second=0.319093, avg=0.564904
iter=4: p_first=0.808571, p_second=0.295275, avg=0.551923
iter=5: p_first=0.807143, p_second=0.279396, avg=0.543269
iter=6: p_first=0.806122, p_second=0.268053, avg=0.537088
iter=7: p_first=0.805357, p_second=0.259547, avg=0.532452
iter=8: p_first=0.804762, p_second=0.252930, avg=0.528846
iter=9: p_first=0.804286, p_second=0.247637, avg=0.525962
iter=10: p_first=0.803896, p_second=0.243307, avg=0.523601
iter=11: p_first=0.803571, p_second=0.239698, avg=0.521635
iter=12: p_first=0.803297, p_second=0.236644, avg=0.519970
iter=13: p_first=0.803061, p_second=0.234027, avg=0.518544
iter=14: p_first=0.802857, p_second=0.231758, avg=0.517308
iter=15: p_first=0.802679, p_second=0.229773, avg=0.516226
iter=16: p_first=0.802521, p_second=0.228022, avg=0.515271
iter=17

In [5]:
# Run the dense FSP loop (presumably fictitious self-play; confirm against
# liars_poker.training.dense_fsp) with the `spec` and `episodes` values
# defined in the Test 2 cell, so the logged exploitability can be compared
# with the CFR numbers printed there.
from liars_poker.training.dense_fsp import dense_fsp_loop

pol1, info1 = dense_fsp_loop(
    spec=spec,
    episodes=episodes,
    episodes_test=0,
    efficient=True,
)

Predicted exploitability: avg=0.757142857 (first=0.8429, second=0.6714)
Sampled exploitability: avg=0.0000 (BR as P1=0.0000, BR as P2=0.0000), chi2 p-value=nan

Predicted exploitability: avg=0.628571429 (first=0.8214, second=0.4357)
Sampled exploitability: avg=0.0000 (BR as P1=0.0000, BR as P2=0.0000), chi2 p-value=nan

Predicted exploitability: avg=0.585714287 (first=0.8143, second=0.3571)
Sampled exploitability: avg=0.0000 (BR as P1=0.0000, BR as P2=0.0000), chi2 p-value=nan

Predicted exploitability: avg=0.564285714 (first=0.8107, second=0.3179)
Sampled exploitability: avg=0.0000 (BR as P1=0.0000, BR as P2=0.0000), chi2 p-value=nan

Predicted exploitability: avg=0.551428571 (first=0.8086, second=0.2943)
Sampled exploitability: avg=0.0000 (BR as P1=0.0000, BR as P2=0.0000), chi2 p-value=nan

Predicted exploitability: avg=0.542857143 (first=0.8071, second=0.2786)
Sampled exploitability: avg=0.0000 (BR as P1=0.0000, BR as P2=0.0000), chi2 p-value=nan

Predicted exploitability: avg=0.53

## Test 3: Blocker asymmetry check (suit-symmetric deck)

Find at least one pair of hands `(i, j)` where `A0[i, j]` differs from `A1[j, i]`, i.e. where `A0` is not simply the transpose of `A1`.

In [6]:
# Suit-symmetric game: 3 ranks x 4 suits, two-card hands, RankHigh claims only.
spec = GameSpec(ranks=3, suits=4, hand_size=2, claim_kinds=("RankHigh",), suit_symmetry=True)

cfr = CFRExactDense(spec)
# A0 / A1 are matrices exposed by the solver, indexed here by pairs of hand
# indices. NOTE(review): their exact meaning is not visible in this notebook.
A0 = cfr.A0
A1 = cfr.A1

# First (i, j) in row-major order where A0[i, j] and A1[j, i] disagree,
# i.e. where A0 is not the transpose of A1; None when they agree everywhere.
found = next(
    (
        (i, j, A0[i, j], A1[j, i])
        for i in range(cfr.n_hands)
        for j in range(cfr.n_hands)
        if abs(A0[i, j] - A1[j, i]) > 1e-9
    ),
    None,
)

if not found:
    print('No asymmetry found in this spec (possible but unlikely); matrices still computed separately.')
else:
    i, j, a0, a1 = found
    print(f"Found asymmetry at hands {i},{j}: A0={a0}, A1^T={a1}")


Found asymmetry at hands 0,1: A0=8.0, A1^T=3.0


In [7]:
from datetime import datetime
from pathlib import Path

from liars_poker.core import GameSpec, ARTIFACTS_ROOT
from liars_poker.training.cfr_dense import (
    cfr_dense_loop,
    cfr_dense_resume,
    save_cfr_run,
    load_cfr_state,
)

# Save -> load -> resume round trip for a dense CFR run.
#
# The iteration counts are named once here so that the value passed to the
# training loop and the value recorded in the saved metadata cannot drift
# apart, and so the assertions below can check the round trip.
N_ITERATIONS = 5
EVAL_EVERY = 1
N_RESUME_ITERATIONS = 3

spec = GameSpec(ranks=2, suits=2, hand_size=1, claim_kinds=("RankHigh",), suit_symmetry=True)
# Timestamped id so repeated runs of this cell do not reuse a run directory.
run_id = f"cfr_demo_{datetime.now().strftime('%Y%m%d-%H%M%S')}"

policy, logs, cfr = cfr_dense_loop(spec, iterations=N_ITERATIONS, eval_every=EVAL_EVERY)
save_cfr_run(
    run_id=run_id,
    policy=policy,
    cfr=cfr,
    logs=logs,
    spec=spec,
    iterations_done=N_ITERATIONS,
    eval_every=EVAL_EVERY,
)

# NOTE(review): assumes save_cfr_run writes to ARTIFACTS_ROOT/benchmark_runs/<run_id>;
# confirm against liars_poker.training.cfr_dense.
run_dir = Path(ARTIFACTS_ROOT) / 'benchmark_runs' / run_id
cfr_loaded, spec_loaded, metrics, iter_done = load_cfr_state(run_dir)
print('Loaded', spec_loaded, 'iter_done', iter_done)
print('Metrics keys:', list(metrics.keys()))
# The reloaded state must report exactly the iterations that were saved.
assert iter_done == N_ITERATIONS, (iter_done, N_ITERATIONS)

# Resume for a few more iterations
policy2, logs2, cfr2, spec2, total_iters = cfr_dense_resume(
    run_dir, remaining_iterations=N_RESUME_ITERATIONS, eval_every=0
)
print('Resumed to total_iters', total_iters)
# Resuming must continue from the saved count rather than restart from zero.
assert total_iters == N_ITERATIONS + N_RESUME_ITERATIONS, (total_iters, N_ITERATIONS, N_RESUME_ITERATIONS)


[cfr] iter=1 elapsed=0.00s 
[cfr] iter=2 elapsed=0.01s 
[cfr] iter=3 elapsed=0.01s 
[cfr] iter=4 elapsed=0.01s 
[cfr] iter=5 elapsed=0.01s 
Loaded GameSpec(ranks=2, suits=2, hand_size=1, claim_kinds=('RankHigh',), suit_symmetry=True) iter_done 5
Metrics keys: ['run_type', 'spec', 'iterations', 'eval_every', 'exploitability_series', 'timestamp']
[cfr] iter=6 elapsed=0.00s 
[cfr] iter=7 elapsed=0.00s 
[cfr] iter=8 elapsed=0.00s 
Resumed to total_iters 8
