# Dense Policy & Dense BR Tests
Validate DenseTabularPolicy, mixing, and dense-to-dense best response on small specs.


In [1]:
import sys
import random
from pathlib import Path
import numpy as np

def find_repo_root(start_dir: str) -> str:
    """Locate the repository root by walking upward from ``start_dir``.

    A directory qualifies as the root when it contains a ``liars_poker``
    directory or a ``pyproject.toml`` file. Only the resolved start directory
    and at most five of its nearest ancestors are inspected; when none of
    them qualifies, the resolved start directory itself is returned.
    """
    origin = Path(start_dir).resolve()
    # The start directory first, then its ancestors from nearest to farthest.
    lineage = [origin, *origin.parents]
    for candidate in lineage[:6]:
        has_package = (candidate / "liars_poker").is_dir()
        if has_package or (candidate / "pyproject.toml").exists():
            return str(candidate)
    return str(origin)

# Put the repository root at the front of sys.path (once) so the project
# imports that follow can resolve when the notebook runs from a subdirectory.
NB_DIR = Path.cwd()
REPO_ROOT = Path(find_repo_root(NB_DIR))
repo_root_entry = str(REPO_ROOT)
if repo_root_entry not in sys.path:
    sys.path.insert(0, repo_root_entry)

from liars_poker import GameSpec, Rules
from liars_poker.infoset import InfoSet, CALL
from liars_poker.policies.tabular import TabularPolicy
from liars_poker.policies.tabular_dense import DenseTabularPolicy, mix_dense
from liars_poker.algo import br_exact
from liars_poker.algo.br_exact_dense_to_dense import best_response_exact as br_dense

def assert_close_dicts(a, b, tol=1e-6):
    """Assert that two mappings have the same keys and near-equal values.

    Fails with a bare ``AssertionError`` when the key sets differ, and with a
    ``"<key>: <a value> vs <b value>"`` message for the first key whose values
    differ by more than ``tol`` in absolute terms.
    """
    # Any key present on only one side shows up in the symmetric difference.
    assert not (set(a.keys()) ^ set(b.keys()))
    for key in a:
        left, right = a[key], b[key]
        assert abs(left - right) <= tol, f"{key}: {left} vs {right}"


## Test 1: action_probs matches dense rows
Ensure `action_probs` matches the corresponding row of `S` and respects action legality (CALL is illegal at the root and legal after a claim).


In [2]:
spec = GameSpec(
    ranks=2,
    suits=1,
    hand_size=1,
    claim_kinds=("RankHigh",),
    suit_symmetry=False,
)
rules = Rules(spec)
pol = DenseTabularPolicy(spec)
hand = pol.hands[0]
hand_row_idx = pol.hand_to_idx[hand]

# Root infoset (empty history) for pid0: CALL must not be offered.
iset = InfoSet(pid=0, hand=hand, history=())
dist = pol.action_probs(iset)
assert CALL not in dist
# Columns 1 and 2 of the dense row are compared against actions 0 and 1.
row = pol.S[0, hand_row_idx]
expected = {claim: float(row[claim + 1]) for claim in (0, 1)}
assert_close_dicts(dist, expected)

# Put all of the root row's mass on column 1 (action 0) and check that
# action_probs picks up the edited row.
pol.S[0, hand_row_idx, :] = 0.0
pol.S[0, hand_row_idx, 1] = 1.0
dist2 = pol.action_probs(iset)
row2 = pol.S[0, hand_row_idx]
expected2 = {claim: float(row2[claim + 1]) for claim in (0, 1)}
assert_close_dicts(dist2, expected2)

# Once claim 0 is on the table, the next player (pid1) may CALL.
iset2 = InfoSet(pid=1, hand=hand, history=(0,))
dist3 = pol.action_probs(iset2)
assert CALL in dist3
print("Test 1 passed.")


Test 1 passed.


## Test 2: Likelihood DP sanity
Check that likelihood tables reflect actor history.


In [3]:
pol = DenseTabularPolicy(spec)
# Make pid0 deterministic at the root for every hand: all mass on column 1,
# i.e. always claim 0.
pol.S[0, :, :] = 0.0
pol.S[0, :, 1] = 1.0
# Rebuild the likelihood tables so they reflect the hand-edited S.
pol.recompute_likelihoods()

# Each entry is (likelihood table, hid, expected value for every hand).
likelihood_checks = (
    (pol.L_pid0, 1, 1.0),  # hid=1 (claim 0): the claim pid0 always makes
    (pol.L_pid0, 2, 0.0),  # hid=2 (claim 1): a claim pid0 never makes
    (pol.L_pid1, 1, 1.0),  # popcount=1: pid1 has not acted yet
)
for table, hid, want in likelihood_checks:
    assert np.allclose(table[hid], want)
print("Test 2 passed.")


Test 2 passed.


## Test 3: Dense mixing uses posterior likelihoods
When one component assigns zero likelihood to a history, the mixture should follow the other component there.


In [4]:
a, b = DenseTabularPolicy(spec), DenseTabularPolicy(spec)

# Make pid0 deterministic at the root in both components, but on different
# claims: a always plays column 1 (claim 0), b always plays column 2 (claim 1).
for policy, root_col in ((a, 1), (b, 2)):
    policy.S[0, :, :] = 0.0
    policy.S[0, :, root_col] = 1.0
    policy.recompute_likelihoods()

mix = mix_dense(a, b, w_a=0.3)

# hid=3 (claims 0 and 1): pid_to_act=0, and L_pid0 differs between a and b,
# so the mixture is expected to coincide with a at this history.
# NOTE(review): this cell never edits S[3] in either a or b. If both policies
# start from the same default table, a.S[3] == b.S[3] and this assertion
# cannot tell posterior-weighted mixing from plain linear mixing. Confirm,
# and consider a spec where the two components can differ at the checked history.
hid, hand_idx = 3, 0
assert np.allclose(mix.S[hid, hand_idx], a.S[hid, hand_idx])
print("Test 3 passed.")


Test 3 passed.


## Test 4: Dense BR matches baseline on uniform opponent
Compare exploitability and state values for a small spec.


In [5]:
spec2 = GameSpec(
    ranks=7,
    suits=1,
    hand_size=1,
    claim_kinds=("RankHigh",),
    suit_symmetry=False,
)
rules2 = Rules(spec2)

# Freshly constructed, unmodified opponents in both representations
# (presumably the uniform policy named in the section heading).
opp_tab = TabularPolicy()
opp_tab.bind_rules(rules2)
opp_dense = DenseTabularPolicy(spec2)

# Baseline solver against the tabular opponent, dense solver against the
# dense opponent; each returns (policy, metadata).
base_pol, base_meta = br_exact.best_response_exact(spec2, opp_tab)
cand_pol, cand_meta = br_dense(spec2, opp_dense)
base_computer = base_meta['computer']
cand_computer = cand_meta['computer']

base_exp = base_computer.exploitability()
cand_exp = cand_computer.exploitability()
assert np.allclose(base_exp, cand_exp, atol=1e-8), f"Exploitability mismatch: {base_exp} vs {cand_exp}"

# Every (history, hand) value recorded by the baseline must exist in the
# dense result and agree to within 1e-8. Entries present only in the dense
# result are not checked.
cand_values = cand_computer.state_card_values
for history, hand_map in base_computer.state_card_values.items():
    cand_hand_map = cand_values.get(history, {})
    for hand, val in hand_map.items():
        other = cand_hand_map.get(hand)
        assert other is not None, f"Missing state {history} for hand {hand}"
        assert abs(val - other) <= 1e-8, f"Value mismatch at {history}, hand {hand}: {val} vs {other}"

print("Test 4 passed.")


Test 4 passed.
