In [1]:
import os, sys

# find repo root (looks for liars_poker/ or pyproject.toml)
def find_repo_root(start_dir: str) -> str:
    """Locate the repository root by walking upward from ``start_dir``.

    A directory counts as the root when it contains a ``liars_poker``
    directory or a ``pyproject.toml`` entry. At most six directories are
    inspected (``start_dir`` itself plus up to five ancestors), and the walk
    stops early at the filesystem root.

    Args:
        start_dir: Directory to start searching from (relative or absolute).

    Returns:
        The absolute path of the first matching directory, or, when nothing
        matches, the absolute path two levels above ``start_dir``.
    """

    def _has_marker(path: str) -> bool:
        # Either the package directory or the project file marks the root.
        if os.path.isdir(os.path.join(path, "liars_poker")):
            return True
        return os.path.exists(os.path.join(path, "pyproject.toml"))

    candidate = os.path.abspath(start_dir)
    checks_left = 6
    while checks_left > 0:
        if _has_marker(candidate):
            return candidate
        above = os.path.dirname(candidate)
        if above == candidate:
            # dirname() is a fixed point only at the filesystem root.
            break
        candidate = above
        checks_left -= 1

    # Nothing found: fall back to the grandparent of the starting directory.
    return os.path.abspath(os.path.join(start_dir, "..", ".."))

# Resolve the repo root relative to the notebook's working directory and put
# it at the front of sys.path so the `liars_poker` imports below can resolve.
NB_DIR = os.getcwd()
REPO_ROOT = find_repo_root(NB_DIR)
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)

# Create <repo root>/artifacts if it does not exist yet (no error if it does).
ARTIFACTS_ROOT = os.path.join(REPO_ROOT, "artifacts")
os.makedirs(ARTIFACTS_ROOT, exist_ok=True)

# Echo the resolved locations so a wrong root is noticed immediately.
print("repo root   :", REPO_ROOT)
print("artifacts   :", ARTIFACTS_ROOT)


repo root   : /root/liars_poker
artifacts   : /root/liars_poker/artifacts


In [2]:
import random
from pprint import pprint

from liars_poker import (
    GameSpec, Env, InfoSet, Rules,
    Policy, TabularPolicy, CommitOnceMixture, RandomPolicy,
    eval_both_seats, eval_vs
)

from liars_poker.algo.br_mc import efficient_best_response_mc_v3, efficient_best_response_mc_v2
from typing import List, Tuple

# Seed Python's global `random` module.
# NOTE(review): whether the liars_poker routines draw from this global RNG or
# from their own `seed=` arguments is not visible here — confirm.
SEED = 42
random.seed(SEED)

# small game; P1 always starts by design
# 7 ranks x 4 suits, two-card hands, claims limited to "RankHigh" and "Pair".
spec = GameSpec(ranks=7, suits=4, hand_size=2, claim_kinds=("RankHigh", "Pair"), suit_symmetry=True)
# Rules object derived from the spec; queried for legal actions and bound to policies below.
rules = Rules(spec)


In [3]:
# Legal actions for player 0 holding hand (1, 2) with an empty history,
# i.e. the opening decision.
rules.legal_actions_for(InfoSet(0, (1, 2), () ))
# rules._claims

(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)

In [4]:
# def play_match(env: Env, p1: Policy, p2: Policy, episodes: int = 10, seed: int = 0) -> Dict[str, int]:
#     rng = random.Random(seed)
#     wins = {"P1": 0, "P2": 0}

#     p1.bind_rules(env.rules)
#     p2.bind_rules(env.rules)

#     for _ in range(episodes):
#         obs = env.reset(seed=rng.randint(0, 2_147_483_647))
#         p1.begin_episode(rng)
#         p2.begin_episode(rng)

#         while True:
#             if obs["terminal"]:
#                 winner = obs["winner"]
#                 if winner in wins:
#                     wins[winner] += 1
#                 break

#             player = env.current_player()
#             policy = p1 if player == "P1" else p2
#             infoset = env.infoset_key(player)
#             action = policy.sample(infoset, rng)
#             obs = env.step(action)

#     return wins




In [5]:
# Two RandomPolicy instances bound to the shared rules; p1 is the opponent that
# the best-response computations below are run against.
# NOTE(review): p2 and env are not referenced by any later cell visible here.
p1, p2 = RandomPolicy(), RandomPolicy()
p1.bind_rules(rules)
p2.bind_rules(rules)
env = Env(spec)

In [6]:
# Response policy against p1 from the MC routine in liars_poker.algo.br_mc,
# run with 10,000 episodes. It is evaluated against p1 further down.
beat_up_p1 = efficient_best_response_mc_v3(spec, p1, episodes=10_000)

In [7]:
# Action distribution of the MC response at player 0's opening infoset.
# NOTE(review): hand=(4,) has a single card while spec.hand_size is 2, so this
# infoset may not exist in the policy table; the uniform output is presumably a
# fallback for unseen infosets — confirm, or query a two-card hand instead.
beat_up_p1.action_probs(InfoSet(pid=0, hand=(4,), history=()))




{0: 0.07142857142857142,
 1: 0.07142857142857142,
 2: 0.07142857142857142,
 3: 0.07142857142857142,
 4: 0.07142857142857142,
 5: 0.07142857142857142,
 6: 0.07142857142857142,
 7: 0.07142857142857142,
 8: 0.07142857142857142,
 9: 0.07142857142857142,
 10: 0.07142857142857142,
 11: 0.07142857142857142,
 12: 0.07142857142857142,
 13: 0.07142857142857142}

In [8]:
# Distinct odd-length histories among the infosets stored in the MC response
# table, in sorted order.
sorted(
    {
        infoset.history
        for infoset in beat_up_p1.probs
        if len(infoset.history) % 2 != 0
    }
)

[(0,),
 (0, 1, 2),
 (0, 1, 2, 6, 10),
 (0, 1, 2, 6, 10, 11, 12),
 (0, 1, 2, 9, 12),
 (0, 1, 4),
 (0, 1, 4, 5, 7),
 (0, 1, 4, 5, 13),
 (0, 1, 5),
 (0, 1, 5, 6, 10),
 (0, 1, 5, 8, 13),
 (0, 1, 6),
 (0, 1, 6, 8, 9),
 (0, 1, 6, 8, 9, 12, 13),
 (0, 1, 6, 8, 13),
 (0, 1, 6, 9, 11),
 (0, 1, 6, 9, 11, 12, 13),
 (0, 1, 7),
 (0, 1, 7, 11, 13),
 (0, 1, 8),
 (0, 1, 8, 10, 13),
 (0, 1, 9),
 (0, 1, 9, 10, 12),
 (0, 1, 9, 11, 12),
 (0, 1, 10),
 (0, 1, 11),
 (0, 1, 12),
 (0, 1, 13),
 (0, 2, 3),
 (0, 2, 3, 7, 13),
 (0, 2, 3, 8, 11),
 (0, 2, 4),
 (0, 2, 4, 7, 10),
 (0, 2, 4, 7, 10, 11, 12),
 (0, 2, 4, 10, 13),
 (0, 2, 5),
 (0, 2, 5, 11, 12),
 (0, 2, 6),
 (0, 2, 7),
 (0, 2, 7, 9, 10),
 (0, 2, 7, 9, 10, 11, 13),
 (0, 2, 8),
 (0, 2, 8, 11, 13),
 (0, 2, 10),
 (0, 2, 11),
 (0, 2, 11, 12, 13),
 (0, 2, 12),
 (0, 2, 13),
 (0, 3, 4),
 (0, 3, 4, 6, 9),
 (0, 3, 5),
 (0, 3, 5, 8, 11),
 (0, 3, 6),
 (0, 3, 6, 8, 10),
 (0, 3, 6, 8, 12),
 (0, 3, 6, 9, 12),
 (0, 3, 6, 11, 13),
 (0, 3, 7),
 (0, 3, 7, 9, 13),
 (0, 3, 8),


In [9]:
# Head-to-head result of p1 against the MC response. The returned dict has keys
# 'A', 'B' and 'total'; 'B' counts wins of the second policy argument (the final
# cell reads results['B'] as "our" wins with the response passed second).
eval_both_seats(spec, p1, beat_up_p1)

{'A': 557, 'B': 1443, 'total': 2000}

In [10]:
from liars_poker.algo.br_exact import BestResponseComputer, best_response_exact
from liars_poker.core import possible_starting_hands
# List the starting hands enumerated for this spec; the output shows them as
# sorted rank pairs.
possible_starting_hands(spec)
# prob_vectors = percolate(spec, rules, InfoSet(0, (0, ), ()), (0, ), p1, init_prob=1)


[(1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (1, 5),
 (1, 6),
 (1, 7),
 (2, 2),
 (2, 3),
 (2, 4),
 (2, 5),
 (2, 6),
 (2, 7),
 (3, 3),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7),
 (4, 4),
 (4, 5),
 (4, 6),
 (4, 7),
 (5, 5),
 (5, 6),
 (5, 7),
 (6, 6),
 (6, 7),
 (7, 7)]

In [11]:
# tab = TabularPolicy()
# tab.bind_rules(rules)

# histories = [
#  (0,),
#  (0, 1, 2),
#  (0, 1, 2, 3, 4),
#  (0, 1, 2, 3, 5),
#  (0, 1, 2, 4, 5),
#  (0, 1, 3),
#  (0, 1, 3, 4, 5),
#  (0, 1, 4),
#  (0, 1, 5),
#  (0, 2, 3),
#  (0, 2, 3, 4, 5),
#  (0, 2, 4),
#  (0, 2, 5),
#  (0, 3, 4),
#  (0, 3, 5),
#  (0, 4, 5),
#  (1,),
#  (1, 2, 3),
#  (1, 2, 3, 4, 5),
#  (1, 2, 4),
#  (1, 2, 5),
#  (1, 3, 4),
#  (1, 3, 5),
#  (1, 4, 5),
#  (2,),
#  (2, 3, 4),
#  (2, 3, 5),
#  (2, 4, 5),
#  (3,),
#  (3, 4, 5),
#  (4,),
#  (5,)
# ]

# for hand in [1,2,3]:
#     hand = (hand, )
#     for history in histories:
#         iset = InfoSet(1, hand, history)
#         if hand == (1,) or history[-1] == 5:
#             dist = {-1: 1.0}
#         else:
#             dist = {history[-1]+1: 1.0}
#         tab.set(iset, dist)



# tab.probs

In [12]:
# Exact best response against p1. `the_defeater` is the resulting policy
# (evaluated with eval_both_seats below); `br` is the computation object whose
# state_card_values, prob_vectors and rules are inspected in later cells.
# debug=True prints progress (each starting hand) while it runs.
the_defeater, br = best_response_exact(spec, p1, debug=True)

Percolating started.
Percolating done.
(1, 1)
(1, 2)
(1, 3)
(1, 4)
(1, 5)
(1, 6)
(1, 7)
(2, 2)
(2, 3)
(2, 4)
(2, 5)
(2, 6)
(2, 7)
(3, 3)
(3, 4)
(3, 5)
(3, 6)
(3, 7)
(4, 4)
(4, 5)
(4, 6)
(4, 7)
(5, 5)
(5, 6)
(5, 7)
(6, 6)
(6, 7)
(7, 7)
best response calculated.


In [23]:
# (hand, value) pairs stored for the empty history, highest value first.
sorted(
    br.state_card_values[()].items(),
    key=lambda hand_and_value: hand_and_value[1],
    reverse=True,
)

[((7, 7), 1.0),
 ((6, 6), 0.9907692307692308),
 ((5, 5), 0.9876923076923078),
 ((4, 4), 0.9861538461538463),
 ((3, 3), 0.9852307692307692),
 ((2, 2), 0.9846153846153847),
 ((1, 1), 0.984175824175824),
 ((1, 7), 0.9341666666666666),
 ((5, 7), 0.9330769230769231),
 ((6, 7), 0.9330769230769231),
 ((2, 7), 0.933076923076923),
 ((3, 7), 0.933076923076923),
 ((4, 7), 0.933076923076923),
 ((1, 6), 0.9241432641432639),
 ((2, 6), 0.9206267806267804),
 ((3, 6), 0.9170598290598289),
 ((4, 6), 0.9167277167277165),
 ((5, 6), 0.9167277167277165),
 ((1, 5), 0.9162609890109891),
 ((2, 5), 0.9129871794871793),
 ((1, 4), 0.9103071928071927),
 ((3, 5), 0.9097769230769231),
 ((1, 2), 0.9097475345167654),
 ((4, 5), 0.9094780219780221),
 ((1, 3), 0.9092912087912086),
 ((2, 4), 0.9082226107226107),
 ((2, 3), 0.9080897435897435),
 ((3, 4), 0.905541958041958)]

In [14]:
# Same opening query as earlier but with an empty hand tuple; the output matches,
# so the legal opening actions do not appear to depend on the hand.
rules.legal_actions_for(InfoSet(0, ( ), ()))

(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)

In [15]:
# Sanity check: collect every history whose per-hand values contain a number
# outside [0, 1]. An empty dict means all stored values are in range.
{
    history: hand_values
    for history, hand_values in br.state_card_values.items()
    if any(val > 1 or val < 0 for val in hand_values.values())
}

{}

In [16]:
# Head-to-head result of p1 against the exact best response; compare the 'B'
# count with the MC response evaluated earlier.
eval_both_seats(spec, p1, the_defeater)

{'A': 167, 'B': 1833, 'total': 2000}

In [17]:
# Per-hand values stored for the specific history (0, 1, ..., 8).
br.state_card_values[(0,1,2,3,4,5,6,7,8)]

{(1, 1): 0.9815384615384615,
 (1, 2): 0.788923076923077,
 (1, 3): 0.9815384615384615,
 (1, 4): 0.9815384615384614,
 (1, 5): 0.9815384615384615,
 (1, 6): 0.9815384615384615,
 (1, 7): 0.9815384615384615,
 (2, 2): 0.7889230769230768,
 (2, 3): 0.8295384615384616,
 (2, 4): 0.7915384615384615,
 (2, 5): 0.7784615384615385,
 (2, 6): 0.7784615384615385,
 (2, 7): 0.7784615384615383,
 (3, 3): 0.9852307692307691,
 (3, 4): 0.9815384615384615,
 (3, 5): 0.9815384615384615,
 (3, 6): 0.9815384615384615,
 (3, 7): 0.9815384615384617,
 (4, 4): 0.9861538461538462,
 (4, 5): 0.9815384615384615,
 (4, 6): 0.9815384615384615,
 (4, 7): 0.9815384615384615,
 (5, 5): 0.9876923076923078,
 (5, 6): 0.9815384615384614,
 (5, 7): 0.9815384615384617,
 (6, 6): 0.9907692307692307,
 (6, 7): 0.9815384615384615,
 (7, 7): 1.0}

In [24]:
import scipy.stats as stats
from liars_poker.algo.br_exact import adjustment_factor

# Helper to weight canonical hands by actual deck multiplicity

def hand_weight(hand):
    """Return the weight of a canonical ``hand`` for averaging over hands.

    Delegates to ``adjustment_factor(spec, (), hand)`` using the notebook-level
    ``spec``; the empty tuple is passed as the second argument.
    NOTE(review): presumably that argument is the set of already-known cards,
    so the result is the hand's multiplicity in a full deck — confirm against
    ``liars_poker.algo.br_exact.adjustment_factor``.
    """
    return adjustment_factor(spec, (), hand)

# Probability opponent opens with each action (based on percolation mass)
def opening_action_probs():
    """Return the opponent's opening-action distribution.

    For every action legal as a first move (``legal_actions_from_last(None)``),
    the mass is the sum of the values in ``br.prob_vectors`` at the one-action
    history ``(action,)``; a missing history contributes zero. The masses are
    normalised to sum to one.

    Returns:
        A dict mapping each opening action to its probability. When the total
        mass is zero, every action maps to ``0.0``.
    """
    opening_actions = br.rules.legal_actions_from_last(None)

    mass_by_action = {}
    for act in opening_actions:
        reach = br.prob_vectors.get((act,), {})
        mass_by_action[act] = sum(reach.values())

    total_mass = sum(mass_by_action.values())
    if total_mass == 0:
        # No percolated mass at all: avoid dividing by zero.
        return dict.fromkeys(opening_actions, 0.0)

    normalised = {}
    for act, mass in mass_by_action.items():
        normalised[act] = mass / total_mass
    return normalised

# Expected win rate when we go first:
# weighted mean of the empty-history values over our hands, each hand weighted
# by hand_weight(hand).
p_first_num = 0.0
p_first_den = 0.0
for hand, value in br.state_card_values.get((), {}).items():
    w = hand_weight(hand)
    p_first_num += value * w
    p_first_den += w
# An empty value table yields 0.0 instead of a ZeroDivisionError.
p_first = 0.0 if p_first_den == 0 else p_first_num / p_first_den

# Expected win rate when opponent opens:
# same weighted mean, taken over the one-action histories (action,) and
# additionally weighted by the probability that the opponent opens with it.
# NOTE(review): multiplying hand_weight(hand) by action_prob treats the
# opponent's opening action as independent of our hand — confirm this holds
# for opponents other than RandomPolicy.
p_second_num = 0.0
p_second_den = 0.0
action_probs = opening_action_probs()
for action, action_prob in action_probs.items():
    if action_prob == 0.0:
        # Openings that never occur contribute nothing.
        continue
    history = (action,)
    for hand, value in br.state_card_values.get(history, {}).items():
        w = hand_weight(hand) * action_prob
        p_second_num += value * w
        p_second_den += w
p_second = 0.0 if p_second_den == 0 else p_second_num / p_second_den

# Overall expected win chance (opponent starts half the time)
p0 = 0.5 * (p_first + p_second)
print(f"Expected win probability p0: {p0:.6f}")
print(f"Broken down into {p_first:.6f} if going first and {p_second:.6f} if going second.")

# Empirical check of the exact value p0: play n simulated games of p1 against
# the exact best response and run a chi-squared goodness-of-fit test with
# H0: P(we win) == p0.
n = 1_000_000
results = eval_both_seats(spec, p1, the_defeater, seed=41, episodes=n)
x = results['B']  # wins of the second policy argument, i.e. the best response
print(f"Observed wins for us (B): {x}")

# Expected and observed counts for the two outcomes (win / loss).
expected_successes = n * p0
expected_failures = n * (1 - p0)
observed_successes = x
observed_failures = n - x

# Pearson statistic: sum over both outcomes of (observed - expected)^2 / expected.
chi2_stat = (
    ((observed_successes - expected_successes) ** 2) / expected_successes
    + ((observed_failures - expected_failures) ** 2) / expected_failures
)

# Two outcome categories with a fixed total leave one degree of freedom.
df = 1
# Use the survival function rather than `1 - cdf`: it is the same quantity but
# stays accurate for large statistics, where `1 - cdf` rounds to exactly 0.0.
p_value = stats.chi2.sf(chi2_stat, df)

print(f"Chi-squared statistic: {chi2_stat:.4f}")
print(f"Degrees of freedom: {df}")
print(f"P-value: {p_value:.6g}")

# Decision at the conventional 5% significance level.
if p_value < 0.05:
    print(f"Reject H0: significant evidence that p != {p0:.6f}")
else:
    print(f"Fail to reject H0: no significant evidence that p differs from {p0:.6f}")



Expected win probability p0: 0.919270
Broken down into 0.926520 if going first and 0.912021 if going second.
Observed wins for us (B): 919329
Chi-squared statistic: 0.0464
Degrees of freedom: 1
P-value: 0.829492
Fail to reject H0: no significant evidence that p differs from 0.919270
