In [16]:
import os, sys

# find repo root (looks for liars_poker/ or pyproject.toml)
def find_repo_root(start_dir: str, max_depth: int = 6) -> str:
    """Walk upward from ``start_dir`` looking for the repository root.

    A directory is treated as the root when it contains a ``liars_poker``
    directory or a ``pyproject.toml`` file.

    Args:
        start_dir: Directory to start from; made absolute before searching.
        max_depth: Maximum number of directories to inspect, counting
            ``start_dir`` itself. Defaults to 6, the limit that used to be
            hard-coded.

    Returns:
        The absolute path of the first matching directory. If no match is
        found within ``max_depth`` levels (or the filesystem root is reached
        first), the directory two levels above ``start_dir`` is returned as
        a fallback.
    """
    cur = os.path.abspath(start_dir)
    for _ in range(max_depth):
        has_package = os.path.isdir(os.path.join(cur, "liars_poker"))
        has_pyproject = os.path.exists(os.path.join(cur, "pyproject.toml"))
        if has_package or has_pyproject:
            return cur
        parent = os.path.dirname(cur)
        if parent == cur:
            # dirname() of the filesystem root is itself: nothing left to climb.
            break
        cur = parent
    # No marker found: fall back to two levels above the starting directory.
    return os.path.abspath(os.path.join(start_dir, "..", ".."))

# Resolve the repository root starting from the notebook's working directory.
NB_DIR = os.getcwd()
REPO_ROOT = find_repo_root(NB_DIR)

# Put the repo root at the front of the import path, but only once.
if sys.path.count(REPO_ROOT) == 0:
    sys.path.insert(0, REPO_ROOT)

# Ensure the artifacts directory exists under the repo root.
ARTIFACTS_ROOT = os.path.join(REPO_ROOT, "artifacts")
os.makedirs(ARTIFACTS_ROOT, exist_ok=True)

print("repo root   :", REPO_ROOT)
print("artifacts   :", ARTIFACTS_ROOT)


repo root   : /root/liars_poker
artifacts   : /root/liars_poker/artifacts


In [17]:
import random
from pprint import pprint

from liars_poker import (
    GameSpec, Env, InfoSet, Rules,
    Policy, TabularPolicy, CommitOnceMixture, RandomPolicy,
    eval_both_seats, eval_vs
)

from liars_poker.algo.br_mc import efficient_best_response_mc_v3, efficient_best_response_mc_v2
from typing import List, Tuple

# Seed Python's global `random` generator.
SEED = 42
random.seed(SEED)

# small game; P1 always starts by design
spec = GameSpec(
    ranks=10,
    suits=4,
    hand_size=2,
    claim_kinds=("RankHigh", "Pair"),
    suit_symmetry=True,
)
rules = Rules(spec)


In [18]:
# Legal actions for pid 0 holding hand (1, 2) with an empty history.
rules.legal_actions_for(InfoSet(pid=0, hand=(1, 2), history=()))
# rules._claims

(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)

In [19]:
# def play_match(env: Env, p1: Policy, p2: Policy, episodes: int = 10, seed: int = 0) -> Dict[str, int]:
#     rng = random.Random(seed)
#     wins = {"P1": 0, "P2": 0}

#     p1.bind_rules(env.rules)
#     p2.bind_rules(env.rules)

#     for _ in range(episodes):
#         obs = env.reset(seed=rng.randint(0, 2_147_483_647))
#         p1.begin_episode(rng)
#         p2.begin_episode(rng)

#         while True:
#             if obs["terminal"]:
#                 winner = obs["winner"]
#                 if winner in wins:
#                     wins[winner] += 1
#                 break

#             player = env.current_player()
#             policy = p1 if player == "P1" else p2
#             infoset = env.infoset_key(player)
#             action = policy.sample(infoset, rng)
#             obs = env.step(action)

#     return wins




In [20]:
# Two independent random policies, both bound to the shared rules object.
p1 = RandomPolicy()
p2 = RandomPolicy()
p1.bind_rules(rules)
p2.bind_rules(rules)

# Environment built from the same game spec.
env = Env(spec)

In [21]:
# Build a response policy to p1 with the br_mc v3 routine.
BR_MC_EPISODES = 10_000
beat_up_p1 = efficient_best_response_mc_v3(spec, p1, episodes=BR_MC_EPISODES)

In [22]:
# Action distribution of the response policy at the opening infoset for hand (4,).
# NOTE(review): `spec` is built with hand_size=2 but this probes a one-card hand;
# the uniform 0.05 output recorded for this cell may simply be what the policy
# returns for an infoset it never stored — confirm the intended hand.
beat_up_p1.action_probs(InfoSet(0, (4,), ()))




{0: 0.05,
 1: 0.05,
 2: 0.05,
 3: 0.05,
 4: 0.05,
 5: 0.05,
 6: 0.05,
 7: 0.05,
 8: 0.05,
 9: 0.05,
 10: 0.05,
 11: 0.05,
 12: 0.05,
 13: 0.05,
 14: 0.05,
 15: 0.05,
 16: 0.05,
 17: 0.05,
 18: 0.05,
 19: 0.05}

In [23]:
# Distinct odd-length histories among the infosets stored in beat_up_p1.probs.
# A set comprehension deduplicates directly; sorted() accepts any iterable, so
# the intermediate list() calls were redundant.
sorted({s.history for s in beat_up_p1.probs if len(s.history) % 2 == 1})

[(0,),
 (0, 1, 2),
 (0, 1, 2, 9, 17),
 (0, 1, 2, 10, 16),
 (0, 1, 3),
 (0, 1, 4),
 (0, 1, 5),
 (0, 1, 5, 10, 19),
 (0, 1, 5, 17, 18),
 (0, 1, 6),
 (0, 1, 10),
 (0, 1, 14),
 (0, 1, 14, 17, 19),
 (0, 1, 15),
 (0, 1, 15, 16, 17),
 (0, 1, 15, 16, 19),
 (0, 1, 17),
 (0, 1, 19),
 (0, 2, 4),
 (0, 2, 4, 11, 15),
 (0, 2, 8),
 (0, 2, 8, 10, 14),
 (0, 2, 8, 11, 17),
 (0, 2, 8, 11, 17, 18, 19),
 (0, 2, 11),
 (0, 2, 12),
 (0, 2, 12, 13, 18),
 (0, 2, 12, 15, 19),
 (0, 2, 13),
 (0, 2, 14),
 (0, 2, 15),
 (0, 2, 15, 18, 19),
 (0, 2, 16),
 (0, 2, 16, 17, 19),
 (0, 3, 5),
 (0, 3, 5, 9, 14),
 (0, 3, 8),
 (0, 3, 8, 16, 17),
 (0, 3, 8, 16, 17, 18, 19),
 (0, 3, 10),
 (0, 3, 10, 13, 15),
 (0, 3, 13),
 (0, 3, 13, 18, 19),
 (0, 3, 15),
 (0, 3, 17),
 (0, 3, 19),
 (0, 4, 6),
 (0, 4, 7),
 (0, 4, 7, 16, 17),
 (0, 4, 7, 17, 18),
 (0, 4, 8),
 (0, 4, 8, 16, 18),
 (0, 4, 9),
 (0, 4, 9, 13, 17),
 (0, 4, 10),
 (0, 4, 10, 15, 19),
 (0, 4, 12),
 (0, 4, 12, 15, 16),
 (0, 4, 12, 15, 16, 18, 19),
 (0, 4, 14),
 (0, 4, 14, 18, 

In [24]:
# Head-to-head evaluation of the random policy p1 against beat_up_p1.
# The recorded run returned {'A': 714, 'B': 1286, 'total': 2000};
# presumably 'A' counts wins for the first policy argument (p1) — TODO confirm.
eval_both_seats(spec, p1, beat_up_p1)

{'A': 714, 'B': 1286, 'total': 2000}

In [25]:
from liars_poker.algo.br_exact import BestResponseComputer, best_response_exact
from liars_poker.core import possible_starting_hands
# Display the starting hands reported for `spec`; the recorded output lists
# rank pairs (r1, r2) with r1 <= r2 for ranks 1..10.
possible_starting_hands(spec)
# prob_vectors = percolate(spec, rules, InfoSet(0, (0, ), ()), (0, ), p1, init_prob=1)


[(1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (1, 5),
 (1, 6),
 (1, 7),
 (1, 8),
 (1, 9),
 (1, 10),
 (2, 2),
 (2, 3),
 (2, 4),
 (2, 5),
 (2, 6),
 (2, 7),
 (2, 8),
 (2, 9),
 (2, 10),
 (3, 3),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7),
 (3, 8),
 (3, 9),
 (3, 10),
 (4, 4),
 (4, 5),
 (4, 6),
 (4, 7),
 (4, 8),
 (4, 9),
 (4, 10),
 (5, 5),
 (5, 6),
 (5, 7),
 (5, 8),
 (5, 9),
 (5, 10),
 (6, 6),
 (6, 7),
 (6, 8),
 (6, 9),
 (6, 10),
 (7, 7),
 (7, 8),
 (7, 9),
 (7, 10),
 (8, 8),
 (8, 9),
 (8, 10),
 (9, 9),
 (9, 10),
 (10, 10)]

In [26]:
# tab = TabularPolicy()
# tab.bind_rules(rules)

# histories = [
#  (0,),
#  (0, 1, 2),
#  (0, 1, 2, 3, 4),
#  (0, 1, 2, 3, 5),
#  (0, 1, 2, 4, 5),
#  (0, 1, 3),
#  (0, 1, 3, 4, 5),
#  (0, 1, 4),
#  (0, 1, 5),
#  (0, 2, 3),
#  (0, 2, 3, 4, 5),
#  (0, 2, 4),
#  (0, 2, 5),
#  (0, 3, 4),
#  (0, 3, 5),
#  (0, 4, 5),
#  (1,),
#  (1, 2, 3),
#  (1, 2, 3, 4, 5),
#  (1, 2, 4),
#  (1, 2, 5),
#  (1, 3, 4),
#  (1, 3, 5),
#  (1, 4, 5),
#  (2,),
#  (2, 3, 4),
#  (2, 3, 5),
#  (2, 4, 5),
#  (3,),
#  (3, 4, 5),
#  (4,),
#  (5,)
# ]

# for hand in [1,2,3]:
#     hand = (hand, )
#     for history in histories:
#         iset = InfoSet(1, hand, history)
#         if hand == (1,) or history[-1] == 5:
#             dist = {-1: 1.0}
#         else:
#             dist = {history[-1]+1: 1.0}
#         tab.set(iset, dist)



# tab.probs

In [None]:
# Exact best response against the random policy p1. `the_defeater` is later
# passed to eval_vs as the opposing policy, and `br` exposes the
# `state_card_values` mapping that the following cells inspect.
# NOTE(review): this cell has no execution count and its recorded output stops
# after "(1, 2)" — the run may not have finished; confirm before relying on `br`.
the_defeater, br = best_response_exact(spec, p1, debug=True)

Percolating started.
Percolating done.
(1, 1)
(1, 2)


In [None]:
# Legal actions for pid 0 with an empty hand and an empty history.
# NOTE(review): the recorded output lists 10 actions, whereas the earlier query
# with hand (1, 2) listed 20 — this output may have been produced under a
# different `spec`; confirm by re-running from a fresh kernel.
rules.legal_actions_for(InfoSet(pid=0, hand=(), history=()))

(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)

In [None]:
# Sanity check: keep every entry of br.state_card_values whose inner dict holds
# a value above 1 or below 0. An empty result means no such value was stored.
# any() stops at the first offender instead of building a throwaway list.
{
    state: card_values
    for state, card_values in br.state_card_values.items()
    if any(value > 1 or value < 0 for value in card_values.values())
}

{}

In [None]:
import scipy.stats as stats

# Goodness-of-fit check: does the simulated win count of p1 against the exact
# best response match the win rate implied by the stored root values?

# --- Null-hypothesis rate p0 -------------------------------------------------
# Weighted mean of the root values over starting hands. With the 4 suits set in
# `spec`, a pair (r, r) can be dealt C(4, 2) = 6 ways and a non-pair (r1, r2)
# 4 * 4 = 16 ways, so each non-pair hand weighs 16/6 times as much as a pair.
# Assumes the keys of the root dict are two-card rank tuples — TODO confirm.
# NOTE(review): the recorded run of this cell raised `KeyError: ()` on the
# lookup below — confirm how `br.state_card_values` keys the root state (and
# that the best_response_exact cell ran to completion) before trusting this.
PAIR_COMBOS = 6
NONPAIR_COMBOS = 16
nonpair_weight = NONPAIR_COMBOS / PAIR_COMBOS

total = 0
num = 0
for k, v in br.state_card_values[()].items():
    print(k, v)
    if k[0] == k[1]:
        total += v
        num += 1
    else:
        total += v * nonpair_weight
        num += nonpair_weight
p0 = total / num
print(p0)

# The chi-squared statistic divides by both expected counts, so p0 must be
# strictly inside (0, 1); fail with a clear message instead of dividing by zero.
if not 0 < p0 < 1:
    raise ValueError(f"p0 must lie strictly between 0 and 1 for the chi-squared test, got {p0}")

# --- Simulation --------------------------------------------------------------
n = 100_000

x = eval_vs(spec, p1, the_defeater, seed=41, episodes=n)['P1']

# Expected counts under H0
expected_successes = n * p0
expected_failures = n * (1 - p0)

# Observed counts
observed_successes = x
observed_failures = n - x

# Chi-squared statistic
chi2_stat = ((observed_successes - expected_successes) ** 2) / expected_successes \
          + ((observed_failures - expected_failures) ** 2) / expected_failures

# Degrees of freedom
df = 1

# p-value (two-sided). The survival function is the upper-tail probability and
# keeps precision for large statistics, where 1 - cdf would round to 0.
p_value = stats.chi2.sf(chi2_stat, df)

print(f"Chi-squared statistic: {chi2_stat:.4f}")
print(f"Degrees of freedom: {df}")
print(f"P-value: {p_value:.6g}")

if p_value < 0.05:
    print(f"Reject H0: significant evidence that p != {p0}")
else:
    print(f"Fail to reject H0: no significant evidence that p differs from {p0}")


KeyError: ()