In [34]:
import sys
import os
# Put the parent of the current working directory on the import path
# (appended once, so re-running the cell does not add duplicates).
# Presumably this is where the `liars_poker` package lives — confirm the
# notebook's location relative to the repo root.
repo_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(repo_root) == 0:
    sys.path.append(repo_root)

from liars_poker import GameSpec, RandomPolicy, Env
from liars_poker.simple_api import start_run, build_best_response, mix_policies, play_vs_bot
from liars_poker.br import best_response_mc
import random


In [None]:
# Shared configuration: `seed` and `spec` are reused by the cells below.
seed = 42
spec = GameSpec(
    ranks=3,
    suits=2,
    hand_size=1,
    starter="P1",
    claim_kinds=("RankHigh", "Pair"),
)

# Scratch environment used only to inspect the action space.
env = Env(spec, seed=seed)
env.step(0)  # presumably plays the action with index 0 — confirm against Env.step

# Legal actions reported by the environment after that step.
legal_actions = env.legal_actions()

# NOTE(review): `_build_claims` is a private helper on `env.rules`. As the last
# expression of the cell, its return value is what the cell displays.
env.rules._build_claims()

[-1, 1, 2, 3, 4, 5]

In [31]:
# Start a run with `save_root` pointing at <repo_root>/artifacts.
run = start_run(
    spec,
    save_root=os.path.join(repo_root, "artifacts"),
    seed=seed,
)

# Log a RandomPolicy under the "average" role and keep the id returned by the run.
a0_id = run.log_policy(RandomPolicy(), role="average", seed=seed, notes="Initial average: RandomPolicy")
print("Logged:", a0_id)

# Read the run's current policy back and report its concrete type.
cur = run.current_policy()
print("Loaded current policy type:", type(cur).__name__)


Logged: A0
Loaded current policy type: RandomPolicy


In [38]:
# Compute a best response to the current policy `cur` with `best_response_mc`.
# The call returns a pair: the response policy (`br`) and a second object (`V`).
br, V = best_response_mc(
    spec,
    cur,
    episodes=1_000_000,
    min_visits_per_action=0,
    alternate_seats=True,
    seed=seed,
)

# Log the best response with the policy logged as `a0_id` recorded as its parent.
# The id is assigned by the run, so it is printed rather than assumed.
b0_id = run.log_policy(br, role="best_response", parents=[{"id": a0_id, "role": "avg", "weight": 1.0}], notes="Stub BR against A0")
print("Logged:", b0_id)



Logged: B1


In [39]:
# Second iteration: compute a best response to the *previous best response*.
# On entry `br` is the policy whose logged id is held in `b0_id`; capture that id
# before both names are rebound, so the lineage below names the real opponent.
opponent_id = b0_id
br, V = best_response_mc(
    spec,
    br,
    episodes=100_000,
    min_visits_per_action=0,
    alternate_seats=True,
    seed=seed,
)

# Log the new best response with the policy it was actually trained against as
# its parent (previously this recorded `a0_id`, which was not the opponent here).
# NOTE(review): the parent "role" label is assumed to mirror the role the parent
# was logged under — confirm against run.log_policy's lineage schema.
# `b0_id` is rebound on purpose so that `br` and `b0_id` keep referring to the
# same policy for any later cell.
b0_id = run.log_policy(
    br,
    role="best_response",
    parents=[{"id": opponent_id, "role": "best_response", "weight": 1.0}],
    notes=f"BR against {opponent_id}",
)
print("Logged:", b0_id)



Logged: B2


In [40]:
# Display `V`, the second value returned by the most recent `best_response_mc` call.
# NOTE(review): presumably per-state value estimates keyed by tuples — confirm
# against liars_poker.br.best_response_mc.
V

{(0, 4, (2,), (0, 1, 2, 4)): -1.0,
 (0, 4, (1,), (0, 1, 2, 4)): 1.0,
 (0, 4, (0,), (0, 1, 2, 4)): 1.0,
 (0, 4, (5,), (0, 1, 2, 4)): 1.0,
 (0, 3, (3,), (0, 1, 2, 3)): 1.0,
 (0, 3, (5,), (0, 1, 2, 3)): 1.0,
 (0, 4, (4,), (0, 1, 2, 4)): 1.0,
 (0, 3, (1,), (0, 1, 2, 3)): 1.0,
 (0, 3, (4,), (0, 1, 2, 3)): 1.0,
 (0, 3, (0,), (0, 1, 2, 3)): 1.0,
 (1, 3, (1,), (0, 2, 3)): -1.0,
 (1, 4, (5,), (1, 2, 4)): 1.0,
 (1, 3, (4,), (0, 2, 3)): 1.0,
 (1, 4, (1,), (1, 2, 4)): 1.0,
 (1, 3, (0,), (0, 2, 3)): -1.0,
 (1, 4, (4,), (1, 2, 4)): 1.0,
 (1, 3, (2,), (0, 2, 3)): 1.0,
 (1, 2, (5,), (0, 1, 2)): -1.0,
 (1, 3, (3,), (0, 2, 3)): 1.0,
 (1, 4, (0,), (1, 2, 4)): 1.0,
 (1, 4, (3,), (1, 2, 4)): -1.0,
 (1, 2, (1,), (0, 1, 2)): 1.0,
 (1, 2, (0,), (0, 1, 2)): 1.0,
 (1, 2, (3,), (0, 1, 2)): 1.0,
 (1, 2, (2,), (0, 1, 2)): 1.0,
 (1, 2, (4,), (0, 1, 2)): -1.0,
 (1, 3, (5,), (0, 2, 3)): 1.0,
 (0, 2, (1,), (1, 2)): 0.23863636363636365,
 (0, 1, (0,), (0, 1)): 1.0,
 (0, 1, (2,), (0, 1)): 1.0,
 (0, 4, (3,), (2, 4)): -1.0

In [36]:
# Play an interactive game against the policy `br`.
# NOTE(review): `[2]` and `[1]` look like the fixed hands for the human and the
# bot, and 'me' like the player who moves first — confirm against
# play_vs_bot's signature.
play_vs_bot(spec, br, [2], [1], 'me')

Your hand: (2,)
To play: You
Last claim: None
Legal: ['RankHigh:1', 'RankHigh:2', 'RankHigh:3', 'Pair:1', 'Pair:2', 'Pair:3']
To play: Bot
Bot plays: Pair:1
To play: You
Last claim: Pair:1
Legal: ['CALL', 'Pair:2', 'Pair:3']
Winner: P1
