In [4]:
# If you're running this from the repo's /notebooks folder, add the repo root to sys.path
import sys, os, random
# The notebook is expected to run from <repo>/notebooks, so the repo root is
# the parent of the current working directory.
repo_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Appended (not prepended) so existing sys.path entries keep priority, and
# guarded so re-running the cell does not add a duplicate entry.
if sys.path.count(repo_root) == 0:
    sys.path.append(repo_root)

from liars_poker import GameSpec, Env, CALL, RandomPolicy
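# train_fsp is currently also exported at the package top level, but this notebook
# reaches it through the fsp module (imported below) so it keeps working if that changes.
# Direct alternative: from liars_poker.fsp import train_fsp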

from liars_poker import fsp as fsp_mod   # to get the function even if facade changes
from liars_poker import eval as eval_mod # play_match lives here

# Sanity check: reaching this line means every import above succeeded; also shows the resolved repo root.
print("Imports OK. Repo root:", repo_root)


Imports OK. Repo root: /root/liars_poker


In [2]:
# Game configuration reused by the environment and the training run later in this notebook.
spec = GameSpec(
    ranks=13,
    suits=1,             # set 1-4
    hand_size=2,
    starter="random",    # "random" | "P1" | "P2"
    claim_kinds=("RankHigh", "Pair"),
)
spec  # bare expression so the notebook displays the spec's repr


GameSpec(ranks=13, suits=1, hand_size=2, starter='random', claim_kinds=('RankHigh', 'Pair'))

In [3]:
# Build the environment from the shared spec and deal a first hand.
env = Env(spec, seed=123)
obs = env.reset()  # random deal, random starter

# Show the observation fields of interest, one per line.
for label, field in (
    ("To play:", "to_play"),
    ("Hand (current player):", "hand"),
    ("Last claim idx:", "last_claim_idx"),
    ("Legal actions (indices):", "legal_actions"),
):
    print(label, obs[field])

# No claim has been made yet, so CALL must not be offered as an opening action.
assert CALL not in obs["legal_actions"], "CALL should not be legal on the very first move."

# helper to render action ids -> strings
def render_action(env, a):
    """Return a human-readable label for action id ``a``.

    The CALL sentinel renders as ``"CALL"``. Any other id is looked up in
    ``env.claims`` and its two-element entry is rendered as ``"<kind>:<value>"``
    (e.g. ``"RankHigh:7"``).
    """
    is_call = a == CALL
    if not is_call:
        claim_kind, claim_value = env.claims[a]
        return f"{claim_kind}:{claim_value}"
    return "CALL"

# Same legal actions, shown as readable labels instead of raw ids.
rendered_legal = [render_action(env, action_id) for action_id in obs["legal_actions"]]
print("Legal actions (rendered):", rendered_legal)


To play: P1
Hand (current player): (5, 8)
Last claim idx: None
Legal actions (indices): [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Legal actions (rendered): ['RankHigh:1', 'RankHigh:2', 'RankHigh:3', 'RankHigh:4', 'RankHigh:5', 'RankHigh:6', 'RankHigh:7', 'RankHigh:8', 'RankHigh:9', 'RankHigh:10', 'RankHigh:11', 'RankHigh:12', 'RankHigh:13']


In [5]:
# Play one complete game between two RandomPolicy players, printing every move.
rng_seed = 777
p1, p2 = RandomPolicy(), RandomPolicy()

rng = random.Random(rng_seed)   # private RNG handed to the policies when sampling
obs = env.reset(seed=rng_seed)  # the same seed value is passed to the env reset
t = 0                           # number of actions taken so far
while not obs["terminal"]:
    player = env.current_player()
    legal_actions = obs["legal_actions"]
    infoset_key = env.infoset_key(player)

    # Any player label other than "P1" is handled by p2.
    if player == "P1":
        acting_policy = p1
    else:
        acting_policy = p2

    a = acting_policy.sample(infoset_key, legal_actions, rng)
    print(f"{player} plays:", render_action(env, a))

    obs = env.step(a)
    t += 1

print("Winner:", obs["winner"])


P1 plays: RankHigh:3
P2 plays: RankHigh:7
P1 plays: RankHigh:9
P2 plays: RankHigh:10
P1 plays: RankHigh:11
P2 plays: RankHigh:13
P1 plays: CALL
Winner: P1


In [9]:
# Head-to-head match: 50 seeded episodes between two fresh RandomPolicy instances on the existing env.
# The displayed result is keyed by player; presumably per-player win counts (they sum to 50 here) - confirm in eval.play_match.
wins = eval_mod.play_match(env, RandomPolicy(), RandomPolicy(), episodes=50, seed=43)
wins


{'P1': 23, 'P2': 27}

In [10]:
# choose a root; using repo_root ensures artifacts don't land under /notebooks
save_root = os.path.join(repo_root, "artifacts")

# Smoke run of train_fsp: a single iteration (max_iters=1) with a fixed seed,
# using the same game spec as the cells above.
result = fsp_mod.train_fsp(
    spec=spec,
    eta_schedule="harmonic",
    mix="commit_once",
    max_iters=1,
    save_root=save_root,
    seed=7,
)

# Displayed result reports the run directory (under save_root) and the average policy id.
result


{'run_dir': '/root/liars_poker/artifacts/runs/run_20251011_165623_7',
 'average_policy_id': 'A1'}