In [1]:
# Make the repository importable when the kernel's working directory is
# <repo>/notebooks: the parent of the current directory is used as the repo root.
import os
import sys

repo_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Append at most once so that re-running this cell does not duplicate the entry.
if sys.path.count(repo_root) == 0:
    sys.path.append(repo_root)

from liars_poker import GameSpec, RandomPolicy, Env
from liars_poker.simple_api import start_run, build_best_response, mix_policies
from liars_poker.br import best_response_exact


In [20]:
# Game configuration for this run, gathered in one place and then expanded
# into GameSpec as keyword arguments.
spec_options = dict(
    ranks=13,
    suits=2,
    hand_size=2,
    starter="random",  # accepted values: "random" | "P1" | "P2"
    claim_kinds=("RankHigh", "Pair"),
)
spec = GameSpec(**spec_options)
spec  # last expression: renders the GameSpec repr as the cell output


GameSpec(ranks=13, suits=2, hand_size=2, starter='random', claim_kinds=('RankHigh', 'Pair'))

In [24]:
# Smoke-test the environment: create it with a fixed seed, reset, and play
# one action so the information-set keys can be inspected.
env = Env(spec=spec, seed=79)
env.reset()
env.step(10)  # NOTE(review): action 10 is a magic number — presumably a claim index; confirm

# NOTE(review): the result of legal_actions_for is not assigned or displayed;
# only the final expression of the cell is rendered as output.
p2_infoset = env.infoset_key(for_player='P2')
env.rules.legal_actions_for(p2_infoset)

# Cell output: the information-set key as seen by P1.
env.infoset_key(for_player='P1')

(0, 10, (9, 10), (10,))

In [3]:
# Start a new run whose artifacts are saved under <repo_root>/artifacts.
artifacts_root = os.path.join(repo_root, "artifacts")
run = start_run(spec, save_root=artifacts_root, seed=123)

# Log a RandomPolicy as the initial average policy; the returned id is
# reused later when recording lineage.
a0_id = run.log_policy(RandomPolicy(), role="average", seed=123, notes="Initial average: RandomPolicy")
print("Logged:", a0_id)

# Read the current policy back from the run and report its class name.
cur = run.current_policy()
print("Loaded current policy type:", type(cur).__name__)


Logged: A0
Loaded current policy type: RandomPolicy


In [4]:
# Build a best response (BR) against the current average policy
# (stub returns RandomPolicy for now).
br_config = {"seed": 7}
br = build_best_response(cur, br_config)

# Log the BR as B0; its lineage records A0 as the policy it was trained against.
b0_parents = [{"id": a0_id, "role": "avg", "weight": 1.0}]
b0_id = run.log_policy(br, role="best_response", parents=b0_parents, notes="Stub BR against A0")
print("Logged:", b0_id)

# Mix the current average (A0) with the BR (B0) using commit-once with w=0.05.
mix_config = {"impl": "commit_once", "w": 0.05}
mix = mix_policies(cur, br, mix_config)

# Log the new average A1 with both parents (A0, B0) and the mixing metadata.
# The weights below repeat the 0.95 / 0.05 split by hand, so they must be kept
# in step with mix_config["w"] if that value is ever changed.
a1_parents = [
    {"id": a0_id, "role": "avg", "weight": 0.95},
    {"id": b0_id, "role": "br",  "weight": 0.05},
]
a1_mixing = {"impl": "commit_once", "schedule": "manual", "eta_k": 0.05}
a1_id = run.log_policy(
    mix,
    role="average",
    parents=a1_parents,
    mixing=a1_mixing,
    notes="A1 = 0.95*A0 + 0.05*B0 (commit-once)",
)
print("New average:", a1_id)


Logged: B0
New average: A1


In [5]:
import glob
import json

# Report where the run lives on disk.
print("Run dir:", run.run_dir)

# List every manifest JSON file by name, in sorted order.
print("\nManifests:")
manifest_paths = sorted(glob.glob(os.path.join(run.manifests_dir, "*.json")))
for manifest_path in manifest_paths:
    print(" -", os.path.basename(manifest_path))

# List every policy JSON file together with the "class" field from its
# contents ("unknown" when the field is absent).
print("\nPolicies:")
policy_paths = sorted(glob.glob(os.path.join(run.policies_dir, "*.json")))
for policy_path in policy_paths:
    with open(policy_path, "r") as policy_file:
        policy_class = json.load(policy_file).get("class", "unknown")
    print(" -", os.path.basename(policy_path), "->", policy_class)


Run dir: /root/liars_poker/artifacts/runs/run_20251012_140042_123

Manifests:
 - A0.json
 - A1.json
 - B0.json

Policies:
 - A0.json -> RandomPolicy
 - A1.json -> CommitOnceMixture
 - B0.json -> RandomPolicy
