In [29]:
import os, sys

def find_repo_root(start_dir: str) -> str:
    """Locate the repository root by searching upward from ``start_dir``.

    A directory counts as the root when it contains a ``pyproject.toml`` entry
    or a ``liars_poker`` sub-directory. At most six directories are inspected
    (``start_dir`` itself plus five ancestors), and the search also stops once
    the filesystem root is reached.

    Returns the absolute path of the first matching directory; when nothing
    matches, falls back to the absolute form of ``start_dir/../..``.
    """
    candidate = os.path.abspath(start_dir)
    levels_left = 6
    while levels_left > 0:
        has_pyproject = os.path.exists(os.path.join(candidate, "pyproject.toml"))
        if has_pyproject or os.path.isdir(os.path.join(candidate, "liars_poker")):
            return candidate
        above = os.path.dirname(candidate)
        if above == candidate:
            # dirname() of the filesystem root is the root itself: nowhere left to climb.
            break
        candidate = above
        levels_left -= 1
    # No marker found: assume the notebook sits two folders below the repo root.
    return os.path.abspath(os.path.join(start_dir, os.pardir, os.pardir))

# The kernel's working directory is taken as the notebook's location.
NB_DIR = os.getcwd()
REPO_ROOT = find_repo_root(NB_DIR)

# Put the repo root at the front of sys.path, but only once across re-runs.
if sys.path.count(REPO_ROOT) == 0:
    sys.path.insert(0, REPO_ROOT)

# Outputs go under <repo root>/artifacts; create the folder if it is missing.
ARTIFACTS_ROOT = os.path.join(REPO_ROOT, "artifacts")
os.makedirs(ARTIFACTS_ROOT, exist_ok=True)

# Echo the resolved locations so a wrong root is spotted immediately.
for _label, _path in (
    ("Notebook dir :", NB_DIR),
    ("Repo root    :", REPO_ROOT),
    ("Artifacts    :", ARTIFACTS_ROOT),
):
    print(_label, _path)


Notebook dir : /root/liars_poker/notebooks/post_oct13
Repo root    : /root/liars_poker
Artifacts    : /root/liars_poker/artifacts


In [30]:
import random, json
from pprint import pprint

from liars_poker import (
    GameSpec, Env, InfoSet, CALL, NO_CLAIM,
    RandomPolicy, TabularPolicy, CommitOnceMixture,
    best_response_mc,
    play_match, eval_both_seats,
    RunManager, StrategyManifest,
    FSPConfig, FSPTrainer, harmonic_eta
)

# Small helpers
def show_dist(d, env, max_items=10):
    """Print the highest-valued entries of the mapping ``d``, one per line.

    Entries are ordered by descending value, ties broken by ascending key, and
    only the first ``max_items`` are shown. Each key is rendered through
    ``env.render_action`` and right-aligned to 12 characters; each value is
    printed with three decimals.
    """
    def _rank(entry):
        action, prob = entry
        return (-prob, action)

    ranked = list(d.items())
    ranked.sort(key=_rank)
    for action, prob in ranked[:max_items]:
        label = env.render_action(action)
        print(f"{label:>12}: {prob:6.3f}")
        
def top_n_states_by_visits(policy: TabularPolicy, n=5):
    """Return up to ``n`` ``(state, visit_count)`` pairs, most-visited first.

    The pairs come from ``policy.visits().items()``; entries with equal counts
    keep the order in which that mapping yields them.
    """
    def _neg_count(entry):
        _state, count = entry
        return -count

    ranked = sorted(policy.visits().items(), key=_neg_count)
    return ranked[:n]

# Single seed reused throughout the notebook (run creation, trainer config, and
# as the base for the evaluation / demo-game seeds).
SEED = 42
# Seed Python's global `random` generator as well.
random.seed(SEED)

In [31]:
# Small game; P1 always starts by design
spec = GameSpec(ranks=3, suits=4, hand_size=1, claim_kinds=("RankHigh","Pair"))

# Create a run under the artifacts root resolved in the setup cell
save_root = ARTIFACTS_ROOT
run = RunManager(spec=spec, save_root=save_root, seed=SEED)

# Log initial average as pure random (A0); it has no parents and no mixing
a0_id = run.log_policy(
    RandomPolicy(),
    role="average",
    parents=[], mixing=None,
    notes="Initial average: RandomPolicy",
    seed=SEED
)


# Keep the run's directories handy: later cells print run_dir and read
# manifests from manifests_dir
run_dir = run.run_dir
manifests_dir = run.manifests_dir
policies_dir  = run.policies_dir


# Sanity check: the id just logged should also be the run's current policy id
print("Artifacts root:", save_root)
print("Run folder    :", run_dir)
print("Initial average policy id:", a0_id)
print("Current average (index)  :", run.current_policy_id())


Artifacts root: /root/liars_poker/artifacts
Run folder    : /root/liars_poker/artifacts/runs/run_20251013_223922_42
Initial average policy id: A0
Current average (index)  : A0


In [32]:
# Number of FSP iterations to run. The same value is passed to the config as
# max_iters and drives the loop below, so the two cannot drift apart.
N_ITERS = 10

# Trainer config: N_ITERS iterations, each with a modest BR budget
cfg = FSPConfig(
    episodes0=10_000,   # BR sample budget
    epsilon=0.10,
    min_visits=0,
    max_iters=N_ITERS,
    eta_schedule="harmonic",
    mix_impl="commit_once",
    seed=SEED
)

trainer = FSPTrainer(run, cfg)

cur_avg = run.current_policy()
print("Current avg type:", type(cur_avg).__name__)

# Run N_ITERS trainer steps. Each step receives the current average as the
# opponent and returns the newly logged BR together with the updated average,
# which becomes the opponent for the next step.
for i in range(N_ITERS):
    br, cur_avg, b_id, a_id, metrics = trainer.step(opponent_policy=cur_avg, iter_index=i)
    print("\nLogged BR:", b_id, "| New AVG:", a_id)
    print("Step metrics:")
    pprint(metrics)


Current avg type: RandomPolicy

Logged BR: B0 | New AVG: A1
Step metrics:
{'avg_components': 2, 'br_states': 655, 'eta': 0.5}

Logged BR: B1 | New AVG: A2
Step metrics:
{'avg_components': 3, 'br_states': 616, 'eta': 0.3333333333333333}

Logged BR: B2 | New AVG: A3
Step metrics:
{'avg_components': 4, 'br_states': 605, 'eta': 0.25}

Logged BR: B3 | New AVG: A4
Step metrics:
{'avg_components': 5, 'br_states': 623, 'eta': 0.2}

Logged BR: B4 | New AVG: A5
Step metrics:
{'avg_components': 6, 'br_states': 642, 'eta': 0.16666666666666666}

Logged BR: B5 | New AVG: A6
Step metrics:
{'avg_components': 7, 'br_states': 661, 'eta': 0.14285714285714285}

Logged BR: B6 | New AVG: A7
Step metrics:
{'avg_components': 8, 'br_states': 651, 'eta': 0.125}

Logged BR: B7 | New AVG: A8
Step metrics:
{'avg_components': 9, 'br_states': 660, 'eta': 0.1111111111111111}

Logged BR: B8 | New AVG: A9
Step metrics:
{'avg_components': 10, 'br_states': 650, 'eta': 0.1}

Logged BR: B9 | New AVG: A10
Step metrics:
{'av

In [33]:
# Evaluate the last BR against the current average, both seats.
# NOTE: after the training loop `cur_avg` is the average returned by the final
# trainer step (it already mixes in `br`), not the average that BR was trained
# against, so the label below says "current" rather than "previous".
res = eval_both_seats(spec, br, cur_avg, episodes=2000, seed=SEED+911)
print("BR vs current AVG (candidate perspective):", res)

# Inspect a few of the most-visited states in the BR
env = Env(spec)
top_states = top_n_states_by_visits(br, n=5)

if not top_states:
    print("\n(No visited states recorded — try raising episodes0 in the config.)")
else:
    print("\nTop visited states in BR and its probs at those states:")
    for (iset, count) in top_states:
        # Unpack the infoset fields shown in the per-state summary line.
        pid, last_idx, hand, hist = iset.pid, iset.last_idx, iset.hand, iset.history
        print(f"visits={count:5d} | pid={pid} last={last_idx} hand={hand} hist_len={len(hist)}")
        probs = br.action_probs(iset)
        show_dist(probs, env)
        print("-"*40)


BR vs previous AVG (candidate perspective): {'A': 1603, 'B': 397, 'total': 2000}

Top visited states in BR and its probs at those states:
visits=  471 | pid=0 last=-2 hand=(2,) hist_len=0
  RankHigh:1:  1.000
  RankHigh:2:  0.000
  RankHigh:3:  0.000
      Pair:1:  0.000
      Pair:2:  0.000
      Pair:3:  0.000
----------------------------------------
visits=  446 | pid=0 last=-2 hand=(10,) hist_len=0
  RankHigh:3:  1.000
  RankHigh:1:  0.000
  RankHigh:2:  0.000
      Pair:1:  0.000
      Pair:2:  0.000
      Pair:3:  0.000
----------------------------------------
visits=  443 | pid=0 last=-2 hand=(8,) hist_len=0
  RankHigh:3:  1.000
  RankHigh:1:  0.000
  RankHigh:2:  0.000
      Pair:1:  0.000
      Pair:2:  0.000
      Pair:3:  0.000
----------------------------------------
visits=  442 | pid=0 last=-2 hand=(5,) hist_len=0
  RankHigh:2:  1.000
  RankHigh:1:  0.000
  RankHigh:3:  0.000
      Pair:1:  0.000
      Pair:2:  0.000
      Pair:3:  0.000
----------------------------------

In [34]:
# Play one narrated game: the latest BR sits as P1, the current average as P2.
rng = random.Random(SEED + 2025)
env = Env(spec)

p1 = br
p2 = cur_avg
p1.bind_rules(env.rules)
p2.bind_rules(env.rules)
p1.begin_episode(rng)
p2.begin_episode(rng)

obs = env.reset(seed=rng.randint(0, 10**9))
print("RESET → P1 always starts; to_play:", obs["to_play"])

step_idx = 0
while not obs["terminal"]:
    player = env.current_player()             # "P1" or "P2"
    policy = p2 if player != "P1" else p1
    iset   = env.infoset_key(player)          # Assumed to be the policy's turn
    legal  = env.legal_actions()
    legal_str = list(map(env.render_action, legal))
    print(f"\nStep {step_idx} | {player} to play | legal={legal_str}")

    # Show the policy's distribution on its turn (for mixtures this is posterior-expected)
    dist = policy.prob_dist_at_infoset(iset)
    print("policy.prob_dist_at_infoset:")
    show_dist(dist, env)

    # Sample an action from the acting policy and advance the environment
    a = policy.sample(iset, rng)
    print("→ chosen:", env.render_action(a))
    obs = env.step(a)
    step_idx += 1

# The loop exits only once the observation is terminal.
print("\nTERMINAL. Winner:", obs["winner"])


RESET → P1 always starts; to_play: P1

Step 0 | P1 to play | legal=['RankHigh:1', 'RankHigh:2', 'RankHigh:3', 'Pair:1', 'Pair:2', 'Pair:3']
policy.prob_dist_at_infoset:
  RankHigh:3:  1.000
  RankHigh:1:  0.000
  RankHigh:2:  0.000
      Pair:1:  0.000
      Pair:2:  0.000
      Pair:3:  0.000
→ chosen: RankHigh:3

Step 1 | P2 to play | legal=['CALL', 'Pair:1', 'Pair:2', 'Pair:3']
policy.prob_dist_at_infoset:
      Pair:1:  0.318
        CALL:  0.227
      Pair:2:  0.227
      Pair:3:  0.227
→ chosen: CALL

TERMINAL. Winner: P1


In [35]:
print("Artifacts root:", save_root)
print("This run folder:", run_dir)

# Expanded lineage of the current average, one (id, role, weight) row per line.
# Best effort: any failure is reported and the cell carries on.
try:
    expanded = run.expand_lineage(run.current_policy_id())
    print("\nExpanded lineage of current average (id, role, weight):")
    for pid, role, w in expanded:
        print(f"{pid:>4} | {role:<12} | {w:7.4f}")
except Exception as exc:
    print("expand_lineage not available or failed:", exc)

# Peek at the manifest of the most recently logged BR
man_path = os.path.join(manifests_dir, f"{b_id}.json")
if not os.path.exists(man_path):
    print("Manifest not found at:", man_path)
else:
    with open(man_path, "r", encoding="utf-8") as fh:
        man = json.load(fh)
    print("\nBR manifest summary:")
    print(" id:", man.get("id"))
    print(" role:", man.get("role"))
    print(" parents:", man.get("parents"))
    # Only the key names of the training block are listed, not their values.
    train_block = man.get("train", {})
    print(" train keys:", sorted(train_block.keys()))
    print(" artifacts:", man.get("artifacts"))


Artifacts root: /root/liars_poker/artifacts
This run folder: /root/liars_poker/artifacts/runs/run_20251013_223922_42

Expanded lineage of current average (id, role, weight):
  A9 | avg          |  0.9091
  B9 | br           |  0.0909

BR manifest summary:
 id: B9
 role: best_response
 parents: [{'id': 'A9', 'role': 'avg', 'weight': 1.0}]
 train keys: ['algo', 'episodes', 'epsilon', 'min_visits']
 artifacts: {'policy': 'policies/B9.json.gz'}
