In [1]:
import sys
import os
# Put the parent of the current working directory on sys.path *before* the
# liars_poker imports below. Presumably the notebook lives one level under the
# repository root, where the liars_poker package sits -- confirm if the
# notebook is moved.
repo_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if repo_root not in sys.path:
    # Guarded so re-running the cell does not append duplicate entries.
    sys.path.append(repo_root)

from liars_poker import GameSpec, RandomPolicy, Env
from liars_poker.simple_api import start_run, build_best_response, mix_policies, play_vs_bot
from liars_poker.br import best_response_mc
import random


In [9]:
import os
from collections import defaultdict
from liars_poker.simple_api import mix_policies
from liars_poker.eval import eval_both_seats
from liars_poker.env import Env, rules_for_spec
from liars_poker.logging import load_json

def harmonic_eta(t: int, c: float = 1.0) -> float:
    """Harmonic mixing weight for iteration ``t``: eta_t = c / (t + 1).

    Args:
        t: Zero-based iteration index.
        c: Numerator of the schedule; with the default 1.0 the first
            iteration (t = 0) gets weight 1.0, the second 0.5, and so on.

    Returns:
        The weight ``c / (t + 1)``.
    """
    # Shift the zero-based index so that t = 0 does not divide by zero.
    one_based_step = t + 1
    return c / one_based_step

def fsp_step(
    run,
    spec: "GameSpec",            # quoted: resolvable even if GameSpec is imported in another cell
    opponent_policy,             # current average
    *,
    episodes: int = 20_000,
    epsilon: float = 0.1,
    min_visits_per_action: int = 1,
    eta: float = 0.05,
    mix_impl: str = "commit_once",
    seed: int = 0,
):
    """Run one fictitious-self-play step: best response, then mix into the average.

    The step (1) computes a Monte-Carlo best response to ``opponent_policy``
    with ``best_response_mc``, (2) logs it on ``run`` as a child of
    ``run.current_policy_id``, (3) mixes it into ``opponent_policy`` with
    weight ``eta`` via ``mix_policies``, and (4) logs the new average with a
    flattened list of parent weights.

    NOTE(review): the parent id is read from ``run.current_policy_id``, so
    ``opponent_policy`` is assumed to be the run's current policy -- confirm
    at call sites.

    Args:
        run: Run handle providing ``current_policy_id``, ``manifests_dir``
            and ``log_policy``.
        spec: Game specification forwarded to ``best_response_mc``.
        opponent_policy: The current average policy to respond to.
        episodes: Forwarded to ``best_response_mc``.
        epsilon: Forwarded to ``best_response_mc``.
        min_visits_per_action: Forwarded to ``best_response_mc``.
        eta: Weight of the best response in the new average; must lie in
            ``[0, 1]``. The previous average receives ``1 - eta``.
        mix_impl: Value of the ``"impl"`` key passed to ``mix_policies``.
        seed: Seed forwarded to ``best_response_mc`` and recorded in both
            log entries.

    Returns:
        ``(br, new_avg, b_id, a_id, V)``: the best-response policy, the new
        average policy, the ids returned by ``run.log_policy`` for each, and
        the second value returned by ``best_response_mc`` (presumably the
        best response's value estimate -- confirm).

    Raises:
        ValueError: If ``eta`` is outside ``[0, 1]`` (including NaN).
    """
    # Fail fast: an invalid weight would otherwise only surface after the
    # expensive BR computation, and after artifacts were already logged.
    if not (0.0 <= eta <= 1.0):
        raise ValueError(f"eta must be in [0, 1], got {eta!r}")

    # 1) Monte-Carlo best response to the current average.
    br, V = best_response_mc(
        spec, opponent_policy,
        episodes=episodes,
        epsilon=epsilon,
        min_visits_per_action=min_visits_per_action,
        alternate_seats=True,
        seed=seed,
    )

    # 2) Log BR (as Bk)
    cur_avg_id = run.current_policy_id
    b_id = run.log_policy(
        br,
        role="best_response",
        parents=[{"id": cur_avg_id, "role": "avg", "weight": 1.0}],
        notes=f"MC-BR vs {cur_avg_id}; episodes={episodes}, eps={epsilon}, seed={seed}",
        seed=seed,
    )

    manifest_dir = run.manifests_dir

    def expand_entry(entry):
        """Flatten one parent entry into ``(id, role, weight)`` tuples.

        Entries whose role is ``"avg"`` are replaced by the parents recorded
        in their manifest (``<manifest_dir>/<id>.json``), each scaled by the
        entry's weight; any other role is returned as-is. Entries with
        non-positive weight are dropped.
        """
        pid = entry["id"]
        role = entry.get("role", "avg")
        weight = entry["weight"]
        if weight <= 0:
            return []
        if role != "avg":
            return [(pid, role, weight)]
        manifest_path = os.path.join(manifest_dir, f"{pid}.json")
        data = load_json(manifest_path)
        parents = data.get("parents", [])
        if not parents:
            # An average without recorded parents (e.g. the initial policy)
            # stays in the list under the role stored in its own manifest.
            leaf_role = data.get("role", role)
            return [(pid, leaf_role, weight)]
        expanded = []
        for parent in parents:
            child_entry = {
                "id": parent["id"],
                "role": parent.get("role", "avg"),
                "weight": parent["weight"],
            }
            for cid, crole, cw in expand_entry(child_entry):
                expanded.append((cid, crole, weight * cw))
        return expanded

    # 3) Mix into new average (Ak+1)
    new_avg = mix_policies(opponent_policy, br, {"impl": mix_impl, "w": eta})

    # 4) Build the flattened parent list: the old average contributes its own
    #    (expanded) parents scaled by (1 - eta); the new BR contributes eta.
    parent_contribs = defaultdict(float)

    if (1.0 - eta) > 0:
        for pid, role, weight in expand_entry({"id": cur_avg_id, "role": "avg", "weight": 1.0 - eta}):
            parent_contribs[(pid, role)] += weight

    if eta > 0:
        # NOTE(review): the BR is logged with role "best_response" above but
        # referenced as "br" here -- confirm which label consumers expect.
        parent_contribs[(b_id, "br")] += eta

    # Sorted for a deterministic manifest; zero-weight entries are omitted.
    parents_for_log = [
        {"id": pid, "role": role, "weight": parent_contribs[(pid, role)]}
        for (pid, role) in sorted(parent_contribs.keys())
        if parent_contribs[(pid, role)] > 0
    ]

    a_id = run.log_policy(
        new_avg,
        role="average",
        parents=parents_for_log,
        mixing={"impl": mix_impl, "schedule": "manual", "eta_k": eta},
        notes=f"A' = (1-{eta})*{cur_avg_id} + {eta}*{b_id}",
        seed=seed,
    )

    return br, new_avg, b_id, a_id, V


In [10]:
# Base seed; later cells pass it to start_run/log_policy and derive the
# per-iteration seeds from it.
seed = 42
# Game configuration used throughout: 3 ranks x 4 suits, one card per hand,
# 'P1' as starter, and only the "RankHigh" and "Pair" claim kinds.
spec = GameSpec(ranks=3, suits=4, hand_size=1, starter='P1', claim_kinds=("RankHigh","Pair"))


In [11]:
# Start a new run whose artifacts are saved under <repo_root>/artifacts.
run = start_run(spec, save_root=os.path.join(repo_root, "artifacts"), seed=seed)
# Seed the run with a RandomPolicy as the initial average; log_policy returns
# the id assigned to it (the recorded output below shows "A0").
a0_id = run.log_policy(
    RandomPolicy(),
    role="average",
    seed=seed,
    notes="Initial average: RandomPolicy",
)
print("Logged:", a0_id)

# Sanity check: fetch the run's current policy back and show its type.
cur = run.current_policy()
print("Loaded current policy type:", type(cur).__name__)

Logged: A0
Loaded current policy type: RandomPolicy


In [12]:
# Base of the per-iteration seeds: fsp_step gets base_seed + t and the
# evaluation gets base_seed + 911*t.
base_seed = seed
# NOTE(review): `rules` is not referenced again in the visible cells; kept in
# case rules_for_spec has a caching side effect -- confirm before removing.
rules = rules_for_spec(spec)   # bind once (policies/env reuse this)

# Number of fictitious-self-play iterations to run.

iters = 50

# Start from the run's current policy (the initial average logged earlier).
cur = run.current_policy()
for t in range(iters):
    # Harmonic schedule: eta is 1.0 at t = 0, so the first average is the
    # first best response on its own.
    eta_t = harmonic_eta(t)
    print(f"\n=== Iteration {t} | eta={eta_t:.4f} ===")

    # The BR episode budget grows linearly with the iteration index.
    br, new_avg, b_id, a_id, V = fsp_step(
        run, spec, cur,
        episodes=10_000+4000*t,
        epsilon=0.1,
        min_visits_per_action=1,
        eta=eta_t,
        mix_impl="commit_once",
        seed=base_seed + t,
    )

    # Evaluate the new BR against the average it was trained against (`cur`
    # still holds the pre-mix average at this point).
    w_br_vs_cur = eval_both_seats(spec, br, cur, episodes=2000, seed=base_seed + 911*t)
    print("BR vs current avg (candidate perspective):", w_br_vs_cur)


    cur = new_avg  # advance average
    print(f"Logged BR={b_id}, new AVG={a_id}")

print("\nDone. Latest average policy id:", run.current_policy_id)



=== Iteration 0 | eta=1.0000 ===
BR vs current avg (candidate perspective): {'A': 1691, 'B': 309, 'total': 2000}
Logged BR=B0, new AVG=A1

=== Iteration 1 | eta=0.5000 ===
BR vs current avg (candidate perspective): {'A': 1437, 'B': 563, 'total': 2000}
Logged BR=B1, new AVG=A2

=== Iteration 2 | eta=0.3333 ===
BR vs current avg (candidate perspective): {'A': 1317, 'B': 683, 'total': 2000}
Logged BR=B2, new AVG=A3

=== Iteration 3 | eta=0.2500 ===
BR vs current avg (candidate perspective): {'A': 1251, 'B': 749, 'total': 2000}
Logged BR=B3, new AVG=A4

=== Iteration 4 | eta=0.2000 ===
BR vs current avg (candidate perspective): {'A': 1226, 'B': 774, 'total': 2000}
Logged BR=B4, new AVG=A5

=== Iteration 5 | eta=0.1667 ===
BR vs current avg (candidate perspective): {'A': 1204, 'B': 796, 'total': 2000}
Logged BR=B5, new AVG=A6

=== Iteration 6 | eta=0.1429 ===
BR vs current avg (candidate perspective): {'A': 1207, 'B': 793, 'total': 2000}
Logged BR=B6, new AVG=A7

=== Iteration 7 | eta=0.12

In [13]:
from liars_poker.simple_api import load_policy, _load_spec

# Id of the logged policy to load: "A<k>" ids are averages and "B<k>" ids are
# best responses (e.g. "A3" or "B1"). "A50" is the average after 50 iterations.
pid = "A50"
# Build the run directory from repo_root instead of a machine-specific
# absolute path, mirroring the save_root given to start_run
# (<repo_root>/artifacts). Change the final component to load another run.
run_dir = os.path.join(repo_root, "artifacts", "runs", "run_20251012_201636_42")


policy = load_policy(run_dir, pid)



In [14]:
# Interactive game against the loaded policy with fixed hands (card 5 for the
# human, card 2 for the bot), human to move first. The recorded output ends in
# a KeyboardInterrupt, so this cell apparently waits for typed actions and
# will block an unattended "Run All".
play_vs_bot(spec, policy, my_cards=[5], bot_cards=[2], start='me')

Your hand: (5,)
To play: You
Last claim: None
Legal: ['RankHigh:1', 'RankHigh:2', 'RankHigh:3', 'Pair:1', 'Pair:2', 'Pair:3']
Error: Unrecognized action string: 
To play: You
Last claim: None
Legal: ['RankHigh:1', 'RankHigh:2', 'RankHigh:3', 'Pair:1', 'Pair:2', 'Pair:3']


KeyboardInterrupt: Interrupted by user