In [25]:
import os, sys

# find repo root (looks for liars_poker/ or pyproject.toml)
def find_repo_root(start_dir: str, max_levels: int = 6) -> str:
    """Locate the repository root by walking upward from ``start_dir``.

    A directory counts as the root when it contains a ``liars_poker``
    sub-directory or a ``pyproject.toml`` entry.

    Args:
        start_dir: Directory to start from; made absolute before searching.
        max_levels: Number of directories to inspect, counting ``start_dir``
            itself. Defaults to 6, the depth that was previously hard-coded.

    Returns:
        The absolute path of the first matching directory. If nothing matches
        within ``max_levels`` directories (or the filesystem root is reached
        first), the absolute path two levels above ``start_dir`` is returned
        as a best guess.
    """
    cur = os.path.abspath(start_dir)
    for _ in range(max_levels):
        has_package = os.path.isdir(os.path.join(cur, "liars_poker"))
        has_pyproject = os.path.exists(os.path.join(cur, "pyproject.toml"))
        if has_package or has_pyproject:
            return cur
        parent = os.path.dirname(cur)
        if parent == cur:
            # dirname() stops changing once the filesystem root is reached.
            break
        cur = parent
    # Nothing matched: fall back to two levels above the starting directory.
    return os.path.abspath(os.path.join(start_dir, "..", ".."))

# The notebook's working directory is the starting point for the root search.
NB_DIR = os.getcwd()
REPO_ROOT = find_repo_root(NB_DIR)
# Put the repo root at the front of sys.path (only once, so re-running the
# cell does not add duplicates) before the liars_poker imports further down.
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)

# Artifacts directory under the repo root; created on first use.
ARTIFACTS_ROOT = os.path.join(REPO_ROOT, "artifacts")
os.makedirs(ARTIFACTS_ROOT, exist_ok=True)

# Echo the resolved locations so a wrong root is spotted immediately.
print("repo root   :", REPO_ROOT)
print("artifacts   :", ARTIFACTS_ROOT)


repo root   : /root/liars_poker
artifacts   : /root/liars_poker/artifacts


In [26]:
import random
from pprint import pprint

from liars_poker import (
    GameSpec, Env, InfoSet,
    RandomPolicy, best_response_mc,
    RunManager, FSPConfig, FSPTrainer,
    eval_both_seats
)

# Base seed for the notebook; seeds Python's `random` module right away.
SEED = 42
random.seed(SEED)

# Deliberately tiny game (P1 always starts by design): three ranks, one suit,
# one-card hands, and "RankHigh" as the only claim kind.
spec = GameSpec(
    ranks=3,
    suits=1,
    hand_size=1,
    claim_kinds=("RankHigh",),
)



In [27]:
# Open a run that saves under ARTIFACTS_ROOT, then log a RandomPolicy as the
# initial average strategy (noted as "A0: Random"; no parents, no mixing).
run = RunManager(spec=spec, save_root=ARTIFACTS_ROOT, seed=SEED)
a0_id = run.log_policy(
    RandomPolicy(),
    role="average",
    parents=[],
    mixing=None,
    notes="A0: Random",
    seed=SEED,
)
print("A0 logged as:", a0_id)

A0 logged as: A0


In [28]:
# Hyperparameters — adjust these to change the experiment.
MAX_ITERS = 30         # number of meta-iterations
EPISODES_BR = 20_000   # best-response sampling budget per iteration
EPSILON = 0.10         # epsilon for the best response's epsilon-greedy
MIN_VISITS = 0         # visits required per (s, a) before epsilon-greedy applies
SEED_BASE = SEED

# Bundle the settings above into the trainer configuration.
cfg = FSPConfig(
    episodes0=EPISODES_BR,
    epsilon=EPSILON,
    min_visits=MIN_VISITS,
    max_iters=MAX_ITERS,
    eta_schedule="harmonic",  # η_t = 1/(t+1)
    mix_impl="commit_once",
    seed=SEED_BASE,
)
trainer = FSPTrainer(run, cfg)

In [29]:
# Fetch the run's current policy; the bare name on the last line displays its
# repr as the cell output.
cur = run.current_policy()
cur

<liars_poker.policies.random.RandomPolicy at 0x73017dba5b20>

In [30]:
# Iteration 0: a single trainer step against the current policy. The unpacked
# results (br / new_avg and their ids b_id / a_id, plus metrics) are reused by
# the cells below.
br, new_avg, b_id, a_id, metrics = trainer.step(opponent_policy=cur, iter_index=0)

In [31]:
# Show the best response's stored action distribution at every infoset that
# belongs to pid 1 and holds the hand (0,).
[
    (infoset, br.probs[infoset])
    for infoset in br.probs
    if infoset.hand == (0,) and infoset.pid == 1
]

[(InfoSet(pid=1, last_idx=2, hand=(0,), history=(0, 1, 2)), {-1: 1.0}),
 (InfoSet(pid=1, last_idx=0, hand=(0,), history=(0,)), {2: 1.0}),
 (InfoSet(pid=1, last_idx=1, hand=(0,), history=(1,)), {2: 1.0}),
 (InfoSet(pid=1, last_idx=2, hand=(0,), history=(2,)), {-1: 1.0})]

In [32]:
# Evaluate the new average against the best response from the same step; the
# result is displayed as the cell output.
# NOTE(review): presumably the 4th positional argument is the episode count and
# 'A' in the result refers to the first policy passed — confirm against
# eval_both_seats.
eval_both_seats(spec, new_avg, br, 10_000)

{'A': 3993, 'B': 6007, 'total': 10000}

In [33]:


# Continue training for iterations 1..MAX_ITERS (iteration 0 ran in an earlier
# cell). Each pass steps the trainer against the current average, reports the
# result, and then adopts the new average as the next opponent.

# Infoset probed on every iteration to watch how the distributions evolve.
root_infoset = InfoSet(pid=0, last_idx=-2, hand=(0, ), history=())

cur = run.current_policy()
for t in range(1, MAX_ITERS + 1):
    # Opponent (current average) distribution at the probed infoset.
    print(cur.prob_dist_at_infoset(infoset=root_infoset))

    br, new_avg, b_id, a_id, metrics = trainer.step(opponent_policy=cur, iter_index=t)

    # Distribution of the policy returned as `br` at the same infoset.
    print(br.prob_dist_at_infoset(infoset=root_infoset))
    print(f"iter {t}: BR={b_id}  →  AVG={a_id}")

    # Score br against the average it was trained on; the evaluation seed is
    # the same on every iteration.
    res = eval_both_seats(spec, br, cur, episodes=3000, seed=SEED_BASE + 1234)
    print("BR vs current AVG (BR first):")
    pprint(res)

    cur = new_avg

final_avg = run.current_policy()
print("final average id:", run.current_policy_id())


{0: 0.3333333333333333, 1: 0.3333333333333333, 2: 0.3333333333333333}
{0: 1.0, 1: 0.0, 2: 0.0}
iter 1: BR=B1  →  AVG=A2
BR vs current AVG (BR first):
{'A': 2082, 'B': 918, 'total': 3000}
{0: 0.5555555555555556, 1: 0.22222222222222224, 2: 0.22222222222222224}
{0: 0.0, 1: 0.0, 2: 1.0}
iter 2: BR=B2  →  AVG=A3
BR vs current AVG (BR first):
{'A': 1959, 'B': 1041, 'total': 3000}
{0: 0.25, 1: 0.25, 2: 0.5}
{0: 0.0, 1: 0.0, 2: 1.0}
iter 3: BR=B3  →  AVG=A4
BR vs current AVG (BR first):
{'A': 1911, 'B': 1089, 'total': 3000}
{0: 0.26666666666666666, 1: 0.26666666666666666, 2: 0.4666666666666667}
{0: 1.0, 1: 0.0, 2: 0.0}
iter 4: BR=B4  →  AVG=A5
BR vs current AVG (BR first):
{'A': 1897, 'B': 1103, 'total': 3000}
{0: 0.4444444444444444, 1: 0.2777777777777778, 2: 0.2777777777777778}
{0: 1.0, 1: 0.0, 2: 0.0}
iter 5: BR=B5  →  AVG=A6
BR vs current AVG (BR first):
{'A': 1972, 'B': 1028, 'total': 3000}
{0: 0.42857142857142855, 1: 0.2857142857142857, 2: 0.2857142857142857}
{0: 1.0, 1: 0.0, 2: 0.0}
iter

In [34]:
# One-off best response against the final average; it is computed directly
# here and not logged to the run.
fresh_br_kwargs = dict(
    episodes=20_000,          # raise for a stronger best response
    epsilon=0.10,
    min_visits_per_action=1,
    alternate_seats=True,
    seed=SEED_BASE + 999,
    annotate="none",          # V/visits annotations are not needed here
)
fresh_br = best_response_mc(spec, final_avg, **fresh_br_kwargs)

# Seat-swapped evaluation of the fresh best response against the final average.
res = eval_both_seats(
    spec,
    fresh_br,
    final_avg,
    episodes=3000,
    seed=SEED_BASE + 1234,
)
print("fresh BR vs final AVG (candidate perspective):")
pprint(res)


fresh BR vs final AVG (candidate perspective):
{'A': 2082, 'B': 918, 'total': 3000}


In [35]:
# Display element 0 of the final average's `policies` attribute.
final_avg.policies[0]

<liars_poker.policies.random.RandomPolicy at 0x73017dba6270>

In [36]:
from liars_poker import RandomPolicy

# Sanity check 1: final average against a fresh RandomPolicy. If the average
# has moved away from random play, this should not look like a coin flip.
# NOTE(review): a near 50/50 split here would mean the average is
# indistinguishable from random play and is worth investigating.
sanity1 = eval_both_seats(
    spec,
    final_avg,
    RandomPolicy(),
    episodes=30000,
    seed=SEED + 777,
)
print("Final AVG vs Random:", sanity1)

# Sanity check 2: same matchup with the argument order swapped, to make sure
# the result is not tied to which policy is passed first.
sanity2 = eval_both_seats(
    spec,
    RandomPolicy(),
    final_avg,
    episodes=30000,
    seed=SEED + 778,
)
print("Random vs Final AVG:", sanity2)


Final AVG vs Random: {'A': 14972, 'B': 15028, 'total': 30000}
Random vs Final AVG: {'A': 14948, 'B': 15052, 'total': 30000}


In [37]:
# from collections import defaultdict
# import os, json
# from liars_poker.io.manifest import read_strategy_manifest

# def manifest_path(run, pid): 
#     return os.path.join(run.manifests_dir, f"{pid}.json")

# def expand_parents_recursively(run, pid):
#     man = read_strategy_manifest(manifest_path(run, pid))
#     if not man.parents:
#         print("no parents")
#         return [(pid, man.role, 1.0)]
#     accum = []
#     for p in man.parents:
#         print(p)
#         for (cid, crole, cw) in expand_parents_recursively(run, p["id"]):
#             accum.append((cid, crole, cw * float(p["weight"])))
#     # merge duplicates
#     agg = defaultdict(float)
#     for (cid, crole, w) in accum:
#         agg[(cid, crole)] += w
#     return [(cid, crole, agg[(cid, crole)]) for (cid, crole) in agg]

# final_id = run.current_policy_id()
# expanded = sorted(expand_parents_recursively(run, final_id))
# print("Expanded lineage weights for", final_id)
# for (pid, role, w) in expanded:
#     print(f"  {pid:>4} | {role:<13} | {w:.6f}")


In [38]:
from collections import defaultdict
import os, json
from liars_poker.io.manifest import read_strategy_manifest

def manifest_path(run, pid):
    """Return the path of the ``<pid>.json`` manifest inside ``run.manifests_dir``."""
    filename = f"{pid}.json"
    return os.path.join(run.manifests_dir, filename)

def expand_avg_lineage(run, pid):
    """Flatten the ancestry of an average policy into weighted leaf entries.

    Reads the manifest of ``pid``. If the manifest's role is not ``"average"``
    or it lists no parents, the policy itself is the only leaf (weight 1.0).
    Otherwise each parent's weight is multiplied down the tree: parent entries
    whose role is ``"avg"`` are expanded recursively, and every other entry
    (best responses included) is kept as a leaf.

    Returns:
        A list of ``(policy_id, role, weight)`` tuples, one per distinct
        ``(policy_id, role)`` pair, in first-encountered order.
    """
    manifest = read_strategy_manifest(manifest_path(run, pid))
    expandable = manifest.role == "average" and bool(manifest.parents)
    if not expandable:
        return [(pid, manifest.role, 1.0)]

    # Summed weight per (policy_id, role); dict order keeps first-seen order.
    totals = {}
    for parent in manifest.parents:
        weight = float(parent["weight"])
        # NOTE(review): the manifest's own role is compared with "average"
        # while parent entries are compared with "avg" — confirm both
        # spellings match the manifest schema.
        if parent.get("role") == "avg":
            contributions = [
                (cid, crole, weight * cw)
                for (cid, crole, cw) in expand_avg_lineage(run, parent["id"])
            ]
        else:
            # BR or anything else: leaf
            contributions = [(parent["id"], parent.get("role", ""), weight)]
        for (cid, crole, share) in contributions:
            key = (cid, crole)
            totals[key] = totals.get(key, 0.0) + share
    return [(cid, crole, total) for (cid, crole), total in totals.items()]
# Print the flattened lineage of the current policy, one row per leaf.
# Rows are sorted as plain tuples, so ids order lexicographically
# (e.g. "B10" comes before "B2").
final_id = run.current_policy_id()
lineage_rows = sorted(expand_avg_lineage(run, final_id))
for (pid, role, w) in lineage_rows:
    print(f"{pid:>4} | {role:<13} | {w:.6f}")


  A0 | average       | 0.031250
  B0 | br            | 0.031250
  B1 | br            | 0.031250
 B10 | br            | 0.031250
 B11 | br            | 0.031250
 B12 | br            | 0.031250
 B13 | br            | 0.031250
 B14 | br            | 0.031250
 B15 | br            | 0.031250
 B16 | br            | 0.031250
 B17 | br            | 0.031250
 B18 | br            | 0.031250
 B19 | br            | 0.031250
  B2 | br            | 0.031250
 B20 | br            | 0.031250
 B21 | br            | 0.031250
 B22 | br            | 0.031250
 B23 | br            | 0.031250
 B24 | br            | 0.031250
 B25 | br            | 0.031250
 B26 | br            | 0.031250
 B27 | br            | 0.031250
 B28 | br            | 0.031250
 B29 | br            | 0.031250
  B3 | br            | 0.031250
 B30 | br            | 0.031250
  B4 | br            | 0.031250
  B5 | br            | 0.031250
  B6 | br            | 0.031250
  B7 | br            | 0.031250
  B8 | br            | 0.031250
  B9 | b

In [39]:
# Display the id of the run's current policy as the cell output.
run.current_policy_id()

'A31'