In [1]:
import os, sys

# find repo root (looks for liars_poker/ or pyproject.toml)
def find_repo_root(start_dir: str, max_depth: int = 6) -> str:
    """Locate the repository root by walking upward from ``start_dir``.

    A directory is accepted as the root when it contains a ``liars_poker``
    directory or a ``pyproject.toml`` entry.

    Args:
        start_dir: Directory to start from; made absolute before searching.
        max_depth: How many directories to inspect, counting ``start_dir``
            itself. Defaults to 6, the depth that used to be hard-coded.

    Returns:
        Absolute path of the first matching directory. When nothing matches
        within ``max_depth`` levels, or the filesystem root is reached first,
        the result falls back to two levels above ``start_dir``.
    """
    cur = os.path.abspath(start_dir)
    for _ in range(max_depth):
        if os.path.isdir(os.path.join(cur, "liars_poker")) or os.path.exists(os.path.join(cur, "pyproject.toml")):
            return cur
        parent = os.path.dirname(cur)
        if parent == cur:
            # dirname() returns its argument unchanged only at the filesystem
            # root, so there is nothing further up to inspect.
            break
        cur = parent
    # No marker found: keep the historical guess of "two directories up".
    return os.path.abspath(os.path.join(start_dir, "..", ".."))

# The kernel's working directory is used as the starting point for the search.
NB_DIR = os.getcwd()
REPO_ROOT = find_repo_root(NB_DIR)
ARTIFACTS_ROOT = os.path.join(REPO_ROOT, "artifacts")

# Put the repo at the front of sys.path; skipped when it is already listed so
# re-running this cell does not add duplicate entries.
if sys.path.count(REPO_ROOT) == 0:
    sys.path.insert(0, REPO_ROOT)

# exist_ok makes this a no-op when the artifacts directory is already there.
os.makedirs(ARTIFACTS_ROOT, exist_ok=True)

print("repo root   :", REPO_ROOT)
print("artifacts   :", ARTIFACTS_ROOT)


repo root   : /root/liars_poker
artifacts   : /root/liars_poker/artifacts


In [2]:
import scipy.stats as stats
import random
from pprint import pprint

from liars_poker import (
    GameSpec, Env, InfoSet, Rules,
    Policy, TabularPolicy, CommitOnceMixture, RandomPolicy,
    eval_both_seats
)

from liars_poker.algo.br_exact import best_response_exact
from typing import List, Tuple

# Fixed seed for the stdlib RNG used by this notebook.
SEED = 42
random.seed(SEED)

# small game; P1 always starts by design
spec = GameSpec(
    ranks=7,
    suits=2,
    hand_size=1,
    claim_kinds=("RankHigh", "Pair"),
    suit_symmetry=True,
)
rules = Rules(spec)


In [3]:
def flatten_commit_once(policy: Policy) -> List[Tuple[Policy, float]]:
    """Express ``policy`` as a list of ``(component, weight)`` pairs.

    A ``CommitOnceMixture`` is expanded one level: its ``policies`` are paired
    positionally with its ``weights``. Any other policy is returned as a
    single pair carrying weight 1.0.
    """
    if not isinstance(policy, CommitOnceMixture):
        return [(policy, 1.0)]
    return [(component, weight) for component, weight in zip(policy.policies, policy.weights)]

def mix_policies(base_policy: Policy, br_policy: Policy, eta: float, rng: random.Random | None = None) -> CommitOnceMixture:
    """Blend two policies into one flat ``CommitOnceMixture``.

    Both inputs are expanded with ``flatten_commit_once``. Components of
    ``base_policy`` keep their relative weights scaled by ``1 - eta``;
    components of ``br_policy`` are scaled by ``eta``. Base components are
    listed first, followed by the ``br_policy`` components.

    Args:
        base_policy: Policy receiving weight ``1 - eta``; its ``_rules`` are
            bound to the result.
        br_policy: Policy receiving weight ``eta``.
        eta: Mixing weight given to ``br_policy``.
        rng: Passed through to the ``CommitOnceMixture`` constructor.

    Returns:
        The new mixture, already bound to ``base_policy._rules``.
    """
    keep = 1.0 - eta
    weighted = [(member, keep * share) for member, share in flatten_commit_once(base_policy)]
    weighted += [(member, eta * share) for member, share in flatten_commit_once(br_policy)]

    members: List[Policy] = [member for member, _ in weighted]
    shares: List[float] = [share for _, share in weighted]

    mixture = CommitOnceMixture(members, shares, rng=rng)
    # Reuses the base policy's private rules handle so the mixture is usable
    # immediately without a separate bind step by the caller.
    mixture.bind_rules(base_policy._rules)
    return mixture

In [4]:
# Starting policy for the iteration: a RandomPolicy bound to this notebook's rules.
a0 = RandomPolicy()
a0.bind_rules(rules=rules)

In [5]:
# Running state for the training loop below. Re-running this cell resets it.
curr_av = a0                # current average policy; starts as the initial policy
all_averages = [curr_av]    # history of average policies, initial policy first
all_brs = []                # best responses, appended one per loop iteration

In [None]:
import math 


last_exploitablity = 1

# Each pass: compute a best response to the current average policy, compare
# the win rate reported by the best-response computer against sampled play,
# then mix the best response into the average.
for i in range(1000):
    print(i)

    # Mixing weight is derived from how many averages exist rather than from
    # the loop index. On a fresh run len(all_averages) == i + 1, so this is
    # the same 1 / (i + 2) as before; if the cell is interrupted and run
    # again, the schedule continues instead of restarting at eta = 1/2 on top
    # of an already-averaged curr_av.
    eta = 1 / (len(all_averages) + 1)

    # Alternative BR call, currently disabled:
    #b_i = efficient_best_response_mc_v3(spec=spec, opponent=curr_av, episodes=episodes, epsilon=0.1, min_visits_per_action=0, annotate='none', seed=i)
    b_i, br_computer = best_response_exact(spec=spec, policy=curr_av)
    p_first, p_second = br_computer.exploitability()
    # Average of the two values returned by exploitability().
    predicted = 0.5 * (p_first + p_second)

    # NOTE(review): 'A' is presumably the win count of the first policy passed
    # (b_i) — confirm against eval_both_seats.
    eval_results = eval_both_seats(spec, b_i, curr_av, episodes=10_000, seed=random.randint(1,1000))
    observed_wins = eval_results['A']
    total_games = eval_results['total']
    observed_rate = observed_wins / total_games

    # Two-cell chi-square goodness-of-fit (wins / non-wins) of the sampled
    # counts against the predicted rate.
    expected_successes = total_games * predicted
    expected_failures = total_games * (1 - predicted)
    observed_failures = total_games - observed_wins

    # When an expected count is not positive the statistic is undefined; it
    # is left at 0.0, which yields a p-value of 1.
    chi2_stat = 0.0
    if expected_successes > 0 and expected_failures > 0:
        chi2_stat = ((observed_wins - expected_successes) ** 2) / expected_successes + ((observed_failures - expected_failures) ** 2) / expected_failures
    # Survival function instead of 1 - cdf: the subtraction loses precision
    # and collapses to exactly 0 once cdf rounds to 1.0 for large statistics.
    p_value = stats.chi2.sf(chi2_stat, 1)


    all_brs.append(b_i)

    last_exploitablity = observed_rate
    print(f"Predicted exploitability: avg={predicted:.4f} (first={p_first:.4f}, second={p_second:.4f})")
    print(f"Sampled exploitability: avg={observed_rate:.4f}, chi2 p-value={p_value:.4g}")
    print()

    curr_av = mix_policies(curr_av, b_i, eta)
    all_averages.append(curr_av)





0
Predicted exploitability: avg=0.9572 (first=0.9615, second=0.9529)
Sampled exploitability: avg=0.9558, chi2 p-value=0.4848

1
Predicted exploitability: avg=0.8692 (first=0.8888, second=0.8497)
Sampled exploitability: avg=0.8723, chi2 p-value=0.3654

2
Predicted exploitability: avg=0.8283 (first=0.8746, second=0.7819)
Sampled exploitability: avg=0.8310, chi2 p-value=0.4669

3
Predicted exploitability: avg=0.7743 (first=0.8001, second=0.7485)
Sampled exploitability: avg=0.7763, chi2 p-value=0.6308

4
Predicted exploitability: avg=0.7715 (first=0.7895, second=0.7536)
Sampled exploitability: avg=0.7756, chi2 p-value=0.3343

5
Predicted exploitability: avg=0.7762 (first=0.7944, second=0.7580)
Sampled exploitability: avg=0.7819, chi2 p-value=0.1719

6
Predicted exploitability: avg=0.7728 (first=0.8049, second=0.7407)
Sampled exploitability: avg=0.7684, chi2 p-value=0.292

7
Predicted exploitability: avg=0.7721 (first=0.7889, second=0.7553)
Sampled exploitability: avg=0.7780, chi2 p-value=0

In [None]:
# Build the output location from ARTIFACTS_ROOT (set in the first cell) rather
# than a machine-specific absolute path, so the notebook also works when the
# repo is checked out somewhere other than /root/liars_poker.
curr_av.store_efficiently(os.path.join(ARTIFACTS_ROOT, "runs", "run_temp_105"))