In [18]:
import os, sys

# find repo root (looks for liars_poker/ or pyproject.toml)
def find_repo_root(start_dir: str) -> str:
    """Locate the repository root by walking upward from ``start_dir``.

    A directory counts as the root when it contains a ``liars_poker``
    directory or a ``pyproject.toml`` entry. At most six directories are
    inspected (``start_dir`` itself plus up to five ancestors); the walk also
    stops early at the filesystem root.

    Args:
        start_dir: Directory to start searching from (absolute or relative).

    Returns:
        Absolute path of the first matching directory, or, when nothing
        matches, the absolute path two levels above ``start_dir``.
    """
    candidate = os.path.abspath(start_dir)
    levels_left = 6
    while levels_left > 0:
        has_package = os.path.isdir(os.path.join(candidate, "liars_poker"))
        has_pyproject = os.path.exists(os.path.join(candidate, "pyproject.toml"))
        if has_package or has_pyproject:
            return candidate
        above = os.path.dirname(candidate)
        if above == candidate:
            # dirname() is a fixed point only at the filesystem root.
            break
        candidate = above
        levels_left -= 1
    # No marker found: fall back to the grandparent of the starting directory.
    return os.path.abspath(os.path.join(start_dir, "..", ".."))

# Treat the kernel's current working directory as the notebook directory.
NB_DIR = os.getcwd()
REPO_ROOT = find_repo_root(NB_DIR)
# Put the repo root at the front of sys.path (once) so imports resolve against it.
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)

# Directory for run artifacts under the repo root; created if it does not exist.
ARTIFACTS_ROOT = os.path.join(REPO_ROOT, "artifacts")
os.makedirs(ARTIFACTS_ROOT, exist_ok=True)

# Echo the resolved locations so a wrong root is visible immediately.
print("repo root   :", REPO_ROOT)
print("artifacts   :", ARTIFACTS_ROOT)


repo root   : /root/liars_poker
artifacts   : /root/liars_poker/artifacts


In [19]:
import random
from pprint import pprint

from liars_poker import (
    GameSpec, Env, InfoSet, Rules,
    best_response_mc,
    Policy, TabularPolicy, CommitOnceMixture, RandomPolicy,
    eval_both_seats
)

from liars_poker.algo.br_mc import efficient_best_response_mc_v3, efficient_best_response_mc_v2
from typing import List, Tuple

# Seed Python's global `random` module; only code that draws from the global
# generator is affected by this.
SEED = 42
random.seed(SEED)

# small game; P1 always starts by design
# 13 ranks, a single suit, one-card hands, and "RankHigh" as the only claim kind.
spec = GameSpec(ranks=13, suits=1, hand_size=1, claim_kinds=("RankHigh", ), suit_symmetry=True)
# Rules object derived from the spec; policies are bound to it via bind_rules().
rules = Rules(spec)


In [20]:
def flatten_commit_once(policy: Policy) -> List[Tuple[Policy, float]]:
    """Return ``policy`` as a list of ``(component, weight)`` pairs.

    A ``CommitOnceMixture`` is expanded one level into its ``policies`` paired
    with its ``weights``; any other policy becomes a single pair with
    weight 1.0.
    """
    if not isinstance(policy, CommitOnceMixture):
        return [(policy, 1.0)]
    return [(component, weight) for component, weight in zip(policy.policies, policy.weights)]

def mix_policies(base_policy: Policy, br_policy: Policy, eta: float, rng: random.Random | None = None) -> CommitOnceMixture:
    """Blend two policies into one ``CommitOnceMixture``: ``(1 - eta) * base + eta * br``.

    Both inputs are first flattened with ``flatten_commit_once`` so the result
    is a single flat mixture. Components of ``base_policy`` come first with
    their weights scaled by ``1 - eta``, followed by the components of
    ``br_policy`` scaled by ``eta``. Components are not merged or deduplicated.

    Args:
        base_policy: Policy (or mixture) receiving total weight ``1 - eta``.
            Its ``_rules`` attribute is used to bind the resulting mixture.
        br_policy: Policy (or mixture) receiving total weight ``eta``.
        eta: Mixing step size; must lie in ``[0, 1]``.
        rng: Optional random generator forwarded to ``CommitOnceMixture``.

    Returns:
        A new ``CommitOnceMixture`` bound to ``base_policy``'s rules.

    Raises:
        ValueError: If ``eta`` is outside ``[0, 1]`` (NaN included), since that
            would produce negative or meaningless mixture weights.
    """
    # Fail fast: the chained comparison is also False for NaN.
    if not 0.0 <= eta <= 1.0:
        raise ValueError(f"eta must be in [0, 1], got {eta!r}")

    base_components = flatten_commit_once(base_policy)
    br_components = flatten_commit_once(br_policy)

    # Base components first, then best-response components, each rescaled.
    weighted_components = [(policy, (1.0 - eta) * weight) for policy, weight in base_components]
    weighted_components += [(policy, eta * weight) for policy, weight in br_components]

    combined_policies = [policy for policy, _ in weighted_components]
    combined_weights = [weight for _, weight in weighted_components]

    mixed_policy = CommitOnceMixture(combined_policies, combined_weights, rng=rng)
    # NOTE(review): reads the private `_rules` attribute of the base policy;
    # assumes it was already bound via bind_rules() — confirm for new callers.
    mixed_policy.bind_rules(base_policy._rules)

    return mixed_policy

In [21]:
# Starting policy: a RandomPolicy bound to this game's rules.
a0 = RandomPolicy()
a0.bind_rules(rules=rules)

# Optional: persist the starting policy to disk (currently disabled).
# a0.store_efficiently('/root/liars_poker/artifacts/runs/run_temp')

In [22]:
# History of average policies; starts with the initial policy a0.
all_averages = [a0]
# Best-response policies collected so far (none yet).
all_brs = []

# Current average policy. NOTE: re-running this cell resets the history above
# and this variable back to a0.
curr_av = a0

In [23]:
# import time

# start = time.time()
# br1 = best_response_mc(spec, a0, episodes=10_000)
# end = time.time()
# print(f"best_response_mc took {end - start:.3f} seconds")
# print(eval_both_seats(spec, br1, a0, 1000, seed=0))

# start = time.time()
# br2 = efficient_best_response_mc_v2(spec, a0, episodes=10_000)
# end = time.time()
# print(f"efficient_best_response_mc_v2 took {end - start:.3f} seconds")
# print(eval_both_seats(spec, br2, a0, 1000, seed=0))

# start = time.time()
# br3 = efficient_best_response_mc_v3(spec, a0, episodes=10_000)
# end = time.time()
# print(f"efficient_best_response_mc_v3 took {end - start:.3f} seconds")
# print(eval_both_seats(spec, br3, a0, 1000, seed=0))





In [24]:
import math 

# Iterative averaging loop: at each step compute a Monte Carlo best response
# (BR) to the current average policy, measure how well it does against that
# average, then mix it into the average with step size eta = 1 / (i + 2).
#
# NOTE: this cell continues from the current `curr_av`, `all_brs` and
# `all_averages`; re-run the cell that initialises them to start over.

episodes = 100_000
# Fraction of evaluation games won by the latest BR ('A' / 'total').
# Starts at 1 so the episode budget is not raised before the first measurement.
# (Spelling of the name is kept as-is because it is a notebook-level global.)
last_exploitablity = 1

# Evaluation is seeded per iteration so the printed win rates (and therefore
# the episode schedule they drive) are repeatable. The offset keeps these seed
# values distinct from the BR training seeds (seed=i).
EVAL_SEED_OFFSET = 1_000_000

for i in range(100):
    print(i)
    # eta = min(math.sqrt(1/(i+2)), 0.5)
    eta = 1 / (i+2)

    # Once the BR win rate drops below 0.75, grow the BR training budget by
    # 1.5x per iteration, capped at 1,000,000 episodes.
    if last_exploitablity < 0.75:
        episodes = min(int(episodes*1.5), 1_000_000)
    # elif last_exploitablity > 0.6:
    #     episodes = int(episodes*0.8)
    #     episodes = max(100, episodes)

    print(episodes)

    # Alternatives tried with the same arguments: efficient_best_response_mc_v2
    # and best_response_mc.
    b_i = efficient_best_response_mc_v3(spec=spec, opponent=curr_av, episodes=episodes, epsilon=0.1, min_visits_per_action=0, annotate='none', seed=i)
    eval_results = eval_both_seats(spec, b_i, curr_av, episodes=10_000, seed=EVAL_SEED_OFFSET + i)
    # print(eval_results)

    all_brs.append(b_i)

    # Win rate of the BR (first policy passed to eval_both_seats) over all
    # evaluation games.
    last_exploitablity = eval_results['A'] / eval_results['total']
    print(last_exploitablity)
    print()

    # Fold the BR into the running average and record the new average.
    curr_av = mix_policies(curr_av, b_i, eta)
    all_averages.append(curr_av)




0
100000
0.8877

1
100000
0.771

2
100000
0.7165

3
150000
0.703

4
225000
0.693

5
337500
0.6727

6
506250
0.6708

7
759375
0.6411

8
1000000
0.6283

9
1000000
0.6193

10
1000000
0.6174

11
1000000
0.6064

12
1000000
0.6057

13
1000000
0.605

14
1000000
0.6014

15
1000000
0.5942

16
1000000
0.5829

17
1000000
0.571

18
1000000
0.5699

19
1000000
0.5732

20
1000000
0.5699

21
1000000
0.5625

22
1000000
0.5686

23
1000000
0.5568

24
1000000
0.5674

25
1000000
0.5518

26
1000000
0.5543

27
1000000
0.5532

28
1000000
0.549

29
1000000
0.5511

30
1000000
0.5437

31
1000000
0.5425

32
1000000
0.5504

33
1000000
0.5423

34
1000000
0.5419

35
1000000
0.5371

36
1000000
0.5405

37
1000000
0.5421

38
1000000
0.5379

39
1000000
0.5338

40
1000000
0.5362

41
1000000
0.5322

42
1000000
0.5409

43
1000000
0.5392

44
1000000
0.5366

45
1000000
0.5352

46
1000000
0.5315

47
1000000
0.5402

48
1000000
0.5366

49
1000000
0.5322

50
1000000
0.5398

51
1000000
0.5364

52
1000000
0.5334

53
1000000
0.5352

In [25]:
# Spot-check the current average policy at a single information set.
# NOTE(review): presumably pid=1 is the acting player, hand=(2,) the held card
# and history=(0,) the actions so far — confirm against InfoSet's definition.
iset = InfoSet(pid=1, hand=(2, ), history=(0, ))

# Prints the policy's probability distribution at that infoset.
print(curr_av.prob_dist_at_infoset(iset))




{-1: 0.07996953541507998, 1: 0.060167555217060124, 2: 0.45620715917745636, 3: 0.21858339680121855, 4: 0.09977151561309977, 5: 0.07006854531607004, 6: 0.0007616146230007614, 7: 0.010662604722010647, 8: 0.0007616146230007614, 9: 0.0007616146230007614, 10: 0.0007616146230007614, 11: 0.0007616146230007614, 12: 0.0007616146230007614}


In [30]:
# Persist the current average policy. The path is built from ARTIFACTS_ROOT
# (resolved in the setup cell) instead of a machine-specific absolute path.
curr_av.store_efficiently(os.path.join(ARTIFACTS_ROOT, "runs", "run_temp_15"))

In [37]:
# Persist the second-to-last average policy. The path is built from
# ARTIFACTS_ROOT (resolved in the setup cell) instead of a machine-specific
# absolute path. Which policy [-2] refers to depends on how far the training
# loop has run.
all_averages[-2].store_efficiently(os.path.join(ARTIFACTS_ROOT, "runs", "run_temp_19"))

In [34]:
# Final best response against the current average policy, trained with 10,000
# episodes and the function's default settings for everything else.
b_final = efficient_best_response_mc_v3(
    spec=spec,
    opponent=curr_av,
    episodes=10_000,
)

In [35]:
# Evaluate the final best response against the current average policy over
# 10,000 games; the returned dict is displayed as the cell output.
# NOTE(review): b_final above was trained with far fewer episodes than the
# best responses in the training loop, so this is presumably a weak estimate
# of exploitability — confirm before quoting the number.
eval_both_seats(spec, b_final, curr_av, episodes=10_000)

{'A': 4528, 'B': 5472, 'total': 10000}