In [11]:
import os, sys

# find repo root (looks for liars_poker/ or pyproject.toml)
def find_repo_root(start_dir: str, max_levels: int = 6) -> str:
    """Locate the repository root by walking upward from ``start_dir``.

    A directory is treated as the root when it contains a ``liars_poker``
    directory or a ``pyproject.toml`` entry.

    Args:
        start_dir: Directory to start searching from; it is made absolute
            before the walk begins.
        max_levels: Maximum number of directories to inspect, counting
            ``start_dir`` itself. Defaults to 6.

    Returns:
        The absolute path of the first directory that contains a marker.
        If no marker is found within ``max_levels`` directories, or the
        filesystem root is reached first, the absolute path two levels
        above ``start_dir`` is returned as a fallback.
    """
    cur = os.path.abspath(start_dir)
    for _ in range(max_levels):
        has_package_dir = os.path.isdir(os.path.join(cur, "liars_poker"))
        has_pyproject = os.path.exists(os.path.join(cur, "pyproject.toml"))
        if has_package_dir or has_pyproject:
            return cur
        parent = os.path.dirname(cur)
        if parent == cur:
            # dirname() no longer changes the path: filesystem root reached.
            break
        cur = parent
    # Fallback: assume the notebook lives two directories below the repo root.
    return os.path.abspath(os.path.join(start_dir, "..", ".."))

# Resolve every path from the notebook's working directory.
NB_DIR = os.getcwd()
REPO_ROOT = find_repo_root(NB_DIR)
ARTIFACTS_ROOT = os.path.join(REPO_ROOT, "artifacts")

# Put the repo root at the front of sys.path (only once) so the in-repo
# package can be imported by the cells that follow.
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)

# Create the artifacts directory if it is not there yet.
os.makedirs(ARTIFACTS_ROOT, exist_ok=True)

print("repo root   :", REPO_ROOT)
print("artifacts   :", ARTIFACTS_ROOT)


repo root   : /root/liars_poker
artifacts   : /root/liars_poker/artifacts


In [12]:
import random
from pprint import pprint

from liars_poker import (
    GameSpec, Env, InfoSet, Rules,
    best_response_mc,
    Policy, TabularPolicy, CommitOnceMixture, RandomPolicy,
    eval_both_seats
)

from liars_poker.algo.br_mc import efficient_best_response_mc_v3, efficient_best_response_mc_v2
from typing import List, Tuple

SEED = 42
random.seed(SEED)  # seed Python's global RNG once, up front

# Small game configuration; per the original note, P1 always starts by design.
spec = GameSpec(
    ranks=6,
    suits=2,
    hand_size=2,
    claim_kinds=("RankHigh", "Pair"),
    suit_symmetry=True,
)
rules = Rules(spec)


In [13]:
def flatten_commit_once(policy: Policy) -> List[Tuple[Policy, float]]:
    """Return ``policy`` as a flat list of ``(component, weight)`` pairs.

    A ``CommitOnceMixture`` is expanded into its ``policies`` paired with the
    corresponding ``weights``; any other policy is returned as a single pair
    with weight ``1.0``.
    """
    if not isinstance(policy, CommitOnceMixture):
        return [(policy, 1.0)]
    return [(component, weight) for component, weight in zip(policy.policies, policy.weights)]

def mix_policies(base_policy: Policy, br_policy: Policy, eta: float, rng: random.Random | None = None) -> CommitOnceMixture:
    """Blend two policies into a single flat ``CommitOnceMixture``.

    Both inputs are first flattened with ``flatten_commit_once`` so that the
    result never nests mixtures. Components coming from ``base_policy`` have
    their weights scaled by ``1 - eta``; components coming from ``br_policy``
    are scaled by ``eta``.

    Args:
        base_policy: Policy (or mixture) that keeps ``1 - eta`` of the mass.
            Its ``_rules`` attribute is reused to bind the new mixture.
        br_policy: Policy (or mixture) that receives ``eta`` of the mass.
        eta: Mixing coefficient; must lie in ``[0, 1]``.
        rng: Optional random generator forwarded to ``CommitOnceMixture``.

    Returns:
        A new ``CommitOnceMixture`` bound to ``base_policy._rules``.

    Raises:
        ValueError: If ``eta`` is outside ``[0, 1]`` (including NaN), which
            would otherwise produce negative component weights.
    """
    # The chained comparison is False for NaN as well, so NaN is rejected too.
    if not 0.0 <= eta <= 1.0:
        raise ValueError(f"eta must be within [0, 1], got {eta!r}")

    combined_policies: List[Policy] = []
    combined_weights: List[float] = []

    # Base components first, then best-response components, each group scaled
    # by its share of the total probability mass.
    groups = (
        (flatten_commit_once(base_policy), 1.0 - eta),
        (flatten_commit_once(br_policy), eta),
    )
    for components, share in groups:
        for policy, weight in components:
            combined_policies.append(policy)
            combined_weights.append(share * weight)

    mixed_policy = CommitOnceMixture(combined_policies, combined_weights, rng=rng)
    # Reuse the rules object the base policy is already bound to.
    mixed_policy.bind_rules(base_policy._rules)

    return mixed_policy

In [14]:
# Initial policy: a RandomPolicy bound to the rules object built from `spec`.
# It seeds the list of averaged policies used by the training loop.
a0 = RandomPolicy()
a0.bind_rules(rules=rules)

# a0.store_efficiently('/root/liars_poker/artifacts/runs/run_temp')

In [15]:
# History of averaged policies; index 0 is the initial policy a0.
all_averages = [a0]
# Best responses computed so far (appended to by the training loop).
all_brs = []

# Averaged policy that the next best response is trained against.
curr_av = a0

In [16]:
# import time

# start = time.time()
# br1 = best_response_mc(spec, a0, episodes=10_000)
# end = time.time()
# print(f"best_response_mc took {end - start:.3f} seconds")
# print(eval_both_seats(spec, br1, a0, 1000, seed=0))

# start = time.time()
# br2 = efficient_best_response_mc_v2(spec, a0, episodes=10_000)
# end = time.time()
# print(f"efficient_best_response_mc_v2 took {end - start:.3f} seconds")
# print(eval_both_seats(spec, br2, a0, 1000, seed=0))

# start = time.time()
# br3 = efficient_best_response_mc_v3(spec, a0, episodes=10_000)
# end = time.time()
# print(f"efficient_best_response_mc_v3 took {end - start:.3f} seconds")
# print(eval_both_seats(spec, br3, a0, 1000, seed=0))





In [None]:
import math

# Tunables for the averaging loop.
N_ITERATIONS = 100            # iterations performed per execution of this cell
EVAL_EPISODES = 10_000        # games used to score each best response
MAX_EPISODES = 10_000_000     # upper bound on the best-response training budget
EPISODE_GROWTH = 1.5          # budget multiplier applied while below the threshold
WIN_RATE_THRESHOLD = 0.75     # grow the budget while the last win rate is below this

episodes = 100_000
# Ratio eval_results['A'] / eval_results['total'] from the previous iteration
# (presumably the best response's win rate against the average — confirm
# against eval_both_seats). Starts at 1 so the budget is not grown at once.
last_exploitablity = 1

# Resume from the number of mixing steps already applied to curr_av instead of
# always restarting at 0. all_averages starts as [a0] and gains one entry per
# mix, so re-running this cell keeps eta = 1 / (i + 2) consistent with the
# number of components already in the mixture (and keeps seeds from repeating).
start_iter = len(all_averages) - 1

for i in range(start_iter, start_iter + N_ITERATIONS):
    print(i)
    # eta = min(math.sqrt(1/(i+2)), 0.5)
    eta = 1 / (i + 2)

    if last_exploitablity < WIN_RATE_THRESHOLD:
        episodes = min(int(episodes * EPISODE_GROWTH), MAX_EPISODES)

    print(episodes)

    # Alternative solvers previously tried here with the same keyword
    # arguments: efficient_best_response_mc_v2 and best_response_mc.
    b_i = efficient_best_response_mc_v3(
        spec=spec,
        opponent=curr_av,
        episodes=episodes,
        epsilon=0.1,
        min_visits_per_action=0,
        annotate='none',
        seed=i,
    )
    # Seed the evaluation as well so each printed ratio is reproducible.
    eval_results = eval_both_seats(spec, b_i, curr_av, episodes=EVAL_EPISODES, seed=SEED + i)

    last_exploitablity = eval_results['A'] / eval_results['total']
    print(last_exploitablity)
    print()

    # Update all loop state together so an interrupt cannot leave a best
    # response recorded in all_brs that was never mixed into curr_av.
    curr_av = mix_policies(curr_av, b_i, eta)
    all_brs.append(b_i)
    all_averages.append(curr_av)




0
100000
0.8492

1
100000
0.798

2
100000
0.7472

3
150000
0.7426

4
225000
0.7557

5
225000
0.7095

6
337500
0.7156

7
506250
0.7173

8
759375
0.7344

9
1139062
0.7464

10
1708593
0.7378

11
2562889
0.7425

12
3844333
0.7288

13
5766499
0.7157

14
8649748
0.7147

15
10000000
0.7127

16
10000000
0.7024

17
10000000
0.6923

18
10000000
0.687

19
10000000
0.6813

20
10000000
0.6775

21
10000000
0.6679

22
10000000
0.6613

23
10000000
0.6565

24
10000000
0.6547

25
10000000
0.6459

26
10000000
0.6418

27
10000000
0.6513

28
10000000
0.6393

29
10000000
0.6386

30
10000000
0.6315

31
10000000
0.6314

32
10000000
0.6347

33
10000000
0.6224

34
10000000
0.6292

35
10000000
0.6266

36
10000000
0.6225

37
10000000
0.6194

38
10000000
0.6106

39
10000000
0.6067

40
10000000
0.6012

41
10000000
0.6087

42
10000000
0.6093

43
10000000
0.6075

44
10000000
0.6005

45
10000000
0.593

46
10000000
0.5895

47
10000000
0.5944

48
10000000
0.5887

49
10000000
0.5938

50
10000000
0.5924

51
10000000
0.585

In [None]:
# Inspect the averaged policy's action distribution at one information set.
# NOTE(review): `spec` is built with hand_size=2, but this hand has a single
# element, and the recorded output is exactly uniform. That may mean this
# infoset never matches a trained entry — confirm the expected InfoSet
# encoding (hand length, pid indexing) before relying on this check.
iset = InfoSet(pid=1, hand=(2, ), history=(0, ))

print(curr_av.prob_dist_at_infoset(iset))




{-1: 0.08333333333333334, 1: 0.08333333333333334, 2: 0.08333333333333334, 3: 0.08333333333333334, 4: 0.08333333333333334, 5: 0.08333333333333334, 6: 0.08333333333333334, 7: 0.08333333333333334, 8: 0.08333333333333334, 9: 0.08333333333333334, 10: 0.08333333333333334, 11: 0.08333333333333334}


In [None]:
# Save under the artifacts directory derived from the detected repo root
# rather than a machine-specific absolute path.
curr_av.store_efficiently(os.path.join(ARTIFACTS_ROOT, "runs", "run_temp_17"))

In [None]:
# Display the component weights of the current averaged policy.
curr_av.weights

[0.02631578947368423,
 0.02631578947368423,
 0.02631578947368423,
 0.02631578947368423,
 0.02631578947368423,
 0.02631578947368423,
 0.02631578947368423,
 0.026315789473684223,
 0.026315789473684223,
 0.026315789473684237,
 0.02631578947368423,
 0.026315789473684237,
 0.02631578947368423,
 0.02631578947368423,
 0.02631578947368421,
 0.026315789473684223,
 0.026315789473684223,
 0.02631578947368423,
 0.026315789473684237,
 0.026315789473684216,
 0.026315789473684237,
 0.02631578947368421,
 0.02631578947368423,
 0.026315789473684216,
 0.026315789473684216,
 0.026315789473684244,
 0.026315789473684216,
 0.026315789473684223,
 0.026315789473684216,
 0.026315789473684223,
 0.026315789473684216,
 0.026315789473684216,
 0.026315789473684216,
 0.026315789473684216,
 0.026315789473684223,
 0.02631578947368423,
 0.026315789473684237,
 0.026315789473684216]

In [None]:
# Train a final best response against the averaged policy; the seed is passed
# explicitly so this exploitability check can be reproduced.
b_final = efficient_best_response_mc_v3(spec, curr_av, episodes=1_000_000, seed=SEED)

In [None]:
# Score the final best response against the averaged policy with a fixed seed
# so the reported counts are reproducible.
eval_both_seats(spec, b_final, curr_av, episodes=10_000, seed=SEED)

{'A': 7269, 'B': 2731, 'total': 10000}