In [None]:
import multiprocessing as mp
import time
from collections import Counter
from itertools import islice

import numpy as np
import pandas as pd

from farkle.simulation import (  # _play_game → single game:contentReference[oaicite:1]{index=1}
    _play_game,
    generate_strategy_grid,
)
from farkle.stats import games_for_power


In [None]:
# run_tournament.py  ── wake-up-and-done edition
# ------------------------------------------------



# ─── 1. Build the master grid ────────────────────────────────────────────────
strategies, meta = generate_strategy_grid()        # 8 160 objects
meta["str_repr"] = [str(s) for s in strategies]    # convenient lookup

# ─── 2. Powered sample size *per strategy* ───────────────────────────────────
n_games_per_player = games_for_power(
    n_strategies = len(strategies),
    delta        = 0.03,    # 3-percentage-point lift
    alpha        = 0.05,
    power        = 0.80,
    method       = "bh",
    pairwise     = True,
)
# Example: n_games_per_player ≈ 7 300  →  8 160×7 300/5 ≈ 11.9 M games
print(f"Powered sample ⇒ each strategy appears {n_games_per_player:,} times.")

# ─── 3. Balanced table scheduler (lazy generator, zero RAM blow-up) ──────────
def chunker(it, n):
    while (batch := list(islice(it, n))):
        yield tuple(batch)          # tuple → picklable & hashable

def make_tables(strats, repeats, seed=42):
    rng = np.random.default_rng(seed)
    for _ in range(repeats):
        perm = rng.permutation(len(strats))        # shuffle once per round
        yield from chunker(perm, 5)

tables_iter = make_tables(strategies, n_games_per_player)
total_games = len(strategies) * n_games_per_player // 5
print(f"Total games scheduled: {total_games:,}")

# ─── 4. Worker function (index-based to save bandwidth) ──────────────────────
def _one(task):
    seed, idxs = task
    table = [strategies[i] for i in idxs]          # rebuild objects locally
    row   = _play_game(seed, table, 10_000)
    winner = row["winner"]
    return str(row[f"{winner}_strategy"])          # string repr

# Master seed stream (lazy, same pace as tables_iter)
def task_stream():
    rng = np.random.default_rng(999)               # independent RNG
    for idxs in tables_iter:
        yield (rng.integers(0, 2**32 - 1), idxs)

# ─── 5. Run the tournament with 12 processes ─────────────────────────────────
t0 = time.perf_counter()
win_counter = Counter()

with mp.Pool(processes=12, maxtasksperchild=500) as pool:
    first = True
    for win in pool.imap_unordered(_one, task_stream(), chunksize=100):
        if first:
            print("▶▶▶ pool.imap_unordered is producing tasks")
            first = False
        win_counter[win] += 1
elapsed = time.perf_counter() - t0
print(f"Finished {total_games:,} games in {elapsed/3600:,.2f} hours.")

# ─── 6. Tally → CSV ──────────────────────────────────────────────────────────
summary = (
    meta[["strategy_idx", "str_repr"]]
      .assign(wincount=lambda df: df["str_repr"]
                                 .map(win_counter)
                                 .fillna(0)
                                 .astype("int32"))
      .sort_values("strategy_idx")[["strategy_idx", "wincount"]]
)
summary.to_csv("wincounts.csv", index=False)
print("Wrote wincounts.csv")


Powered sample ⇒ each strategy appears 7,333 times.
Total games scheduled: 11,967,456


: 

: 

In [9]:
n_games_per_player = games_for_power(
    n_strategies = 8160,
    delta        = 0.03,    # Strat win rate difference
    alpha        = 0.025,    # p-value analogue - overall false positive tolerance
    power        = 0.90,
    method       = "bh",
    pairwise     = True,
)
print(n_games_per_player)
print(n_games_per_player*8160//5)

10223
16683936


In [None]:
# run_tournament_v2.py  ── progress-aware, chunked, all threads
# -------------------------------------------------------------
import multiprocessing as mp
import pickle
import time
from collections import Counter

from farkle.simulation import generate_strategy_grid
from farkle.stats import games_for_power

# ─── 1. Master grid ───────────────────────────────────────────────────────────
strategies, meta = generate_strategy_grid()
meta["str_repr"] = [str(s) for s in strategies]

# ─── 2. Powered sample size ──────────────────────────────────────────────────
n_games_per_player = games_for_power(
    n_strategies=len(strategies), delta=0.03, alpha=0.025,
    power=0.90, method="bh", pairwise=True,
)
total_games = len(strategies) * n_games_per_player // 5
print(f"{n_games_per_player:,} appearances → {total_games:,} total games.")

# ─── 3. Table generator ──────────────────────────────────────────────────────
def chunker(it, n):
    while (batch := list(islice(it, n))):
        yield tuple(batch)

def make_tables(strats, repeats, seed=42):
    rng = np.random.default_rng(seed)
    for _ in range(repeats):
        yield from chunker(rng.permutation(len(strats)), 5)

tables_iter = make_tables(strategies, n_games_per_player)
# quick sanity check:
peek = list(islice(tables_iter, 2))
print("First two tables (just indices):", peek)
# Then rebuild it, since islice consumed it:
tables_iter = make_tables(strategies, n_games_per_player)

# ─── 4. Worker ----------------------------------------------------------------
def _one(task):
    seed, idxs = task
    table = [strategies[i] for i in idxs]
    row   = _play_game(seed, table, 10_000)
    return str(row[f"{row['winner']}_strategy"])

def task_stream():
    rng = np.random.default_rng(999)
    for idxs in tables_iter:
        yield (rng.integers(0, 2**32 - 1), idxs)

# ─── 5. Tournament loop with progress / checkpoints ---------------------------
CHUNKSIZE         = 10_000
PROCESSES         = 16          # all logical threads
MAXTASKS          = 50
REPORT_EVERY      = 100_000   # games
CHECKPOINT_FILE   = "win_counter.chk"

def save_checkpoint(counter, done):
    with open(CHECKPOINT_FILE, "wb") as f:
        pickle.dump({"done": done, "counter": dict(counter)}, f)

def main():
    print("Entered main() at", time.asctime())
    start = time.perf_counter()
    win_counter = Counter()
    done_games  = 0

    with mp.Pool(processes=PROCESSES,
                 maxtasksperchild=MAXTASKS) as pool:

        first = True
        for win in pool.imap_unordered(_one, task_stream(), chunksize=CHUNKSIZE):
            if first:
                print("pool.imap_unordered is producing tasks")
                first = False
            win_counter[win] += 1
            done_games += 1

            if done_games % REPORT_EVERY == 0:
                pct = 100 * done_games / total_games
                hrs = (time.perf_counter() - start) / 3600
                print(f"{done_games:,}/{total_games:,}  "
                      f"({pct:5.2f} %)  {hrs:5.2f} h")
                save_checkpoint(win_counter, done_games)

    # final save
    save_checkpoint(win_counter, done_games)

    elapsed = (time.perf_counter() - start) / 3600
    print(f"Finished in {elapsed:,.2f} hours")

    summary = (
        meta[["strategy_idx", "str_repr"]]
          .assign(wincount=lambda df:
                  df["str_repr"].map(win_counter).fillna(0).astype("int32"))
          .sort_values("strategy_idx")[["strategy_idx", "wincount"]]
    )
    summary.to_csv("wincounts.csv", index=False)
    print("→ wincounts.csv written")

if __name__ == "__main__":
    main()

10,223 appearances → 16,683,936 total games.


In [None]:
# run_tournament_v2.py  ── progress-aware, chunked, all threads
# -------------------------------------------------------------
import multiprocessing as mp
import time
from collections import Counter
from itertools import islice

from farkle.simulation import generate_strategy_grid
from farkle.stats import games_for_power

# ─── 1. Master grid ───────────────────────────────────────────────────────────
strategies, meta = generate_strategy_grid()
meta["str_repr"] = [str(s) for s in strategies]
# ─── 2. Powered sample size ──────────────────────────────────────────────────
n_games_per_player = games_for_power(
    n_strategies=len(strategies), delta=0.03, alpha=0.025,
    power=0.90, method="bh", pairwise=True,
)
print(f"DEBUG: n_games_per_player = {n_games_per_player}")

################ DEBUGGING IO ################
n_games_per_player = 2
print(f"DEBUG: changed n_games_per_player = {n_games_per_player}")

total_games = len(strategies) * n_games_per_player // 5
print(f"DEBUG: total_games = {total_games}")
print(f"▶ Scheduling {total_games:,} total games.\n")

# ─── 3. Table generator ──────────────────────────────────────────────────────
def chunker(it, n):
    while (batch := list(islice(it, n))):
        yield tuple(batch)

def make_tables(strats, repeats, seed=42):
    rng = np.random.default_rng(seed)
    for _ in range(repeats):
        yield from chunker(rng.permutation(len(strats)), 5)

# Sanity-check peek:
tables_iter = make_tables(strategies, n_games_per_player)
first_two = list(islice(tables_iter, 2))
print("DEBUG: First two tables of indices:", first_two)
print("DEBUG → Peeking at task_stream instead of tables_iter alone:")
_orig_tables = make_tables(strategies, n_games_per_player)
tsk = task_stream()       # uses tables_iter internally, but let's bypass it
# Instead, force task_stream to use a fresh make_tables:
def test_task_stream():
    rng = np.random.default_rng(999)
    for idxs in make_tables(strategies, n_games_per_player):
        yield (rng.integers(0, 2**32 - 1), idxs)

first_task = next(test_task_stream(), None)
print("DEBUG → first task from test_task_stream():", first_task)
second_task = next(test_task_stream(), None)
print("DEBUG → second task from test_task_stream():", second_task)
# Rebuild it, since peek consumed 2 games:
tables_iter = make_tables(strategies, n_games_per_player)

# ─── 4. Worker ----------------------------------------------------------------
def _one(task):
    seed, idxs = task
    table = [strategies[i] for i in idxs]
    row   = _play_game(seed, table, 10_000)
    return str(row[f"{row['winner']}_strategy"])

def task_stream():
    rng = np.random.default_rng(999)
    for idxs in tables_iter:
        yield (rng.integers(0, 2**32 - 1), idxs)

# ─── 5. Tournament loop with progress / checkpoints ---------------------------
CHUNKSIZE         = 10_000
PROCESSES         = 16
MAXTASKS          = 50
REPORT_EVERY      = 100_000
CHECKPOINT_FILE   = "win_counter.chk"

def save_checkpoint(counter, done):
    with open(CHECKPOINT_FILE, "wb") as f:
        pickle.dump({"done": done, "counter": dict(counter)}, f)

def main():
    print("▶▶▶ Entered main() at", time.asctime())
    start = time.perf_counter()
    win_counter = Counter()
    done_games  = 0
    seen_first = False

    with mp.Pool(processes=PROCESSES,
                 maxtasksperchild=MAXTASKS) as pool:

        for win in pool.imap_unordered(_one, task_stream(), chunksize=CHUNKSIZE):
            if not seen_first:
                print("▶ pool.imap_unordered has started returning wins.")
                seen_first = True

            win_counter[win] += 1
            done_games += 1

            if done_games % REPORT_EVERY == 0:
                pct = 100 * done_games / total_games
                hrs = (time.perf_counter() - start) / 3600
                print(f"{done_games:,}/{total_games:,}  "
                      f"({pct:5.2f} %)  {hrs:5.2f} h")
                save_checkpoint(win_counter, done_games)

  # final save
    save_checkpoint(win_counter, done_games)

    elapsed = (time.perf_counter() - start) / 3600
    print(f"Finished in {elapsed:,.2f} hours")

    summary = (
        meta[["strategy_idx", "str_repr"]]
          .assign(wincount=lambda df:
                  df["str_repr"].map(win_counter).fillna(0).astype("int32"))
          .sort_values("strategy_idx")[["strategy_idx", "wincount"]]
    )
    summary.to_csv("wincounts.csv", index=False)
    print("→ wincounts.csv written")

if __name__ == "__main__":
    main()

DEBUG: n_games_per_player = 10223
DEBUG: changed n_games_per_player = 2
DEBUG: total_games = 3264
▶ Scheduling 3,264 total games.

DEBUG: First two tables of indices: [(np.int64(5231), np.int64(3151), np.int64(6782), np.int64(5491), np.int64(4091)), (np.int64(5231), np.int64(3151), np.int64(6782), np.int64(5491), np.int64(4091))]
DEBUG → Peeking at task_stream instead of tables_iter alone:


NameError: name 'task_stream' is not defined

In [None]:
# run_tournament_v2_debug_fixed.py  ── fully ordered & rebuilt generator
import multiprocessing as mp
import time
from collections import Counter
from itertools import islice

from farkle.simulation import generate_strategy_grid
from farkle.stats import games_for_power

# ─── 1. Master grid ───────────────────────────────────────────────────────────
strategies, meta = generate_strategy_grid()   # length = 8_160
meta["str_repr"] = [str(s) for s in strategies]

# ─── 2. Powered sample size ──────────────────────────────────────────────────
n_games_per_player = games_for_power(
    n_strategies=len(strategies),
    delta=0.03, alpha=0.025,
    power=0.90, method="bh", pairwise=True,
)
total_games = len(strategies) * n_games_per_player // 5

print(f"DEBUG → n_games_per_player = {n_games_per_player}")
print(f"DEBUG → total_games        = {total_games}")
print(f"▶ Scheduling {total_games:,} total games.\n")

# ─── 3. Table generator ──────────────────────────────────────────────────────
def chunker(it, n):
    while (batch := list(islice(it, n))):
        yield tuple(batch)

def make_tables(strats, repeats, seed=42):
    rng = np.random.default_rng(seed)
    for _ in range(repeats):
        yield from chunker(rng.permutation(len(strats)), 5)

# ─── 3a. Peek at the first two tables (and then exhaust those two) ───────────
tables_iter = make_tables(strategies, n_games_per_player)
first_two = list(islice(tables_iter, 2))
print("DEBUG → First two tables of indices:", first_two)

# ─── 3b. NOW rebuild tables_iter for real work ──────────────────────────────
tables_iter = make_tables(strategies, n_games_per_player)

# ─── 4. Worker function ───────────────────────────────────────────────────────
def _one(task):
    seed, idxs = task
    table = [strategies[i] for i in idxs]
    row   = _play_game(seed, table, 10_000)
    return str(row[f"{row['winner']}_strategy"])

# ─── 5. Define task_stream (uses the fresh tables_iter) ───────────────────────
def task_stream():
    rng = np.random.default_rng(999)
    for idxs in tables_iter:
        yield (rng.integers(0, 2**32 - 1), idxs)

# Sanity check: peek at task_stream itself (fresh copy) ────────────────────────
print("\nDEBUG → Peeking at task_stream directly:")
def test_task_stream():
    rng = np.random.default_rng(999)
    for idxs in make_tables(strategies, n_games_per_player):
        yield (rng.integers(0, 2**32 - 1), idxs)

tsk1 = next(test_task_stream(), None)
print("DEBUG → first task from test_task_stream():", tsk1)
tsk2 = next(test_task_stream(), None)
print("DEBUG → second task from test_task_stream():", tsk2)

# ─── 6. Quick dry‐run of _one(...) on a single task ────────────────────────────
debug_tables = make_tables(strategies, n_games_per_player)
dbg_idxs = next(debug_tables, None)
dbg_seed = np.random.default_rng(999).integers(0, 2**32 - 1)
dbg_task = (dbg_seed, dbg_idxs)

print("\nDEBUG → Calling _one() on that single task …")
try:
    dbg_result = _one(dbg_task)
    print("DEBUG → _one(dbg_task) returned:", dbg_result)
except Exception as e:
    print("DEBUG → _one(dbg_task) raised an exception:", e)
    import traceback; traceback.print_exc()

# ─── 7. Tournament loop with fresh generator & progress prints ───────────────
CHUNKSIZE         = 10_000
PROCESSES         = 16
MAXTASKS          = 50
REPORT_EVERY      = 100_000
CHECKPOINT_FILE   = "win_counter.chk"

def save_checkpoint(counter, done):
    with open(CHECKPOINT_FILE, "wb") as f:
        pickle.dump({"done": done, "counter": dict(counter)}, f)

def main():
    print("▶▶▶ Entered main() at", time.asctime())

    start = time.perf_counter()
    win_counter = Counter()
    done_games  = 0
    seen_first  = False

    with mp.Pool(processes=PROCESSES,
                 maxtasksperchild=MAXTASKS) as pool:

        print("Pool is open")
        for win in pool.imap_unordered(_one, task_stream(), chunksize=CHUNKSIZE):
            if not seen_first:
                print("▶ pool.imap_unordered has started returning wins.")
                seen_first = True

            win_counter[win] += 1
            done_games += 1

            if done_games % REPORT_EVERY == 0:
                pct = 100 * done_games / total_games
                hrs = (time.perf_counter() - start) / 3600
                print(f"{done_games:,}/{total_games:,}  ({pct:5.2f} %)  {hrs:5.2f} h")
                save_checkpoint(win_counter, done_games)

    # final save
    save_checkpoint(win_counter, done_games)

    elapsed = (time.perf_counter() - start) / 3600
    print(f"\nFinished in {elapsed:,.2f} hours")

    summary = (
        meta[["strategy_idx", "str_repr"]]
          .assign(wincount=lambda df:
                  df["str_repr"].map(win_counter).fillna(0).astype("int32"))
          .sort_values("strategy_idx")[["strategy_idx", "wincount"]]
    )
    summary.to_csv("wincounts.csv", index=False)
    print("→ wincounts.csv written")

if __name__ == "__main__":
    main()


DEBUG → n_games_per_player = 10223
DEBUG → total_games        = 16683936
▶ Scheduling 16,683,936 total games.

DEBUG → First two tables of indices: [(np.int64(5231), np.int64(3151), np.int64(6782), np.int64(5491), np.int64(4091)), (np.int64(5231), np.int64(3151), np.int64(6782), np.int64(5491), np.int64(4091))]

DEBUG → Peeking at task_stream directly:
DEBUG → first task from test_task_stream(): (np.int64(3495654652), (np.int64(5231), np.int64(3151), np.int64(6782), np.int64(5491), np.int64(4091)))
DEBUG → second task from test_task_stream(): (np.int64(3495654652), (np.int64(5231), np.int64(3151), np.int64(6782), np.int64(5491), np.int64(4091)))

DEBUG → Calling _one() on that single task …
DEBUG → _one(dbg_task) returned: Strat(450,2)[--][F-PS][OR][H-]
▶▶▶ Entered main() at Fri Jun  6 14:41:28 2025
Pool is open


In [None]:
"""
run_tournament_v2.py – multiprocess round-robin Farkle tournament
"""
import multiprocessing as mp
import time
from collections import Counter
from itertools import islice

import numpy as np

from farkle.simulation import generate_strategy_grid
from farkle.stats import games_for_power

# ─── 0.  Globals filled by pool.initializer ─────────────────────────────────
strategies: list | None = None       # type: ignore
meta:       pd.DataFrame | None = None  # type: ignore

def _init_worker(shared_strats, shared_meta):
    global strategies, meta
    strategies = shared_strats
    meta       = shared_meta

# ─── 1.  Combinatorial helpers ─────────────────────────────────────────────
def chunker(it, n):
    while (batch := list(islice(it, n))):
        yield tuple(batch)

def make_tables(strats, repeats, seed=42):
    rng = np.random.default_rng(seed)
    for _ in range(repeats):
        yield from chunker(rng.permutation(len(strats)), 5)

# ─── 2.  Worker – purely functional, pickle-safe ───────────────────────────
def _one(task):
    seed, idxs = task
    table = [strategies[i] for i in idxs]        # strategies seen via global
    row   = _play_game(seed, table, 10_000)
    return row[f"{row['winner']}_strategy"]      # already a str

# ─── 3.  Lazy task generator ───────────────────────────────────────────────
def task_stream(repeats):
    rng = np.random.default_rng(999)
    for idxs in make_tables(strategies, repeats):
        yield int(rng.integers(0, 2**32 - 2)), tuple(int(i) for i in idxs)

# ─── 4.  Main driver ───────────────────────────────────────────────────────
def main():
    CHUNKSIZE, PROCESSES, MAXTASKS = 10_000, 16, 50
    REPORT_EVERY, CHECKPOINT = 100_000, "win_counter.chk"

    n_games_per_player = games_for_power(
        n_strategies=len(strategies),
        delta=0.03, alpha=0.025, power=0.90, method="bh", pairwise=True,
    )
    total_games = len(strategies) * n_games_per_player // 5

    print(f"Powered sample ⇒ each strategy appears {n_games_per_player:,} times.")
    print(f"Total games scheduled: {total_games:,}")

    start = time.perf_counter()
    win_counter, done = Counter(), 0

    def save():
        with open(CHECKPOINT, "wb") as f:
            pickle.dump({"done": done, "counter": dict(win_counter)}, f)

    with mp.Pool(PROCESSES, maxtasksperchild=MAXTASKS,
                 initializer=_init_worker,
                 initargs=(strategies, meta)) as pool:
        for win in pool.imap_unordered(_one,
                                       task_stream(n_games_per_player),
                                       chunksize=CHUNKSIZE):
            win_counter[win] += 1
            done += 1
            if done % REPORT_EVERY == 0:
                elapsed = (time.perf_counter() - start) / 3600
                pct = 100 * done / total_games
                print(f"{done:,}/{total_games:,}  ({pct:4.1f} %)  {elapsed:5.2f} h")
                save()
    save()

    hrs = (time.perf_counter() - start) / 3600
    print(f"\nFinished in {hrs:,.2f} hours")

    (meta[["strategy_idx", "str_repr"]]
        .assign(wincount=lambda df: df["str_repr"].map(win_counter).fillna(0).astype("int32"))
        .sort_values("strategy_idx")[["strategy_idx", "wincount"]]
        .to_csv("wincounts.csv", index=False))
    print("→ wincounts.csv written")

# ─── 5.  Safe entry-point ──────────────────────────────────────────────────
if __name__ == "__main__":
    mp.freeze_support()                     # Windows friendliness
    strategies, meta = generate_strategy_grid()
    meta["str_repr"] = [str(s) for s in strategies]
    main()


Powered sample ⇒ each strategy appears 10,223 times.
Total games scheduled: 16,683,936


In [None]:
# run_tournament_v2_debug_fixed.py
"""
Run the full Farkle strategy tournament with multiprocessing,
queue-based logging, and resumable checkpoints.
"""
from __future__ import annotations

import logging
import logging.handlers
import multiprocessing as mp
import sys
import time
from collections import Counter
from itertools import islice
from pathlib import Path
from typing import Tuple

import numpy as np
import pandas as pd

from farkle.simulation import generate_strategy_grid
from farkle.stats import games_for_power


class FirstNFilter(logging.Filter):
    """
    Let every unique call-site (file, lineno) speak `n` times, then silence it.
    Works no matter how tight the surrounding loop is.
    """
    def __init__(self, n: int = 10):
        super().__init__()
        self.n = n
        self.seen: Counter[Tuple[str, int]] = Counter()

    def filter(self, record: logging.LogRecord) -> bool:
        key = (record.pathname, record.lineno)
        self.seen[key] += 1
        return self.seen[key] <= self.n
    
    
DEBUG_FIRST_N = 10    # change once; used by the filter above

root = logging.getLogger()             # grab root so *all* libraries inherit it
root.setLevel(logging.DEBUG)           # we really want to see DEBUG

handler = logging.StreamHandler(sys.stdout)           # print to notebook cell
handler.setFormatter(logging.Formatter(
        "%(asctime)s %(levelname)-5s %(filename)s:%(lineno)d | %(message)s",
        datefmt="%H:%M:%S"))
handler.addFilter(FirstNFilter(DEBUG_FIRST_N))        # <── the magic!

root.handlers[:] = [handler]            # replace any default Jupyter handlers
# ──────────────────────────────────────────────────────────────────────────────
# 0.--- Globals & tuning knobs
CHUNKSIZE       = 10_000          # tasks sent to each worker at a time
PROCESSES       = 16
MAXTASKS        = 50              # fork a fresh worker after this many tasks
REPORT_EVERY    = 100_000         # progress print interval
CHECKPOINT_FILE = Path("win_counter.chk")

# ──────────────────────────────────────────────────────────────────────────────
# 1.--- Strategy grid and powered sample size
strategies, meta = generate_strategy_grid()   # 8 160 rows
meta["str_repr"] = [str(s) for s in strategies]

n_games_per_player = games_for_power(
    n_strategies=len(strategies),
    delta=0.03, alpha=0.025, power=0.90,
    method="bh", pairwise=True,
)
total_tasks = len(strategies) * n_games_per_player // 5  # == total games
logging.basicConfig(level=logging.INFO,
    format="%(asctime)s|%(processName)s|%(levelname)s|%(message)s",
    datefmt="%H:%M:%S")

log = logging.getLogger("tournament")
log.info("n_games_per_player = %s, total tasks = %s", n_games_per_player, total_tasks)

# ──────────────────────────────────────────────────────────────────────────────
# 2.--- Utility: chunk iterable into batches of n
def chunker(it, n):
    while (batch := list(islice(it, n))):
        yield tuple(batch)

# 3.--- Deterministic table generator  (same order every run for reproducibility)
def make_tables(strats, repeats, seed=42):
    rng = np.random.default_rng(seed)
    for rep in range(repeats):
        perm = rng.permutation(len(strats))
        yield from chunker(perm, 5)
        if rep < 2:                      # quick preview
            log.debug("rep=%d first table indices=%s", rep, perm[:5])

# 4.--- Worker
def _one(task):
    seed, idxs = task
    table = [strategies[i] for i in idxs]
    row   = _play_game(seed, table, 10_000)
    win   = str(row[f"{row['winner']}_strategy"])
    return win

# 5.--- Build the (potentially resumable) task stream
def task_stream(already_done: int):
    """
    Generator of (seed, indices) tuples.
    If *already_done* > 0, the first that many tasks are discarded so that
    the stream yields only unfinished work when resuming from a checkpoint.
    """
    rng         = np.random.default_rng(999)
    tables_iter = make_tables(strategies, n_games_per_player)

    # fast-forward to resume point
    if already_done:
        skipped = 0
        for _ in range(already_done):
            next(tables_iter)
            skipped += 1
        log.info("Skipped %s completed tasks from previous run.", skipped)

    for idxs in tables_iter:
        yield (rng.integers(0, 2**32 - 1), idxs)

# ──────────────────────────────────────────────────────────────────────────────
# 6.--- Checkpoint helpers
def save_checkpoint(counter: Counter, done: int):
    with CHECKPOINT_FILE.open("wb") as f:
        pickle.dump({"done": done, "counter": dict(counter)}, f)
    log.info("Checkpoint saved at %s tasks.", done)

def load_checkpoint() -> tuple[int, Counter]:
    if not CHECKPOINT_FILE.exists():
        return 0, Counter()
    with CHECKPOINT_FILE.open("rb") as f:
        data = pickle.load(f)
    log.info("Checkpoint loaded: %s tasks finished previously.", data["done"])
    return data["done"], Counter(data["counter"])

# ──────────────────────────────────────────────────────────────────────────────
# 7.--- Multiprocessing-friendly logging (single queue sink)
def _configure_worker_logging(queue):
    qh = logging.handlers.QueueHandler(queue)
    root = logging.getLogger()
    root.handlers.clear()
    root.setLevel(logging.DEBUG)
    root.addHandler(qh)

def _listener_process(queue):
    h = logging.StreamHandler(sys.stdout)
    fmt = logging.Formatter("%(asctime)s|%(processName)s|%(levelname)s|%(message)s",
                            "%H:%M:%S")
    h.setFormatter(fmt)
    root = logging.getLogger()
    root.addHandler(h)
    root.setLevel(logging.DEBUG)
    while True:
        record = queue.get()
        if record is None:
            break
        root.handle(record)

# ──────────────────────────────────────────────────────────────────────────────
def main():
    already_done, win_counter = load_checkpoint()
    if already_done >= total_tasks:
        log.warning("All %s tasks are already complete. Nothing to do!", total_tasks)
        return

    log_queue = mp.Queue()
    listener  = mp.Process(target=_listener_process, args=(log_queue,), daemon=True)
    listener.start()

    start_time = time.perf_counter()
    done = already_done

    with mp.Pool(
        processes=PROCESSES,
        maxtasksperchild=MAXTASKS,
        initializer=_configure_worker_logging,
        initargs=(log_queue,),
    ) as pool:

        log.info("Pool started with %d workers.", PROCESSES)
        stream = task_stream(already_done)
        for win in pool.imap_unordered(_one, stream, chunksize=CHUNKSIZE):
            win_counter[win] += 1
            done += 1

            if done % REPORT_EVERY == 0 or done == total_tasks:
                pct = 100 * done / total_tasks
                hrs = (time.perf_counter() - start_time) / 3600
                log.info("[%10d / %10d]  %6.2f %%  %6.2f h elapsed",
                         done, total_tasks, pct, hrs)
                save_checkpoint(win_counter, done)

    # tell listener to finish
    log_queue.put(None)
    listener.join()

    # final dump + CSV
    save_checkpoint(win_counter, done)

    summary = (
        meta[["strategy_idx", "str_repr"]]
        .assign(wincount=lambda df:
                df["str_repr"].map(win_counter).fillna(0).astype("int32"))
        .sort_values("strategy_idx")[["strategy_idx", "wincount"]]
    )
    summary.to_csv("wincounts.csv", index=False)
    log.info("Finished!  CSV written with final results.")

# ──────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    main()


14:47:17|MainProcess|INFO|n_games_per_player = 10223, total tasks = 16683936
14:47:17|MainProcess|INFO|Pool started with 16 workers.


In [None]:
# run_tournament_v2_debug_fixed.py
"""
Run the full Farkle strategy tournament with multiprocessing,
queue-based logging, and resumable checkpoints.
"""
from __future__ import annotations

import logging
import logging.handlers
import multiprocessing as mp
import sys
import time
from collections import Counter
from itertools import islice
from pathlib import Path
from typing import Tuple

import numpy as np
import pandas as pd

from farkle.simulation import generate_strategy_grid
from farkle.stats import games_for_power

# ──────────────────────────────────────────────────────────────────────────────
# 0.--- Globals & tuning knobs
CHUNKSIZE       = 10_000          # tasks sent to each worker at a time
PROCESSES       = 16
MAXTASKS        = 50              # fork a fresh worker after this many tasks
REPORT_EVERY    = 100_000         # progress print interval
CHECKPOINT_FILE = Path("win_counter.chk")

# ──────────────────────────────────────────────────────────────────────────────
# 1.--- Set up root logger and handler (no debug() calls here yet)
DEBUG_FIRST_N = 10    # change once; used by the filter below

class FirstNFilter(logging.Filter):
    """
    Let every unique call-site (file, lineno) speak `n` times, then silence it.
    Works no matter how tight the surrounding loop is.
    """
    def __init__(self, n: int = DEBUG_FIRST_N):
        super().__init__()
        self.n = n
        self.seen: Counter[Tuple[str, int]] = Counter()

    def filter(self, record: logging.LogRecord) -> bool:
        key = (record.pathname, record.lineno)
        self.seen[key] += 1
        return self.seen[key] <= self.n

# configure the root logger
root = logging.getLogger()             # grab root so *all* libraries inherit it
root.setLevel(logging.DEBUG)           # we really want to see DEBUG

handler = logging.StreamHandler(sys.stdout)  # print to stdout
handler.setFormatter(logging.Formatter(
        "%(asctime)s %(levelname)-5s %(filename)s:%(lineno)d | %(message)s",
        datefmt="%H:%M:%S"))
handler.addFilter(FirstNFilter(DEBUG_FIRST_N))

root.handlers[:] = [handler]            # replace any default handlers

# now that the handler is fully set up, we can safely grab “tournament” logger
log = logging.getLogger("tournament")
# No log.debug(...) here at module import time.

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s|%(processName)s|%(levelname)s|%(message)s",
    datefmt="%H:%M:%S"
)

# ──────────────────────────────────────────────────────────────────────────────
# 2.--- Strategy grid and powered sample size
strategies, meta = generate_strategy_grid()
meta["str_repr"] = [str(s) for s in strategies]

n_games_per_player = games_for_power(
    n_strategies=len(strategies),
    delta=0.03, alpha=0.025, power=0.90,
    method="bh", pairwise=True,
)
total_tasks = len(strategies) * n_games_per_player // 5  # == total games

log.info("n_games_per_player = %s, total tasks = %s", n_games_per_player, total_tasks)

# ──────────────────────────────────────────────────────────────────────────────
# 3.--- Utility: chunk iterable into batches of n
def chunker(it, n):
    log.debug("reached chunker  vars=%r", locals())
    while (batch := list(islice(it, n))):
        log.debug("chunker yielding batch  vars=%r", locals())
        yield tuple(batch)
    log.debug("chunker exiting (no more batches)  vars=%r", locals())

# ──────────────────────────────────────────────────────────────────────────────
# 4.--- Deterministic table generator (same order every run for reproducibility)
def make_tables(strats, repeats, seed=42):
    log.debug("reached make_tables  vars=%r", locals())
    rng = np.random.default_rng(seed)
    log.debug("initialized RNG in make_tables  vars=%r", locals())
    for rep in range(repeats):
        log.debug("make_tables at rep start  vars=%r", locals())
        perm = rng.permutation(len(strats))
        log.debug("make_tables computed perm  vars=%r", locals())
        yield from chunker(perm, 5)
        log.debug("make_tables yielded tables for rep  vars=%r", locals())
        if rep < 2:  # quick preview
            log.debug("rep=%d first table indices=%s", rep, perm[:5])
    log.debug("make_tables exiting  vars=%r", locals())

# ──────────────────────────────────────────────────────────────────────────────
# 5.--- Worker function
def _one(task):
    log.debug("reached _one  vars=%r", locals())
    seed, idxs = task
    log.debug("unpacked task in _one  vars=%r", locals())
    table = [strategies[i] for i in idxs]
    log.debug("constructed table in _one  vars=%r", locals())
    row = _play_game(seed, table, 10_000)
    log.debug("completed _play_game in _one  vars=%r", locals())
    win = str(row[f"{row['winner']}_strategy"])
    log.debug("computed win in _one  vars=%r", locals())
    return win

# ──────────────────────────────────────────────────────────────────────────────
# 6.--- Build the (potentially resumable) task stream
def task_stream(already_done: int):
    log.debug("reached task_stream  vars=%r", locals())
    rng = np.random.default_rng(999)
    log.debug("initialized RNG in task_stream  vars=%r", locals())
    tables_iter = make_tables(strategies, n_games_per_player)
    log.debug("created tables_iter in task_stream  vars=%r", locals())

    # fast‐forward to resume point
    if already_done:
        log.debug("task_stream skipping already_done tasks  vars=%r", locals())
        skipped = 0
        for _ in range(already_done):
            next(tables_iter)
            skipped += 1
            log.debug("task_stream skipped one task  vars=%r", locals())
        log.info("Skipped %s completed tasks from previous run.", skipped)
        log.debug("task_stream done skipping tasks  vars=%r", locals())

    for idxs in tables_iter:
        log.debug("task_stream about to yield a task  vars=%r", locals())
        yield (rng.integers(0, 2**32 - 1), idxs)
    log.debug("task_stream exiting (no more tasks)  vars=%r", locals())

# ──────────────────────────────────────────────────────────────────────────────
# 7.--- Checkpoint helpers
def save_checkpoint(counter: Counter, done: int):
    log.debug("reached save_checkpoint  vars=%r", locals())
    with CHECKPOINT_FILE.open("wb") as f:
        pickle.dump({"done": done, "counter": dict(counter)}, f)
        log.debug("pickle.dump completed in save_checkpoint  vars=%r", locals())
    log.info("Checkpoint saved at %s tasks.", done)
    log.debug("exiting save_checkpoint  vars=%r", locals())

def load_checkpoint() -> tuple[int, Counter]:
    log.debug("reached load_checkpoint  vars=%r", locals())
    if not CHECKPOINT_FILE.exists():
        log.debug("load_checkpoint found no checkpoint file  vars=%r", locals())
        return 0, Counter()
    with CHECKPOINT_FILE.open("rb") as f:
        data = pickle.load(f)
        log.debug("pickle.load completed in load_checkpoint  vars=%r", locals())
    log.info("Checkpoint loaded: %s tasks finished previously.", data["done"])
    log.debug("exiting load_checkpoint with data  vars=%r", locals())
    return data["done"], Counter(data["counter"])

# ──────────────────────────────────────────────────────────────────────────────
# 8.--- Multiprocessing-friendly logging (single queue sink)
def _configure_worker_logging(queue):
    log.debug("reached _configure_worker_logging  vars=%r", locals())
    qh = logging.handlers.QueueHandler(queue)
    root = logging.getLogger()
    root.handlers.clear()
    root.setLevel(logging.DEBUG)
    root.addHandler(qh)
    log.debug("configured worker logging handlers  vars=%r", locals())

def _listener_process(queue):
    log.debug("reached _listener_process  vars=%r", locals())
    h = logging.StreamHandler(sys.stdout)
    fmt = logging.Formatter("%(asctime)s|%(processName)s|%(levelname)s|%(message)s", "%H:%M:%S")
    h.setFormatter(fmt)
    root = logging.getLogger()
    root.addHandler(h)
    root.setLevel(logging.DEBUG)
    log.debug("listener configured its handler  vars=%r", locals())
    while True:
        record = queue.get()
        log.debug("listener received record  vars=%r", locals())
        if record is None:
            log.debug("listener received sentinel None  vars=%r", locals())
            break
        root.handle(record)
    log.debug("listener exiting  vars=%r", locals())

# ──────────────────────────────────────────────────────────────────────────────
def main():
    log.debug("reached main  vars=%r", locals())
    already_done, win_counter = load_checkpoint()
    log.debug("after load_checkpoint in main  vars=%r", locals())

    if already_done >= total_tasks:
        log.debug("main early exit condition met  vars=%r", locals())
        log.warning("All %s tasks are already complete. Nothing to do!", total_tasks)
        return

    log_queue = mp.Queue()
    log.debug("created log_queue in main  vars=%r", locals())
    listener = mp.Process(target=_listener_process, args=(log_queue,), daemon=True)
    listener.start()
    log.debug("started listener process in main  vars=%r", locals())

    start_time = time.perf_counter()
    log.debug("recorded start_time in main  vars=%r", locals())
    done = already_done
    log.debug("initialized done counter in main  vars=%r", locals())

    with mp.Pool(
        processes=PROCESSES,
        maxtasksperchild=MAXTASKS,
        initializer=_configure_worker_logging,
        initargs=(log_queue,),
    ) as pool:
        log.debug("entered Pool context in main  vars=%r", locals())
        log.info("Pool started with %d workers.", PROCESSES)
        log.debug("logged pool start info in main  vars=%r", locals())
        stream = task_stream(already_done)
        log.debug("created stream iterator in main  vars=%r", locals())

        for win in pool.imap_unordered(_one, stream, chunksize=CHUNKSIZE):
            log.debug("received win from pool.imap_unordered  vars=%r", locals())
            win_counter[win] += 1
            log.debug("updated win_counter in main loop  vars=%r", locals())
            done += 1
            log.debug("incremented done in main loop  vars=%r", locals())

            if done % REPORT_EVERY == 0 or done == total_tasks:
                log.debug("about to compute progress in main loop  vars=%r", locals())
                pct = 100 * done / total_tasks
                hrs = (time.perf_counter() - start_time) / 3600
                log.info("[%10d / %10d]  %6.2f %%  %6.2f h elapsed", done, total_tasks, pct, hrs)
                log.debug("about to save checkpoint in main loop  vars=%r", locals())
                save_checkpoint(win_counter, done)
                log.debug("saved checkpoint in main loop  vars=%r", locals())

        log.debug("exited for loop in main  vars=%r", locals())

    # tell listener to finish
    log.debug("sending sentinel to listener  vars=%r", locals())
    log_queue.put(None)
    log.debug("sent sentinel to listener  vars=%r", locals())
    listener.join()
    log.debug("listener joined in main  vars=%r", locals())

    # final dump + CSV
    log.debug("about to save final checkpoint  vars=%r", locals())
    save_checkpoint(win_counter, done)
    log.debug("saved final checkpoint  vars=%r", locals())

    summary = (
        meta[["strategy_idx", "str_repr"]]
        .assign(wincount=lambda df: df["str_repr"].map(win_counter).fillna(0).astype("int32"))
        .sort_values("strategy_idx")[["strategy_idx", "wincount"]]
    )
    log.debug("constructed summary DataFrame  vars=%r", locals())
    summary.to_csv("wincounts.csv", index=False)
    log.debug("wrote wincounts.csv  vars=%r", locals())
    log.info("Finished!  CSV written with final results.")
    log.debug("exiting main  vars=%r", locals())

# ──────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # At this point, the handler + root logger are fully configured.
    log.debug("about to call main from __main__  vars=%r", locals())
    main()
    log.debug("returned from main in __main__  vars=%r", locals())


15:16:12 INFO  3836417828.py:85 | n_games_per_player = 10223, total tasks = 16683936


0                 200               0        True       True            True   
1                 200               0        True       True            True   
2                 200               0        True       True            True   
3                 200               0        True       True            True   
4                 200               0        True       True            True   
...               ...             ...         ...        ...             ...   
8155             1000               4       False      False            True   
8156             1000               4       False      False            True   
8157             1000               4       False      False           False   
8158             1000               4       False      False           False   
8159             1000               4       False      False           False   

      consider_dice  require_both  auto_hot_dice  run_up_score  prefer_score  \
0              True          True      