# Starter: Sequence RL — debug-friendly notebook (Anaconda base)

Run the cells **in order** (1 → 10).
This notebook:
- uses your Anaconda base env (no venv),
- verifies CUDA,
- wires `sys.path` so `training/` imports,
- runs the trainer **in-process** (easy to debug) or via subprocess,
- starts TensorBoard,
- evaluates a saved policy,
- and gives quick unit-test & step-debug helpers.


In [1]:
import os, sys, subprocess, textwrap

print("Python:", sys.executable)
try:
    import torch
    print("PyTorch:", torch.__version__)
    print("CUDA available:", torch.cuda.is_available())
    print("CUDA version:", getattr(torch.version, "cuda", None))
    if torch.cuda.is_available():
        print("CUDA device count:", torch.cuda.device_count())
        print("Device 0:", torch.cuda.get_device_name(0))
except Exception as e:
    print("Torch import error:", e)

# Optional: show nvidia-smi (won't crash if missing)
try:
    print("\n--- nvidia-smi ---")
    out = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
    print(out.stdout or out.stderr)
except Exception as e:
    print("nvidia-smi not available:", e)


Python: C:\Users\carlo\anaconda3\python.exe
PyTorch: 2.2.1
CUDA available: True
CUDA version: 12.1
CUDA device count: 1
Device 0: NVIDIA GeForce RTX 3050 Laptop GPU

--- nvidia-smi ---
Fri Aug 29 12:27:43 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 551.61                 Driver Version: 551.61         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3050 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   58C    P8              4W /   25W |     186MiB /   4096MiB |      0%      Default |
|                                         |    

In [33]:
# ✅ Run training.scripts.train in-process with a merged temp config,
#    showing output LIVE in the PyCharm console while debugging.

import os, sys, runpy, importlib, json, time

# --- configure your base config and overrides (dot-paths supported) ---
CFG_PATH = r"E:\sequence_game_board\sequence_board_game\training\configs\full-tiny-smoke.json"
OVERRIDE = {
    "training.total_updates": 20
}

# --- helpers: load cfg, apply dot-path overrides, write temp cfg ---
def load_json(path):
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def apply_overrides(cfg: dict, overrides: dict) -> dict:
    out = json.loads(json.dumps(cfg))  # deep copy
    for k, v in overrides.items():
        cur = out
        parts = k.split(".")
        for p in parts[:-1]:
            if p not in cur or not isinstance(cur[p], dict):
                cur[p] = {}
            cur = cur[p]
        cur[parts[-1]] = v
    return out

# --- compute repo root (parent of 'training' folder) and ensure import path ---
# CFG_PATH .../training/configs/xxx.json  -> go up two levels to repo root
project_root = os.path.abspath(os.path.join(os.path.dirname(CFG_PATH), os.pardir, os.pardir))
print("Project root:", project_root)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# --- purge any cached 'training' modules so edits are picked up ---
to_purge = [m for m in list(sys.modules) if m == "training" or m.startswith("training.")]
for m in to_purge:
    sys.modules.pop(m, None)
importlib.invalidate_caches()

# --- build merged temp config ---
base_cfg = load_json(CFG_PATH)
merged_cfg = apply_overrides(base_cfg, OVERRIDE)

cfg_dir = os.path.dirname(CFG_PATH)
runtime_cfg_path = os.path.join(cfg_dir, f"_runtime_{int(time.time())}.json")
with open(runtime_cfg_path, "w", encoding="utf-8") as f:
    json.dump(merged_cfg, f, indent=2)
print("Merged config written to:", runtime_cfg_path)
print("total_updates =", merged_cfg.get("training", {}).get("total_updates"))

# --- (optional) encourage line-buffered output in some environments ---
try:
    sys.stdout.reconfigure(line_buffering=True)  # Python 3.7+
    sys.stderr.reconfigure(line_buffering=True)
except Exception:
    pass

# --- run the train module in-process with the merged config (no redirection) ---
argv = ["training.scripts.train", "--config", runtime_cfg_path]
print("Running in-process with argv:", argv)

old_argv = list(sys.argv)
exit_code = 0
try:
    sys.argv = argv
    # All prints from training.scripts.train will appear LIVE in PyCharm's console
    runpy.run_module("training.scripts.train", run_name="__main__", alter_sys=True)
except SystemExit as se:
    exit_code = int(getattr(se, "code", 0) or 0)
except Exception as e:
    # Any exception prints directly to the console
    import traceback
    traceback.print_exc()
    exit_code = 1
finally:
    sys.argv = old_argv

print("Exit code:", exit_code)

# (Optional) clean up the temp config file
try:
    os.remove(runtime_cfg_path)
except Exception:
    pass


Project root: E:\sequence_game_board\sequence_board_game
Merged config written to: E:\sequence_game_board\sequence_board_game\training\configs\_runtime_1756936502.json
total_updates = 20
Running in-process with argv: ['training.scripts.train', '--config', 'E:\\sequence_game_board\\sequence_board_game\\training\\configs\\_runtime_1756936502.json']
[resume] loaded latest runt weights into policy: E:\sequence_game_board\sequence_board_game\training\runs\smoke\full\run\policy_final.pt (strict=False)
update 1/20 | loss/total:-0.0216 | loss/policy:-0.0309 | loss/value:0.0675 | loss/entropy:1.0644 | fps:4.3 |  [eval] overall 4/7 (57.1%) | recent@256 57.1% | ema 52.7% ·███·█·
[snapshots] added E:\sequence_game_board\sequence_board_game\training\runs\smoke\full\run\snapshots\ppo_update_000001.pt
update 2/20 | loss/total:-0.0158 | loss/policy:-0.0209 | loss/value:0.0572 | loss/entropy:1.0403 | fps:5.7 |  [eval] overall 9/15 (60.0%) | recent@256 60.0% | ema 55.8% ·███·█·███··██·
update 3/20 | los



Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\carlo\anaconda3\Lib\site-packages\IPython\core\interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\carlo\AppData\Local\Temp\ipykernel_11076\1315946819.py", line 69, in <module>
    runpy.run_module("training.scripts.train", run_name="__main__", alter_sys=True)
  File "<frozen runpy>", line 226, in run_module
  File "<frozen runpy>", line 98, in _run_module_code
  File "<frozen runpy>", line 88, in _run_code
  File "E:\sequence_game_board\sequence_board_game\training\scripts\train.py", line 546, in <module>
    main()
  File "E:\sequence_game_board\sequence_board_game\training\scripts\train.py", line 392, in main
    next_obs_i, next_info_i, rolled_terminal = pool.skipTo(policy, e, i)
                                               ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\sequence_game_board\sequence_board_game\training\agents\opponent_pool.py", line 320, in skipTo
    obs_np =

##EVAL

In [20]:
# ✅ Run training.scripts.eval in-process with a merged temp config (overrides applied reliably)

import os, sys, runpy, importlib, contextlib, io, json, time

CFG_PATH = r"E:\sequence_game_board\sequence_board_game\training\configs\full-tiny-smoke.json"
EVAL_OVERRIDE = {
    # Example: you can override file paths or add agent kwargs here if needed
    "evaluation.agent_kwargs.agents/training/ppo_lstm_agent.py.policy_path": "runs/smoke/full/policy_final.pt",
}

def load_json(path):
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def apply_overrides(cfg: dict, overrides: dict) -> dict:
    out = json.loads(json.dumps(cfg))
    for k, v in overrides.items():
        cur = out
        parts = k.split(".")
        for p in parts[:-1]:
            if p not in cur or not isinstance(cur[p], dict):
                cur[p] = {}
            cur = cur[p]
        cur[parts[-1]] = v
    return out

project_root = os.path.abspath(os.path.join(os.path.dirname(CFG_PATH), os.pardir, os.pardir))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

to_purge = [m for m in list(sys.modules) if m == "training" or m.startswith("training.")]
for m in to_purge:
    sys.modules.pop(m, None)
importlib.invalidate_caches()

base_cfg = load_json(CFG_PATH)
merged_cfg = apply_overrides(base_cfg, EVAL_OVERRIDE)

cfg_dir = os.path.dirname(CFG_PATH)
runtime_cfg_path = os.path.join(cfg_dir, f"_eval_runtime_{int(time.time())}.json")
with open(runtime_cfg_path, "w", encoding="utf-8") as f:
    json.dump(merged_cfg, f, indent=2)
print("Merged EVAL config:", runtime_cfg_path)

argv = ["training.scripts.eval", "--config", runtime_cfg_path, "--episodes", "100"]

stdout_buf, stderr_buf = io.StringIO(), io.StringIO()
print("Running eval in-process with argv:", argv)

with contextlib.redirect_stdout(stdout_buf), contextlib.redirect_stderr(stderr_buf):
    old_argv = sys.argv
    try:
        sys.argv = argv
        runpy.run_module("training.scripts.eval", run_name="__main__", alter_sys=True)
        exit_code = 0
    except SystemExit as se:
        exit_code = int(getattr(se, "code", 0) or 0)
    except Exception:
        import traceback; traceback.print_exc()
        exit_code = 1
    finally:
        sys.argv = old_argv

print("--- STDOUT (tail) ---\n", stdout_buf.getvalue()[-8000:])
print("--- STDERR (tail) ---\n", stderr_buf.getvalue()[-8000:])
print("Exit code:", exit_code)


Merged EVAL config: E:\sequence_game_board\sequence_board_game\training\configs\_eval_runtime_1756929360.json
Running eval in-process with argv: ['training.scripts.eval', '--config', 'E:\\sequence_game_board\\sequence_board_game\\training\\configs\\_eval_runtime_1756929360.json', '--episodes', '100']


KeyboardInterrupt: UI closed

In [17]:
%load_ext tensorboard
%tensorboard --logdir "E:\sequence_game_board\sequence_board_game\training\runs\smoke\run" --port 5907 --reload_interval 3

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 5907 (pid 70272), started 21:18:08 ago. (Use '!kill 70272' to kill it.)

In [7]:
import os, glob, pprint
from tensorboard.backend.event_processing import event_accumulator as EA

LOGDIR = r"E:/sequence_game_board/sequence_board_game/runs/smoke/run/"   # point at the parent, not the leaf
events = glob.glob(os.path.join(LOGDIR, "**", "events.out.tfevents.*"), recursive=True)
print("Found", len(events), "event file(s)")
pprint.pprint(events[-5:])

if events:
    ea = EA.EventAccumulator(os.path.dirname(events[-1]))  # load its containing directory
    ea.Reload()
    print("\nTAGS:", ea.Tags())        # should include 'scalars'
    print("Scalar keys:", list(ea.Scalars('loss/total')[:3]) if 'loss/total' in ea.Tags().get('scalars', []) else "no loss/total")
    size = os.path.getsize(events[-1])
    print("Event file size (bytes):", size)

Found 0 event file(s)
[]
