# Starter: Sequence RL — debug-friendly notebook (Anaconda base)

Run the cells **in order** (1 → 10).
This notebook:
- uses your Anaconda base env (no venv),
- verifies CUDA,
- wires `sys.path` so `training/` imports,
- runs the trainer **in-process** (easy to debug) or via subprocess,
- starts TensorBoard,
- evaluates a saved policy,
- and gives quick unit-test & step-debug helpers.


In [19]:
import os, sys, subprocess, textwrap

print("Python:", sys.executable)
try:
    import torch
    print("PyTorch:", torch.__version__)
    print("CUDA available:", torch.cuda.is_available())
    print("CUDA version:", getattr(torch.version, "cuda", None))
    if torch.cuda.is_available():
        print("CUDA device count:", torch.cuda.device_count())
        print("Device 0:", torch.cuda.get_device_name(0))
except Exception as e:
    print("Torch import error:", e)

# Optional: show nvidia-smi (won't crash if missing)
try:
    print("\n--- nvidia-smi ---")
    out = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
    print(out.stdout or out.stderr)
except Exception as e:
    print("nvidia-smi not available:", e)


Python: C:\Users\carlo\anaconda3\python.exe
PyTorch: 2.2.1
CUDA available: True
CUDA version: 12.1
CUDA device count: 1
Device 0: NVIDIA GeForce RTX 3050 Laptop GPU

--- nvidia-smi ---
Sun Aug 24 13:22:58 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 551.61                 Driver Version: 551.61         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3050 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   57C    P8              4W /   25W |     276MiB /   4096MiB |      0%      Default |
|                                         |    

In [20]:
import os, sys, importlib, json, pathlib

# If this notebook is inside training/, repo_root = parent; otherwise set it manually.
cwd = os.getcwd()
repo_root = os.path.abspath(os.path.join(cwd, "..")) if os.path.basename(cwd).lower()=="training" else cwd
training_pkg_dir = os.path.join(repo_root, "training")

print("CWD:", cwd)
print("Repo root:", repo_root)
print("Training package dir exists:", os.path.isdir(training_pkg_dir))

# Ensure repo root is on sys.path so "import training" resolves to local sources
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

# Sanity: import training
try:
    import training
    print("training import OK from:", training.__file__)
except Exception as e:
    print("training import failed:", e)
    raise

# Config paths
tiny_cfg = os.path.join(training_pkg_dir, "configs", "tiny-smoke.json")
default_cfg = os.path.join(training_pkg_dir, "configs", "default.json")
print("tiny-smoke.json exists?", os.path.exists(tiny_cfg))
print("default.json exists?", os.path.exists(default_cfg))


CWD: E:\sequence_game_board\sequence_board_game\training
Repo root: E:\sequence_game_board\sequence_board_game
Training package dir exists: True
training import OK from: E:\sequence_game_board\sequence_board_game\training\__init__.py
tiny-smoke.json exists? True
default.json exists? True


In [21]:
# Auto-reload edited modules without restarting the kernel
%load_ext autoreload
%autoreload 2

import io, contextlib, runpy, time, shlex, subprocess
from typing import Dict, Any

def pretty(o):
    import pprint; pprint.pp(o, width=100)

def apply_overrides(cfg: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
    """Dot-path override into nested dict."""
    out = json.loads(json.dumps(cfg))
    for k, v in override.items():
        node = out
        parts = k.split(".")
        for p in parts[:-1]:
            if p not in node or not isinstance(node[p], dict):
                node[p] = {}
            node = node[p]
        node[parts[-1]] = v
    return out

def load_cfg(path: str) -> Dict[str, Any]:
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def write_cfg(path: str, cfg: Dict[str, Any]) -> None:
    with open(path, "w", encoding="utf-8") as f:
        json.dump(cfg, f, indent=2)

print("Helpers ready.")


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Helpers ready.


In [22]:
# Pick a base config
CFG_PATH = tiny_cfg  # change to default_cfg for bigger runs

# Build an override suited to a laptop GPU. If CUDA is False, we force CPU in override.
import torch
OVERRIDE = {
    "training.num_envs": 8,
    "training.rollout_length": 32,
    "training.total_updates": 5,       # bump later for longer runs
    "training.minibatch_size": 512,
}
if not torch.cuda.is_available():
    OVERRIDE["training.use_cuda"] = False

print("Config:", CFG_PATH)
print("Overrides:")
pretty(OVERRIDE)

# Preview the merged config (not persisted)
merged_preview = apply_overrides(load_cfg(CFG_PATH), OVERRIDE)
pretty({k: merged_preview[k] for k in ["training","model","logging"] if k in merged_preview})


Config: E:\sequence_game_board\sequence_board_game\training\configs\tiny-smoke.json
Overrides:
{'training.num_envs': 8,
 'training.rollout_length': 32,
 'training.total_updates': 5,
 'training.minibatch_size': 512}
{'training': {'algorithm': 'ppo_lstm',
              'seed': 42,
              'num_envs': 8,
              'rollout_length': 32,
              'total_updates': 5,
              'gamma': 0.99,
              'gae_lambda': 0.95,
              'lr': 0.001,
              'clip_eps': 0.2,
              'entropy_coef': 0.01,
              'value_coef': 0.5,
              'max_grad_norm': 0.5,
              'amp': False,
              'opponent_pool': {'current': 1.0, 'snapshots': 0.0, 'heuristics': 0.0},
              'episode_cap': 200,
              'minibatch_size': 512},
 'model': {'conv_channels': [32, 32, 64], 'lstm_hidden': 128, 'lstm_layers': 1},
 'logging': {'tensorboard': False,
             'csv': True,
             'jsonl': False,
             'log_private_hands_locall

In [25]:
import sys, os, runpy, contextlib, io

# Compose argv exactly like the CLI would
argv = [
    "training.scripts.train",
    "--config", CFG_PATH,
    "--override", json.dumps(OVERRIDE),
]

# Capture stdout/stderr so the cell prints a clean, bounded block.
stdout_buf, stderr_buf = io.StringIO(), io.StringIO()
print("Running in-process with argv:", argv)

with contextlib.redirect_stdout(stdout_buf), contextlib.redirect_stderr(stderr_buf):
    try:
        # Make sys.argv visible to the module's argparse
        old_argv = sys.argv
        sys.argv = argv
        # Auto-reload ensures your edits to modules are picked up before run
        result = runpy.run_module("training.scripts.train", run_name="__main__")
        exit_code = 0
    except SystemExit as se:
        # argparse may call sys.exit(); capture its code
        exit_code = int(getattr(se, "code", 0) or 0)
    except Exception as e:
        exit_code = 1
        print("EXCEPTION:", repr(e))
    finally:
        sys.argv = old_argv

out, err = stdout_buf.getvalue(), stderr_buf.getvalue()
print("--- STDOUT ---\n", out[-10000:])   # last 10k chars
print("--- STDERR ---\n", err[-10000:])
print("Exit code:", exit_code)

# If you crashed and want to inspect the last exception, run:
# %debug


Running in-process with argv: ['training.scripts.train', '--config', 'E:\\sequence_game_board\\sequence_board_game\\training\\configs\\tiny-smoke.json', '--override', '{"training.num_envs": 8, "training.rollout_length": 32, "training.total_updates": 5, "training.minibatch_size": 512}']
--- STDOUT ---
 EXCEPTION: AttributeError("'GameState' object has no attribute 'sequences_meta_cells'")

--- STDERR ---
 
Exit code: 1


In [24]:
# Inline (Jupyter) TensorBoard. If it fails, use the external command below.
%load_ext tensorboard
import os
logdir = os.path.join(repo_root, "runs")
print("TensorBoard logdir:", logdir)
%tensorboard --logdir $logdir --port 6006


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
TensorBoard logdir: E:\sequence_game_board\sequence_board_game\runs


Reusing TensorBoard on port 6006 (pid 36372), started 0:02:08 ago. (Use '!kill 36372' to kill it.)