# Abaddon Training in Google Colab

This notebook runs a **real training profile** (not a smoke run) and writes artifacts directly to Google Drive.

Drive output folder:
- `/content/drive/MyDrive/abaddon`

Outputs:
- `/content/drive/MyDrive/abaddon/abaddon_colab_latest.pt`
- `/content/drive/MyDrive/abaddon/parcae_model.onnx`


In [None]:
from pathlib import Path
from google.colab import drive
import torch

drive.mount('/content/drive')
ABADDON_DIR = Path('/content/drive/MyDrive/abaddon')
ABADDON_DIR.mkdir(parents=True, exist_ok=True)
print('Drive output dir:', ABADDON_DIR)

print('torch:', torch.__version__)
print('cuda available:', torch.cuda.is_available())
if torch.cuda.is_available():
    !nvidia-smi
else:
    print('Warning: GPU not detected. Enable GPU runtime in Colab.')


In [None]:
import os
import pathlib

# Defaults to your repo; override in Colab if needed:
#   %env PARCAE_REPO_URL=https://github.com/<you>/<repo>.git
# If repo already exists at /content/ParcaeStrategy, this cell reuses it.
DEFAULT_REPO_URL = "https://github.com/Tarvorix/parcae.git"
REPO_URL = os.environ.get("PARCAE_REPO_URL", DEFAULT_REPO_URL)
REPO_DIR = pathlib.Path("/content/ParcaeStrategy")

if REPO_DIR.exists():
    print(f"Using existing repo at {REPO_DIR}")
else:
    print(f"Cloning {REPO_URL}")
    !git clone "$REPO_URL" "$REPO_DIR"

%cd /content/ParcaeStrategy
!git status --short


In [None]:
from pathlib import Path
import importlib.util
import os
import sys

DEFAULT_REPO_URL = "https://github.com/Tarvorix/parcae.git"
REPO_DIR = Path('/content/ParcaeStrategy')

if not REPO_DIR.exists():
    print('Repo missing after runtime reset; cloning fresh copy...')
    get_ipython().system('git clone "$DEFAULT_REPO_URL" "$REPO_DIR"')

%cd /content/ParcaeStrategy
print('cwd:', os.getcwd())

src_path = str(REPO_DIR / 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)
print('PYTHONPATH prepended:', src_path)

def missing(mod: str) -> bool:
    """Return True when the import system cannot find a spec for ``mod``."""
    spec = importlib.util.find_spec(mod)
    return spec is None

# Packages this notebook needs; only those that cannot be found get installed.
required_packages = ('torch', 'onnx', 'onnxruntime')
to_install = [pkg for pkg in required_packages if missing(pkg)]

if not to_install:
    print('Required packages already installed. Skipping pip install.')
else:
    print('Installing missing packages:', to_install)
    get_ipython().run_line_magic('pip', 'install ' + ' '.join(to_install))


In [None]:
import os
import sys
import time
from pathlib import Path

REPO_SRC = '/content/ParcaeStrategy/src'
if REPO_SRC not in sys.path:
    sys.path.insert(0, REPO_SRC)

# Drop every already-imported parcaestrategy module (the package itself and all
# of its submodules) so the import below is resolved afresh.
stale_modules = [
    mod_name
    for mod_name in list(sys.modules)
    if mod_name.partition('.')[0] == 'parcaestrategy'
]
for mod_name in stale_modules:
    del sys.modules[mod_name]

import parcaestrategy
pkg_path = os.path.abspath(parcaestrategy.__file__)
print('parcaestrategy loaded from:', pkg_path)
# Refuse to continue if the package came from anywhere other than the checkout.
loaded_from_checkout = pkg_path.startswith('/content/ParcaeStrategy/src/')
if not loaded_from_checkout:
    raise RuntimeError('Wrong package source loaded; restart runtime and run notebook from top.')

import torch
from parcaestrategy.ai.train import train_self_play

# Drive folder that receives the checkpoint; created here in case this cell is
# run without the mount/setup cell having created it.
ABADDON_DIR = Path('/content/drive/MyDrive/abaddon')
ABADDON_DIR.mkdir(parents=True, exist_ok=True)

# This profile is GPU-only: abort early when CUDA is unavailable.
cuda_ready = torch.cuda.is_available()
if not cuda_ready:
    raise RuntimeError('GPU is required for real training runs. Switch Colab runtime to GPU.')

gpu_name = torch.cuda.get_device_name(0)
DEVICE = 'cuda'
CHECKPOINT_OUT = str(ABADDON_DIR.joinpath('abaddon_colab_latest.pt'))
print('GPU:', gpu_name)

# Colab logging cadence: 1 prints every game, N prints every N games, 0 disables.
STATUS_EVERY_GAMES = 1
# Save an interval checkpoint every N games to Drive (0 disables).
CHECKPOINT_EVERY_GAMES = 200


def _tier_profile(*, games, batch_size, simulations, d_model, layers, heads,
                  ffn_dim, lr, warmup_steps):
    """Build the training keyword arguments for one GPU tier.

    Only the values that differ between tiers are parameters; everything else
    is shared. Keys are inserted in a fixed order so the printed configuration
    lists them in the same sequence for every tier.
    """
    return dict(
        games=games,
        batch_size=batch_size,
        epochs=3,
        device=DEVICE,
        simulations=simulations,
        self_play_temperature=1.0,
        self_play_max_plies=200,
        temperature_drop_plies=16,
        out_path=CHECKPOINT_OUT,
        d_model=d_model,
        layers=layers,
        heads=heads,
        ffn_dim=ffn_dim,
        dropout=0.1,
        lr=lr,
        warmup_steps=warmup_steps,
        progress_every=STATUS_EVERY_GAMES,
        checkpoint_every=CHECKPOINT_EVERY_GAMES,
    )


# Real training defaults (auto-tuned by GPU tier), selected by substring match
# on the reported device name.
if 'T4' in gpu_name:
    train_kwargs = _tier_profile(
        games=500,
        batch_size=96,
        simulations=80,
        d_model=160,
        layers=8,
        heads=5,
        ffn_dim=640,
        lr=6e-4,
        warmup_steps=1200,
    )
elif any(tag in gpu_name for tag in ['L4', 'V100', 'A100']):
    train_kwargs = _tier_profile(
        games=2500,
        batch_size=128,
        simulations=192,
        d_model=192,
        layers=10,
        heads=8,
        ffn_dim=768,
        lr=5e-4,
        warmup_steps=1800,
    )
else:
    # Any other GPU gets the intermediate profile.
    train_kwargs = _tier_profile(
        games=2000,
        batch_size=112,
        simulations=176,
        d_model=176,
        layers=9,
        heads=8,
        ffn_dim=704,
        lr=5e-4,
        warmup_steps=1500,
    )

# Echo the selected configuration so the run is reproducible from the cell output.
print('Training config:')
for k, v in train_kwargs.items():
    print(f'  {k}={v}')

# Time the run with a monotonic clock: wall-clock time (time.time) can jump when
# the system clock is adjusted, which would skew the elapsed value and the
# sanity warning below on a long run.
started = time.monotonic()
path = train_self_play(**train_kwargs)
elapsed = time.monotonic() - started
print('checkpoint written:', path)
print(f'elapsed_seconds={elapsed:.1f}')
# A run that finishes in under five minutes is suspicious for this profile.
if elapsed < 300:
    print('WARNING: completed much faster than expected for real profile; verify runtime/session state.')


In [None]:
from pathlib import Path

ABADDON_DIR = Path('/content/drive/MyDrive/abaddon')
ABADDON_DIR.mkdir(parents=True, exist_ok=True)
CKPT = ABADDON_DIR / 'abaddon_colab_latest.pt'
ONNX_OUT = ABADDON_DIR / 'parcae_model.onnx'

if not CKPT.exists():
    raise FileNotFoundError(f'Missing checkpoint: {CKPT}')

!PYTHONPATH=src python scripts/export_onnx.py \
  --checkpoint "$CKPT" \
  --out "$ONNX_OUT" \
  --verify-parity
print('onnx written:', ONNX_OUT)


In [None]:
from pathlib import Path

# Report presence and size of each expected artifact in the Drive folder.
ABADDON_DIR = Path('/content/drive/MyDrive/abaddon')
files = [
    ABADDON_DIR / artifact_name
    for artifact_name in ('abaddon_colab_latest.pt', 'parcae_model.onnx')
]
for artifact in files:
    present = artifact.exists()
    print(f'{artifact}:', 'exists' if present else 'missing')
    if present:
        print('  size_bytes=', artifact.stat().st_size)


In [None]:
# Optional: download a local copy from Drive artifacts
from pathlib import Path
from google.colab import files

ABADDON_DIR = Path('/content/drive/MyDrive/abaddon')
# Offer a browser download for each artifact that is present; absent ones are skipped.
artifact_paths = [
    ABADDON_DIR / artifact_name
    for artifact_name in ['abaddon_colab_latest.pt', 'parcae_model.onnx']
]
for artifact in artifact_paths:
    if not artifact.exists():
        continue
    files.download(str(artifact))
