# AlphaZero for Togyz Kumalak — Kaggle GPU Training

This notebook runs a minimal end-to-end loop:
- build model
- generate self-play samples with MCTS
- train for a few optimizer steps, using automatic mixed precision (AMP) when CUDA is available
- checkpoint to `/kaggle/working/`

Adjust `SIMS`, `GAMES`, and `STEPS` (and the number of iterations in the training loop) for longer runs.


In [None]:
import os, math, random

import torch

from src.nn.model import AlphaZeroNet
from src.selfplay.worker import play_episode, SelfPlayConfig
from src.trainer.loop import ReplayBuffer, TrainConfig, train_one_iteration, save_checkpoint

# Run-size knobs; raise them for longer runs.
SIMS = 80  # increase to 160+ for stronger search
GAMES = 32  # self-play games per mini-iteration
STEPS = 50  # optimizer steps per mini-iteration

# Use the GPU when PyTorch can see one, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using device:', device)

# Network and replay buffer shared by self-play and training.
net = AlphaZeroNet(in_channels=7, channels=64, num_blocks=6).to(device)
rb = ReplayBuffer(capacity=300_000)

# SIMS is forwarded as the self-play `simulations` setting.
sp_cfg = SelfPlayConfig(simulations=SIMS)
# Request AMP only on CUDA, as the notebook header states ("with AMP (if CUDA)").
# Previously amp=True was passed even on a CPU-only session.
# NOTE(review): TrainConfig may already guard this internally — confirm; the
# explicit flag is harmless either way.
tr_cfg = TrainConfig(
    batch_size=256,
    epochs_per_iter=1,
    amp=(device == 'cuda'),
    device=device,
)
opt = torch.optim.AdamW(net.parameters(), lr=1e-3, weight_decay=1e-4)

# Each pass: (1) fill the replay buffer with fresh self-play episodes,
# (2) run STEPS optimizer steps, (3) overwrite the single checkpoint file.
ckpt_path = '/kaggle/working/az_togyz.pt'  # same file is rewritten every pass

for it in range(3):  # extend as session allows
    # --- self-play: GAMES episodes played with the current network ---
    for _ in range(GAMES):
        samples = play_episode(net, sp_cfg)
        rb.add_many(samples)
    print(f'Iter {it}: buffer size = {len(rb)}')

    # --- training on the replay buffer ---
    loss = train_one_iteration(net, opt, rb, tr_cfg, steps=STEPS)
    print(f'Iter {it}: loss = {loss:.4f}')

    # --- checkpoint: model + optimizer, tagged with the iteration index ---
    save_checkpoint(ckpt_path, net, opt, step=it)
    print(f'Saved checkpoint to {ckpt_path}')
