# Run Stochastic MuZero Harness Benchmarks (Othello + 2048)
This notebook clones the repo from GitHub and runs:
- training on **Othello**
- training on **2048**
- evaluation for each
- optional MCTS planning eval on 2048

Outputs (checkpoints + rollout PNGs) are saved to Google Drive.

## Setup

In [None]:
!nvidia-smi -L || true
import torch, sys
print('torch', torch.__version__, 'cuda available?', torch.cuda.is_available())

In [None]:
# Mount Google Drive at /content/drive; the paths configured in the next cell
# (ROOT, REPO_DIR, OUTROOT) all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os, pathlib

# Base folder on the mounted Drive; the checkout and the outputs are siblings under it.
ROOT = '/content/drive/MyDrive/Colab_Notebooks'
REPO_DIR = ROOT + '/tg_smn'
OUTROOT = ROOT + '/tg_smn_outputs_stoch_muzero'
# Optionally set to 'stoch-muzero-harness' if you pushed a branch and haven't merged yet.
BRANCH = ''

# Create the base and output folders up front (no error if they already exist).
os.makedirs(ROOT, exist_ok=True)
os.makedirs(OUTROOT, exist_ok=True)

print('ROOT:', ROOT)
print('REPO_DIR:', REPO_DIR)
print('OUTROOT:', OUTROOT)

In [None]:
%%bash -s "$ROOT" "$BRANCH"
# Clone the repo into ROOT on first use, then fetch, optionally switch branch, and pull.
#
# The `$ROOT` / `$BRANCH` on the magic line above are expanded by IPython from the
# notebook's Python variables and handed to bash as positional arguments. Python
# globals are NOT visible inside the script body, so they must be passed this way.
set -e
ROOT="$1"
BRANCH="${2:-}"   # empty or missing -> keep whatever branch is already checked out

cd "$ROOT"
if [ ! -d tg_smn ]; then
  git clone https://github.com/RespectableGlioma/tg_smn.git
fi
cd tg_smn
git fetch --all
if [ -n "$BRANCH" ]; then
  git checkout "$BRANCH"
else
  # Nothing to switch to; just report where we are.
  echo "BRANCH not set; staying on $(git rev-parse --abbrev-ref HEAD)"
fi
git pull
git status

In [None]:
!pip -q install -U pip setuptools wheel
!pip -q install tqdm pillow numpy
# (torch is preinstalled on Colab; no need to pip install torch unless you want a specific build)

## Quick import test

In [None]:
%cd $REPO_DIR
import world_models.stoch_muzero_harness as smh
print('Imported:', smh.__name__)

## Train: Othello

In [None]:
%cd $REPO_DIR
GAME='othello'
!python -u -m world_models.stoch_muzero_harness.train \
  --game $GAME \
  --collect_episodes 300 \
  --train_steps 20000 \
  --eval_every 2000 \
  --save_every 5000 \
  --device cuda \
  --outdir "$OUTROOT"

## Eval: Othello (prediction-only)

In [None]:
%cd $REPO_DIR
!python -u -m world_models.stoch_muzero_harness.eval \
  --game othello \
  --ckpt "$OUTROOT/othello/ckpt_final.pt" \
  --episodes 50 \
  --device cuda

## Train: 2048

In [None]:
%cd $REPO_DIR
GAME='2048'
!python -u -m world_models.stoch_muzero_harness.train \
  --game $GAME \
  --collect_episodes 300 \
  --train_steps 20000 \
  --eval_every 2000 \
  --save_every 5000 \
  --device cuda \
  --outdir "$OUTROOT"

## Eval: 2048 (prediction-only)

In [None]:
%cd $REPO_DIR
!python -u -m world_models.stoch_muzero_harness.eval \
  --game 2048 \
  --ckpt "$OUTROOT/2048/ckpt_final.pt" \
  --episodes 50 \
  --device cuda

## Eval: 2048 with MCTS planning (optional)
This uses the learned latent model for MCTS search (`--mcts_sims`) and applies the entropy shortcut, with the threshold set by `--entropy_thr`:
- low entropy chance → deterministic rollout
- high entropy chance → sample outcomes

In [None]:
%cd $REPO_DIR
!python -u -m world_models.stoch_muzero_harness.eval \
  --game 2048 \
  --ckpt "$OUTROOT/2048/ckpt_final.pt" \
  --episodes 50 \
  --mcts_sims 64 \
  --entropy_thr 0.5 \
  --device cuda

## View latest rollout images

In [None]:
import glob
import re
from PIL import Image
import matplotlib.pyplot as plt


def _natural_key(path):
    """Return a sort key that compares digit runs in ``path`` as integers.

    Plain string ordering ranks 'step8000' above 'step20000'; this key ranks
    them by numeric value instead.
    """
    # re.split with a capturing group yields text, digits, text, ... so odd
    # positions are always digit runs and keys compare like-with-like.
    parts = re.split(r'([0-9]+)', path)
    return [int(tok) if idx % 2 else tok for idx, tok in enumerate(parts)]


def show_latest(pattern, title):
    """Display the highest-numbered image matching a glob pattern.

    Args:
        pattern: Glob pattern for the candidate image files.
        title: Label printed next to the chosen path and used as the figure title.

    Prints a message and returns without plotting when nothing matches.
    """
    paths = glob.glob(pattern)
    if not paths:
        print('No images found for', pattern)
        return
    # Pick by numeric step rather than lexicographic order (see _natural_key).
    p = max(paths, key=_natural_key)
    print(title, '->', p)
    # Context manager closes the underlying file once the figure is drawn.
    with Image.open(p) as img:
        plt.figure(figsize=(14, 4))
        plt.imshow(img, cmap='gray')
        plt.title(title)
        plt.axis('off')
        plt.show()


show_latest(f'{OUTROOT}/othello/rollout_gt_vs_pred_step*.png', 'Othello rollout')
show_latest(f'{OUTROOT}/2048/rollout_gt_vs_pred_step*.png', '2048 rollout')