# Run Stochastic MuZero Harness Benchmarks (Othello + 2048)
This notebook clones the repo from GitHub and runs:
- training on **Othello**
- training on **2048**
- evaluation for each
- optional MCTS planning eval on 2048

Outputs (checkpoints + rollout PNGs) are saved to Google Drive.

## Setup

In [3]:
!nvidia-smi -L || true
import torch, sys
print('torch', torch.__version__, 'cuda available?', torch.cuda.is_available())

GPU 0: Tesla T4 (UUID: GPU-1f6fb90f-09d6-fdc0-2c7e-cb7667875e55)
torch 2.9.0+cu126 cuda available? True


In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import os, pathlib
ROOT = '/content/drive/MyDrive/Colab_Notebooks'
REPO_DIR = f'{ROOT}/tg_smn'
OUTROOT = f'{ROOT}/tg_smn_outputs_stoch_muzero'
BRANCH = 'stoch-muzero-harness'  # optionally set to 'stoch-muzero-harness' if you pushed a branch and haven't merged yet

pathlib.Path(ROOT).mkdir(parents=True, exist_ok=True)
pathlib.Path(OUTROOT).mkdir(parents=True, exist_ok=True)
print('ROOT:', ROOT)
print('REPO_DIR:', REPO_DIR)
print('OUTROOT:', OUTROOT)

ROOT: /content/drive/MyDrive/Colab_Notebooks
REPO_DIR: /content/drive/MyDrive/Colab_Notebooks/tg_smn
OUTROOT: /content/drive/MyDrive/Colab_Notebooks/tg_smn_outputs_stoch_muzero


In [6]:
%%bash
cd /content/drive/MyDrive/Colab_Notebooks/tg_smn

# 1. Fetch all remote info
git fetch --all

# 2. Switch to the harness branch
# (If the branch is named differently, 'git branch -r' below will show you)
git checkout stoch-muzero-harness || echo "Branch not found locally, trying to track remote..." && git checkout -b stoch-muzero-harness origin/stoch-muzero-harness

# 3. Pull latest changes
git pull

# 4. Verify the file now exists
echo "--- Checking world_models folder content ---"
ls -F world_models/

Fetching origin
M	Learning_Causal_Relationships_v2.ipynb
Your branch is up to date with 'origin/stoch-muzero-harness'.
Already up to date.
--- Checking world_models folder content ---
__init__.py
__pycache__/
stoch_muzero_harness/


Already on 'stoch-muzero-harness'
fatal: A branch named 'stoch-muzero-harness' already exists.


In [7]:
!pip -q install -U pip setuptools wheel
!pip -q install tqdm pillow numpy
# (torch is preinstalled on Colab; no need to pip install torch unless you want a specific build)

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m69.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m56.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ipython 7.34.0 requires jedi>=0.16, which is not installed.[0m[31m
[0m

## Quick import test

In [8]:
%cd $REPO_DIR
import world_models.stoch_muzero_harness as smh
print('Imported:', smh.__name__)

/content/drive/MyDrive/Colab_Notebooks/tg_smn
Imported: world_models.stoch_muzero_harness


In [9]:
import os
print(f"Current Working Directory: {os.getcwd()}")
print("\n--- Contents of Repo Root ---")
!ls -F

print("\n--- Search for stoch_muzero_harness ---")
!find . -name "stoch_muzero_harness.py"

Current Working Directory: /content/drive/MyDrive/Colab_Notebooks/tg_smn

--- Contents of Repo Root ---
ale_rssm_causal_stochastic.py		README.md
Learning_Causal_Relationships_v2.ipynb	Run_StochMuZeroHarness_Benchmarks.ipynb
notebooks/				stoch_muzero_harness_patch.zip
outputs_othello_rssm/			tg_smn/
outputs_rssm_causal_stochastic/		tg_smn.egg-info/
outputs_rssm_causal_stochastic_v2/	TG_SMN_Run.ipynb
pyproject.toml				world_models/

--- Search for stoch_muzero_harness ---


## Train: Othello

In [17]:
%cd $REPO_DIR
GAME='othello'
!python -u -m world_models.stoch_muzero_harness.train \
  --game $GAME \
  --collect_episodes 300 \
  --collect_episodes 2000 \
  --train_steps 40000 \
  --batch 128 \
  --unroll 10 \
  --w_policy 0 \
  --w_value 0 \
  --w_reward 0 \
  --w_chance 0 \
  --w_aux 1 \
  --w_after_aux 3 \
  --w_style 0.2 \
  --w_inv 1 \
  --device cuda \
  --outdir "$OUTROOT"

/content/drive/MyDrive/Colab_Notebooks/tg_smn
Game=othello | action_size=65 | chance_size=1 | obs=(64, 64) | styles=16
Collect(othello):   4% 78/2000 [00:09<04:02,  7.94it/s]
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
object address  : 0x7df08976b2e0
object refcount : 3
object type     : 0xa2a4e0
object type name: KeyboardInterrupt
object repr     : KeyboardInterrupt()
lost sys.stderr
^C


## Eval: Othello (prediction-only)

In [12]:
%cd $REPO_DIR
!python -u -m world_models.stoch_muzero_harness.eval \
  --game othello \
  --ckpt "$OUTROOT/othello/ckpt_final.pt" \
  --episodes 50 \
  --device cuda

/content/drive/MyDrive/Colab_Notebooks/tg_smn
Prediction eval over 2999 transitions: exact_after=0.043 | chance_acc=1.000 | exact_next=0.044


## Train: 2048

In [None]:
%cd $REPO_DIR
GAME='2048'
!python -u -m world_models.stoch_muzero_harness.train \
  --game $GAME \
  --num_styles 1 \
  --collect_episodes 2000 \
  --train_steps 30000 \
  --batch 128 \
  --unroll 5 \
  --w_policy 0 \
  --w_value 0 \
  --w_reward 0 \
  --w_aux 5.0 \
  --w_after_aux 5.0 \
  --w_chance 1 \
  --w_style 0 \
  --w_inv 0 \
  --device cuda \
  --outdir "$OUTROOT"

/content/drive/MyDrive/Colab_Notebooks/tg_smn
Game=2048 | action_size=4 | chance_size=33 | obs=(64, 64) | styles=1
Collect(2048): 100% 2000/2000 [04:21<00:00,  7.65it/s]
Collected episodes: 2000
step 000200 | loss 17.7833 | pol 1.2421 val 618584.3750 aux 1.6630 after_aux 1.5020 chance 1.9584 rew 338.7046 style 0.0000 inv 0.0000
step 000400 | loss 16.9426 | pol 1.2654 val 697464.8125 aux 1.5509 after_aux 1.4419 chance 1.9786 rew 304.1305 style 0.0000 inv 0.0000
step 000600 | loss 17.0703 | pol 1.2573 val 539103.5000 aux 1.5542 after_aux 1.4634 chance 1.9820 rew 418.5751 style 0.0000 inv 0.0000
step 000800 | loss 17.0152 | pol 1.2448 val 645058.2500 aux 1.5361 after_aux 1.4819 chance 1.9253 rew 306.8600 style 0.0000 inv 0.0000
step 001000 | loss 17.0660 | pol 1.2387 val 568916.2500 aux 1.5369 after_aux 1.5000 chance 1.8813 rew 361.7408 style 0.0000 inv 0.0000
step 001200 | loss 16.4719 | pol 1.2513 val 648102.3750 aux 1.4669 after_aux 1.4299 chance 1.9880 rew 325.7607 style 0.0000 inv 0.

## Eval: 2048 (prediction-only)

In [None]:
%cd $REPO_DIR
!python -u -m world_models.stoch_muzero_harness.eval \
  --game 2048 \
  --ckpt "$OUTROOT/2048/ckpt_final.pt" \
  --episodes 50 \
  --device cuda

## Eval: 2048 with MCTS planning (optional)
This uses the learned latent model for search and applies the entropy shortcut:
- low entropy chance → deterministic rollout
- high entropy chance → sample outcomes

In [None]:
%cd $REPO_DIR
!python -u -m world_models.stoch_muzero_harness.eval \
  --game 2048 \
  --ckpt "$OUTROOT/2048/ckpt_final.pt" \
  --episodes 50 \
  --mcts_sims 64 \
  --entropy_thr 0.5 \
  --device cuda

## View latest rollout images

In [None]:
import glob
from PIL import Image
import matplotlib.pyplot as plt

def show_latest(pattern, title):
    paths = sorted(glob.glob(pattern))
    if not paths:
        print('No images found for', pattern)
        return
    p = paths[-1]
    print(title, '->', p)
    img = Image.open(p)
    plt.figure(figsize=(14, 4))
    plt.imshow(img, cmap='gray')
    plt.axis('off')
    plt.show()

show_latest(f'{OUTROOT}/othello/rollout_gt_vs_pred_step*.png', 'Othello rollout')
show_latest(f'{OUTROOT}/2048/rollout_gt_vs_pred_step*.png', '2048 rollout')