In [1]:
# Add this to the first cell of your notebook
%load_ext autoreload
%autoreload 2  # Reload all modules (except those excluded) before executing code

In [2]:
import os
# os.environ['CUDA_VISIBLE_DEVICES'] = ''   # comment this line if you want GPU again

In [3]:
# Mount Google Drive at /content/drive; the project directory used by the
# later cells lives underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
!pip install tqdm psutil plotly kaleido --quiet
import os
import sys
import random
import time
import threading
import IPython
from google.colab import output
from datetime import datetime

In [5]:
# Ensure version compatibility with local setup
print("Installing compatible package versions...")

# Install specific versions to match local setup
!pip install torch==2.5.1 numpy==2.0.1 --quiet
import numpy as np


# Verify PyTorch and NumPy versions after installation
!python -c "import torch; print(f'PyTorch post-install: {torch.__version__}')"
!python -c "import numpy; print(f'NumPy post-install: {numpy.__version__}')"

# Force CUDA setup for PyTorch
import torch
print(f"CUDA setup: available={torch.cuda.is_available()}, device count={torch.cuda.device_count() if torch.cuda.is_available() else 0}")
if torch.cuda.is_available():
    print(f"Current CUDA device: {torch.cuda.current_device()}, name: {torch.cuda.get_device_name()}")

Installing compatible package versions...
PyTorch post-install: 2.5.1+cu124
NumPy post-install: 2.0.1
CUDA setup: available=True, device count=1
Current CUDA device: 0, name: Tesla T4


In [6]:
# Set path to your project on Google Drive
# (DRIVE_PATH is reused later as the destination root for the timestamped
# models_/logs_/plots_ result folders.)
DRIVE_PATH = '/content/drive/MyDrive/CatanRL'

# Change to the project directory
# From here on, relative paths such as models/, logs/ and plots/ resolve
# inside DRIVE_PATH.
%cd {DRIVE_PATH}



/content/drive/MyDrive/CatanRL


In [7]:
import time, threading
from google.colab import output

# 1. Define a dummy no-op Python callback.
def _noop():
    """Return the constant string "ok"; takes no arguments and touches no state."""
    return "ok"

# 2. Register it once - gives us a handle "keep_alive".
#    This is the name the JavaScript snippet in keep_colab_alive() invokes.
output.register_callback('keep_alive', _noop)

def keep_colab_alive(interval_sec: int = 60, stop_event: "threading.Event | None" = None):
    """Ping the Colab front-end every ``interval_sec`` seconds until stopped.

    Each ping asks the page (via ``output.eval_js``) to invoke the Python
    callback registered under the name "keep_alive"; the round-trip itself is
    the point, the callback's return value is ignored. Per the original
    author's note this worked in Colab as of 2025-04 - not re-verified here.

    Args:
        interval_sec: Pause between pings, in seconds.
        stop_event: Optional ``threading.Event``. When it is set the loop
            exits, at the latest after the current wait. If omitted, the
            function loops forever (the original behaviour), so it should be
            run in a daemon thread.

    Returns:
        None, once ``stop_event`` has been set.
    """
    if stop_event is None:
        # A private event that nobody ever sets: loop forever, as before.
        stop_event = threading.Event()

    while not stop_event.is_set():
        try:
            # JS in the page calls the Python no-op; the round-trip is what matters
            output.eval_js('google.colab.kernel.invokeFunction("keep_alive", [], {})')
            print("♥", end="", flush=True)
        except Exception:
            # Deliberately best-effort: if the socket was momentarily closed
            # (or the ping failed for any other reason), skip this beat and
            # retry on the next one rather than killing the thread.
            pass
        # Sleeps like time.sleep(interval_sec) but wakes immediately on stop.
        stop_event.wait(interval_sec)

# Start the pinger at most once per kernel: re-running this cell would
# otherwise stack up one more never-ending background thread each time.
_KEEP_ALIVE_THREAD_NAME = "colab-keep-alive"
if any(t.name == _KEEP_ALIVE_THREAD_NAME and t.is_alive() for t in threading.enumerate()):
    print("Keep-alive thread already running; not starting another one.")
else:
    print("Starting keep‑alive thread …")
    # daemon=True so the thread never blocks interpreter shutdown.
    threading.Thread(target=keep_colab_alive, name=_KEEP_ALIVE_THREAD_NAME, daemon=True).start()


Starting keep‑alive thread …


In [None]:
# Wall-clock tag (YYYYMMDD_HHMMSS) taken when this cell starts; it is used
# further down to name the models_/logs_/plots_ result folders.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Pinned to CPU. NOTE(review): `device` is not referenced again in this cell -
# the pipeline receives its device through config['device'] instead; confirm
# whether this variable is still needed.
device = torch.device('cpu')

# Reproducibility: put every RNG used in this notebook into a known state.
def set_random_seeds(seed=42):
    """Seed Python's ``random``, NumPy's global RNG and PyTorch with ``seed``.

    Args:
        seed: Integer seed handed unchanged to each of the three libraries.

    CUDA-specific seeding and cuDNN determinism flags are not configured here.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

# Apply the default seed right away.
set_random_seeds()

# Step 5: Set up training parameters
# You can customize these parameters
import argparse

# Preset training modes, in precedence order: if several mode flags are given,
# the first one listed here wins. A selected preset overwrites --iterations,
# --games, --sims and --eval-games, even when those were passed explicitly.
_MODE_PRESETS = {
    "quick":     {"iterations": 1,   "games": 2,  "sims": 10,  "eval_games": 2},
    "medium":    {"iterations": 10,  "games": 5,  "sims": 50,  "eval_games": 5},
    "full":      {"iterations": 50,  "games": 20, "sims": 100, "eval_games": 10},
    "overnight": {"iterations": 100, "games": 35, "sims": 150, "eval_games": 20},
}

# The notebook has no real command line, so the parser is fed an explicit
# argument list below; edit that list to change the run.
parser = argparse.ArgumentParser(description="AlphaZero Catan Training")
parser.add_argument("--iterations", type=int, default=50, help="Number of training iterations")
parser.add_argument("--resume", type=str, default=None, help="Path to checkpoint to resume from")
parser.add_argument("--games", type=int, default=20, help="Number of self-play games per iteration")
parser.add_argument("--sims", type=int, default=100, help="Number of MCTS simulations per move")
parser.add_argument("--eval-games", type=int, default=10, help="Number of evaluation games")
parser.add_argument("--quick", action="store_true", help="Quick training (1 iteration, 2 games)")
parser.add_argument("--medium", action="store_true", help="Medium training (10 iterations, 5 games)")
parser.add_argument("--full", action="store_true", help="Full training (50 iterations, 20 games)")
# Help text now matches the preset actually applied (35 games, not 30).
parser.add_argument("--overnight", action="store_true", help="Overnight training (100 iterations, 35 games)")

# Parse the arguments directly
args = parser.parse_args(['--overnight', '--resume', 'models/model_iter_19.pt'])
# Alternative: medium preset, starting fresh (no --resume)
# args = parser.parse_args(['--medium'])

# Configure training mode: apply the first selected preset, if any.
for _mode, _preset in _MODE_PRESETS.items():
    if getattr(args, _mode):
        print(f"Running in {_mode.upper()} mode")
        for _name, _value in _preset.items():
            setattr(args, _name, _value)
        break

# Echo the effective run settings before anything expensive starts.
run_summary = (
    f"\n=== AlphaZero Catan Training ===",
    f"Iterations: {args.iterations}",
    f"Self-play games per iteration: {args.games}",
    f"MCTS simulations per move: {args.sims}",
    f"Resume from: {args.resume if args.resume else 'Starting fresh'}",
)
print(*run_summary, sep="\n")

# Step 6: Load the project configuration and overlay this run's settings.
# The device is pinned to 'cpu' here.
from AlphaZero.utils.config import get_config
config = get_config()

for key, value in (
    ('num_iterations', args.iterations),
    ('self_play_games', args.games),
    ('num_simulations', args.sims),
    ('eval_games', args.eval_games),
    ('device', 'cpu'),
):
    config[key] = value

# Step 7: Create logs and models directories
!mkdir -p logs
!mkdir -p models
!mkdir -p plots

# Step 8: Start the training
from AlphaZero.training.training_pipeline import TrainingPipeline

# Bind the name before the try block. Without this, an interrupt that arrives
# while the pipeline is still being constructed would hit an unbound name in
# the handler - or, on a re-run of this cell, checkpoint a stale pipeline
# object left over from a previous execution.
pipeline = None

try:
    # Start time tracking
    start_time = time.time()

    # Create the training pipeline
    pipeline = TrainingPipeline(config)

    # Train for the specified iterations
    pipeline.train(args.iterations, resume_from=args.resume)

    # Calculate total training time and split it into h / m / s
    total_time = time.time() - start_time
    hours, _remainder = divmod(int(total_time), 3600)
    minutes, seconds = divmod(_remainder, 60)

    print(f"\nTraining completed in {hours}h {minutes}m {seconds}s")

except KeyboardInterrupt:
    if pipeline is None:
        print("\nTraining interrupted before the pipeline was created; nothing to save.")
    else:
        print("\nTraining interrupted! Saving checkpoint...")
        pipeline.save_model(pipeline.current_iteration)
        print("Checkpoint saved. You can resume with this checkpoint later.")
except Exception as e:
    # Best-effort: report the failure with a full traceback but let the cell
    # continue, so results produced so far can still be copied afterwards.
    print(f"Error during training: {e}")
    import traceback
    traceback.print_exc()

# Step 9: Copy results back to Google Drive
!mkdir -p {DRIVE_PATH}/models_{timestamp}
!mkdir -p {DRIVE_PATH}/logs_{timestamp}
!mkdir -p {DRIVE_PATH}/plots_{timestamp}

!cp -r models/* {DRIVE_PATH}/models_{timestamp}/
!cp -r logs/* {DRIVE_PATH}/logs_{timestamp}/
!cp -r plots/* {DRIVE_PATH}/plots_{timestamp}/
""
print(f"\nTraining results saved to Google Drive in folders with timestamp {timestamp}")

Running in OVERNIGHT mode

=== AlphaZero Catan Training ===
Iterations: 100
Self-play games per iteration: 35
MCTS simulations per move: 150
Resume from: models/model_iter_19.pt
[2025-04-21 06:08:03] AlphaZero Catan Training started at 20250421_060803
[2025-04-21 06:08:03] Configuration: {'state_dim': 992, 'action_dim': 200, 'hidden_dim': 256, 'learning_rate': 0.001, 'num_iterations': 100, 'self_play_games': 35, 'eval_games': 20, 'epochs': 10, 'batch_size': 256, 'buffer_size': 200000, 'num_simulations': 150, 'c_puct': 1.5, 'mcts_batch_size': 32, 'max_moves': 200, 'device': 'cpu', 'model_dir': 'models'}


  checkpoint = torch.load(path)


[2025-04-21 06:08:04] Checkpoint loaded from models/model_iter_19.pt, resuming from iteration 19
[2025-04-21 06:08:04] Resuming training from iteration 19
[2025-04-21 06:08:04] 
=== Iteration 20/100 ===
[2025-04-21 06:08:04] Starting self-play...


  network.load_state_dict(torch.load(config['checkpoint_path']))
  network.load_state_dict(torch.load(config['checkpoint_path']))
  network.load_state_dict(torch.load(config['checkpoint_path']))
  network.load_state_dict(torch.load(config['checkpoint_path']))
  network.load_state_dict(torch.load(config['checkpoint_path']))
  network.load_state_dict(torch.load(config['checkpoint_path']))
  network.load_state_dict(torch.load(config['checkpoint_path']))
Self-play games:  57%|█████▋    | 20/35 [06:53<02:43, 10.91s/it]

In [None]:
# ===== CPU model, core / thread counts, and base turbo freq =====
!lscpu | egrep 'Model name|Socket|Thread|Core|MHz'

# ===== Current clock speed of every logical core (updates once) =====
!grep \"cpu MHz\" /proc/cpuinfo | head

# ===== Simple “how fast is it?” micro‑benchmark =====
import time, numpy as np
N = 6000
a = np.random.randn(N, N).astype(np.float32)
b = np.random.randn(N, N).astype(np.float32)

t0 = time.time()
c = a @ b          # single BLAS call – leverages all cores & any MKL/OPENBLAS
elapsed = time.time() - t0
gflops = 2*N**3 / elapsed / 1e9

print(f"\n{elapsed:.3f} s   ≈ {gflops:.1f} GFLOP/s (single large mat‑mul)")


In [None]:
# Print the first 'model name' line from /proc/cpuinfo (-m1 stops after one match).
!grep -m1 'model name' /proc/cpuinfo
# Print the number of processing units available.
!nproc
