In [1]:
# Add this to the first cell of your notebook
%load_ext autoreload
%autoreload 2  # Reload all modules (except those excluded) before executing code

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install tqdm psutil plotly kaleido --quiet
import os
import sys
import random
import numpy as np
import time
import threading
import IPython
from google.colab import output
from datetime import datetime

In [4]:
# Ensure version compatibility with local setup
print("Installing compatible package versions...")

# Install specific versions to match local setup
!pip install torch==2.5.1 numpy==2.0.1 --quiet

# Reload modules to ensure changes take effect
%load_ext autoreload
%autoreload 2

# Verify PyTorch and NumPy versions after installation
!python -c "import torch; print(f'PyTorch post-install: {torch.__version__}')"
!python -c "import numpy; print(f'NumPy post-install: {numpy.__version__}')"

# Force CUDA setup for PyTorch
import torch
print(f"CUDA setup: available={torch.cuda.is_available()}, device count={torch.cuda.device_count() if torch.cuda.is_available() else 0}")
if torch.cuda.is_available():
    print(f"Current CUDA device: {torch.cuda.current_device()}, name: {torch.cuda.get_device_name()}")

Installing compatible package versions...
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
PyTorch post-install: 2.5.1+cu124
NumPy post-install: 2.0.1
CUDA setup: available=True, device count=1
Current CUDA device: 0, name: Tesla T4


In [5]:
# Set path to your project on Google Drive
DRIVE_PATH = '/content/drive/MyDrive/CatanRL'

# Change to the project directory
%cd {DRIVE_PATH}



/content/drive/MyDrive/CatanRL


In [6]:
# Run this in a Colab cell to check device handling
import torch

# Check CUDA availability
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device count: {torch.cuda.device_count()}")
    print(f"Current CUDA device: {torch.cuda.current_device()}")
    print(f"CUDA device name: {torch.cuda.get_device_name()}")

# Check device of your model
from AlphaZero.core.network import DeepCatanNetwork
model = DeepCatanNetwork(992, 200, 256)

# Try loading your checkpoint
checkpoint_path = '/content/drive/MyDrive/CatanRL/models/best_model.pt'
checkpoint = torch.load(checkpoint_path, map_location='cpu')  # Load to CPU first

# Print device information
print(f"\nModel device before loading: {next(model.parameters()).device}")

# Check devices in checkpoint
if 'network_state_dict' in checkpoint:
    sample_key = list(checkpoint['network_state_dict'].keys())[0]
    sample_tensor = checkpoint['network_state_dict'][sample_key]
    print(f"Checkpoint tensor device: {sample_tensor.device}")

# Try explicit device handling
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Target device: {device}")

# Move model to device and then load state dict
model = model.to(device)
print(f"Model device after .to(device): {next(model.parameters()).device}")

# Move checkpoint tensors to the right device
for key in checkpoint['network_state_dict']:
    checkpoint['network_state_dict'][key] = checkpoint['network_state_dict'][key].to(device)

# Now load the state dict
model.load_state_dict(checkpoint['network_state_dict'])
print(f"Model device after loading: {next(model.parameters()).device}")

CUDA available: True
CUDA device count: 1
Current CUDA device: 0
CUDA device name: Tesla T4


  checkpoint = torch.load(checkpoint_path, map_location='cpu')  # Load to CPU first



Model device before loading: cpu
Checkpoint tensor device: cpu
Target device: cuda
Model device after .to(device): cuda:0
Model device after loading: cuda:0


In [6]:


def keep_colab_alive():
    """
    This function runs in a separate thread and periodically
    executes JavaScript code to prevent Google Colab from disconnecting.
    """
    while True:
        # Execute JavaScript to simulate user activity
        try:
            output.eval_js('new Date().toISOString()')
            # Make a simple fetch request to keep the connection active
            output.eval_js('fetch("https://httpbin.org/get")')
            print("♥", end="", flush=True)  # Visual heartbeat
        except:
            pass
        time.sleep(90)  # Check every 45 seconds

# Start the anti-disconnect thread
print("Starting anti-disconnect protection...")
keep_alive_thread = threading.Thread(target=keep_colab_alive, daemon=True)
keep_alive_thread.start()


Starting anti-disconnect protection...


In [None]:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Set random seeds for reproducibility
def set_random_seeds(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

set_random_seeds()

# Step 5: Set up training parameters
# You can customize these parameters
import argparse

# Parse arguments from command line or use defaults
# This allows you to change parameters when running the notebook
parser = argparse.ArgumentParser(description="AlphaZero Catan Training")
parser.add_argument("--iterations", type=int, default=50, help="Number of training iterations")
parser.add_argument("--resume", type=str, default=None, help="Path to checkpoint to resume from")
parser.add_argument("--games", type=int, default=20, help="Number of self-play games per iteration")
parser.add_argument("--sims", type=int, default=100, help="Number of MCTS simulations per move")
parser.add_argument("--eval-games", type=int, default=10, help="Number of evaluation games")
parser.add_argument("--quick", action="store_true", help="Quick training (1 iteration, 2 games)")
parser.add_argument("--medium", action="store_true", help="Medium training (10 iterations, 5 games)")
parser.add_argument("--full", action="store_true", help="Full training (50 iterations, 20 games)")
parser.add_argument("--overnight", action="store_true", help="Overnight training (100 iterations, 30 games)")

# Parse the arguments directly
# args = parser.parse_args(['--overnight', '--resume', '/content/drive/MyDrive/CatanRL/models/best_model.pt'])  # Using existing model
args = parser.parse_args(['--overnight'])
# Configure training mode
if args.quick:
    print("Running in QUICK mode")
    args.iterations = 1
    args.games = 2
    args.sims = 10
    args.eval_games = 2
elif args.medium:
    print("Running in MEDIUM mode")
    args.iterations = 10
    args.games = 5
    args.sims = 50
    args.eval_games = 5
elif args.full:
    print("Running in FULL mode")
    args.iterations = 50
    args.games = 20
    args.sims = 100
    args.eval_games = 10
elif args.overnight:
    print("Running in OVERNIGHT mode")
    args.iterations = 100
    args.games = 30
    args.sims = 150
    args.eval_games = 15

print(f"\n=== AlphaZero Catan Training ===")
print(f"Iterations: {args.iterations}")
print(f"Self-play games per iteration: {args.games}")
print(f"MCTS simulations per move: {args.sims}")
print(f"Resume from: {args.resume if args.resume else 'Starting fresh'}")

# Step 6: Get configuration and modify for GPU
from AlphaZero.utils.config import get_config
config = get_config()

# Customize config with command line arguments
config['num_iterations'] = args.iterations
config['self_play_games'] = args.games
config['num_simulations'] = args.sims
config['eval_games'] = args.eval_games
config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

# Step 7: Create logs and models directories
!mkdir -p logs
!mkdir -p models
!mkdir -p plots

# Step 8: Start the training
from AlphaZero.training.training_pipeline import TrainingPipeline

try:
    # Start time tracking
    start_time = time.time()

    # Create the training pipeline
    pipeline = TrainingPipeline(config)

    # Train for the specified iterations
    pipeline.train(args.iterations, resume_from=args.resume)

    # Calculate total training time
    total_time = time.time() - start_time
    hours = int(total_time // 3600)
    minutes = int((total_time % 3600) // 60)
    seconds = int(total_time % 60)

    print(f"\nTraining completed in {hours}h {minutes}m {seconds}s")

except KeyboardInterrupt:
    print("\nTraining interrupted! Saving checkpoint...")
    pipeline.save_model(pipeline.current_iteration)
    print("Checkpoint saved. You can resume with this checkpoint later.")
except Exception as e:
    print(f"Error during training: {e}")
    import traceback
    traceback.print_exc()

# Step 9: Copy results back to Google Drive
!mkdir -p {DRIVE_PATH}/models_{timestamp}
!mkdir -p {DRIVE_PATH}/logs_{timestamp}
!mkdir -p {DRIVE_PATH}/plots_{timestamp}

!cp -r models/* {DRIVE_PATH}/models_{timestamp}/
!cp -r logs/* {DRIVE_PATH}/logs_{timestamp}/
!cp -r plots/* {DRIVE_PATH}/plots_{timestamp}/

print(f"\nTraining results saved to Google Drive in folders with timestamp {timestamp}")

Using device: cuda
Running in OVERNIGHT mode

=== AlphaZero Catan Training ===
Iterations: 100
Self-play games per iteration: 30
MCTS simulations per move: 150
Resume from: Starting fresh
[2025-04-20 18:18:09] AlphaZero Catan Training started at 20250420_181809
[2025-04-20 18:18:09] Configuration: {'state_dim': 992, 'action_dim': 200, 'hidden_dim': 256, 'learning_rate': 0.001, 'num_iterations': 100, 'self_play_games': 30, 'eval_games': 15, 'epochs': 10, 'batch_size': 128, 'buffer_size': 100000, 'num_simulations': 150, 'c_puct': 1.5, 'mcts_batch_size': 8, 'max_moves': 200, 'model_dir': 'models', 'device': 'cuda'}
[2025-04-20 18:18:09] 
=== Iteration 1/100 ===
[2025-04-20 18:18:09] Starting self-play...


Self-play games:   0%|          | 0/30 [00:00<?, ?it/s]