# Colab Setup Verification

**Purpose:** Quick test to verify that the Colab environment is ready for training.

**Runtime:** 5-10 minutes

Run all cells in order and verify each step succeeds before starting full training.

## 1. GPU Check

In [None]:
import torch

# Ask PyTorch once whether CUDA is usable and reuse the answer for both
# the report line and the branch below.
has_gpu = torch.cuda.is_available()

print("GPU Information:")
print(f"  CUDA available: {has_gpu}")

if not has_gpu:
    # No CUDA device visible: point the user at the Colab runtime setting.
    print("\n✗ No GPU detected!")
    print("  Go to Runtime → Change runtime type → Select GPU")
else:
    # Report the name of device 0, the CUDA version PyTorch reports, and the
    # memory figures for device 0 converted from bytes to GB (divide by 1e9).
    bytes_per_gb = 1e9
    print(f"  Device name: {torch.cuda.get_device_name(0)}")
    print(f"  CUDA version: {torch.version.cuda}")
    allocated_gb = torch.cuda.memory_allocated(0) / bytes_per_gb
    print(f"  Memory allocated: {allocated_gb:.2f} GB")
    reserved_gb = torch.cuda.memory_reserved(0) / bytes_per_gb
    print(f"  Memory reserved: {reserved_gb:.2f} GB")
    print("\n✓ GPU ready")

## 2. Mount Google Drive

In [None]:
from google.colab import drive
import os

drive.mount('/content/drive')

# Directory on Drive where training checkpoints will be stored.
checkpoint_dir = '/content/drive/MyDrive/chess_checkpoints'
test_file = f"{checkpoint_dir}/test.txt"
test_payload = "Colab setup test"

# Round-trip a small file through Drive: create the directory, write, read
# back, and delete. Any filesystem failure (missing mount, read-only Drive,
# quota) raises OSError, so it is caught here and reported as a failed check
# instead of aborting the cell before the failure message can be printed.
try:
    os.makedirs(checkpoint_dir, exist_ok=True)
    with open(test_file, 'w') as f:
        f.write(test_payload)
    with open(test_file) as f:
        round_trip_ok = f.read() == test_payload
    os.remove(test_file)
except OSError as err:
    print(f"✗ Drive write test failed: {err}")
else:
    if round_trip_ok:
        print("✓ Google Drive mounted and writable")
    else:
        # The file was written but did not read back with the same content.
        print("✗ Drive write test failed")

## 3. Clone Repository

In [None]:
# Remove existing repo if present
!rm -rf rl_chess_agent

# Clone
!git clone https://github.com/Capacap/rl_chess_agent.git
%cd rl_chess_agent

# Verify
import os
required_files = ['train.py', 'model/network.py', 'training/pipeline.py']
all_exist = all(os.path.exists(f) for f in required_files)

if all_exist:
    print("\n✓ Repository cloned successfully")
    !git log --oneline -3
else:
    print("\n✗ Missing required files")

## 4. Install Dependencies

In [None]:
!pip install -q -r requirements.txt

# Verify imports
try:
    import chess
    import numpy as np
    import torch
    from model.network import ChessNet
    from training.pipeline import training_pipeline
    
    print("✓ All dependencies installed")
    print(f"  chess: {chess.__version__}")
    print(f"  numpy: {np.__version__}")
    print(f"  torch: {torch.__version__}")
except ImportError as e:
    print(f"✗ Import failed: {e}")

## 5. Test Network Forward Pass (GPU)

In [None]:
import torch
from model.network import ChessNet
from encoding.state import encode_board
from encoding.move import create_legal_move_mask
import chess

# Create network and move it to the GPU, falling back to CPU when CUDA is
# not available.
network = ChessNet(channels=64, num_blocks=4)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
network.to(device)
# Put the network in evaluation mode before inference so that any
# normalization/dropout layers do not run in training mode on this single
# position. NOTE(review): assumes ChessNet is a torch.nn.Module — confirm.
network.eval()

# Test forward pass on the standard starting position.
board = chess.Board()
state = encode_board(board).to(device)
# The legal-move mask is converted to a bool tensor and given a leading
# dimension of size 1 before being moved to the same device as the network.
mask = torch.tensor(create_legal_move_mask(board), dtype=torch.bool).unsqueeze(0).to(device)

# Gradients are not needed for a pure inference check.
with torch.no_grad():
    policy, value = network(state, mask)

print(f"Device: {device}")
print(f"Policy shape: {policy.shape}")
print(f"Value shape: {value.shape}")
print(f"Policy sum (should be ~1.0): {policy.sum().item():.4f}")
print(f"Value range: [{value.min().item():.4f}, {value.max().item():.4f}]")

if device.type == 'cuda':
    print("\n✓ GPU inference working")
else:
    print("\n⚠ Using CPU (slower but functional)")

## 6. Test Mini Training Run (1 iteration)

In [None]:
# Smoke-test the training entry point with deliberately small settings:
# 1 iteration, 5 games per iteration, 10 simulations, 5 arena games.
# The checkpoint directory is set to checkpoints/test_run, which is the
# location the following verification and backup steps inspect.
!python train.py \
  --iterations 1 \
  --games-per-iter 5 \
  --simulations 10 \
  --arena-games 5 \
  --checkpoint-dir checkpoints/test_run

## 7. Verify Checkpoint Saved

In [None]:
import os
import glob

# Collect the artifacts the mini training run is expected to leave behind:
# at least one *.pt file and a training.log in the test run directory.
run_dir = "checkpoints/test_run"
checkpoints = glob.glob(f"{run_dir}/*.pt")
log_exists = os.path.exists(f"{run_dir}/training.log")

print(f"Checkpoints found: {len(checkpoints)}")
print(f"Training log exists: {log_exists}")

# List every checkpoint in sorted order with its size in MB; when no
# checkpoints were found the loop body simply never runs.
bytes_per_mb = 1024 * 1024
for cp in sorted(checkpoints):
    size_mb = os.path.getsize(cp) / bytes_per_mb
    print(f"  {cp}: {size_mb:.1f} MB")

# The step passes only when both a checkpoint file and the log exist.
step_passed = bool(checkpoints) and log_exists
print("\n✓ Checkpoint save/load working" if step_passed else "\n✗ Checkpoint creation failed")

## 8. Test Drive Backup

In [None]:
# Copy test checkpoint to Drive
!cp -r checkpoints/test_run /content/drive/MyDrive/chess_checkpoints/

# Verify
import os
drive_path = "/content/drive/MyDrive/chess_checkpoints/test_run"

if os.path.exists(drive_path):
    print("✓ Drive backup working")
    print(f"  Backed up to: {drive_path}")
    
    # Clean up test files
    !rm -rf checkpoints/test_run
    !rm -rf /content/drive/MyDrive/chess_checkpoints/test_run
    print("  Test files cleaned up")
else:
    print("✗ Drive backup failed")

## Summary

If every check above shows ✓ (step 6 prints training output rather than a ✓), your Colab environment is ready for training.

**Next steps:**
1. Close this notebook
2. Open `train.ipynb`
3. Configure training parameters (Step 5)
4. Launch training (Step 6)
5. Enable background execution (Runtime → Background execution)

**Estimated training times:**
- Quick test (5 iter, 50 games): 2-3 hours
- Development (10 iter, 50 games): 12-15 hours
- Production (15 iter, 100 games): 24-30 hours