In [1]:
# Add this to the first cell of your notebook
%load_ext autoreload
%autoreload 2  # Reload all modules (except those excluded) before executing code

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install tqdm psutil plotly kaleido --quiet
import os
import sys
import random
import numpy as np
import time
import threading
import IPython
from google.colab import output
from datetime import datetime

In [4]:
# Ensure version compatibility with local setup
print("Installing compatible package versions...")

# Install specific versions to match local setup
!pip install torch==2.5.1 numpy==2.0.1 --quiet

# Reload modules to ensure changes take effect
%load_ext autoreload
%autoreload 2

# Verify PyTorch and NumPy versions after installation
!python -c "import torch; print(f'PyTorch post-install: {torch.__version__}')"
!python -c "import numpy; print(f'NumPy post-install: {numpy.__version__}')"

# Force CUDA setup for PyTorch
import torch
print(f"CUDA setup: available={torch.cuda.is_available()}, device count={torch.cuda.device_count() if torch.cuda.is_available() else 0}")
if torch.cuda.is_available():
    print(f"Current CUDA device: {torch.cuda.current_device()}, name: {torch.cuda.get_device_name()}")

Installing compatible package versions...
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
PyTorch post-install: 2.5.1+cu124
NumPy post-install: 2.0.1
CUDA setup: available=True, device count=1
Current CUDA device: 0, name: Tesla T4


In [5]:
# Set path to your project on Google Drive
DRIVE_PATH = '/content/drive/MyDrive/CatanRL'

# Change to the project directory
%cd {DRIVE_PATH}



/content/drive/MyDrive/CatanRL


In [6]:
# Run this in a Colab cell to check device handling
import torch

# Check CUDA availability
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device count: {torch.cuda.device_count()}")
    print(f"Current CUDA device: {torch.cuda.current_device()}")
    print(f"CUDA device name: {torch.cuda.get_device_name()}")

# Check device of your model
from AlphaZero.core.network import DeepCatanNetwork
model = DeepCatanNetwork(992, 200, 256)

# Try loading your checkpoint
checkpoint_path = '/content/drive/MyDrive/CatanRL/models/best_model.pt'
checkpoint = torch.load(checkpoint_path, map_location='cpu')  # Load to CPU first

# Print device information
print(f"\nModel device before loading: {next(model.parameters()).device}")

# Check devices in checkpoint
if 'network_state_dict' in checkpoint:
    sample_key = list(checkpoint['network_state_dict'].keys())[0]
    sample_tensor = checkpoint['network_state_dict'][sample_key]
    print(f"Checkpoint tensor device: {sample_tensor.device}")

# Try explicit device handling
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Target device: {device}")

# Move model to device and then load state dict
model = model.to(device)
print(f"Model device after .to(device): {next(model.parameters()).device}")

# Move checkpoint tensors to the right device
for key in checkpoint['network_state_dict']:
    checkpoint['network_state_dict'][key] = checkpoint['network_state_dict'][key].to(device)

# Now load the state dict
model.load_state_dict(checkpoint['network_state_dict'])
print(f"Model device after loading: {next(model.parameters()).device}")

CUDA available: True
CUDA device count: 1
Current CUDA device: 0
CUDA device name: Tesla T4


  checkpoint = torch.load(checkpoint_path, map_location='cpu')  # Load to CPU first



Model device before loading: cpu
Checkpoint tensor device: cpu
Target device: cuda
Model device after .to(device): cuda:0
Model device after loading: cuda:0


In [6]:


def keep_colab_alive():
    """
    This function runs in a separate thread and periodically
    executes JavaScript code to prevent Google Colab from disconnecting.
    """
    while True:
        # Execute JavaScript to simulate user activity
        try:
            output.eval_js('new Date().toISOString()')
            # Make a simple fetch request to keep the connection active
            output.eval_js('fetch("https://httpbin.org/get")')
            print("♥", end="", flush=True)  # Visual heartbeat
        except:
            pass
        time.sleep(90)  # Check every 45 seconds

# Start the anti-disconnect thread
print("Starting anti-disconnect protection...")
keep_alive_thread = threading.Thread(target=keep_colab_alive, daemon=True)
keep_alive_thread.start()


Starting anti-disconnect protection...


In [None]:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Set random seeds for reproducibility
def set_random_seeds(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

set_random_seeds()

# Step 5: Set up training parameters
# You can customize these parameters
import argparse

# Parse arguments from command line or use defaults
# This allows you to change parameters when running the notebook
parser = argparse.ArgumentParser(description="AlphaZero Catan Training")
parser.add_argument("--iterations", type=int, default=50, help="Number of training iterations")
parser.add_argument("--resume", type=str, default=None, help="Path to checkpoint to resume from")
parser.add_argument("--games", type=int, default=20, help="Number of self-play games per iteration")
parser.add_argument("--sims", type=int, default=100, help="Number of MCTS simulations per move")
parser.add_argument("--eval-games", type=int, default=10, help="Number of evaluation games")
parser.add_argument("--quick", action="store_true", help="Quick training (1 iteration, 2 games)")
parser.add_argument("--medium", action="store_true", help="Medium training (10 iterations, 5 games)")
parser.add_argument("--full", action="store_true", help="Full training (50 iterations, 20 games)")
parser.add_argument("--overnight", action="store_true", help="Overnight training (100 iterations, 30 games)")

# Parse the arguments directly
# args = parser.parse_args(['--overnight', '--resume', '/content/drive/MyDrive/CatanRL/models/best_model.pt'])  # Using existing model
args = parser.parse_args(['--overnight'])
# Configure training mode
if args.quick:
    print("Running in QUICK mode")
    args.iterations = 1
    args.games = 2
    args.sims = 10
    args.eval_games = 2
elif args.medium:
    print("Running in MEDIUM mode")
    args.iterations = 10
    args.games = 5
    args.sims = 50
    args.eval_games = 5
elif args.full:
    print("Running in FULL mode")
    args.iterations = 50
    args.games = 20
    args.sims = 100
    args.eval_games = 10
elif args.overnight:
    print("Running in OVERNIGHT mode")
    args.iterations = 100
    args.games = 30
    args.sims = 150
    args.eval_games = 15

print(f"\n=== AlphaZero Catan Training ===")
print(f"Iterations: {args.iterations}")
print(f"Self-play games per iteration: {args.games}")
print(f"MCTS simulations per move: {args.sims}")
print(f"Resume from: {args.resume if args.resume else 'Starting fresh'}")

# Step 6: Get configuration and modify for GPU
from AlphaZero.utils.config import get_config
config = get_config()

# Customize config with command line arguments
config['num_iterations'] = args.iterations
config['self_play_games'] = args.games
config['num_simulations'] = args.sims
config['eval_games'] = args.eval_games
config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

# Step 7: Create logs and models directories
!mkdir -p logs
!mkdir -p models
!mkdir -p plots

# Step 8: Start the training
from AlphaZero.training.training_pipeline import TrainingPipeline

try:
    # Start time tracking
    start_time = time.time()

    # Create the training pipeline
    pipeline = TrainingPipeline(config)

    # Train for the specified iterations
    pipeline.train(args.iterations, resume_from=args.resume)

    # Calculate total training time
    total_time = time.time() - start_time
    hours = int(total_time // 3600)
    minutes = int((total_time % 3600) // 60)
    seconds = int(total_time % 60)

    print(f"\nTraining completed in {hours}h {minutes}m {seconds}s")

except KeyboardInterrupt:
    print("\nTraining interrupted! Saving checkpoint...")
    pipeline.save_model(pipeline.current_iteration)
    print("Checkpoint saved. You can resume with this checkpoint later.")
except Exception as e:
    print(f"Error during training: {e}")
    import traceback
    traceback.print_exc()

# Step 9: Copy results back to Google Drive
!mkdir -p {DRIVE_PATH}/models_{timestamp}
!mkdir -p {DRIVE_PATH}/logs_{timestamp}
!mkdir -p {DRIVE_PATH}/plots_{timestamp}

!cp -r models/* {DRIVE_PATH}/models_{timestamp}/
!cp -r logs/* {DRIVE_PATH}/logs_{timestamp}/
!cp -r plots/* {DRIVE_PATH}/plots_{timestamp}/

print(f"\nTraining results saved to Google Drive in folders with timestamp {timestamp}")

Using device: cuda
Running in OVERNIGHT mode

=== AlphaZero Catan Training ===
Iterations: 100
Self-play games per iteration: 30
MCTS simulations per move: 150
Resume from: Starting fresh
[2025-04-20 17:56:01] AlphaZero Catan Training started at 20250420_175601
[2025-04-20 17:56:01] Configuration: {'state_dim': 992, 'action_dim': 200, 'hidden_dim': 256, 'learning_rate': 0.001, 'num_iterations': 100, 'self_play_games': 30, 'eval_games': 15, 'epochs': 10, 'batch_size': 128, 'buffer_size': 100000, 'num_simulations': 150, 'c_puct': 1.5, 'mcts_batch_size': 8, 'max_moves': 200, 'model_dir': 'models', 'device': 'cuda'}
[2025-04-20 17:56:01] 
=== Iteration 1/100 ===
[2025-04-20 17:56:01] Starting self-play...


Self-play games:   3%|▎         | 1/30 [00:01<00:35,  1.21s/it]


Game 1: Player 0 won with 11 VP


Self-play games:   7%|▋         | 2/30 [00:02<00:37,  1.33s/it]


Game 2: Player 2 won with 8 VP


Self-play games:  10%|█         | 3/30 [00:03<00:33,  1.25s/it]


Game 3: Player 1 won with 10 VP


Self-play games:  13%|█▎        | 4/30 [00:04<00:28,  1.11s/it]


Game 4: Player 0 won with 10 VP


Self-play games:  17%|█▋        | 5/30 [00:05<00:27,  1.09s/it]


Game 5: Player 0 won with 10 VP


Self-play games:  20%|██        | 6/30 [00:06<00:25,  1.08s/it]


Game 6: Player 0 won with 10 VP


Self-play games:  23%|██▎       | 7/30 [00:08<00:28,  1.22s/it]


Game 7: Player 2 won with 10 VP


Self-play games:  27%|██▋       | 8/30 [00:09<00:27,  1.25s/it]


Game 8: Player 2 won with 7 VP


Self-play games:  30%|███       | 9/30 [00:10<00:24,  1.19s/it]


Game 9: Player 0 won with 11 VP


Self-play games:  33%|███▎      | 10/30 [00:12<00:25,  1.29s/it]


Game 10: Player 1 won with 7 VP


Self-play games:  37%|███▋      | 11/30 [00:13<00:25,  1.33s/it]


Game 11: Player 1 won with 10 VP


Self-play games:  40%|████      | 12/30 [00:14<00:23,  1.29s/it]


Game 12: Player 2 won with 9 VP


Self-play games:  43%|████▎     | 13/30 [00:15<00:20,  1.19s/it]


Game 13: Player 3 won with 10 VP


Self-play games:  47%|████▋     | 14/30 [00:17<00:20,  1.31s/it]


Game 14: Player 0 won with 7 VP


Self-play games:  50%|█████     | 15/30 [00:18<00:19,  1.30s/it]


Game 15: Player 0 won with 10 VP


Self-play games:  53%|█████▎    | 16/30 [00:20<00:19,  1.40s/it]


Game 16: Player 2 won with 8 VP


Self-play games:  57%|█████▋    | 17/30 [00:21<00:17,  1.35s/it]


Game 17: Player 1 won with 10 VP


Self-play games:  60%|██████    | 18/30 [00:22<00:16,  1.38s/it]


Game 18: Player 1 won with 9 VP


Self-play games:  63%|██████▎   | 19/30 [00:23<00:13,  1.25s/it]


Game 19: Player 2 won with 10 VP


Self-play games:  67%|██████▋   | 20/30 [00:25<00:13,  1.32s/it]


Game 20: Player 0 won with 9 VP


Self-play games:  70%|███████   | 21/30 [00:26<00:11,  1.23s/it]


Game 21: Player 0 won with 10 VP


Self-play games:  73%|███████▎  | 22/30 [00:27<00:10,  1.26s/it]


Game 22: Player 0 won with 7 VP


Self-play games:  77%|███████▋  | 23/30 [00:29<00:09,  1.37s/it]


Game 23: Player 0 won with 7 VP


Self-play games:  80%|████████  | 24/30 [00:30<00:08,  1.37s/it]


Game 24: Player 1 won with 9 VP


Self-play games:  83%|████████▎ | 25/30 [00:31<00:06,  1.26s/it]


Game 25: Player 0 won with 10 VP


Self-play games:  87%|████████▋ | 26/30 [00:32<00:04,  1.24s/it]


Game 26: Player 3 won with 10 VP


Self-play games:  90%|█████████ | 27/30 [00:34<00:03,  1.32s/it]


Game 27: Player 0 won with 7 VP


Self-play games:  93%|█████████▎| 28/30 [00:35<00:02,  1.32s/it]


Game 28: Player 0 won with 7 VP


Self-play games:  97%|█████████▋| 29/30 [00:37<00:01,  1.32s/it]


Game 29: Player 3 won with 10 VP


Self-play games: 100%|██████████| 30/30 [00:38<00:00,  1.28s/it]


Game 30: Player 0 won with 11 VP
[2025-04-20 17:56:40] Self-play completed in 38.35s, generated 5984 examples (156.0 games/s)
[2025-04-20 17:56:40] Training network...





Epoch 1/10: Loss 0.4477 (Value 0.4477, Policy 0.0000)
Epoch 2/10: Loss 0.3430 (Value 0.3430, Policy 0.0000)
Epoch 3/10: Loss 0.2526 (Value 0.2526, Policy 0.0000)
Epoch 4/10: Loss 0.1384 (Value 0.1384, Policy 0.0000)
Epoch 5/10: Loss 0.0663 (Value 0.0663, Policy 0.0000)
Epoch 6/10: Loss 0.0251 (Value 0.0251, Policy 0.0000)
Epoch 7/10: Loss 0.0280 (Value 0.0280, Policy 0.0000)
Epoch 8/10: Loss 0.0162 (Value 0.0162, Policy 0.0000)
Epoch 9/10: Loss 0.0067 (Value 0.0067, Policy 0.0000)
Epoch 10/10: Loss 0.0130 (Value 0.0130, Policy 0.0000)
[2025-04-20 17:56:48] Training completed in 7.96s
[2025-04-20 17:56:48] Iteration 1 done in 46.32s
[2025-04-20 17:56:48] Resource usage: CPU 14.8%, RAM 5.2%, GPU peak memory 0.08 GB
[2025-04-20 17:56:48] 
=== Iteration 2/100 ===
[2025-04-20 17:56:48] Starting self-play...


Self-play games:   3%|▎         | 1/30 [00:01<00:35,  1.22s/it]


Game 1: Player 3 won with 9 VP


Self-play games:   7%|▋         | 2/30 [00:02<00:37,  1.33s/it]


Game 2: Player 3 won with 10 VP


Self-play games:  10%|█         | 3/30 [00:04<00:36,  1.37s/it]


Game 3: Player 2 won with 8 VP


Self-play games:  13%|█▎        | 4/30 [00:05<00:32,  1.25s/it]


Game 4: Player 0 won with 10 VP


Self-play games:  17%|█▋        | 5/30 [00:05<00:26,  1.06s/it]


Game 5: Player 0 won with 10 VP


Self-play games:  20%|██        | 6/30 [00:06<00:23,  1.04it/s]


Game 6: Player 0 won with 10 VP


Self-play games:  23%|██▎       | 7/30 [00:07<00:24,  1.05s/it]


Game 7: Player 0 won with 10 VP


Self-play games:  27%|██▋       | 8/30 [00:08<00:23,  1.09s/it]


Game 8: Player 0 won with 10 VP


Self-play games:  30%|███       | 9/30 [00:10<00:26,  1.26s/it]


Game 9: Player 1 won with 7 VP


Self-play games:  33%|███▎      | 10/30 [00:11<00:21,  1.10s/it]


Game 10: Player 0 won with 10 VP


Self-play games:  37%|███▋      | 11/30 [00:12<00:23,  1.23s/it]


Game 11: Player 0 won with 8 VP


Self-play games:  40%|████      | 12/30 [00:14<00:23,  1.31s/it]


Game 12: Player 2 won with 8 VP


Self-play games:  43%|████▎     | 13/30 [00:15<00:19,  1.17s/it]


Game 13: Player 3 won with 10 VP


Self-play games:  47%|████▋     | 14/30 [00:16<00:19,  1.24s/it]


Game 14: Player 0 won with 7 VP


Self-play games:  50%|█████     | 15/30 [00:17<00:17,  1.18s/it]


Game 15: Player 0 won with 10 VP


Self-play games:  53%|█████▎    | 16/30 [00:19<00:17,  1.24s/it]


Game 16: Player 0 won with 8 VP


Self-play games:  57%|█████▋    | 17/30 [00:20<00:16,  1.27s/it]


Game 17: Player 3 won with 8 VP


Self-play games:  60%|██████    | 18/30 [00:21<00:14,  1.20s/it]


Game 18: Player 2 won with 10 VP


Self-play games:  63%|██████▎   | 19/30 [00:23<00:14,  1.31s/it]


Game 19: Player 1 won with 11 VP


Self-play games:  67%|██████▋   | 20/30 [00:24<00:13,  1.33s/it]


Game 20: Player 1 won with 7 VP


Self-play games:  70%|███████   | 21/30 [00:25<00:12,  1.34s/it]


Game 21: Player 1 won with 9 VP


Self-play games:  73%|███████▎  | 22/30 [00:26<00:09,  1.16s/it]


Game 22: Player 0 won with 10 VP


Self-play games:  77%|███████▋  | 23/30 [00:27<00:08,  1.23s/it]


Game 23: Player 1 won with 7 VP


Self-play games:  80%|████████  | 24/30 [00:29<00:07,  1.32s/it]


Game 24: Player 2 won with 11 VP


Self-play games:  83%|████████▎ | 25/30 [00:30<00:06,  1.31s/it]


Game 25: Player 0 won with 8 VP


Self-play games:  87%|████████▋ | 26/30 [00:31<00:05,  1.27s/it]


Game 26: Player 1 won with 10 VP


Self-play games:  90%|█████████ | 27/30 [00:33<00:03,  1.32s/it]


Game 27: Player 0 won with 8 VP


Self-play games:  93%|█████████▎| 28/30 [00:34<00:02,  1.32s/it]


Game 28: Player 3 won with 9 VP


Self-play games:  97%|█████████▋| 29/30 [00:35<00:01,  1.26s/it]


Game 29: Player 0 won with 10 VP


Self-play games: 100%|██████████| 30/30 [00:36<00:00,  1.23s/it]


Game 30: Player 0 won with 8 VP
[2025-04-20 17:57:25] Self-play completed in 36.98s, generated 5685 examples (153.7 games/s)
[2025-04-20 17:57:25] Training network...





Epoch 1/10: Loss 0.0705 (Value 0.0705, Policy 0.0000)
Epoch 2/10: Loss 0.0149 (Value 0.0149, Policy 0.0000)
Epoch 3/10: Loss 0.0187 (Value 0.0187, Policy 0.0000)
Epoch 4/10: Loss 0.0012 (Value 0.0012, Policy 0.0000)
Epoch 5/10: Loss 0.0008 (Value 0.0008, Policy 0.0000)
Epoch 6/10: Loss 0.0006 (Value 0.0006, Policy 0.0000)
Epoch 7/10: Loss 0.0017 (Value 0.0017, Policy 0.0000)
Epoch 8/10: Loss 0.0175 (Value 0.0175, Policy 0.0000)
Epoch 9/10: Loss 0.0051 (Value 0.0051, Policy 0.0000)
Epoch 10/10: Loss 0.0008 (Value 0.0008, Policy 0.0000)
[2025-04-20 17:57:38] Training completed in 12.92s
[2025-04-20 17:57:38] Evaluating network...


Evaluation games:   7%|▋         | 1/15 [00:00<00:03,  3.65it/s]

[2025-04-20 17:57:38] Game 1: duration=0.27s, moves=200, our_VP=9, winner=0 VP=9


Evaluation games:  13%|█▎        | 2/15 [00:00<00:03,  4.07it/s]

[2025-04-20 17:57:38] Game 2: duration=0.23s, moves=153, our_VP=3, winner=2 VP=10


Evaluation games:  20%|██        | 3/15 [00:00<00:02,  4.19it/s]

[2025-04-20 17:57:38] Game 3: duration=0.23s, moves=200, our_VP=5, winner=0 VP=5


Evaluation games:  27%|██▋       | 4/15 [00:01<00:02,  3.85it/s]

[2025-04-20 17:57:39] Game 4: duration=0.29s, moves=184, our_VP=10, winner=0 VP=10


Evaluation games:  40%|████      | 6/15 [00:01<00:02,  4.49it/s]

[2025-04-20 17:57:39] Game 5: duration=0.20s, moves=200, our_VP=4, winner=3 VP=5
[2025-04-20 17:57:39] Game 6: duration=0.19s, moves=200, our_VP=4, winner=0 VP=4


Evaluation games:  47%|████▋     | 7/15 [00:01<00:01,  4.32it/s]

[2025-04-20 17:57:39] Game 7: duration=0.25s, moves=200, our_VP=6, winner=0 VP=6


Evaluation games:  53%|█████▎    | 8/15 [00:01<00:01,  4.20it/s]

[2025-04-20 17:57:39] Game 8: duration=0.25s, moves=200, our_VP=3, winner=2 VP=7


Evaluation games:  67%|██████▋   | 10/15 [00:02<00:01,  4.40it/s]

[2025-04-20 17:57:40] Game 9: duration=0.26s, moves=200, our_VP=5, winner=0 VP=5
[2025-04-20 17:57:40] Game 10: duration=0.19s, moves=200, our_VP=3, winner=1 VP=5


Evaluation games:  73%|███████▎  | 11/15 [00:02<00:00,  4.44it/s]

[2025-04-20 17:57:40] Game 11: duration=0.22s, moves=200, our_VP=4, winner=0 VP=4


Evaluation games:  87%|████████▋ | 13/15 [00:03<00:00,  4.50it/s]

[2025-04-20 17:57:40] Game 12: duration=0.30s, moves=200, our_VP=9, winner=0 VP=9
[2025-04-20 17:57:41] Game 13: duration=0.16s, moves=200, our_VP=4, winner=0 VP=4


Evaluation games:  93%|█████████▎| 14/15 [00:03<00:00,  3.82it/s]

[2025-04-20 17:57:41] Game 14: duration=0.35s, moves=200, our_VP=8, winner=0 VP=8


Evaluation games: 100%|██████████| 15/15 [00:03<00:00,  4.09it/s]


[2025-04-20 17:57:41] Game 15: duration=0.26s, moves=200, our_VP=6, winner=0 VP=6
[2025-04-20 17:57:41] Evaluated 15 games in 3.67s (4.09 games/s)
[2025-04-20 17:57:41] Evaluation results: win_rate=0.73, avg_vp=5.53, avg_length=195.80, total_moves=2937
[2025-04-20 17:57:41] Eval resource usage: CPU 20.8%, RAM 5.3%, GPU peak memory 0.07 GB
[2025-04-20 17:57:41] Evaluation completed in 3.68s
[2025-04-20 17:57:41] Iteration 2 done in 53.57s
[2025-04-20 17:57:41] Resource usage: CPU 0.0%, RAM 5.3%, GPU peak memory 0.06 GB
[2025-04-20 17:57:41] 
=== Iteration 3/100 ===
[2025-04-20 17:57:41] Starting self-play...


Self-play games:   3%|▎         | 1/30 [00:01<00:40,  1.39s/it]


Game 1: Player 2 won with 9 VP


Self-play games:   7%|▋         | 2/30 [00:02<00:42,  1.50s/it]


Game 2: Player 1 won with 10 VP


Self-play games:  10%|█         | 3/30 [00:04<00:40,  1.52s/it]


Game 3: Player 2 won with 9 VP


Self-play games:  13%|█▎        | 4/30 [00:05<00:37,  1.45s/it]


Game 4: Player 2 won with 10 VP


Self-play games:  17%|█▋        | 5/30 [00:07<00:34,  1.36s/it]


Game 5: Player 1 won with 9 VP


Self-play games:  20%|██        | 6/30 [00:08<00:32,  1.34s/it]


Game 6: Player 0 won with 6 VP


Self-play games:  23%|██▎       | 7/30 [00:09<00:26,  1.16s/it]


Game 7: Player 2 won with 10 VP


Self-play games:  27%|██▋       | 8/30 [00:10<00:26,  1.22s/it]


Game 8: Player 1 won with 6 VP


Self-play games:  30%|███       | 9/30 [00:11<00:26,  1.28s/it]


Game 9: Player 3 won with 7 VP


Self-play games:  33%|███▎      | 10/30 [00:12<00:24,  1.22s/it]


Game 10: Player 3 won with 10 VP


Self-play games:  37%|███▋      | 11/30 [00:14<00:24,  1.26s/it]


Game 11: Player 2 won with 9 VP


Self-play games:  40%|████      | 12/30 [00:15<00:23,  1.29s/it]


Game 12: Player 2 won with 10 VP


Self-play games:  43%|████▎     | 13/30 [00:16<00:21,  1.25s/it]


Game 13: Player 0 won with 11 VP


Self-play games:  47%|████▋     | 14/30 [00:18<00:20,  1.29s/it]


Game 14: Player 3 won with 10 VP


Self-play games:  50%|█████     | 15/30 [00:19<00:18,  1.27s/it]


Game 15: Player 3 won with 10 VP


Self-play games:  53%|█████▎    | 16/30 [00:20<00:16,  1.20s/it]


Game 16: Player 2 won with 10 VP


Self-play games:  57%|█████▋    | 17/30 [00:21<00:14,  1.11s/it]


Game 17: Player 0 won with 11 VP


Self-play games:  60%|██████    | 18/30 [00:22<00:13,  1.13s/it]


Game 18: Player 0 won with 8 VP


Self-play games:  63%|██████▎   | 19/30 [00:23<00:13,  1.20s/it]


Game 19: Player 3 won with 8 VP


Self-play games:  67%|██████▋   | 20/30 [00:25<00:12,  1.24s/it]


Game 20: Player 0 won with 7 VP


Self-play games:  70%|███████   | 21/30 [00:26<00:11,  1.27s/it]


Game 21: Player 1 won with 7 VP


Self-play games:  73%|███████▎  | 22/30 [00:28<00:10,  1.32s/it]


Game 22: Player 1 won with 10 VP


Self-play games:  77%|███████▋  | 23/30 [00:28<00:08,  1.20s/it]


Game 23: Player 1 won with 5 VP


Self-play games:  80%|████████  | 24/30 [00:30<00:07,  1.21s/it]


Game 24: Player 2 won with 7 VP


Self-play games:  83%|████████▎ | 25/30 [00:31<00:06,  1.25s/it]


Game 25: Player 3 won with 10 VP


Self-play games:  87%|████████▋ | 26/30 [00:33<00:05,  1.37s/it]


Game 26: Player 2 won with 10 VP


Self-play games:  90%|█████████ | 27/30 [00:34<00:03,  1.31s/it]


Game 27: Player 0 won with 11 VP


Self-play games:  93%|█████████▎| 28/30 [00:35<00:02,  1.35s/it]


Game 28: Player 3 won with 8 VP


Self-play games:  97%|█████████▋| 29/30 [00:37<00:01,  1.33s/it]


Game 29: Player 0 won with 7 VP


Self-play games: 100%|██████████| 30/30 [00:38<00:00,  1.29s/it]


Game 30: Player 2 won with 9 VP
[2025-04-20 17:58:20] Self-play completed in 38.75s, generated 6040 examples (155.9 games/s)
[2025-04-20 17:58:20] Training network...





Epoch 1/10: Loss 0.0470 (Value 0.0470, Policy 0.0000)
Epoch 2/10: Loss 0.0142 (Value 0.0142, Policy 0.0000)
Epoch 3/10: Loss 0.0026 (Value 0.0026, Policy 0.0000)
Epoch 4/10: Loss 0.0088 (Value 0.0088, Policy 0.0000)
Epoch 5/10: Loss 0.0085 (Value 0.0085, Policy 0.0000)
Epoch 6/10: Loss 0.0027 (Value 0.0027, Policy 0.0000)
Epoch 7/10: Loss 0.0008 (Value 0.0008, Policy 0.0000)
Epoch 8/10: Loss 0.0006 (Value 0.0006, Policy 0.0000)
Epoch 9/10: Loss 0.0005 (Value 0.0005, Policy 0.0000)
Epoch 10/10: Loss 0.0004 (Value 0.0004, Policy 0.0000)
[2025-04-20 17:58:33] Training completed in 13.51s
[2025-04-20 17:58:33] Iteration 3 done in 52.26s
[2025-04-20 17:58:33] Resource usage: CPU 21.1%, RAM 5.3%, GPU peak memory 0.07 GB
[2025-04-20 17:58:33] 
=== Iteration 4/100 ===
[2025-04-20 17:58:33] Starting self-play...


Self-play games:   3%|▎         | 1/30 [00:01<00:41,  1.43s/it]


Game 1: Player 0 won with 9 VP


Self-play games:   7%|▋         | 2/30 [00:02<00:37,  1.32s/it]


Game 2: Player 0 won with 10 VP


Self-play games:  10%|█         | 3/30 [00:04<00:37,  1.40s/it]


Game 3: Player 3 won with 9 VP


Self-play games:  13%|█▎        | 4/30 [00:05<00:33,  1.30s/it]


Game 4: Player 3 won with 10 VP


Self-play games:  17%|█▋        | 5/30 [00:06<00:28,  1.14s/it]


Game 5: Player 0 won with 10 VP


Self-play games:  20%|██        | 6/30 [00:06<00:23,  1.04it/s]


Game 6: Player 1 won with 10 VP


Self-play games:  23%|██▎       | 7/30 [00:08<00:27,  1.20s/it]


Game 7: Player 3 won with 10 VP


Self-play games:  27%|██▋       | 8/30 [00:09<00:27,  1.24s/it]


Game 8: Player 2 won with 7 VP


Self-play games:  30%|███       | 9/30 [00:11<00:29,  1.39s/it]


Game 9: Player 0 won with 9 VP


Self-play games:  33%|███▎      | 10/30 [00:12<00:25,  1.25s/it]


Game 10: Player 0 won with 10 VP


Self-play games:  37%|███▋      | 11/30 [00:14<00:25,  1.34s/it]


Game 11: Player 3 won with 8 VP


Self-play games:  40%|████      | 12/30 [00:15<00:24,  1.38s/it]


Game 12: Player 2 won with 9 VP


Self-play games:  43%|████▎     | 13/30 [00:16<00:22,  1.34s/it]


Game 13: Player 2 won with 10 VP


Self-play games:  47%|████▋     | 14/30 [00:18<00:22,  1.39s/it]


Game 14: Player 3 won with 8 VP


Self-play games:  50%|█████     | 15/30 [00:19<00:21,  1.41s/it]


Game 15: Player 3 won with 9 VP


Self-play games:  53%|█████▎    | 16/30 [00:20<00:17,  1.27s/it]


Game 16: Player 0 won with 10 VP


Self-play games:  57%|█████▋    | 17/30 [00:21<00:14,  1.12s/it]


Game 17: Player 3 won with 10 VP


Self-play games:  60%|██████    | 18/30 [00:22<00:14,  1.24s/it]


Game 18: Player 1 won with 10 VP


Self-play games:  63%|██████▎   | 19/30 [00:24<00:14,  1.34s/it]


Game 19: Player 1 won with 8 VP


Self-play games:  67%|██████▋   | 20/30 [00:26<00:13,  1.40s/it]


Game 20: Player 1 won with 8 VP


Self-play games:  70%|███████   | 21/30 [00:27<00:12,  1.38s/it]


Game 21: Player 1 won with 7 VP


Self-play games:  73%|███████▎  | 22/30 [00:28<00:10,  1.28s/it]


Game 22: Player 0 won with 10 VP


Self-play games:  77%|███████▋  | 23/30 [00:29<00:08,  1.28s/it]


Game 23: Player 0 won with 7 VP


Self-play games:  80%|████████  | 24/30 [00:31<00:07,  1.30s/it]


Game 24: Player 2 won with 7 VP


Self-play games:  83%|████████▎ | 25/30 [00:32<00:06,  1.27s/it]


Game 25: Player 0 won with 10 VP


Self-play games:  87%|████████▋ | 26/30 [00:33<00:05,  1.34s/it]


Game 26: Player 0 won with 8 VP


Self-play games:  90%|█████████ | 27/30 [00:35<00:04,  1.38s/it]


Game 27: Player 0 won with 9 VP


Self-play games:  93%|█████████▎| 28/30 [00:36<00:02,  1.43s/it]


Game 28: Player 1 won with 8 VP


Self-play games:  97%|█████████▋| 29/30 [00:37<00:01,  1.35s/it]


Game 29: Player 0 won with 10 VP


Self-play games: 100%|██████████| 30/30 [00:39<00:00,  1.32s/it]


Game 30: Player 0 won with 8 VP
[2025-04-20 17:59:13] Self-play completed in 39.71s, generated 6043 examples (152.2 games/s)
[2025-04-20 17:59:13] Training network...





Epoch 1/10: Loss 0.0758 (Value 0.0758, Policy 0.0000)
Epoch 2/10: Loss 0.0337 (Value 0.0337, Policy 0.0000)
Epoch 3/10: Loss 0.0160 (Value 0.0160, Policy 0.0000)
Epoch 4/10: Loss 0.0044 (Value 0.0044, Policy 0.0000)
Epoch 5/10: Loss 0.0045 (Value 0.0045, Policy 0.0000)
Epoch 6/10: Loss 0.0013 (Value 0.0013, Policy 0.0000)
Epoch 7/10: Loss 0.0009 (Value 0.0009, Policy 0.0000)
Epoch 8/10: Loss 0.0009 (Value 0.0009, Policy 0.0000)
Epoch 9/10: Loss 0.0006 (Value 0.0006, Policy 0.0000)
Epoch 10/10: Loss 0.0005 (Value 0.0005, Policy 0.0000)
[2025-04-20 17:59:26] Training completed in 12.74s
[2025-04-20 17:59:26] Evaluating network...


Evaluation games:   7%|▋         | 1/15 [00:00<00:03,  4.07it/s]

[2025-04-20 17:59:26] Game 1: duration=0.25s, moves=200, our_VP=7, winner=0 VP=7


Evaluation games:  13%|█▎        | 2/15 [00:00<00:03,  3.79it/s]

[2025-04-20 17:59:26] Game 2: duration=0.28s, moves=200, our_VP=6, winner=0 VP=6


Evaluation games:  20%|██        | 3/15 [00:00<00:03,  3.74it/s]

[2025-04-20 17:59:27] Game 3: duration=0.27s, moves=200, our_VP=6, winner=0 VP=6


Evaluation games:  27%|██▋       | 4/15 [00:01<00:02,  3.96it/s]

[2025-04-20 17:59:27] Game 4: duration=0.23s, moves=200, our_VP=6, winner=0 VP=6


Evaluation games:  33%|███▎      | 5/15 [00:01<00:02,  3.83it/s]

[2025-04-20 17:59:27] Game 5: duration=0.28s, moves=200, our_VP=5, winner=0 VP=5


Evaluation games:  40%|████      | 6/15 [00:01<00:02,  3.78it/s]

[2025-04-20 17:59:28] Game 6: duration=0.27s, moves=200, our_VP=9, winner=0 VP=9


Evaluation games:  47%|████▋     | 7/15 [00:01<00:02,  3.90it/s]

[2025-04-20 17:59:28] Game 7: duration=0.24s, moves=164, our_VP=11, winner=0 VP=11


Evaluation games:  53%|█████▎    | 8/15 [00:02<00:01,  3.91it/s]

[2025-04-20 17:59:28] Game 8: duration=0.25s, moves=200, our_VP=4, winner=0 VP=4


Evaluation games:  60%|██████    | 9/15 [00:02<00:01,  3.86it/s]

[2025-04-20 17:59:28] Game 9: duration=0.26s, moves=200, our_VP=8, winner=0 VP=8


Evaluation games:  67%|██████▋   | 10/15 [00:02<00:01,  3.98it/s]

[2025-04-20 17:59:29] Game 10: duration=0.23s, moves=193, our_VP=10, winner=0 VP=10


Evaluation games:  73%|███████▎  | 11/15 [00:02<00:01,  3.82it/s]

[2025-04-20 17:59:29] Game 11: duration=0.28s, moves=200, our_VP=7, winner=0 VP=7


Evaluation games:  80%|████████  | 12/15 [00:03<00:00,  3.84it/s]

[2025-04-20 17:59:29] Game 12: duration=0.26s, moves=200, our_VP=2, winner=3 VP=3


Evaluation games:  87%|████████▋ | 13/15 [00:03<00:00,  3.98it/s]

[2025-04-20 17:59:29] Game 13: duration=0.23s, moves=200, our_VP=4, winner=1 VP=5


Evaluation games:  93%|█████████▎| 14/15 [00:03<00:00,  3.84it/s]

[2025-04-20 17:59:30] Game 14: duration=0.28s, moves=200, our_VP=7, winner=0 VP=7


Evaluation games: 100%|██████████| 15/15 [00:03<00:00,  3.81it/s]

[2025-04-20 17:59:30] Game 15: duration=0.32s, moves=188, our_VP=10, winner=0 VP=10
[2025-04-20 17:59:30] Evaluated 15 games in 3.94s (3.81 games/s)
[2025-04-20 17:59:30] Evaluation results: win_rate=0.87, avg_vp=6.80, avg_length=196.33, total_moves=2945
[2025-04-20 17:59:30] Eval resource usage: CPU 20.9%, RAM 5.4%, GPU peak memory 0.07 GB
[2025-04-20 17:59:30] Evaluation completed in 3.94s
[2025-04-20 17:59:30] New best model at iteration 4 (win_rate=0.87)
[2025-04-20 17:59:30] Checkpoint saved: models/model_iter_4.pt





[2025-04-20 17:59:30] Best model saved: models/best_model.pt
[2025-04-20 17:59:30] Iteration 4 done in 56.64s
[2025-04-20 17:59:30] Resource usage: CPU 21.3%, RAM 5.3%, GPU peak memory 0.06 GB
[2025-04-20 17:59:30] 
=== Iteration 5/100 ===
[2025-04-20 17:59:30] Starting self-play...


Self-play games:   3%|▎         | 1/30 [00:01<00:38,  1.32s/it]


Game 1: Player 3 won with 10 VP


Self-play games:   7%|▋         | 2/30 [00:02<00:35,  1.29s/it]


Game 2: Player 0 won with 10 VP


Self-play games:  10%|█         | 3/30 [00:04<00:38,  1.42s/it]


Game 3: Player 0 won with 7 VP


Self-play games:  13%|█▎        | 4/30 [00:05<00:36,  1.42s/it]


Game 4: Player 0 won with 8 VP


Self-play games:  17%|█▋        | 5/30 [00:06<00:31,  1.27s/it]


Game 5: Player 2 won with 10 VP


Self-play games:  20%|██        | 6/30 [00:07<00:30,  1.28s/it]


Game 6: Player 2 won with 10 VP


Self-play games:  23%|██▎       | 7/30 [00:08<00:26,  1.14s/it]


Game 7: Player 0 won with 10 VP


Self-play games:  27%|██▋       | 8/30 [00:10<00:27,  1.23s/it]


Game 8: Player 0 won with 7 VP


Self-play games:  30%|███       | 9/30 [00:11<00:27,  1.29s/it]


Game 9: Player 2 won with 9 VP


Self-play games:  33%|███▎      | 10/30 [00:13<00:26,  1.33s/it]


Game 10: Player 1 won with 10 VP


Self-play games:  37%|███▋      | 11/30 [00:14<00:25,  1.35s/it]


Game 11: Player 1 won with 6 VP


Self-play games:  40%|████      | 12/30 [00:15<00:24,  1.35s/it]


Game 12: Player 2 won with 10 VP


Self-play games:  43%|████▎     | 13/30 [00:17<00:24,  1.44s/it]


Game 13: Player 3 won with 9 VP


Self-play games:  47%|████▋     | 14/30 [00:18<00:20,  1.27s/it]


Game 14: Player 1 won with 10 VP


Self-play games:  50%|█████     | 15/30 [00:19<00:19,  1.29s/it]


Game 15: Player 1 won with 10 VP


Self-play games:  53%|█████▎    | 16/30 [00:20<00:16,  1.17s/it]


Game 16: Player 0 won with 10 VP


Self-play games:  57%|█████▋    | 17/30 [00:21<00:15,  1.16s/it]


Game 17: Player 3 won with 10 VP


Self-play games:  60%|██████    | 18/30 [00:22<00:14,  1.22s/it]


Game 18: Player 2 won with 11 VP


Self-play games:  63%|██████▎   | 19/30 [00:24<00:12,  1.17s/it]


Game 19: Player 0 won with 10 VP


Self-play games:  67%|██████▋   | 20/30 [00:25<00:12,  1.27s/it]


Game 20: Player 0 won with 8 VP


Self-play games:  70%|███████   | 21/30 [00:27<00:12,  1.35s/it]


Game 21: Player 2 won with 10 VP


Self-play games:  73%|███████▎  | 22/30 [00:28<00:10,  1.36s/it]


Game 22: Player 0 won with 7 VP


Self-play games:  77%|███████▋  | 23/30 [00:29<00:09,  1.37s/it]


Game 23: Player 2 won with 10 VP


Self-play games:  80%|████████  | 24/30 [00:31<00:08,  1.37s/it]


Game 24: Player 0 won with 8 VP


Self-play games:  83%|████████▎ | 25/30 [00:32<00:06,  1.30s/it]


Game 25: Player 0 won with 7 VP


Self-play games:  87%|████████▋ | 26/30 [00:33<00:04,  1.25s/it]


Game 26: Player 1 won with 10 VP


Self-play games:  90%|█████████ | 27/30 [00:34<00:03,  1.29s/it]


Game 27: Player 3 won with 7 VP


Self-play games:  93%|█████████▎| 28/30 [00:35<00:02,  1.21s/it]


Game 28: Player 3 won with 10 VP


Self-play games:  97%|█████████▋| 29/30 [00:37<00:01,  1.28s/it]


Game 29: Player 1 won with 7 VP


Self-play games: 100%|██████████| 30/30 [00:38<00:00,  1.29s/it]


Game 30: Player 3 won with 6 VP
[2025-04-20 18:00:09] Self-play completed in 38.81s, generated 5985 examples (154.2 games/s)
[2025-04-20 18:00:09] Training network...





Epoch 1/10: Loss 0.0658 (Value 0.0658, Policy 0.0000)
Epoch 2/10: Loss 0.0149 (Value 0.0149, Policy 0.0000)
Epoch 3/10: Loss 0.0144 (Value 0.0144, Policy 0.0000)
Epoch 4/10: Loss 0.0034 (Value 0.0034, Policy 0.0000)
Epoch 5/10: Loss 0.0270 (Value 0.0270, Policy 0.0000)
Epoch 6/10: Loss 0.0074 (Value 0.0074, Policy 0.0000)
Epoch 7/10: Loss 0.0014 (Value 0.0014, Policy 0.0000)
Epoch 8/10: Loss 0.0012 (Value 0.0012, Policy 0.0000)
Epoch 9/10: Loss 0.0010 (Value 0.0010, Policy 0.0000)
Epoch 10/10: Loss 0.0008 (Value 0.0008, Policy 0.0000)
[2025-04-20 18:00:22] Training completed in 13.13s
[2025-04-20 18:00:23] Plotly metrics visualization saved to plots/training_metrics.html
[2025-04-20 18:00:23] Checkpoint saved: models/model_iter_5.pt
[2025-04-20 18:00:23] Iteration 5 done in 52.94s
[2025-04-20 18:00:23] Resource usage: CPU 21.4%, RAM 5.5%, GPU peak memory 0.07 GB
[2025-04-20 18:00:23] 
=== Iteration 6/100 ===
[2025-04-20 18:00:23] Starting self-play...


Self-play games:   3%|▎         | 1/30 [00:01<00:32,  1.13s/it]


Game 1: Player 0 won with 8 VP


Self-play games:   7%|▋         | 2/30 [00:02<00:30,  1.11s/it]


Game 2: Player 2 won with 8 VP


Self-play games:  10%|█         | 3/30 [00:03<00:30,  1.14s/it]


Game 3: Player 2 won with 8 VP


Self-play games:  13%|█▎        | 4/30 [00:04<00:29,  1.14s/it]


Game 4: Player 3 won with 7 VP


Self-play games:  17%|█▋        | 5/30 [00:05<00:26,  1.07s/it]


Game 5: Player 0 won with 10 VP


Self-play games:  20%|██        | 6/30 [00:06<00:28,  1.17s/it]


Game 6: Player 2 won with 9 VP


Self-play games:  23%|██▎       | 7/30 [00:07<00:25,  1.11s/it]


Game 7: Player 2 won with 10 VP


Self-play games:  27%|██▋       | 8/30 [00:08<00:24,  1.11s/it]


Game 8: Player 2 won with 10 VP


Self-play games:  30%|███       | 9/30 [00:09<00:22,  1.08s/it]


Game 9: Player 0 won with 10 VP


Self-play games:  33%|███▎      | 10/30 [00:10<00:20,  1.05s/it]


Game 10: Player 0 won with 10 VP


Self-play games:  37%|███▋      | 11/30 [00:12<00:21,  1.13s/it]


Game 11: Player 1 won with 7 VP


Self-play games:  40%|████      | 12/30 [00:13<00:21,  1.19s/it]


Game 12: Player 1 won with 10 VP


Self-play games:  43%|████▎     | 13/30 [00:14<00:18,  1.10s/it]


Game 13: Player 0 won with 11 VP


Self-play games:  47%|████▋     | 14/30 [00:15<00:18,  1.13s/it]


Game 14: Player 0 won with 8 VP


Self-play games:  50%|█████     | 15/30 [00:16<00:16,  1.11s/it]


Game 15: Player 0 won with 6 VP


Self-play games:  53%|█████▎    | 16/30 [00:17<00:15,  1.13s/it]


Game 16: Player 0 won with 8 VP


Self-play games:  57%|█████▋    | 17/30 [00:19<00:14,  1.14s/it]


Game 17: Player 0 won with 10 VP


Self-play games:  60%|██████    | 18/30 [00:20<00:14,  1.18s/it]


Game 18: Player 1 won with 7 VP


Self-play games:  63%|██████▎   | 19/30 [00:21<00:12,  1.16s/it]


Game 19: Player 1 won with 9 VP


Self-play games:  67%|██████▋   | 20/30 [00:22<00:11,  1.13s/it]


Game 20: Player 3 won with 10 VP


Self-play games:  70%|███████   | 21/30 [00:23<00:09,  1.10s/it]


Game 21: Player 3 won with 9 VP


Self-play games:  73%|███████▎  | 22/30 [00:24<00:08,  1.12s/it]


Game 22: Player 3 won with 9 VP


Self-play games:  77%|███████▋  | 23/30 [00:25<00:07,  1.11s/it]


Game 23: Player 2 won with 9 VP


Self-play games:  80%|████████  | 24/30 [00:26<00:06,  1.11s/it]


Game 24: Player 0 won with 6 VP


Self-play games:  83%|████████▎ | 25/30 [00:28<00:05,  1.16s/it]


Game 25: Player 0 won with 8 VP


Self-play games:  87%|████████▋ | 26/30 [00:29<00:04,  1.06s/it]


Game 26: Player 0 won with 10 VP


Self-play games:  90%|█████████ | 27/30 [00:30<00:03,  1.04s/it]


Game 27: Player 0 won with 10 VP


Self-play games:  93%|█████████▎| 28/30 [00:31<00:02,  1.08s/it]


Game 28: Player 1 won with 10 VP


Self-play games:  97%|█████████▋| 29/30 [00:32<00:01,  1.10s/it]


Game 29: Player 2 won with 10 VP


Self-play games: 100%|██████████| 30/30 [00:33<00:00,  1.10s/it]


Game 30: Player 0 won with 10 VP
[2025-04-20 18:00:56] Self-play completed in 33.03s, generated 6219 examples (188.3 games/s)
[2025-04-20 18:00:56] Training network...





Epoch 1/10: Loss 0.0416 (Value 0.0416, Policy 0.0000)
Epoch 2/10: Loss 0.0050 (Value 0.0050, Policy 0.0000)
Epoch 3/10: Loss 0.0015 (Value 0.0015, Policy 0.0000)
Epoch 4/10: Loss 0.0047 (Value 0.0047, Policy 0.0000)
Epoch 5/10: Loss 0.0132 (Value 0.0132, Policy 0.0000)
Epoch 6/10: Loss 0.0113 (Value 0.0113, Policy 0.0000)
Epoch 7/10: Loss 0.0068 (Value 0.0068, Policy 0.0000)
Epoch 8/10: Loss 0.0011 (Value 0.0011, Policy 0.0000)
Epoch 9/10: Loss 0.0009 (Value 0.0009, Policy 0.0000)
Epoch 10/10: Loss 0.0007 (Value 0.0007, Policy 0.0000)
[2025-04-20 18:01:05] Training completed in 9.05s
[2025-04-20 18:01:05] Evaluating network...


Evaluation games:   7%|▋         | 1/15 [00:00<00:02,  5.84it/s]

[2025-04-20 18:01:05] Game 1: duration=0.17s, moves=200, our_VP=5, winner=0 VP=5


Evaluation games:  20%|██        | 3/15 [00:00<00:02,  5.16it/s]

[2025-04-20 18:01:06] Game 2: duration=0.23s, moves=200, our_VP=6, winner=0 VP=6
[2025-04-20 18:01:06] Game 3: duration=0.18s, moves=200, our_VP=6, winner=0 VP=6


Evaluation games:  27%|██▋       | 4/15 [00:00<00:02,  4.85it/s]

[2025-04-20 18:01:06] Game 4: duration=0.22s, moves=200, our_VP=7, winner=0 VP=7


Evaluation games:  40%|████      | 6/15 [00:01<00:01,  4.93it/s]

[2025-04-20 18:01:06] Game 5: duration=0.21s, moves=200, our_VP=9, winner=0 VP=9
[2025-04-20 18:01:06] Game 6: duration=0.19s, moves=200, our_VP=6, winner=0 VP=6


Evaluation games:  53%|█████▎    | 8/15 [00:01<00:01,  5.11it/s]

[2025-04-20 18:01:07] Game 7: duration=0.19s, moves=200, our_VP=4, winner=3 VP=6
[2025-04-20 18:01:07] Game 8: duration=0.18s, moves=200, our_VP=5, winner=1 VP=6


Evaluation games:  60%|██████    | 9/15 [00:01<00:01,  4.45it/s]

[2025-04-20 18:01:07] Game 9: duration=0.29s, moves=200, our_VP=7, winner=0 VP=7


Evaluation games:  67%|██████▋   | 10/15 [00:02<00:01,  4.47it/s]

[2025-04-20 18:01:07] Game 10: duration=0.22s, moves=200, our_VP=7, winner=0 VP=7


Evaluation games:  80%|████████  | 12/15 [00:02<00:00,  4.70it/s]

[2025-04-20 18:01:07] Game 11: duration=0.21s, moves=195, our_VP=10, winner=0 VP=10
[2025-04-20 18:01:08] Game 12: duration=0.19s, moves=163, our_VP=10, winner=0 VP=10


Evaluation games:  87%|████████▋ | 13/15 [00:02<00:00,  4.72it/s]

[2025-04-20 18:01:08] Game 13: duration=0.21s, moves=200, our_VP=9, winner=0 VP=9


Evaluation games:  93%|█████████▎| 14/15 [00:02<00:00,  4.65it/s]

[2025-04-20 18:01:08] Game 14: duration=0.22s, moves=200, our_VP=8, winner=0 VP=8


Evaluation games: 100%|██████████| 15/15 [00:03<00:00,  4.75it/s]

[2025-04-20 18:01:08] Game 15: duration=0.21s, moves=200, our_VP=5, winner=0 VP=5
[2025-04-20 18:01:08] Evaluated 15 games in 3.16s (4.75 games/s)
[2025-04-20 18:01:08] Evaluation results: win_rate=0.87, avg_vp=6.93, avg_length=197.20, total_moves=2958
[2025-04-20 18:01:08] Eval resource usage: CPU 17.9%, RAM 5.6%, GPU peak memory 0.07 GB
[2025-04-20 18:01:08] Evaluation completed in 3.16s
[2025-04-20 18:01:08] New best model at iteration 6 (win_rate=0.87)
[2025-04-20 18:01:08] Checkpoint saved: models/model_iter_6.pt





[2025-04-20 18:01:09] Best model saved: models/best_model.pt
[2025-04-20 18:01:09] Iteration 6 done in 45.46s
[2025-04-20 18:01:09] Resource usage: CPU 14.1%, RAM 5.5%, GPU peak memory 0.06 GB
[2025-04-20 18:01:09] 
=== Iteration 7/100 ===
[2025-04-20 18:01:09] Starting self-play...


Self-play games:   3%|▎         | 1/30 [00:00<00:23,  1.23it/s]


Game 1: Player 3 won with 10 VP


Self-play games:   7%|▋         | 2/30 [00:01<00:24,  1.15it/s]


Game 2: Player 2 won with 6 VP


Self-play games:  10%|█         | 3/30 [00:02<00:27,  1.01s/it]


Game 3: Player 1 won with 7 VP


Self-play games:  13%|█▎        | 4/30 [00:03<00:22,  1.14it/s]


Game 4: Player 0 won with 10 VP


Self-play games:  17%|█▋        | 5/30 [00:04<00:24,  1.03it/s]


Game 5: Player 2 won with 10 VP


Self-play games:  20%|██        | 6/30 [00:05<00:25,  1.06s/it]


Game 6: Player 1 won with 9 VP


Self-play games:  23%|██▎       | 7/30 [00:07<00:25,  1.10s/it]


Game 7: Player 3 won with 11 VP


Self-play games:  27%|██▋       | 8/30 [00:08<00:23,  1.05s/it]


Game 8: Player 0 won with 11 VP


Self-play games:  30%|███       | 9/30 [00:08<00:19,  1.05it/s]


Game 9: Player 1 won with 10 VP


Self-play games:  33%|███▎      | 10/30 [00:09<00:18,  1.07it/s]


Game 10: Player 0 won with 10 VP


Self-play games:  37%|███▋      | 11/30 [00:10<00:17,  1.11it/s]


Game 11: Player 0 won with 10 VP


Self-play games:  40%|████      | 12/30 [00:11<00:16,  1.06it/s]


Game 12: Player 2 won with 10 VP


Self-play games:  43%|████▎     | 13/30 [00:12<00:16,  1.01it/s]


Game 13: Player 3 won with 10 VP


Self-play games:  47%|████▋     | 14/30 [00:13<00:17,  1.07s/it]


Game 14: Player 1 won with 9 VP


Self-play games:  50%|█████     | 15/30 [00:14<00:15,  1.06s/it]


Game 15: Player 0 won with 10 VP


Self-play games:  53%|█████▎    | 16/30 [00:16<00:15,  1.09s/it]


Game 16: Player 0 won with 9 VP


Self-play games:  57%|█████▋    | 17/30 [00:17<00:13,  1.06s/it]


Game 17: Player 1 won with 5 VP


Self-play games:  60%|██████    | 18/30 [00:18<00:12,  1.08s/it]


Game 18: Player 3 won with 7 VP


Self-play games:  63%|██████▎   | 19/30 [00:19<00:12,  1.10s/it]


Game 19: Player 0 won with 11 VP
