In [11]:
# Download and install Stockfish
!wget https://github.com/official-stockfish/Stockfish/releases/download/sf_17.1/stockfish-ubuntu-x86-64-avx2.tar
!tar xf stockfish-ubuntu-x86-64-avx2.tar

--2025-07-26 20:41:25--  https://github.com/official-stockfish/Stockfish/releases/download/sf_17.1/stockfish-ubuntu-x86-64-avx2.tar
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/20976138/85758419-9488-4267-84ea-dc1379a61eb1?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-07-26T21%3A25%3A00Z&rscd=attachment%3B+filename%3Dstockfish-ubuntu-x86-64-avx2.tar&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-07-26T20%3A24%3A19Z&ske=2025-07-26T21%3A25%3A00Z&sks=b&skv=2018-11-09&sig=Gt101ePPr%2FYtdFO9MDvlH2avVoiyPa0wmTF%2BX1KWT5s%3D&jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc1MzU2Mjc4NSwibmJmIjoxNzUzNTYyNDg

In [12]:
# Set perms
!ls
!chmod +x stockfish

chess_training.log  stockfish-ubuntu-x86-64-avx2.tar
stockfish	    stockfish-ubuntu-x86-64-avx2.tar.1


In [13]:
# Install dependancies
!pip install chess tqdm torch denoising-diffusion-pytorch



In [14]:
# Compile Stockfish from the extracted sources using the `profile-build`
# make target, with parallel jobs enabled (`-j`).
# NOTE(review): main() sets STOCKFISH_PATH to the prebuilt binary
# ./stockfish/stockfish-ubuntu-x86-64-avx2, not to anything under
# ./stockfish/src, so the result of this build does not appear to be used;
# confirm whether this step is still needed.
!cd ./stockfish/src && make -j profile-build

Existing nn-1c0000000000.nnue validated, skipping download
Existing nn-37f18f62d772.nnue validated, skipping download

Config:
debug: 'no'
sanitize: 'none'
optimize: 'yes'
arch: 'x86_64'
bits: '64'
kernel: 'Linux'
os: 'GNU/Linux'
prefetch: 'yes'
popcnt: 'yes'
pext: 'yes'
sse: 'yes'
mmx: 'no'
sse2: 'yes'
ssse3: 'yes'
sse41: 'yes'
avx2: 'yes'
avxvnni: 'no'
avx512: 'yes'
vnni256: 'no'
vnni512: 'no'
altivec: 'no'
vsx: 'no'
neon: 'no'
dotprod: 'no'
arm_version: '0'
lsx: 'no'
lasx: 'no'
target_windows: ''

Flags:
CXX: g++
CXXFLAGS:  -Wall -Wcast-qual -fno-exceptions -std=c++17  -pedantic -Wextra -Wshadow -Wmissing-declarations -m64 -DUSE_PTHREADS -DNDEBUG -O3 -funroll-loops -DIS_64BIT -msse -msse3 -mpopcnt -DUSE_POPCNT -DUSE_AVX2 -mavx2 -mbmi -DUSE_AVX512 -mavx512f -mavx512bw -DUSE_SSE41 -msse4.1 -DUSE_SSSE3 -mssse3 -DUSE_SSE2 -msse2 -DUSE_PEXT -mbmi2 -DARCH=x86-64-avx512 -flto -flto-partition=one
LDFLAGS:   -m64 -Wl,--no-as-needed -lpthread  -Wall -Wcast-qual -fno-exceptions -std=c++17  -pe

In [15]:
# Import necessary libraries
import logging
import os
import pickle
import random
import sys
import time
from collections import deque, defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any

import chess
import chess.engine
import numpy as np
import torch
import torch.nn.functional as F
from tqdm import tqdm

# Import lucidrains library
from denoising_diffusion_pytorch import Unet1D, GaussianDiffusion1D

In [16]:
# Configure logging: INFO and above goes both to the cell output (stdout)
# and, appended, to chess_training.log.
#
# force=True removes any handlers already attached to the root logger before
# installing these. Without it basicConfig() silently does nothing when the
# root logger is already configured, which happens when this cell is re-run
# and on notebook hosts that pre-configure logging.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('chess_training.log', mode='a')
    ],
    force=True,
)

# Notebook-wide logger used by every class below.
log = logging.getLogger("BlackMatter-XL")

In [17]:
@dataclass
class TrainingConfig:
    """Hyper-parameters and run settings shared by the training pipeline.

    Every field has a default, so ``TrainingConfig()`` is a usable
    configuration; ``main()`` overrides a handful of them.
    """
    # Passed to Unet1D as ``dim``.
    model_dim: int = 128
    # Passed to Unet1D as ``dim_mults``.
    model_layers: Tuple[int, ...] = (1, 2, 4, 8)
    # Passed to GaussianDiffusion1D as ``timesteps``.
    timesteps: int = 1000
    # Sampling steps used when ``ddpm_mode`` is not 'fast'.
    sampling_timesteps: int = 100
    # AdamW learning rate.
    learning_rate: float = 1e-4
    # Number of replay-buffer samples per optimisation step.
    batch_size: int = 32
    # Stockfish games collected at the start of every epoch.
    games_per_epoch: int = 50
    # Upper bound of the epoch loop in Pipeline.pipeline_train.
    total_epochs: int = 10
    # Value sent to the engine's "Skill Level" option.
    stockfish_skill_level: int = 15
    # Per-move thinking time given to Stockfish (chess.engine.Limit(time=...)).
    stockfish_time_limit: float = 0.1
    # Maximum number of samples the replay buffer retains.
    replay_buffer_size: int = 100000
    # Training is skipped for an epoch while the buffer holds fewer samples than this.
    min_buffer_size: int = 1000
    # A checkpoint is written every this many epochs (and after the last epoch).
    save_frequency: int = 5
    # Default number of evaluation games per epoch.
    eval_games: int = 10
    # Temperature passed to move decoding during evaluation.
    eval_temperature: float = 1.0
    # NOTE(review): not referenced by any code visible in this notebook.
    gradient_accumulate_steps: int = 2
    # 'fast' makes DDPMChessModel use 10 sampling steps; any other value
    # uses ``sampling_timesteps``.
    ddpm_mode: str = 'fast'

In [18]:
class ChessPositionEncoder:
    """Encodes chess positions and moves for neural network processing.

    Positions become a flat float vector of ``position_features`` entries.
    Moves are addressed in a ``move_space`` of 64 * 64 slots keyed by
    (from-square, to-square).

    Limitation: the promotion piece is not part of the move index, so all
    promotions of one pawn move (queen, rook, bishop, knight) share a slot
    and cannot be told apart by the encoded target or by the decoder.
    """

    def __init__(self):
        self.position_features = 800
        self.move_space = 4096

    def encode_position(self, board: chess.Board) -> torch.Tensor:
        """Encode a board into a float32 vector of length ``position_features``.

        Layout:
          * ``[0, 768)``  one-hot piece placement, ``square * 12 + piece``,
            where white pieces use offsets 0-5 and black pieces 6-11;
          * ``768``       side to move (1.0 for white);
          * ``769..772``  castling rights: white king-side, white queen-side,
            black king-side, black queen-side;
          * ``773``       1.0 when an en-passant square is set;
          * ``774``       en-passant square index divided by 63;
          * ``775``       half-move clock / 50, capped at 1;
          * ``776``       full-move number / 100, capped at 1.
        The remaining entries are never written and stay zero.
        """
        features = torch.zeros(self.position_features, dtype=torch.float32)

        # Piece placement (64 squares * 12 piece types)
        for square in range(64):
            piece = board.piece_at(square)
            if piece:
                piece_offset = (piece.piece_type - 1) + (6 if piece.color == chess.BLACK else 0)
                features[square * 12 + piece_offset] = 1.0

        # Additional game state features
        offset = 768
        features[offset] = float(board.turn)
        features[offset+1:offset+5] = torch.tensor([
            board.has_kingside_castling_rights(chess.WHITE),
            board.has_queenside_castling_rights(chess.WHITE),
            board.has_kingside_castling_rights(chess.BLACK),
            board.has_queenside_castling_rights(chess.BLACK)
        ], dtype=torch.float32)

        # ep_square is an optional square *index*; compare against None so
        # that square 0 is not mistaken for "no en-passant square".
        if board.ep_square is not None:
            features[offset + 5] = 1.0
            features[offset + 6] = board.ep_square / 63.0

        features[offset + 7] = min(board.halfmove_clock / 50.0, 1.0)
        features[offset + 8] = min(board.fullmove_number / 100.0, 1.0)

        return features

    def encode_move_target(self, move: chess.Move, legal_moves: List[chess.Move]) -> torch.Tensor:
        """Build the training target for ``move`` as a distribution over ``move_space``.

        Every legal move first receives ``1 / len(legal_moves)``; the slot of
        the move actually played is then overwritten with 0.8 and the vector
        is renormalised to sum to 1.
        """
        target = torch.zeros(self.move_space, dtype=torch.float32)

        # Base probability for all legal moves
        move_prob = 1.0 / max(1, len(legal_moves))
        for lm in legal_moves:
            target[self._move_to_index(lm)] = move_prob

        # Higher probability for the actual move
        target[self._move_to_index(move)] = 0.8

        # Normalize
        total = target.sum()
        if total > 0:
            target /= total

        return target

    def _move_to_index(self, move: chess.Move) -> int:
        """Map a move to ``from_square * 64 + to_square`` (promotion piece ignored)."""
        return move.from_square * 64 + move.to_square

    def decode_move_distribution(self, move_probs: torch.Tensor, board: chess.Board,
                               temperature: float = 1.0) -> Optional[chess.Move]:
        """Pick a legal move for ``board`` from a score vector over ``move_space``.

        Args:
            move_probs: 1-D tensor of per-slot scores; slots beyond its length
                count as 0.
            board: position whose legal moves are the only candidates.
            temperature: divides the scores before the softmax. A value
                ``<= 0`` selects the highest-scoring legal move
                deterministically (the limit of a vanishing temperature)
                instead of dividing by zero.

        Returns:
            ``None`` when the position has no legal moves. Otherwise a legal
            move: sampled from a softmax over the (up to) five best-scoring
            legal moves, or chosen uniformly at random when no legal move has
            a positive score, when scores are NaN, or when decoding fails.
        """
        legal = list(board.legal_moves)
        if not legal:
            return None

        try:
            limit = len(move_probs)
            raw_scores = []
            for m in legal:
                idx = self._move_to_index(m)
                raw_scores.append(move_probs[idx].item() if idx < limit else 0.0)
            scores = torch.tensor(raw_scores, dtype=torch.float32)

            # Nothing usable to rank by: fall back to a uniform choice.
            if torch.isnan(scores).any() or torch.all(scores <= 0):
                return random.choice(legal)

            # Non-positive temperature means "greedy"; dividing by it would
            # yield inf/NaN (or flip the ranking for negative values).
            if temperature <= 0:
                return legal[int(torch.argmax(scores).item())]

            if temperature != 1.0:
                scores = scores / temperature

            # Select from top moves
            top_k = min(5, len(legal))
            top_scores, top_indices = torch.topk(scores, top_k)

            probs = F.softmax(top_scores, dim=0).cpu().numpy()
            if np.isnan(probs).any() or probs.sum() == 0:
                return random.choice(legal)

            choice_idx = np.random.choice(top_k, p=probs)
            return legal[top_indices[choice_idx].item()]

        except Exception as e:
            # Best effort: a decoding problem must never leave the caller
            # without a move while legal moves exist.
            log.warning(f"decode_move_distribution error: {e}")
            return random.choice(legal)

In [19]:
class ReplayBuffer:
    """Bounded FIFO store of training samples.

    Backed by a ``deque`` with ``maxlen``: once ``max_size`` samples are
    held, adding more discards the oldest ones.
    """

    def __init__(self, max_size: int):
        self.buf = deque(maxlen=max_size)

    def push_many(self, samples: List[Tuple[torch.Tensor, torch.Tensor]]):
        """Append all ``samples`` in order, evicting the oldest on overflow."""
        self.buf += samples

    def sample_batch(self, batch_size: int) -> List[Tuple[torch.Tensor, torch.Tensor]]:
        """Return up to ``batch_size`` distinct samples drawn without replacement."""
        available = len(self.buf)
        count = batch_size if batch_size < available else available
        return random.sample(self.buf, count)

    def __len__(self):
        """Number of samples currently stored."""
        return self.buf.__len__()

In [20]:
class DataCollector:
    """Collects supervised training data from Stockfish games.

    Stockfish chooses the moves for *both* colours. Only positions with
    White to move are recorded, each paired with the move Stockfish played
    there.
    """

    def __init__(self, encoder: ChessPositionEncoder, config: TrainingConfig):
        self.encoder = encoder
        self.config = config

    def play_vs_stockfish(self, stockfish_path: str) -> List[Tuple[torch.Tensor, torch.Tensor]]:
        """Play one Stockfish-vs-Stockfish game and return the recorded samples.

        Args:
            stockfish_path: command or path used to launch the UCI engine.

        Returns:
            A list of ``(encoded_position, encoded_move_target)`` pairs, one
            per White move. Any failure (engine could not be started, engine
            died mid-game, ...) is logged and whatever was collected up to
            that point is returned, possibly an empty list.
        """
        board = chess.Board()
        samples = []
        max_plies = 200  # hard cap on half-moves so a game always terminates

        try:
            with chess.engine.SimpleEngine.popen_uci(stockfish_path) as engine:
                engine.configure({"Skill Level": self.config.stockfish_skill_level})
                limit = chess.engine.Limit(time=self.config.stockfish_time_limit)

                for _ply in range(max_plies):
                    if board.is_game_over():
                        break

                    legal = list(board.legal_moves)
                    if not legal:
                        break

                    move = engine.play(board, limit).move
                    if move is None:
                        # The engine returned no move; stop cleanly instead
                        # of failing while encoding or pushing None.
                        log.warning("Stockfish returned no move; ending game early")
                        break

                    if board.turn == chess.WHITE:
                        # Record the pair before the move changes the board.
                        samples.append((
                            self.encoder.encode_position(board),
                            self.encoder.encode_move_target(move, legal)
                        ))

                    board.push(move)

        except Exception as e:
            # Best effort: keep the samples gathered so far.
            log.error(f"Stockfish data collection failed: {e}")

        return samples

In [21]:
class DDPMChessModel(torch.nn.Module):
    """1-D denoising diffusion model over the move space (lucidrains library).

    Wraps a ``Unet1D`` denoiser in a ``GaussianDiffusion1D`` whose sequence
    length is ``encoder.move_space``. Neither ``forward`` nor ``sample``
    takes a board position, so the model is unconditional.
    """

    def __init__(self, config: TrainingConfig, encoder: ChessPositionEncoder):
        super().__init__()
        self.encoder = encoder

        # 'fast' mode samples in 10 steps; otherwise use the configured count.
        if config.ddpm_mode == 'fast':
            n_sampling_steps = 10
        else:
            n_sampling_steps = config.sampling_timesteps

        # Denoising network (single input channel).
        denoiser = Unet1D(
            dim=config.model_dim,
            dim_mults=config.model_layers,
            channels=1,
        )
        self.unet = denoiser

        # Diffusion process around the denoiser, trained to predict the noise.
        self.diffusion = GaussianDiffusion1D(
            denoiser,
            seq_length=encoder.move_space,
            timesteps=config.timesteps,
            sampling_timesteps=n_sampling_steps,
            objective='pred_noise',
        )

    def forward(self, x):
        """Return the diffusion training loss for the batch ``x``."""
        loss = self.diffusion(x)
        return loss

    def sample(self, batch_size=1):
        """Draw ``batch_size`` samples from the diffusion model."""
        drawn = self.diffusion.sample(batch_size=batch_size)
        return drawn

In [22]:
class Pipeline:
    """Main training and inference pipeline.

    Owns the encoder, replay buffer, Stockfish data collector, diffusion
    model and optimizer, and runs them as repeated
    collect -> train -> evaluate -> checkpoint epochs. ``ddpm_move`` exposes
    move generation for evaluation and for the UCI front end.
    """

    def __init__(self, config: TrainingConfig, stockfish_path: str, model_path: Optional[str] = None):
        """Build all components and optionally restore a checkpoint.

        Args:
            config: training configuration.
            stockfish_path: command or path used to launch Stockfish.
            model_path: checkpoint to restore; ignored when ``None`` or when
                the file does not exist.
        """
        self.config = config
        self.stockfish_path = stockfish_path
        self.encoder = ChessPositionEncoder()
        self.buffer = ReplayBuffer(config.replay_buffer_size)
        self.collector = DataCollector(self.encoder, config)

        self.model = DDPMChessModel(config, self.encoder)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = self.model.to(self.device)

        self.optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=config.learning_rate,
            weight_decay=1e-4
        )

        self.training_step = 0
        # Number of epochs completed so far; pipeline_train resumes from here.
        self.epoch = 0

        if model_path and os.path.exists(model_path):
            self.load_checkpoint(model_path)

        log.info(f"Model initialized on {self.device} with {sum(p.numel() for p in self.model.parameters()):,} parameters")

    @staticmethod
    def _to_cpu(obj):
        """Return a copy of ``obj`` in which every tensor lives on the CPU.

        Recurses through dicts, lists and tuples; any other value is
        returned unchanged. The input containers are not modified.
        """
        if torch.is_tensor(obj):
            return obj.detach().cpu()
        if isinstance(obj, dict):
            return {key: Pipeline._to_cpu(value) for key, value in obj.items()}
        if isinstance(obj, list):
            return [Pipeline._to_cpu(value) for value in obj]
        if isinstance(obj, tuple):
            return tuple(Pipeline._to_cpu(value) for value in obj)
        return obj

    def save_checkpoint(self, path: str, meta: Optional[Dict] = None):
        """Pickle model, optimizer and progress counters to ``path``.

        Tensors are moved to the CPU first: a pickled tensor is restored onto
        the device it was saved from, so a checkpoint written on a GPU could
        otherwise not be loaded on a CPU-only machine.

        Args:
            path: destination file.
            meta: optional extra information stored under ``'meta'``.
        """
        # state_dict() returns a fresh mapping, so replacing its values in
        # place does not touch the live model (and keeps the mapping's type).
        model_state = self.model.state_dict()
        for name, value in model_state.items():
            model_state[name] = self._to_cpu(value)

        checkpoint = {
            'model_state': model_state,
            # The optimizer's state dict shares inner dicts with the live
            # optimizer, hence the copying helper rather than in-place edits.
            'optimizer_state': self._to_cpu(self.optimizer.state_dict()),
            'training_step': self.training_step,
            'epoch': self.epoch,
            'config': self.config,
            'meta': meta or {}
        }

        with open(path, 'wb') as f:
            pickle.dump(checkpoint, f)

        log.info(f"Checkpoint saved to {path}")

    def load_checkpoint(self, path: str):
        """Restore model, optimizer and counters from a pickled checkpoint.

        Failures are logged and otherwise ignored, leaving the freshly
        initialised state in place.

        ``'epoch'`` is read as the number of completed epochs. Checkpoints
        written before that convention stored the zero-based index of the
        last epoch, so resuming from one of those repeats that epoch once.
        """
        try:
            # SECURITY: pickle.load can execute arbitrary code contained in
            # the file. Only load checkpoints from a trusted source.
            with open(path, 'rb') as f:
                checkpoint = pickle.load(f)

            self.model.load_state_dict(checkpoint['model_state'])

            if 'optimizer_state' in checkpoint:
                self.optimizer.load_state_dict(checkpoint['optimizer_state'])

            self.training_step = checkpoint.get('training_step', 0)
            self.epoch = checkpoint.get('epoch', 0)

            log.info(f"Loaded checkpoint from {path}, step {self.training_step}, epoch {self.epoch}")

        except Exception as e:
            log.error(f"Failed to load checkpoint {path}: {e}")

    @torch.no_grad()
    def ddpm_move(self, board: chess.Board, temperature: float = 1.0, attempts: int = 3) -> Optional[chess.Move]:
        """Generate a move for ``board`` from a diffusion sample.

        Args:
            board: position to move in.
            temperature: forwarded to the encoder's move decoding.
            attempts: how many samples to try before giving up.

        Returns:
            ``None`` when the game is over or no legal move exists;
            otherwise a legal move. If every attempt fails (NaN/inf sample,
            exception, or a decoded move that is not legal) a uniformly
            random legal move is returned.

        Note:
            Switches the model to eval mode and leaves it there.
        """
        legal = list(board.legal_moves)
        if not legal or board.is_game_over():
            return None

        self.model.eval()

        for _ in range(attempts):
            try:
                # Sample from the diffusion model and drop the batch and
                # channel dimensions.
                sampled = self.model.sample(batch_size=1)
                sampled_probs = sampled.squeeze(0).squeeze(0)

                if torch.isnan(sampled_probs).any() or torch.isinf(sampled_probs).any():
                    continue

                move = self.encoder.decode_move_distribution(sampled_probs, board, temperature)
                if move and move in legal:
                    return move

            except Exception as e:
                log.warning(f"DDPM move generation failed: {e}")

        return random.choice(legal)

    def train_one_epoch(self) -> float:
        """Run one pass over the (shuffled) replay buffer.

        Only the move-target half of each sample is used. A trailing partial
        batch is dropped.

        Returns:
            The mean batch loss, or ``0.0`` when the buffer holds fewer than
            ``config.min_buffer_size`` samples and training is skipped.
        """
        if len(self.buffer) < self.config.min_buffer_size:
            log.warning(f"Buffer not filled ({len(self.buffer)} < {self.config.min_buffer_size})")
            return 0.0

        self.model.train()
        total_loss = 0.0
        batches = 0

        # Shuffle buffer data
        buffer_data = list(self.buffer.buf)
        random.shuffle(buffer_data)

        num_batches = len(buffer_data) // self.config.batch_size

        with tqdm(total=num_batches, desc="Training", unit="batch") as pbar:
            for i in range(num_batches):
                batch_start = i * self.config.batch_size
                batch_end = batch_start + self.config.batch_size
                batch = buffer_data[batch_start:batch_end]

                # Extract targets (move distributions) and add a channel axis
                targets = torch.stack([sample[1] for sample in batch]).unsqueeze(1).to(self.device)

                # Forward pass through lucidrains diffusion model
                loss = self.model(targets)

                # Backward pass
                self.optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                self.optimizer.step()

                total_loss += loss.item()
                batches += 1
                self.training_step += 1

                pbar.update(1)
                pbar.set_postfix(loss=f"{loss.item():.4f}")

        avg_loss = total_loss / max(1, batches)
        log.info(f"Epoch {self.epoch + 1} training loss: {avg_loss:.4f}")
        return avg_loss

    def fill_replay_buffer(self, num_games: int):
        """Play ``num_games`` Stockfish games and add their samples to the buffer."""
        log.info(f"Collecting {num_games} games vs Stockfish...")

        with tqdm(total=num_games, desc="Data Collection", unit="game") as pbar:
            for _ in range(num_games):
                samples = self.collector.play_vs_stockfish(self.stockfish_path)
                self.buffer.push_many(samples)
                pbar.update(1)

        log.info(f"Replay buffer size: {len(self.buffer)}")

    def evaluate(self, num_games: Optional[int] = None) -> Dict[str, Any]:
        """Play the model (White) against Stockfish (Black) and tally results.

        Args:
            num_games: games to play; ``None`` means ``config.eval_games``.

        Returns:
            A dict mapping result strings (``'1-0'``, ``'0-1'``,
            ``'1/2-1/2'``) to counts; ``'*'`` counts games that hit the
            200-ply cap or failed with an exception.
        """
        if num_games is None:
            num_games = self.config.eval_games
        results = defaultdict(int)

        with tqdm(total=num_games, desc="Evaluation", unit="game") as pbar:
            for game_idx in range(num_games):
                try:
                    board = chess.Board()
                    move_count = 0

                    with chess.engine.SimpleEngine.popen_uci(self.stockfish_path) as stockfish:
                        stockfish.configure({"Skill Level": self.config.stockfish_skill_level})

                        while not board.is_game_over() and move_count < 200:
                            if board.turn == chess.WHITE:  # Our model plays white
                                move = self.ddpm_move(board, self.config.eval_temperature)
                                if not move or move not in board.legal_moves:
                                    move = random.choice(list(board.legal_moves))
                                board.push(move)
                            else:  # Stockfish plays black
                                sf_result = stockfish.play(
                                    board,
                                    chess.engine.Limit(time=self.config.stockfish_time_limit)
                                )
                                board.push(sf_result.move)

                            move_count += 1

                    if board.is_game_over():
                        results[board.result()] += 1
                    else:
                        results['*'] += 1  # Unfinished game

                except Exception as e:
                    log.error(f"Evaluation game {game_idx + 1} failed: {e}")
                    results['*'] += 1

                pbar.update(1)
                pbar.set_postfix(
                    wins=results['1-0'],
                    losses=results['0-1'],
                    draws=results['1/2-1/2']
                )

        total_games = sum(results.values())
        win_rate = results['1-0'] / total_games if total_games > 0 else 0.0

        log.info(f"Evaluation results: Win rate {win_rate:.1%} "
                f"({results['1-0']}/{results['0-1']}/{results['1/2-1/2']})")

        return dict(results)

    def pipeline_train(self):
        """Run the remaining epochs: collect data, train, evaluate, checkpoint.

        Starts at ``self.epoch`` (the number of epochs already completed) and
        stops after ``config.total_epochs``. A checkpoint is written every
        ``config.save_frequency`` epochs and after the final epoch.
        """
        log.info("Starting training pipeline...")

        for epoch in range(self.epoch, self.config.total_epochs):
            self.epoch = epoch
            log.info(f"=== Epoch {epoch + 1}/{self.config.total_epochs} ===")

            # Collect training data
            self.fill_replay_buffer(self.config.games_per_epoch)

            # Train model
            avg_loss = self.train_one_epoch()

            # Evaluate model
            eval_results = self.evaluate()

            # The epoch is complete. Record that *before* checkpointing so a
            # run resumed from the checkpoint continues with the next epoch
            # instead of repeating this one.
            self.epoch = epoch + 1

            # Save checkpoint
            if ((epoch + 1) % self.config.save_frequency == 0 or
                (epoch + 1) == self.config.total_epochs):

                checkpoint_path = f"ddpm_chess_epoch_{epoch + 1}.pkl"
                meta = {
                    'epoch': epoch + 1,
                    'loss': avg_loss,
                    'eval_results': eval_results
                }
                self.save_checkpoint(checkpoint_path, meta)

        log.info("Training pipeline completed!")

In [23]:
class UCIInterface:
    """UCI protocol front end that answers ``go`` with ``Pipeline.ddpm_move``.

    Reads commands from standard input and writes replies to standard
    output. The single engine option, ``Temperature``, is an integer in
    [1, 100] that is divided by 10 to obtain the sampling temperature.
    """

    # Bounds advertised in the "option" line; enforced in _handle_setoption.
    TEMPERATURE_MIN = 1
    TEMPERATURE_MAX = 100

    def __init__(self, pipeline: Pipeline):
        self.pipeline = pipeline
        self.board = chess.Board()
        self.temperature = 1.0
        self.running = True

    def _send_identification(self):
        """Print the engine id, its option list and the closing ``uciok``."""
        print("id name BlackMatter-XL")
        print("id author w1nd0wsXP")
        print("option name Temperature type spin default 10 min 1 max 100")
        print("uciok")

    def run(self):
        """Main UCI event loop; returns on ``quit``, EOF or Ctrl-C."""
        # Announce the engine once at start-up, as before; the same block is
        # sent again in reply to an explicit "uci" command.
        self._send_identification()
        sys.stdout.flush()

        while self.running:
            try:
                line = input().strip()
                if line:
                    self._handle_command(line)
            except (EOFError, KeyboardInterrupt):
                break

    def _handle_command(self, line: str):
        """Dispatch one command line; unknown commands are ignored."""
        parts = line.split()
        if not parts:
            return

        cmd = parts[0]

        if cmd == "uci":
            self._send_identification()

        elif cmd == "isready":
            print("readyok")

        elif cmd == "setoption":
            self._handle_setoption(parts[1:])

        elif cmd == "ucinewgame":
            self.board.reset()

        elif cmd == "position":
            self._handle_position(parts[1:])

        elif cmd == "go":
            self._handle_go(parts[1:])

        elif cmd == "quit":
            self.running = False

        sys.stdout.flush()

    def _handle_setoption(self, args: List[str]):
        """Handle ``setoption name Temperature value <int>``.

        The value is clamped to the advertised [1, 100] range before being
        scaled, so the temperature can never become zero or negative.
        Malformed values and other option names are ignored.
        """
        if len(args) >= 4 and args[0] == "name" and args[1] == "Temperature":
            try:
                temp_value = int(args[3])
            except ValueError:
                return
            temp_value = max(self.TEMPERATURE_MIN, min(self.TEMPERATURE_MAX, temp_value))
            self.temperature = temp_value / 10.0

    def _handle_position(self, args: List[str]):
        """Handle ``position (startpos | fen <fen>) [moves <m1> <m2> ...]``.

        An invalid FEN leaves the board untouched and the whole command is
        ignored. Moves are applied in order and processing stops at the
        first move that is malformed or illegal, because every later move
        would be played from the wrong position.
        """
        if not args:
            return

        if args[0] == "startpos":
            self.board.reset()
            args = args[1:]
        elif args[0] == "fen":
            # Everything between "fen" and "moves" (or the end) is the FEN.
            moves_idx = args.index("moves") if "moves" in args else len(args)
            fen = " ".join(args[1:moves_idx])
            try:
                # set_fen signals any invalid FEN with ValueError.
                self.board.set_fen(fen)
            except ValueError:
                return

            args = args[moves_idx:]

        # Apply moves
        if args and args[0] == "moves":
            for move_str in args[1:]:
                try:
                    move = chess.Move.from_uci(move_str)
                except ValueError:
                    break
                if move not in self.board.legal_moves:
                    break
                self.board.push(move)

    def _handle_go(self, args: List[str]):
        """Handle ``go``: always answer with exactly one ``bestmove`` line.

        Search parameters in ``args`` are ignored. When the model yields no
        move a random legal move is used; when no legal move exists the UCI
        null move ``0000`` is sent so the GUI is not left waiting.
        """
        move = self.pipeline.ddpm_move(self.board, self.temperature)
        if move is None:
            legal_moves = list(self.board.legal_moves)
            if legal_moves:
                move = random.choice(legal_moves)

        if move is None:
            print("bestmove 0000")
        else:
            print(f"bestmove {move.uci()}")

In [None]:
def main(
    train: bool = True,
    total_epochs: int = 3,
    games_per_epoch: int = 50,
    skill_level: int = 20,
    batch_size: int = 16,
    learning_rate: float = 1e-4,
    ddpm_mode: str = "fast",
    stockfish_path: str = "./stockfish/stockfish-ubuntu-x86-64-avx2",
    model_path: str = "./base_model.pkl",
):
    """Main entry point: train the model or serve it over UCI.

    Called without arguments it behaves exactly as the previous hard-coded
    version; every setting can now be overridden per call instead of by
    editing the function body.

    Args:
        train: ``True`` runs the training pipeline; ``False`` starts the
            UCI loop on standard input/output.
        total_epochs: number of training epochs.
        games_per_epoch: Stockfish games collected per epoch.
        skill_level: Stockfish "Skill Level" option.
        batch_size: training batch size.
        learning_rate: optimizer learning rate.
        ddpm_mode: ``"fast"`` for 10 sampling steps, anything else for the
            configured ``sampling_timesteps``.
        stockfish_path: command or path used to launch Stockfish.
        model_path: checkpoint restored at start-up if the file exists.
    """
    # Create configuration
    config = TrainingConfig(
        total_epochs=total_epochs,
        games_per_epoch=games_per_epoch,
        stockfish_skill_level=skill_level,
        batch_size=batch_size,
        learning_rate=learning_rate,
        ddpm_mode=ddpm_mode
    )

    # Initialize pipeline
    pipeline = Pipeline(config, stockfish_path, model_path)

    if train:
        # Training mode
        pipeline.pipeline_train()
    else:
        # UCI mode
        uci = UCIInterface(pipeline)
        uci.run()

if __name__ == "__main__":
    main()

Data Collection: 100%|██████████| 50/50 [12:33<00:00, 15.07s/game]
Training: 100%|██████████| 228/228 [06:09<00:00,  1.62s/batch, loss=0.0445]
Evaluation:   0%|          | 0/10 [00:00<?, ?game/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluation:  10%|█         | 1/10 [00:05<00:53,  5.90s/game, draws=0, losses=1, wins=0]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluation:  20%|██        | 2/10 [00:10<00:40,  5.10s/game, draws=0, losses=2, wins=0]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluation:  30%|███       | 3/10 [00:18<00:44,  6.31s/game, draws=0, losses=3, wins=0]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluation:  40%|████      | 4/10 [00:27<00:44,  7.45s/game, draws=0, losses=4, wins=0]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluation:  50%|█████     | 5/10 [00:31<00:31,  6.28s/game, draws=0, losses=5, wins=0]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluation:  60%|██████    | 6/10 [00:38<00:26,  6.64s/game, draws=0, losses=6, wins=0]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluation:  70%|███████   | 7/10 [00:44<00:18,  6.24s/game, draws=0, losses=7, wins=0]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluation:  80%|████████  | 8/10 [00:48<00:11,  5.69s/game, draws=0, losses=8, wins=0]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluation:  90%|█████████ | 9/10 [00:53<00:05,  5.48s/game, draws=0, losses=9, wins=0]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluation: 100%|██████████| 10/10 [01:01<00:00,  6.19s/game, draws=0, losses=10, wins=0]
Data Collection: 100%|██████████| 50/50 [13:54<00:00, 16.69s/game]
Training:  25%|██▍       | 120/481 [03:17<09:34,  1.59s/batch, loss=0.0058]