# todo
* adaptive learning rate
* batch normalization
* focus on mid-end game
* save tensors to chunks and train on bigger datasets
* more comment tips


In [5]:
import os
import numpy as np # type: ignore
import time
import torch
import torch.nn as nn # type: ignore
import torch.optim as optim # type: ignore
from torch.utils.data import DataLoader # type: ignore
from chess import pgn # type: ignore
from tqdm import tqdm # type: ignore
from dataset import ChessDataset
from model import ChessModel
from helper_funcs import create_input_for_nn, encode_moves

# Data Processing

## Load data - in chunks so that memory is not overwhelmed; store the chunks in a separate folder

In [4]:
def load_pgn(file_path):
    """Read every game stored in a PGN file.

    Args:
        file_path: Path of the PGN file to parse.

    Returns:
        A list holding each object returned by ``pgn.read_game``, in file
        order. The list is empty when the first read already yields ``None``.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # text encoding, so the same file parses identically on every machine.
    with open(file_path, 'r', encoding='utf-8') as pgn_file:
        # read_game is called repeatedly on the same handle; a None result is
        # the sentinel that marks the end of the file.
        return list(iter(lambda: pgn.read_game(pgn_file), None))

# Folder that holds the raw PGN files.
PGN_DIR = "../data/pgn"

# os.listdir returns entries in arbitrary order; sorting makes the subset
# selected by LIMIT_OF_FILES the same on every run.
files = sorted(file for file in os.listdir(PGN_DIR) if file.endswith(".pgn"))
LIMIT_OF_FILES = min(len(files), 28)

games = []
# Iterate over the capped slice instead of counting and breaking: the progress
# bar then shows the real total and reaches 100% (breaking out on the last
# file left it stuck one step short).
for file in tqdm(files[:LIMIT_OF_FILES]):
    games.extend(load_pgn(os.path.join(PGN_DIR, file)))

print(f"GAMES PARSED: {len(games)}")

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [13:47<?, ?it/s]

GAMES PARSED: 315135





In [6]:
# Maximum number of samples kept for training (single place to tune it).
MAX_SAMPLES = 2_500_000

X, y = create_input_for_nn(games)

# Printed before truncation, so this is the size of the full extracted set.
print(f"NUMBER OF SAMPLES: {len(y)}")

# Keep only the first MAX_SAMPLES entries. X and y are rebound (rather than
# stored under new names) so the untruncated data is not kept alive by an
# extra reference.
X = X[:MAX_SAMPLES]
y = y[:MAX_SAMPLES]

# encode_moves returns the encoded labels together with a mapping whose size
# is used as the number of output classes.
# NOTE(review): presumably maps move strings to integer ids - confirm in helper_funcs.
y, move_to_int = encode_moves(y)
num_classes = len(move_to_int)

# Inputs as float32 features, labels as int64 class indices.
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

: 

: 

## setup

In [3]:
# Create Dataset and DataLoader
dataset = ChessDataset(X, y)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Check for GPU: prefer CUDA, then Apple's MPS backend, and fall back to CPU.
# Without the CUDA branch a machine with an NVIDIA GPU would silently train
# on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
    print("Using CUDA backend.")
elif torch.backends.mps.is_available():
    device = torch.device('mps')
    print("Using MPS backend on Apple Silicon (M2).")
else:
    device = torch.device('cpu')
    print("MPS backend not available. Using CPU.")

# Model Initialization
# The model is moved to the selected device before the optimizer is built
# from its parameters.
model = ChessModel(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

Using MPS backend on Apple Silicon (M2).


TimeoutError: [Errno 60] Operation timed out

Using MPS backend on Apple Silicon (M2).


# Train

In [None]:
num_epochs = 50
# Offset added to the epoch numbers shown in the log.
# NOTE(review): presumably the number of epochs already trained in an earlier
# run - confirm and adjust when starting from scratch.
epoch_offset = 50
# Last epoch number that will be printed. The previous message showed
# num_epochs + 1 + 50, one more than the final epoch actually reached.
total_epochs = epoch_offset + num_epochs

for epoch in range(num_epochs):
    start_time = time.time()
    model.train()
    running_loss = 0.0
    for inputs, labels in tqdm(dataloader):
        # Move the batch to the same device as the model
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)  # Raw logits

        # Compute loss
        loss = criterion(outputs, labels)
        loss.backward()

        # Gradient clipping: cap the total gradient norm at 1.0 before the
        # optimizer step
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()
        running_loss += loss.item()

    # Wall-clock duration of the epoch, split into whole minutes and seconds
    epoch_time = time.time() - start_time
    minutes, seconds = divmod(int(epoch_time), 60)
    # Loss is the mean of the per-batch losses over the epoch
    print(f'Epoch {epoch_offset + epoch + 1}/{total_epochs}, Loss: {running_loss / len(dataloader):.4f}, Time: {minutes}m{seconds}s')

In [None]:
# Save the model
MODEL_PATH = "../models/mark1-10e.pth"
# Create the target folder first: saving into a missing directory raises an
# error, which would throw away the result of the whole training run.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
# Only the state dict (parameters and buffers) is stored, not the model object.
torch.save(model.state_dict(), MODEL_PATH)

Epoch 1/10 - Training:   0%|          | 17/145578 [03:52<553:00:22, 13.68s/batch]