### About this project

#### This notebook walks you through training an AI model for chess.

#### Convert Chessboard to np array

In [None]:
import numpy as np


def board_to_array(board):
    """Encode a board as an (8, 8, 12) float32 one-hot array.

    Axis 0 is the row counted from the top of the board (row 0 holds squares
    56-63), axis 1 is the file, and axis 2 is the piece channel: lowercase
    symbols ``p n b r q k`` map to channels 0-5 and uppercase ``P N B R Q K``
    map to channels 6-11.

    Args:
        board: Object exposing ``piece_at(square)`` for squares 0-63, returning
            either a falsy value or a piece with a ``symbol()`` method.

    Returns:
        np.ndarray of shape (8, 8, 12) with 1.0 wherever a piece is present.
    """
    # Channel lookup: position of the symbol in this string is its plane index
    channel_of = dict(zip("pnbrqkPNBRQK", range(12)))

    planes = np.zeros((8, 8, 12), dtype=np.float32)
    for square in range(64):
        rank, file = divmod(square, 8)
        occupant = board.piece_at(square)
        if not occupant:
            continue
        # Flip the rank so that the highest-numbered squares land in row 0
        planes[7 - rank, file, channel_of[occupant.symbol()]] = 1.0

    return planes

#### Save/Load Chessboard

Simply use NumPy's `.npz` format to save/load chessboards.

In [None]:
import os

import numpy as np


def save_data(data, labels, file_name):
    """Store ``data`` and ``labels`` together in one ``.npz`` archive.

    The arrays are written under the keys ``data`` and ``labels``.
    """
    named_arrays = {"data": data, "labels": labels}
    np.savez(file_name, **named_arrays)


def load_data(file_name):
    """Load the ``data`` and ``labels`` arrays from an ``.npz`` archive.

    Args:
        file_name: Path to an archive containing ``data`` and ``labels`` keys.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays.
    """
    # Use the archive as a context manager so its file handle is closed as
    # soon as both arrays have been read; an open handle would otherwise
    # linger and can block deleting the file (e.g. on Windows).
    with np.load(file_name) as archive:
        return archive['data'], archive['labels']


def merge_saved_data(output_filename, folder_path="../data/temp"):
    """Concatenate every ``.npz`` chunk in a folder into one archive.

    Args:
        output_filename: Destination path passed to ``save_data``.
        folder_path: Directory holding the chunk files. Defaults to the
            temporary folder used by the data generator.

    Chunks are read in sorted file-name order so that the merged output does
    not depend on the arbitrary ordering of ``os.listdir``. If no chunk
    contains samples, empty arrays are saved.
    """
    data_chunks = []
    label_chunks = []
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith(".npz"):
            continue
        data, labels = load_data(os.path.join(folder_path, file_name))
        # Skip empty arrays: they contribute nothing and may not share the
        # trailing dimensions required by np.concatenate.
        if len(data):
            data_chunks.append(data)
        if len(labels):
            label_chunks.append(labels)

    # One concatenate per array instead of growing Python lists sample by sample
    all_data = np.concatenate(data_chunks) if data_chunks else np.array([])
    all_labels = np.concatenate(label_chunks) if label_chunks else np.array([])
    save_data(all_data, all_labels, output_filename)

#### Convert real-world PGN files to npz

This part will convert pgn files to npz files.

Data Source: [pgnmentor](https://www.pgnmentor.com/files.html#players).

In [None]:
import os
from concurrent.futures import ThreadPoolExecutor

import requests
from bs4 import BeautifulSoup


def download_file(file_url, output_folder):
    """Download one ``.zip`` archive from pgnmentor.com into ``output_folder``.

    Args:
        file_url: The link's ``href``; anything falsy or not ending in
            ``.zip`` is ignored.
        output_folder: Existing directory the archive is written to. The
            local file name is the last path segment of ``file_url``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status.
    """
    from urllib.parse import urljoin

    if not file_url or not file_url.endswith('.zip'):
        return

    # urljoin handles relative, root-relative and absolute hrefs alike
    full_url = urljoin("https://www.pgnmentor.com/", file_url)
    file_name = os.path.join(output_folder, file_url.split('/')[-1])
    with requests.get(full_url, timeout=60) as r:
        # Fail before touching the disk so an HTML error page is never
        # saved under a .zip name.
        r.raise_for_status()
        with open(file_name, 'wb') as f:
            f.write(r.content)
    print(f"Downloaded {file_name}")


def download_files(url, output_folder):
    """Download every ``.zip`` linked from the page at ``url``.

    Args:
        url: Address of the HTML index page to scan for ``<a href>`` links.
        output_folder: Directory for the archives; created if missing.

    Raises:
        requests.RequestException: If the index page itself cannot be fetched.

    Individual download failures are printed and do not stop the remaining
    downloads.
    """
    from concurrent.futures import as_completed

    os.makedirs(output_folder, exist_ok=True)
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    hrefs = (link.get('href') for link in soup.find_all('a'))
    # dict.fromkeys de-duplicates while keeping page order, so two threads
    # never write the same target file at once.
    file_urls = list(dict.fromkeys(href for href in hrefs if href and href.endswith('.zip')))

    with ThreadPoolExecutor() as executor:
        pending = {executor.submit(download_file, file_url, output_folder): file_url for file_url in file_urls}
        # Collect each result explicitly; an unconsumed executor.map would
        # silently discard exceptions raised in worker threads.
        for future in as_completed(pending):
            try:
                future.result()
            except Exception as exc:
                print(f"Failed to download {pending[future]}: {exc}")


if __name__ == "__main__":
    # Scrape the pgnmentor file index and store every archive in the raw-data folder
    url, output_folder = "https://www.pgnmentor.com/files.html#players", "../data/raw_data"
    download_files(url=url, output_folder=output_folder)

In [None]:
import os
import zipfile


def unzip_files(input_folder, output_folder):
    """Extract every ``.zip`` archive found directly in ``input_folder``.

    All archives are unpacked into the same ``output_folder``, which is
    created when it does not exist yet. Files without a ``.zip`` suffix are
    left untouched.
    """
    os.makedirs(output_folder, exist_ok=True)
    for entry in os.listdir(input_folder):
        if not entry.endswith(".zip"):
            continue
        with zipfile.ZipFile(os.path.join(input_folder, entry), 'r') as archive:
            archive.extractall(output_folder)
        print(f"Unzipped {entry}")


if __name__ == "__main__":
    # Unpack the downloaded archives into a dated folder of PGN files
    input_folder, output_folder = "../data/raw_data", "../data/20231001_raw_data"
    unzip_files(input_folder=input_folder, output_folder=output_folder)

In [None]:
import os

import chess
import chess.pgn
import numpy as np
from sklearn.model_selection import train_test_split


def convert_data_from_realworld(input_path, output_path):
    """Turn the games in a folder of PGN files into train/validation arrays.

    Every position reached along the main line of every game is encoded with
    ``board_to_array`` and labelled 1.0 when White is to move and 0.0 when
    Black is to move. The samples are transposed to channel-first layout
    ``(N, 12, 8, 8)``, split 80/20 with a fixed seed, and written to
    ``train_cases.npz`` and ``val_cases.npz`` inside ``output_path``.

    Args:
        input_path: Directory containing ``.pgn`` files.
        output_path: Directory for the two ``.npz`` files; created if missing.

    Returns:
        Tuple ``(X_train, X_val, y_train, y_val)``.

    Raises:
        ValueError: If no position could be read from ``input_path``.
    """
    data = []
    labels = []
    # Sort the files: os.listdir order is arbitrary, and the seeded split below
    # is only reproducible if the samples arrive in a stable order.
    filenames = sorted(f for f in os.listdir(input_path) if f.endswith(".pgn"))
    for filename in filenames:
        # Decode explicitly and tolerate stray bytes so a non-UTF-8 header
        # (e.g. an accented player name) does not abort the whole conversion.
        with open(os.path.join(input_path, filename), encoding="utf-8", errors="replace") as pgn:
            while True:
                game = chess.pgn.read_game(pgn)
                if game is None:
                    break  # End of file
                board = game.board()
                for move in game.mainline_moves():
                    board.push(move)
                    board_array = board_to_array(board)
                    label = 1.0 if board.turn == chess.WHITE else 0.0  # 1 for white's turn, 0 for black
                    data.append(board_array)
                    labels.append(label)

    if not data:
        # Without this guard the transpose below fails with an opaque axes error
        raise ValueError(f"No positions found in any .pgn file under {input_path!r}")

    # Adjust dimensions: (N, 8, 8, 12) -> (N, 12, 8, 8)
    data = np.transpose(np.array(data), (0, 3, 1, 2))
    labels = np.array(labels)
    X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, random_state=42)
    os.makedirs(output_path, exist_ok=True)
    save_data(X_train, y_train, f"{output_path}/train_cases.npz")
    save_data(X_val, y_val, f"{output_path}/val_cases.npz")
    return X_train, X_val, y_train, y_val


if __name__ == "__main__":
    # Read from the folder the unzip step writes to ("../data/20231001_raw_data");
    # the previous value pointed at a different dated folder that no cell in
    # this notebook creates, so a fresh run could not find any PGN files.
    input_path = "../data/20231001_raw_data"
    output_path = "../data"
    convert_data_from_realworld(input_path, output_path)

#### Generate Legitimate Chessboard

This part generates legal chessboard positions for training by playing random games with the `chess` library.

In [None]:
import os
import shutil

import chess
import numpy as np
from rich.progress import Progress


def generate_data(num_games, output_filename):
    """Play random games and save every reached position as labelled data.

    Each game starts from the initial position and plays uniformly random
    legal moves until the game is over. After every move the board is encoded
    with ``board_to_array`` and labelled 1.0 when White is to move, else 0.0.
    Samples are flushed to chunk files under ``../data/temp`` roughly every
    fifth of the run, merged into ``output_filename``, and the temporary
    folder is removed afterwards.

    Args:
        num_games: Number of random games to play.
        output_filename: Path of the merged ``.npz`` file.
    """
    data = []
    labels = []
    folder_path = "../data/temp"
    # Start from a clean folder: chunks left by an interrupted earlier run
    # would otherwise be merged into this run's output.
    shutil.rmtree(folder_path, ignore_errors=True)
    os.makedirs(folder_path, exist_ok=True)
    # Integer interval of at least one game. The former float interval
    # (num_games * 0.2) made `i % save_interval == 0` unreliable, so for many
    # values of num_games no chunk was ever written.
    save_interval = max(1, num_games // 5)

    with Progress() as progress:
        task = progress.add_task("[cyan]Generating data...", total=num_games)
        for i in range(1, num_games + 1):
            board = chess.Board()
            while not board.is_game_over():
                legal_moves = list(board.legal_moves)
                # Index with a random integer instead of np.random.choice,
                # which would first convert the move list into an array.
                move = legal_moves[np.random.randint(len(legal_moves))]
                board.push(move)
                board_array = board_to_array(board)
                label = 1.0 if board.turn == chess.WHITE else 0.0
                data.append(board_array)
                labels.append(label)
            progress.update(task, advance=1)
            # Also flush after the final game so trailing samples are not lost
            if data and (i % save_interval == 0 or i == num_games):
                data_array = np.array(data)
                labels_array = np.array(labels)
                # (N, 8, 8, 12) -> (N, 12, 8, 8)
                data_array = np.transpose(data_array, (0, 3, 1, 2))

                file_name = f"{folder_path}/generated_cases_{i}.npz"
                save_data(data_array, labels_array, file_name)

                data = []
                labels = []
    merge_saved_data(output_filename)
    shutil.rmtree(folder_path)


if __name__ == "__main__":
    # Produce a small held-out set from 50 random games
    generate_data(num_games=50, output_filename="../data/test_cases.npz")

#### Starting Training

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from lightning import LightningModule, Trainer

# Create a DataLoader
from torch.utils.data import DataLoader, TensorDataset

torch.set_float32_matmul_precision("high")


class ChessCNN(LightningModule):
    """Small CNN mapping a 12-channel 8x8 board encoding to one sigmoid score.

    Two 3x3 convolutions (padding 1, so the 8x8 grid is preserved) feed two
    fully connected layers. Training, validation and test steps all compute
    binary cross-entropy and log it under ``train_loss``, ``val_loss`` and
    ``test_loss`` respectively.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor
        self.conv1 = nn.Conv2d(12, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        # Classifier head
        self.fc1 = nn.Linear(128 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 1)

    def forward(self, x):
        """Return a sigmoid score with one value per sample in ``x``."""
        features = torch.relu(self.conv2(torch.relu(self.conv1(x))))
        flattened = features.view(features.size(0), -1)
        hidden = torch.relu(self.fc1(flattened))
        return torch.sigmoid(self.fc2(hidden))

    def _bce_step(self, batch, metric_name):
        """Compute BCE loss for one batch and log it under ``metric_name``."""
        inputs, targets = batch
        predictions = self(inputs)
        loss = nn.BCELoss()(predictions, targets)
        self.log(metric_name, loss)
        return loss

    def training_step(self, batch, batch_idx):
        return self._bce_step(batch, 'train_loss')

    def validation_step(self, batch, batch_idx):
        return self._bce_step(batch, 'val_loss')

    def test_step(self, batch, batch_idx):
        return self._bce_step(batch, 'test_loss')

    def configure_optimizers(self):
        """Use Adam over all parameters with a learning rate of 0.001."""
        return torch.optim.Adam(self.parameters(), lr=0.001)


# Load the train/validation splits written by convert_data_from_realworld.
# (generate_data takes two arguments and returns nothing, so its result
# cannot be unpacked into data and labels here.)
train_data, train_labels = load_data("../data/train_cases.npz")
val_data, val_labels = load_data("../data/val_cases.npz")

# Convert to PyTorch tensors; labels become (N, 1) columns to match the
# single-output shape produced by ChessCNN.forward
train_data = torch.tensor(train_data).float()
train_labels = torch.tensor(train_labels).float().view(-1, 1)
val_data = torch.tensor(val_data).float()
val_labels = torch.tensor(val_labels).float().view(-1, 1)

train_dataset = TensorDataset(train_data, train_labels)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataset = TensorDataset(val_data, val_labels)
val_loader = DataLoader(val_dataset, batch_size=32)

# Initialize and train the model; the validation loader drives validation_step
model = ChessCNN()
trainer = Trainer(max_epochs=10)
trainer.fit(model, train_loader, val_loader)
# Persist the trained weights for the move-selection cell
trainer.save_checkpoint("chess_model.ckpt")

In [None]:
import io

import torch
from PIL import Image


def choose_move(model, board):
    """Pick the legal move whose resulting position the model scores highest.

    Args:
        model: Callable taking a float tensor of shape (N, 12, 8, 8) and
            returning one score per sample.
        board: Board exposing ``legal_moves``, ``push`` and ``pop``. It is
            restored to its original state before returning.

    Returns:
        The highest-scoring legal move (the first one on ties), or ``None``
        when there are no legal moves.
    """
    legal_moves = list(board.legal_moves)
    if not legal_moves:
        return None

    # Encode the position after each candidate move
    candidates = []
    for move in legal_moves:
        board.push(move)
        try:
            # (8, 8, 12) -> (12, 8, 8) to match the model input
            candidates.append(np.transpose(board_to_array(board), (2, 0, 1)))
        finally:
            board.pop()  # Always revert, even if encoding fails

    # Run on whatever device the model's weights live on
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device("cpu")
    batch = torch.from_numpy(np.stack(candidates)).float().to(device)

    # One batched forward pass with autograd disabled: inference needs no graph
    with torch.no_grad():
        values = model(batch).reshape(-1)

    return legal_moves[int(torch.argmax(values))]


# chess.svg is a submodule and is only available after an explicit import
import chess.svg

board = chess.Board()
board.push_san("Nf3")
board.push_san("Nc6")
# Load on CPU so the model matches the CPU tensors built during move selection
model = ChessCNN.load_from_checkpoint("chess_model.ckpt", map_location="cpu")
model.eval()

# Let the model play both sides until the game ends
while not board.is_game_over():
    move = choose_move(model, board)
    board.push(move)
print(board)

# Generate an SVG of the final position. PIL cannot decode SVG, so instead of
# Image.open the SVG object is left as the cell's last expression, which
# Jupyter renders inline.
board_svg = chess.svg.board(board=board)
board_svg