<a href="https://colab.research.google.com/github/DashShantanu/chess-engine/blob/main/chess_engine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install kaggle -q
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 /root/.kaggle/kaggle.json
! kaggle datasets download -d arevel/chess-games
! unzip -qq /content/chess-games.zip

# dataset url
# https://www.kaggle.com/datasets/arevel/chess-games

# !nvcc --version
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# !pip3 install torchvision

mkdir: cannot create directory ‘/root/.kaggle’: File exists
Downloading chess-games.zip to /content
100% 1.45G/1.45G [00:15<00:00, 112MB/s] 
100% 1.45G/1.45G [00:15<00:00, 98.8MB/s]


In [None]:
! pip install chess -q
import chess

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/154.4 kB[0m [31m1.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Column indexes mapping from letter to num and vice-versa
# Files 'a'..'h' numbered 0..7 from left to right; num_to_letter is the exact inverse.
letter_to_num = {file_letter: column for column, file_letter in enumerate('abcdefgh')}
num_to_letter = {column: file_letter for file_letter, column in letter_to_num.items()}

In [None]:
import re

import numpy as np
import pandas as pd
import torch

# Chess-board to matrix representation
def board_to_rep(board):
  """Encode a board as a stack of per-piece-type layers.

  One layer is built by ``create_rep_layer`` for each piece type, in the
  fixed order pawn, rook, knight, bishop, queen, king, and the layers are
  joined with ``np.stack`` along a new leading axis.
  """
  piece_symbols = ('p', 'r', 'n', 'b', 'q', 'k')
  return np.stack([create_rep_layer(board, symbol) for symbol in piece_symbols])

In [None]:
# Create a layer of the matrix representation, white value is positive, black is negative
def create_rep_layer(board, type):
    """Build the layer for a single piece type from the board's text form.

    The board is rendered with ``str(board)``: one line per row, symbols
    separated by single spaces. Each symbol is compared directly against the
    requested piece letter. This replaces the earlier chain of regex
    substitutions, one of which used the invalid escape ``'\\.'`` in a
    non-raw string.

    Args:
        board: object whose ``str()`` is the space-separated board grid.
        type: piece letter, e.g. ``'p'``. The uppercase form marks a white
            piece, the letter as given marks a black piece.

    Returns:
        np.ndarray of ints with 1 for white pieces of this type, -1 for black
        pieces of this type and 0 everywhere else.
    """
    white_symbol = type.upper()
    black_symbol = type

    def symbol_value(symbol):
        # white is checked first, so an uppercase ``type`` maps to 1
        if symbol == white_symbol:
            return 1
        if symbol == black_symbol:
            return -1
        return 0

    board_matrix = [
        [symbol_value(symbol) for symbol in row.split(' ')]
        for row in str(board).split('\n')
    ]

    return np.array(board_matrix)

In [None]:
# chess-move to matrix representation
def move_to_rep(move, board):
    """Encode a SAN move as two one-hot 8x8 layers: from-square and to-square.

    The move is pushed onto ``board``, so the board is one move further along
    when this returns. That side effect is unchanged. What is removed is the
    full board copy that was made only to pop the move back off: the UCI
    string is now taken straight from the move returned by ``push_san``.

    Args:
        move: the move in SAN, legal in the current position of ``board``.
        board: board object providing ``push_san``; it is mutated.

    Returns:
        np.ndarray of shape (2, 8, 8); layer 0 marks the origin square and
        layer 1 the destination square. Row 0 is rank 8, column 0 is file 'a'.
    """
    uci = board.push_san(move).uci()

    def one_hot(square):
        # square is a two-character name such as 'e4'
        layer = np.zeros((8, 8))
        layer[8 - int(square[1]), letter_to_num[square[0]]] = 1
        return layer

    # uci[4:], when present, is a promotion suffix and is not encoded
    return np.stack([one_hot(uci[0:2]), one_hot(uci[2:4])])

In [None]:
# Break down game into individual moves
def create_move_list(s):
    """Split a game's notation string into a list of move tokens.

    Move-number prefixes such as ``"12. "`` are stripped, the remainder is
    split on single spaces, and the final token is always dropped.

    NOTE(review): the original comment says the last token is dropped because
    of a trailing space after the last move. If the notation ends with a
    result marker instead, that marker is what gets dropped. If it ends
    directly with a move, that move is lost. Confirm against the dataset.

    Args:
        s: notation string, e.g. ``"1. e4 e5 2. Nf3 Nc6 "``.

    Returns:
        list of str: the move tokens without the final token.
    """
    # raw string: '\d' and '\.' are invalid escape sequences in a normal literal
    return re.sub(r'\d*\. ', '', s).split(' ')[:-1]

Loading the Chess Dataset

In [None]:
# Read only the move text ('AN') and White's rating, then keep the games
# in which White is rated above 2000.
chess_data_raw = pd.read_csv(
    '/content/chess_games.csv',
    usecols=['AN', 'WhiteElo'],
)
chess_data = chess_data_raw[chess_data_raw['WhiteElo'].gt(2000)]

In [None]:
import gc
# Drop the name bound to the unfiltered frame; the filtered selection in
# chess_data is what the rest of the notebook uses. gc.collect() then runs a
# collection pass immediately (its return value is what the cell displays).
del chess_data_raw
gc.collect()

0

In [None]:
# Keep only the move text, then drop games whose notation contains '{' and
# games whose notation is 20 characters or shorter.
chess_data = (
    chess_data[['AN']]
    .loc[lambda games: ~games['AN'].str.contains('{')]
    .loc[lambda games: games['AN'].str.len() > 20]
)

In [None]:
# number of games left after filtering
print(len(chess_data))

883376


In [None]:
from torch.utils.data import Dataset

# pytorch dataset class
class ChessDataset(Dataset):
    """Dataset that serves one randomly drawn (position, next move) pair per item.

    Every ``__getitem__`` call picks a random game from ``games``, picks a
    random point in it, replays the game up to that point and returns the
    encoded position together with the encoded move that was played next.
    The requested index is ignored, so the nominal length only controls how
    many samples make up one pass over the dataset.

    Args:
        games: table of games; read via ``games.shape[0]`` and
            ``games['AN']`` (presumably a DataFrame like ``chess_data`` -
            confirm at the call site).
        epoch_size: value reported by ``len()``. Defaults to 40_000, the
            previously hard-coded length.
    """

    def __init__(self, games, epoch_size=40_000):
        super().__init__()
        self.games = games
        self.epoch_size = epoch_size

    def __len__(self):
        return self.epoch_size

    def __getitem__(self, index):
        """Return ``(x, y)`` float32 tensors for a random position.

        ``x`` is the board encoding from ``board_to_rep`` (sign-flipped when
        it is Black's turn) and ``y`` is the move encoding from
        ``move_to_rep``.

        Raises:
            ValueError: from ``np.random.randint`` when the drawn game has
                fewer than two move tokens.
        """
        # draw a random game; `index` is intentionally unused
        game_i = np.random.randint(self.games.shape[0])

        # read from the games this dataset was built with, not from the
        # notebook-level chess_data frame
        random_game = self.games['AN'].values[game_i]

        moves = create_move_list(random_game)

        # position index in [0, len(moves) - 2]; the move at that index is the target
        game_state_i = np.random.randint(len(moves) - 1)
        next_move = moves[game_state_i]

        # replay the game up to (not including) the target move
        board = chess.Board()
        for played_move in moves[:game_state_i]:
            board.push_san(played_move)

        # model input: the position before the target move
        x = board_to_rep(board)
        # model target: the move played from that position (this advances `board`)
        y = move_to_rep(next_move, board)

        # odd index means it is Black's move, so flip the sign of the input
        if game_state_i % 2 == 1:
            x *= -1

        x = torch.tensor(x, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.float32)

        return x, y

CNN Network and Custom Modules

In [None]:
import torch.nn as nn

#  creating a custom convolutional neural network module
class module(nn.Module):
    """Residual block: two conv + batch-norm + SELU stages followed by a skip add.

    Both convolutions use 3x3 kernels with stride 1 and padding 1 and map
    ``hidden_size`` channels to ``hidden_size`` channels, so the output has
    the same shape as the input.
    """

    def __init__(self, hidden_size):
        super().__init__()

        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)

        # Submodules are registered in the same order as before so that
        # state_dict keys and seeded initialisation stay the same.
        self.conv1 = nn.Conv2d(hidden_size, hidden_size, **conv_kwargs)
        self.conv2 = nn.Conv2d(hidden_size, hidden_size, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(hidden_size)
        self.bn2 = nn.BatchNorm2d(hidden_size)
        self.activation1 = nn.SELU()
        self.activation2 = nn.SELU()

    def forward(self, x):
        # copy of the block input, added back in after the second stage
        skip = x.clone()

        out = self.activation1(self.bn1(self.conv1(x)))
        out = self.activation2(self.bn2(self.conv2(out)))

        # residual connection, then the second activation is applied once more
        return self.activation2(out + skip)

In [None]:
import torch.nn.functional as F

# creating the entire chess network
class ChessNet(nn.Module):
    """Convolutional network mapping a 6-channel board to 2 output channels.

    Structure: input convolution (6 channels in, one per piece type) followed
    by ReLU, then ``hidden_layers`` instances of ``module``, then an output
    convolution producing 2 channels (from-square and to-square). All
    convolutions are 3x3 with stride 1 and padding 1.
    """

    def __init__(self, hidden_layers=4, hidden_size=200):
        super().__init__()

        self.hidden_layers = hidden_layers
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)

        self.input_layer = nn.Conv2d(6, hidden_size, **conv_kwargs)
        # ModuleList registers each block so its parameters are tracked
        blocks = [module(hidden_size) for _ in range(hidden_layers)]
        self.module_list = nn.ModuleList(blocks)
        self.output_layer = nn.Conv2d(hidden_size, 2, **conv_kwargs)

    def forward(self, x):
        hidden = F.relu(self.input_layer(x))

        # run the hidden blocks in order
        for residual_block in self.module_list:
            hidden = residual_block(hidden)

        return self.output_layer(hidden)

In [None]:
# Initialize the ChessNet model with desired hyperparameters
# number of hidden blocks and channels per block passed to ChessNet
hidden_layers, hidden_size = 4, 200
chess_net_model = ChessNet(hidden_layers=hidden_layers, hidden_size=hidden_size)

# Define the predict function to make predictions


def predict(x):
    """Run the notebook-level ``chess_net_model`` on ``x`` and return its output.

    The model is put into evaluation mode on every call and stays in that
    mode afterwards. The forward pass runs under ``torch.no_grad()``, so the
    returned tensor carries no gradient history. ``x`` must already include
    the batch dimension.
    """
    chess_net_model.eval()
    with torch.no_grad():
        return chess_net_model(x)

Picking Moves

In [None]:
# Check mate in 1
def checkmate_in_one(board):
    """Return a legal move that checkmates immediately, or None if there is none.

    Each legal move is tried on a scratch copy, so the caller's board is
    never modified. The copy is made without the move history because only
    the current position matters here. Moves are pushed directly instead of
    being converted to a UCI string and parsed again.

    Args:
        board: board object providing ``copy``, ``legal_moves``, ``push``,
            ``pop`` and ``is_checkmate``.

    Returns:
        The first legal move after which ``is_checkmate()`` is true, else None.
    """
    scratch = board.copy(stack=False)

    # materialise the moves first: the scratch board changes while we loop
    for move in list(scratch.legal_moves):
        scratch.push(move)
        delivers_mate = scratch.is_checkmate()
        scratch.pop()
        if delivers_mate:
            return move

    return None

# Distribution over moves
def distribution_over_moves(vals):
    """Turn raw scores into a sharpened probability distribution.

    The result is the softmax of the scores raised to the third power and
    renormalised, which widens the gap between high and low probabilities so
    weak moves are chosen less often. Algebraically that is
    ``exp(3 * v) / sum(exp(3 * v))``. It is computed in that form, after
    subtracting the largest score, so that very large or very small scores
    no longer overflow or underflow to NaN.

    Args:
        vals: sequence of scores (anything ``np.array`` accepts).

    Returns:
        np.ndarray of float64 probabilities summing to 1, in the order of
        ``vals``. An empty input gives an empty array.
    """
    # float64 keeps the sum within the tolerance np.random.choice requires
    scores = np.array(vals).astype(np.float64)
    if scores.size == 0:
        return scores

    # shifting by the max changes nothing mathematically, but caps exp() at 1
    weights = np.exp(3.0 * (scores - scores.max()))

    return weights / weights.sum()

In [None]:
import tensorflow as tf

# Overall move selection
def _square_to_index(square):
    """Map a square name such as 'e4' to its (row, column) in the 8x8 layers."""
    return 8 - int(square[1]), letter_to_num[square[0]]


def choose_move(board, player, color):
    """Pick a move for the side to play using the network's two output layers.

    Steps:
      1. If a mate in one exists, play it.
      2. Otherwise score every origin square that has a legal move with
         output channel 0, turn the scores into probabilities with
         ``distribution_over_moves`` and sample one origin square.
      3. Among the legal moves from that square, return the one whose
         destination has the highest score in output channel 1.

    Fixes relative to the previous version: destination scores are read from
    channel 1, not channel 0. Moves from other squares are excluded with
    ``-inf`` rather than ``0``, so they can no longer win the argmax when all
    candidate scores are negative. Origin squares are sorted so that seeded
    sampling is reproducible.

    Args:
        board: current position; provides ``legal_moves``.
        player: unused; kept so existing calls keep working.
        color: side to move; when equal to ``chess.BLACK`` the encoded board
            is sign-flipped before prediction.

    Returns:
        One element of ``board.legal_moves`` (or the mating move found by
        ``checkmate_in_one``).

    Raises:
        ValueError: if the position has no legal moves.
    """
    legal_moves = list(board.legal_moves)

    # check for forced checkmates
    mating_move = checkmate_in_one(board)
    if mating_move is not None:
        return mating_move

    if not legal_moves:
        raise ValueError('choose_move called on a position with no legal moves')

    # encode the board and add the batch dimension the network expects
    x = torch.tensor(board_to_rep(board), dtype=torch.float32).unsqueeze(0)
    if color == chess.BLACK:
        x = -x

    # channel 0 scores origin squares, channel 1 scores destination squares
    scores = predict(x)[0].detach().cpu().numpy()
    from_scores, to_scores = scores[0], scores[1]

    # distinct origin squares among the legal moves, in a stable order
    froms = sorted({str(legal_move)[:2] for legal_move in legal_moves})
    from_vals = [float(from_scores[_square_to_index(from_)]) for from_ in froms]

    # sample an origin square according to the sharpened distribution
    probs = distribution_over_moves(from_vals)
    chosen_from = str(np.random.choice(froms, size=1, p=probs)[0])

    # score each legal move by its destination; other origins can never win
    to_vals = []
    for legal_move in legal_moves:
        uci = str(legal_move)
        if uci[:2] == chosen_from:
            # uci[2:4] is the destination; a promotion suffix, if any, is ignored
            to_vals.append(float(to_scores[_square_to_index(uci[2:4])]))
        else:
            to_vals.append(-np.inf)

    return legal_moves[int(np.argmax(to_vals))]