In [3]:
! pip install chess
! pip install tensorflow


import chess
import numpy as np
import chess.pgn

import tensorflow as tf
from tensorflow.keras import layers, models



Collecting chess
  Downloading chess-1.11.2.tar.gz (6.1 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/6.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/6.1 MB[0m [31m9.8 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/6.1 MB[0m [31m34.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m6.1/6.1 MB[0m [31m66.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.1/6.1 MB[0m [31m49.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: chess
  Building wheel for chess (setup.py) ... [?25l[?25hdone
  Created wheel for chess: filename=chess-1.11.2-py3-none-any.whl size=147775 sha256=b4121a7a7f537013494d319f4118925ff4fc9e61e5304ce57bc41f5428

In [4]:
def create_tensor(chess_board):
    """
    Function to create a tensor based on the current layout of the chess pieces to feed into an ML Model:
    input: Board object
    output: numpy array of size (8,8,12)
    """
    chess_tensor = np.zeros((8, 8, 12))

    # Define (layer index) -> (piece_type, color) mapping
    # chess.PAWN=1, chess.KNIGHT=2, ..., chess.KING=6; chess.WHITE=True, chess.BLACK=False
    layer_to_piece_type = {}
    for color in [chess.WHITE, chess.BLACK]:
        for piece_type in range(1, 7):  # chess.PAWN==1, chess.KING==6
            if color == chess.WHITE:
                layer = piece_type - 1  # 0-indexed layers for white
            else:
                layer = piece_type - 1 + 6  # 6-11 for black
            layer_to_piece_type[layer] = (piece_type, color)

    # Fill in the tensors with the state of each tile in the chess board (what type of piece is on that
    # tile or if there is a tile on that piece at all)
    for i in range(12):  # piece layers
        piece_type, color = layer_to_piece_type[i]
        piece_pos = set(chess_board.pieces(piece_type, color))

        tracker = 0
        # Loop through squares and assign to the 3rd dimension [row][col][layer]
        for j in range(7, -1, -1):  # Ranks (dim2)
            for k in range(8):      # Files (dim1)
                if tracker in piece_pos:
                    # Direct assignment to the last dimension
                    chess_tensor[j][k][i] = 1
                tracker += 1

    return chess_tensor

In [10]:
def create_training_data(filename='lichess_db_standard_rated_2013-07.pgn', num_games=None):
    """
    Loads chess games from a PGN file, creates training data arrays X (features) and y (labels),
    and saves them to 'X.npy' and 'y.npy'.

    :param filename: PGN file name to load games from. If None, defaults to 'lichess_db_standard_rated_2013-07.pgn'.
    :param num_games: Number of games to load. If None, loads all games in the file.
    """
    with open(filename) as chess_data:
        game_count = 0
        X_list = []
        y_list = []

        while True:
            if num_games is not None and game_count >= num_games:
                break
            game = chess.pgn.read_game(chess_data)
            if game is None:
                break  # End of file
            result = game.headers['Result']
            if result == '1-0':
                y = 1
            elif result == '0-1':
                y = -1
            else:
                y = 0

            board = game.board()
            for move in game.mainline_moves():
                board.push(move)
                X_list.append(create_tensor(board))
                y_list.append(y)

            game_count += 1

        X = np.array(X_list)
        y = np.array(y_list)
        np.save('X.npy', X)
        np.save('y.npy', y)

# These layers determine the depth of what the model can see. It will find spacial patterns within more spaces with each consecutive layer.
# Consider it a 'field of vision' for interactions between pieces. The more layers, the higher the computational cost. More doesn't always mean better
def create_model():
    model = models.Sequential()

    model.add(layers.Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=(8, 8, 12))) # Square can see 1 square away
    model.add(layers.Conv2D(64, (3, 3), padding='same', activation='relu')) # Square can see 2 squares away
    model.add(layers.Conv2D(64, (3, 3), padding='same', activation='relu')) # Square can see 3 squares away
    model.add(layers.Conv2D(64, (3, 3), padding='same', activation='relu')) # Square can see 4 squares away
    model.add(layers.Conv2D(64, (3, 3), padding='same', activation='relu')) # Square can see 5 squares away

    model.add(layers.Flatten()) # Flatten to a 1D numpy array
    model.add(layers.Dense(128, activation='relu')) # To prevent overfitting with too many neurons
    model.add(layers.Dense(1, activation='tanh')) # Will produce a single number related to who is winning

    return model

In [11]:
# Only use this block if I am on a colab notebook to get my data here

from google.colab import drive
drive.mount('/content/drive')

pgn_file_path = '/content/drive/MyDrive/Colab Notebooks/chess_data/lichess_db_standard_rated_2013-07.pgn'
create_training_data(filename=pgn_file_path, num_games=100)
print("Training data 'X.npy' and 'y.npy' created successfully from 100 games in your Drive!")

Mounted at /content/drive
Training data 'X.npy' and 'y.npy' created successfully from 100 games in your Drive!


In [12]:
from re import X
relu_model = create_model()
relu_model.compile(optimizer='adam',loss='mean_squared_error',metrics=['mae'])
relu_model.summary()

X = np.load('X.npy')
y = np.load('y.npy')

relu_model.fit(
    X, y,
    epochs=20,
    batch_size=64,
    validation_split=0.2
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
