In [3]:
!pip install python-chess

import tensorflow as tf
import pandas as pd
import chess.pgn
import numpy as np
from tqdm import tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [4]:
# Load the PGN file
pgn = open('data.pgn')

In [5]:
# Read the PGN file and extract the ELO ratings and moves for each game
games = []
num_games = sum(1 for _ in open("data.pgn"))

In [None]:
for i in tqdm(range(num_games)):
    game = chess.pgn.read_game(pgn)
    if game is None:
        break
    games.append(game)

  1%|          | 6947/859598 [00:32<38:29, 369.25it/s]

In [None]:
# Extract the features and labels
X = []
Y = []

In [None]:
for game in tqdm(games):
    board = game.board()
    
    if "WhiteElo" in game.headers:
        elo_white = int(game.headers["WhiteElo"])
    else:
        elo_white = 0
    if "BlackElo" in game.headers:
        elo_black = int(game.headers["BlackElo"])
    else:
        elo_black = 0


    for move in game.mainline_moves():
        # Convert the board representation to a one-hot encoded format
        fen = board.fen()
        pieces = ['p', 'r', 'n', 'b', 'q', 'k', 'P', 'R', 'N', 'B', 'Q', 'K']
        piece_to_int = dict(zip(pieces, range(12)))
        one_hot = np.zeros((8, 8, 12), dtype=np.int8)
        rows = fen.split()[0].split('/')
        for i, row in enumerate(rows):
            j = 0
            for char in row:
                if char.isdigit():
                    j += int(char)
                else:
                    one_hot[i, j, piece_to_int[char]] = 1
                    j += 1
        X.append(one_hot)
        Y.append([elo_white, elo_black])
        board.push(move)

In [None]:
# Convert the features and labels to numpy arrays
X = np.array(X)
Y = np.array(Y)

In [None]:
# Split the dataset into training and testing sets
split = int(len(X) * 0.8)
X_train = X[:split]
Y_train = Y[:split]
X_test = X[split:]
Y_test = Y[split:]


In [None]:
# Normalize the feature matrix
X_train = X_train / 768
X_test = X_test / 768


In [None]:
# Define the TensorFlow model
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(8, 8, 12)),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(2)
])

In [None]:
# Compile the model
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

In [None]:
# Train the model on the training set
model.fit(X_train, Y_train, epochs=10, validation_split=0.2)

In [None]:
# Evaluate the model on the testing set
mse, mae = model.evaluate(X_test, Y_test)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)