In [1]:
import pandas as pd
import numpy as np

The data used is available on Kaggle: https://www.kaggle.com/datasets/arevel/chess-games?resource=download. It contains 6.2M games played on Lichess.

In this notebook we clean it by removing the information we do not need and preparing it for use by our AI model.

In [None]:
# Load the raw games export into a DataFrame and preview it.
file = "./data/chess_games.csv"
df = pd.read_csv(filepath_or_buffer=file)
df.head()  # head() shows 5 rows by default

In [None]:
# Keep only the "AN" column, which holds the moves of each game.
# .copy() makes the result an independent frame, so later in-place writes to
# `df` do not raise SettingWithCopyWarning.
df = df[["AN"]].copy()
print(df.iloc[0, 0])  # raw move string of the first game
df.head(5)


In [4]:
# Remove the useless information in the move strings (the number before the moves)

def clean_chess_moves(move: str) -> str:
    """Strip the move numbers and the trailing game result from a move string.

    Tokens containing a '.' (move numbers such as ``1.`` or ``1...``) are
    dropped. The final token is removed only when it is a game-result marker
    (``1-0``, ``0-1``, ``1/2-1/2`` or ``*``), so a string that does not end
    with a result keeps its last move.

    Args:
        move: whitespace-separated move string, e.g. ``"1. e4 e5 2. Nf3 1-0"``.

    Returns:
        The remaining tokens joined by single spaces (``""`` if none remain).
    """
    results = {"1-0", "0-1", "1/2-1/2", "*"}
    elements = move.split()
    # Only strip the last token when it really is the result of the game.
    if elements and elements[-1] in results:
        elements = elements[:-1]

    return ' '.join(elem for elem in elements if '.' not in elem)


# Clean every game in one pass over the "AN" column. Reading and writing
# df.iloc[i, 0] row by row is extremely slow on millions of rows, and
# assign() returns a new frame instead of mutating `df` in place.
df = df.assign(AN=df["AN"].map(clean_chess_moves))

In [6]:
# Write the cleaned moves to disk so they can be reloaded without redoing the cleaning.
# NOTE(review): the row index is written as well (pandas default), which adds an
# unnamed leading column to the CSV — confirm whether downstream code relies on it.
filename = "./data/chess_games_reduced.csv"
df.to_csv(path_or_buf=filename, index=True)


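**FROM THIS POINT ON, THERE IS NO NEED TO RERUN THE CLEANING CELLS ABOVE** (only the imports cell is required; the reduced dataset is reloaded from disk below).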

In [3]:
# Reload the reduced dataset from disk.
filename = "./data/chess_games_reduced.csv"
df = pd.read_csv(filepath_or_buffer=filename)

In our model, we'll represent a position as a vector of size 64\*12 (one entry per square, piece type and color) + 16 (for potential en passant captures) + 4 (for castling rights) + 1 (turn = -1 or 1), i.e. 789 entries in total. The piece types are ordered (R,N,B,Q,K,P). Among the 64\*12 piece entries, the first 12 are for a1 and the last 12 are for h8; squares go left to right, bottom to top. Within a square's 12 entries, the first 6 are for the black pieces and the last 6 for the white pieces.

In [5]:
def getIndex(square:str)->int:
    """Map a square name to its 0-based board index.

    Files a-h are columns 1-8 and the digit is the rank, so squares are
    numbered left to right, bottom to top: a1 -> 0, b1 -> 1, a2 -> 8, h8 -> 63.
    Only the first two characters of ``square`` are read.

    Raises:
        KeyError: if the first character is not one of a-h.
        ValueError: if the second character is not a digit.
    """
    file_numbers = {letter: number for number, letter in enumerate("abcdefgh", start=1)}
    column = file_numbers[square[0]]
    rank = int(square[1])
    return (rank - 1) * 8 + (column - 1)

In [6]:
def toIndex(pos:str, type:str, color:int)->int:
    """Return the position-vector slot of a piece standing on a square.

    Each square owns 12 consecutive slots: the 6 piece types (R, N, B, Q, K, P,
    in that order) for color 0, followed by the same 6 types for color 1.

    pos   : square name (a1 -> square 0, b1 -> 1, a2 -> 8, etc.)
    type  : piece letter, upper or lower case
    color : 1 for white, 0 for black
    """
    piece_slots = dict(zip("RNBQKP", range(6)))
    piece = type.upper()
    square_offset = 12 * getIndex(pos)
    return square_offset + 6 * color + piece_slots[piece]

# Starting position: one [square, piece, color] entry per piece (1 = white, 0 = black).
# Back rank first (files a to h), then the pawns.
init_board_white = [[f + "1", piece, 1] for f, piece in zip("abcdefgh", "RNBQKBNR")] + [[f + "2", "P", 1] for f in "abcdefgh"]
init_board_black = [[f + "8", piece, 0] for f, piece in zip("abcdefgh", "RNBQKBNR")] + [[f + "7", "P", 0] for f in "abcdefgh"]

# Slots equal to 1 at the start: every piece, plus 784-787 (castle) and 788 (turn).
index_for_1 = [toIndex(*entry) for entry in init_board_white + init_board_black] + list(range(784, 789))

# 64 squares * 12 piece slots, 16 en passant slots, 4 castle slots, 1 turn slot.
board_size = 64*12 + 16 + 4 + 1
init_board = np.zeros(board_size)
init_board[index_for_1] = 1
print(init_board)

[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

In [9]:
def getMoveIndex(move:str)->int:
    """Return the board index (0-63) of the square named in a move string.

    A two-character move ("e4") is a pawn move and is the square itself.
    For anything longer, the square is the last file+rank pair found in the
    string, so captures ("Nxf3", "exd5"), disambiguated moves ("Nbd2",
    "R1a3"), promotions ("e8=Q") and check/mate suffixes ("Qh5+") all resolve
    to their destination square instead of failing on ``move[1:3]``.

    Raises:
        ValueError: if a move longer or shorter than two characters contains
            no square (e.g. castling "O-O", whose squares depend on the side
            to move).
    """
    if len(move) == 2:
        # It is a pawn move: the string is the square.
        return getIndex(move)

    # Collect every adjacent (file, rank) pair; the destination is the last one.
    squares = [
        move[i:i + 2]
        for i in range(len(move) - 1)
        if move[i] in "abcdefgh" and move[i + 1] in "12345678"
    ]
    if not squares:
        raise ValueError(f"no square found in move {move!r}")
    return getIndex(squares[-1])

In [19]:

def getMoveVector(previous_move, new_move):
    """Encode a move as a vector marking its two squares.

    Example: ("Na2", "Nf3") -> vector representing a2 -> f3.

    Returns a numpy vector of size 64*2: slots 0-63 hold the starting square
    and slots 64-127 hold the arrival square, each marked with a 1.
    """
    start_slot = getMoveIndex(previous_move)
    arrival_slot = 64 + getMoveIndex(new_move)
    move_vector = np.zeros(128)
    move_vector[[start_slot, arrival_slot]] = 1
    return move_vector

def getMoveCoords(previous_move, new_move):
    """Return the two slots of a move as a list, without building a vector.

    Example: ("Na2", "Nf3") -> [index of a2, 64 + index of f3].
    The first value is the starting square (0-63); the second is the arrival
    square shifted by 64.
    """
    # No array is needed here; the original allocated an unused np.zeros(128).
    return [getMoveIndex(previous_move), 64 + getMoveIndex(new_move)]

def getNewBoard(previous_board, previous_move, move, color):
    """Placeholder for computing the board that results from a move.

    previous_board : board represented as an array
    previous_move, move : move strings (e.g. "Nf3")
    color : presumably 1 for white, 0 for black — TODO confirm

    TODO: not implemented yet; the function currently returns None.
    """
    return None


# Quick check: f1 is square 5 and h8 is square 63, so this prints [5, 127] (63 + 64).
print(getMoveCoords("Nf1", "h8"))

[5, 127]
