In [8]:
import chess.pgn

MIN_ELO = 1800
GENERATE_GAMES = 5000

In [9]:
# read games from a .pgn file

def process_game(game: chess.pgn.Game) -> None:
    # iterate over game moves and store fen in a list
    fen_list = []
    board = game.board()
    for move in game.mainline_moves():
        board.push(move)
        fen_list.append(" ".join(board.fen().split(" ")[:2]))
    
    # return list of fens
    return fen_list


moves = []
with open("../data/games.pgn") as pgn:
    while len(moves) < GENERATE_GAMES:
        game = chess.pgn.read_game(pgn)
        if game is None:
            break

        # if elo is too low, skip game
        elo = (int(game.headers["WhiteElo"]) + int(game.headers["BlackElo"])) / 2
        if elo > MIN_ELO:
          continue

        # process game
        fens = process_game(game)
        moves.extend(fens)

# elminiates duplicates
moves = list(set(moves))


# Calculations

run stockfish on every game and get the move evaluation

In [10]:
import chess

# create a stockfish engine
import chess.engine
stockfish = chess.engine.SimpleEngine.popen_uci("../stockfish.avx2")

# evaluate position using stockfish
def evaluate_position(fen: str) -> float:
    board = chess.Board(fen)
    info = stockfish.analyse(board, chess.engine.Limit(time=0.2))
    return info["score"].white().score(mate_score=5000)

# get evaluation for each position
fens = []
evaluations = []
for i, fen in enumerate(moves):
    fens.append(fen)
    evaluations.append(evaluate_position(fen))
    if(i % 10 == 0):
        print(f"{i} / {len(moves)}")


# close stockfish engine
stockfish.quit()

0 / 4852
10 / 4852
20 / 4852
30 / 4852
40 / 4852
50 / 4852
60 / 4852
70 / 4852
80 / 4852
90 / 4852
100 / 4852
110 / 4852
120 / 4852
130 / 4852
140 / 4852
150 / 4852
160 / 4852
170 / 4852
180 / 4852
190 / 4852
200 / 4852
210 / 4852
220 / 4852
230 / 4852
240 / 4852
250 / 4852
260 / 4852
270 / 4852
280 / 4852
290 / 4852
300 / 4852
310 / 4852
320 / 4852
330 / 4852
340 / 4852
350 / 4852
360 / 4852
370 / 4852
380 / 4852
390 / 4852
400 / 4852
410 / 4852
420 / 4852
430 / 4852
440 / 4852
450 / 4852
460 / 4852
470 / 4852
480 / 4852
490 / 4852
500 / 4852
510 / 4852
520 / 4852
530 / 4852
540 / 4852
550 / 4852
560 / 4852
570 / 4852
580 / 4852
590 / 4852
600 / 4852
610 / 4852
620 / 4852
630 / 4852
640 / 4852
650 / 4852
660 / 4852
670 / 4852
680 / 4852
690 / 4852
700 / 4852
710 / 4852
720 / 4852
730 / 4852
740 / 4852
750 / 4852
760 / 4852
770 / 4852
780 / 4852
790 / 4852
800 / 4852
810 / 4852
820 / 4852
830 / 4852
840 / 4852
850 / 4852
860 / 4852
870 / 4852
880 / 4852
890 / 4852
900 / 4852
910 / 4852

# dataset building

Create numpy arrays from chess positions.  

One-hot encode chess positions and white-black onehot.  

Array of float predictions from stockfish

In [11]:
import numpy as np

def fen_to_npy(fen: str) -> np.array:
    board = chess.Board(fen)
    npy = np.zeros((12, 8,8))
    for square, piece in board.piece_map().items():
        x = square%8
        y = square//8
        piece_owner = 0 if piece.color == chess.WHITE else 1
        piece_type = piece.piece_type -1 + 6*piece_owner

        npy[piece_type][y][x] = 1
    return npy

def fen_to_turn(fen: str) -> int:
    return [1, 0] if fen.split(" ")[1] == "w" else [0, 1]

In [12]:
one_hot_boards = np.array([fen_to_npy(fen) for fen in moves])
turns = np.array([fen_to_turn(fen) for fen in moves])
evaluations = np.array(evaluations)
fens = np.array(fens)

# save data to disk

use numpy.save()

In [13]:
# save data
np.save("../data/one_hot_boards.npy", one_hot_boards)
np.save("../data/turns.npy", turns)
np.save("../data/evaluations.npy", evaluations)
np.save("../data/fens.npy", fens)