# generate POS_DATA and PLAYED_MOVE

**Pos data**:  
Position data with all the information available from standard FEN notation.  
Encoded as a $(N,6+1=7,8,8)$ numpy array.  

## dimensions:
**N** is batch size  
**6+1=7** is the piece, castling and player-turn encoding. The first 6 layers denote where each of the 6 possible chess piece types is: 1 for a white piece, -1 for a black piece and 0 for no piece.  
1 additional layer encodes castling rights and the player to move  
**8** chessboard height  
**8** chessboard width  

In [7]:
import chess.pgn

# Elo limiting for more consistent results, as proposed by the MAIA chess paper
# https://arxiv.org/abs/2006.01855
MIN_ELO = 2300
MAX_ELO = 2500  
# The game-reading loop stops once at least this many moves have been collected
GENERATE_MOVES = 10000
# Only referenced by the commented-out Stockfish evaluation cell
STOCKFISH_DEPTH = 20

# parameters for early game skipping
EARLY_MOVES_N = 8 # Positions whose full-move number is below EARLY_MOVES_N are considered early game
EARLY_DROP_P = 0.8 # Probability of dropping a position (not the whole game) while in the early game

In [8]:
# read games from a .pgn file
import random

def process_game(game: chess.pgn.Game) -> tuple:
    """Collect (position, played move) training pairs from one game.

    Walks the main line of ``game``. For every move that is kept, the FEN of
    the position the move was played *from* is stored together with the move
    in UCI notation, so the move is a valid label for the stored position.

    Early-game samples are thinned out to reduce the weight of opening
    positions: while the board's full-move number is below ``EARLY_MOVES_N``
    a sample is dropped with probability ``EARLY_DROP_P``.

    Args:
        game: Parsed PGN game.

    Returns:
        ``(fen_list, next_move_list)``: two parallel lists of equal length,
        FEN strings and the UCI string of the move played in each position.
    """
    fen_list = []
    next_move_list = []
    board = game.board()
    for move in game.mainline_moves():
        # Snapshot the position BEFORE the move is played; taking the FEN
        # after the push would pair the move with the position it produced.
        fen_before_move = board.fen()
        board.push(move)

        # The early-game test uses the full-move counter after the push.
        # random.random() is only drawn for early-game moves.
        if board.fullmove_number < EARLY_MOVES_N and random.random() < EARLY_DROP_P:
            continue

        fen_list.append(fen_before_move)
        next_move_list.append(move.uci())

    return fen_list, next_move_list

# Read games from the PGN file until at least GENERATE_MOVES samples exist
# (the last processed game may push the total slightly above the target).
fens = []
moves = []
with open("../data/games.pgn") as pgn:
    while len(moves) < GENERATE_MOVES:
        game = chess.pgn.read_game(pgn)
        if game is None:
            # end of file reached before the target was met
            break

        # Games without usable rating headers (missing, or non-numeric such
        # as "?") cannot be matched against the Elo band, so skip them.
        try:
            white_elo = int(game.headers["WhiteElo"])
            black_elo = int(game.headers["BlackElo"])
        except (KeyError, ValueError):
            continue

        # Keep only games whose average rating lies in [MIN_ELO, MAX_ELO].
        # (A chained `MIN_ELO > elo > MAX_ELO` can never be true.)
        elo = (white_elo + black_elo) / 2
        if not MIN_ELO <= elo <= MAX_ELO:
            continue

        # process game
        fens_o, moves_o = process_game(game)
        fens.extend(fens_o)
        moves.extend(moves_o)


print(f"Generated {len(fens)} fens and {len(moves)} moves")


Generated 10001 fens and 10001 moves


# Data preprocessing  

## board positions  
Board positions are represented as a matrix, explained above

## moves  
Moves will be predicted by two models that branch from the main one and predict pieces and positions: PiecePicker and PositionPicker.  
![architecture](../docs/architecture.png)  

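We need to represent moves in such a way that each of the two models gets its own target: one 8x8 one-hot board marking the square the piece moves from, and one marking the square it moves to.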

In [9]:
import numpy as np


def fen_to_turn(fen: str) -> list:
    """Return the side to move as a one-hot pair.

    Args:
        fen: Position in FEN notation; the second space-separated field is
            the active colour.

    Returns:
        ``[1, 0]`` when the active colour field is ``"w"``, otherwise
        ``[0, 1]``.
    """
    active_colour = fen.split(" ")[1]
    return [1, 0] if active_colour == "w" else [0, 1]

def fen_to_castling(fen: str) -> np.ndarray:
    """Return the castling rights of a FEN as a 4-element 0/1 array.

    Args:
        fen: Position in FEN notation; the third space-separated field
            holds the castling availability.

    Returns:
        Integer array ``[K, Q, k, q]`` where each entry is 1 if that letter
        appears in the castling field and 0 otherwise.
    """
    castling = fen.split(" ")[2]
    # Order is fixed: white king-side, white queen-side, black king-side,
    # black queen-side.
    return np.array([1 if flag in castling else 0 for flag in "KQkq"])

def fen_to_npy(fen: str) -> np.ndarray:
    """Encode a FEN position as a ``(7, 8, 8)`` float array.

    Planes 0-5 hold one piece type each (index ``piece_type - 1``), with
    1 for a white piece, -1 for a black piece and 0 for an empty square.
    Plane 6 carries the metadata in its first row: columns 0-1 are the
    one-hot side to move from ``fen_to_turn`` and columns 2-5 the castling
    flags from ``fen_to_castling``; the rest of the plane stays zero.

    Args:
        fen: Position in FEN notation.

    Returns:
        The encoded position, indexed as ``[plane][square // 8][square % 8]``.
    """
    board = chess.Board(fen)
    npy = np.zeros((6 + 1, 8, 8))
    for square, piece in board.piece_map().items():
        # Split the 0..63 square index into its row (y) and column (x).
        y, x = divmod(square, 8)
        piece_owner = 1 if piece.color == chess.WHITE else -1
        npy[piece.piece_type - 1][y][x] = piece_owner

    # Metadata row: [white to move, black to move, K, Q, k, q, 0, 0]
    npy[6][0][0:2] = fen_to_turn(fen)
    npy[6][0][2:6] = fen_to_castling(fen)
    return npy


# convert one sample to npy
# Sanity check: encode the first collected FEN and print the resulting shape.
npy = fen_to_npy(fens[0])
print(npy.shape)
#print(npy)


(7, 8, 8)


In [20]:
# import chess

# # create a stockfish engine
# import chess.engine
# stockfish = chess.engine.SimpleEngine.popen_uci("../stockfish.avx2")
# # set stockfish depth
# stockfish.configure({"Skill Level": STOCKFISH_DEPTH, "Threads": 8})

# # evaluate position using stockfish
# def evaluate_position(fen: str) -> float:
#     board = chess.Board(fen)
#     info = stockfish.analyse(board, chess.engine.Limit(time=0.2))
#     return info["score"].white().score(mate_score=5000)


# # close stockfish engine
# stockfish.quit()



# REPRESENT MOVES
import numpy as np


# represent moves with a (2,8,8) npy 
# 2 8x8 boards, one for from_square and one for to_square

def move_to_npy(move: str) -> np.ndarray:
    """Encode a UCI move as two one-hot 8x8 boards.

    Plane 0 marks the square the piece moves from and plane 1 the square it
    moves to, each indexed as ``[square // 8][square % 8]``. Only the two
    squares are encoded; a promotion piece in the UCI string is not
    represented.

    Args:
        move: Move in UCI notation (parsed with ``chess.Move.from_uci``).

    Returns:
        Float array of shape ``(2, 8, 8)`` with exactly one 1 per plane.
    """
    # Keep the parsed move under its own name instead of rebinding the
    # string parameter to a different type.
    parsed = chess.Move.from_uci(move)
    npy = np.zeros((2, 8, 8))
    for plane, square in enumerate((parsed.from_square, parsed.to_square)):
        row, col = divmod(square, 8)
        npy[plane][row][col] = 1
    return npy

# dataset building

creating a dataset from moves created before

In [11]:
from tqdm import tqdm

# import tqdm for progress bar

In [12]:
# convert all samples to npy

# Encode every collected FEN; stacking the (7, 8, 8) samples gives one
# array with the sample index as the leading dimension.
npy_fens = np.array([fen_to_npy(fen) for fen in tqdm(fens)])

100%|██████████| 10001/10001 [00:01<00:00, 6877.77it/s]


In [21]:
# Convert moves to npy

# Encode every played move; stacking the (2, 8, 8) samples gives one array
# with the sample index as the leading dimension.
npy_moves = np.array([move_to_npy(uci) for uci in tqdm(moves)])

# print one sample
print(npy_moves[0].shape)
print(npy_moves[0])

100%|██████████| 10001/10001 [00:00<00:00, 123909.95it/s]

(2, 8, 8)
[[[0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]]]





# save data to disk

use numpy.save()

In [16]:
# save data
import os

# np.save does not create missing directories; make sure the target folder
# exists so the preprocessing work is not lost to a FileNotFoundError.
os.makedirs("../data/seven", exist_ok=True)
np.save("../data/seven/boards.npy", npy_fens)
np.save("../data/seven/moves.npy", npy_moves)
