In [86]:
import numpy as np
import chess 
import chess.pgn
import random
import pandas as pd
from datetime import datetime
from tqdm import tqdm

In [123]:
def set_seed(seed_num):
    '''
    Seeds Python's built-in random module so that later calls to it
    (e.g. random.sample) are reproducible.

    seed_num: value handed to random.seed
    '''
    random.seed(a=seed_num)
    
def outcome(game):
    '''
    Returns the result of a game as a one-character string, taken from the
    last character of the game's 'Result' header:
        - '0': White won  (header '1-0')
        - '1': Black won  (header '0-1')
        - '2': Draw       (header '1/2-1/2')
    Any other header value simply yields its own last character.
    '''
    result_tag = game.headers['Result']
    final_char = result_tag[-1]
    return str(final_char)

def sample_position(position_list, move_list, num_positions, num_omit):
    '''
    Picks the positions that are eligible for training and returns at most
    num_positions of them.

    A position is eligible when:
        - it comes after the first num_omit entries of the game, and
        - the move stored at the same index in move_list is not a capture
          (its notation contains no 'x').

    If more than num_positions positions are eligible, a random sample of
    exactly num_positions is returned (drawn with the random module);
    otherwise all eligible positions are returned in game order.
    '''
    # Drop the opening entries from both lists so their indices stay aligned
    later_positions = position_list[num_omit:]
    later_moves = move_list[num_omit:]

    # Keep only positions whose matching move has no capture marker
    candidates = []
    for idx, san in enumerate(later_moves):
        if 'x' in san:
            continue
        candidates.append(later_positions[idx])

    if len(candidates) <= num_positions:
        return candidates

    return random.sample(candidates, num_positions)
    
def pgn_to_fen(game):
    '''
    Walks the main line of a game (always following variation 0) and returns
    two parallel lists:
        - position_list: FEN of the position reached after each move
        - move_list: SAN notation of each move

    Entry i of both lists refers to the same move, so the lists always have
    equal length. A game without moves yields two empty lists.
    '''
    position_list = []
    move_list = []

    # Keep one running board and push each move onto it, instead of asking
    # every node for its own board (which rebuilds the position from the
    # start of the game each time and makes the walk quadratic).
    board = game.board()
    node = game

    while not node.is_end():
        node = node.variation(0)
        move = node.move
        # SAN must be generated from the position *before* the move is played
        move_list.append(board.san(move))
        board.push(move)
        position_list.append(board.fen())

    return position_list, move_list

def fen_to_bitboard(fen):
    '''
    Takes in a FEN position and returns a bitboard notation.

    The result is a numpy integer vector of length 773:
        - indices 0..767: 64 squares x 12 slots, in the order the squares
          appear in the FEN string. Within a square the slot order is
          p, n, b, r, q, k, P, N, B, R, Q, K; the slot of the piece standing
          on the square is 1, empty squares stay all 0.
        - index 768: 1 if the side-to-move field is 'w', else 0
        - indices 769..772: 1 if the castling field contains 'K', 'Q', 'k',
          'q' respectively, else 0

    Only the first three FEN fields (placement, side to move, castling) are
    read, so strings without the trailing en-passant / clock fields are
    accepted too.

    Raises ValueError if fewer than three fields are present and KeyError on
    an unknown piece character.
    '''
    # Slot of each piece letter inside a square's group of 12
    piece_slots = {piece: slot for slot, piece in enumerate('pnbrqkPNBRQK')}

    bitboard = np.zeros(773, dtype=int)

    # split() without an argument tolerates repeated/leading/trailing
    # whitespace; slicing ignores however many fields follow the castling one
    position, turn, castling = fen.split()[:3]

    square_start = 0
    for ch in position:
        if ch == '/':
            # Rank separator, carries no square
            continue
        if '1' <= ch <= '8':
            # A digit encodes that many consecutive empty squares
            square_start += int(ch) * 12
        else:
            bitboard[square_start + piece_slots[ch]] = 1
            square_start += 12

    bitboard[768] = 1 if turn == 'w' else 0
    for flag_index, flag in enumerate('KQkq', start=769):
        bitboard[flag_index] = 1 if flag in castling else 0

    return bitboard

def write_to_csv(dataframe, fp):
    '''
    Turns a dictionary of columns into a pandas dataframe, saves it as a csv
    file at the path fp (without the index column) and prints the time at
    which the write finished.
    '''
    pd.DataFrame.from_dict(dataframe).to_csv(fp, index=False)

    finished_at = datetime.now().strftime("%H:%M:%S")
    print("Completion Time =", finished_at)
    
def games_to_bitboard_csv(file_path, num_positions=10, num_omit=5, seed_num=0, fp='../data/chess_dataset.csv'):
    '''
    Acts as the driver function that reads the games and calls all other
    functions to eventually write the bitboard representation to csv.

    file_path:     path of the PGN file to read games from
    num_positions: maximum number of positions sampled from each game
    num_omit:      number of opening entries skipped in each game
    seed_num:      seed for the random module, so sampling is reproducible
    fp:            path of the csv file to write

    Only decisive games are used: a game contributes rows when outcome()
    returns '0' (White won) or '1' (Black won). Draws and games with any
    other result (e.g. the unfinished marker '*') are skipped, since they
    carry no win/loss label.

    Each written row holds the bitboard of one sampled position, the game's
    label and the game's headers (as a string) as identifier.
    '''
    print("Start Time =", datetime.now().strftime("%H:%M:%S"))

    set_seed(seed_num)
    dataframe = {
        'bitboard': [],
        'label': [],
        'identifier': [],
    }

    # Context managers close the PGN file and the progress bar even when
    # processing a game raises. One bar is created up front and advanced
    # manually because the number of games is not known in advance.
    with open(file_path) as pgn, tqdm(unit='game') as progress:
        while True:
            game = chess.pgn.read_game(pgn)
            if game is None:
                # read_game signals the end of the file with None
                break
            progress.update(1)

            result = outcome(game)
            if result not in ('0', '1'):
                continue

            position_list, move_list = pgn_to_fen(game)
            # Identical for every position of this game, so build it once
            identifier = str(game.headers)
            for pos in sample_position(position_list, move_list, num_positions, num_omit):
                dataframe['bitboard'].append(fen_to_bitboard(pos))
                dataframe['label'].append(result)
                dataframe['identifier'].append(identifier)

    write_to_csv(dataframe, fp)
    
    


0it [00:25, ?it/s]
0it [00:24, ?it/s]


In [124]:
# Convert the sample PGN file into a bitboard csv next to it; num_positions,
# num_omit and seed_num are left at the function's default values.
games_to_bitboard_csv('./data/temp_sample.pgn', fp='./data/temp_sample.csv')

0it [00:00, ?it/s]
0it [00:00, ?it/s]

Start Time = 02:14:20


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s][A

Completion Time = 02:14:21
