In [6]:
# Imports
from chess import WHITE, BLACK, PAWN, KNIGHT, BISHOP, ROOK, QUEEN, Board, \
    Color, Piece
from chess.pgn import read_game, GameNode
import pandas as pd
from pandas import DataFrame

In [3]:
# Open the PGN database as UTF-8 text.
# The handle is intentionally left open: the loading cell reads games from
# `chess_db` one at a time via read_game().
chess_db = open(
    file="data/AJ-OTB-PGN-001.pgn",
    mode="r",
    encoding="utf-8",
)

In [30]:
# Column layout of the dataset: one count column per (side, piece kind),
# followed by three one-hot result columns (White win, Black win, other).
data_headers = [
    f"{side} {kind}"
    for side in ("White", "Black")
    for kind in ("Pawns", "Knights", "Bishops", "Rooks", "Queens")
] + ["Result_W", "Result_B", "Result_S"]

# Start from an empty, integer-typed frame; rows are appended by the loading cell.
data = pd.DataFrame(dtype=int, columns=data_headers)
data.head()

Unnamed: 0,White Pawns,White Knights,White Bishops,White Rooks,White Queens,Black Pawns,Black Knights,Black Bishops,Black Rooks,Black Queens,Result_W,Result_B,Result_S


In [39]:
# Logic setup: the sides and piece types that get a count column, plus the
# singular display name of each piece type (pluralised by column_name()).
colors = [WHITE, BLACK]
pieces = [PAWN, KNIGHT, BISHOP, ROOK, QUEEN]
piece_names = dict(zip(pieces, ("Pawn", "Knight", "Bishop", "Rook", "Queen")))

def is_stalemate(node: GameNode) -> bool:
    """Return True when the game owning ``node`` has Result header "1/2-1/2".

    NOTE(review): "1/2-1/2" is the PGN marker for any drawn game, so despite
    the name this does not single out stalemates specifically.
    """
    result_tag = node.game().headers["Result"]
    return result_tag == "1/2-1/2"

def num_pieces(board: Board, color: Color, piece: int) -> int:
    """Count the pieces of type ``piece`` that ``color`` has on ``board``.

    The count is the size of whatever ``board.pieces(piece, color)`` returns.
    """
    matching = board.pieces(piece, color)
    return len(matching)

def column_name(color: Color, piece: int) -> str:
    """Build the dataframe column label for a (color, piece type) pair.

    The label is the side name ("White" when ``color`` equals WHITE, otherwise
    "Black"), a space, and the pluralised entry from ``piece_names``.
    Raises KeyError if ``piece`` is not a key of ``piece_names``.
    """
    if color == WHITE:
        side = "White"
    else:
        side = "Black"
    return f"{side} {piece_names[piece]}s"

def load_game_into_df(df: DataFrame, game_node: GameNode) -> DataFrame:
    """Append one row of piece counts per position of a game to ``df``.

    Starting at ``game_node`` and following ``next()``, every node for which
    ``is_end()`` is false contributes a row holding the number of each piece
    type per side plus the one-hot game result. The node where ``is_end()``
    is true is not recorded.

    Result encoding, taken from the game's "Result" header:
      * "1-0"  -> Result_W = 1
      * "0-1"  -> Result_B = 1
      * anything else (draws, but also e.g. "*") -> Result_S = 1

    Args:
        df: Frame to extend; expected to have the ``data_headers`` columns.
        game_node: Node to start from (typically the game root).

    Returns:
        A new frame with the rows appended and every column cast to int, or
        ``df`` itself, unchanged, when the walk yields no positions.
    """
    # The Result header belongs to the whole game, so read it once rather
    # than once per position.
    res = game_node.game().headers["Result"]
    if res == "1-0":
        outcome = {"Result_W": 1, "Result_B": 0, "Result_S": 0}
    elif res == "0-1":
        outcome = {"Result_W": 0, "Result_B": 1, "Result_S": 0}
    else:
        outcome = {"Result_W": 0, "Result_B": 0, "Result_S": 1}

    # Collect plain dicts and build the frame once: concatenating inside the
    # loop copies the whole frame for every position (quadratic in row count).
    rows = []
    while not game_node.is_end():
        board = game_node.board()
        row = {
            column_name(c, p): num_pieces(board, c, p)
            for c in colors
            for p in pieces
        }
        row.update(outcome)
        rows.append(row)
        game_node = game_node.next()

    if not rows:
        return df

    new_rows = pd.DataFrame(rows, columns=data_headers)
    return pd.concat([df, new_rows], ignore_index=True).astype(int)

In [44]:
# Load data into dataframe: read up to 1000 games from the open PGN stream.
# `i` ends up holding the number of games actually loaded.
i = 0
while i < 1000:
    game = read_game(chess_db)
    if game is None:
        # read_game() returns None once the stream is exhausted; stop instead
        # of handing None to load_game_into_df (which would raise).
        break
    data = load_game_into_df(data, game)
    i += 1
data

Unnamed: 0,White Pawns,White Knights,White Bishops,White Rooks,White Queens,Black Pawns,Black Knights,Black Bishops,Black Rooks,Black Queens,Result_W,Result_B,Result_S
0,8,2,2,2,1,8,2,2,2,1,0,0,1
1,8,2,2,2,1,8,2,2,2,1,0,0,1
2,8,2,2,2,1,8,2,2,2,1,0,0,1
3,8,2,2,2,1,8,2,2,2,1,0,0,1
4,8,2,2,2,1,8,2,2,2,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
94923,3,0,0,0,1,2,0,0,0,0,1,0,0
94924,3,0,0,0,1,2,0,0,0,0,1,0,0
94925,3,0,0,0,1,2,0,0,0,0,1,0,0
94926,3,0,0,0,1,2,0,0,0,0,1,0,0


In [46]:
# Persist the assembled dataset as CSV, without the row-index column.
path = "data/chess_data_output-102424.csv"
data.to_csv(path_or_buf=path, index=False)