# Some experiments on PGN data

### Imports

In [111]:
import os
import time
import chess
import chess.pgn
import numpy as np

### Parsing 50k games

In [None]:
# Benchmark: time how long it takes to parse every game in the PGN file.
start = time.time()

def doNothing():
    # Placeholder loop body: only the parsing itself is being timed.
    return

# The context manager closes the file handle even if parsing raises,
# instead of leaving it open for the rest of the session.
with open('../data/fics_202011_notime_50k.pgn') as pgn:
    # The loop ends as soon as read_game() returns a falsy value.
    while chess.pgn.read_game(pgn):
        doNothing()

end = time.time()
print(f'Time elapsed: {end - start}')

### Calculating relative value of pieces for a given colour

In [None]:
# Relative piece values, stored at index (piece type - 1):
# [pawn, knight, bishop, rook, queen], see https://en.wikipedia.org/wiki/Chess_piece_relative_value
piece_values = [1, 3, 3, 5, 9]

# Given chess.Board and chess.Color
# Returns the material total for that color: for each piece type 1..5,
# the number of such pieces on the board multiplied by its value.
def get_piece_value(board, color):
    return sum(
        value * len(board.pieces(piece_type, color))
        for piece_type, value in enumerate(piece_values, start=1)
    )

In [None]:
# Re-open the PGN file so that reading starts again from the first game.
# NOTE(review): this handle is never closed in the visible code.
pgn = open('../data/fics_202011_notime_50k.pgn')

# Parse only the first game; `game` is reused by the cells below.
game = chess.pgn.read_game(pgn)
# Board object for this game, used here to count material.
board = game.board()
# Last expression of the cell: the notebook displays black's material total.
get_piece_value(board, chess.BLACK)

### Game metadata

In [None]:
# Display the header tags of the loaded game (the cells below read 'Result', 'WhiteElo' and 'BlackElo').
game.headers

In [None]:
# Given chess.pgn.Game and chess.Color
# Return -1 if draw, 1 if color won, 0 if color lost (always as an int).
# Raises ValueError if the 'Result' header is neither a draw nor one of
# '1-0' / '0-1' (e.g. '*' for an unfinished game).
def get_game_result(game, color):
    result = game.headers['Result']
    if '1/2' in result:
        return -1
    if result not in ('1-0', '0-1'):
        raise ValueError(f'Unsupported game result: {result!r}')
    # A decisive result reads '<white score>-<black score>'.
    white_score, black_score = result.split('-')
    # chess.Color is a bool (chess.WHITE is True, chess.BLACK is False),
    # so it selects the side directly. Convert to int so that every
    # branch returns the same type.
    return int(white_score if color else black_score)

In [None]:
# Read both players' ratings from the header tags of the current game.
white_elo, black_elo = (game.headers[tag] for tag in ('WhiteElo', 'BlackElo'))

print(f'White Elo: {white_elo}, Black Elo: {black_elo}')
# Outcome of the game from white's point of view.
print(f'Result for white: {get_game_result(game, chess.WHITE)}')

### Representing game as vector

In [None]:
def board_to_vec(board):
    '''
        Encode a chess.Board as an integer vector of length 64,
        indexed by square.

        An empty square is 0, a white piece is its piece type and
        a black piece is the negated piece type.
    '''
    encoded = np.zeros((64), dtype=int)
    for sq in chess.SQUARES:
        occupant = board.piece_at(sq)
        if occupant is None:
            # Empty square: keep the zero already stored there.
            continue
        sign = 1 if occupant.color == chess.WHITE else -1
        encoded[sq] = sign * occupant.piece_type
    return encoded

def game_to_vec(game, moves_limit):
    '''
    Given a chess.pgn.Game, return a concatenation of board states
    represented as vectors, as generated by board_to_vec().

    Only the positions reached after the first `moves_limit` mainline
    moves are encoded. The result is a flat array of length
    64 * moves_limit; slots for moves the game never reached stay zero.
    '''
    board = game.board()
    game_as_vec = np.zeros((64 * moves_limit))
    # enumerate() advances the slot index with every move, so each
    # position gets its own 64-entry slot and the limit check can trigger.
    for i, move in enumerate(game.mainline_moves()):
        if i >= moves_limit:
            break
        board.push(move)
        # Slot i holds the position after move number i + 1.
        game_as_vec[(64*i):(64*(i+1))] = board_to_vec(board)
    return game_as_vec

In [None]:
# Example call on the game loaded above with moves_limit=10 (the returned array has 64 * 10 entries).
game_to_vec(game, 10)

### Game as one-hot encoding of pgn text string
- There are many ways we could encode the game string.
- e.g. do we encode pairs of half-moves?
- Do we encode the move number?

In [162]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np

def game_to_movetext(game, move_limit=-1):
    '''
    Returns the mainline movetext of chess.pgn.Game 'game' as a flat list
    of single characters, written in Standard Algebraic Notation
    (https://en.wikipedia.org/wiki/Algebraic_notation_(chess)).

    The movetext is split on '. ' and only the chunks in the slice
    [1:move_limit] are kept, so the default of -1 leaves out the final
    chunk. The last space-separated token of every kept chunk is removed
    before its characters (spaces included) are appended to the result.
    '''
    chunks = str(game.mainline()).split('. ')[1:move_limit]
    characters = []
    for chunk in chunks:
        # Drop the trailing token, then add the remaining text one
        # character at a time.
        characters.extend(chunk.rsplit(' ', 1)[0])
    return characters

# Number of elements returned for the loaded game with the default move_limit.
len(game_to_movetext(game))

320

In [None]:
# Same steps as game_to_movetext(), unrolled at cell level for inspection.
game_string = str(game.mainline())
# Split on '. ', keep the chunks in [1:-1] and drop each chunk's last
# space-separated token.
move_strings = [chunk.rsplit(' ', 1)[0] for chunk in game_string.split('. ')[1:-1]]
# Iterating over each string yields its individual characters.
flattened_move_strings = [char for text in move_strings for char in text]
flattened_move_strings

### Loading encoder from disk

In [144]:
import pickle

# Load a previously saved encoder object from the pickle file '../encoder'.
# NOTE(review): presumably a fitted OneHotEncoder, since it is used via encoder.transform() below — confirm.
# NOTE(security): pickle.load can execute arbitrary code; only load files you created or trust.
with open('../encoder', 'rb') as f:
    encoder = pickle.load(f)

### Compressing encoding for dimensionality reduction

In [192]:
# Passed to game_to_movetext() as its move_limit and reused as the number of PCA components.
MOVE_LIMIT = 15

# NOTE: despite the name, game_to_movetext() returns a list here, not a string.
game_string = game_to_movetext(game, MOVE_LIMIT)
# Reshape into a single column (one list element per row) before encoding.
game_string = np.array(game_string).reshape(-1, 1)
# .toarray() converts the encoder output to a dense array (presumably from a sparse matrix — confirm).
encoded_game = encoder.transform(game_string).toarray()
# Rotate by 90 degrees in the (0, 1) plane, which swaps the row and column dimensions.
encoded_game = np.rot90(encoded_game, axes=(0, 1))

from sklearn.decomposition import PCA
# Fit PCA on the rotated encoding, keeping MOVE_LIMIT components.
pca = PCA(n_components=MOVE_LIMIT)
compressed_encoding = pca.fit_transform(encoded_game)
# Rotate in the opposite direction to the rotation applied before the fit.
compressed_encoding = np.rot90(compressed_encoding, axes=(1, 0))
# Flatten to one vector; as the last expression, the notebook displays it.
compressed_encoding.flatten()

array([ 3.61637899e+00, -1.02336176e-01, -1.31259051e-01, -1.10448610e-01,
       -1.10448610e-01, -1.44910920e-01, -1.82971601e-01, -1.31259051e-01,
       -2.10632833e-01, -1.44910920e-01, -1.02336176e-01, -1.02336176e-01,
       -2.48147158e-01, -1.02336176e-01, -1.44910920e-01, -1.61732214e-01,
       -1.10448610e-01, -1.10448610e-01, -1.19957966e-01, -1.19957966e-01,
       -1.44910920e-01, -1.82971601e-01, -1.82971601e-01, -1.19957966e-01,
       -1.31259051e-01, -1.31259051e-01, -1.31259051e-01,  6.60835564e-02,
       -5.17737603e-02, -8.37993352e-02, -5.93320685e-02, -5.93320685e-02,
       -1.05565883e-01, -2.19696473e-01, -8.37993352e-02, -4.78189095e-01,
       -1.05565883e-01, -5.17737603e-02, -5.17737603e-02,  2.70790950e+00,
       -5.17737603e-02, -1.05565883e-01, -1.42607663e-01, -5.93320685e-02,
       -5.93320685e-02, -6.94744517e-02, -6.94744517e-02, -1.05565883e-01,
       -2.19696473e-01, -2.19696473e-01, -6.94744517e-02, -8.37993352e-02,
       -8.37993352e-02, -