# Chess Game Analysis Using Graph Neural Networks


## Handle Data

In [2]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/drive so the
# project files stored on Drive are reachable through ordinary paths.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Change the working directory to the project folder on Drive, where the data
# file is stored; relative paths used in later cells resolve against it.
%cd /content/drive/MyDrive/cs224w-project/

/content/drive/MyDrive/cs224w-project


In [4]:
import numpy as np
import pandas as pd
import json
import re
import io
import zipfile

In [5]:
# One full move in the source text looks like "W12.Nf3 B12.Nf6".
# The backreference \1 forces both halves to carry the same move number.
# The black half is optional so that a game ending on White's move (odd number
# of plies) keeps its final white move instead of silently dropping it.
move_pattern = re.compile(r"W(\d+)\.(\S+)(?:\s+B\1\.(\S+))?")

def process_game_data(line):
    """
    Parse one line of game data into a dictionary.

    A line has the form ``<metadata> ### <moves>``. The metadata part is a
    whitespace-separated list of at least 16 tokens, read positionally:

    * 0: ``position`` (int)
    * 1: ``date`` (kept as a string)
    * 2: ``result`` (kept as a string)
    * 3, 4: ``welo`` / ``belo`` (int, or ``None`` when the token is not
      made of digits only)
    * 5: ``len`` (int)
    * 6-15: boolean flags, each ``True`` only when the token equals the
      corresponding ``*_true`` literal

    The moves part is scanned for ``W<n>.<move> B<n>.<move>`` pairs.

    Args:
        line: A single non-comment line of the games file.

    Returns:
        dict: The metadata fields above plus ``'moves'``, a list of
        ``{"white": "W<n>.<move>", "black": "B<n>.<move>"}`` dictionaries in
        order of appearance. When a white move has no matching black reply
        (e.g. the game ended on White's move) ``"black"`` is ``None``.

    Raises:
        ValueError: If the line contains no ``###`` separator, or if the
            ``position`` / ``len`` token is not an integer.
        IndexError: If the metadata part has fewer than 16 tokens.
    """
    # Split on the first separator only, so a stray '###' later in the line
    # cannot break the two-way unpacking.
    metadata, moves_str = line.split('###', 1)

    metadata_parts = metadata.strip().split()
    game_data = {
        'position': int(metadata_parts[0]),
        'date': metadata_parts[1],
        'result': metadata_parts[2],
        # Non-numeric rating tokens are stored as None.
        'welo': int(metadata_parts[3]) if metadata_parts[3].isdigit() else None,
        'belo': int(metadata_parts[4]) if metadata_parts[4].isdigit() else None,
        'len': int(metadata_parts[5]),
        'date_c': metadata_parts[6] == 'date_true',
        'resu_c': metadata_parts[7] == 'result_true',
        'welo_c': metadata_parts[8] == 'welo_true',
        'belo_c': metadata_parts[9] == 'belo_true',
        'edate_c': metadata_parts[10] == 'edate_true',
        'setup': metadata_parts[11] == 'setup_true',
        'fen': metadata_parts[12] == 'fen_true',
        'resu2_c': metadata_parts[13] == 'result2_true',
        'oyrange': metadata_parts[14] == 'oyrange_true',
        'bad_len': metadata_parts[15] == 'blen_true'
    }

    # Process game moves: one entry per white move, paired with black's reply
    # when there is one.
    moves_cleaned = []
    for match in move_pattern.finditer(moves_str):
        move_number, white_move, black_move = match.groups()
        moves_cleaned.append({
            "white": f"W{move_number}.{white_move}",
            "black": None if black_move is None else f"B{move_number}.{black_move}",
        })

    game_data['moves'] = moves_cleaned

    return game_data


def save_to_json(iterable_data, output_path):
    """
    Stream an iterable of JSON-serialisable items into one JSON array file.

    Items are written one at a time as they are pulled from
    ``iterable_data``, so the full collection is never held in memory.

    Args:
        iterable_data: Iterable yielding objects accepted by ``json.dump``.
        output_path: Path of the file to create or overwrite.
    """
    with open(output_path, 'w') as sink:
        sink.write('[')
        for index, record in enumerate(iterable_data):
            # A comma goes before every element except the first.
            if index > 0:
                sink.write(',')
            json.dump(record, sink, indent=4)
        sink.write(']')


def process_zip(zip_path, output_path, encoding='utf-8'):
    """
    Convert the games text file stored in a zip archive into a JSON array file.

    The first non-directory member of the archive is read line by line.
    Blank lines and lines starting with ``#`` are skipped; every other line
    is parsed with ``process_game_data`` and streamed to ``output_path``
    through ``save_to_json``, so the games are never all held in memory.

    Args:
        zip_path: Path to the zip archive containing the data text file.
        output_path: Path of the JSON file to create or overwrite.
        encoding: Text encoding used to decode the archived file. It is
            passed explicitly so the result does not depend on the locale
            of the machine running the notebook.

    Raises:
        IndexError: If the archive contains no file entries.
    """
    with zipfile.ZipFile(zip_path) as z:
        # Skip directory entries: the first name in an archive is not
        # guaranteed to be a regular file.
        members = [info.filename for info in z.infolist() if not info.is_dir()]
        if not members:
            raise IndexError(f"no data file found in archive: {zip_path}")
        txt_file_name = members[0]

        with z.open(txt_file_name) as file:
            content = io.TextIOWrapper(file, encoding=encoding)
            # Lazy generator: each line is parsed only when save_to_json
            # asks for the next element.
            games = (
                process_game_data(line)
                for line in content
                if line.strip() and not line.startswith('#')
            )
            save_to_json(games, output_path)


In [7]:
# Source archive on Drive and destination JSON file; the output path is
# relative, so the file is created in the current working directory.
input_zip_path = '/content/drive/MyDrive/cs224w-project/chess_db.zip'
output_file_path = 'processed_chess_data.json'

# Convert the archived games file to JSON, then report where it was written.
process_zip(input_zip_path, output_file_path)
print(f"Processed data has been saved to {output_file_path}")

Processed data has been saved to processed_chess_data.json


In [8]:
# Sanity check: show the first N lines of the generated JSON file.
N = 100
with open("processed_chess_data.json") as f:
    # zip stops after N lines, or earlier if the file is shorter.
    for _, preview_line in zip(range(N), f):
        print(preview_line, end='')

[{
    "position": 1,
    "date": "2000.03.14",
    "result": "1-0",
    "welo": 2851,
    "belo": null,
    "len": 67,
    "date_c": false,
    "resu_c": false,
    "welo_c": false,
    "belo_c": true,
    "edate_c": true,
    "setup": false,
    "fen": false,
    "resu2_c": false,
    "oyrange": false,
    "bad_len": false,
    "moves": [
        {
            "white": "W1.d4",
            "black": "B1.d5"
        },
        {
            "white": "W2.c4",
            "black": "B2.e6"
        },
        {
            "white": "W3.Nc3",
            "black": "B3.Nf6"
        },
        {
            "white": "W4.cxd5",
            "black": "B4.exd5"
        },
        {
            "white": "W5.Bg5",
            "black": "B5.Be7"
        },
        {
            "white": "W6.e3",
            "black": "B6.Ne4"
        },
        {
            "white": "W7.Bxe7",
            "black": "B7.Nxc3"
        },
        {
            "white": "W8.Bxd8",
            "black": "B8.Nxd1"
        },
