In [None]:
import re
import chess
import chess.pgn
import io
import subprocess
import csv
from concurrent.futures import ThreadPoolExecutor, as_completed

# Path to the PGN file containing the chess game data
file_path = "lichess_elite_2024-10.pgn"
# Path to the Stockfish executable
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
stockfish_path = "C:/Users/adtro/Downloads/stockfish-windows-x86-64-avx2/stockfish/stockfish-windows-x86-64-avx2.exe"
# Output path of the dataset CSV (columns: fen, evaluation)
csv_path = "./dataset.csv"

In [None]:
# Read the whole PGN file into memory. The encoding is pinned so the result does
# not depend on the platform's locale default; undecodable bytes are replaced
# instead of raising because only the movetext is consumed below.
with open(file_path, "r", encoding="utf-8", errors="replace") as file:
    chess_data = file.read()

# A game is matched as: a header block starting at "[Event", a blank line, then
# the movetext starting at "1.". Only the movetext is captured (the single
# group), so `games` holds move strings without headers. The movetext ends at
# the next blank line or at end-of-file, so a final game that is not followed
# by a blank line is still picked up.
game_pattern = r'(?s)\[Event.*?\n\n(1\..*?)(?:\n\n|\n?\Z)'

# One movetext string per matched game
games = re.findall(game_pattern, chess_data)

# Report the count and preview only the first few games; printing every game
# floods the notebook output when the file is large.
print(f"Extracted {len(games)} games")
for i, game in enumerate(games[:3], 1):
    print(f"Game {i}:\n{game}\n")

In [None]:
def get_fens_from_game(game_text):
    """
    Convert one game's PGN text into the FEN reached after every mainline move.

    Parameters:
        game_text (str): The PGN text of a single game.

    Returns:
        list: FEN strings in play order, one per mainline move. Empty when
            `game_text` contains no parsable game.
    """
    game = chess.pgn.read_game(io.StringIO(game_text))

    # read_game returns None when the stream holds no game (e.g. empty text);
    # report that as "no positions" instead of failing on `None.board()`.
    if game is None:
        return []

    # Start from the game's initial position and record the FEN after each move
    board = game.board()
    fens = []
    for move in game.mainline_moves():
        board.push(move)
        fens.append(board.fen())
    return fens

# Smoke test: the FENs of the first extracted game become this cell's output.
get_fens_from_game(games[0])

In [None]:
def get_stockfish_eval(fen, timeout=30.0):
    """
    Ask Stockfish for the evaluation it reports for a position.

    A fresh Stockfish process is started for every call, sent the commands
    `position fen <fen>`, `eval` and `quit`, and its complete output is scanned
    for the line containing "Final evaluation".

    Parameters:
        fen (str): The FEN string to analyze.
        timeout (float): Seconds to wait for the engine before killing it.

    Returns:
        float or None: The first decimal number on the "Final evaluation"
            line, or None when that line is missing, carries no number, or
            the engine fails or times out. Failures never return a message
            string, so callers can rely on "None means no evaluation".

    Raises:
        OSError: If the Stockfish executable cannot be started.
    """
    # 'quit' makes the engine exit after answering, so the output below ends
    # at EOF instead of relying on a guessed maximum number of lines.
    commands = f"position fen {fen}\neval\nquit\n"

    with subprocess.Popen(stockfish_path, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True) as stockfish:
        try:
            # communicate() writes the commands, closes stdin and drains
            # stdout until the process exits.
            output, _ = stockfish.communicate(commands, timeout=timeout)
        except subprocess.TimeoutExpired:
            # Kill the stuck engine and reap it so no process is left behind
            stockfish.kill()
            stockfish.communicate()
            print(f"Stockfish timed out after {timeout}s for FEN: {fen}")
            return None
        except OSError as e:
            print(f"An error occurred: {e}")
            return None

    for line in output.splitlines():
        if "Final evaluation" in line:
            # First signed decimal on the line; absent when no number is printed
            match = re.search(r"([-+]?\d+\.\d+)", line)
            return float(match.group(1)) if match else None
    return None

# Smoke test: evaluate the position reached after the first move of the first game.
example_fen = get_fens_from_game(games[0])[0]  
evaluation = get_stockfish_eval(example_fen)
print(f"Evaluation of '{example_fen}': {evaluation}")

In [None]:
def create_fen_eval_dataset_parallel(fen_list, output_file, max_workers=None):
    """
    Generate a dataset of FEN positions and their Stockfish evaluations in parallel.

    Each FEN is evaluated with `get_stockfish_eval` on a thread pool and rows
    are written in completion order, which is not the order of `fen_list`.

    Parameters:
        fen_list (list of str): List of FEN strings to analyze.
        output_file (str): Path to the output CSV file (overwritten).
        max_workers (int or None): Thread pool size; None uses the
            ThreadPoolExecutor default.

    Returns:
        None: Writes a CSV with header `fen,evaluation` and one row per FEN
            that produced a numeric evaluation.
    """
    def process_fen(fen):
        """Evaluate one FEN; return (fen, evaluation), with None when there is no usable score."""
        try:
            eval_score = get_stockfish_eval(fen)
        except Exception as e:
            print(f"Error processing FEN: {fen} -> {e}")
            return (fen, None)  # Keep the batch running; this FEN is skipped
        # Only real numbers belong in the dataset. None and error-message
        # strings are both treated as "no evaluation".
        if not isinstance(eval_score, (int, float)):
            return (fen, None)
        return (fen, eval_score)

    # Open the output first so an unwritable path fails before any engine work starts
    with open(output_file, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["fen", "evaluation"])

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(process_fen, fen) for fen in fen_list]
            total = len(futures)

            completed_count = 0
            for future in as_completed(futures):
                fen, eval_score = future.result()
                if eval_score is not None:  # Write only valid results
                    writer.writerow((fen, eval_score))

                # Report progress every 1000 completed FENs as a true percentage
                completed_count += 1
                if completed_count % 1000 == 0:
                    percent = 100 * completed_count / total
                    print(f"Processed {completed_count}/{total} FENs ({percent:.1f}%)...")

In [None]:
# Number of games, taken from the start of `games`, used to build the dataset
MAX_GAMES = 1200

# Collect the positions of those games and drop repeats. dict.fromkeys
# de-duplicates while keeping first-seen order, so `fens` comes out in the same
# order on every run; list(set(...)) order varies with string hash randomization.
fens = list(dict.fromkeys(
    fen
    for game in games[:MAX_GAMES]
    for fen in get_fens_from_game(game)
))

In [None]:
# Number of unique positions collected for evaluation
len(fens)

In [None]:
# Evaluate the collected positions and write the resulting (fen, evaluation) rows to csv_path
create_fen_eval_dataset_parallel(fens, csv_path)

In [None]:
import pandas as pd

# NOTE(review): this cell rewrites "./final_data.csv" in place, while the cells
# above write `csv_path` ("./dataset.csv") — confirm how final_data.csv is produced.
final_data_path = "./final_data.csv"

# Read every column as data. Loading the first column as the index hides it
# from drop_duplicates() (which ignores the index) and from the index-less
# write below, so a plain two-column file would lose its first column and be
# de-duplicated on the remaining one alone.
df = pd.read_csv(final_data_path, header=None)

# With more than two columns the leading column is still discarded, matching
# what the index_col=0 read did (presumably a saved row index — TODO confirm).
if df.shape[1] > 2:
    df = df.iloc[:, 1:]

# Rewrite without header or index; once the file has two columns, re-running
# this cell no longer strips anything.
df.drop_duplicates().to_csv(final_data_path, header=False, index=False)