# In [1]:
import chess.pgn
import pandas as pd
import os
import re

# Engine evaluation tag, e.g. "[%eval -1.35]" or "[%eval 0]". Mate scores
# ("[%eval #-3]") do not match and are reported as None.
_EVAL_RE = re.compile(r'\[%eval (-?\d+(?:\.\d+)?)\]')

# Suggested improvement, e.g. "Blunder. Nf3 was best.". The pattern has no
# leading brace: python-chess hands over comment text without the
# surrounding "{ }", so a pattern anchored on "{ " never matches.
_BEST_MOVE_RE = re.compile(r'[Bb]lunder\. (.*?) was best\.')


def extract_blunders(pgn_file):
    """Collect every main-line move annotated as a blunder in a PGN file.

    Each game in the file is read with ``chess.pgn.read_game`` and its main
    line (always the first variation) is walked move by move. A move counts
    as a blunder when its comment contains "Blunder" or "blunder".

    Args:
        pgn_file: Path of the PGN file to read. It is decoded as UTF-8.

    Returns:
        A list of dicts, one per blunder, in file order. Each dict holds the
        game headers (with "Unknown" / "Standard" fallbacks), "Move Number"
        (the 1-based ply index along the main line, not the full-move
        number), "Player", "Blunder Move" (SAN), "Better Move" (or None),
        "Comment", "Evaluation" (float or None) and the FEN before and after
        the move.
    """
    blunders = []

    with open(pgn_file, encoding="utf-8") as f:
        while True:
            game = chess.pgn.read_game(f)
            if game is None:
                break

            headers = game.headers
            white = headers.get("White", "Unknown")
            black = headers.get("Black", "Unknown")

            # One running board per game instead of node.board() per move:
            # node.board() replays the game from the start on every call.
            board = game.board()
            node = game
            move_number = 0

            while node.variations:
                next_node = node.variation(0)
                move = next_node.move
                move_number += 1

                comment = next_node.comment
                is_blunder = "Blunder" in comment or "blunder" in comment
                blunder_info = None

                if is_blunder:
                    eval_match = _EVAL_RE.search(comment)
                    better_move_match = _BEST_MOVE_RE.search(comment)

                    # Everything below must be read before the move is
                    # pushed: the board still shows the pre-blunder position.
                    blunder_info = {
                        "Event": headers.get("Event", "Unknown"),
                        "Site": headers.get("Site", "Unknown"),
                        "Date": headers.get("Date", "Unknown"),
                        "Round": headers.get("Round", "Unknown"),
                        "White Player": white,
                        "Black Player": black,
                        "Result": headers.get("Result", "Unknown"),
                        "UTCDate": headers.get("UTCDate", "Unknown"),
                        "UTCTime": headers.get("UTCTime", "Unknown"),
                        "Variant": headers.get("Variant", "Standard"),
                        "ECO": headers.get("ECO", "Unknown"),
                        "Opening": headers.get("Opening", "Unknown"),
                        "Move Number": move_number,
                        # Use the side to move rather than ply parity so that
                        # games starting from a FEN with Black to move are
                        # attributed correctly.
                        "Player": white if board.turn == chess.WHITE else black,
                        "Blunder Move": board.san(move),
                        "Better Move": better_move_match.group(1) if better_move_match else None,
                        "Comment": comment,
                        "Evaluation": float(eval_match.group(1)) if eval_match else None,
                        "Position Before Blunder": board.fen(),
                    }

                board.push(move)

                if blunder_info is not None:
                    blunder_info["Position After Blunder"] = board.fen()
                    blunders.append(blunder_info)

                node = next_node

    return blunders

def process_pgn_files(pgn_directory, output_file):
    """Extract blunders from every ``.pgn`` file in a directory into one CSV.

    Args:
        pgn_directory: Directory to scan (not recursive). Only entries whose
            name ends with ".pgn" are processed.
        output_file: Path of the CSV file to write. One row per blunder,
            without the DataFrame index.

    Files are processed in sorted name order so that the row order of the
    CSV is reproducible; ``os.listdir`` returns entries in arbitrary order.
    """
    all_blunders = []

    for filename in sorted(os.listdir(pgn_directory)):
        if not filename.endswith(".pgn"):
            continue
        pgn_file = os.path.join(pgn_directory, filename)
        all_blunders.extend(extract_blunders(pgn_file))

    df = pd.DataFrame(all_blunders)
    df.to_csv(output_file, index=False)
    print(f"Blunders saved to {output_file}")

# Usage: scan the sample analysis directory and write all blunders to a CSV.
pgn_directory, output_file = "Analysis_pgns_small", "blunders.csv"
process_pgn_files(pgn_directory=pgn_directory, output_file=output_file)

# Output: Blunders saved to blunders.csv
