In [1]:
import os
import chess
import chess.pgn
import sys, builtins

def process_files(folder_path):
    total_games = 0
    categories = ['pawns', 'bishops', 'knights', 'minor_pieces', 'rooks', 'queens']
    results = ['white_wins', 'black_wins', 'draws']
    game_stats = []
    game_counts = {res: 0 for res in results}

    for i, filename in enumerate(os.listdir(folder_path)):
        if i % 5000 == 0:
            print(f"Processing file {i}")
        # if i == 1000:
        #     break
        if filename.endswith('.pgn'):
            file_path = os.path.join(folder_path, filename)
            with open(file_path, 'r') as pgn_file:

                try:
                    original_stdout = sys.stdout
                    original_stderr = sys.stderr
                    sys.stdout = open(os.devnull, 'w')
                    sys.stderr = open(os.devnull, 'w')
            
                    game = chess.pgn.read_game(pgn_file)
            
                    sys.stdout = original_stdout
                    sys.stderr = original_stderr
                except ValueError:
                    continue  # chess 960

                result = game.headers.get("Result", "*")
                if result not in ["1-0", "0-1", "1/2-1/2"]:
                    continue

                try:
                    game_stat = process_game(game, result)
                    game_stats.append(game_stat)
                    total_games += 1
                    game_counts[game_stat['result']] += 1
                except AttributeError:
                    continue

    accumulated_stats = {res: {cat: {'white_initiated': 0, 'total': 0} for cat in categories} for res in results}
    for stat in game_stats:
        result = stat['result']
        for category in categories:
            accumulated_stats[result][category]['white_initiated'] += stat[category]['white_initiated']
            accumulated_stats[result][category]['total'] += stat[category]['total']

    return accumulated_stats, total_games, game_counts



In [2]:
def process_game(game, result):
    categories = ['pawns', 'bishops', 'knights', 'minor_pieces', 'rooks', 'queens']
    trade_stats = {cat: {'white_initiated': 0, 'total': 0} for cat in categories}

    board = game.board()
    moves = list(game.mainline_moves())

    for move_index in range(len(moves) - 1):
        move = moves[move_index]
        next_move = moves[move_index + 1]

        is_capture1 = board.is_capture(move)
        captured_piece1 = board.piece_at(move.to_square) if is_capture1 else None
        board.push(move)

        is_capture2 = board.is_capture(next_move)
        captured_piece2 = board.piece_at(next_move.to_square) if is_capture2 else None

        if is_capture1 and is_capture2:
            category = get_category(captured_piece1, captured_piece2)
            if category:
                initiator = not board.turn  # The side that made the first capture
                trade_stats[category]['total'] += 1
                if initiator == chess.WHITE:
                    trade_stats[category]['white_initiated'] += 1

    if result == "1-0":
        game_result = 'white_wins'
    elif result == "0-1":
        game_result = 'black_wins'
    else:
        game_result = 'draws'

    return {'result': game_result, **trade_stats}


In [3]:
def get_category(piece1, piece2):
    if piece1 is None or piece2 is None:
        return None
    if piece1.piece_type == chess.PAWN and piece2.piece_type == chess.PAWN:
        return 'pawns'
    elif piece1.piece_type == chess.BISHOP and piece2.piece_type == chess.BISHOP:
        return 'bishops'
    elif piece1.piece_type == chess.KNIGHT and piece2.piece_type == chess.KNIGHT:
        return 'knights'
    elif (piece1.piece_type in [chess.KNIGHT, chess.BISHOP] and
          piece2.piece_type in [chess.KNIGHT, chess.BISHOP] and
          piece1.piece_type != piece2.piece_type):
        return 'minor_pieces'
    elif piece1.piece_type == chess.ROOK and piece2.piece_type == chess.ROOK:
        return 'rooks'
    elif piece1.piece_type == chess.QUEEN and piece2.piece_type == chess.QUEEN:
        return 'queens'
    return None



In [4]:
folder_path = "pgns-run1-test80-20220404-1254"
trade_stats, total_games, game_counts = process_files(folder_path)

print(f"Total games processed: {total_games}")
print("Game counts:")
for result, count in game_counts.items():
    print(f"  {result.replace('_', ' ').title()}: {count}")
print()

for result, categories in trade_stats.items():
    print(f"\n{result.replace('_', ' ').title()} ({game_counts[result]} games):")
    for category, stats in categories.items():
        total_trades = stats['total']
        if total_trades > 0:
            white_percentage = (stats['white_initiated'] / total_trades) * 100
            print(f"  {category.capitalize()} trades:")
            print(f"    Total trades: {total_trades}")
            print(f"    White-initiated trades: {stats['white_initiated']} ({white_percentage:.2f}%)")
        else:
            print(f"  No {category} trades detected")
    print()

Processing file 0
Processing file 5000
Processing file 10000
Processing file 15000
Processing file 20000
Processing file 25000
Processing file 30000
Processing file 35000
Processing file 40000
Processing file 45000
Processing file 50000
Processing file 55000
Processing file 60000
Processing file 65000
Processing file 70000
Processing file 75000
Processing file 80000
Processing file 85000
Processing file 90000
Processing file 95000
Processing file 100000
Processing file 105000
Processing file 110000
Processing file 115000
Processing file 120000
Processing file 125000
Total games processed: 124964
Game counts:
  White Wins: 46392
  Black Wins: 36098
  Draws: 42474


White Wins (46392 games):
  Pawns trades:
    Total trades: 103043
    White-initiated trades: 50002 (48.53%)
  Bishops trades:
    Total trades: 26351
    White-initiated trades: 13601 (51.61%)
  Knights trades:
    Total trades: 32328
    White-initiated trades: 16661 (51.54%)
  Minor_pieces trades:
    Total trades: 50181
