In [3]:
import chess.pgn
import os

def extract_games(input_file, output_file, num_games=10000):
    """
    Extracts the first `num_games` chess games from `input_file` and writes them to `output_file`.

    The output file is created if it does not exist and overwritten if it does;
    its parent directory is created when missing. Games are written in the
    order they are read, each followed by a blank line. If the source holds
    fewer than `num_games` games, every game it contains is written and a
    warning is printed.

    Args:
    - input_file (str): Path to the source PGN file.
    - output_file (str): Path to save the extracted games.
    - num_games (int): Number of games to extract (default: 10,000).

    Returns:
    - int: Number of games actually written to `output_file`.

    Raises:
    - ValueError: If `input_file` and `output_file` resolve to the same path.
    """
    # Opening the output in 'w' mode truncates it immediately, which would
    # destroy the source before a single game is read.
    if os.path.abspath(input_file) == os.path.abspath(output_file):
        raise ValueError("input_file and output_file must be different files")

    # open(..., 'w') creates the file itself but not its parent directory.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    game_count = 0
    # PGN is normally ASCII/UTF-8; be explicit so the result does not depend
    # on the platform's default encoding.
    with open(input_file, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:
        while game_count < num_games:
            game = chess.pgn.read_game(infile)
            if game is None:
                # read_game returned None: no further games in the source.
                print("\n⚠️  Reached end of the file before extracting all games.")
                break

            outfile.write(str(game) + "\n\n")
            game_count += 1

            if game_count % 1000 == 0:
                print(f"✅ Extracted {game_count} games...")

    # Reported after the files are closed, so the output is flushed to disk.
    print(f"\n🎯 Extraction completed! {game_count} games saved to {output_file}")
    return game_count


# Example usage:
# Extract the first 10,000 games from the source PGN file into a new, smaller PGN file.
input_pgn = "data/std_train_big.clean.pgn"  # Source PGN file
output_pgn = "data/10000_games.pgn"         # Output PGN file to create

# Pass the variables defined above so the paths documented here are the ones used.
extract_games(input_pgn, output_pgn, num_games=10000)


✅ Extracted 1000 games...
✅ Extracted 2000 games...
✅ Extracted 3000 games...
✅ Extracted 4000 games...
✅ Extracted 5000 games...
✅ Extracted 6000 games...
✅ Extracted 7000 games...
✅ Extracted 8000 games...
✅ Extracted 9000 games...
✅ Extracted 10000 games...

🎯 Extraction completed! 10000 games saved to data/10000_games.pgn
