# Chess.com — Download last year's PGN games for selected players

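This notebook downloads recent games for selected players from Chess.com's Published Data API and saves them in separate folders. The text below describes a window of **the last 365 days**; the window actually used is set by `start_date` in the configuration cell (currently `timedelta(days=1825)`, about five years), and the accounts actually fetched are the uncommented entries of the `players` dict there. The originally listed players are: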

- Magnus Carlsen → `MagnusCarlsen`
- Fabiano Caruana → `FabianoCaruana`
- Ding Liren → `Chefshouse`
- Boris Gelfand → `Gelfandbeautiful`
- Hikaru Nakamura → `Hikaru`
- Anish Giri → `AnishGiri`
- Vladimir Kramnik → `VladimirKramnik`

**What it does:**
1. Figures out the last 365 days (inclusive) relative to *when you run this notebook*.
2. For each player, it requests each monthly archive in that range via the Chess.com API
   and downloads the PGN for that month (if available) to a dedicated folder.
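3. Concatenates those monthly files into one `all_<username>_<date-range>.pgn` per player, and also writes `all_<username>_white_<date-range>.pgn` and `all_<username>_black_<date-range>.pgn` containing only the games that player had as White or as Black.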

**Notes**
- The Chess.com Published Data API returns archives grouped by **year/month**; some months may be missing (no games).
- If any request returns **404** (no archive or no PGN), the code just skips that month.
- It handles basic rate-limiting with a simple retry/backoff.
- Kramnik's account is currently shown as closed on Chess.com; earlier months may still be downloadable.

Run the cells top-to-bottom. If a library is missing, the notebook will try to `pip install` it.


In [33]:
# Install required packages if needed
import sys, subprocess
def ensure(pkg):
    """Make sure *pkg* is importable, installing it with pip if it is missing.

    *pkg* is used both as the import name and as the name passed to
    ``pip install``, so it only works for packages where the two coincide.

    Only ``ImportError`` triggers an installation: any other exception raised
    while importing means the package is present but broken, and reinstalling
    would just hide the real error, so it is allowed to propagate.

    Raises:
        subprocess.CalledProcessError: if ``pip install`` exits with a
            non-zero status.
    """
    import importlib

    try:
        importlib.import_module(pkg)
    except ImportError:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg])
        # A package installed while the interpreter is running may not be
        # visible to the import system until its finder caches are refreshed.
        importlib.invalidate_caches()

# Third-party packages imported by the cells below. "chess" (python-chess) is
# needed by the colour-splitting and FEN-extraction cells, so it is checked
# here together with the download dependencies.
for p in ["requests", "tqdm", "chess"]:
    ensure(p)

import os, time, math
from datetime import date, timedelta
from tqdm import tqdm
import requests


In [None]:
# Configuration
BASE_DIR = os.path.abspath("chesscom_pgn_last_year")  # change if you like
os.makedirs(BASE_DIR, exist_ok=True)

# Display name -> Chess.com username. Only uncommented entries are fetched;
# uncomment a line to include that player in the run.
players = {
    # "Magnus Carlsen": "MagnusCarlsen",
    # "Fabiano Caruana": "FabianoCaruana",
    # "Ding Liren": "Chefshouse",
    # "Daniel Naroditsky": "DanielNaroditsky",
    # "Alireza Firouzja": "Firouzja2003",
    # "Boris Gelfand": "Gelfandbeautiful",
    # NOTE(review): the notebook intro lists this account as `Hikaru` — confirm
    # the username before enabling this entry.
    # "Hikaru Nakamura": "HikaruNakamura",
    # "Anish Giri": "AnishGiri",
    # "Levy Rozman": "gothamchess",
    # "Hans Niemann": "HansOnTwitch",
    "Maxime Vachier-Lagrave": "LyonBeast",
    "Levon Aronian": "LevonAronian",
    "Viswanathan Anand": "Anand",
    "Gukesh D": "GukeshDommaraju",
    "Praggnanandhaa": "rpragchess",
    "Wesley So": "GMWSO",
}

# Length of the download window in days, counted back from today.
# 1825 days is roughly five years; use 365 for a one-year window.
LOOKBACK_DAYS = 1825

# The window runs from start_date to today.
today = date.today()
start_date = today - timedelta(days=LOOKBACK_DAYS)

def months_between(start: date, end: date):
    """List every (year, month) pair from start's month through end's month.

    Both endpoints are inclusive and only the year and month of each date are
    consulted. The pairs are returned in chronological order; the list is
    empty when start's month is later than end's month.
    """
    # Map each month to a single running index so the range is a plain count.
    first_index = start.year * 12 + (start.month - 1)
    last_index = end.year * 12 + (end.month - 1)
    return [(index // 12, index % 12 + 1) for index in range(first_index, last_index + 1)]

# Monthly archives to request: from the month containing start_date up to the
# current month.
first_of_start_month = start_date.replace(day=1)
months = months_between(first_of_start_month, today)

# Tag used in the names of the combined output files.
date_range_tag = "{}_to_{}".format(start_date.isoformat(), today.isoformat())

print("Collecting months: {}".format(months))
print("Date range tag: {}".format(date_range_tag))


Collecting months: [(2020, 8), (2020, 9), (2020, 10), (2020, 11), (2020, 12), (2021, 1), (2021, 2), (2021, 3), (2021, 4), (2021, 5), (2021, 6), (2021, 7), (2021, 8), (2021, 9), (2021, 10), (2021, 11), (2021, 12), (2022, 1), (2022, 2), (2022, 3), (2022, 4), (2022, 5), (2022, 6), (2022, 7), (2022, 8), (2022, 9), (2022, 10), (2022, 11), (2022, 12), (2023, 1), (2023, 2), (2023, 3), (2023, 4), (2023, 5), (2023, 6), (2023, 7), (2023, 8), (2023, 9), (2023, 10), (2023, 11), (2023, 12), (2024, 1), (2024, 2), (2024, 3), (2024, 4), (2024, 5), (2024, 6), (2024, 7), (2024, 8), (2024, 9), (2024, 10), (2024, 11), (2024, 12), (2025, 1), (2025, 2), (2025, 3), (2025, 4), (2025, 5), (2025, 6), (2025, 7), (2025, 8)]
Date range tag: 2020-08-14_to_2025-08-13


In [48]:
# Helpers to fetch monthly PGN and save
HEADERS = {
    # Identifies this script to the API.
    "User-Agent": "PGN-fetcher/1.0 (+https://chess.com; script for personal research)",
    # Prefer PGN / plain text but accept anything; "*/*" is the wildcard
    # media range (the previous "*/-*" was not a valid one).
    "Accept": "text/plain,application/x-chess-pgn,*/*",
}

def save_month_pgn(username: str, year: int, month: int, out_dir: str, retries: int = 3, backoff: float = 1.5):
    """Download one monthly PGN archive for *username* and save it in *out_dir*.

    Args:
        username: Chess.com username used in the API URL.
        year: Year of the archive.
        month: Month of the archive (formatted as two digits in the URL).
        out_dir: Existing directory that receives ``<year>-<month>.pgn``.
        retries: Maximum number of requests to make.
        backoff: Base of the exponential wait used after an HTTP 429.

    Returns:
        ``(True, path)`` when a non-empty PGN was written, otherwise
        ``(False, None)`` — the month has no archive (404/410), the response
        body was empty, or every attempt failed.
    """
    url = f"https://api.chess.com/pub/player/{username}/games/{year}/{month:02d}/pgn"
    for attempt in range(retries):
        # Sleeping after the final attempt would only delay the caller.
        is_last_attempt = attempt == retries - 1
        try:
            r = requests.get(url, headers=HEADERS, timeout=30)
        except requests.RequestException:
            # Connection problems and timeouts are treated like any other
            # transient failure instead of aborting the whole download run.
            if not is_last_attempt:
                time.sleep(1.0)
            continue

        if r.status_code == 200:
            if not r.text.strip():
                # Successful but empty response: nothing to save, and asking
                # again would not change that.
                return False, None
            fn = os.path.join(out_dir, f"{year}-{month:02d}.pgn")
            with open(fn, "w", encoding="utf-8") as f:
                f.write(r.text)
            return True, fn
        if r.status_code in (404, 410):
            # No archive or PGN for this month
            return False, None
        if is_last_attempt:
            break
        if r.status_code == 429:
            # Too many requests — back off progressively longer and try again
            time.sleep(backoff ** (attempt + 1))
        else:
            # Other errors — small delay and retry
            time.sleep(1.0)
    return False, None

def concat_pgns(src_dir: str, out_path: str):
    """Merge every ``.pgn`` file found in *src_dir* into the file *out_path*.

    Files are taken in sorted filename order (the extension check ignores
    case). Trailing whitespace is stripped from each file, files that are
    empty after stripping are left out, and the remaining texts are joined
    with one blank line between them. *out_path* is overwritten.
    """
    pgn_names = sorted(
        entry for entry in os.listdir(src_dir) if entry.lower().endswith('.pgn')
    )

    chunks = []
    for pgn_name in pgn_names:
        with open(os.path.join(src_dir, pgn_name), 'r', encoding='utf-8') as handle:
            text = handle.read().rstrip()
        if text:
            chunks.append(text)

    with open(out_path, 'w', encoding='utf-8') as out:
        out.write("\n\n".join(chunks))


In [49]:
import chess
import chess.pgn

def _plain_pgn(game):
    """Return *game* as PGN text with headers but without variations or comments."""
    exporter = chess.pgn.StringExporter(headers=True, variations=False, comments=False)
    return game.accept(exporter)


# For every configured player: download the monthly archives, split the games
# by the colour the player had, and build the merged PGN.
summary = {}
for display_name, username in players.items():
    print(f"\n=== {display_name} (@{username}) ===")
    player_dir = os.path.join(BASE_DIR, username)
    os.makedirs(player_dir, exist_ok=True)

    # One request per month; keep only the files that were actually written.
    results = (
        save_month_pgn(username, year, month, player_dir)
        for (year, month) in tqdm(months, desc=f"{username}")
    )
    downloaded = [path for ok, path in results if ok and path]

    # Per-colour combined files live next to the player folders in BASE_DIR.
    combined_white_path = os.path.join(BASE_DIR, f"all_{username}_white_{date_range_tag}.pgn")
    combined_black_path = os.path.join(BASE_DIR, f"all_{username}_black_{date_range_tag}.pgn")
    white_count = black_count = 0
    wanted = username.lower()  # header names are compared case-insensitively
    with open(combined_white_path, "w", encoding="utf-8") as fw, open(combined_black_path, "w", encoding="utf-8") as fb:
        for monthly in sorted(downloaded):
            with open(monthly, "r", encoding="utf-8") as fpgn:
                game = chess.pgn.read_game(fpgn)
                while game is not None:
                    if game.headers.get("White", "").lower() == wanted:
                        fw.write(_plain_pgn(game) + "\n\n")
                        white_count += 1
                    if game.headers.get("Black", "").lower() == wanted:
                        fb.write(_plain_pgn(game) + "\n\n")
                        black_count += 1
                    game = chess.pgn.read_game(fpgn)

    # Merged PGN built from every .pgn file present in the player's folder
    combined_path = os.path.join(BASE_DIR, f"all_{username}_{date_range_tag}.pgn")
    concat_pgns(player_dir, combined_path)

    cnt = len([n for n in os.listdir(player_dir) if n.lower().endswith('.pgn')])
    summary[username] = dict(
        display_name=display_name,
        folder=player_dir,
        monthly_files=cnt,
        combined_pgn_all=combined_path,
        combined_pgn_white=combined_white_path,
        combined_pgn_black=combined_black_path,
        white_games=white_count,
        black_games=black_count,
    )

print("\nDone. Summary:")
for u, info in summary.items():
    line = (
        f"- {info['display_name']} (@{u}): {info['monthly_files']} months"
        f" | W {info['white_games']} → {info['combined_pgn_white']}"
        f" | B {info['black_games']} → {info['combined_pgn_black']}"
    )
    print(line)



=== Maxime Vachier-Lagrave (@LyonBeast) ===


LyonBeast: 100%|██████████| 61/61 [02:59<00:00,  2.94s/it]



=== Levon Aronian (@LevonAronian) ===


LevonAronian: 100%|██████████| 61/61 [04:06<00:00,  4.04s/it]



=== Viswanathan Anand (@Anand) ===


Anand: 100%|██████████| 61/61 [05:24<00:00,  5.32s/it]



=== Gukesh D (@GukeshDommaraju) ===


GukeshDommaraju: 100%|██████████| 61/61 [02:45<00:00,  2.71s/it]



=== Praggnanandhaa (@rpragchess) ===


rpragchess: 100%|██████████| 61/61 [00:51<00:00,  1.18it/s]



=== Wesley So (@GMWSO) ===


GMWSO: 100%|██████████| 61/61 [01:41<00:00,  1.66s/it]



Done. Summary:
- Maxime Vachier-Lagrave (@LyonBeast): 54 months | W 1775 → /Users/syusuf/myprojects/experiments/import_chess_com_player_pgns/chesscom_pgn_last_year/all_LyonBeast_white_2020-08-14_to_2025-08-13.pgn | B 1787 → /Users/syusuf/myprojects/experiments/import_chess_com_player_pgns/chesscom_pgn_last_year/all_LyonBeast_black_2020-08-14_to_2025-08-13.pgn
- Levon Aronian (@LevonAronian): 38 months | W 537 → /Users/syusuf/myprojects/experiments/import_chess_com_player_pgns/chesscom_pgn_last_year/all_LevonAronian_white_2020-08-14_to_2025-08-13.pgn | B 539 → /Users/syusuf/myprojects/experiments/import_chess_com_player_pgns/chesscom_pgn_last_year/all_LevonAronian_black_2020-08-14_to_2025-08-13.pgn
- Viswanathan Anand (@Anand): 0 months | W 0 → /Users/syusuf/myprojects/experiments/import_chess_com_player_pgns/chesscom_pgn_last_year/all_Anand_white_2020-08-14_to_2025-08-13.pgn | B 0 → /Users/syusuf/myprojects/experiments/import_chess_com_player_pgns/chesscom_pgn_last_year/all_Anand_blac

In [None]:
import chess
import chess.pgn
import os
import csv

# --- Configuration ---
# Directories containing the PGN files to process, keyed by the colour whose
# moves are extracted from the games in that directory. The paths are relative
# to the current working directory.
SOURCE_DIRS = {
    "white": "white_games",
    "black": "black_games"
}

# The directory where the transformed data will be saved.
OUTPUT_DIR = "fen_moves_output"

# Number of initial moves to skip per player
MOVES_TO_SKIP = 2

# Cap on positions written per output file. It is checked between games, so a
# file can exceed it by the positions of the last game processed.
MAX_POSITIONS_PER_FILE = 200_000

# --- Create Output Directory ---
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"Output will be saved in the '{OUTPUT_DIR}' directory.")

# --- Main Processing Logic ---
total_files_processed = 0
total_moves_extracted = 0

# Loop through each color and its corresponding source directory
for color, source_dir in SOURCE_DIRS.items():

    if not os.path.exists(source_dir):
        print(f"\nWarning: Source directory not found at '{source_dir}'. Skipping.")
        continue

    print(f"\n--- Processing files for color: {color.upper()} from '{source_dir}' ---")

    try:
        # Sorted so runs are reproducible; the extension check ignores case.
        pgn_files = sorted(f for f in os.listdir(source_dir) if f.lower().endswith(".pgn"))
        if not pgn_files:
            print("No .pgn files found in this directory.")
            continue
    except FileNotFoundError:
        print(f"Error: Could not access directory '{source_dir}'.")
        continue

    # Determine the color to extract moves for
    color_to_extract = chess.BLACK if color == "black" else chess.WHITE

    # Process each PGN file in the directory
    for pgn_filename in pgn_files:
        input_pgn_path = os.path.join(source_dir, pgn_filename)

        # Create a corresponding output filename (now with .csv extension)
        output_csv_filename = os.path.splitext(pgn_filename)[0] + "_fen_moves.csv"
        output_csv_path = os.path.join(OUTPUT_DIR, output_csv_filename)

        print(f"  Processing: {pgn_filename} -> {output_csv_filename}")

        try:
            with open(input_pgn_path, encoding='utf-8') as pgn_file, \
                    open(output_csv_path, 'w', newline='', encoding='utf-8') as csv_file:
                # Create CSV writer
                csv_writer = csv.writer(csv_file)
                # Write header
                csv_writer.writerow(['FEN', 'MOVE'])

                file_moves_count = 0

                while file_moves_count < MAX_POSITIONS_PER_FILE:
                    try:
                        game = chess.pgn.read_game(pgn_file)
                    except (ValueError, RuntimeError):
                        # Skip malformed games without crashing
                        continue

                    if game is None:
                        # End of file
                        break

                    event_l = game.headers.get("Event", "").lower()
                    variant_l = game.headers.get("Variant", "").lower()

                    if ("chess960" in event_l) or ("fischer" in event_l) or ("chess960" in variant_l) or ("fischer" in variant_l):
                        # Skip only this Chess960 / Fischer Random game; a
                        # `break` here would drop every later game in the file.
                        continue

                    board = game.board()
                    move_count_per_player = {chess.WHITE: 0, chess.BLACK: 0}

                    for move in game.mainline_moves():
                        current_player = board.turn

                        # Check if it's the correct player's turn to move
                        if current_player == color_to_extract:
                            move_count_per_player[current_player] += 1

                            # Skip the first MOVES_TO_SKIP moves for this player
                            if move_count_per_player[current_player] > MOVES_TO_SKIP:
                                # Both values describe the position before the
                                # move is played, so compute them before push().
                                fen_before_move = board.fen()
                                san_move = board.san(move)
                                # Write to CSV file
                                csv_writer.writerow([fen_before_move, san_move])
                                file_moves_count += 1

                        # Always push the move to advance the board state
                        board.push(move)

            # Only files processed without an exception count towards the totals.
            total_files_processed += 1
            total_moves_extracted += file_moves_count

        except Exception as e:
            # Report the failure and carry on with the next file.
            print(f"    ERROR processing file {pgn_filename}: {e}")


print("\n\n--- PROCESSING COMPLETE ---")
print(f"Successfully processed {total_files_processed} PGN files.")
print(f"Extracted a total of {total_moves_extracted} moves (skipping first {MOVES_TO_SKIP} moves per player).")
print(f"All output files have been saved to the '{OUTPUT_DIR}' directory as CSV files.")

Output will be saved in the 'fen_moves_output' directory.

--- Processing files for color: WHITE from 'white_games' ---
  Processing: all_AnishGiri_white_2020-08-14_to_2025-08-13.pgn -> all_AnishGiri_white_2020-08-14_to_2025-08-13_fen_moves.csv
  Processing: all_LevonAronian_white_2020-08-14_to_2025-08-13.pgn -> all_LevonAronian_white_2020-08-14_to_2025-08-13_fen_moves.csv
  Processing: all_GukeshDommaraju_white_2020-08-14_to_2025-08-13.pgn -> all_GukeshDommaraju_white_2020-08-14_to_2025-08-13_fen_moves.csv
  Processing: all_Chefshouse_white_2020-08-14_to_2025-08-13.pgn -> all_Chefshouse_white_2020-08-14_to_2025-08-13_fen_moves.csv
  Processing: all_MagnusCarlsen_white_2020-08-14_to_2025-08-13.pgn -> all_MagnusCarlsen_white_2020-08-14_to_2025-08-13_fen_moves.csv
  Processing: all_GMWSO_white_2020-08-14_to_2025-08-13.pgn -> all_GMWSO_white_2020-08-14_to_2025-08-13_fen_moves.csv
  Processing: all_FabianoCaruana_white_2020-08-14_to_2025-08-13.pgn -> all_FabianoCaruana_white_2020-08-14_to_

## Optional: Customize the players or date range

To change the list of players, edit the `players` dict above. To change the date range, replace the
calculation of `start_date` with a fixed date (e.g., `date(2024, 8, 13)`) or adjust the number of days.
