# In [None]:  (Jupyter notebook cell marker, kept as a comment so the file parses as Python)
import zstandard as zstd
import io
import chess.pgn
import pandas as pd
from chess.pgn import StringExporter
import time

def has_eval_annotation(game):
    """Tell whether any main-line move of *game* carries an ``[%eval`` comment.

    Starting from ``game``, the walk repeatedly steps into the first entry of
    ``variations`` and inspects that node's ``comment``. The comment of the
    starting node itself is never inspected, and side variations are ignored.

    Returns:
        True as soon as a comment containing ``"[%eval"`` is found, False
        once a node without variations is reached.
    """
    current = game
    while True:
        children = current.variations
        if not children:
            # End of the main line without finding an evaluation tag.
            return False
        current = children[0]
        text = current.comment
        if text and "[%eval" in text:
            return True

def is_aggressive(game, *, min_white_elo=1800, max_plies=60,
                  min_tactical_ratio=0.2):
    """Return True when *game* passes every "aggressive game" filter.

    The filters are applied from cheapest to most expensive:

    1. The ``Result`` header is a decisive result (``1-0`` or ``0-1``).
    2. The ``WhiteElo`` header is an integer strictly greater than
       ``min_white_elo``. A missing or non-numeric rating rejects the game
       instead of raising.
    3. The lower-cased ``Opening`` header contains one of the keywords
       ``gambit``, ``attack``, ``albin``, ``englund`` or ``scotch``.
    4. The main line has at least one and at most ``max_plies`` half-moves.
    5. ``has_eval_annotation(game)`` is true.
    6. At least ``min_tactical_ratio`` of the half-moves are captures,
       checks or mates (SAN contains ``x``, ``+`` or ``#``).
    7. At least one "early aggression" marker was seen (see the loop below).

    Args:
        game: Object exposing ``headers`` (mapping with ``get``),
            ``mainline_moves()`` and ``board()``, as used below.
        min_white_elo: White must be rated strictly above this value.
        max_plies: Maximum number of half-moves allowed in the main line.
        min_tactical_ratio: Minimum share of tactical half-moves.

    Returns:
        bool: True if the game passes all filters, False otherwise.
    """
    headers = game.headers

    # 1. Keep decisive games only.
    if headers.get("Result", "") not in ("1-0", "0-1"):
        return False

    # A missing header yields "" and some values are not numeric (e.g. "?");
    # both make int() raise, so treat them as "does not qualify".
    try:
        elo_white = int(headers.get("WhiteElo", ""))
    except (TypeError, ValueError):
        return False
    if elo_white <= min_white_elo:
        return False

    # 2. Opening name must contain one of the aggressive keywords.
    opening = headers.get("Opening", "").lower()
    aggressive_openings = ("gambit", "attack", "albin", "englund", "scotch")
    if not any(word in opening for word in aggressive_openings):
        return False

    # 3. Short games only (default: 60 half-moves, i.e. 30 moves per side).
    moves = list(game.mainline_moves())
    if not moves or len(moves) > max_plies:
        return False

    # Checked before the move loop because it does not need SAN generation;
    # the outcome is the same as checking it afterwards.
    if not has_eval_annotation(game):
        return False

    # 4. Replay the game and count tactical / aggressive moves.
    board = game.board()
    tactical_count = 0
    early_pawn_push = 0

    for ply, move in enumerate(moves):
        san = board.san(move)  # must be computed before the move is pushed

        # Tactical half-moves: captures, checks or checkmates.
        if "x" in san or "+" in san or "#" in san:
            tactical_count += 1

        # King-side thrusts within the first 20 half-moves.
        # NOTE(review): this is a substring test, so any move by either side
        # whose SAN contains h4/g4/f4 (e.g. "Nf4", "Qxh4+") is counted too.
        if ply < 20 and any(sq in san for sq in ("h4", "g4", "f4")):
            early_pawn_push += 1

        # Queen-side castling or a bare a4/b4/c4 within the first 30
        # half-moves. SAN spells castling with the letter O ("O-O-O"); the
        # previous zero-based "0-0-0" pattern could never match.
        if ply < 30 and ("O-O-O" in san or san in ("a4", "b4", "c4")):
            early_pawn_push += 1

        board.push(move)

    # 5. Final heuristics.
    if tactical_count / len(moves) < min_tactical_ratio:
        return False
    return early_pawn_push >= 1

# Stream a zstd-compressed Lichess PGN dump, keep the games accepted by
# is_aggressive() and write them to a plain-text PGN file.

# NOTE(review): `L` and `exporter` are not used anywhere in this cell; they
# are kept only in case another cell refers to them.
L = []
exporter = StringExporter(headers=True, variations=True, comments=True)

MAX_GAMES = 94000000   # upper bound on the number of games read
PROGRESS_EVERY = 1000  # print a progress line every this many games

with open("lichess_db_standard_rated_2025-05.pgn.zst", 'rb') as compressed_file, \
     open("aggressive_games_1800.pgn", "w", encoding="utf-8") as output_file:

    dctx = zstd.ZstdDecompressor()
    stream_reader = dctx.stream_reader(compressed_file)

    # Closing the text wrapper on exit also closes the stream it wraps.
    with io.TextIOWrapper(stream_reader, encoding='utf-8') as text_stream:
        t0 = time.time()
        i = 0
        kept = 0
        while i < MAX_GAMES:
            game = chess.pgn.read_game(text_stream)
            if game is None:
                # No more games in the input.
                break

            if i % PROGRESS_EVERY == 0 and i > 0:
                t1 = time.time()
                h, r1 = divmod(int(t1 - t0), 3600)
                m, s = divmod(r1, 60)
                print(f"{i} parties traitées. {kept} parties gardées. {h}h {m}min {s}sec écoulées")

            if is_aggressive(game):
                # NOTE(review): a new exporter is built for every game;
                # reusing a single instance appears to accumulate output
                # across games -- confirm before hoisting it.
                output_file.write(game.accept(StringExporter()) + "\n\n")
                kept += 1
            i += 1