In [84]:
pip install chess.com

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [85]:
import time
import pandas as pd
import requests
from chessdotcom import ChessDotComClient
import numpy as np
from sklearn.preprocessing import OneHotEncoder

In [86]:
def parcourir_avec_delai(liste_liens):
    '''
    Cette fonction recupere une liste de lien json [08/2013,09/2013,...] 
    represantant les parties jouées a un mois donné
    '''
    tous_les_jeux = []
    
    # IMPORTANT : Ajouter un User-Agent
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    for i, url in enumerate(liste_liens):
        try:
            print(f"Récupération {i+1}/{len(liste_liens)}: {url}")
            response = requests.get(url,headers=headers)
            response.raise_for_status()
            data = response.json() #{'games': [{'url': 'https://www.chess.com/game/daily/74856790', 'pgn': '[Event "Let\'s Play!"]\n[Site "Chess.com"]\n[Da
            
            tous_les_jeux.extend(data['games'])#extrait la liste'games
            
            # Pause pour ne pas surcharger l'API
            time.sleep(0.5)
            
        except requests.exceptions.RequestException as e:
            print(f"Erreur pour {url}: {e}")
    
    return tous_les_jeux #retourne toutes les parties


In [87]:
import re

def extraire_opening_robuste(pgn):
    """
    Extrait le nom de l'ouverture de manière robuste
    Args :
        - rubrique pgn du fichier json

    output : extract l'opening ou N/A
    """
    # Chercher l'ECOUrl avec regex
    match = re.search(r'\[ECOUrl "https://www\.chess\.com/openings/([^"]+)"\]', pgn)
    if match:
        return match.group(1)
    
    # Fallback si regex échoue
    idx = pgn.find("https://www.chess.com/openings/")
    if idx != -1:
        start = idx + 33
        end = pgn.find('"', start) #-1 si n'existe pas
        if end != -1:
            return pgn[start:end]
    
    return 'N/A'

In [88]:
def extraire_code_opening(pgn):
    
    """
    Extrait le code opening de manière robuste
    Args :
        - rubrique pgn du fichier json

    output : extract l'opening ou N/A
    """
    # Chercher l'ECOUrl avec regex
    match = re.search(r'\[ECO "([^"]+)"\]', pgn)
    
    if match:
        return match.group(1)
    
    # Fallback si regex échoue
    idx = pgn.find('[ECO "')
    if idx != -1:
        start = idx + 6
        end = pgn.find('"', start) #-1 si n'existe pas
        if end != -1:
            return pgn[start:end]
    
    return 'N/A'

In [89]:

   
client = ChessDotComClient(user_agent = "My Python Application...")

response = client.get_player_profile("Tuco007")#recuperation des données du profil "Tuco007"

print("*"*50)
print(f"Je suis : {response.player.name}") # Affichage du nom 
Tuco_stats = client.get_player_stats(username="Tuco007") #recuperation des stats de Tuco007
print(f"Blitz ELO : {Tuco_stats.stats.chess_blitz.last.rating}          Pic ELO : {Tuco_stats.stats.chess_blitz.best.rating} ")
print(f"Rapid ELO : {Tuco_stats.stats.chess_rapid.last.rating}          Pic ELO : {Tuco_stats.stats.chess_rapid.best.rating}")
print(f"Classique ELO : {Tuco_stats.stats.chess_daily.last.rating}      Pic ELO : {Tuco_stats.stats.chess_daily.best.rating}")
response.player

archive = client.get_player_game_archives(username="Tuco007")#recupere toutes les parties du joueur depuis qu'il a join le site
archive_json = archive.json #recupere en json

dictionnaire_partie = {'Player white' : [],'Player black' : [],'Result white':[],'Result black':[],'Date partie':[],'Heure partie':[],'ELO white': [], 'ELO black':[],'Opening':[],'Code Opening':[],'Rated':[],'Time control':[],'Liste des coups':[]}

#archive_json : {archives : [games08/2013,08/2013,....]}
#archive_json[archives] : recupere la liste [games08/2013,08/2013,....]}}

all_games = parcourir_avec_delai(archive_json["archives"])

#creation d'un dictionnaire ou les clefs sont les paramètres à recuperer et les valeurs sont les valeurs a recuperer
for j in range(len(all_games)):
    dictionnaire_partie['Player white'].append(all_games[j]['white']['username'])
    dictionnaire_partie['Player black'].append(all_games[j]['black']['username'])
    
    dictionnaire_partie['Result white'].append(all_games[j]['white']['result'])
    dictionnaire_partie['Result black'].append(all_games[j]['black']['result'])

    indice_date = all_games[j]['pgn'].find("Date") #trouver l'indice de la position DATE dans la rubrique pdn du json
    dictionnaire_partie['Date partie'].append(all_games[j]['pgn'][indice_date+6:indice_date+6+10])

    indice_heure = all_games[j]['pgn'].find("UTCTime") #trouver l'indice de la position UTCDate dans la rubrique pdn du json (pour trouver l'heure)
    dictionnaire_partie['Heure partie'].append(all_games[j]['pgn'][indice_heure+9:indice_heure+9+8])

    dictionnaire_partie['ELO white'].append(all_games[j]['white']['rating'])
    dictionnaire_partie['ELO black'].append(all_games[j]['black']['rating'])

    opening = extraire_opening_robuste(all_games[j]['pgn'])
    dictionnaire_partie['Opening'].append(opening)

    opening_code = extraire_code_opening(all_games[j]['pgn'])
    dictionnaire_partie['Code Opening'].append(opening_code)

    dictionnaire_partie['Rated'].append(all_games[j]['rated'])

    dictionnaire_partie['Time control'].append(all_games[j]['time_control'])

    startlistecoups = all_games[j]['pgn'].find("]\n\n1.")#trouver l'endroit ou commence la liste des coups dans pgn
    dictionnaire_partie['Liste des coups'].append(all_games[j]['pgn'][startlistecoups+2:])
    
df = pd.DataFrame(dictionnaire_partie)


**************************************************
Je suis : Max Rasseneur
Blitz ELO : 1297          Pic ELO : 1497 
Rapid ELO : 1652          Pic ELO : 1870
Classique ELO : 1455      Pic ELO : 1543
Récupération 1/130: https://api.chess.com/pub/player/tuco007/games/2013/08
Récupération 2/130: https://api.chess.com/pub/player/tuco007/games/2013/09
Récupération 3/130: https://api.chess.com/pub/player/tuco007/games/2013/10
Récupération 4/130: https://api.chess.com/pub/player/tuco007/games/2013/11
Récupération 5/130: https://api.chess.com/pub/player/tuco007/games/2013/12
Récupération 6/130: https://api.chess.com/pub/player/tuco007/games/2014/01
Récupération 7/130: https://api.chess.com/pub/player/tuco007/games/2014/02
Récupération 8/130: https://api.chess.com/pub/player/tuco007/games/2014/03
Récupération 9/130: https://api.chess.com/pub/player/tuco007/games/2014/04
Récupération 10/130: https://api.chess.com/pub/player/tuco007/games/2014/05
Récupération 11/130: https://api.chess.com/pub/pla

In [90]:
#check NULL values
df.isnull().sum()

Player white       0
Player black       0
Result white       0
Result black       0
Date partie        0
Heure partie       0
ELO white          0
ELO black          0
Opening            0
Code Opening       0
Rated              0
Time control       0
Liste des coups    0
dtype: int64

In [91]:
#suprimons les lignes ou il n'y a pas d'opening
df_parties=df[df["Opening"]!="N/A"]



In [92]:
#valeurs uniques dans le DF
df_parties.nunique()

Player white        5350
Player black        5336
Result white          10
Result black          10
Date partie         1886
Heure partie       11257
ELO white           1211
ELO black           1181
Opening             3001
Code Opening         248
Rated                  2
Time control          19
Liste des coups    12442
dtype: int64

In [93]:
#Pre processing (a ne lancer qu'une fois)
df_parties["Date partie"]=df_parties["Date partie"].str.replace(".","/")
df_parties["Date partie"]=df_parties["Date partie"].str.strip()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_parties["Date partie"]=df_parties["Date partie"].str.replace(".","/")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_parties["Date partie"]=df_parties["Date partie"].str.strip()


In [94]:
#Assurer le typage de heure et de date

df_parties["Date partie"]=pd.to_datetime(df_parties["Date partie"],format='%Y/%m/%d')
df_parties["Heure partie"]=pd.to_datetime(df_parties["Heure partie"],format='%H:%M:%S').dt.time


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_parties["Date partie"]=pd.to_datetime(df_parties["Date partie"],format='%Y/%m/%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_parties["Heure partie"]=pd.to_datetime(df_parties["Heure partie"],format='%H:%M:%S').dt.time


In [95]:
# Création Résultat myself
df_parties["Result maximilien"] = np.where(
    df_parties["Player white"] == "Tuco007",
    df_parties["Result white"],
    df_parties["Result black"]
)

# Création Résultat advsersaire
df_parties["Result adv"] = np.where(
    df_parties["Player white"] != "Tuco007",
    df_parties["Result white"],
    df_parties["Result black"]
)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_parties["Result maximilien"] = np.where(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_parties["Result adv"] = np.where(


In [96]:
df_parties["Result maximilien"].unique()

array(['win', 'resigned', 'checkmated', 'insufficient', 'abandoned',
       'repetition', 'agreed', 'stalemate', 'timeout',
       'timevsinsufficient'], dtype=object)

In [97]:
#Création d'une colonne Victory/Draw/Defaite
def WDL(row):
    
    if row["Result maximilien"] in ['win']:
        return 'Win'
    elif row["Result maximilien"] in ['resigned','checkmated','timeout','abandonned']:
        return 'Lost'
    else:
        return 'Draw'

df_parties["WDL"]=df_parties.apply(WDL,axis=1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_parties["WDL"]=df_parties.apply(WDL,axis=1)


In [98]:
#EDA
df_parties.groupby(by="WDL",sort=True)["WDL"].count().nlargest()

WDL
Win     6299
Lost    5401
Draw     744
Name: WDL, dtype: int64

In [99]:
#Mapping pour resultat maximilien WDL 1 : victory,0 : lost, 0.5 : draw
df_parties["WDL"]=df_parties["WDL"].map({"Win":1,"Draw":0.5,"Lost":0})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_parties["WDL"]=df_parties["WDL"].map({"Win":1,"Draw":0.5,"Lost":0})


In [101]:
df_parties


Unnamed: 0,Player white,Player black,Result white,Result black,Date partie,Heure partie,ELO white,ELO black,Opening,Code Opening,Rated,Time control,Liste des coups,Result maximilien,Result adv,WDL
0,Rizzen_92,Tuco007,checkmated,win,2013-08-25,09:21:42,1054,1346,Indian-Game-Fantasy-Variation-2...d5,A45,True,1/259200,\n1. d4 Nf6 2. f3 d5 3. Nc3 e6 4. a3 Nbd7 5. N...,win,checkmated,1.0
1,Tuco007,Rizzen_92,win,checkmated,2013-08-25,09:23:02,1346,1054,Kings-Pawn-Opening-Kings-Knight-Variation,C40,False,1/86400,\n1. e4 e5 2. Nf3 f6 3. d3 d6 4. d4 Nc6 5. dxe...,win,checkmated,1.0
2,Rizzen_92,Tuco007,checkmated,win,2013-08-26,09:44:23,997,1403,Kings-Pawn-Opening-Kings-Knight-Variation,C40,True,1/259200,\n1. e4 e5 2. Nf3 f6 3. d4 Bb4+ 4. Ke2 d6 5. a...,win,checkmated,1.0
3,Tuco007,Rizzen_92,win,checkmated,2013-08-26,11:54:31,1435,965,Pirc-Defense-2.d4-Nf6-3.Nc3,B07,True,1/86400,\n1. e4 Nf6 2. Nc3 d6 3. d4 Bg4 4. Bb5+ c6 5. ...,win,checkmated,1.0
4,Rizzen_92,Tuco007,checkmated,win,2013-08-26,13:40:14,943,1457,Pirc-Defense,B07,True,1/86400,\n1. e4 d6 2. Bb5+ Nc6 3. Nc3 Bd7 4. Nf3 e5 5....,win,checkmated,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12440,Nospang,Tuco007,win,resigned,2025-12-22,19:26:42,1128,1297,Alapin-Sicilian-Defense-2...Nc6-3.Nf3-d6-4.d4,B22,False,180,\n1. e4 {[%clk 0:02:58.8]} 1... c5 {[%clk 0:02...,resigned,win,0.0
12441,Tuco007,Ttmcswag,timeout,win,2025-12-22,21:12:17,1297,1033,Closed-Sicilian-Defense-Traditional-Line-3.Nf3...,B23,False,180,\n1. e4 {[%clk 0:02:58.1]} 1... c5 {[%clk 0:02...,timeout,win,0.0
12442,kozhar_viktor,Tuco007,checkmated,win,2025-12-23,09:55:11,1297,1297,Queens-Pawn-Opening-Blackmar-Diemer-Vienna-Die...,D00,False,180,\n1. d4 {[%clk 0:02:59.3]} 1... d5 {[%clk 0:02...,win,checkmated,1.0
12443,araghaz,Tuco007,resigned,win,2025-12-23,10:01:10,1235,1297,Slav-Defense-3.Nc3-Nf6,D10,False,180,\n1. d4 {[%clk 0:02:59.9]} 1... d5 {[%clk 0:02...,win,resigned,1.0


In [105]:
df_parties=df_parties.drop("Liste des coups",axis=1)

In [106]:
df_parties.to_csv(path_or_buf=r"C:\Users\rasse\_Data_analyst\My Projects\Chess analyse\REDO 122025\Chess_games.csv")