### Pipeline ETL Basique

In [2]:
import requests
# Requête GET
response = requests.get("http://datascientest.com/")
# Affichage du code de statut
print(response.status_code)

# Requête GET
response = requests.get("http://datascientest.com/corrections_examens")
# Affichage du code de statut
print(response.status_code)

200
404


In [None]:
import pandas as pd

# EXTRACT : Création de dataframes à partir de plusieurs requêtes
def extract_skills_data():
    # GET liste des ids membres
    response = requests.get("https://dst-moduleapi.s3.eu-west-1.amazonaws.com/members")
    memberIds = response.json()['members']
    # GET infos de chaque membre
    members = []
    format_requete = "https://dst-moduleapi.s3.eu-west-1.amazonaws.com/members/{ID}"
    for memberId in memberIds:
        requete = format_requete.format(ID=memberId)
        response = requests.get(requete)
        members.append(response.json())
    df = pd.DataFrame(members)
    # GET liste des skills
    skills = []
    response = requests.get(f"https://dst-moduleapi.s3.eu-west-1.amazonaws.com/skills")
    skills = response.json()['skills']
    skills_df = pd.DataFrame(skills)
    
    return df,skills_df

In [5]:
display (*extract_skills_data())

Unnamed: 0,MemberID,MemberName,MemberAge,MemberSkillsID
0,c20ad4d76fe97759aa27a0c99bff6710,Yohan,29,"[2, 3, 4]"
1,67ed8e627baa71e60d13227a8e4141a7,Pierre,39,"[1, 2]"
2,1631a423b23892c59f86d982b3bf1d9c,Robin,19,"[1, 2, 3, 4]"


Unnamed: 0,SkillID,SkillName
0,1,Python
1,2,Java
2,3,R
3,4,SQL


In [6]:
# TRANSFORM : Séparation des éléments de liste, jointures entre dataframes, aggrégations...
def transform_skills(df,skills_df):
    df = df.explode("MemberSkillsID")
    df = df.merge(skills_df, how = "left", left_on = "MemberSkillsID", right_on = "SkillID")
    stats = df.groupby("SkillName").agg({"MemberID" : "count","MemberAge" : "mean"})
    stats = stats.rename(columns = {"MemberID" : "SkillMastery","MemberAge" : "AverageAge"})
    return (stats)

In [7]:
display(transform_skills(*extract_skills_data()))

Unnamed: 0_level_0,SkillMastery,AverageAge
SkillName,Unnamed: 1_level_1,Unnamed: 2_level_1
Java,3,29.0
Python,2,29.0
R,2,24.0
SQL,2,24.0


In [None]:
# LOAD : Chargement du dataframe dans un csv
def load_skills (stats):
    stats.to_csv("skills_kpis.csv")

In [9]:
load_skills(transform_skills(*extract_skills_data()))
pd.read_csv("skills_kpis.csv")

Unnamed: 0,SkillName,SkillMastery,AverageAge
0,Java,3,29.0
1,Python,2,29.0
2,R,2,24.0
3,SQL,2,24.0


### Exemple 1 AWS : KPIs des magasins

In [None]:
# On récupère tous les magasins

response = requests.get("https://dst-moduleapi.s3.eu-west-1.amazonaws.com/global_transactions")
stores = response.json()['Stores']
print (stores[0].keys())

df_stores = pd.DataFrame(stores)
display(df_stores.head())

display(df_stores.isna().mean())

dict_keys(['Staff', 'StoreCity', 'BestSellingProduct', 'Manager', 'Surface', 'StoreID'])


Unnamed: 0,Staff,StoreCity,BestSellingProduct,Manager,Surface,StoreID
0,46,Paris,thread,Harry Kinchen,1409.0,0
1,39,Marseille,,,,1
2,31,Lyon,,,1798.0,2
3,55,Toulouse,candle,,1221.0,3
4,28,Nice,bottle,Robert Bagley,1546.0,4


Staff                 0.00
StoreCity             0.00
BestSellingProduct    0.49
Manager               0.64
Surface               0.22
StoreID               0.00
dtype: float64

In [None]:
# Fonction qui récupère les transactions d'un magasin
def get_transactions_from_store_id(store_id):
    format_requete = "https://dst-moduleapi.s3.eu-west-1.amazonaws.com/transactions/store/{ID}"
    requete = format_requete.format(ID=store_id)
    response = requests.get(requete)
    transactions = response.json()['Transactions']
    
    # Création d'un dataframe à partir du JSON et ajout de la colonne pour l'id du magasin
    df_transactions = pd.DataFrame(transactions)
    df_transactions['StoreID'] = store_id
    return (df_transactions)

# On concatène les transactions pour tous les magasins
all_stores = [get_transactions_from_store_id(storeId) for storeId in df_stores['StoreID']]
df_all_stores = pd.concat(all_stores)
display(df_all_stores)

df_all_stores['StoreID'].isna().sum()

Unnamed: 0,TransactionID,ClientID,ProductID,Price,Quantity,ProductDiscount,ProductName,TotalAmountSpent,StoreID
0,6080,608,15,91.98,0.0,80.0,tooth picks,0.00,0
1,4326,29,43,2.42,2.0,90.0,scotch tape,4.84,0
2,1883,206,36,11.92,1.0,10.0,window,11.92,0
3,2304,434,88,13.06,8.0,90.0,sand paper,104.48,0
4,8807,520,65,55.06,7.0,70.0,clothes,385.42,0
...,...,...,...,...,...,...,...,...,...
0,6207,21,81,80.02,7.0,10.0,watch,560.14,86
0,940,966,71,96.67,6.0,20.0,spring,580.02,87
0,996,310,59,6.07,2.0,80.0,pillow,12.14,89
0,7752,154,14,40.21,1.0,90.0,speakers,40.21,94


np.int64(0)

In [13]:
# Jointure
df_stats = df_all_stores.merge(df_stores,"left",on="StoreID")

# Aggrégation pour calcul des KPIs
pan_moy = df_stats.groupby("StoreID")['TotalAmountSpent'].mean()
CA = df_stats.groupby("StoreID")['TotalAmountSpent'].sum()
prod_pop = df_stats.groupby("StoreID").agg({'ProductName': lambda x:x.mode()[0]})['ProductName']

# On ne garde qu'une ligne par magasin avec : ses  caractéristiques + 5 KPIs
df_stats = df_stats[df_stores.columns].drop_duplicates(subset=['StoreID'])
df_stats['Panier_moyen'] = pan_moy.values
df_stats['CA'] = CA.values
df_stats['Produit_Populaire'] = prod_pop.values
df_stats['CA_by_staff'] = df_stats['CA']/df_stats['Staff']
df_stats['CA_by_surface'] = df_stats['CA']/df_stats['Surface']

# Tri par chiffre d'affaires
df_stats = df_stats.sort_values('CA',ascending=False)

display(df_stats)

Unnamed: 0,Staff,StoreCity,BestSellingProduct,Manager,Surface,StoreID,Panier_moyen,CA,Produit_Populaire,CA_by_staff,CA_by_surface
0,46,Paris,thread,Harry Kinchen,1409.0,0,209.484216,21367.39,piano,464.508478,15.164933
181,31,Lyon,,,1798.0,2,229.284521,16737.77,cell phone,539.928065,9.309105
102,39,Marseille,,,,1,188.762658,14912.25,balloon,382.365385,
365,20,Nantes,,,1272.0,5,267.578824,13646.52,balloon,682.326000,10.728396
312,28,Nice,bottle,Robert Bagley,1546.0,4,237.879623,12607.62,box,450.272143,8.154994
...,...,...,...,...,...,...,...,...,...,...,...
992,27,La Seyne-sur-Mer,,,,75,67.210000,67.21,bed,2.489259,
964,36,Dunkirk,fridge,,1120.0,47,66.300000,66.30,window,1.841667,0.059196
971,34,Courbevoie,,,1201.0,50,41.890000,41.89,speakers,1.232059,0.034879
998,45,Villejuif,,,1570.0,94,40.210000,40.21,speakers,0.893556,0.025611


### Exemple 2 AWS : KPIs des clients

In [None]:
# On récupère les transactions depuis 24 heures
response = requests.get("https://dst-moduleapi.s3.eu-west-1.amazonaws.com/transactions24")
transactions24 = response.json()
print (transactions24[0].keys())

df_transactions24 = pd.DataFrame(transactions24)
display(df_transactions24.isna().mean())
display(df_transactions24.duplicated().sum())
df_transactions24.head()

dict_keys(['TransactionID', 'ClientID', 'ProductID', 'StoreID', 'Quantity', 'Date'])


TransactionID    0.0
ClientID         0.0
ProductID        0.0
StoreID          0.0
Quantity         0.0
Date             0.0
dtype: float64

np.int64(0)

Unnamed: 0,TransactionID,ClientID,ProductID,StoreID,Quantity,Date
0,3236,176,19,37,4,2022-11-28
1,4961,872,86,62,7,2022-11-28
2,6053,205,7,75,0,2022-11-28
3,141,975,15,27,9,2022-11-28
4,4121,936,23,87,0,2022-11-28


In [15]:
# On récupère les magasins, clients et produits associés à ces transactions

def get_stores():
    format_requete = "https://dst-moduleapi.s3.eu-west-1.amazonaws.com/Store/{StoreID}"
    stores = [requests.get(format_requete.format(StoreID=store_id)).json() for store_id in df_transactions24['StoreID'].unique()]
    df_stores = pd.DataFrame(stores)
    return (df_stores)

def get_clients():
    format_requete = "https://dst-moduleapi.s3.eu-west-1.amazonaws.com/Client/{ClientID}"
    clients = [requests.get(format_requete.format(ClientID=client_id)).json() for client_id in df_transactions24['ClientID'].unique()]
    df_clients = pd.DataFrame(clients)
    return (df_clients)

def get_products():
    format_requete = "https://dst-moduleapi.s3.eu-west-1.amazonaws.com/Product/{ProductID}"
    products = [requests.get(format_requete.format(ProductID=product_id)).json() for product_id in df_transactions24['ProductID'].unique()]
    df_products = pd.DataFrame(products)
    return (df_products)

df_stores24 = get_stores()
df_clients24 = get_clients()
df_products24 = get_products()

display(df_stores24)
display(df_clients24)
display(df_products24)


Unnamed: 0,Staff,StoreCity,Manager,StoreID,BestSellingProduct,Surface
0,33,Argenteuil,Maritza Vance,37,,
1,29,Béziers,,62,packing peanuts,1030.0
2,27,La Seyne-sur-Mer,,75,,
3,58,Amiens,,27,puddle,
4,41,Ivry-sur-Seine,Blake Tucker,87,rubber duck,1448.0
...,...,...,...,...,...,...
95,30,Vénissieux,Marcus Baldridge,83,,1497.0
96,46,Paris,Harry Kinchen,0,thread,1409.0
97,10,"Pau, Pyrénées-Atlantiques",,57,,1870.0
98,39,Marseille,,1,,


Unnamed: 0,ClientID,ClientFirstName,ClientLastName,ClientIsFidelized
0,176,Beverly,Berry,
1,872,Robert,Dickens,
2,205,Bessie,Silva,
3,975,Jimmy,Clark,
4,936,Marie,Comstock,
...,...,...,...,...
995,303,Beverly,Mcneil,
996,270,Sharon,Bailey,
997,266,Marisa,Pratt,
998,392,Robert,Mathis,1


Unnamed: 0,ProductID,ProductName,Price,PopularityRank
0,19,car,3.245688226824227,20.0
1,86,shirt,8.20156956982211,70.0
2,7,chocolate,5.7683603230124225,11.0
3,15,tooth picks,7.089089819693466,2.0
4,23,key chain,0.5061594533875906,47.0
...,...,...,...,...
93,52,candy wrapper,7.723165123060141,94.0
94,68,computer,9.354955644522024,25.0
95,26,bread,8.744481075451283,66.0
96,69,cat,9.843349368968816,86.0


In [16]:
# On modifie certains types object

df_clients24 = df_clients24.astype({'ClientID':'int64',
                                    'ClientIsFidelized':'float'})

df_products24 = df_products24.astype({'ProductID':'int64',
                                   'Price':'float',
                                  'PopularityRank':'float'})

In [19]:
# Jointures
df_stats24 = df_transactions24.merge(df_clients24,on="ClientID")
df_stats24 = df_stats24.merge(df_products24,on="ProductID")
df_stats24 = df_stats24.merge(df_stores24,on="StoreID")

# Augmentation
df_stats24['TotalPrice'] = df_stats24['Quantity'] * df_stats24['Price']

# Aggrégations pour calcul des KPIs
nb_cmd = df_stats24.groupby("ClientID")['TransactionID'].count()
pan_tot = df_stats24.groupby("ClientID")['TotalPrice'].sum()
pan_moy = df_stats24.groupby("ClientID")['TotalPrice'].mean()
best_storecity = df_stats24.groupby("ClientID").agg({'StoreCity': lambda x:x.mode()[0]})['StoreCity']
best_ProductName = df_stats24.groupby("ClientID").agg({'ProductName': lambda x:x.mode()[0]})['ProductName']

# On ne garde qu'une ligne par client avec : ses  caractéristiques + 5 KPIs
df_stats24 = df_stats24[df_clients24.columns].drop_duplicates(subset=['ClientID'])
df_stats24['NbCommandes'] = nb_cmd.values
df_stats24['PanierTotal'] = pan_tot.values
df_stats24['PanierMoyen'] = pan_moy.values
df_stats24['VillePref'] = best_storecity.values
df_stats24['ProduitPref'] = best_ProductName.values

# Tri par panier total
df_stats24 = df_stats24.sort_values('PanierTotal',ascending=False)

display(df_stats24)

Unnamed: 0,ClientID,ClientFirstName,ClientLastName,ClientIsFidelized,NbCommandes,PanierTotal,PanierMoyen,VillePref,ProduitPref
149,564,Robert,Tuholski,,25,745.988022,29.839521,Lyon,bottle
105,37,Lottie,Meyer,0.0,22,717.860063,32.630003,Saint-Nazaire,toilet
5798,474,Herbert,Ortega,,21,699.652678,33.316794,Beauvais,spring
69,87,Paul,Zenz,,28,685.316236,24.475580,Besançon,paint brush
21,151,Susan,Beil,,28,660.876387,23.602728,Le Tampon,balloon
...,...,...,...,...,...,...,...,...,...
1328,684,Timothy,Christy,0.0,13,90.532640,6.964049,Villeneuve-d'Ascq,canvas
47,724,Mildred,Torre,,6,84.748025,14.124671,Bordeaux,cell phone
1699,135,Kelly,Mendelson,,10,77.230270,7.723027,"Antony, Hauts-de-Seine",air freshener
423,31,Joshua,Vierra,,7,76.607536,10.943934,Beauvais,soy sauce packet


### Exemple : Grands Maîtres aux échecs

In [None]:
# On récupères les usernames des grands maîtres
response = requests.get("https://api.chess.com/pub/titled/GM",headers = {'User-Agent': 'ByPass'}).json()
print (response.keys())

grandmasters = response['players']
print ("Nb de grands maîtres :", len(grandmasters))

dict_keys(['players'])
Nb de grands maîtres : 1639


In [22]:
# Fonction qui récupère les infos des joueurs
def extract_player_info(username):
    format_requete = "https://api.chess.com/pub/player/{}"
    response = requests.get(format_requete.format(username),headers = {'User-Agent': 'ByPass'}).json()
    return (response)

extract_player_info('emilanka')

{'avatar': 'https://images.chesscomfiles.com/uploads/v1/user/9565724.b39fd80d.200x200o.9ac13c2aea21.jpg',
 'player_id': 9565724,
 '@id': 'https://api.chess.com/pub/player/emilanka',
 'url': 'https://www.chess.com/member/EmilAnka',
 'name': 'Emil Anka',
 'username': 'emilanka',
 'title': 'GM',
 'followers': 158,
 'country': 'https://api.chess.com/pub/country/US',
 'location': 'Kirkland, WA',
 'last_online': 1751595035,
 'joined': 1352403905,
 'status': 'premium',
 'is_streamer': False,
 'verified': False,
 'league': 'Wood',
 'streaming_platforms': []}

In [23]:
# Récupération des infos de tous les grands maîtres
gms = [extract_player_info(username) for username in grandmasters]

df_gms = pd.DataFrame(gms)
display(df_gms)

Unnamed: 0,player_id,@id,url,username,title,followers,country,last_online,joined,status,is_streamer,verified,league,streaming_platforms,avatar,name,location,twitch_url
0,360558673,https://api.chess.com/pub/player/0blivi0usspy,https://www.chess.com/member/0blivi0usspy,0blivi0usspy,GM,14,https://api.chess.com/pub/country/IS,1751630095,1714573164,premium,False,False,Legend,[],,,,
1,18800602,https://api.chess.com/pub/player/123lt,https://www.chess.com/member/123lt,123lt,GM,211,https://api.chess.com/pub/country/CN,1731911102,1410059361,premium,False,False,Stone,[],https://images.chesscomfiles.com/uploads/v1/us...,Tingjie Lei,,
2,29499974,https://api.chess.com/pub/player/124chess,https://www.chess.com/member/124chess,124chess,GM,76,https://api.chess.com/pub/country/RU,1750873990,1471316272,premium,False,False,Bronze,[],https://images.chesscomfiles.com/uploads/v1/us...,Dmitriy Khegay,Красноярск,
3,30610578,https://api.chess.com/pub/player/1977ivan,https://www.chess.com/member/1977Ivan,1977ivan,GM,271,https://api.chess.com/pub/country/RS,1748899057,1477565847,premium,False,False,Champion,[],https://images.chesscomfiles.com/uploads/v1/us...,Ivan Ivanisevic,Belgrade,
4,13013662,https://api.chess.com/pub/player/1stsecond,https://www.chess.com/member/1stSecond,1stsecond,GM,603,https://api.chess.com/pub/country/LV,1751572927,1375617889,premium,False,False,Legend,[],https://images.chesscomfiles.com/uploads/v1/us...,Nikita Meshkovs,Riga,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1634,27723022,https://api.chess.com/pub/player/zstardust,https://www.chess.com/member/zstardust,zstardust,GM,33,https://api.chess.com/pub/country/AR,1695231634,1460538364,premium,False,False,,[],https://images.chesscomfiles.com/uploads/v1/us...,Fernando Peralta,Barcelona,
1635,56242896,https://api.chess.com/pub/player/zubridis,https://www.chess.com/member/zubridis,zubridis,GM,53,https://api.chess.com/pub/country/UA,1751527454,1547915802,premium,False,False,Crystal,[],https://images.chesscomfiles.com/uploads/v1/us...,Зубарев Александр,,
1636,32236996,https://api.chess.com/pub/player/zugazuando,https://www.chess.com/member/Zugazuando,zugazuando,GM,1107,https://api.chess.com/pub/country/BR,1751498864,1483890316,premium,True,False,Crystal,"[{'type': 'twitch', 'channel_url': 'https://tw...",https://images.chesscomfiles.com/uploads/v1/us...,Andre Diamant,,https://twitch.tv/cexequemate
1637,63063390,https://api.chess.com/pub/player/zuraazmai,https://www.chess.com/member/ZURAAZMAI,zuraazmai,GM,180,https://api.chess.com/pub/country/GE,1751563103,1562518973,premium,False,False,Legend,[],https://images.chesscomfiles.com/uploads/v1/us...,ZURAB AZMAIPARASHVILI,Тбилиси,


In [24]:
df_gms.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1639 entries, 0 to 1638
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   player_id            1639 non-null   int64 
 1   @id                  1639 non-null   object
 2   url                  1639 non-null   object
 3   username             1639 non-null   object
 4   title                1639 non-null   object
 5   followers            1639 non-null   int64 
 6   country              1639 non-null   object
 7   last_online          1639 non-null   int64 
 8   joined               1639 non-null   int64 
 9   status               1639 non-null   object
 10  is_streamer          1639 non-null   bool  
 11  verified             1639 non-null   bool  
 12  league               1357 non-null   object
 13  streaming_platforms  1639 non-null   object
 14  avatar               1157 non-null   object
 15  name                 1383 non-null   object
 16  locati

In [None]:
# Suppression des colonnes inutiles
df_gms = df_gms.drop(["url", "is_streamer", "avatar", "@id", "verified", "location", "status","twitch_url"],axis=1)

In [26]:
# On récupère le nom de chaque pays à l'aide d'une réquête GET
# Dans le dataframe, on remplace l'endpoint par le nom du pays
countries = df_gms['country'].unique()
dico_countries = {country:requests.get(country,headers = {'User-Agent': 'ByPass'}).json()['name'] for country in countries}
df_gms = df_gms.replace(dico_countries)

display(df_gms)

Unnamed: 0,player_id,username,title,followers,country,last_online,joined,league,streaming_platforms,name
0,360558673,0blivi0usspy,GM,14,Iceland,1751630095,1714573164,Legend,[],
1,18800602,123lt,GM,211,China,1731911102,1410059361,Stone,[],Tingjie Lei
2,29499974,124chess,GM,76,Russia,1750873990,1471316272,Bronze,[],Dmitriy Khegay
3,30610578,1977ivan,GM,271,Serbia,1748899057,1477565847,Champion,[],Ivan Ivanisevic
4,13013662,1stsecond,GM,603,Latvia,1751572927,1375617889,Legend,[],Nikita Meshkovs
...,...,...,...,...,...,...,...,...,...,...
1634,27723022,zstardust,GM,33,Argentina,1695231634,1460538364,,[],Fernando Peralta
1635,56242896,zubridis,GM,53,Ukraine,1751527454,1547915802,Crystal,[],Зубарев Александр
1636,32236996,zugazuando,GM,1107,Brazil,1751498864,1483890316,Crystal,"[{'type': 'twitch', 'channel_url': 'https://tw...",Andre Diamant
1637,63063390,zuraazmai,GM,180,Georgia,1751563103,1562518973,Legend,[],ZURAB AZMAIPARASHVILI


In [27]:
# On convertit les colonnes date au format DateTime
df_gms['last_online'] = pd.to_datetime(df_gms['last_online'],unit="s")
df_gms['joined'] = pd.to_datetime(df_gms['joined'],unit="s")

display(df_gms)

Unnamed: 0,player_id,username,title,followers,country,last_online,joined,league,streaming_platforms,name
0,360558673,0blivi0usspy,GM,14,Iceland,2025-07-04 11:54:55,2024-05-01 14:19:24,Legend,[],
1,18800602,123lt,GM,211,China,2024-11-18 06:25:02,2014-09-07 03:09:21,Stone,[],Tingjie Lei
2,29499974,124chess,GM,76,Russia,2025-06-25 17:53:10,2016-08-16 02:57:52,Bronze,[],Dmitriy Khegay
3,30610578,1977ivan,GM,271,Serbia,2025-06-02 21:17:37,2016-10-27 10:57:27,Champion,[],Ivan Ivanisevic
4,13013662,1stsecond,GM,603,Latvia,2025-07-03 20:02:07,2013-08-04 12:04:49,Legend,[],Nikita Meshkovs
...,...,...,...,...,...,...,...,...,...,...
1634,27723022,zstardust,GM,33,Argentina,2023-09-20 17:40:34,2016-04-13 09:06:04,,[],Fernando Peralta
1635,56242896,zubridis,GM,53,Ukraine,2025-07-03 07:24:14,2019-01-19 16:36:42,Crystal,[],Зубарев Александр
1636,32236996,zugazuando,GM,1107,Brazil,2025-07-02 23:27:44,2017-01-08 15:45:16,Crystal,"[{'type': 'twitch', 'channel_url': 'https://tw...",Andre Diamant
1637,63063390,zuraazmai,GM,180,Georgia,2025-07-03 17:18:23,2019-07-07 17:02:53,Legend,[],ZURAB AZMAIPARASHVILI


In [28]:
# Les 5 nationalités les + représentées
display (df_gms['country'].value_counts().head(5))

# Les 5 joueurs les + suivis
display (df_gms.sort_values("followers",ascending=False)[['username','followers']].head(5))

# Les joueurs avec leur vrai nom renseigné qui ont le plus d'ancienneté
display(df_gms.loc[df_gms['name'].notna()].sort_values("joined")[['name','joined']].head(5))

country
United States    222
Russia           116
India             99
Ukraine           61
Spain             58
Name: count, dtype: int64

Unnamed: 0,username,followers
711,hikaru,1294721
978,magnuscarlsen,266229
540,ginger_gm,56371
585,gmkrikor,40676
331,danielnaroditsky,39374


Unnamed: 0,name,joined
68,Aman Hambleton,2007-06-17 09:54:05
1053,Misa Pap,2007-07-17 23:34:43
1359,Shyam Sundar,2007-07-30 13:12:36
1569,Yuri Vovk,2007-08-16 14:12:50
1111,Nigel Davies,2007-11-22 22:39:32


In [29]:
# Fonction qui récupère les stats d'un joueur
def get_player_stats(username):
    format_requete = "https://api.chess.com/pub/player/{}/stats"
    response = requests.get(format_requete.format(username),headers = {'User-Agent': 'ByPass'}).json()
    return (response)

get_player_stats('erik')

{'chess_daily': {'last': {'rating': 1482, 'date': 1751634741, 'rd': 60},
  'best': {'rating': 2065,
   'date': 1256228875,
   'game': 'https://www.chess.com/game/daily/26087202'},
  'record': {'win': 3055,
   'loss': 1966,
   'draw': 369,
   'time_per_move': 12732,
   'timeout_percent': 0}},
 'chess960_daily': {'last': {'rating': 1453, 'date': 1750343870, 'rd': 83},
  'best': {'rating': 1779,
   'date': 1466772304,
   'game': 'https://www.chess.com/game/daily/155362414'},
  'record': {'win': 327,
   'loss': 207,
   'draw': 26,
   'time_per_move': 12732,
   'timeout_percent': 0}},
 'chess_rapid': {'last': {'rating': 1904, 'date': 1749350596, 'rd': 80},
  'best': {'rating': 1904,
   'date': 1647475349,
   'game': 'https://www.chess.com/game/live/41213397731'},
  'record': {'win': 27, 'loss': 11, 'draw': 1}},
 'chess_bullet': {'last': {'rating': 1660, 'date': 1751634919, 'rd': 46},
  'best': {'rating': 2071,
   'date': 1298134178,
   'game': 'https://www.chess.com/game/live/85305696'},
  

In [30]:
# On récupère les stats des grands maîtres et on calcule leurs KPI sur les blitz et les rapids

gms_stats = []
for username in df_gms['username']:
    stats = get_player_stats(username)
    if 'chess_blitz' in stats and 'chess_rapid' in stats:
        blitz_win = stats['chess_blitz']['record']['win']
        blitz_loss = stats['chess_blitz']['record']['loss']
        blitz_draw = stats['chess_blitz']['record']['draw']
        blitz_nb = blitz_win + blitz_loss + blitz_draw
        blitz_win_rate = blitz_win / blitz_nb
        blitz_loss_rate = blitz_loss / blitz_nb
        blitz_draw_rate = blitz_draw / blitz_nb
        blitz_last_rating = stats['chess_blitz']['last']['rating']
        rapid_win = stats['chess_rapid']['record']['win']
        rapid_loss = stats['chess_rapid']['record']['loss']
        rapid_draw = stats['chess_rapid']['record']['draw']
        rapid_nb = rapid_win + rapid_loss + rapid_draw
        rapid_win_rate = rapid_win / rapid_nb
        rapid_loss_rate = rapid_loss / rapid_nb
        rapid_draw_rate = rapid_draw / rapid_nb
        rapid_last_rating = stats['chess_rapid']['last']['rating']
        gms_stats.append(
            {"username":username,
             "blitz_win_rate":blitz_win_rate,
             "blitz_loss_rate":blitz_loss_rate,
             "blitz_draw_rate":blitz_draw_rate,
             "blitz_last_rating":blitz_last_rating,
             "rapid_win_rate":rapid_win_rate,
             "rapid_loss_rate":rapid_loss_rate,
             "rapid_draw_rate":rapid_draw_rate,
             "rapid_last_rating":rapid_last_rating
            })

# On construit un dataframe à partir de ces KPI
df_gms_stats = pd.DataFrame(gms_stats)

display(df_gms_stats)

Unnamed: 0,username,blitz_win_rate,blitz_loss_rate,blitz_draw_rate,blitz_last_rating,rapid_win_rate,rapid_loss_rate,rapid_draw_rate,rapid_last_rating
0,0blivi0usspy,0.489616,0.384887,0.125497,2952,0.571429,0.214286,0.214286,2524
1,123lt,0.476972,0.434700,0.088328,2781,0.523077,0.215385,0.261538,2477
2,124chess,0.493905,0.410539,0.095556,2585,0.666667,0.111111,0.222222,2244
3,1977ivan,0.568445,0.352668,0.078886,2887,0.378378,0.486486,0.135135,2372
4,1stsecond,0.523622,0.398294,0.078084,2912,0.417476,0.398058,0.184466,2532
...,...,...,...,...,...,...,...,...,...
1152,zstardust,0.843137,0.058824,0.098039,2540,0.437500,0.250000,0.312500,2536
1153,zubridis,0.562619,0.366224,0.071157,2638,0.481481,0.425926,0.092593,2243
1154,zugazuando,0.638263,0.278725,0.083013,2763,0.891304,0.052174,0.056522,2358
1155,zuraazmai,0.512094,0.368348,0.119558,2827,0.214286,0.500000,0.285714,1926


In [31]:
# Jointure entre les infos des grands maîtres et leurs KPI sur blitz et rapids
df_gms_full = df_gms.merge(df_gms_stats,on="username")

# Affichage des 5 meilleurs joueurs actuels au blitz et au rapid
display(df_gms_full[['username','blitz_last_rating']].sort_values('blitz_last_rating',ascending=False).head(5))
display(df_gms_full[['username','rapid_last_rating']].sort_values('rapid_last_rating',ascending=False).head(5))

# Affichage des 5 pays avec le plus gros score moyen au blitz
display(df_gms_full.groupby('country').agg({'blitz_last_rating':'mean'}).sort_values('blitz_last_rating',ascending=False).head(5))

Unnamed: 0,username,blitz_last_rating
511,hikaru,3350
345,firouzja2003,3310
706,magnuscarlsen,3295
1082,vincentkeymer,3254
499,hansontwitch,3251


Unnamed: 0,username,rapid_last_rating
706,magnuscarlsen,2942
493,gutovandrey,2936
184,chessbrah,2928
65,anishonyoutube,2927
497,hamonde,2917


Unnamed: 0_level_0,blitz_last_rating
country,Unnamed: 1_level_1
South Korea,3130.0
Tuvalu,3093.0
The Gambia,3062.0
Antigua/Barbuda,3047.0
Somalia,3010.0


In [None]:
# Corrélation entre taux de victoire au blitz et score actuel
X=df_gms_full['blitz_win_rate']
Y = df_gms_full['blitz_last_rating']
corr_blitz = ((X*Y).mean() - X.mean()*Y.mean())/(X.std()*Y.std())
print(corr_blitz)

# Pas de corrélation étonamment !

-0.04431788284364542
