# FC Metz - Exploration des APIs

Ce notebook permet d'explorer les données disponibles via les APIs StatsBomb et SkillCorner.
怎么探索和分析数据

## 1. API StatsBomb

In [1]:
from statsbombpy import sb
import sys, os

#verifier si on est dans le bon dossier
_root = os.path.dirname(os.getcwd()) if os.path.basename(os.getcwd()) == "notebooks" else os.getcwd()
sys.path.insert(0, _root)

#from config.py to import the credentials
from config import STATSBOMB_CREDS
creds = STATSBOMB_CREDS



In [2]:
# Voir toutes les compétitions disponibles
#拉取所有可用的赛事，comps是一个dataframe,每一行是一个【赛事，赛季】
comps = sb.competitions(creds=creds)
print(f"Nombre de compétitions-saisons: {len(comps)}")
comps.head(20)

Nombre de compétitions-saisons: 7


Unnamed: 0,competition_id,season_id,country_name,competition_name,competition_gender,competition_youth,competition_international,season_name,match_updated,match_updated_360,match_available_360,match_available
0,75,316,Sweden,Allsvenskan,male,False,False,2026,2026-01-26T05:45:29.796726,,,
1,75,315,Sweden,Allsvenskan,male,False,False,2025,2026-02-09T07:54:09.000000,2026-02-09T07:54:09.000000,2026-02-09T07:54:09.000000,2026-02-09T07:54:09.000000
2,88,316,Norway,Eliteserien,male,False,False,2026,2025-12-22T12:08:17.011976,,,
3,88,315,Norway,Eliteserien,male,False,False,2025,2026-02-11T14:30:15.000000,2026-02-11T14:30:15.000000,2026-02-11T14:30:15.000000,2026-02-11T14:30:15.000000
4,7,318,France,Ligue 1,male,False,False,2025/2026,2026-02-13T12:29:33.000000,2026-02-13T12:29:33.000000,2026-02-13T12:29:33.000000,2026-02-13T12:29:33.000000
5,7,317,France,Ligue 1,male,False,False,2024/2025,2026-01-22T03:20:39.000000,2026-01-22T03:20:39.000000,2026-01-22T03:20:39.000000,2026-01-22T03:20:39.000000
6,8,318,France,Ligue 2,male,False,False,2025/2026,2026-02-12T20:17:33.000000,2026-02-12T20:17:33.000000,2026-02-12T20:17:33.000000,2026-02-12T20:17:33.000000


In [3]:
# Filtrer pour la Ligue 1
ligue1 = comps[comps['competition_name'].str.contains('Ligue 1', case=False, na=False)]
print("Ligue 1 saisons disponibles:")
ligue1[['competition_id', 'season_id', 'competition_name', 'season_name', 'country_name']]

Ligue 1 saisons disponibles:


Unnamed: 0,competition_id,season_id,competition_name,season_name,country_name
4,7,318,Ligue 1,2025/2026,France
5,7,317,Ligue 1,2024/2025,France


In [16]:
# Charger les matchs de la saison la plus récente
latest = ligue1.sort_values('season_id', ascending=False).iloc[0]
comp_id = int(latest['competition_id'])
season_id = int(latest['season_id'])
print(f"Compétition: {latest['competition_name']} - {latest['season_name']}")
print(f"competition_id={comp_id}, season_id={season_id}")

matches = sb.matches(competition_id=comp_id, season_id=season_id, creds=creds)
print(f"\nNombre de matchs: {len(matches)}")
print(f"  - available (terminés, avec events): {(matches['match_status'] == 'available').sum()}")
print(f"  - scheduled (à venir): {(matches['match_status'] == 'scheduled').sum()}")

# Les 5 prochains matchs (à venir), triés par date
from IPython.display import display
prochains = matches[matches['match_status'] == 'scheduled'].sort_values('match_date').head(5)
print("--- Les 5 prochains matchs ---")
display(prochains[['match_date', 'kick_off', 'home_team', 'away_team', 'match_status']])

# Les 5 matchs les plus récents terminés
termines = matches[matches['match_status'] == 'available'].sort_values('match_date', ascending=False).head(5)
print("--- Les 5 matchs les plus récents terminés ---")
display(termines[['match_date', 'kick_off', 'home_team', 'away_team', 'home_score', 'away_score']])

Compétition: Ligue 1 - 2025/2026
competition_id=7, season_id=318

Nombre de matchs: 306
  - available (terminés, avec events): 198
  - scheduled (à venir): 108
--- Les 5 prochains matchs ---


Unnamed: 0,match_date,kick_off,home_team,away_team,match_status
133,2026-02-20,19:45:00.000,Stade Brestois,Marseille,scheduled
157,2026-02-21,20:05:00.000,Paris Saint-Germain,Metz,scheduled
250,2026-02-21,18:00:00.000,Toulouse,Paris FC,scheduled
80,2026-02-21,16:00:00.000,Lens,AS Monaco,scheduled
199,2026-02-22,16:15:00.000,Nantes,Le Havre,scheduled


--- Les 5 matchs les plus récents terminés ---


Unnamed: 0,match_date,kick_off,home_team,away_team,home_score,away_score
32,2026-02-15,14:00:00.000,Le Havre,Toulouse,2.0,1.0
263,2026-02-15,19:45:00.000,Lyon,OGC Nice,2.0,0.0
180,2026-02-15,16:15:00.000,Lorient,Angers,2.0,0.0
112,2026-02-15,16:15:00.000,Metz,Auxerre,1.0,3.0
257,2026-02-14,20:05:00.000,Paris FC,Lens,0.0,5.0


In [None]:
# Charger les événements du match le plus récent (passes, tirs, dribbles, etc.)
#单场比赛事件
matches_disponibles = matches[matches['match_status'] == 'available']
if len(matches_disponibles) == 0:
    print("Aucun match avec données events disponible.")
else:
    from IPython.display import display
    # Trier par date décroissante, prendre le plus récent
    match_row = matches_disponibles.sort_values('match_date', ascending=False).iloc[0]
    match_id = int(match_row['match_id'])
    print(f"Match: {match_row['home_team']} vs {match_row['away_team']}")
    print(f"Score: {match_row['home_score']}-{match_row['away_score']}\n")

    events = sb.events(match_id=match_id, creds=creds)
    print(f"Nombre d'événements: {len(events)}")
    print(f"Types d'événements: {events['type'].unique().tolist()[:15]}...\n")
    
    # Les premiers événements (Starting XI, Half Start) n'ont pas player/location -> NaN
    # Filtrer les événements avec joueur et afficher colonnes clés
    events_avec_joueur = events[events['player'].notna()]
    cols_cles = ['type', 'player', 'team', 'minute', 'second', 'location']
    if 'pass_end_location' in events.columns:
        cols_cles.append('pass_end_location')
    print("--- Aperçu des 10 premiers événements (Pass, Shot, etc.) ---")
    display(events_avec_joueur[cols_cles].head(10))
   
    """前几行是结构性事件
    events.head(10) 的前几行多为 Starting XI、Half Start 等非比赛行为事件，没有 player、location 等字段，所以会显示为 NaN。
    列太多且大多为空;   events 有 100+ 列，很多是特定事件类型才有的（如 shot_statsbomb_xg 只对射门有值），对大部分行是 NaN，显示会很乱。"""


Match: Le Havre vs Toulouse
Score: 2.0-1.0

Nombre d'événements: 3396
Types d'événements: ['Starting XI', 'Half Start', 'Pass', 'Ball Receipt*', 'Carry', 'Duel', 'Pressure', 'Ball Recovery', 'Foul Committed', 'Foul Won', 'Injury Stoppage', 'Tactical Shift', 'Block', 'Shot', 'Goal Keeper']...

--- Aperçu des 10 premiers événements (Pass, Shot, etc.) ---


  events = pd.concat([*events.values()], axis=0, ignore_index=True, sort=True)


Unnamed: 0,type,player,team,minute,second,location,pass_end_location
6,Pass,Cristian Sleiker Cásseres Yepes,Toulouse,0,0,"[61.0, 40.1]","[63.4, 39.9]"
7,Pass,Pape Demba Diop,Toulouse,0,1,"[63.4, 39.9]","[64.8, 68.8]"
8,Pass,Aron Dønnum,Toulouse,0,4,"[59.8, 62.1]","[89.1, 32.7]"
9,Pass,Stephan Cedric Zagadou,Le Havre,0,6,"[31.0, 47.4]","[56.3, 46.2]"
10,Pass,Rassoul N'Diaye,Le Havre,0,10,"[57.8, 49.2]","[54.0, 50.9]"
11,Pass,Lucas Gourna-Douath,Le Havre,0,11,"[54.8, 51.5]","[63.8, 60.0]"
12,Pass,Rassoul N'Diaye,Le Havre,0,14,"[75.3, 77.4]","[79.0, 64.2]"
13,Pass,Lucas Gourna-Douath,Le Havre,0,16,"[85.2, 64.0]","[112.5, 69.4]"
14,Pass,Rassoul N'Diaye,Le Havre,0,18,"[112.5, 69.4]","[118.8, 43.2]"
15,Pass,Guillaume Restes,Toulouse,0,27,"[11.9, 38.9]","[18.3, 25.2]"


In [22]:
# Stats de saison par joueur 球员赛季统计
try:
    from IPython.display import display
    player_stats = sb.player_season_stats(competition_id=comp_id, season_id=season_id, creds=creds)
    print(f"Nombre de joueurs avec stats: {len(player_stats)}")
    print(f"Colonnes disponibles: {player_stats.columns.tolist()[:20]}...")
    display(player_stats[['player_name', 'team_name', 'player_season_minutes', 
                   'player_season_goals_90', 'player_season_np_xg_90']].head(10))
except Exception as e:
    print(f"Erreur: {e}")

Nombre de joueurs avec stats: 524
Colonnes disponibles: ['account_id', 'player_id', 'player_name', 'team_id', 'team_name', 'competition_id', 'competition_name', 'season_id', 'season_name', 'country_id', 'birth_date', 'player_female', 'player_first_name', 'player_last_name', 'player_known_name', 'player_weight', 'player_height', 'player_season_minutes', 'player_season_np_xg_per_shot', 'player_season_np_xg_90']...


Unnamed: 0,player_name,team_name,player_season_minutes,player_season_goals_90,player_season_np_xg_90
0,Andrew Omobamidele,Strasbourg,684.3833,0.0,0.042494
1,Hyeon-seok Hong,Nantes,311.25,0.0,0.103786
2,Hyeok-Kyu Kwon,Nantes,823.2334,0.0,0.013067
3,Kang-In Lee,Paris Saint-Germain,1038.6667,0.173299,0.150212
4,Matvey Safonov,Paris Saint-Germain,493.09998,0.0,0.0
5,Aleksandr Golovin,AS Monaco,1200.3167,0.14996,0.114709
6,Mbwana Ally Samatta,Le Havre,942.8166,0.0,0.189986
7,Tanner Tessmann,Lyon,1662.9167,0.054122,0.080889
8,Mark McKenzie,Toulouse,1819.4165,0.0,0.016349
9,Folarin Balogun,AS Monaco,1377.2002,0.2614,0.353243


## 2. API SkillCorner

### Présentation

**SkillCorner** fournit des **données physiques/tracking** : distance parcourue, sprints, accélérations, vitesse max, etc. Complémentaire à StatsBomb (événements), utilisé pour analyser la charge physique des joueurs.

**Documentation** : [https://skillcorner.com/api/docs/](https://skillcorner.com/api/docs/)

| Endpoint | Description | Utilisation notebook |
|----------|-------------|----------------------|
| `get_seasons` | Liste des saisons | Filtrage saison la plus récente |
| `get_competitions` | Ligues par saison | Filtrage France / Ligue 1 |
| `get_competition_editions` | Éditions (ex: FRA Ligue 1 2025/26) | Sélection édition FC Metz |
| `get_teams` | Équipes par édition | Liste équipes Ligue 1 |
| `get_physical` | Données physiques (joueur × match) | Métriques clés : distance, sprints |

为什么只选择这几个分析，和fc metz，saison赛季，competition联赛，edition fra-figue1版本，teams优先fc

In [27]:
from skillcorner.client import SkillcornerClient
import sys, os
_root = os.path.dirname(os.getcwd()) if os.path.basename(os.getcwd()) == "notebooks" else os.getcwd()
sys.path.insert(0, _root)

# from config.py to import the credentials
from config import SKILLCORNER_USERNAME, SKILLCORNER_PASSWORD
client = SkillcornerClient(username=SKILLCORNER_USERNAME, password=SKILLCORNER_PASSWORD)

### 2.1 Saisons
Liste des saisons disponibles. On conserve les **5 plus récentes** pour le projet FC Metz.

In [28]:
# 2.1 Saisons disponibles
import pandas as pd
from IPython.display import display
seasons = client.get_seasons()
df_seasons = pd.DataFrame(seasons)[['id', 'name', 'start_year', 'end_year']]
df_seasons = df_seasons.sort_values('id', ascending=False).head(5)
print(f"Nombre total de saisons SkillCorner : {len(seasons)}")
print("→ 5 saisons les plus récentes :")
display(df_seasons)

Nombre total de saisons SkillCorner : 33
→ 5 saisons les plus récentes :


Unnamed: 0,id,name,start_year,end_year
0,130,2026,2026,2026
1,129,2025/2026,2025,2026
2,128,2025,2025,2025
3,95,2024/2025,2024,2025
27,62,2010,2010,2010


### 2.2 Compétitions (filtrées France)
Ligues disponibles pour la saison la plus récente. **Filtre** : France (FRA) ou contenant "Ligue".

In [29]:
# 2.2 Compétitions (saison récente, filtre France)
latest_season_id = max(s['id'] for s in seasons)
competitions = client.get_competitions(params={'season': latest_season_id})
df_comp = pd.DataFrame([{'id': c.get('id'), 'name': c.get('name', 'N/A'), 
    'area': c.get('area', {}).get('name', '') if isinstance(c.get('area'), dict) else ''} for c in competitions])
mask_fra = df_comp['name'].str.contains('Ligue|France|L1', case=False, na=False) | df_comp['area'].str.contains('France', na=False)
df_comp_fra = df_comp[mask_fra].head(10)
print(f"Compétitions saison {latest_season_id} : {len(competitions)} total, {mask_fra.sum()} France/Ligue")
display(df_comp_fra[['id', 'name', 'area']])

Compétitions saison 130 : 41 total, 0 France/Ligue


Unnamed: 0,id,name,area


### 2.3 Éditions (FRA Ligue 1 uniquement)
Une édition = une ligue pour une saison donnée (ex: FRA - Ligue 1 - 2025/2026). On filtre **FRA** pour le projet FC Metz.

In [30]:
# 2.3 Éditions FRA Ligue 1
editions = client.get_competition_editions()
ligue1_all = [e for e in editions if 'ligue 1' in str(e.get('name', '')).lower() 
             or 'ligue 1' in str(e.get('competition', {}).get('name', '')).lower()]
ligue1_fra = [e for e in ligue1_all if 'FRA' in str(e.get('name', '')).upper() and 'playoff' not in str(e.get('name', '')).lower()]
df_ed = pd.DataFrame([{'id': e.get('id'), 'name': e.get('name'), 
    'comp': e.get('competition', {}).get('name'), 'season': e.get('season', {}).get('name')} 
    for e in (ligue1_fra if ligue1_fra else ligue1_all)])
df_ed = df_ed.sort_values('id', ascending=False).head(5)
ligue1_editions = ligue1_fra if ligue1_fra else ligue1_all  # pour les cellules suivantes
print(f"Éditions Ligue 1 : {len(ligue1_all)} total, {len(ligue1_fra)} FRA (hors playoffs)")
display(df_ed)

Éditions Ligue 1 : 17 total, 10 FRA (hors playoffs)


Unnamed: 0,id,name,comp,season
9,1232,FRA - Ligue 1 - 2025/2026,Ligue 1,2025/2026
8,906,FRA - Ligue 1 - 2024/2025,Ligue 1,2024/2025
7,548,FRA - Ligue 1 - 2023/2024,Ligue 1,2023/2024
6,392,FRA - Ligue 1 - 2022/2023,Ligue 1,2022/2023
5,242,FRA - Ligue 1 - 2021/2022,Ligue 1,2021/2022


### 2.4 Équipes et données physiques
| Donnée | Description |
|--------|-------------|
| **Équipes** | Liste des équipes pour l'édition sélectionnée (priorité FC Metz) |
| **Données physiques** | Distance, sprints, haute intensité — par joueur × match |

In [32]:
# 2.4 Équipes et données physiques (FC Metz prioritaire)
if ligue1_editions:
    ed_id = ligue1_editions[-1]['id']  # édition la plus récente
    teams = client.get_teams(params={'competition_edition': ed_id})
    df_teams = pd.DataFrame([{'id': t.get('id'), 'name': t.get('name')} for t in teams])
    # Priorité FC Metz : le mettre en premier si présent
    mask_metz = df_teams['name'].str.contains('Metz', case=False, na=False)
    if mask_metz.any():
        df_teams = pd.concat([df_teams[mask_metz], df_teams[~mask_metz]]).reset_index(drop=True)
    print(f"Équipes pour l'édition {ed_id} : {len(teams)}")
    display(df_teams.head(10))
    
    if teams:
        metz_team = next((t for t in teams if 'metz' in str(t.get('name', '')).lower()), None)
        team = metz_team or teams[0]
        team_id = team['id']
        team_name = team.get('name', '')
        physical = client.get_physical(params={'team': team_id})
        print(f"\nDonnées physiques — {team_name} : {len(physical)} enregistrements")
        if physical:
            df_phys = pd.DataFrame(physical)
            cols = ['player_name', 'match_date', 'total_distance_full_all', 'sprint_count_full_all', 
                    'hsr_distance_full_all', 'minutes_full_all', 'total_metersperminute_full_all']
            cols = [c for c in cols if c in df_phys.columns]
            df_phys = df_phys.sort_values('match_date', ascending=False)
            display(df_phys[cols].head(10))

Équipes pour l'édition 1232 : 18


Unnamed: 0,id,name
0,97,FC Metz
1,85,RC Lens
2,65,AJ Auxerre
3,66,FC Nantes
4,69,Le Havre AC
5,70,OGC Nice
6,72,FC Lorient
7,74,Toulouse FC
8,78,LOSC Lille
9,98,Angers SCO



Données physiques — FC Metz : 319 enregistrements


Unnamed: 0,player_name,match_date,total_distance_full_all,sprint_count_full_all,hsr_distance_full_all,minutes_full_all,total_metersperminute_full_all
318,Nathan Mbala,2026-02-15,1498.0,1.0,49.0,12.72,117.8
271,Alpha Amadou Touré,2026-02-15,6913.0,7.0,512.0,49.13,140.7
249,Giorgi Abuashvili,2026-02-15,10410.0,12.0,697.0,82.5,126.18
314,Believe Munongo,2026-02-15,12541.0,10.0,772.0,95.22,131.71
78,Jessy Deminguet,2026-02-15,6700.0,4.0,381.0,46.08,145.39
211,Sadibou Sané,2026-02-15,11040.0,16.0,548.0,95.22,115.95
100,Georgiy Tsitaishvili,2026-02-15,11082.0,22.0,805.0,95.22,116.39
305,Morgan Bokele Mputu,2026-02-15,3625.0,6.0,295.0,32.28,112.29
194,Koffi Franck Kouao,2026-02-15,6936.0,11.0,380.0,62.93,110.21
102,Giorgi Kvilitaia,2026-02-15,2833.0,1.0,176.0,24.68,114.77


## 3. Lancer le Pipeline Complet

Pour lancer le pipeline complet depuis le terminal :
```bash
python main.py --quick  # Mode rapide pour tester
python main.py          # Pipeline complet
```

In [12]:
# Explorer la base de données après le pipeline (PostgreSQL)
import pandas as pd
import sys
import os

_project_root = os.path.dirname(os.getcwd()) if os.path.basename(os.getcwd()) == "notebooks" else os.getcwd()
sys.path.insert(0, _project_root)
from config import POSTGRES_CONFIG, DB_SCHEMA

try:
    conn = __import__("psycopg2").connect(**POSTGRES_CONFIG)
    conn.autocommit = True  # pour pandas read_sql
    
    # Voir les tables du schéma fc_metz
    tables = pd.read_sql(
        f"""SELECT tablename FROM pg_tables WHERE schemaname = '{DB_SCHEMA}'""",
        conn
    )
    print("Tables dans la base:")
    for t in tables['tablename']:
        count = pd.read_sql(f"SELECT COUNT(*) as n FROM {DB_SCHEMA}.{t}", conn)['n'][0]
        print(f"  {t}: {count} lignes")
    
    # Aperçu des joueurs
    players = pd.read_sql(f"""
        SELECT player_name, statsbomb_player_id, skillcorner_player_id, 
               transfermarkt_player_id, market_value, nationality, primary_position
        FROM {DB_SCHEMA}.players 
        WHERE statsbomb_player_id IS NOT NULL
        LIMIT 20
    """, conn)
    print("\nAperçu des joueurs:")
    display(players)
    
    conn.close()
except Exception as e:
    print(f"Base non trouvée. Lancez d'abord: python main.py --quick")
    print(f"Erreur: {e}")

Tables dans la base:
  competitions: 4 lignes
  events: 24 lignes
  match_lineups: 0 lignes
  matches: 306 lignes
  player_id_mapping: 1547 lignes
  player_match_physical: 285 lignes
  player_season_stats: 515 lignes
  seasons: 4 lignes
  teams: 18 lignes
  players: 672 lignes
  player_fused: 672 lignes

Aperçu des joueurs:


  tables = pd.read_sql(
  count = pd.read_sql(f"SELECT COUNT(*) as n FROM {DB_SCHEMA}.{t}", conn)['n'][0]
  players = pd.read_sql(f"""


Unnamed: 0,player_name,statsbomb_player_id,skillcorner_player_id,transfermarkt_player_id,market_value,nationality,primary_position
0,Yassine Kechta,118367,,667967.0,"5,00 mio. €","Maroc, France",Yassine KechtaMilieu central
1,Louis Mouton,183233,,904662.0,"2,50 mio. €",France,Louis MoutonMilieu offensif
2,Martín Satriano,85592,,,,Uruguay,
3,Himad Abdelli,21217,,592979.0,"7,00 mio. €","Algérie, France",Himad AbdelliMilieu offensif
4,Andrew Omobamidele,58467,,666975.0,100 K €,Sénégal,Ousmane BaGardien de but
5,Kang-In Lee,22740,,557149.0,"25,00 mio. €",Corée du Sud,Kang-in LeeMilieu offensif
6,Hyeok-Kyu Kwon,40710,,,,Korea Republic,
7,Hyeon-seok Hong,131657,,,,Korea Republic,
8,Aleksandr Golovin,5177,,263918.0,"18,00 mio. €",Russie,Aleksandr GolovinMilieu offensif
9,Matvey Safonov,21298,,318470.0,"15,00 mio. €",Russie,Matvey SafonovGardien de but
