In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup


In [2]:
# 📌 Liste étendue de clubs à scraper
clubs = {
    "Real Madrid": "https://fbref.com/en/squads/53a2f082/Real-Madrid-Stats",
    "Barcelona": "https://fbref.com/en/squads/206d90db/Barcelona-Stats",
    "Manchester City": "https://fbref.com/en/squads/b8fd03ef/Manchester-City-Stats",
    "Paris SG": "https://fbref.com/en/squads/e2d8892c/Paris-Saint-Germain-Stats",
    "Bayern Munich": "https://fbref.com/en/squads/054efa67/Bayern-Munich-Stats",
    "Arsenal": "https://fbref.com/en/squads/18bb7c10/Arsenal-Stats",
    "Liverpool": "https://fbref.com/en/squads/822bd0ba/Liverpool-Stats",
    "Juventus": "https://fbref.com/en/squads/e0652b02/Juventus-Stats",
    "AC Milan": "https://fbref.com/en/squads/dc56fe14/Milan-Stats",
    "Chelsea": "https://fbref.com/en/squads/cff3d9bb/Chelsea-Stats"
}

# 🧠 Fonction de scraping principale
def scrape_club_stats(club_name, url):
    print(f"Scraping : {club_name}")
    tables = pd.read_html(url)

    # Table 0 = General stats (toujours présente)
    df = tables[0].copy()
    df["Club"] = club_name  # Ajouter nom club

    # Ajouter d'autres tables disponibles si elles existent
    columns = ["Player", "Nation", "Pos", "Age", "MP", "Starts", "Min", "Gls", "Ast", "xG", "xA", "Tkl", "Int", "Blocks", "CrdY", "CrdR"]

    # Garder seulement les colonnes utiles si elles sont là
    df = df[[col for col in columns if col in df.columns] + ["Club"]]

    return df

# 🔁 Boucle sur les clubs
all_data = []

for club, url in clubs.items():
    try:
        df = scrape_club_stats(club, url)
        all_data.append(df)
    except Exception as e:
        print(f"Erreur pour {club} : {e}")

# 🧩 Fusionner toutes les données
final_df = pd.concat(all_data, ignore_index=True)

Scraping : Real Madrid
Scraping : Barcelona
Scraping : Manchester City
Scraping : Paris SG
Scraping : Bayern Munich
Scraping : Arsenal
Scraping : Liverpool
Scraping : Juventus
Scraping : AC Milan
Scraping : Chelsea


In [19]:
import pandas as pd

clubs = {
    "Real Madrid": "https://fbref.com/en/squads/53a2f082/Real-Madrid-Stats",
    "Barcelona": "https://fbref.com/en/squads/206d90db/Barcelona-Stats",
    "Manchester City": "https://fbref.com/en/squads/b8fd03ef/Manchester-City-Stats",
    "Paris SG": "https://fbref.com/en/squads/e2d8892c/Paris-Saint-Germain-Stats",
    "Bayern Munich": "https://fbref.com/en/squads/054efa67/Bayern-Munich-Stats", 
    "Liverpool": "https://fbref.com/en/squads/822bd0ba/Liverpool-Stats",
    "Inter Milan": "https://fbref.com/en/squads/fd962109/Internazionale-Stats"
}

all_data = []

for name, url in clubs.items():
    try:
        print(f"🔍 Scraping {name}")
        df = pd.read_html(url)[0]
        df["Club"] = name
        all_data.append(df)
    except Exception as e:
        print(f"❌ Erreur {name} : {e}")

if all_data:
    final_df = pd.concat(all_data, ignore_index=True)
    final_df.to_csv("../data/raw/fbref_final_basic_stats.csv", index=False)
    print("✅ Données sauvegardées dans fbref_final_basic_stats.csv")
else:
    print("❌ Aucun joueur récupéré.")


🔍 Scraping Real Madrid
❌ Erreur Real Madrid : HTTP Error 429: Too Many Requests
🔍 Scraping Barcelona
❌ Erreur Barcelona : HTTP Error 429: Too Many Requests
🔍 Scraping Manchester City
❌ Erreur Manchester City : HTTP Error 429: Too Many Requests
🔍 Scraping Paris SG
❌ Erreur Paris SG : HTTP Error 429: Too Many Requests
🔍 Scraping Bayern Munich
❌ Erreur Bayern Munich : HTTP Error 429: Too Many Requests
🔍 Scraping Liverpool
❌ Erreur Liverpool : HTTP Error 429: Too Many Requests
🔍 Scraping Inter Milan
❌ Erreur Inter Milan : HTTP Error 429: Too Many Requests
❌ Aucun joueur récupéré.


In [20]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time

headers = {"User-Agent": "Mozilla/5.0"}

clubs = {
    "Real Madrid": "https://fbref.com/en/squads/53a2f082/Real-Madrid-Stats",
    "Barcelona": "https://fbref.com/en/squads/206d90db/Barcelona-Stats",
    "Manchester City": "https://fbref.com/en/squads/b8fd03ef/Manchester-City-Stats",
    "Paris SG": "https://fbref.com/en/squads/e2d8892c/Paris-Saint-Germain-Stats",
    "Bayern Munich": "https://fbref.com/en/squads/054efa67/Bayern-Munich-Stats", 
    "Liverpool": "https://fbref.com/en/squads/822bd0ba/Liverpool-Stats",
    "Inter Milan": "https://fbref.com/en/squads/fd962109/Internazionale-Stats"
}

all_data = []

for name, url in clubs.items():
    try:
        print(f"🔍 Scraping {name}")
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, "html.parser")
        table = soup.find("table")
        
        if table:
            df = pd.read_html(str(table))[0]
            df["Club"] = name
            all_data.append(df)
            print(f"✅ {name} OK ({len(df)} lignes)")
        else:
            print(f"⚠️ Aucune table trouvée pour {name}")

        time.sleep(2)  # ⏱ Pause de 2 secondes pour éviter le blocage

    except Exception as e:
        print(f"❌ Erreur {name} : {e}")

# Fusionner et enregistrer
if all_data:
    final_df = pd.concat(all_data, ignore_index=True)
    final_df.to_csv("../data/raw/fbref_final_basic_stats.csv", index=False)
    print("✅ Données sauvegardées dans fbref_final_basic_stats.csv")
else:
    print("❌ Aucun joueur récupéré.")


🔍 Scraping Real Madrid
⚠️ Aucune table trouvée pour Real Madrid
🔍 Scraping Barcelona
⚠️ Aucune table trouvée pour Barcelona
🔍 Scraping Manchester City
⚠️ Aucune table trouvée pour Manchester City
🔍 Scraping Paris SG
⚠️ Aucune table trouvée pour Paris SG
🔍 Scraping Bayern Munich
⚠️ Aucune table trouvée pour Bayern Munich
🔍 Scraping Liverpool
⚠️ Aucune table trouvée pour Liverpool
🔍 Scraping Inter Milan
⚠️ Aucune table trouvée pour Inter Milan
❌ Aucun joueur récupéré.


In [22]:
!pip install selenium
!pip install webdriver-manager
!pip install pandas beautifulsoup4


Defaulting to user installation because normal site-packages is not writeable
Collecting selenium
  Downloading selenium-4.32.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting websocket-client~=1.8 (from selenium)
  Downloading websocket_client-1.8.0-py3-none-any.whl.metadata (8.0 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.32.0-py3-none-any.whl (9.4 MB)
   ---------------------------------------- 0.0/9.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/9.4 MB ? eta -:--:--
   - -------------------------------------- 0.3/9.4 MB ? eta -


[notice] A new release of pip is available: 25.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Defaulting to user installation because normal site-packages is not writeable
Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)
Installing collected packages: webdriver-manager
Successfully installed webdriver-manager-4.0.2



[notice] A new release of pip is available: 25.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 25.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [24]:
import pandas as pd
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

# ✅ Configurer le navigateur (sans interface)
options = Options()
options.add_argument("--headless")  # Navigateur invisible
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
options.add_argument("user-agent=Mozilla/5.0")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# ✅ Liste des clubs
clubs = {
    "Real Madrid": "https://fbref.com/en/squads/53a2f082/Real-Madrid-Stats",
    "Barcelona": "https://fbref.com/en/squads/206d90db/Barcelona-Stats",
    "Manchester City": "https://fbref.com/en/squads/b8fd03ef/Manchester-City-Stats",
    "Paris SG": "https://fbref.com/en/squads/e2d8892c/Paris-Saint-Germain-Stats",
    "Bayern Munich": "https://fbref.com/en/squads/054efa67/Bayern-Munich-Stats", 
    "Liverpool": "https://fbref.com/en/squads/822bd0ba/Liverpool-Stats",
    "Inter Milan": "https://fbref.com/en/squads/fd962109/Internazionale-Stats"
}

all_data = []

for club, url in clubs.items():
    try:
        print(f"🔍 Scraping {club}")
        driver.get(url)
        time.sleep(3)  # ⏱ Laisser la page se charger

        soup = BeautifulSoup(driver.page_source, "html.parser")
        table = soup.find("table")

        if table:
            df = pd.read_html(str(table))[0]
            df["Club"] = club
            all_data.append(df)
            print(f"✅ {club} OK ({len(df)} joueurs)")
        else:
            print(f"⚠️ Table non trouvée pour {club}")

    except Exception as e:
        print(f"❌ Erreur {club} : {e}")

# Fermer le navigateur
driver.quit()

# Fusion et export
if all_data:
    final_df = pd.concat(all_data, ignore_index=True)
    final_df.to_csv("../data/raw/fbref_final_basic_stats.csv", index=False)
    print("✅ Données sauvegardées dans fbref_final_basic_stats.csv")
else:
    print("❌ Aucun joueur récupéré.")


🔍 Scraping Real Madrid


  df = pd.read_html(str(table))[0]


✅ Real Madrid OK (44 joueurs)
🔍 Scraping Barcelona


  df = pd.read_html(str(table))[0]


✅ Barcelona OK (47 joueurs)
🔍 Scraping Manchester City


  df = pd.read_html(str(table))[0]


✅ Manchester City OK (41 joueurs)
🔍 Scraping Paris SG


  df = pd.read_html(str(table))[0]


✅ Paris SG OK (39 joueurs)
🔍 Scraping Bayern Munich


  df = pd.read_html(str(table))[0]


✅ Bayern Munich OK (40 joueurs)
🔍 Scraping Liverpool


  df = pd.read_html(str(table))[0]


✅ Liverpool OK (31 joueurs)
🔍 Scraping Inter Milan


  df = pd.read_html(str(table))[0]


✅ Inter Milan OK (30 joueurs)
✅ Données sauvegardées dans fbref_final_basic_stats.csv


In [27]:
df = pd.read_csv("C:/Users/pc/Analyse-Football-Web/data/raw/fbref_final_basic_stats.csv", sep=';')
