# Scraping Team Stats from FBref

## Importing Libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os
import time

## Setup

In [2]:
# FBref competition IDs, keyed by the league slug used for URLs and file names
FBREF_LEAGUES = dict(
    premier_league="9",
    serie_a="11",
    la_liga="12",
    bundesliga="20",
    ligue_1="13",
)

# Site root that every page path below is appended to
BASE_URL = "https://fbref.com"

# Path template for a league's 2024-2025 stats page; filled in with the
# competition ID and a hyphenated league slug
STATS_PAGE = (
    "/en/comps/{league_id}/2024-2025/stats/"
    "2024-2025-{league_slug}-Stats"
)

# Request headers carrying a desktop-browser User-Agent string
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/123.0.0.0 Safari/537.36"
    ),
}

## Fetching League Stats

In [3]:
def _flatten_columns(columns):
    """Collapse multi-row table headers into single-level column labels.

    ``pd.read_html`` returns ``MultiIndex`` columns when a table has more than
    one header row, and pandas fills empty header cells with placeholder
    labels starting with "Unnamed". Those placeholders are dropped and the
    remaining parts are joined with "_", e.g.
    ("Unnamed: 0_level_0", "Squad") -> "Squad" and
    ("Playing Time", "MP") -> "Playing Time_MP".
    Single-level columns are returned unchanged.
    """
    if not isinstance(columns, pd.MultiIndex):
        return columns

    flat = []
    for parts in columns:
        named = [str(part) for part in parts if not str(part).startswith("Unnamed")]
        flat.append("_".join(named))
    return pd.Index(flat)


def fetch_league_stats(league_slug: str, league_id: str) -> pd.DataFrame:
    """Download one league's stats page and merge its squad tables.

    Every ``<table>`` on the page that has an ``id`` attribute and a "Squad"
    column is parsed, indexed by squad, and outer-joined onto the tables
    collected so far. Column names that clash with already-collected ones
    receive a "_dup" suffix.

    Args:
        league_slug: Underscore-separated league key (e.g. "premier_league");
            underscores become hyphens in the URL and spaces in the
            "League" column.
        league_id: Competition ID inserted into the URL.

    Returns:
        A DataFrame whose first column is "League" (title-cased league name),
        followed by the former index and the joined stat columns. If no table
        qualifies, the frame has no rows.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    # Local import so the block only relies on names already imported by the file.
    from io import StringIO

    url = BASE_URL + STATS_PAGE.format(league_id=league_id, league_slug=league_slug.replace('_', '-'))
    print(f"Fetching: {url}")
    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(url, headers=headers, timeout=30)
    # Pause after every request (presumably to stay under the site's rate limit).
    time.sleep(3)
    # Fail loudly on error pages instead of parsing them into an empty result.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Try to locate multiple stat tables to extract meaningful data
    combined = pd.DataFrame()

    for table in soup.find_all("table"):
        if not table.get("id"):
            continue

        # Wrap the markup in StringIO: passing literal HTML strings to
        # read_html is deprecated in recent pandas versions.
        df = pd.read_html(StringIO(str(table)))[0]

        # Multi-row headers would otherwise hide "Squad" in a lower level.
        df.columns = _flatten_columns(df.columns)

        # Avoid duplicate columns (done first so df["Squad"] is a single Series)
        df = df.loc[:, ~df.columns.duplicated()]

        if "Squad" not in df.columns:
            continue

        # Drop rows without a squad and header rows repeated inside the body.
        df = df.loc[df["Squad"].notna() & (df["Squad"] != "Squad")]
        df = df.set_index("Squad")

        if combined.empty:
            combined = df
        else:
            combined = combined.join(df, how="outer", rsuffix="_dup")

    combined.reset_index(inplace=True)
    combined.insert(0, "League", league_slug.replace('_', ' ').title())
    return combined

## Scraping Each League and Saving Stats

In [4]:
# Folder that receives one CSV per league. It is created up front so that
# to_csv does not fail when the directory tree does not exist yet.
OUTPUT_DIR = "../../data/teams/raw/team_stats"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Scrape each configured league in turn and write its combined stats to CSV.
for league_slug, league_id in FBREF_LEAGUES.items():
    print(f"\n=== Scraping team stats for {league_slug.replace('_', ' ').title()} ===")
    df = fetch_league_stats(league_slug, league_id)
    # Build the path once so the saved file and the log message cannot drift apart.
    output_path = f"{OUTPUT_DIR}/{league_slug}_team_stats_2024_25.csv"
    df.to_csv(output_path, index=False)
    print(f"Saved to {output_path}")


=== Scraping team stats for Premier League ===
Fetching: https://fbref.com/en/comps/9/2024-2025/stats/2024-2025-premier-league-Stats
Saved to ../../data/teams/raw/team_stats/premier_league_team_stats_2024_25.csv

=== Scraping team stats for Serie A ===
Fetching: https://fbref.com/en/comps/11/2024-2025/stats/2024-2025-serie-a-Stats
Saved to ../../data/teams/raw/team_stats/serie_a_team_stats_2024_25.csv

=== Scraping team stats for La Liga ===
Fetching: https://fbref.com/en/comps/12/2024-2025/stats/2024-2025-la-liga-Stats
Saved to ../../data/teams/raw/team_stats/la_liga_team_stats_2024_25.csv

=== Scraping team stats for Bundesliga ===
Fetching: https://fbref.com/en/comps/20/2024-2025/stats/2024-2025-bundesliga-Stats
Saved to ../../data/teams/raw/team_stats/bundesliga_team_stats_2024_25.csv

=== Scraping team stats for Ligue 1 ===
Fetching: https://fbref.com/en/comps/13/2024-2025/stats/2024-2025-ligue-1-Stats
Saved to ../../data/teams/raw/team_stats/ligue_1_team_stats_2024_25.csv
