In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

def scrape_league_data(league_name, url, table_id=None, season="2024/25"):
    print(f"Scraping {league_name}...")

    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers, verify=False)
    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find("table", id=table_id) if table_id else soup.find("table")
    if not table:
        print(f"Table not found for {league_name}")
        return None

    df = pd.read_html(str(table))[0]
    df['League'] = league_name
    df['Season'] = season

    return df


In [None]:
# List of leagues to scrape
years = ["2024-2025","2023-2024","2022-2023","2021-2022","2020-2021","2019-2020","2018-2019","2017-2018","2016-2017","2015-2016"]
leagueinfo = [
    {
        "name": "Bundesliga",
        "url": "https://fbref.com/en/comps/20/{year}/schedule/{year}-Bundesliga-Scores-and-Fixtures"

    },
    {
        "name": "Premier League",
        "url": "https://fbref.com/en/comps/9/{year}/schedule/{year}-Premier-League-Scores-and-Fixtures"
    },
    {
        "name": "Serie A",
        "url": "https://fbref.com/en/comps/11/{year}/schedule/{year}-Serie-A-Scores-and-Fixtures"
    }
    ,
    {
        "name": "La Liga",
        "url": "https://fbref.com/en/comps/12/{year}/schedule/{year}-La-Liga-Scores-and-Fixtures"
    },
    {
        "name": "Ligue 1",
        "url": "https://fbref.com/en/comps/13/{year}/schedule/{year}-Ligue-1-Scores-and-Fixtures"
    },
    {
        "name": "Eredivisie",
        "url": "https://fbref.com/en/comps/20/{year}/schedule/{year}-Eredivisie-Scores-and-Fixtures"
    },
    {
        "name": "Primeira Liga",
        "url": "https://fbref.com/en/comps/19/{year}/schedule/{year}-Primeira-Liga-Scores-and-Fixtures"
    },
    {
        "name": "EFL Championship",
        "url": "https://fbref.com/en/comps/10/{year}/schedule/{year}-EFL-Championship-Scores-and-Fixtures"
    }

]

leagues = []
for year in years:
    for league in leagueinfo:
        leagues.append({
            "name": f"{league['name']} {year}",
            "url": league["url"].format(year=year),
            "season": year.replace("-", "/")
        })

league_dfs = {}
combined_list = []

for league in leagues:
    df = scrape_league_data(league["name"], league["url"])
    if df is not None:
        league_dfs[league["name"]] = df 
        combined_list.append(df)        


In [14]:
league_dfs = pd.concat(combined_list, ignore_index=True)
league_dfs.head()

Unnamed: 0,Round,Wk,Day,Date,Time,Home,xG,Score,xG.1,Away,Attendance,Venue,Referee,Match Report,Notes,League,Season
0,Bundesliga,1.0,Fri,2024-08-23,20:30,Gladbach,1.6,2–3,2.7,Leverkusen,54042.0,Stadion im Borussia-Park,Robert Schröder,Match Report,,Bundesliga 2024-2025,2024/25
1,Bundesliga,1.0,Sat,2024-08-24,15:30,Hoffenheim,3.5,3–2,1.7,Holstein Kiel,18503.0,PreZero Arena,Tobias Stieler,Match Report,,Bundesliga 2024-2025,2024/25
2,Bundesliga,1.0,Sat,2024-08-24,15:30,Mainz 05,1.2,1–1,0.6,Union Berlin,31500.0,Mewa Arena,Harm Osmers,Match Report,,Bundesliga 2024-2025,2024/25
3,Bundesliga,1.0,Sat,2024-08-24,15:30,Augsburg,1.0,2–2,1.4,Werder Bremen,30660.0,WWK Arena,Sascha Stegemann,Match Report,,Bundesliga 2024-2025,2024/25
4,Bundesliga,1.0,Sat,2024-08-24,15:30,Freiburg,2.1,3–1,0.4,Stuttgart,34700.0,Europa-Park Stadion,Tobias Welz,Match Report,,Bundesliga 2024-2025,2024/25


In [7]:
leagues

[{'name': 'Bundesliga 2024-2025',
  'url': 'https://fbref.com/en/comps/20/2024-2025/schedule/2024-2025-Bundesliga-Scores-and-Fixtures',
  'season': '2024/2025'},
 {'name': 'Premier League 2024-2025',
  'url': 'https://fbref.com/en/comps/9/2024-2025/schedule/2024-2025-Premier-League-Scores-and-Fixtures',
  'season': '2024/2025'},
 {'name': 'Serie A 2024-2025',
  'url': 'https://fbref.com/en/comps/11/2024-2025/schedule/2024-2025-Serie-A-Scores-and-Fixtures',
  'season': '2024/2025'},
 {'name': 'La Liga 2024-2025',
  'url': 'https://fbref.com/en/comps/12/2024-2025/schedule/2024-2025-La-Liga-Scores-and-Fixtures',
  'season': '2024/2025'},
 {'name': 'Ligue 1 2024-2025',
  'url': 'https://fbref.com/en/comps/13/2024-2025/schedule/2024-2025-Ligue-1-Scores-and-Fixtures',
  'season': '2024/2025'},
 {'name': 'Eredivisie 2024-2025',
  'url': 'https://fbref.com/en/comps/20/2024-2025/schedule/2024-2025-Eredivisie-Scores-and-Fixtures',
  'season': '2024/2025'},
 {'name': 'Primeira Liga 2024-2025',
  