In [1]:
import os
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

def scrape_league_data(league_name, url, table_id=None, season="2024/25", timeout=30):
    """Download a page and return one of its HTML tables as a DataFrame.

    Parameters
    ----------
    league_name : str
        Label used in progress messages and written to the ``League`` column.
    url : str
        Page to fetch.
    table_id : str, optional
        ``id`` attribute of the ``<table>`` to extract. When omitted, the
        first ``<table>`` in the document is used.
    season : str, default "2024/25"
        Value written to the ``Season`` column.
    timeout : float, default 30
        Seconds to wait for the server before ``requests`` gives up, so a
        stalled connection cannot hang the notebook cell indefinitely.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table with ``League`` and ``Season`` columns appended, or
        ``None`` when the server answers with an error status or the table
        is not present in the page. Rows are returned as parsed; blank rows
        in the source table are not filtered out.
    """
    print(f"Scraping {league_name}...")

    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers, timeout=timeout)

    # An error response (rate limit, 404, ...) carries an error page rather
    # than the requested data; do not try to parse a table out of it.
    if not response.ok:
        print(f"❌ HTTP {response.status_code} for {league_name}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find("table", id=table_id) if table_id else soup.find("table")
    if table is None:
        print(f"❌ Table not found for {league_name}")
        return None

    # read_html expects a path, URL, or file-like object; passing the markup
    # as a bare string is deprecated in recent pandas, so wrap it in StringIO.
    df = pd.read_html(StringIO(str(table)))[0]
    df['League'] = league_name
    df['Season'] = season

    return df




In [2]:
# Leagues to scrape: each entry pairs a display name with its fixtures page.
leagues = [
    {"name": league_label, "url": fixtures_url}
    for league_label, fixtures_url in (
        (
            "Bundesliga",
            "https://fbref.com/en/comps/20/2024-2025/schedule/2024-2025-Bundesliga-Scores-and-Fixtures",
        ),
        (
            "Premier League",
            "https://fbref.com/en/comps/9/2024-2025/schedule/2024-2025-Premier-League-Scores-and-Fixtures",
        ),
        (
            "Serie A",
            "https://fbref.com/en/comps/11/2024-2025/schedule/2024-2025-Serie-A-Scores-and-Fixtures",
        ),
    )
]

# Successful scrapes are tracked two ways: keyed by league name for individual
# access, and in a flat list kept for merging.
league_dfs = {}
combined_list = []

for league in leagues:
    df = scrape_league_data(league["name"], league["url"])
    if df is None:
        # The scraper returned nothing for this league; skip it.
        continue
    league_dfs[league["name"]] = df
    combined_list.append(df)

Scraping Bundesliga...
Scraping Premier League...
Scraping Serie A...


In [3]:
# Fetch the Bundesliga frame from the per-league dictionary. This raises
# KeyError if no "Bundesliga" entry was stored in league_dfs.
bundesliga_df = league_dfs["Bundesliga"]
# Bare trailing expression so the notebook renders the frame as the cell output.
bundesliga_df

Unnamed: 0,Round,Wk,Day,Date,Time,Home,xG,Score,xG.1,Away,Attendance,Venue,Referee,Match Report,Notes,League,Season
0,Bundesliga,1.0,Fri,2024-08-23,20:30,Gladbach,1.6,2–3,2.7,Leverkusen,54042.0,Stadion im Borussia-Park,Robert Schröder,Match Report,,Bundesliga,2024/25
1,Bundesliga,1.0,Sat,2024-08-24,15:30,Hoffenheim,3.5,3–2,1.7,Holstein Kiel,18503.0,PreZero Arena,Tobias Stieler,Match Report,,Bundesliga,2024/25
2,Bundesliga,1.0,Sat,2024-08-24,15:30,Mainz 05,1.2,1–1,0.6,Union Berlin,31500.0,Mewa Arena,Harm Osmers,Match Report,,Bundesliga,2024/25
3,Bundesliga,1.0,Sat,2024-08-24,15:30,Augsburg,1.0,2–2,1.4,Werder Bremen,30660.0,WWK Arena,Sascha Stegemann,Match Report,,Bundesliga,2024/25
4,Bundesliga,1.0,Sat,2024-08-24,15:30,Freiburg,2.1,3–1,0.4,Stuttgart,34700.0,Europa-Park Stadion,Tobias Welz,Match Report,,Bundesliga,2024/25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
337,Bundesliga,34.0,Sat,2025-05-17,15:30,Dortmund,2.4,3–0,0.8,Holstein Kiel,81365.0,Signal Iduna Park,Harm Osmers,Match Report,,Bundesliga,2024/25
338,Bundesliga,34.0,Sat,2025-05-17,15:30,Hoffenheim,0.4,0–4,1.4,Bayern Munich,30150.0,PreZero Arena,Daniel Schlager,Match Report,,Bundesliga,2024/25
339,,,,,,,,,,,,,,,,Bundesliga,2024/25
340,German 1/2 Relegation/Promotion play-offs,,Thu,2025-05-22,20:30,Heidenheim,,2–2,,Elversberg,15000.0,Voith-Arena,Sven Jablonski,Match Report,Leg 1 of 2,Bundesliga,2024/25
