In [57]:
import requests
import pandas as pd
from bs4 import BeautifulSoup, Comment
import warnings
import time 

def scrape_player_stats(name, url, table_id, season=None):
    warnings.filterwarnings("ignore")
    headers = {'User-Agent': 'Mozilla/5.0'}

    response = requests.get(url, headers=headers,verify=False)
    soup = BeautifulSoup(response.content, 'html.parser')
    print(response.status_code, url)

    table = soup.find("table", id=table_id) 


    if not table:
        for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
            if table_id in comment:
                print(f"Found {table_id} in comment for {name}")
                comment_soup = BeautifulSoup(comment, 'html.parser')
                table = comment_soup.find('table', id=table_id)
                if table:
                    break

    if not table:
        print(f"Table not found for {name}")
        return None


    try:
        df = pd.read_html(str(table), header=1)[0]
    except Exception as e:
        print(f"Error parsing table for {name}: {e}")
        return None

    df['League'] = name
    df['Season'] = season

    return df

'''
    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
        if 'stats_standard' in comment:
            comment_soup = BeautifulSoup(comment, 'html.parser')
            table = comment_soup.find('table', id='stats_standard')
'''

"\n    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):\n        if 'stats_standard' in comment:\n            comment_soup = BeautifulSoup(comment, 'html.parser')\n            table = comment_soup.find('table', id='stats_standard')\n"

In [58]:
#https://fbref.com/en/comps/20/stats/Bundesliga-Stats
years = ["2024-2025","2023-2024","2022-2023","2021-2022","2020-2021","2019-2020","2018-2019","2017-2018","2016-2017","2015-2016"]
leagueinfo = [
    {
        "name": "Bundesliga",
        "url": "https://fbref.com/en/comps/20/{year}/stats/{year}-Bundesliga-Stats"
    },
    {
        "name": "Premier League",
        "url": "https://fbref.com/en/comps/9/{year}/stats/{year}-Premier-League-Stats"
    },
    {
        "name": "La Liga",
        "url": "https://fbref.com/en/comps/12/{year}/stats/{year}-La-Liga-Stats"
    },
    {
        "name": "Serie A",
        "url": "https://fbref.com/en/comps/11/{year}/stats/{year}-Serie-A-Stats"
    },
    {
        "name": "Ligue 1",
        "url": "https://fbref.com/en/comps/13/{year}/stats/{year}-Ligue-1-Stats"
    },
    {
        "name": "Eredivisie",
        "url": "https://fbref.com/en/comps/23/{year}/stats/{year}-Eredivisie-Stats"
    },
    {
        "name": "Primeira Liga",
        "url": "https://fbref.com/en/comps/32/{year}/stats/{year}-Primeira-Liga-Stats"
    },
    {
        "name": "Championship",
        "url": "https://fbref.com/en/comps/10/{year}/stats/{year}-EFL-Championship-Stats"
    },
    {
        "name": "Scottish Premiership",
        "url": "https://fbref.com/en/comps/40/{year}/stats/{year}-Scottish-Premiership-Stats"
    }
 
]

leagues = []
for year in years:
    for league in leagueinfo:
        leagues.append({
            "name": f"{league['name']} {year}",
            "url": league["url"].format(year=year),
            "season": year.replace("-", "/"),
        })

league_dfs = {}
combined_list = []
for league in leagues:
    df = scrape_player_stats(league["name"], league["url"], table_id='stats_standard')
    if df is not None:
        league_dfs[league["name"]] = df
        combined_list.append(df)
    else:
        print(f"Failed for {league['name']} at {league['url']}")
    time.sleep(5)  

200 https://fbref.com/en/comps/20/2024-2025/stats/2024-2025-Bundesliga-Stats
Found stats_standard in comment for Bundesliga 2024-2025
200 https://fbref.com/en/comps/9/2024-2025/stats/2024-2025-Premier-League-Stats
Found stats_standard in comment for Premier League 2024-2025
200 https://fbref.com/en/comps/12/2024-2025/stats/2024-2025-La-Liga-Stats
Found stats_standard in comment for La Liga 2024-2025
200 https://fbref.com/en/comps/11/2024-2025/stats/2024-2025-Serie-A-Stats
Found stats_standard in comment for Serie A 2024-2025
200 https://fbref.com/en/comps/13/2024-2025/stats/2024-2025-Ligue-1-Stats
Found stats_standard in comment for Ligue 1 2024-2025
200 https://fbref.com/en/comps/23/2024-2025/stats/2024-2025-Eredivisie-Stats
Found stats_standard in comment for Eredivisie 2024-2025
200 https://fbref.com/en/comps/32/2024-2025/stats/2024-2025-Primeira-Liga-Stats
Found stats_standard in comment for Primeira Liga 2024-2025
200 https://fbref.com/en/comps/10/2024-2025/stats/2024-2025-EFL-Cha

In [59]:
df = pd.concat(combined_list,ignore_index=True)

df

Unnamed: 0,Rk,Player,Nation,Pos,Squad,Age,Born,MP,Starts,Min,...,G-PK.1,G+A-PK,xG.1,xAG.1,xG+xAG,npxG.1,npxG+xAG.1,Matches,League,Season
0,1,Junior Adamu,at AUT,FW,Freiburg,23,2001,25,19,1545,...,0.12,0.23,0.32,0.14,0.46,0.32,0.46,Matches,Bundesliga 2024-2025,
1,2,Karim Adeyemi,de GER,"FW,MF",Dortmund,22,2002,25,17,1433,...,0.44,0.82,0.34,0.38,0.72,0.34,0.72,Matches,Bundesliga 2024-2025,
2,3,Amine Adli,ma MAR,"MF,FW",Leverkusen,24,2000,20,6,766,...,0.23,0.23,0.32,0.08,0.40,0.32,0.40,Matches,Bundesliga 2024-2025,
3,4,Oladapo Afolayan,eng ENG,"FW,MF",St. Pauli,26,1998,32,17,1639,...,0.16,0.22,0.17,0.21,0.38,0.17,0.38,Matches,Bundesliga 2024-2025,
4,5,Felix Agu,ng NGA,DF,Werder Bremen,24,1999,22,21,1751,...,0.15,0.15,0.07,0.09,0.17,0.07,0.17,Matches,Bundesliga 2024-2025,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51491,361,Scott Wright,sct SCO,FW,Aberdeen,17,1997,4,1,128,...,0.00,0.00,,,,,,Matches,Scottish Premiership 2015-2016,
51492,362,Dario Zanatta,ca CAN,"MF,FW",Hearts,18,1997,13,3,347,...,0.00,0.52,,,,,,Matches,Scottish Premiership 2015-2016,
51493,363,Luis Zwick,de GER,GK,Dundee United,21,1994,13,13,1170,...,0.00,0.00,,,,,,Matches,Scottish Premiership 2015-2016,
51494,364,Lewis Clark,,,Kilmarnock,,,1,0,24,...,0.00,0.00,,,,,,Matches,Scottish Premiership 2015-2016,


In [None]:
team_league_dfs = {}
team_combined_list = []
for team_league in leagues:
    team_df = scrape_player_stats(team_league["name"], team_league["url"], table_id='stats_squads_standard_for')
    if team_df is not None:
        team_league_dfs[team_league["name"]] = team_df
        team_combined_list.append(team_df)
    else:
        print(f"Failed for {team_league['name']} at {team_league['url']}")
    time.sleep(5)  

200 https://fbref.com/en/comps/20/2024-2025/stats/2024-2025-Bundesliga-Stats
200 https://fbref.com/en/comps/9/2024-2025/stats/2024-2025-Premier-League-Stats
200 https://fbref.com/en/comps/12/2024-2025/stats/2024-2025-La-Liga-Stats
200 https://fbref.com/en/comps/11/2024-2025/stats/2024-2025-Serie-A-Stats
200 https://fbref.com/en/comps/13/2024-2025/stats/2024-2025-Ligue-1-Stats
200 https://fbref.com/en/comps/23/2024-2025/stats/2024-2025-Eredivisie-Stats
200 https://fbref.com/en/comps/32/2024-2025/stats/2024-2025-Primeira-Liga-Stats
200 https://fbref.com/en/comps/10/2024-2025/stats/2024-2025-EFL-Championship-Stats
200 https://fbref.com/en/comps/40/2024-2025/stats/2024-2025-Scottish-Premiership-Stats
200 https://fbref.com/en/comps/20/2023-2024/stats/2023-2024-Bundesliga-Stats
200 https://fbref.com/en/comps/9/2023-2024/stats/2023-2024-Premier-League-Stats
200 https://fbref.com/en/comps/12/2023-2024/stats/2023-2024-La-Liga-Stats
200 https://fbref.com/en/comps/11/2023-2024/stats/2023-2024-Ser

In [61]:
team_df = pd.concat(team_league_dfs,ignore_index=True)
team_df

Unnamed: 0,Squad,# Pl,Age,Poss,MP,Starts,Min,90s,Gls,Ast,...,G+A.1,G-PK.1,G+A-PK,xG.1,xAG.1,xG+xAG,npxG.1,npxG+xAG.1,League,Season
0,Augsburg,29,26.8,44.3,34,374,3060,34.0,35,26,...,1.79,1.00,1.76,1.02,0.74,1.76,0.97,1.71,Bundesliga 2024-2025,
1,Bayern Munich,29,27.7,67.9,34,374,3060,34.0,96,66,...,4.76,2.56,4.50,2.40,1.76,4.16,2.18,3.93,Bundesliga 2024-2025,
2,Bochum,27,28.0,45.5,34,374,3060,34.0,32,23,...,1.62,0.91,1.59,1.23,0.88,2.11,1.16,2.04,Bundesliga 2024-2025,
3,Dortmund,28,26.6,58.9,34,374,3060,34.0,69,56,...,3.68,1.88,3.53,1.80,1.39,3.19,1.66,3.06,Bundesliga 2024-2025,
4,Eint Frankfurt,26,24.9,49.7,34,374,3060,34.0,68,47,...,3.38,1.91,3.29,1.91,1.33,3.24,1.81,3.13,Bundesliga 2024-2025,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1691,Kilmarnock,34,24.3,,38,418,3420,38.0,40,30,...,1.84,0.95,1.74,,,,,,Scottish Premiership 2015-2016,
1692,Motherwell,27,26.8,,38,418,3420,38.0,46,30,...,2.00,1.13,1.92,,,,,,Scottish Premiership 2015-2016,
1693,Partick Thistle,29,25.5,,38,418,3420,38.0,40,30,...,1.84,1.03,1.82,,,,,,Scottish Premiership 2015-2016,
1694,Ross County,26,26.5,,38,418,3420,38.0,54,34,...,2.32,1.29,2.18,,,,,,Scottish Premiership 2015-2016,
