In [3]:
import requests
import pandas as pd
from bs4 import BeautifulSoup, Comment
import warnings
import time 

def scrape_player_stats(player_name, url, table_id="stats_standard", season=None):
    warnings.filterwarnings("ignore")
    headers = {'User-Agent': 'Mozilla/5.0'}

    response = requests.get(url, headers=headers,verify=False)
    soup = BeautifulSoup(response.content, 'html.parser')
    print(response.status_code, url)

    # Try to find table normally
    table = soup.find("table", id="stats_standard")
    print(table)

    # If not found, look in comments
    
    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
        if "stats_standard" in comment:
            print(f"Found{table_id} in comment for {player_name}")
            comment_soup = BeautifulSoup(comment, 'html.parser')
            table = comment_soup.find('table', id=table_id)
            if table:
                break

    if not table:
        print(f"Table not found for {player_name}")
        return None

    # Try reading the table with pandas
    try:
        df = pd.read_html(str(table), header=1)[0]
    except Exception as e:
        print(f"Error parsing table for {player_name}: {e}")
        return None

    df['League'] = player_name
    df['Season'] = season

    return df

'''
    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
        if 'stats_standard' in comment:
            comment_soup = BeautifulSoup(comment, 'html.parser')
            table = comment_soup.find('table', id='stats_standard')
'''

"\n    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):\n        if 'stats_standard' in comment:\n            comment_soup = BeautifulSoup(comment, 'html.parser')\n            table = comment_soup.find('table', id='stats_standard')\n"

In [4]:
#https://fbref.com/en/comps/20/stats/Bundesliga-Stats
years = ["2024-2025","2023-2024","2022-2023","2021-2022","2020-2021","2019-2020","2018-2019","2017-2018","2016-2017","2015-2016"]
leagueinfo = [
    {
        "name": "Bundesliga",
        "url": "https://fbref.com/en/comps/20/{year}/stats/{year}-Bundesliga-Stats"
    },
    {
        "name": "Premier League",
        "url": "https://fbref.com/en/comps/9/{year}/stats/{year}-Premier-League-Stats"
    },
    {
        "name": "La Liga",
        "url": "https://fbref.com/en/comps/12/{year}/stats/{year}-La-Liga-Stats"
    },
    {
        "name": "Serie A",
        "url": "https://fbref.com/en/comps/11/{year}/stats/{year}-Serie-A-Stats"
    },
    {
        "name": "Ligue 1",
        "url": "https://fbref.com/en/comps/13/{year}/stats/{year}-Ligue-1-Stats"
    },
    {
        "name": "Eredivisie",
        "url": "https://fbref.com/en/comps/23/{year}/stats/{year}-Eredivisie-Stats"
    },
    {
        "name": "Primeira Liga",
        "url": "https://fbref.com/en/comps/32/{year}/stats/{year}-Primeira-Liga-Stats"
    },
    {
        "name": "Championship",
        "url": "https://fbref.com/en/comps/10/{year}/stats/{year}-EFL-Championship-Stats"
    },
    {
        "name": "Scottish Premiership",
        "url": "https://fbref.com/en/comps/40/{year}/stats/{year}-Scottish-Premiership-Stats"
    }
 
]

leagues = []
for year in years:
    for league in leagueinfo:
        leagues.append({
            "name": f"{league['name']} {year}",
            "url": league["url"].format(year=year),
            "season": year.replace("-", "/"),
        })

league_dfs = {}
combined_list = []
for league in leagues:
    df = scrape_player_stats(league["name"], league["url"])
    if df is not None:
        league_dfs[league["name"]] = df
        combined_list.append(df)
    else:
        print(f"Failed for {league['name']} at {league['url']}")
    time.sleep(5)  

200 https://fbref.com/en/comps/20/2024-2025/stats/2024-2025-Bundesliga-Stats
None
Foundstats_standard in comment for Bundesliga 2024-2025
200 https://fbref.com/en/comps/9/2024-2025/stats/2024-2025-Premier-League-Stats
None
Foundstats_standard in comment for Premier League 2024-2025
200 https://fbref.com/en/comps/12/2024-2025/stats/2024-2025-La-Liga-Stats
None
Foundstats_standard in comment for La Liga 2024-2025
200 https://fbref.com/en/comps/11/2024-2025/stats/2024-2025-Serie-A-Stats
None
Foundstats_standard in comment for Serie A 2024-2025
200 https://fbref.com/en/comps/13/2024-2025/stats/2024-2025-Ligue-1-Stats
None
Foundstats_standard in comment for Ligue 1 2024-2025
200 https://fbref.com/en/comps/23/2024-2025/stats/2024-2025-Eredivisie-Stats
None
Foundstats_standard in comment for Eredivisie 2024-2025
200 https://fbref.com/en/comps/32/2024-2025/stats/2024-2025-Primeira-Liga-Stats
None
Foundstats_standard in comment for Primeira Liga 2024-2025
200 https://fbref.com/en/comps/10/2024-

In [5]:
df = pd.concat(combined_list,ignore_index=True)

df

Unnamed: 0,Rk,Player,Nation,Pos,Squad,Age,Born,MP,Starts,Min,...,G-PK.1,G+A-PK,xG.1,xAG.1,xG+xAG,npxG.1,npxG+xAG.1,Matches,League,Season
0,1,Junior Adamu,at AUT,FW,Freiburg,23,2001,25,19,1545,...,0.12,0.23,0.32,0.14,0.46,0.32,0.46,Matches,Bundesliga 2024-2025,
1,2,Karim Adeyemi,de GER,"FW,MF",Dortmund,22,2002,25,17,1433,...,0.44,0.82,0.34,0.38,0.72,0.34,0.72,Matches,Bundesliga 2024-2025,
2,3,Amine Adli,ma MAR,"MF,FW",Leverkusen,24,2000,20,6,766,...,0.23,0.23,0.32,0.08,0.40,0.32,0.40,Matches,Bundesliga 2024-2025,
3,4,Oladapo Afolayan,eng ENG,"FW,MF",St. Pauli,26,1998,32,17,1639,...,0.16,0.22,0.17,0.21,0.38,0.17,0.38,Matches,Bundesliga 2024-2025,
4,5,Felix Agu,ng NGA,DF,Werder Bremen,24,1999,22,21,1751,...,0.15,0.15,0.07,0.09,0.17,0.07,0.17,Matches,Bundesliga 2024-2025,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51491,361,Scott Wright,sct SCO,FW,Aberdeen,17,1997,4,1,128,...,0.00,0.00,,,,,,Matches,Scottish Premiership 2015-2016,
51492,362,Dario Zanatta,ca CAN,"MF,FW",Hearts,18,1997,13,3,347,...,0.00,0.52,,,,,,Matches,Scottish Premiership 2015-2016,
51493,363,Luis Zwick,de GER,GK,Dundee United,21,1994,13,13,1170,...,0.00,0.00,,,,,,Matches,Scottish Premiership 2015-2016,
51494,364,Lewis Clark,,,Kilmarnock,,,1,0,24,...,0.00,0.00,,,,,,Matches,Scottish Premiership 2015-2016,


In [6]:
league_dfs =  pd.concat(combined_list, ignore_index=True)
league_dfs['League'].unique().tolist()

['Bundesliga 2024-2025',
 'Premier League 2024-2025',
 'La Liga 2024-2025',
 'Serie A 2024-2025',
 'Ligue 1 2024-2025',
 'Eredivisie 2024-2025',
 'Primeira Liga 2024-2025',
 'Championship 2024-2025',
 'Scottish Premiership 2024-2025',
 'Bundesliga 2023-2024',
 'Premier League 2023-2024',
 'La Liga 2023-2024',
 'Serie A 2023-2024',
 'Ligue 1 2023-2024',
 'Eredivisie 2023-2024',
 'Primeira Liga 2023-2024',
 'Championship 2023-2024',
 'Scottish Premiership 2023-2024',
 'Bundesliga 2022-2023',
 'Premier League 2022-2023',
 'La Liga 2022-2023',
 'Serie A 2022-2023',
 'Ligue 1 2022-2023',
 'Eredivisie 2022-2023',
 'Primeira Liga 2022-2023',
 'Championship 2022-2023',
 'Scottish Premiership 2022-2023',
 'Bundesliga 2021-2022',
 'Premier League 2021-2022',
 'La Liga 2021-2022',
 'Serie A 2021-2022',
 'Ligue 1 2021-2022',
 'Eredivisie 2021-2022',
 'Primeira Liga 2021-2022',
 'Championship 2021-2022',
 'Scottish Premiership 2021-2022',
 'Bundesliga 2020-2021',
 'Premier League 2020-2021',
 'La L

In [7]:
df = pd.concat(combined_list, ignore_index=True)
df['League'].unique().tolist()


['Bundesliga 2024-2025',
 'Premier League 2024-2025',
 'La Liga 2024-2025',
 'Serie A 2024-2025',
 'Ligue 1 2024-2025',
 'Eredivisie 2024-2025',
 'Primeira Liga 2024-2025',
 'Championship 2024-2025',
 'Scottish Premiership 2024-2025',
 'Bundesliga 2023-2024',
 'Premier League 2023-2024',
 'La Liga 2023-2024',
 'Serie A 2023-2024',
 'Ligue 1 2023-2024',
 'Eredivisie 2023-2024',
 'Primeira Liga 2023-2024',
 'Championship 2023-2024',
 'Scottish Premiership 2023-2024',
 'Bundesliga 2022-2023',
 'Premier League 2022-2023',
 'La Liga 2022-2023',
 'Serie A 2022-2023',
 'Ligue 1 2022-2023',
 'Eredivisie 2022-2023',
 'Primeira Liga 2022-2023',
 'Championship 2022-2023',
 'Scottish Premiership 2022-2023',
 'Bundesliga 2021-2022',
 'Premier League 2021-2022',
 'La Liga 2021-2022',
 'Serie A 2021-2022',
 'Ligue 1 2021-2022',
 'Eredivisie 2021-2022',
 'Primeira Liga 2021-2022',
 'Championship 2021-2022',
 'Scottish Premiership 2021-2022',
 'Bundesliga 2020-2021',
 'Premier League 2020-2021',
 'La L