In [2]:
import os
import time

import pandas as pd
from bs4 import BeautifulSoup, Comment
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# Location of the ChromeDriver binary. Set the CHROMEDRIVER_PATH environment
# variable to point at a different install; when it is unset, the original
# hard-coded location is used, so existing setups keep working unchanged.
PATH = os.environ.get(
    "CHROMEDRIVER_PATH",
    "c:\\Users\\ADMIN\\Downloads\\chromedriver-win64\\chromedriver-win64\\chromedriver.exe",
)
service = Service(executable_path=PATH)
# Single Chrome session shared by every crawl_stats() call below.
driver = webdriver.Chrome(service=service)

def crawl_stats(url, table_id):
    """Load a page in the shared Chrome driver and return one table as a DataFrame.

    The table is looked up by its ``id`` in the rendered page. If it is not in
    the live DOM, every HTML comment that mentions ``table_id`` is parsed in
    turn until one yields the table.

    Only ``<td>`` cells are read (``<th>`` cells are not included). Rows whose
    ``class`` contains ``"thead"`` and rows without any ``<td>`` are skipped.
    Column names are the ``data-stat`` attributes of the first data row, and
    every value is the cell's stripped text (a string).

    Args:
        url: Address of the page to load.
        table_id: ``id`` attribute of the ``<table>`` element to extract.

    Returns:
        A ``pandas.DataFrame`` with one row per data row of the table; empty
        if the table body holds no data rows.

    Raises:
        ValueError: If the table (or its ``<tbody>``) cannot be found.
    """
    # Wait before each request; presumably to stay under the site's rate limit.
    time.sleep(3)
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    table = soup.find('table', id=table_id)

    if table is None:
        # The markup may be wrapped in an HTML comment; keep looking until a
        # comment actually parses to the requested table instead of stopping
        # at the first one that merely mentions the id.
        for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
            if table_id not in comment:
                continue
            table = BeautifulSoup(comment, 'html.parser').find('table', id=table_id)
            if table is not None:
                break

    if table is None:
        raise ValueError(f"Table {table_id!r} not found at {url}")

    body = table.find("tbody")
    if body is None:
        raise ValueError(f"Table {table_id!r} at {url} has no <tbody>")

    # Collect the <td> cells of each real data row.
    data_rows = []
    for row in body.find_all("tr"):
        if "thead" in (row.get("class") or []):
            continue
        cells = row.find_all("td")
        if cells:
            data_rows.append(cells)

    # Take the column names from the first *data* row so the header count
    # always matches the cell count, even when the first <tr> is skipped.
    headers = [cell['data-stat'] for cell in data_rows[0]] if data_rows else []
    data = [[cell.text.strip() for cell in cells] for cells in data_rows]
    return pd.DataFrame(data, columns=headers)

# (page URL, table id) for every stats table, in the same order as the
# DataFrame names unpacked below.
_STAT_PAGES = [
    ("https://fbref.com/en/comps/9/stats/Premier-League-Stats", "stats_standard"),
    ('https://fbref.com/en/comps/9/keepers/Premier-League-Stats', 'stats_keeper'),
    ('https://fbref.com/en/comps/9/shooting/Premier-League-Stats', 'stats_shooting'),
    ('https://fbref.com/en/comps/9/passing/Premier-League-Stats', 'stats_passing'),
    ('https://fbref.com/en/comps/9/gca/Premier-League-Stats', 'stats_gca'),
    ('https://fbref.com/en/comps/9/defense/Premier-League-Stats', 'stats_defense'),
    ('https://fbref.com/en/comps/9/possession/Premier-League-Stats', 'stats_possession'),
    ('https://fbref.com/en/comps/9/misc/Premier-League-Stats', 'stats_misc'),
]

try:
    (df_standard, df_keeper, df_shooting, df_passing,
     df_GaSC, df_DA, df_Poss, df_MS) = [
        crawl_stats(page_url, page_table) for page_url, page_table in _STAT_PAGES
    ]
except Exception:
    # A failed crawl aborts the script, so close the browser here rather than
    # leaving the Chrome session running; the error is then re-raised as-is.
    driver.quit()
    raise


# 'minutes' is scraped as text; strip commas (presumably thousands separators)
# and convert to numbers. Values that still fail to parse become NaN.
df_standard['minutes'] = pd.to_numeric(
    df_standard['minutes'].str.replace(',', '', regex=False),
    errors='coerce',
)

# Keep players with more than 90 minutes and drop the columns that are not
# wanted in the final table. Filtering and dropping are chained so the result
# is a fresh DataFrame: an in-place drop on a filtered frame triggers
# SettingWithCopyWarning in pandas versions before 3.0. NaN minutes compare
# as False and are therefore filtered out as well.
df_standard = df_standard[df_standard['minutes'] > 90].drop(columns=[
    'birth_year', 'minutes_90s', 'goals_assists', 'goals_pens', 'pens_made', 'pens_att',
    'npxg', 'npxg_xg_assist', 'goals_assists_per90', 'goals_pens_per90',
    'goals_assists_pens_per90', 'xg_xg_assist_per90', 'npxg_per90',
    'npxg_xg_assist_per90', 'matches',
], errors='ignore')

# Column subsets taken from each scraped table. Every subset starts with
# 'player' and 'team', which are the keys used when the tables are merged.

# Goalkeeping
keeper_stats = df_keeper.loc[:, [
    'player', 'team',
    'gk_goals_against_per90', 'gk_save_pct', 'gk_clean_sheets_pct', 'gk_pens_save_pct',
]]

# Shooting
shooting_stats = df_shooting.loc[:, [
    'player', 'team',
    'shots_on_target_pct', 'shots_on_target_per90', 'goals_per_shot', 'average_shot_distance',
]]

# Passing
passing_stats = df_passing.loc[:, [
    'player', 'team',
    'passes_completed', 'passes_pct', 'passes_total_distance',
    'passes_pct_short', 'passes_pct_medium', 'passes_pct_long',
    'assisted_shots', 'passes_into_final_third', 'passes_into_penalty_area',
    'crosses_into_penalty_area', 'progressive_passes',
]]

# Goal and shot creation
GaSc_stats = df_GaSC.loc[:, [
    'player', 'team',
    'sca', 'sca_per90', 'gca', 'gca_per90',
]]

# Defensive actions
DA_stats = df_DA.loc[:, [
    'player', 'team',
    'tackles', 'tackles_won', 'challenges', 'challenges_lost',
    'blocks', 'blocked_shots', 'blocked_passes', 'interceptions',
]]

# Possession
Poss_stats = df_Poss.loc[:, [
    'player', 'team',
    'touches', 'touches_def_pen_area', 'touches_def_3rd', 'touches_mid_3rd', 'touches_att_3rd',
    'touches_att_pen_area', 'take_ons', 'take_ons_won_pct', 'take_ons_tackled_pct',
    'carries', 'carries_progressive_distance', 'progressive_carries',
    'carries_into_final_third', 'carries_into_penalty_area', 'miscontrols', 'dispossessed',
    'passes_received', 'progressive_passes_received',
]]

# Miscellaneous
MS_stats = df_MS.loc[:, [
    'player', 'team',
    'fouls', 'fouled', 'offsides', 'crosses', 'ball_recoveries',
    'aerials_won', 'aerials_lost', 'aerials_won_pct',
]]


# Left-join every per-category table onto the standard stats, matching rows
# on player name and team. Players missing from a table get NaN in its columns.
merge_keys = ['player', 'team']
df_final = df_standard
for extra in (keeper_stats, shooting_stats, passing_stats, GaSc_stats,
              DA_stats, Poss_stats, MS_stats):
    # If a table repeats a column that is already present, pandas would keep
    # both copies and rename them with '_x'/'_y' suffixes. Drop the repeat
    # from the right-hand table so each stat appears once under its own name.
    repeated = [col for col in extra.columns
                if col in df_final.columns and col not in merge_keys]
    df_final = df_final.merge(extra.drop(columns=repeated), how='left', on=merge_keys)

try:
    # Mark stats that were missing after the left joins.
    df_final = df_final.fillna('N/a')
    df_final_sorted = df_final.sort_values(by=['player', 'team'])

    # 'utf-8-sig' writes a BOM; presumably so spreadsheet tools detect UTF-8
    # and show accented player names correctly.
    df_final_sorted.to_csv("result.csv", index=False, encoding='utf-8-sig')
    print(df_final_sorted)
finally:
    # Close the browser even if writing or printing the result fails.
    driver.quit()


                 player nationality position             team     age games  \
92      Aaron Cresswell     eng ENG       DF         West Ham  35-146    15   
363      Aaron Ramsdale     eng ENG       GK      Southampton  26-361    27   
464   Aaron Wan-Bissaka     eng ENG       DF         West Ham  27-165    33   
122  Abdoulaye Doucouré      ml MLI       MF          Everton  32-129    30   
239  Abdukodir Khusanov      uz UZB       DF  Manchester City  21-070     6   
..                  ...         ...      ...              ...     ...   ...   
422   Yukinari Sugawara      jp JPN    DF,MF      Southampton  24-316    29   
47        Yves Bissouma      ml MLI       MF        Tottenham  28-253    25   
307         Álex Moreno      es ESP    DF,MF  Nott'ham Forest  31-336    15   
185      İlkay Gündoğan      de GER       MF  Manchester City  34-198    31   
142    Łukasz Fabiański      pl POL       GK         West Ham  40-022    13   

    games_starts  minutes goals assists  ... passes