In [1]:
import json
import requests
from bs4 import BeautifulSoup, Comment
import pandas as pd
import numpy as np

In [5]:
# Load the scraper configuration: a URL template plus, per statistics page,
# the settings used to locate and extract its table.
# JSON is UTF-8 by specification, so pin the encoding instead of relying on
# the platform default (which is not UTF-8 on every OS).
with open(".json", "r", encoding="utf-8") as file:
    source_dict = json.load(file)

# Both keys are required; a missing one fails here with a KeyError.
SOURCE_URL, STATISTICS_DICT = source_dict["source_url"], source_dict["statistics"]

In [6]:
# Scrape one table per entry in STATISTICS_DICT and combine them into a single
# DataFrame indexed by (player, team).

# Seconds to wait for each HTTP response; without a timeout `requests.get`
# can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

stats = None

for key, value in STATISTICS_DICT.items():
    # container_divs[0]: id of the wrapper element in the page body;
    # container_divs[1]: id of the element holding the <table> inside the comment.
    container_divs = value["container-divs"]
    # Only cells whose `data-stat` attribute is listed here are kept.
    columns = value["columns"]

    response = requests.get(SOURCE_URL.format(key), timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error (e.g. rate limiting) instead of parsing the
    # error page and hitting an opaque AttributeError on a missing element.
    response.raise_for_status()
    html = BeautifulSoup(response.text, features="html.parser")

    # The table markup is read from the first HTML comment inside the wrapper
    # element, so that comment has to be parsed as a document of its own.
    stats_div = html.find("body").find(id="wrap").find(id="content").find(id=container_divs[0])
    comment = stats_div.find(string=lambda text: isinstance(text, Comment))
    table = BeautifulSoup(comment, features="html.parser").find(id=container_divs[1]).find("table")

    # Keep only body rows that carry no `class` attribute
    # (presumably the classed rows are repeated headers/spacers -- TODO confirm).
    tr_list = [tr for tr in table.find("tbody").find_all("tr") if not tr.has_attr("class")]
    # One dict per row: {data-stat name: cell text}, restricted to `columns`.
    stats_list = [
        {td.get("data-stat"): td.text for td in tr.find_all("td") if td.get("data-stat") in columns}
        for tr in tr_list
    ]

    table_stats = pd.DataFrame(stats_list).set_index(["player", "team"])
    if stats is None:
        # The first table defines the set of (player, team) rows.
        stats = table_stats
    else:
        # Left join: rows absent from the first table are not added, and rows
        # absent from this table get NaN in its columns.
        stats = stats.merge(
            table_stats,
            on=["player", "team"],
            how="left"
        )

stats.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 580 entries, ('Max Aarons', 'Bournemouth') to ('Martin Ødegaard', 'Arsenal')
Columns: 159 entries, nationality to aerials_won_pct
dtypes: object(159)
memory usage: 743.6+ KB


In [10]:
# Normalise the scraped strings and convert every column except `nationality`
# and `position` to float.
#
# The cell is written so it can be re-run: after the first run `minutes` is
# already float, and calling `.str` on it directly would raise, so it is cast
# to `str` first. Missing values become the string "nan", which the final
# float conversion turns back into NaN.

# Drop the lowercase part of the nationality cell and keep the uppercase code.
# Removing letters alone can leave surrounding whitespace (e.g. " ENG" --
# presumably the cell text looks like "eng ENG"; verify), hence the strip.
stats["nationality"] = (
    stats["nationality"]
    .str.replace(r"[a-z]", "", regex=True)
    .str.strip()
)
# Remove thousands separators ("1,237" -> "1237") so the value parses as a number.
stats["minutes"] = stats["minutes"].astype(str).str.replace(",", "")
# Empty cells are missing values, not empty strings.
stats = stats.replace("", np.nan)

numerical_columns = stats.columns.difference(["nationality", "position"])
stats[numerical_columns] = stats[numerical_columns].astype(float)
stats.dtypes

nationality         object
position            object
age                float64
birth_year         float64
minutes            float64
                    ...   
own_goals          float64
ball_recoveries    float64
aerials_won        float64
aerials_lost       float64
aerials_won_pct    float64
Length: 159, dtype: object

In [12]:
# Keep only the rows with at least 90 recorded minutes. Rows whose `minutes`
# value is missing compare as False and are therefore dropped as well.
played_enough = stats["minutes"].ge(90)
stats = stats.loc[played_enough]
stats

Unnamed: 0_level_0,Unnamed: 1_level_0,nationality,position,age,birth_year,minutes,goals_assists,goals_pens,npxg_xg_assist,goals_per90,assists_per90,...,cards_red,fouls,fouled,offsides,crosses,own_goals,ball_recoveries,aerials_won,aerials_lost,aerials_won_pct
player,team,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Max Aarons,Bournemouth,ENG,DF,23.0,2000.0,1237.0,1.0,0.0,0.9,0.00,0.07,...,0.0,12.0,26.0,2.0,13.0,0.0,75.0,5.0,11.0,31.3
Tyler Adams,Bournemouth,USA,MF,24.0,1999.0,121.0,0.0,0.0,0.1,0.00,0.00,...,0.0,0.0,3.0,0.0,1.0,0.0,7.0,2.0,3.0,40.0
Tosin Adarabioyo,Fulham,ENG,DF,25.0,1997.0,1617.0,2.0,2.0,0.8,0.11,0.00,...,0.0,10.0,5.0,0.0,1.0,0.0,43.0,56.0,28.0,66.7
Elijah Adebayo,Luton Town,ENG,FW,25.0,1998.0,1419.0,10.0,10.0,6.6,0.63,0.00,...,0.0,27.0,16.0,13.0,5.0,0.0,34.0,43.0,43.0,50.0
Simon Adingra,Brighton,CIV,FW,21.0,2002.0,2222.0,7.0,6.0,8.0,0.24,0.04,...,0.0,29.0,19.0,17.0,76.0,0.0,118.0,8.0,12.0,40.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Nicolò Zaniolo,Aston Villa,ITA,"FW,MF",24.0,1999.0,839.0,2.0,2.0,3.6,0.21,0.00,...,0.0,24.0,21.0,0.0,18.0,0.0,36.0,7.0,14.0,33.3
Anass Zaroury,Burnley,MAR,"FW,MF",22.0,2000.0,152.0,0.0,0.0,0.2,0.00,0.00,...,1.0,4.0,3.0,0.0,11.0,0.0,8.0,0.0,1.0,0.0
Oleksandr Zinchenko,Arsenal,UKR,DF,26.0,1996.0,1722.0,3.0,1.0,3.1,0.05,0.10,...,0.0,7.0,9.0,1.0,30.0,0.0,104.0,27.0,16.0,62.8
Kurt Zouma,West Ham,FRA,DF,28.0,1994.0,2838.0,3.0,3.0,2.0,0.10,0.00,...,0.0,24.0,6.0,0.0,0.0,0.0,95.0,58.0,43.0,57.4
