In [41]:
import json
from bs4 import BeautifulSoup
import pandas as pd
import requests
import re
import codecs

PATTERN = r"{}\s+=\s+JSON.parse\(\'(.*?)\'\)"

def find_match(scripts, pattern):
    """Returns the first match found in the given scripts."""

    for script in scripts:
        match = re.search(pattern, script.string)
        if match:
            break

    return match

def get_data(url, data_type):
    """Returns data from the given URL of the given data type."""

    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    result = requests.get(url, headers=headers)
    soup = BeautifulSoup(result.content, 'html.parser')
    scripts = soup.find_all("script")

    pattern = re.compile(PATTERN.format(data_type))
    match = find_match(scripts, pattern)
    data = decode_data(match)

    return data

def decode_data(match):
    """Returns data in the match's first group decoded to JSON."""

    byte_data = codecs.escape_decode(match.group(1))
    json_data = json.loads(byte_data[0].decode("utf-8"))

    return json_data

In [47]:
leagues = ['EPL', 'La_liga', 'Bundesliga', 'Serie_A', 'Ligue_1']
year = 2014
dfs = []

for i in leagues:
    dfs.append(pd.DataFrame(get_data(f'https://understat.com/league/{i}/{year}', 'playersData')))

In [48]:
total = pd.concat(dfs)

In [49]:
total

Unnamed: 0,id,player_name,games,time,goals,xG,assists,xA,shots,key_passes,yellow_cards,red_cards,position,team_title,npg,npxG,xGChain,xGBuildup
0,8260,Erling Haaland,35,2803,36,32.761399537324905,8,5.8491098545491695,123,29,5,0,F S,Manchester City,29,27.433209866285324,31.963398084044456,3.285816218703985
1,647,Harry Kane,38,3414,30,23.06444012373686,3,7.547407771460712,133,57,6,0,F,Tottenham,25,18.497427217662334,25.079117983579636,5.095352162607014
2,998,Ivan Toney,33,2960,20,21.69119757413864,4,4.909892840310931,94,26,9,0,F,Brentford,14,16.363018790259957,18.1100468672812,4.024585669860244
3,1250,Mohamed Salah,38,3307,19,23.340063713490963,12,8.720245610922575,125,65,2,0,F M S,Liverpool,17,20.295387655496597,32.54413793236017,8.866782674565911
4,468,Callum Wilson,31,1911,18,18.856768030673265,5,3.567963434383273,73,24,4,0,F S,Newcastle United,15,16.57326026633382,15.517562381923199,1.810890594497323
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
580,11550,Fallou Fall,1,30,0,0,0,0,0,0,1,0,S,Reims,0,0,0.05015653744339943,0.05015653744339943
581,11552,Ben Touré,4,207,0,0.035950008779764175,0,0,2,0,0,0,F S,Ajaccio,0,0.035950008779764175,0.137821014970541,0.10187100525945425
582,11579,Amine Messoussa,1,1,0,0,0,0,0,0,0,0,S,Lille,0,0,0,0
583,11613,Mamadou Diakhon,2,20,0,0.03598650172352791,0,0,1,0,0,0,S,Reims,0,0.03598650172352791,0.03598650172352791,0


In [46]:
total.to_csv("csv files/ustat_2014.csv", index=False)