In [65]:
import json
import os

import numpy as np
import pandas as pd
import requests
from selenium import webdriver # selenium 4.20.0
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager

In [66]:
# Turn on Chrome's "performance" and "browser" logs so the network requests
# made by the page can be read back from the driver afterwards.
options = webdriver.ChromeOptions()
options.set_capability(
    "goog:loggingPrefs", {"performance": "ALL", "browser": "ALL"}
)
# Make sure you already have Chrome installed
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
driver.set_page_load_timeout(10)
try:
    driver.get("https://www.sofascore.com/tournament/football/england/premier-league/17#id:10356")
except Exception as exc:
    # Best effort: a load that fails or exceeds the 10 s page-load timeout is
    # tolerated and the notebook carries on with whatever the browser has.
    # Catching Exception (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate, and the message leaves a trace of what happened.
    print(f"Page load did not finish cleanly ({type(exc).__name__}); continuing.")

# Scroll to the bottom of the page.
# NOTE(review): presumably this triggers the lazily loaded statistics
# requests -- confirm they have fired before the logs are read.
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

In [67]:
# Read the Chrome performance log and unwrap every entry: each raw entry
# carries a JSON string under "message", whose decoded object in turn holds
# the actual event under its own "message" key.
logs_raw = driver.get_log("performance")
logs = []
for raw_entry in logs_raw:
    envelope = json.loads(raw_entry["message"])
    logs.append(envelope["message"])

In [68]:
# Sort the logged events that target a "statistics?" endpoint into one list
# per statistics tab, based on keywords found in the request's ":path" header.
summary_apis = []
attack_apis = []
defence_apis = []
passing_apis = []

# Keywords are tried in this order and an event goes to the first match only.
keyword_buckets = (
    ('summary', summary_apis),
    ('attack', attack_apis),
    ('defence', defence_apis),
    ('passing', passing_apis),
)

for event in logs:
    # Events without a "headers" entry (or without ":path") yield '' and are skipped.
    request_path = event['params'].get('headers', {}).get(':path', '')
    if 'statistics?' not in request_path:
        continue
    for keyword, bucket in keyword_buckets:
        if keyword in request_path:
            bucket.append(event)
            break

In [59]:
# Stat keys read from every result row, per statistics tab, in output column
# order. The player's name is always emitted first, ahead of these values.
_STAT_FIELDS = {
    'summary': ('goals', 'successfulDribbles', 'tackles', 'assists',
                'accuratePassesPercentage', 'rating'),
    'attack': ('goals', 'bigChancesMissed', 'totalShots',
               'goalConversionPercentage'),
    'defence': ('tackles', 'interceptions', 'clearances', 'errorLeadToGoal'),
    'passing': ('assists', 'accuratePasses', 'accuratePassesPercentage',
                'keyPasses'),
}


def extract_data(api, data_type):
    """Fetch one captured statistics response and flatten it into rows.

    The response body is retrieved from the browser through the CDP command
    ``Network.getResponseBody`` using the module-level ``driver``.

    Args:
        api: A logged event; its ``["params"]["requestId"]`` identifies the
            request whose response body is fetched.
        data_type: One of ``'summary'``, ``'attack'``, ``'defence'`` or
            ``'passing'``; selects which stats are read from each result.

    Returns:
        A list with one list per entry of the response's ``results``:
        the player's name followed by the stats for ``data_type``.

    Raises:
        ValueError: If ``data_type`` is not one of the supported values.
            (Previously this surfaced as an UnboundLocalError inside the loop.)
        KeyError: If a result lacks the player name or an expected stat key.
    """
    # Validate before talking to the browser so a typo fails fast and clearly.
    try:
        stat_fields = _STAT_FIELDS[data_type]
    except KeyError:
        raise ValueError(
            f"Unknown data_type {data_type!r}; expected one of {sorted(_STAT_FIELDS)}"
        ) from None

    body = driver.execute_cdp_cmd(
        'Network.getResponseBody', {'requestId': api["params"]["requestId"]}
    )['body']
    response = json.loads(body)

    return [
        [result['player']['name'], *(result[field] for field in stat_fields)]
        for result in response['results']
    ]

In [60]:
def aggregate_data(apis,data_type):
    """Collect the rows of several captured responses into one flat list.

    Calls ``extract_data`` once per item of ``apis`` (in order) with the given
    ``data_type`` and concatenates the returned rows.
    """
    return [row for api in apis for row in extract_data(api, data_type)]

In [61]:
# Column names for each statistics table; "Player" comes first, followed by
# the stat names in column order.
summary_headers = [
    "Player",
    "goals",
    "successfulDribbles",
    "tackles",
    "assists",
    "accuratePassesPercentage",
    "rating",
]
attack_headers = [
    "Player",
    "goals",
    "bigChancesMissed",
    "totalShots",
    "goalConversionPercentage",
]
defence_headers = [
    "Player",
    "tackles",
    "interceptions",
    "clearances",
    "errorLeadToGoal",
]
passing_headers = [
    "Player",
    "assists",
    "accuratePasses",
    "accuratePassesPercentage",
    "keyPasses",
]

In [71]:
# Build the row lists for each statistics tab from the captured requests.
# NOTE(review): the first captured summary request is skipped here; the reason
# is not visible in this notebook (possibly a duplicate) -- confirm.
summary_data = aggregate_data(apis=summary_apis[1:], data_type='summary')
attack_data = aggregate_data(apis=attack_apis, data_type='attack')
defence_data = aggregate_data(apis=defence_apis, data_type='defence')
passing_data = aggregate_data(apis=passing_apis, data_type='passing')

In [72]:
# Wrap each list of rows in a DataFrame labelled with its matching headers.
summary = pd.DataFrame(
    data=summary_data,
    columns=summary_headers,
)
attack = pd.DataFrame(
    data=attack_data,
    columns=attack_headers,
)
defence = pd.DataFrame(
    data=defence_data,
    columns=defence_headers,
)
passing = pd.DataFrame(
    data=passing_data,
    columns=passing_headers,
)

In [73]:
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing (a no-op when it is already there).
os.makedirs("./epl_data", exist_ok=True)

# With the default settings the row index is written as the first column.
summary.to_csv("./epl_data/summary_per_90_min.csv")
attack.to_csv("./epl_data/attack_per_90_min.csv")
defence.to_csv("./epl_data/defence_per_90_min.csv")
passing.to_csv("./epl_data/passing_per_90_min.csv")

In [74]:
# Show the summary table as the cell's output; the rendered output for this
# cell is truncated to the leading and trailing rows.
summary

Unnamed: 0,Player,goals,successfulDribbles,tackles,assists,accuratePassesPercentage,rating
0,Dimitri Payet,0.32,2.35,0.84,0.42,80.14,7.68
1,Mesut Özil,0.18,1.36,1.03,0.56,86.29,7.63
2,Santi Cazorla,0.00,2.58,1.95,0.21,90.24,7.59
3,Alexis Sánchez,0.48,3.72,1.73,0.15,79.58,7.49
4,Philippe Coutinho,0.36,2.20,1.57,0.22,78.94,7.46
...,...,...,...,...,...,...,...
545,João Carlos Teixeira,0.00,0.00,30.00,,100.00,0.00
546,Sam Field,0.00,0.00,0.00,,50.00,0.00
547,Rushian Hepburn-Murphy,0.00,0.00,0.00,,0.00,0.00
548,James Weir,0.00,0.00,0.00,,0.00,0.00
