Players Data Scraping

In [1]:
import requests
import json
import pandas as pd
import os

In [2]:
# Season identifiers sent to Understat as the 'season' form field (kept as strings).
season_names = [str(year) for year in range(2014, 2020)]
# Sizes of the "last N matches" windows, sent as the 'n_last_matches' form field.
gws = ['3', '5', '10']
# League identifiers sent to Understat as the 'league' form field.
leagues = [
    'EPL',
    'La_liga',
    'Bundesliga',
    'Serie_A',
    'Ligue_1',
]

In [3]:
def scrape_understat(payload, timeout=30):
    """POST ``payload`` to Understat's player-stats endpoint and return the players list.

    Parameters
    ----------
    payload : dict
        Form fields for the request. The callers in this notebook send
        'league', 'season' and, optionally, 'n_last_matches'.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (forwarded to
        ``requests.post``). Without it a stalled connection would block the
        notebook indefinitely.

    Returns
    -------
    The value stored under ``['response']['players']`` in the JSON reply.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the JSON reply lacks the 'response' or 'players' key.
    """
    url = 'https://understat.com/main/getPlayersStats/'
    # Browser-like headers (mimicking what Firefox sends for the same XHR).
    # Host, Connection, Content-Length and Accept-Encoding are intentionally
    # left out: requests derives them from the real request, so a stale
    # hard-coded length or an encoding we cannot decode is never advertised.
    # The body is form-encoded (data=payload), hence the single Content-Type.
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-Requested-With': 'XMLHttpRequest',
        'Origin': 'https://understat.com',
        'Referer': 'https://understat.com/league/EPL',
    }
    response = requests.post(url, data=payload, headers=headers,
                             timeout=timeout, verify=True)
    # Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
    response.raise_for_status()
    response_json = response.json()
    return response_json['response']['players']

In [4]:
def clean_df(player_df, weeks):
    """Return a copy of ``player_df`` with the stat columns tagged by period.

    Each of goals, xG, assists, xA, shots, key_passes, npg and npxG is renamed
    to ``<column>_<weeks>``. All other columns are kept unchanged; nothing is
    dropped.
    """
    stat_columns = ('goals', 'xG', 'assists', 'xA', 'shots', 'key_passes', 'npg', 'npxG')
    renamed = {column: column + '_' + weeks for column in stat_columns}
    return player_df.rename(columns=renamed)

In [5]:
def gw_data(season, league, no_of_gw, output_dir=None):
    """Fetch player stats over the last ``no_of_gw`` matches and write them to CSV.

    Parameters
    ----------
    season : str
        Season identifier sent to Understat (e.g. '2019').
    league : str
        League identifier sent to Understat (e.g. 'EPL').
    no_of_gw : str or int
        Number of most recent matches to aggregate over.
    output_dir : str, optional
        Directory that receives ``last_<no_of_gw>_gw_data.csv``. When omitted,
        the notebook's original hard-coded location is used.

    Returns
    -------
    pandas.DataFrame
        The frame that was written: stat columns suffixed with
        ``_<no_of_gw>wks`` and 'position' collapsed to DEF/MID/FWD/GK.
    """
    print('Getting data for last {} matches'.format(no_of_gw))
    # Use the league the caller asked for (it was previously pinned to 'EPL').
    json_player_data = scrape_understat(
        {'league': league, 'season': season, 'n_last_matches': no_of_gw})
    gw_table = pd.DataFrame(json_player_data)
    # Tag the stat columns with the actual window size, not always '3wks'.
    gw_df = clean_df(gw_table, '{}wks'.format(no_of_gw))
    # Replace position identifiers with something more useful: keep only the
    # first letter of the position string and map it to a readable label.
    # An empty result has no 'position' column, so skip the mapping then.
    if 'position' in gw_df.columns:
        gw_df['position'] = gw_df['position'].str.slice(0, 1)
        position_map = {'D': 'DEF', 'F': 'FWD', 'M': 'MID', 'G': 'GK', 'S': 'FWD'}
        gw_df = gw_df.replace({'position': position_map})
    # NOTE: the file name carries neither league nor season, so calls that
    # differ only in those arguments overwrite each other's output.
    if output_dir is None:
        csv_path = r'E:\AMotefaker\ABC\FPA\Web Scraping\Understat Data for Teams-Players (2014- present)\Data\Player_Data\gw_data\last_{}_gw_data.csv'.format(no_of_gw)
    else:
        csv_path = os.path.join(output_dir, 'last_{}_gw_data.csv'.format(no_of_gw))
    gw_df.to_csv(csv_path, encoding='utf-8', index=False)
    print('last {} matches csv data written'.format(no_of_gw))
    return gw_df

In [8]:
# Latest season, first league (EPL): one frame per "last N matches" window,
# fetched in the order 3, 5, 10.
last_3_gw_data_EPL, last_5_gw_data_EPL, last_10_gw_data_EPL = (
    gw_data(season_names[-1], leagues[0], window) for window in gws[:3]
)

Getting data for last 3 matches
last 3 matches csv data written
Getting data for last 5 matches
last 5 matches csv data written
Getting data for last 10 matches
last 10 matches csv data written


In [9]:
def season_data(season, league, output_dir=None):
    """Fetch whole-season player stats for one league and write them to CSV.

    Parameters
    ----------
    season : str
        Season identifier sent to Understat (e.g. '2014').
    league : str
        League identifier sent to Understat (e.g. 'EPL').
    output_dir : str, optional
        Directory that receives ``<season>_whole_season_data.csv``. When
        omitted, the notebook's original hard-coded location is used.

    Returns
    -------
    pandas.DataFrame
        The frame that was written, with stat columns suffixed ``_season``.
    """
    print('Getting data for {} season'.format(season))
    json_player_data = scrape_understat({'league': league, 'season': season})
    season_table = pd.DataFrame(json_player_data)
    season_df = clean_df(season_table, 'season')
    # NOTE: the file name does not include the league, so running this for two
    # leagues in the same season overwrites the first file.
    if output_dir is None:
        csv_path = r'E:\AMotefaker\ABC\FPA\Web Scraping\Understat Data for Teams-Players (2014- present)\Data\Player_Data\season_data\{}_whole_season_data.csv'.format(season)
    else:
        csv_path = os.path.join(output_dir, '{}_whole_season_data.csv'.format(season))
    season_df.to_csv(csv_path, encoding='utf-8', index=False)
    print('csv file for {} season written'.format(season))
    return season_df

In [15]:
# Whole-season frames for the first league (EPL), one per entry of
# season_names, fetched oldest season first.
(season_1415, season_1516, season_1617,
 season_1718, season_1819, season_1920) = (
    season_data(season, leagues[0]) for season in season_names[:6]
)

Getting data for 2014 season
csv file for 2014 season written
Getting data for 2015 season
csv file for 2015 season written
Getting data for 2016 season
csv file for 2016 season written
Getting data for 2017 season
csv file for 2017 season written
Getting data for 2018 season
csv file for 2018 season written
Getting data for 2019 season
csv file for 2019 season written


In [17]:
# Show the frame returned by season_data for season_names[0] ('2014').
season_1415

Unnamed: 0,id,player_name,games,time,goals_season,xG_season,assists_season,xA_season,shots_season,key_passes_season,yellow_cards,red_cards,position,team_title,npg_season,npxG_season,xGChain,xGBuildup
0,619,Sergio Agüero,33,2551,26,25.270159743726254,8,5.568922242149711,148,33,4,0,F S,Manchester City,21,20.70318364351988,27.805154908448458,6.878173082135618
1,647,Harry Kane,34,2589,21,17.15729223564267,4,3.922500966116786,112,27,4,0,F M S,Tottenham,19,14.873822528868914,16.488438992761075,5.549698735587299
2,802,Diego Costa,26,2111,20,15.219103761017323,3,4.554670915938914,76,41,8,0,F S,Chelsea,19,14.45793492347002,21.365789908915758,5.2769727278500795
3,848,Charlie Austin,35,3078,18,17.881850190460682,5,2.548747032880783,131,23,4,1,F,Queens Park Rangers,15,14.076042897999287,13.718276167288423,3.041321298107505
4,498,Alexis Sánchez,35,2967,16,13.451749975793064,8,8.49417957291007,122,82,4,0,F M S,Arsenal,16,12.690581078641117,27.15757153555751,10.736752955242991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
526,4528,Alejandro Faurlin,2,94,0,0.11255800724029541,0,0.4859839975833893,3,1,0,0,M S,Queens Park Rangers,0,0.11255800724029541,0.055191781371831894,0.022535664960741997
527,4530,Valentin Roberge,1,90,0,0.2825593948364258,0,0,1,0,1,0,D,Sunderland,0,0.2825593948364258,0,0
528,4531,Stuart O&#039;Keefe,2,88,0,0,0,0.019946111366152763,0,1,0,0,M S,Crystal Palace,0,0,0.019946111366152763,0.019946111366152763
529,4532,Gary Taylor-Fletcher,1,16,0,0,0,0,0,0,0,0,S,Leicester,0,0,0,0


In [18]:
# Show the frame returned by season_data for season_names[1] ('2015').
season_1516

Unnamed: 0,id,player_name,games,time,goals_season,xG_season,assists_season,xA_season,shots_season,key_passes_season,yellow_cards,red_cards,position,team_title,npg_season,npxG_season,xGChain,xGBuildup
0,647,Harry Kane,38,3382,25,22.732073578983545,1,3.088511780835688,159,44,5,0,F,Tottenham,20,18.926266126334667,26.939671490341425,8.189033068716526
1,619,Sergio Agüero,30,2399,24,20.077213659882545,2,2.0287597198039293,119,27,1,0,F S,Manchester City,20,16.271369472146034,18.786789989098907,3.925958522595465
2,755,Jamie Vardy,36,3154,24,22.135407269001007,6,6.267152491956949,115,48,5,1,F,Leicester,19,17.56843115389347,24.595971267670393,2.8863560035824776
3,594,Romelu Lukaku,37,3194,18,20.05878111720085,6,5.6804345063865185,118,51,3,0,F S,Everton,17,18.53648015856743,24.024903159588575,7.63484107516706
4,750,Riyad Mahrez,37,3087,17,11.882148338481784,11,11.45409781858325,86,68,1,0,M S,Leicester,13,7.315135253593326,21.18506269901991,7.174407683312893
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
545,1078,Sergi Canos,1,7,0,0,0,0,0,0,0,0,S,Liverpool,0,0,0,0
546,1079,Thomas Robson,1,90,0,0,0,0,0,0,0,0,D,Sunderland,0,0,0.09102849662303925,0.09102849662303925
547,1080,Rees Greenwood,1,53,0,0,0,0.02298414707183838,0,1,0,0,M,Sunderland,0,0,0.6708943247795105,0.6708943247795105
548,1084,Jonjoe Kenny,1,62,0,0.09364968538284302,0,0,1,0,0,0,S,Everton,0,0.09364968538284302,0.09364968538284302,0


In [19]:
# Show the frame returned by season_data for season_names[2] ('2016').
season_1617

Unnamed: 0,id,player_name,games,time,goals_season,xG_season,assists_season,xA_season,shots_season,key_passes_season,yellow_cards,red_cards,position,team_title,npg_season,npxG_season,xGChain,xGBuildup
0,647,Harry Kane,30,2556,29,19.82009919732809,7,5.5538915153592825,110,41,3,0,F M S,Tottenham,24,15.253085978329182,21.94719305820763,4.12599990144372
1,594,Romelu Lukaku,37,3271,25,16.665452419780195,6,5.440816408023238,110,47,3,0,F S,Everton,24,15.904283582232893,21.251998490653932,3.9702013842761517
2,498,Alexis Sánchez,38,3233,24,17.724449135363102,10,8.988117071799934,129,78,6,0,F M S,Arsenal,22,15.44094256311655,27.14118772558868,9.08672859147191
3,619,Sergio Agüero,31,2408,20,22.672585003077984,3,4.665906261652708,139,31,4,1,F S,Manchester City,16,18.866740860044956,25.35206551104784,5.0266740852966905
4,802,Diego Costa,35,3101,20,15.431797001510859,7,4.912058966234326,111,42,10,0,F,Chelsea,20,14.670628163963556,22.003214471042156,6.565549122169614
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
519,5575,Joel Pereira,1,90,0,0,0,0,0,0,0,0,GK,Manchester United,0,0,0.2117270529270172,0.2117270529270172
520,5576,James Husband,1,62,0,0,0,0,0,0,0,0,D,Middlesbrough,0,0,0.15693369507789612,0.15693369507789612
521,5586,Michael Folivi,1,6,0,0,0,0,0,0,0,0,S,Watford,0,0,0,0
522,5598,Angel Gomes,1,1,0,0,0,0,0,0,0,0,S,Manchester United,0,0,0,0


In [20]:
# Show the frame returned by season_data for season_names[3] ('2017').
season_1718

Unnamed: 0,id,player_name,games,time,goals_season,xG_season,assists_season,xA_season,shots_season,key_passes_season,yellow_cards,red_cards,position,team_title,npg_season,npxG_season,xGChain,xGBuildup
0,1250,Mohamed Salah,36,2954,32,25.136502970010042,10,8.344477602280676,143,62,1,0,F M S,Liverpool,31,23.6141653098166,35.301465447992086,5.898578152060509
1,647,Harry Kane,37,3094,30,26.859890587627888,2,3.8204412199556828,183,34,5,0,F S,Tottenham,28,24.576384104788303,28.51526607386768,7.9616343677043915
2,619,Sergio Agüero,25,1985,21,18.56861797720194,6,6.533229699358344,95,39,2,0,F S,Manchester City,17,15.523942567408085,23.707814872264862,7.512718818150461
3,755,Jamie Vardy,37,3269,20,15.266967067494988,1,3.6308264825493097,70,33,3,0,F,Leicester,15,10.699990747496486,15.584633570164442,1.9777417127043009
4,618,Raheem Sterling,33,2594,18,18.829967338591814,11,8.844115875661373,87,55,3,1,F S,Manchester City,17,18.06879848614335,33.65671702474356,12.149656612426043
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
510,6630,Joe Willock,2,82,0,0.1126566231250763,0,0,1,0,0,0,M S,Arsenal,0,0.1126566231250763,0.46951115131378174,0.35685452818870544
511,6657,Lasse Sorenson,1,78,0,0,1,0.36800387501716614,0,2,0,0,M,Stoke,0,0,0.06694087386131287,0.04735318198800087
512,6681,Harvey Barnes,3,5,0,0.05426165834069252,0,0,1,0,0,0,S,Leicester,0,0.05426165834069252,0.1033472940325737,0.1033472940325737
513,6722,Konstantinos Mavropanos,3,194,0,0,0,0,0,0,0,1,D,Arsenal,0,0,0.9823358356952667,0.9823358356952667


In [21]:
# Show the frame returned by season_data for season_names[4] ('2018').
season_1819

Unnamed: 0,id,player_name,games,time,goals_season,xG_season,assists_season,xA_season,shots_season,key_passes_season,yellow_cards,red_cards,position,team_title,npg_season,npxG_season,xGChain,xGBuildup
0,318,Pierre-Emerick Aubameyang,36,2740,22,23.549966726452112,5,4.989846890792251,94,33,0,0,F M S,Arsenal,18,19.744122434407473,26.582426249980927,6.483642949722707
1,838,Sadio Mané,36,3100,22,16.762808084487915,1,5.123734523542225,87,45,2,0,F M S,Liverpool,22,16.762808084487915,28.40134635940194,8.485077820718288
2,1250,Mohamed Salah,38,3274,22,21.79245018400252,8,10.468589510768652,137,68,1,0,F S,Liverpool,19,19.508943611755967,31.780298396945,7.865945274010301
3,619,Sergio Agüero,33,2515,21,19.9491464830935,8,5.22924312017858,118,34,3,0,F S,Manchester City,19,18.426808904856443,28.70022003352642,8.100998356938362
4,755,Jamie Vardy,34,2740,18,19.11597566306591,4,5.13733013253659,79,32,3,1,F S,Leicester,14,15.310131333768368,20.782562378793955,1.9056932125240564
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
500,7459,Will Norris,1,1,0,0,0,0,0,0,0,0,S,Wolverhampton Wanderers,0,0,0,0
501,7487,Aaron Rowe,2,68,0,0,0,0,0,0,0,0,M S,Huddersfield,0,0,0.022514449432492256,0.022514449432492256
502,7490,Mason Greenwood,3,111,0,0.7973630428314209,0,0.07234492897987366,7,3,0,0,F S,Manchester United,0,0.7973630428314209,1.0192557573318481,0.6869975328445435
503,7546,Harvey Elliott,2,16,0,0.056498244404792786,0,0,1,0,0,0,S,Fulham,0,0.056498244404792786,0.09627901762723923,0.03978077694773674


In [22]:
# Show the frame returned by season_data for season_names[5] ('2019').
season_1920

Unnamed: 0,id,player_name,games,time,goals_season,xG_season,assists_season,xA_season,shots_season,key_passes_season,yellow_cards,red_cards,position,team_title,npg_season,npxG_season,xGChain,xGBuildup
0,755,Jamie Vardy,35,3034,23,18.903537318110466,5,6.3682975601404905,89,32,3,0,F S,Leicester,19,15.097693115472794,21.02660731226206,1.7243406660854816
1,318,Pierre-Emerick Aubameyang,36,3143,22,16.352623080834746,3,4.492486916482449,93,26,3,1,F M S,Arsenal,20,14.830358987674117,19.964282035827637,5.339657470583916
2,986,Danny Ings,38,2836,22,15.659717170521617,2,2.8490850934758782,93,35,3,0,F M S,Southampton,21,14.137379484251142,18.48803149908781,5.015938125550747
3,618,Raheem Sterling,33,2678,20,19.799906481057405,1,7.208586284890771,100,48,5,0,F M S,Manchester City,20,18.277568746358156,31.4420103430748,10.185997404158115
4,1250,Mohamed Salah,34,2904,19,20.66331870108843,10,8.72604252398014,132,60,1,0,F S,Liverpool,16,18.379812128841877,31.37419793009758,8.42502685263753
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
510,8493,Jake Vokins,1,90,0,0.013284161686897278,0,0.09659572690725327,1,1,0,0,D,Southampton,0,0.013284161686897278,0.5018613934516907,0.4885772168636322
511,8496,Tommy Doyle,1,13,0,0.07760051637887955,0,0,1,0,0,0,S,Manchester City,0,0.07760051637887955,0.07760051637887955,0
512,8518,Jordan Thomas,1,1,0,0,0,0,0,0,0,0,S,Norwich,0,0,0,0
513,8562,Luke Thomas,3,270,0,0,1,0.6299818009138107,0,9,0,0,D,Leicester,0,0,1.3788530230522156,1.2677359282970428
