# How to access FPL API data

In [1]:
import requests, os, time
from pprint import pprint
import pandas as pd
from tqdm.auto import tqdm
tqdm.pandas()

pd.set_option('display.max_columns', None)

## Bootstrap-static endpoint
The `bootstrap-static` endpoint returns the data for players, positions and teams. We can inspect the top-level entities returned by this endpoint using the `pprint()` function with the `depth` parameter set to 1.

In [2]:
# base url for all FPL API endpoints
base_url = 'https://fantasy.premierleague.com/api/'

# get data from bootstrap-static endpoint.
# The timeout stops a stalled connection from hanging the notebook, and
# raise_for_status() turns an HTTP error response into a clear HTTPError
# instead of a confusing failure later when the body is parsed or indexed.
response = requests.get(base_url + 'bootstrap-static/', timeout=30)
response.raise_for_status()
r = response.json()

# show the top level fields
pprint(r, indent=2, depth=1, compact=True)

{ 'element_stats': [...],
  'element_types': [...],
  'elements': [...],
  'events': [...],
  'game_settings': {...},
  'phases': [...],
  'teams': [...],
  'total_players': 8234727}


### Players
Player data can be found in the `elements` entity. The response from the `bootstrap-static` endpoint is in JSON format, which `.json()` parses into a Python dictionary of keys and values.   
Therefore, to get the `elements` entity, we can use `r['elements']`.

In [3]:
# get player data from the 'elements' field of the bootstrap response
players = r['elements']

# show data for first player (a single record of player attributes)
pprint(players[0])

{'assists': 0,
 'bonus': 0,
 'bps': 0,
 'chance_of_playing_next_round': 0,
 'chance_of_playing_this_round': 0,
 'clean_sheets': 0,
 'code': 37605,
 'corners_and_indirect_freekicks_order': None,
 'corners_and_indirect_freekicks_text': '',
 'cost_change_event': 0,
 'cost_change_event_fall': 0,
 'cost_change_start': -3,
 'cost_change_start_fall': 3,
 'creativity': '0.0',
 'creativity_rank': 706,
 'creativity_rank_type': 290,
 'direct_freekicks_order': None,
 'direct_freekicks_text': '',
 'dreamteam_count': 0,
 'element_type': 3,
 'ep_next': '0.0',
 'ep_this': '0.0',
 'event_points': 0,
 'first_name': 'Mesut',
 'form': '0.0',
 'goals_conceded': 0,
 'goals_scored': 0,
 'ict_index': '0.0',
 'ict_index_rank': 706,
 'ict_index_rank_type': 290,
 'id': 1,
 'in_dreamteam': False,
 'influence': '0.0',
 'influence_rank': 706,
 'influence_rank_type': 290,
 'minutes': 0,
 'news': "Not included in Arsenal's 25-man Premier League squad",
 'news_added': '2020-10-20T22:30:18.118477Z',
 'now_cost': 67,
 '

### Use pandas for cleaner visualisation
Pandas can easily convert JSON data into a dataframe using the `json_normalize()` function.

In [4]:
# build the players dataframe from the 'elements' records
players = pd.json_normalize(r['elements'])

# preview the identifying columns for the first five players
players.loc[:, ['id', 'web_name', 'team', 'element_type']].head()

Unnamed: 0,id,web_name,team,element_type
0,1,Özil,1,3
1,2,Sokratis,1,2
2,3,David Luiz,1,2
3,4,Aubameyang,1,3
4,5,Cédric,1,2


In [5]:
data_folder = os.path.join('..', 'data', '2020-21')

# create the output folder on first run: to_csv() does not create missing
# directories, so every save in this notebook would otherwise fail
os.makedirs(data_folder, exist_ok=True)

# select columns of interest and save as CSV
players = players[
    ['first_name', 'second_name', 'web_name', 'id', 'team', 'total_points',
     'dreamteam_count', 'element_type', 'in_dreamteam', 'now_cost',
     'points_per_game', 'minutes', 'goals_scored', 'assists', 'clean_sheets',
     'goals_conceded', 'own_goals', 'penalties_saved', 'penalties_missed',
     'yellow_cards', 'red_cards', 'saves', 'bonus', 'bps', 'influence',
     'creativity', 'threat', 'ict_index']]

players.to_csv(os.path.join(data_folder, 'players.csv'), index=False)

### Teams

In [6]:
# create teams dataframe from the 'teams' field of the bootstrap response
teams = pd.json_normalize(r['teams'])

# preview the first five teams
teams.head()

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,strength,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,0,ARS,4,,False,0,1200,1250,1130,1150,1220,1210,1
1,7,0,,2,0,Aston Villa,0,0,0,AVL,3,,False,0,1100,1160,1120,1140,1080,1100,2
2,36,0,,3,0,Brighton,0,0,0,BHA,3,,False,0,1130,1130,1150,1190,1100,1130,131
3,90,0,,4,0,Burnley,0,0,0,BUR,2,,False,0,1060,1070,1040,1090,1040,1080,43
4,8,0,,5,0,Chelsea,0,0,0,CHE,4,,False,0,1250,1280,1260,1300,1250,1310,4


In [7]:
# drop the columns we don't need and save as CSV.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell re-runnable: a second run no longer raises KeyError for columns
# that were already removed by the first run.
teams = teams.drop(
    columns=['code', 'draw', 'form', 'loss', 'points', 'position',
             'team_division', 'unavailable', 'pulse_id'],
    errors='ignore')

teams.to_csv(os.path.join(data_folder, 'teams.csv'), index=False)

### Player positions

In [8]:
# get position information from the 'element_types' field of the
# bootstrap response
positions = pd.json_normalize(r['element_types'])

# preview the position records
positions.head()

Unnamed: 0,id,plural_name,plural_name_short,singular_name,singular_name_short,squad_select,squad_min_play,squad_max_play,ui_shirt_specific,sub_positions_locked,element_count
0,1,Goalkeepers,GKP,Goalkeeper,GKP,2,1,1,True,[12],80
1,2,Defenders,DEF,Defender,DEF,5,3,5,False,[],245
2,3,Midfielders,MID,Midfielder,MID,5,2,5,False,[],290
3,4,Forwards,FWD,Forward,FWD,3,1,3,False,[],91


In [9]:
# keep only the position columns of interest, then write them to CSV
positions = positions.loc[:, [
    'id', 'singular_name', 'singular_name_short', 'squad_select',
    'squad_min_play', 'squad_max_play']]
positions.to_csv(os.path.join(data_folder, 'positions.csv'), index=False)

## Combined dataset

In [10]:
# start from the identifying columns of the players df, then attach the
# team name and the short position name via two lookups
df = (
    players[['id', 'first_name', 'second_name', 'web_name', 'team',
             'element_type']]
    # join team name; the player's own 'id' is suffixed to 'id_player' so
    # it does not clash with the team 'id'
    .merge(
        teams[['id', 'name']],
        left_on='team',
        right_on='id',
        suffixes=['_player', None])
    .drop(columns=['team', 'id'])
    # join player positions
    .merge(
        positions[['id', 'singular_name_short']],
        left_on='element_type',
        right_on='id')
    .drop(columns=['element_type', 'id'])
    # restore the player id name and give the joined columns clearer names
    .rename(columns={'id_player': 'id', 'name': 'team_name',
                     'singular_name_short': 'position'})
)

df.head()

Unnamed: 0,id,first_name,second_name,web_name,team_name,position
0,1,Mesut,Özil,Özil,Arsenal,MID
1,4,Pierre-Emerick,Aubameyang,Aubameyang,Arsenal,MID
2,9,Granit,Xhaka,Xhaka,Arsenal,MID
3,18,Nicolas,Pépé,Pépé,Arsenal,MID
4,19,Lucas,Torreira,Torreira,Arsenal,MID


### Player gameweek history
Individual gameweek scores can be retrieved on a per-player basis from the `element-summary/{PID}/` endpoint, where `{PID}` is the player's `id`.

In [11]:
# get data from 'element-summary/{PID}/' endpoint for PID=4.
# NOTE: this rebinds `r`, so the bootstrap-static response is no longer
# available under that name from this cell onwards.
# The timeout and raise_for_status() make a stalled or failed request fail
# fast with a clear error instead of hanging or breaking during parsing.
response = requests.get(base_url + 'element-summary/4/', timeout=30)
response.raise_for_status()
r = response.json()

# show top-level fields for player summary
pprint(r, depth=1)

{'fixtures': [...], 'history': [...], 'history_past': [...]}


In [12]:
# show the first rows of the player's 'history' entity as a dataframe
pd.json_normalize(r['history']).head()

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out
0,4,2,8,7,False,2020-09-12T11:30:00Z,0,3,1,90,1,0,1,0,0,0,0,1,0,0,0,19,36.6,15.3,54.0,10.6,120,0,2823465,0,0
1,4,9,19,5,True,2020-09-19T19:00:00Z,2,1,2,90,0,1,0,1,0,0,0,0,0,0,0,20,28.0,36.4,15.0,7.9,120,-76570,3002806,146991,223561
2,4,23,11,2,False,2020-09-28T19:00:00Z,3,1,3,90,0,0,0,3,0,0,0,0,0,0,0,5,4.2,0.9,4.0,0.9,119,-967555,2046859,50656,1018211
3,4,29,15,2,True,2020-10-04T13:00:00Z,2,1,4,90,0,0,0,1,0,0,0,0,0,0,0,10,9.4,4.6,30.0,4.4,119,-120167,2000213,106551,226718
4,4,44,12,2,False,2020-10-17T16:30:00Z,1,0,5,90,0,0,0,1,0,0,0,0,0,0,0,7,8.6,29.9,2.0,4.1,118,-582811,1397443,29415,612226


If we create a function to get gameweek history for a player, we can then apply this function to every player `id` in the `players` dataframe.

In [13]:
def get_gameweek_history(player_id):
    '''Get all gameweek info for a given player_id.

    Sends a GET request to
    https://fantasy.premierleague.com/api/element-summary/{PID}/
    and returns the 'history' entity of the response as a dataframe.

    Raises requests.HTTPError if the API answers with an error status, and
    a requests timeout exception if no response arrives within 30 seconds.
    '''

    response = requests.get(
        base_url + 'element-summary/' + str(player_id) + '/',
        timeout=30
    )

    # fail with a clear HTTP error (e.g. when rate limited) instead of an
    # obscure JSON/KeyError failure part-way through a long loop
    response.raise_for_status()

    # extract 'history' data from response into dataframe
    df = pd.json_normalize(response.json()['history'])

    # avoid getting rate limited
    time.sleep(.3)

    return df

    
# preview selected columns of player #4's gameweek history
get_gameweek_history(4).loc[
    :, ['round', 'total_points', 'minutes', 'goals_scored', 'assists']].head()

Unnamed: 0,round,total_points,minutes,goals_scored,assists
0,1,7,90,1,0
1,2,5,90,0,1
2,3,2,90,0,0
3,4,2,90,0,0
4,5,2,90,0,0


The same can be done to get past seasons' summaries, which are returned in the `history_past` entity of the same endpoint.

In [14]:
def get_season_history(player_id):
    '''Get all past season info for a given player_id.

    Sends a GET request to
    https://fantasy.premierleague.com/api/element-summary/{PID}/
    and returns the 'history_past' entity of the response as a dataframe,
    with the player_id added as a leading 'id' column.

    Raises requests.HTTPError if the API answers with an error status, and
    a requests timeout exception if no response arrives within 30 seconds.
    '''

    response = requests.get(
        base_url + 'element-summary/' + str(player_id) + '/',
        timeout=30
    )

    # fail with a clear HTTP error (e.g. when rate limited) instead of an
    # obscure JSON/KeyError failure part-way through a long loop
    response.raise_for_status()

    # extract 'history_past' data from response into dataframe
    df = pd.json_normalize(response.json()['history_past'])

    # tag every row with the player it belongs to
    df.insert(0, 'id', player_id)

    # avoid getting rate limited
    time.sleep(.3)

    return df


# show player #1's past season history
get_season_history(1)[['id', 'season_name', 'total_points', 'minutes',
                       'goals_scored', 'assists']].head(10)

Unnamed: 0,id,season_name,total_points,minutes,goals_scored,assists
0,1,2013/14,137,2141,5,10
1,1,2014/15,103,1857,4,6
2,1,2015/16,200,3036,6,19
3,1,2016/17,167,2841,8,11
4,1,2017/18,112,2161,4,9
5,1,2018/19,89,1732,5,3
6,1,2019/20,53,1439,1,3


In [15]:
# fetch the gameweek history of every player id (one dataframe each),
# stack the per-player frames and rename 'element' to 'id'
points = pd.concat(
    players['id'].progress_apply(get_gameweek_history).tolist()
).rename(columns={'element': 'id'})

# write the combined history to CSV
points.to_csv(os.path.join(data_folder, 'gameweek_history.csv'), index=False)

100%|██████████| 706/706 [05:35<00:00,  2.11it/s]


In [16]:
# fetch the past-season history of every player id (one dataframe each)
# and stack the per-player frames into a single dataframe
seasons = pd.concat(
    players['id'].progress_apply(get_season_history).tolist()
)

# write the combined history to CSV
seasons.to_csv(os.path.join(data_folder, 'seasons_history.csv'), index=False)

100%|██████████| 706/706 [05:31<00:00,  2.13it/s]
