# How to access FPL API data
This notebook outlines how to extract data from the FPL API using the `requests` package in Python.

In [1]:
import requests, os, time
from pprint import pprint
import pandas as pd
from tqdm.auto import tqdm
tqdm.pandas()

pd.set_option('display.max_columns', None)

## Bootstrap_static
This endpoint returns the data for players, positions and teams. We can inspect the different entities returned by this endpoint using the `pprint()` function with the `depth` parameter set to 1

In [2]:
# base url for all FPL API endpoints
base_url = 'https://fantasy.premierleague.com/api/'

# get data from bootstrap-static endpoint
r = requests.get(base_url+'bootstrap-static/').json()

# show the top level fields
pprint(r, indent=2, depth=1, compact=True)

{ 'element_stats': [...],
  'element_types': [...],
  'elements': [...],
  'events': [...],
  'game_settings': {...},
  'phases': [...],
  'teams': [...],
  'total_players': 1410320}


### Players
Player data can be found in the `elements` entity. The response from the `bootstrap_static` endpoint is in json format, which is essentially just a dictionary of keys and values.   
Therefore, to get the `elements` entity, we can use `r['elements']`

In [3]:
# get player data from 'elements' field
players = r['elements']

# show data for first player
pprint(players[0])

{'assists': 3,
 'bonus': 0,
 'bps': 67,
 'chance_of_playing_next_round': None,
 'chance_of_playing_this_round': None,
 'clean_sheets': 1,
 'clean_sheets_per_90': 0.31,
 'code': 438098,
 'corners_and_indirect_freekicks_order': None,
 'corners_and_indirect_freekicks_text': '',
 'cost_change_event': 0,
 'cost_change_event_fall': 0,
 'cost_change_start': 0,
 'cost_change_start_fall': 0,
 'creativity': '88.8',
 'creativity_rank': 267,
 'creativity_rank_type': 146,
 'direct_freekicks_order': None,
 'direct_freekicks_text': '',
 'dreamteam_count': 0,
 'element_type': 3,
 'ep_next': '3.0',
 'ep_this': None,
 'event_points': 0,
 'expected_assists': '0.64',
 'expected_assists_per_90': 0.2,
 'expected_goal_involvements': '1.89',
 'expected_goal_involvements_per_90': 0.59,
 'expected_goals': '1.25',
 'expected_goals_conceded': '1.83',
 'expected_goals_conceded_per_90': 0.57,
 'expected_goals_per_90': 0.39,
 'first_name': 'Fábio',
 'form': '0.0',
 'form_rank': 464,
 'form_rank_type': 187,
 'goals_c

### Use pandas for cleaner visualisation
Pandas can easily convert JSON data into a dataframe using the `json_normalize()` function.

In [4]:
# create players dataframe
players = pd.json_normalize(r['elements'])

# show some information about first five players
players[['id', 'web_name', 'team', 'element_type']].head()

Unnamed: 0,id,web_name,team,element_type
0,1,Fábio Vieira,1,3
1,2,G.Jesus,1,4
2,3,Gabriel,1,2
3,4,Havertz,1,4
4,5,Hein,1,1


### Teams

In [5]:
# create teams dataframe
teams = pd.json_normalize(r['teams'])

teams.head()

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,strength,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,0,ARS,5,,False,0,1350,1380,1370,1370,1330,1390,1
1,7,0,,2,0,Aston Villa,0,0,0,AVL,3,,False,0,1120,1245,1110,1140,1130,1350,2
2,91,0,,3,0,Bournemouth,0,0,0,BOU,3,,False,0,1100,1100,1075,1100,1130,1105,127
3,94,0,,4,0,Brentford,0,0,0,BRE,3,,False,0,1100,1100,1105,1095,1100,1110,130
4,36,0,,5,0,Brighton,0,0,0,BHA,3,,False,0,1100,1100,1100,1105,1100,1100,131


### Player positions

In [6]:
# get position information from 'element_types' field
positions = pd.json_normalize(r['element_types'])

positions.head()

Unnamed: 0,id,plural_name,plural_name_short,singular_name,singular_name_short,squad_select,squad_min_select,squad_max_select,squad_min_play,squad_max_play,ui_shirt_specific,sub_positions_locked,element_count
0,1,Goalkeepers,GKP,Goalkeeper,GKP,2,,,1,1,True,[12],63
1,2,Defenders,DEF,Defender,DEF,5,,,3,5,False,[],189
2,3,Midfielders,MID,Midfielder,MID,5,,,2,5,False,[],253
3,4,Forwards,FWD,Forward,FWD,3,,,1,3,False,[],68


## Combined dataset

In [7]:
# select columns of interest from players df
df = players[[
    'id', 'first_name', 'second_name', 'web_name', 'team', 'element_type']]

# join team name
df = df.merge(
    teams[['id', 'name']],
    left_on='team',
    right_on='id',
    suffixes=['_player', None]
).drop(
    ['team', 'id'], axis=1
# join player positions
).merge(
    positions[['id', 'singular_name_short']],
    left_on='element_type',
    right_on='id'
).drop(
    ['element_type', 'id'], axis=1)

df.rename(columns={'id_player': 'id', 'name': 'team_name',
                   'singular_name_short': 'position'},
          inplace=True)

df.head()

Unnamed: 0,id,first_name,second_name,web_name,team_name,position
0,1,Fábio,Ferreira Vieira,Fábio Vieira,Arsenal,MID
1,2,Gabriel,Fernando de Jesus,G.Jesus,Arsenal,FWD
2,3,Gabriel,dos Santos Magalhães,Gabriel,Arsenal,DEF
3,4,Kai,Havertz,Havertz,Arsenal,FWD
4,5,Karl,Hein,Hein,Arsenal,GKP


### Player gameweek history
Individual gameweek scores can be retrieved on a per-player basis

In [8]:
# get data from 'element-summary/{PID}/' endpoint for PID=3
r = requests.get(base_url + 'element-summary/3/').json()

# show top-level fields for player summary
pprint(r, depth=1)

{'fixtures': [...], 'history': [], 'history_past': [...]}


In [9]:
pd.json_normalize(r['history']).head()

If we create a function to get gameweek history for a player, we can then apply this function across all the rows of the `players` dataframe

In [10]:
def get_gameweek_history(player_id):
    '''get all gameweek info for a given player_id'''
    
    # send GET request to
    # https://fantasy.premierleague.com/api/element-summary/{PID}/
    r = requests.get(
            base_url + 'element-summary/' + str(player_id) + '/'
    ).json()
    
    # extract 'history' data from response into dataframe
    df = pd.json_normalize(r['history'])

    # avoid getting rate limited
    time.sleep(.3)
    
    return df

    
# show player #4's gameweek history
get_gameweek_history(player_id=3)[
    ['round', 'total_points', 'minutes', 'goals_scored', 'assists']].head(10)

KeyError: "None of [Index(['round', 'total_points', 'minutes', 'goals_scored', 'assists'], dtype='object')] are in the [columns]"

The same can be done to get past seasons' summaries

In [11]:
def get_season_history(player_id):
    '''get all past season info for a given player_id'''
    
    # send GET request to
    # fantasy.premierleague.com/api/element-summary/{PID}/
    r = requests.get(
        base_url + 'element-summary/' + str(player_id) + '/').json()
    
    # extract 'history_past' data from response into dataframe
    df = pd.json_normalize(r['history_past'])
    df.insert(0, 'id', player_id)

    # avoid getting rate limited
    time.sleep(.3)
    
    return df


# show player #3's gameweek history
get_season_history(player_id=3)[
    ['id', 'season_name', 'total_points', 'minutes',
     'goals_scored', 'assists']].head(10)

Unnamed: 0,id,season_name,total_points,minutes,goals_scored,assists
0,3,2020/21,78,1996,2,0
1,3,2021/22,146,3063,5,0
2,3,2022/23,146,3409,3,0
3,3,2023/24,149,3042,3,1
