In [16]:
# --- standard library ---
from pprint import pprint

# --- third-party ---
import pandas as pd
import plotly.express as px
import requests
from tqdm.auto import tqdm

# --- notebook configuration ---
# Hook tqdm into pandas so the `progress_apply` methods used for the
# per-player downloads are available.
tqdm.pandas()

# Lift the column display limit so wide frames are rendered in full.
pd.set_option("display.max_columns", None)

In [2]:
# Root of the Fantasy Premier League API; endpoint paths are appended to it.
base_url = "https://fantasy.premierleague.com/api/"

# Fetch the "bootstrap-static" payload.
# - timeout: without it, requests waits indefinitely on a stalled connection
#   and the cell never finishes.
# - raise_for_status(): turns an HTTP error response into an immediate,
#   descriptive exception instead of a JSON decoding error or KeyError later.
response = requests.get(base_url + "bootstrap-static/", timeout=30)
response.raise_for_status()
r = response.json()

# depth=1 limits the printout to the top-level keys of the payload.
pprint(r, indent=2, depth=1, compact=True)

{ 'element_stats': [...],
  'element_types': [...],
  'elements': [...],
  'events': [...],
  'game_settings': {...},
  'phases': [...],
  'teams': [...],
  'total_players': 10618394}


In [3]:
# Flatten three sections of the bootstrap payload into DataFrames:
#   r["elements"]      -> players
#   r["teams"]         -> teams
#   r["element_types"] -> position
players, teams, position = (
    pd.json_normalize(r[section])
    for section in ("elements", "teams", "element_types")
)

In [4]:
# Join each player row to its team row: players["team"] matches teams["id"].
df = players.merge(teams, left_on="team", right_on="id")

In [5]:
# Join the position table (df["element_type"] matches position["id"]) and, in
# the same chain, rename the generic "name" / "singular_name" columns to
# "team_name" / "position_name".
df = df.merge(position, left_on="element_type", right_on="id").rename(
    columns={"name": "team_name", "singular_name": "position_name"}
)

In [6]:
def _fetch_element_summary(player_id, timeout=30):
    """Download and decode the element-summary payload for one player.

    Shared by `get_gameweek_history` and `get_season_history` so the request
    logic lives in one place.

    Parameters
    ----------
    player_id : int or str
        Player id; it is converted with ``str()`` and placed in the URL.
    timeout : float, default 30
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` can block indefinitely on a stalled connection.

    Returns
    -------
    dict
        The decoded JSON response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    requests.RequestException
        For connection problems or timeouts.
    """
    # https://fantasy.premierleague.com/api/element-summary/{PID}/
    response = requests.get(
        base_url + "element-summary/" + str(player_id) + "/", timeout=timeout
    )
    # Fail loudly on an HTTP error rather than with a KeyError on the payload.
    response.raise_for_status()
    return response.json()


def get_gameweek_history(player_id):
    """get all gameweek info for a given player_id

    Returns a DataFrame built from the 'history' entry of the player's
    element-summary response.
    """
    summary = _fetch_element_summary(player_id)

    # extract 'history' data from response into dataframe
    return pd.json_normalize(summary["history"])


def get_season_history(player_id):
    """get all past season info for a given player_id

    Returns a DataFrame built from the 'history_past' entry of the player's
    element-summary response.
    """
    summary = _fetch_element_summary(player_id)

    # extract 'history_past' data from response into dataframe
    return pd.json_normalize(summary["history_past"])

In [7]:
# Slim player lookup table: player id, names, club name and short position
# label, assembled in one chain.
players_df = (
    players[["id", "first_name", "second_name", "web_name", "team", "element_type"]]
    # Club name. Both frames have an "id" column: the player's gets the
    # "_player" suffix, the team's keeps the bare name and is dropped with
    # the join key right after.
    .merge(
        teams[["id", "name"]],
        left_on="team",
        right_on="id",
        suffixes=["_player", None],
    )
    .drop(columns=["team", "id"])
    # Short position label; the join key columns are dropped afterwards.
    .merge(
        position[["id", "singular_name_short"]],
        left_on="element_type",
        right_on="id",
    )
    .drop(columns=["element_type", "id"])
)

In [8]:
# Preview the player lookup table (id, names, club, short position label).
players_df

Unnamed: 0,id_player,first_name,second_name,web_name,name,singular_name_short
0,1,Folarin,Balogun,Balogun,Arsenal,FWD
1,2,Cédric,Alves Soares,Cédric,Arsenal,DEF
2,3,Mohamed,Elneny,M.Elneny,Arsenal,MID
3,4,Fábio,Ferreira Vieira,Fábio Vieira,Arsenal,MID
4,5,Gabriel,dos Santos Magalhães,Gabriel,Arsenal,DEF
...,...,...,...,...,...,...
795,769,Owen,Hesketh,Hesketh,Wolves,MID
796,770,Ty,Barnett,Barnett,Wolves,MID
797,787,Harvey,Griffiths,Griffiths,Wolves,MID
798,788,Yerson,Mosquera,Mosquera,Wolves,DEF


In [9]:
# Fetch the gameweek history of every player. get_gameweek_history issues one
# HTTP request per id, so this is the slow step of the notebook; tqdm draws a
# progress bar while it runs.
player_ids = players_df["id_player"]
points = player_ids.progress_apply(get_gameweek_history)

  0%|          | 0/800 [00:00<?, ?it/s]

100%|██████████| 800/800 [02:05<00:00,  6.39it/s]


In [10]:
# Stack the per-player results held in `points` into one long table.
points_df = pd.concat(list(points))

In [51]:
# Show the player lookup table again before it is joined to the gameweek data.
players_df

Unnamed: 0,id_player,first_name,second_name,web_name,name,singular_name_short
0,1,Folarin,Balogun,Balogun,Arsenal,FWD
1,2,Cédric,Alves Soares,Cédric,Arsenal,DEF
2,3,Mohamed,Elneny,M.Elneny,Arsenal,MID
3,4,Fábio,Ferreira Vieira,Fábio Vieira,Arsenal,MID
4,5,Gabriel,dos Santos Magalhães,Gabriel,Arsenal,DEF
...,...,...,...,...,...,...
795,769,Owen,Hesketh,Hesketh,Wolves,MID
796,770,Ty,Barnett,Barnett,Wolves,MID
797,787,Harvey,Griffiths,Griffiths,Wolves,MID
798,788,Yerson,Mosquera,Mosquera,Wolves,DEF


In [1]:
# Label every gameweek row with the player's display name and club:
# players_df["id_player"] is matched against points_df["element"].
player_labels = players_df[["id_player", "web_name", "name"]]
final_df = pd.merge(
    player_labels, points_df, left_on="id_player", right_on="element"
)

NameError: name 'players_df' is not defined

In [53]:
# Inspect the joined table of player labels and gameweek rows.
final_df

Unnamed: 0,id_player,web_name,name,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out
0,1,Balogun,Arsenal,1,2,16,0,True,2023-08-12T12:00:00Z,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,45,0,59090,0,0
1,1,Balogun,Arsenal,1,12,8,0,False,2023-08-21T19:00:00Z,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,45,-4744,63768,4959,9703
2,1,Balogun,Arsenal,1,21,10,0,True,2023-08-26T14:00:00Z,2,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,45,-7682,58109,2507,10189
3,1,Balogun,Arsenal,1,31,14,0,True,2023-09-03T15:30:00Z,3,1,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,44,-9696,49814,2558,12254
4,1,Balogun,Arsenal,1,43,9,0,False,2023-09-17T15:30:00Z,0,1,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,44,-12786,37048,0,12786
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16067,787,Griffiths,Wolves,787,204,5,0,False,2024-01-22T19:45:00Z,0,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,45,0,0,0,0
16068,787,Griffiths,Wolves,787,218,14,0,True,2024-02-01T20:15:00Z,0,2,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,45,20,37,25,5
16069,788,Mosquera,Wolves,788,204,5,0,False,2024-01-22T19:45:00Z,0,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,40,0,0,0,0
16070,788,Mosquera,Wolves,788,218,14,0,True,2024-02-01T20:15:00Z,0,2,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,40,16,28,19,3


In [83]:
# Per-player totals: each listed stat is summed over all of that player's rows
# in final_df. Grouping by id, display name and club keeps those labels as
# columns after reset_index().
summed_stats = [
    "starts",
    "total_points",
    "goals_scored",
    "assists",
    "clean_sheets",
    "own_goals",
    "yellow_cards",
    "red_cards",
    "bonus",
    "bps",
]

grouped_df = (
    final_df.groupby(["element", "web_name", "name"])
    .agg(dict.fromkeys(summed_stats, "sum"))
    .reset_index()
)

In [78]:
# Inspect the per-player totals.
grouped_df

Unnamed: 0,element,web_name,name,total_points,goals_scored,assists,bonus,clean_sheets,yellow_cards,red_cards
0,1,Balogun,Arsenal,0,0,0,0,0,0,0
1,2,Cédric,Arsenal,0,0,0,0,0,0,0
2,3,M.Elneny,Arsenal,4,0,1,0,0,0,0
3,4,Fábio Vieira,Arsenal,21,1,3,0,1,0,1
4,5,Gabriel,Arsenal,77,2,1,6,6,1,0
...,...,...,...,...,...,...,...,...,...,...
793,795,Hernes,Newcastle,0,0,0,0,0,0,0
794,796,Sekularac,Fulham,0,0,0,0,0,0,0
795,797,Fredrick,Brentford,0,0,0,0,0,0,0
796,799,Faivre,Bournemouth,0,0,0,0,0,0,0


In [104]:
# Top 20 players by total points. grouped_df comes straight out of a groupby,
# so a plain head() would only show the lowest element ids; sort first so the
# table is an actual ranking.
# NOTE(review): the recorded output for this cell is ordered by total_points,
# which is why that column was chosen -- confirm this is the intended ranking.
grouped_df.sort_values("total_points", ascending=False).head(20)

Unnamed: 0,element,web_name,name,total_points,goals_scored,assists,bonus
307,308,Salah,Liverpool,156,14,8,21
515,516,Son,Spurs,136,12,5,18
59,60,Watkins,Aston Villa,128,10,11,15
18,19,Saka,Arsenal,120,7,8,14
354,355,Haaland,Man City,113,14,5,15
525,526,Bowen,West Ham,113,11,2,8
342,343,J.Alvarez,Man City,112,8,8,17
411,412,Gordon,Newcastle,105,7,7,8
361,362,Palmer,Chelsea,104,9,5,14
352,353,Foden,Man City,104,5,7,12


In [75]:
# Bar chart of the 30 players with the highest summed "bonus" value.
# A title and readable axis labels are set so the figure can be understood on
# its own, without reading the code that produced it.
fig = px.bar(
    grouped_df.sort_values("bonus", ascending=False).head(30),
    x="web_name",
    y="bonus",
    title="Top 30 players by total bonus points",
    labels={"web_name": "Player", "bonus": "Bonus points"},
)
fig.show()

In [84]:
# Compare the totals of a hand-picked shortlist of players.
# Other candidates, currently left out: Gross, Douglas Luiz, Bernardo.
shortlist = ["Trippier", "Pedro Porro"]
grouped_df.loc[grouped_df["web_name"].isin(shortlist)]

Unnamed: 0,element,web_name,name,starts,total_points,goals_scored,assists,clean_sheets,own_goals,yellow_cards,red_cards,bonus,bps
429,430,Trippier,Newcastle,20,93,0,8,6,0,5,0,20,486
505,506,Pedro Porro,Spurs,21,84,0,8,5,0,3,0,13,482
