# Fantasy Premier League (FPL) analysis workbook

In [11]:
#!/usr/bin/env python
# coding: utf-8
import os
import numpy as np
import pandas as pd
import requests
from datetime import datetime
import json
from tqdm import tqdm

## Set display options

In [12]:
# Cap how many rows and columns pandas renders when a frame is displayed.
pd.options.display.max_rows = 50
pd.options.display.max_columns = 50

In [13]:
#### Connect to FPL API
def get(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the body parsed as JSON.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    object
        Whatever ``json.loads`` produces for the response body.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails or times out.
    ValueError
        If the body is not valid JSON.

    Notes
    -----
    The HTTP status code is not checked; the body is parsed whatever the
    status was.
    """
    response = requests.get(url, timeout=timeout)
    return json.loads(response.content)

#### Getting player summary data
def get_player_summary(player_id):
    """Download one player's element-summary payload and split it up.

    Returns a list ``[fixtures, history, history_past]`` read from the
    identically named keys of the response. A payload that lacks one of
    those keys raises ``KeyError``.
    """
    payload = get(f'https://fantasy.premierleague.com/api/element-summary/{player_id}/')
    return [payload[section] for section in ('fixtures', 'history', 'history_past')]

## Dimension data from FPL

In [14]:
# One request to the bootstrap-static endpoint; the parsed payload is kept in
# `data` and sliced into tables in the cells below.
link = 'https://fantasy.premierleague.com/api/bootstrap-static/'
data = get(url=link)

In [15]:
# Show which top-level sections the bootstrap-static payload contains.
print(data.keys())

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])


In [16]:
# Turn five sections of the bootstrap payload into one DataFrame each:
#   all_players  <- 'elements'       (all player stats, totals)
#   all_teams    <- 'teams'          (all team stats, totals)
#   all_events   <- 'events'         (all gameweek stats)
#   player_types <- 'element_types'  (player types)
#   game_phases  <- 'phases'         (game phases)
all_players, all_teams, all_events, player_types, game_phases = (
    pd.DataFrame(data[section])
    for section in ('elements', 'teams', 'events', 'element_types', 'phases')
)



## Fact data from FPL

In [17]:
# Accumulators: each gets one entry (a list of records) per fetched player.
player_season_history_list = []
player_gameweek_history_list = []
player_future_fixture_list = []
# Ids whose summary payload lacked one of the expected sections.
skipped_player_ids = []

# Request the ids the API actually reported instead of row positions 0..N-1:
# the players table previewed further down starts at id 1, so positions would
# ask for a non-existent id 0 and never ask for the highest id.
# The column is named 'id' in the raw frame and 'player_id' once the cleaning
# cell further down has renamed it.
player_id_column = 'id' if 'id' in all_players.columns else 'player_id'

for player_id in tqdm(all_players[player_id_column].tolist()):
    try:
        fixtures, history, history_past = get_player_summary(player_id)
    except KeyError:
        # Best effort: remember the id and move on to the next player.
        skipped_player_ids.append(player_id)
        continue
    # Tag each upcoming fixture with the player it was fetched for.
    for fixture in fixtures:
        fixture['player_id'] = player_id
    player_gameweek_history_list.append(history)
    player_season_history_list.append(history_past)
    player_future_fixture_list.append(fixtures)


100%|██████████| 483/483 [00:22<00:00, 21.37it/s]


In [18]:
# Each *_list holds one list of records per player; concatenate those lists
# and build a single DataFrame per fact table.
player_season_history = pd.DataFrame(
    [record for per_player in player_season_history_list for record in per_player]
)

player_gameweek_history = pd.DataFrame(
    [record for per_player in player_gameweek_history_list for record in per_player]
)

player_future_fixture = pd.DataFrame(
    [record for per_player in player_future_fixture_list for record in per_player]
)


## Save Raw data files

In [19]:
# Snapshot every frame to CSV before any cleaning is applied.
raw_outputs = [
    (all_players, r'./data/2020-21/Raw_data/all_players.csv'),
    (all_teams, r'./data/2020-21/Raw_data/all_teams.csv'),
    (all_events, r'./data/2020-21/Raw_data/all_events.csv'),
    (player_types, r'./data/2020-21/Raw_data/player_types.csv'),
    (game_phases, r'./data/2020-21/Raw_data/game_phases.csv'),
    (player_season_history, r'./data/2020-21/Raw_data/player_season_history.csv'),
    (player_gameweek_history, r'./data/2020-21/Raw_data/player_gameweek_history.csv'),
    (player_future_fixture, r'./data/2020-21/Raw_data/player_future_fixture.csv'),
]

for frame, csv_path in raw_outputs:
    # to_csv does not create missing folders, so make sure the target exists.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    frame.to_csv(csv_path, index=False, header=True)

## Clean Data

### Events

In [7]:
# Parse the deadline and strip its timezone so the column is tz-naive.
all_events['deadline_time'] = pd.to_datetime(all_events['deadline_time']).dt.tz_localize(None)

# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and a strict drop would raise KeyError.
all_events = all_events.drop(
    columns=["highest_scoring_entry", "deadline_time_epoch", "transfers_made", "deadline_time_game_offset"],
    errors='ignore',
).rename(columns={"id" : "gameweek_id", "name" : "gameweek_name"})
all_events.head()


Unnamed: 0,gameweek_id,gameweek_name,deadline_time,average_entry_score,finished,data_checked,highest_score,is_previous,is_current,is_next,chip_plays,most_selected,most_transferred_in,top_element,top_element_info,most_captained,most_vice_captained
0,1,Gameweek 1,2019-08-09 18:00:00,65,True,False,142,False,False,False,"[{'chip_name': 'bboost', 'num_played': 128770}...",183,1,214,"{'id': 214, 'points': 20}",191,189
1,2,Gameweek 2,2019-08-17 10:30:00,41,True,False,119,False,False,False,"[{'chip_name': 'bboost', 'num_played': 114585}...",183,235,278,"{'id': 278, 'points': 17}",191,214
2,3,Gameweek 3,2019-08-23 18:00:00,44,True,False,107,False,False,False,"[{'chip_name': 'bboost', 'num_played': 79958},...",183,278,191,"{'id': 191, 'points': 15}",214,191
3,4,Gameweek 4,2019-08-31 10:30:00,57,True,False,136,False,False,False,"[{'chip_name': 'bboost', 'num_played': 72741},...",183,278,166,"{'id': 166, 'points': 16}",214,191
4,5,Gameweek 5,2019-09-14 10:30:00,52,True,False,128,False,False,False,"[{'chip_name': 'bboost', 'num_played': 58835},...",183,243,342,"{'id': 342, 'points': 16}",191,214


### Teams

In [8]:
# Remove columns that are not needed downstream. errors='ignore' keeps the
# cell re-runnable once the columns have already been removed.
all_teams = all_teams.drop(
    columns=["draw", "form", "loss", "played", "points", "position", "team_division", "unavailable", "win", "pulse_id"],
    errors='ignore',
)


In [9]:
# Give the team key columns explicit names.
all_teams = all_teams.rename(columns={"code" : "team_code_id", "id" : "team_id"})

In [10]:
# Preview the team table after the drop/rename steps (at most 20 rows).
all_teams.head(20)

Unnamed: 0,team_code_id,team_id,name,short_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away
0,3,1,Arsenal,ARS,4,1180,1240,1170,1170,1150,1200
1,7,2,Aston Villa,AVL,2,1020,1050,970,980,1000,1040
2,91,3,Bournemouth,BOU,2,1020,1020,990,1030,1000,1050
3,36,4,Brighton,BHA,2,1050,1010,1100,1100,1040,1030
4,90,5,Burnley,BUR,3,1110,1180,1130,1070,970,1110
5,8,6,Chelsea,CHE,4,1220,1230,1240,1240,1220,1250
6,31,7,Crystal Palace,CRY,3,1130,1120,1080,1180,1040,1040
7,11,8,Everton,EVE,3,1100,1240,1100,1210,1170,1180
8,13,9,Leicester,LEI,3,1210,1160,1150,1080,1210,1100
9,14,10,Liverpool,LIV,5,1340,1350,1310,1320,1310,1330


### Players

In [11]:
# Parse the news timestamp and strip its timezone so the column is tz-naive.
all_players['news_added'] = pd.to_datetime(all_players['news_added']).dt.tz_localize(None)

# Scale the price while the column still carries its raw name. After the
# rename below 'now_cost' no longer exists, so re-running this cell cannot
# divide the price a second time.
if 'now_cost' in all_players.columns:
    all_players['now_cost'] = all_players['now_cost'] / 10

# errors='ignore' keeps the drop re-runnable once the columns are gone.
all_players = all_players.drop(
    columns=["cost_change_event", "cost_change_event_fall", "cost_change_start_fall", "form", "in_dreamteam", "points_per_game", "special", "squad_number" , "dreamteam_count"],
    errors='ignore',
).rename(columns={"code" : "player_code" , "element_type" : "position_id" , "id" : "player_id", "news" : "player_news" , "now_cost" : "player_price" , "status" : "player_status" , "team" : "team_id" , "team_code" : "team_code_id" })


In [12]:
# Preview the first rows of the cleaned player table.
all_players.head()

Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,player_code,cost_change_start,position_id,ep_next,ep_this,event_points,first_name,player_id,player_news,news_added,player_price,photo,second_name,selected_by_percent,player_status,team_id,team_code_id,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,...,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,influence_rank,influence_rank_type,creativity_rank,creativity_rank_type,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type
0,0.0,0.0,69140,-4,2,0.0,0.0,0,Shkodran,1,Hamstring injury - Unknown return date,2020-07-18 22:30:20.553834,5.1,69140.jpg,Mustafi,0.4,i,1,3,43,25007,0,47630,0,0.3,...,1205,0,2,4,17,0,0,0,2,0,0,2,242,277.2,45.5,155.0,47.9,257,100,352,122,234,58,313,106
1,100.0,100.0,98745,-3,2,2.3,2.1,0,Héctor,2,,2019-12-09 20:00:21.228098,5.2,98745.jpg,Bellerín,1.0,a,1,3,44,206616,0,159819,0,0.3,...,1156,1,0,4,18,0,0,0,2,0,0,4,204,187.8,76.9,103.0,37.0,322,124,301,95,275,81,339,122
2,100.0,100.0,111457,-3,2,3.3,3.1,1,Sead,3,,2020-02-23 18:30:13.672943,5.2,111457.jpg,Kolasinac,0.5,a,1,3,55,65194,0,134275,0,0.5,...,1694,0,2,4,26,0,0,0,4,0,0,1,331,269.6,182.5,81.0,53.0,264,104,215,52,305,98,293,96
3,100.0,100.0,154043,-5,2,0.8,0.9,1,Ainsley,4,,2019-09-22 18:00:10.824841,4.5,154043.jpg,Maitland-Niles,2.2,a,1,3,41,610816,0,653555,0,0.1,...,1382,0,2,3,22,0,0,0,4,1,0,3,244,301.8,182.0,58.0,53.6,241,94,216,53,337,119,287,94
4,100.0,100.0,39476,-2,2,0.5,0.5,0,Sokratis,5,,2020-06-18 18:00:15.974146,4.8,39476.jpg,Papastathopoulos,1.3,a,1,3,57,182201,0,231413,0,0.0,...,1696,2,0,4,25,0,0,0,6,0,0,5,305,436.2,36.8,110.0,58.5,166,62,365,130,266,76,275,88


## Save to files

In [13]:
# Write the cleaned dimension tables, one CSV per frame.
cleaned_outputs = [
    (all_players, r'./data/all_players.csv'),
    (all_teams, r'./data/all_teams.csv'),
    (all_events, r'./data/all_events.csv'),
]

for frame, csv_path in cleaned_outputs:
    frame.to_csv(csv_path, index=False, header=True)
