# This is my FPL (Fantasy Premier League) analysis workbook

In [17]:
#!/usr/bin/env python
# coding: utf-8
import os
import numpy as np
import pandas as pd
import requests
from datetime import datetime
import json
from tqdm import tqdm

## Set display options

In [18]:
# Show up to 50 rows and 50 columns when a DataFrame is displayed.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 50)

In [19]:
#### Connect to FPL API
def get(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``. Defaults to 30 seconds so that a
        stalled connection raises instead of hanging the notebook forever.

    Returns
    -------
    object
        Whatever ``json.loads`` produces from the response body.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not respond within ``timeout``.
    json.JSONDecodeError
        If the response body is not valid JSON.
    """
    # The HTTP status is deliberately not checked: the body is decoded as-is,
    # whatever the status code was.
    response = requests.get(url, timeout=timeout)
    return json.loads(response.content)

#### Getting player summary data
def get_player_summary(player_id):
    """Download the element-summary payload for one player.

    Builds the ``element-summary`` endpoint address from ``player_id``,
    fetches it through ``get`` and returns three of its entries as a list
    in the order ``[fixtures, history, history_past]``.

    A ``KeyError`` is raised when any of those entries is missing from the
    response.
    """
    endpoint = 'https://fantasy.premierleague.com/api/element-summary/{}/'.format(str(player_id))
    payload = get(endpoint)
    # Same lookup order as the returned list: fixtures, history, history_past.
    return [payload[section] for section in ('fixtures', 'history', 'history_past')]

## Dimension data from FPL

In [20]:
# Address of the bootstrap-static endpoint.
link = 'https://fantasy.premierleague.com/api/bootstrap-static/'
# Decoded JSON response, fetched once through get() and reused by the cells below.
data = get(link)

In [21]:
# List the top-level sections available in the bootstrap payload.
print(data.keys())

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])


In [22]:
# Load individual sections of the bootstrap payload into their own DataFrames.

# All player stats (total)
all_players = pd.DataFrame(data['elements'])

# All team stats (total)
all_teams = pd.DataFrame(data['teams'])

# All gameweek stats
all_events = pd.DataFrame(data['events'])

# Player types
player_types = pd.DataFrame(data['element_types'])

# Game phases
game_phases = pd.DataFrame(data['phases'])



## Fact data from FPL

In [23]:
# Per-player payloads collected from the element-summary endpoint; each list
# receives one entry (itself a list of records) per successfully fetched player.
player_season_history_list = []
player_gameweek_history_list = []
player_future_fixture_list = []

# Ids whose response lacked an expected section, kept so that skips are visible
# instead of silently disappearing.
player_ids_without_summary = []

# Request each player by its own id. range(len(all_players)) yields 0..N-1,
# which are row positions rather than player ids.
# NOTE(review): assumes the bootstrap 'elements' records carry an 'id' field — confirm.
for player_id in tqdm(all_players['id'].tolist()):
    try:
        fixtures, history, history_past = get_player_summary(player_id)
    except KeyError:
        # Best-effort: remember the player and carry on with the rest.
        player_ids_without_summary.append(player_id)
        continue
    # Tag each upcoming fixture with the player it was requested for.
    for fixture in fixtures:
        fixture['player_id'] = player_id
    player_gameweek_history_list.append(history)
    player_season_history_list.append(history_past)
    player_future_fixture_list.append(fixtures)


100%|██████████| 545/545 [00:48<00:00, 11.33it/s]


In [24]:
# Each *_list holds one list of records per player; flatten every one of them
# into a single table with one row per record.
player_season_history = pd.DataFrame(
    [record for per_player in player_season_history_list for record in per_player]
)

player_gameweek_history = pd.DataFrame(
    [record for per_player in player_gameweek_history_list for record in per_player]
)

player_future_fixture = pd.DataFrame(
    [record for per_player in player_future_fixture_list for record in per_player]
)


## Save Raw data files

In [25]:
# Destination folder for the raw extracts.
RAW_DATA_DIR = './data/2020-21/Raw_data'

# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs(RAW_DATA_DIR, exist_ok=True)

# (file stem, frame) pairs; every frame is written to <RAW_DATA_DIR>/<stem>.csv
# with a header row and without its index.
raw_frames = (
    ('all_players', all_players),
    ('all_teams', all_teams),
    ('all_events', all_events),
    ('player_types', player_types),
    ('game_phases', game_phases),
    ('player_season_history', player_season_history),
    ('player_gameweek_history', player_gameweek_history),
    ('player_future_fixture', player_future_fixture),
)
for file_stem, frame in raw_frames:
    frame.to_csv('{}/{}.csv'.format(RAW_DATA_DIR, file_stem), index=False, header=True)

## Clean Data

### Events

In [26]:
# Parse the deadline into datetimes and drop any timezone information.
all_events['deadline_time'] = pd.to_datetime(all_events['deadline_time']).dt.tz_localize(None)

# errors='ignore' keeps this cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise KeyError.
all_events = all_events.drop(
    columns=["highest_scoring_entry", "deadline_time_epoch", "transfers_made", "deadline_time_game_offset"],
    errors='ignore',
)

# Give the generic API keys explicit gameweek-specific names.
all_events = all_events.rename(columns={"id": "gameweek_id", "name": "gameweek_name"})
all_events.head()


Unnamed: 0,gameweek_id,gameweek_name,deadline_time,average_entry_score,finished,data_checked,highest_score,is_previous,is_current,is_next,chip_plays,most_selected,most_transferred_in,top_element,top_element_info,most_captained,most_vice_captained
0,1,Gameweek 1,2020-09-12 10:00:00,50,True,True,142.0,True,False,False,"[{'chip_name': 'bboost', 'num_played': 112843}...",259.0,1.0,254.0,"{'id': 254, 'points': 20}",4.0,4.0
1,2,Gameweek 2,2020-09-19 10:00:00,59,True,True,165.0,False,True,False,"[{'chip_name': 'bboost', 'num_played': 94615},...",259.0,302.0,390.0,"{'id': 390, 'points': 24}",4.0,254.0
2,3,Gameweek 3,2020-09-26 10:00:00,0,False,False,,False,False,True,[],,,,,,
3,4,Gameweek 4,2020-10-03 10:00:00,0,False,False,,False,False,False,[],,,,,,
4,5,Gameweek 5,2020-10-17 12:30:00,0,False,False,,False,False,False,[],,,,,,


### Teams

In [27]:
# Remove the listed columns from the teams table. errors='ignore' keeps this
# cell re-runnable: on a second run the columns are already gone and a plain
# drop would raise KeyError.
all_teams = all_teams.drop(
    columns=["draw", "form", "loss", "played", "points", "position", "team_division", "unavailable", "win", "pulse_id"],
    errors='ignore',
)


In [28]:
# Give the generic API keys explicit team-specific names.
team_column_names = {"code": "team_code_id", "id": "team_id"}
all_teams = all_teams.rename(columns=team_column_names)

In [29]:
# Preview up to the first 20 rows of the cleaned teams table.
all_teams.head(20)

Unnamed: 0,team_code_id,team_id,name,short_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away
0,3,1,Arsenal,ARS,4,1190,1270,1190,1200,1160,1270
1,7,2,Aston Villa,AVL,2,1050,1080,1010,1080,1000,1110
2,36,3,Brighton,BHA,2,1070,1070,1100,1130,1060,1060
3,90,4,Burnley,BUR,3,1140,1150,1130,1240,1050,1170
4,8,5,Chelsea,CHE,4,1200,1260,1140,1270,1240,1250
5,31,6,Crystal Palace,CRY,3,1080,1070,1080,1190,1010,1040
6,11,7,Everton,EVE,3,1100,1180,1090,1190,1150,1180
7,54,8,Fulham,FUL,2,1000,1030,1010,1030,1000,1050
8,13,9,Leicester,LEI,4,1180,1230,1180,1260,1220,1250
9,2,10,Leeds,LEE,2,1030,1040,1040,1070,990,990


### Players