## Import Libraries

In [47]:
#!/usr/bin/env python
# coding: utf-8
import os
import numpy as np
import pandas as pd
import requests
from datetime import datetime
import json

import time
from tqdm import tqdm

# Cap how many rows and columns pandas renders when a frame is displayed.
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, 50)

## Import files

In [48]:
# Reference tables produced earlier and stored under ./data.
all_players = pd.read_csv(filepath_or_buffer=r'./data/all_players.csv')
all_teams = pd.read_csv(filepath_or_buffer=r'./data/all_teams.csv')
# deadline_time is parsed into datetimes while reading.
all_events = pd.read_csv(
    filepath_or_buffer=r"./data/all_events.csv",
    parse_dates=["deadline_time"],
)


## Functions

In [49]:
#### Fetch a URL from the FPL API and decode the JSON response
def get(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely, so one stalled
            connection would freeze a whole multi-player download.

    Returns:
        The parsed JSON document (whatever ``json.loads`` produces for the
        response body).

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        ValueError: if the response body is not valid JSON.
    """
    # The HTTP status is intentionally not checked here: callers inspect the
    # decoded payload themselves and treat missing keys as "no data".
    response = requests.get(url, timeout=timeout)
    return json.loads(response.content)

#### Getting player summary data
def get_player_summary(player_id):
    """Download the element-summary payload for one player.

    Returns a three-item list ``[fixtures, history, history_past]`` taken from
    the keys of the same names in the response. A ``KeyError`` is raised when
    the response lacks any of those keys.
    """
    endpoint = 'https://fantasy.premierleague.com/api/element-summary/'
    payload = get(f"{endpoint}{player_id}/")
    # Key order matters: it fixes both the order of the returned list and
    # which missing key is reported first.
    return [payload[section] for section in ('fixtures', 'history', 'history_past')]

## Create lists of data for all players

In [50]:
# One entry per successfully fetched player; each entry is the list of records
# returned by the API for that player.
player_season_history_list = []
player_gameweek_history_list = []
player_future_fixture_list = []
# Player IDs whose response lacked the expected keys, kept for inspection
# instead of being dropped silently.
skipped_player_ids = []

# Iterate over the real player IDs. Counting with range(len(all_players))
# starts at 0 and stops one short of the row count, while the IDs in
# all_players start at 1, so a non-existent player 0 was requested and the
# highest ID was never fetched.
for player_id in tqdm(all_players["player_id"].tolist()):
    try:
        fixtures, history, history_past = get_player_summary(player_id)
    except KeyError:
        # The payload had no 'fixtures'/'history'/'history_past' key
        # (presumably an error response for an unknown ID -- TODO confirm).
        skipped_player_ids.append(player_id)
        continue

    # Fixture records are tagged with the requesting player's ID here.
    for fixture in fixtures:
        fixture['player_id'] = player_id

    player_gameweek_history_list.append(history)
    player_season_history_list.append(history_past)
    player_future_fixture_list.append(fixtures)


100%|██████████| 666/666 [00:52<00:00, 12.71it/s]


### Flatten lists

In [51]:
def _records_to_frame(nested_records):
    """Join the per-player record lists into one list and wrap it in a DataFrame."""
    flat_records = []
    for player_records in nested_records:
        flat_records.extend(player_records)
    return pd.DataFrame(flat_records)


player_season_history = _records_to_frame(player_season_history_list)

player_gameweek_history = _records_to_frame(player_gameweek_history_list)

player_future_fixture = _records_to_frame(player_future_fixture_list)


#### SAVE RAW DATA

In [52]:
# Create the output folder first: to_csv does not create missing directories,
# and failing here would throw away the result of the long download above.
os.makedirs(r'./data/player_past_history', exist_ok=True)

# Raw, untransformed frames exactly as assembled from the API responses.
player_season_history.to_csv(r'./data/player_past_history/player_season_history_raw.csv', index=False, header=True)
player_gameweek_history.to_csv(r'./data/player_past_history/player_gameweek_history_raw.csv', index=False, header=True)
player_future_fixture.to_csv(r'./data/player_past_history/player_future_fixture_raw.csv', index=False, header=True)

## DATA CLEANING

### Player Season History

In [53]:
# Preview the first five season-history rows before cleaning.
player_season_history.head(n=5)

Unnamed: 0,season_name,element_code,start_cost,end_cost,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index
0,2009/10,69140,45,45,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0
1,2010/11,69140,40,40,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0
2,2011/12,69140,50,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0
3,2012/13,69140,40,40,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0
4,2016/17,69140,60,58,90,2274,2,2,8,30,0,0,0,11,0,0,8,513,598.2,159.7,364.0,112.4


In [54]:
# Preview the player reference table used for the joins below.
all_players.head(n=5)

Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,player_code,cost_change_start,position_id,ep_next,ep_this,event_points,first_name,player_id,player_news,news_added,player_price,photo,second_name,selected_by_percent,player_status,team_id,team_code_id,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,...,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,influence_rank,influence_rank_type,creativity_rank,creativity_rank_type,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type
0,0.0,0.0,69140,-4,2,0.0,0.0,0,Shkodran,1,Hamstring injury - Unknown return date,2020-07-18 22:30:20.553834,5.1,69140.jpg,Mustafi,0.4,i,1,3,43,25007,0,47630,0,0.3,...,1205,0,2,4,17,0,0,0,2,0,0,2,242,277.2,45.5,155.0,47.9,257,100,352,122,234,58,313,106
1,100.0,100.0,98745,-3,2,2.3,2.1,0,Héctor,2,,2019-12-09 20:00:21.228098,5.2,98745.jpg,Bellerín,1.0,a,1,3,44,206616,0,159819,0,0.3,...,1156,1,0,4,18,0,0,0,2,0,0,4,204,187.8,76.9,103.0,37.0,322,124,301,95,275,81,339,122
2,100.0,100.0,111457,-3,2,3.3,3.1,1,Sead,3,,2020-02-23 18:30:13.672943,5.2,111457.jpg,Kolasinac,0.5,a,1,3,55,65194,0,134275,0,0.5,...,1694,0,2,4,26,0,0,0,4,0,0,1,331,269.6,182.5,81.0,53.0,264,104,215,52,305,98,293,96
3,100.0,100.0,154043,-5,2,0.8,0.9,1,Ainsley,4,,2019-09-22 18:00:10.824841,4.5,154043.jpg,Maitland-Niles,2.2,a,1,3,41,610816,0,653555,0,0.1,...,1382,0,2,3,22,0,0,0,4,1,0,3,244,301.8,182.0,58.0,53.6,241,94,216,53,337,119,287,94
4,100.0,100.0,39476,-2,2,0.5,0.5,0,Sokratis,5,,2020-06-18 18:00:15.974146,4.8,39476.jpg,Papastathopoulos,1.3,a,1,3,57,182201,0,231413,0,0.0,...,1696,2,0,4,25,0,0,0,6,0,0,5,305,436.2,36.8,110.0,58.5,166,62,365,130,266,76,275,88


In [55]:
# Columns from all_players that are attached to every season row.
player_lookup = all_players.loc[:, ["player_code", "web_name", "player_id"]]

player_season_history = (
    player_season_history
    # Divide both cost columns by 10 (e.g. 45 becomes 4.5).
    .assign(
        start_cost=lambda frame: frame["start_cost"] / 10,
        end_cost=lambda frame: frame["end_cost"] / 10,
    )
    # Default inner join: season rows whose element_code has no matching
    # player_code in all_players are dropped.
    .merge(player_lookup, left_on="element_code", right_on="player_code")
    # The join keys are redundant once player_id is attached.
    .drop(columns=["element_code", "player_code"])
)

player_season_history.head()

Unnamed: 0,season_name,start_cost,end_cost,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,web_name,player_id
0,2009/10,4.5,4.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,Mustafi,1
1,2010/11,4.0,4.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,Mustafi,1
2,2011/12,5.0,5.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,Mustafi,1
3,2012/13,4.0,4.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,Mustafi,1
4,2016/17,6.0,5.8,90,2274,2,2,8,30,0,0,0,11,0,0,8,513,598.2,159.7,364.0,112.4,Mustafi,1


### Player Gameweek History

In [56]:
# Preview the first five per-gameweek rows before merging.
player_gameweek_history.head(n=5)

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out
0,1,10,13,0,False,2019-08-11T13:00:00Z,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,55,0,33117,0,0
1,1,11,5,0,True,2019-08-17T11:30:00Z,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,55,-5280,36709,2868,8148
2,1,24,10,0,False,2019-08-24T16:30:00Z,3,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,54,-6882,30975,534,7416
3,1,31,17,0,True,2019-09-01T15:30:00Z,2,2,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,54,-3872,28096,346,4218
4,1,49,18,0,False,2019-09-15T15:30:00Z,2,2,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,53,-2073,26902,581,2654


In [57]:
# Preview the team reference table used to describe opponents.
all_teams.head(n=5)

Unnamed: 0,team_code_id,team_id,name,short_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away
0,3,1,Arsenal,ARS,4,1180,1240,1170,1170,1150,1200
1,7,2,Aston Villa,AVL,2,1020,1050,970,980,1000,1040
2,91,3,Bournemouth,BOU,2,1020,1020,990,1030,1000,1050
3,36,4,Brighton,BHA,2,1050,1010,1100,1100,1040,1030
4,90,5,Burnley,BUR,3,1110,1180,1130,1070,970,1110


In [58]:
# Opponent attributes, joined on the opponent's team id.
opponent_info = all_teams.loc[:, ["team_id", "strength", "short_name"]]
# Player attributes, joined on the player's id.
player_info = all_players.loc[:, ["player_id", "web_name", "position_id", "player_price"]]

# NOTE(review): renaming "value" to "player_price" below leaves two columns
# with that label, because all_players already contributes a "player_price"
# column. The saved CSV therefore has a duplicated header. Kept as-is so the
# output schema does not change; confirm with consumers before renaming.
merged_data = (
    player_gameweek_history
    .merge(opponent_info, left_on="opponent_team", right_on="team_id")
    .merge(player_info, left_on="element", right_on="player_id")
    # Parse the kickoff timestamps, then strip the timezone information.
    .assign(
        kickoff_time=lambda frame: pd.to_datetime(frame["kickoff_time"]).dt.tz_localize(None)
    )
    # "element" duplicates player_id after the join.
    .drop(columns=["element"])
    .rename(
        columns={
            "fixture": "fixture_id",
            "opponent_team": "opponent_team_id",
            "round": "gameweek_id",
            "value": "player_price",
            "short_name": "opponent_team_name",
            "strength": "opponent_strength",
        }
    )
    # Ascending by gameweek; the original row index labels are kept.
    .sort_values("gameweek_id", ascending=True)
)



In [59]:
# Preview the first five rows of the merged gameweek table.
merged_data.head(n=5)

Unnamed: 0,fixture_id,opponent_team_id,total_points,was_home,kickoff_time,team_h_score,team_a_score,gameweek_id,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,player_price,transfers_balance,selected,transfers_in,transfers_out,team_id,opponent_strength,opponent_team_name,player_id,web_name,position_id,player_price.1
0,10,13,0,False,2019-08-11 13:00:00,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,55,0,33117,0,0,13,3,NEW,1,Mustafi,2,5.1
16050,2,15,0,True,2019-08-10 14:00:00,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,45,0,8835,0,0,15,3,SHU,446,Stacey,2,4.3
5086,4,8,2,True,2019-08-10 14:00:00,0,0,1,90,0,0,1,0,0,0,0,1,0,0,0,-2,1.6,14.8,0.0,1.6,70,0,495302,0,0,8,3,EVE,134,Milivojevic,3,6.7
5048,4,8,1,True,2019-08-10 14:00:00,0,0,1,25,0,0,0,0,0,0,0,0,0,0,0,1,4.4,0.8,32.0,3.7,70,0,1082439,0,0,8,3,EVE,133,Zaha,3,6.6
12282,6,2,0,True,2019-08-10 16:30:00,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,85,0,38989,0,0,2,2,AVL,344,Alli,3,8.3


### Player Future Fixture

In [60]:
# Display the future-fixture frame as built from the API responses; pandas
# truncates the rendering according to the display options set at the top.
player_future_fixture

In [61]:
#player_future_fixture['kickoff_time'] = pd.to_datetime(player_future_fixture['kickoff_time'])
#player_future_fixture['kickoff_time'] = player_future_fixture['kickoff_time'].dt.tz_localize(None)

#player_future_fixture.drop(["team_h_score", "team_a_score", "finished", "minutes", "provisional_start_time"], axis = 1, inplace = True)

#player_future_fixture.rename(columns = {"id" : "fixture_id" , "code" : "fixture_code" , "team_h" : "team_h_id" , "team_a" : "team_a_id" , "event" : "gameweek_id" , "event_name" : "gameweek_name"}, inplace=True)

In [62]:
#player_future_fixture.head()

## Saving to File

In [63]:
# Cleaned frames and their destinations, written in this order.
_cleaned_outputs = [
    (r'./data/player_past_history/player_season_history.csv', player_season_history),
    (r'./data/player_past_history/merged_data.csv', merged_data),
]
for _csv_path, _frame in _cleaned_outputs:
    _frame.to_csv(_csv_path, index=False, header=True)
# The future-fixture export stays disabled, like its cleaning cell above:
#player_future_fixture.to_csv (r'./data/player_past_history/player_future_fixture.csv', index = False, header=True)


