# Dependencies

In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)

import requests, json
from pprint import pprint

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Reading Dataset

#### Elements

In [2]:
# base url for all FPL API endpoints
image_base = 'https://resources.premierleague.com/premierleague/photos/players/110x140/p'
base_url = 'https://fantasy.premierleague.com/api/'


# get data from bootstrap-static endpoint
r = requests.get(base_url+'bootstrap-static/').json()
pprint(r, indent=2, depth=1, compact=True)

{ 'element_stats': [...],
  'element_types': [...],
  'elements': [...],
  'events': [...],
  'game_settings': {...},
  'phases': [...],
  'teams': [...],
  'total_players': 8151252}


In [3]:
# team_info

teams_info = pd.json_normalize(r['teams'])
teams_info = teams_info.loc[:, (teams_info != teams_info.iloc[0]).any()]
teams_info = teams_info.rename(columns = {'name' : 'club_name'})
teams_info = teams_info[['code', 'id', 'club_name', 'strength', 'strength_overall_home','strength_overall_away',
                        'strength_attack_home', 'strength_attack_away', 'strength_defence_home',
                         'strength_defence_away']]

teams_info.head()

Unnamed: 0,code,id,club_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away
0,3,1,Arsenal,4,1200,1210,1130,1150,1220,1210
1,7,2,Aston Villa,3,1100,1160,1120,1140,1080,1100
2,94,3,Brentford,2,1020,1030,1020,1030,1020,1030
3,36,4,Brighton,3,1100,1130,1150,1190,1100,1130
4,90,5,Burnley,2,1060,1070,1040,1090,1040,1080


In [4]:
# labels

pd.json_normalize(r['element_stats'])

Unnamed: 0,label,name
0,Minutes played,minutes
1,Goals scored,goals_scored
2,Assists,assists
3,Clean sheets,clean_sheets
4,Goals conceded,goals_conceded
5,Own goals,own_goals
6,Penalties saved,penalties_saved
7,Penalties missed,penalties_missed
8,Yellow cards,yellow_cards
9,Red cards,red_cards


In [5]:
# Players categories

player_cat = pd.json_normalize(r['element_types'])
player_cat = player_cat[['id', 'singular_name', 'squad_select', 
                         'squad_min_play', 'squad_max_play']]

player_cat

Unnamed: 0,id,singular_name,squad_select,squad_min_play,squad_max_play
0,1,Goalkeeper,2,1,1
1,2,Defender,5,3,5
2,3,Midfielder,5,2,5
3,4,Forward,3,1,3


In [6]:
# Players info

players =  pd.json_normalize(r['elements']).drop(['chance_of_playing_next_round', 'chance_of_playing_this_round','cost_change_event', 'cost_change_event_fall', 'cost_change_start',
                                      'cost_change_start_fall', 'ep_next', 'ep_this', 'event_points', 'id', 'news', 'news_added', 'special', 'squad_number',
                                      'team', 'corners_and_indirect_freekicks_order', 'direct_freekicks_order', 'penalties_order', 'corners_and_indirect_freekicks_text', 'direct_freekicks_text',
                                                  'penalties_text'], axis=1)

players.head()

Unnamed: 0,code,dreamteam_count,element_type,first_name,form,in_dreamteam,now_cost,photo,points_per_game,second_name,selected_by_percent,status,team_code,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,web_name,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,influence_rank,influence_rank_type,creativity_rank,creativity_rank_type,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type
0,80201,0,1,Bernd,0.8,False,49,80201.jpg,1.3,Leno,1.6,a,3,4,48384,411,109677,4046,0.2,0.8,Leno,270,0,0,0,9,0,0,0,0,0,9,0,48,79.0,0.0,0.0,7.9,96,15,528,52,503,50,229,16
1,115918,0,1,Rúnar Alex,0.0,False,40,115918.jpg,0.0,Rúnarsson,1.1,u,3,0,19017,0,41792,764,0.0,0.0,Rúnarsson,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,396,36,385,17,329,14,404,36
2,47431,0,3,Willian,0.0,False,63,47431.jpg,0.0,Borges Da Silva,0.1,u,3,0,914,0,18677,149,0.0,0.0,Willian,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,599,245,598,245,596,245,599,245
3,54694,0,4,Pierre-Emerick,3.2,False,99,54694.jpg,3.2,Aubameyang,2.3,a,3,13,157328,1861,201078,6077,0.3,1.3,Aubameyang,267,1,0,2,4,0,0,0,0,0,0,3,41,48.0,62.6,140.0,25.0,192,20,80,13,29,13,55,17
4,58822,0,2,Cédric,0.2,False,44,58822.jpg,0.3,Soares,0.2,a,3,1,8637,101,14761,373,0.0,0.2,Cédric,188,0,0,0,7,0,0,0,1,0,0,0,21,30.8,36.1,1.0,6.8,246,96,143,29,308,101,248,84


In [7]:
players.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 605 entries, 0 to 604
Data columns (total 46 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   code                  605 non-null    int64 
 1   dreamteam_count       605 non-null    int64 
 2   element_type          605 non-null    int64 
 3   first_name            605 non-null    object
 4   form                  605 non-null    object
 5   in_dreamteam          605 non-null    bool  
 6   now_cost              605 non-null    int64 
 7   photo                 605 non-null    object
 8   points_per_game       605 non-null    object
 9   second_name           605 non-null    object
 10  selected_by_percent   605 non-null    object
 11  status                605 non-null    object
 12  team_code             605 non-null    int64 
 13  total_points          605 non-null    int64 
 14  transfers_in          605 non-null    int64 
 15  transfers_in_event    605 non-null    in

# Data Preparation

In [8]:
# Aggregating name

players['full_name'] = players[['first_name', 'second_name']].agg(' '.join, axis=1)
players = players.drop(['first_name', 'second_name'], axis=1)
players.now_cost = players.now_cost/10 
players.photo = players.photo.str.split('.', expand=True)[0]
players.head()

Unnamed: 0,code,dreamteam_count,element_type,form,in_dreamteam,now_cost,photo,points_per_game,selected_by_percent,status,team_code,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,web_name,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,influence_rank,influence_rank_type,creativity_rank,creativity_rank_type,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type,full_name
0,80201,0,1,0.8,False,4.9,80201,1.3,1.6,a,3,4,48384,411,109677,4046,0.2,0.8,Leno,270,0,0,0,9,0,0,0,0,0,9,0,48,79.0,0.0,0.0,7.9,96,15,528,52,503,50,229,16,Bernd Leno
1,115918,0,1,0.0,False,4.0,115918,0.0,1.1,u,3,0,19017,0,41792,764,0.0,0.0,Rúnarsson,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,396,36,385,17,329,14,404,36,Rúnar Alex Rúnarsson
2,47431,0,3,0.0,False,6.3,47431,0.0,0.1,u,3,0,914,0,18677,149,0.0,0.0,Willian,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,599,245,598,245,596,245,599,245,Willian Borges Da Silva
3,54694,0,4,3.2,False,9.9,54694,3.2,2.3,a,3,13,157328,1861,201078,6077,0.3,1.3,Aubameyang,267,1,0,2,4,0,0,0,0,0,0,3,41,48.0,62.6,140.0,25.0,192,20,80,13,29,13,55,17,Pierre-Emerick Aubameyang
4,58822,0,2,0.2,False,4.4,58822,0.3,0.2,a,3,1,8637,101,14761,373,0.0,0.2,Cédric,188,0,0,0,7,0,0,0,1,0,0,0,21,30.8,36.1,1.0,6.8,246,96,143,29,308,101,248,84,Cédric Soares


In [9]:
# Attaching teams_info to players

players = pd.merge(left = players,
         right = teams_info,
         left_on = 'team_code',
         right_on = 'code',
         suffixes=('_player', '_team')).drop(['team_code', 'code_team'] ,axis=1)

players.head()

Unnamed: 0,code_player,dreamteam_count,element_type,form,in_dreamteam,now_cost,photo,points_per_game,selected_by_percent,status,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,web_name,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,influence_rank,influence_rank_type,creativity_rank,creativity_rank_type,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type,full_name,id,club_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away
0,80201,0,1,0.8,False,4.9,80201,1.3,1.6,a,4,48384,411,109677,4046,0.2,0.8,Leno,270,0,0,0,9,0,0,0,0,0,9,0,48,79.0,0.0,0.0,7.9,96,15,528,52,503,50,229,16,Bernd Leno,1,Arsenal,4,1200,1210,1130,1150,1220,1210
1,115918,0,1,0.0,False,4.0,115918,0.0,1.1,u,0,19017,0,41792,764,0.0,0.0,Rúnarsson,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,396,36,385,17,329,14,404,36,Rúnar Alex Rúnarsson,1,Arsenal,4,1200,1210,1130,1150,1220,1210
2,47431,0,3,0.0,False,6.3,47431,0.0,0.1,u,0,914,0,18677,149,0.0,0.0,Willian,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,599,245,598,245,596,245,599,245,Willian Borges Da Silva,1,Arsenal,4,1200,1210,1130,1150,1220,1210
3,54694,0,4,3.2,False,9.9,54694,3.2,2.3,a,13,157328,1861,201078,6077,0.3,1.3,Aubameyang,267,1,0,2,4,0,0,0,0,0,0,3,41,48.0,62.6,140.0,25.0,192,20,80,13,29,13,55,17,Pierre-Emerick Aubameyang,1,Arsenal,4,1200,1210,1130,1150,1220,1210
4,58822,0,2,0.2,False,4.4,58822,0.3,0.2,a,1,8637,101,14761,373,0.0,0.2,Cédric,188,0,0,0,7,0,0,0,1,0,0,0,21,30.8,36.1,1.0,6.8,246,96,143,29,308,101,248,84,Cédric Soares,1,Arsenal,4,1200,1210,1130,1150,1220,1210


In [10]:
# Attaching player category to players

players = pd.merge(left = players,
         right = player_cat,
         left_on = 'element_type',
         right_on = 'id',
         suffixes=('_player', '_player_type'),).drop(['element_type', 'id_player_type'] ,axis=1)

players.head()

Unnamed: 0,code_player,dreamteam_count,form,in_dreamteam,now_cost,photo,points_per_game,selected_by_percent,status,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,web_name,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,influence_rank,influence_rank_type,creativity_rank,creativity_rank_type,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type,full_name,id_player,club_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,singular_name,squad_select,squad_min_play,squad_max_play
0,80201,0,0.8,False,4.9,80201,1.3,1.6,a,4,48384,411,109677,4046,0.2,0.8,Leno,270,0,0,0,9,0,0,0,0,0,9,0,48,79.0,0.0,0.0,7.9,96,15,528,52,503,50,229,16,Bernd Leno,1,Arsenal,4,1200,1210,1130,1150,1220,1210,Goalkeeper,2,1,1
1,115918,0,0.0,False,4.0,115918,0.0,1.1,u,0,19017,0,41792,764,0.0,0.0,Rúnarsson,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,396,36,385,17,329,14,404,36,Rúnar Alex Rúnarsson,1,Arsenal,4,1200,1210,1130,1150,1220,1210,Goalkeeper,2,1,1
2,463748,0,0.0,False,4.0,463748,0.0,0.6,a,0,36655,371,9266,296,0.0,0.0,Hein,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,420,44,409,25,355,22,428,44,Karl Hein,1,Arsenal,4,1200,1210,1130,1150,1220,1210,Goalkeeper,2,1,1
3,225321,1,3.8,False,4.5,225321,7.5,1.0,a,15,88641,8563,18617,878,0.8,3.3,Ramsdale,180,0,0,2,0,0,0,0,0,0,4,2,55,35.4,0.0,0.0,3.5,231,22,492,48,458,46,303,22,Aaron Ramsdale,1,Arsenal,4,1200,1210,1130,1150,1220,1210,Goalkeeper,2,1,1
4,220682,0,0.0,False,4.0,220682,0.0,0.1,a,0,9614,140,2348,76,0.0,0.0,Okonkwo,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,404,38,393,19,337,16,412,38,Arthur Okonkwo,1,Arsenal,4,1200,1210,1130,1150,1220,1210,Goalkeeper,2,1,1


In [11]:
# next 3 fixtures

r = requests.get(base_url+'fixtures/').json()

matches = pd.json_normalize(r)
matches.kickoff_time = pd.to_datetime(matches.kickoff_time)

matches = matches[matches.kickoff_time>=pd.Timestamp.utcnow()][['event', 
                                                      'team_h', 'team_a', 'team_h_difficulty', 'team_a_difficulty']].head(30)

dict_team_name = dict()
for i in range(teams_info.shape[0]):
    dict_team_name[teams_info.loc[i, 'id']] = teams_info.loc[i, 'club_name']

matches.team_h = matches.team_h.map(dict_team_name) 
matches.team_a = matches.team_a.map(dict_team_name) 

team_home = matches.drop('team_a', axis=1)
team_home = team_home.rename(columns = {'team_h' : 'team'})
team_home['h_a'] = 'h'

team_away = matches.drop('team_h', axis=1)
team_away = team_away.rename(columns = {'team_a' : 'team'})
team_away['h_a'] = 'a'

fixture_next_3 = pd.concat([team_home, team_away],  axis=0).reset_index(drop=True)
fixture_next_3.head()

Unnamed: 0,event,team,team_h_difficulty,team_a_difficulty,h_a
0,5,Brighton,4,3,h
1,5,West Ham,4,4,h
2,5,Spurs,4,4,h
3,6,Chelsea,4,4,h
4,6,Man Utd,2,4,h


In [12]:
# Next first

matches1 = matches[['event', 'team_h', 'team_a']].reset_index(drop=True)

next_first1 = pd.merge(left = fixture_next_3[fixture_next_3.event == fixture_next_3.event.min()],
         right = matches1[matches1.event == matches1.event.min()],
         left_on = 'team',
         right_on = 'team_h', how='inner').drop(['event_y', 'team_h'], axis=1)
next_first1 = next_first1.rename(columns = {'team_a':'next_1', 'event_x':'event'})



next_first2 = pd.merge(left = fixture_next_3[fixture_next_3.event == fixture_next_3.event.min()],
         right = matches1[matches1.event == matches1.event.min()],
         left_on = 'team',
         right_on = 'team_a', how='inner').drop(['event_y', 'team_a'], axis=1)
next_first2 = next_first2.rename(columns = {'team_h':'next_1', 'event_x':'event'})

next_first_final = pd.concat([next_first1, next_first2], axis=0).reset_index(drop=True)




# Next second

next_second1 = pd.merge(left = fixture_next_3[fixture_next_3.event == fixture_next_3.event.median()],
         right = matches1[matches1.event == matches1.event.median()],
         left_on = 'team',
         right_on = 'team_h', how='inner').drop(['event_y', 'team_h'], axis=1)
next_second1 = next_second1.rename(columns = {'team_a':'next_2', 'event_x':'event'})



next_second2 = pd.merge(left = fixture_next_3[fixture_next_3.event == fixture_next_3.event.median()],
         right = matches1[matches1.event == matches1.event.median()],
         left_on = 'team',
         right_on = 'team_a', how='inner').drop(['event_y', 'team_a'], axis=1)
next_second2 = next_second2.rename(columns = {'team_h':'next_2', 'event_x':'event'})

next_second_final = pd.concat([next_second1, next_second2], axis=0).reset_index(drop=True)




# Next Third

next_third1 = pd.merge(left = fixture_next_3[fixture_next_3.event == fixture_next_3.event.max()],
         right = matches1[matches1.event == matches1.event.max()],
         left_on = 'team',
         right_on = 'team_h', how='inner').drop(['event_y', 'team_h'], axis=1)
next_third1 = next_third1.rename(columns = {'team_a':'next_3', 'event_x':'event'})



next_third2 = pd.merge(left = fixture_next_3[fixture_next_3.event == fixture_next_3.event.max()],
         right = matches1[matches1.event == matches1.event.max()],
         left_on = 'team',
         right_on = 'team_a', how='inner').drop(['event_y', 'team_a'], axis=1)
next_third2 = next_third2.rename(columns = {'team_h':'next_3', 'event_x':'event'})

next_third_final = pd.concat([next_third1, next_third2], axis=0).reset_index(drop=True)



# merging Next 1 and 2

first_second = pd.merge(left = next_first_final,
         right = next_second_final,
         left_on = 'team',
         right_on = 'team',
         suffixes = ('_next_first', '_next_second'))



# merging all

next_three_all = pd.merge(left = first_second,
         right = next_third_final,
         left_on = 'team',
         right_on = 'team',
         suffixes = ('_next_first', '_next_third'))

next_three_all = next_three_all.rename(columns = {'event' : 'event_third',
                                                 'team_h_difficulty' : 'team_h_difficulty_third',
                                                 'team_a_difficulty' : 'team_a_difficulty_third',
                                                 'h_a' : 'h_a_third'})
next_three_all[['team', 'next_1', 'next_2', 'next_3']].head()

Unnamed: 0,team,next_1,next_2,next_3
0,Brighton,Leicester,Arsenal,Norwich
1,Leicester,Brighton,Crystal Palace,Man Utd
2,Man Utd,West Ham,Everton,Leicester
3,Chelsea,Spurs,Southampton,Brentford


In [13]:
players = pd.merge(left = players,
         right = next_three_all,
         left_on = 'club_name',
         right_on = 'team')

In [14]:
# New feature: points_by_diff

players.points_per_game = players.points_per_game.astype('float64')
players.team_h_difficulty_next_first = players.team_h_difficulty_next_first.astype('float64')

points_by_diff = []
for i in range(players.shape[0]):
    if players.loc[i, 'h_a_next_first'] == 'h':
        points_by_diff.append(players.loc[i, 'points_per_game']/players.loc[i, 'team_h_difficulty_next_first'])
    else:
        points_by_diff.append(players.loc[i, 'points_per_game']/players.loc[i, 'team_a_difficulty_next_first'])
        
players['points_by_diff'] = points_by_diff

In [15]:
# Final dataset

players.head()

Unnamed: 0,code_player,dreamteam_count,form,in_dreamteam,now_cost,photo,points_per_game,selected_by_percent,status,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,web_name,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,influence_rank,influence_rank_type,creativity_rank,creativity_rank_type,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type,full_name,id_player,club_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,singular_name,squad_select,squad_min_play,squad_max_play,event_next_first,team,team_h_difficulty_next_first,team_a_difficulty_next_first,h_a_next_first,next_1,event_next_second,team_h_difficulty_next_second,team_a_difficulty_next_second,h_a_next_second,next_2,event_third,team_h_difficulty_third,team_a_difficulty_third,h_a_third,next_3,points_by_diff
0,49262,0,0.0,False,4.0,49262,0.0,3.6,a,0,153752,825,63191,904,0.0,0.0,Steele,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,391,32,379,12,323,9,399,32,Jason Steele,4,Brighton,3,1100,1130,1150,1190,1100,1130,Goalkeeper,2,1,1,5,Brighton,4.0,3,h,Leicester,7,3,3,h,Arsenal,8,2,2,a,Norwich,0.0
1,131897,0,0.0,False,4.5,131897,0.0,0.0,u,0,0,0,609,7,0.0,0.0,Ryan,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,467,56,461,41,419,38,473,56,Mathew Ryan,4,Brighton,3,1100,1130,1150,1190,1100,1130,Goalkeeper,2,1,1,5,Brighton,4.0,3,h,Leicester,7,3,3,h,Arsenal,8,2,2,a,Norwich,0.0
2,215059,0,3.5,False,4.5,215059,4.0,25.6,a,16,637206,10025,406753,4720,0.8,3.6,Sánchez,360,0,0,2,3,0,0,0,0,0,7,0,82,66.4,10.0,0.0,7.6,132,17,252,1,446,43,233,17,Robert Sánchez,4,Brighton,3,1100,1130,1150,1190,1100,1130,Goalkeeper,2,1,1,5,Brighton,4.0,3,h,Leicester,7,3,3,h,Arsenal,8,2,2,a,Norwich,1.0
3,240514,0,0.0,False,4.5,240514,0.0,0.0,a,0,896,21,1533,36,0.0,0.0,Scherpen,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,502,61,497,49,463,47,506,61,Kjell Scherpen,4,Brighton,3,1100,1130,1150,1190,1100,1130,Goalkeeper,2,1,1,5,Brighton,4.0,3,h,Leicester,7,3,3,h,Arsenal,8,2,2,a,Norwich,0.0
4,39155,0,1.8,False,5.4,39155,2.0,0.2,a,8,5604,169,11578,187,0.3,1.5,Lallana,272,0,0,2,2,0,0,0,0,0,0,0,38,19.4,11.5,8.0,3.9,278,113,244,134,253,137,295,130,Adam Lallana,4,Brighton,3,1100,1130,1150,1190,1100,1130,Midfielder,5,2,5,5,Brighton,4.0,3,h,Leicester,7,3,3,h,Arsenal,8,2,2,a,Norwich,0.5


In [16]:
# Saving file

players.to_csv('fpl_data_processed.csv')

------------------