In [392]:
#Import the packages we will need 
#Requests to interact with the api, pandas for working with the data, numpy for data calcs

import requests
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import json

In [393]:
def get_recent_gameweek_id():
    """
    Get's the most recent gameweek's ID.
    """

    data = requests.get(url)
    data = json.loads(data.content)

    gameweeks = data['events']
    
    now = datetime.utcnow()
    for gameweek in gameweeks:
        next_deadline_date = datetime.strptime(gameweek['deadline_time'], '%Y-%m-%dT%H:%M:%SZ')
        if next_deadline_date > now:
            return gameweek['id'] 


if __name__ == '__main__':
     GW = float(get_recent_gameweek_id())
    
GW
    

32.0

In [394]:
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'

r = requests.get(url)

json = r.json()

#This lets us examine the JSON objext so we know what to do with it


In [395]:
elements_df = pd.DataFrame(json['elements'])
elements_types_df = pd.DataFrame(json['element_types'])
elements_stats_df = pd.DataFrame(json['element_stats'])
teams_df = pd.DataFrame(json['teams'])
events_df = pd.DataFrame(json['events'])
phases_df = pd.DataFrame(json['phases'])


teams_df.rename(columns={'strength_overall_home': 'strength_overall_away', 'strength_overall_away': 'strength_overall_home'}, inplace=True)

teams_df.head(20)

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,...,team_division,unavailable,win,strength_overall_away,strength_overall_home,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,0,ARS,...,,False,0,1200,1250,1130,1150,1220,1210,1
1,7,0,,2,0,Aston Villa,0,0,0,AVL,...,,False,0,1100,1160,1120,1140,1080,1100,2
2,36,0,,3,0,Brighton,0,0,0,BHA,...,,False,0,1130,1130,1150,1190,1100,1130,131
3,90,0,,4,0,Burnley,0,0,0,BUR,...,,False,0,1060,1070,1040,1090,1040,1080,43
4,8,0,,5,0,Chelsea,0,0,0,CHE,...,,False,0,1250,1280,1260,1300,1250,1310,4
5,31,0,,6,0,Crystal Palace,0,0,0,CRY,...,,False,0,1090,1110,1030,1120,1010,1020,6
6,11,0,,7,0,Everton,0,0,0,EVE,...,,False,0,1250,1130,1130,1100,1200,1140,7
7,54,0,,8,0,Fulham,0,0,0,FUL,...,,False,0,1090,1100,1150,1110,1040,1060,34
8,13,0,,9,0,Leicester,0,0,0,LEI,...,,False,0,1240,1260,1190,1150,1150,1230,26
9,2,0,,10,0,Leeds,0,0,0,LEE,...,,False,0,1160,1170,1060,1150,1210,1170,9


In [396]:
fig = go.Figure([go.Bar(name="home", x=teams_df["name"], y= teams_df["strength_overall_home"],offsetgroup=0,),
                 go.Bar(name ="away", x=teams_df["name"], y= teams_df["strength_overall_away"],offsetgroup=1,),],layout=go.Layout(
        title="Overall strength"))              

fig2 = go.Figure([go.Bar(name="home", x=teams_df["name"], y= teams_df["strength_attack_home"],offsetgroup=0,),
                 go.Bar(name ="away", x=teams_df["name"], y= teams_df["strength_attack_away"],offsetgroup=1,),],layout=go.Layout(
        title="Attacking strength"))

fig3 = go.Figure([go.Bar(name="home", x=teams_df["name"], y= teams_df["strength_defence_home"],offsetgroup=0,),
                 go.Bar(name ="away", x=teams_df["name"], y= teams_df["strength_defence_away"],offsetgroup=1,),],layout=go.Layout(
        title="Defensive strength"))

#fig.show()

#fig2.show()

#fig3.show()

In [397]:

slim_elements_df = elements_df[['first_name','second_name','team','element_type','minutes',
                                'now_cost','minutes','value_season','value_form','form','total_points','points_per_game',
                               'threat','influence','creativity','news']].copy()


#slim_elements_df.head(5)

In [398]:
slim_elements_df.loc[:,'position'] = slim_elements_df.element_type.map(elements_types_df.set_index('id').singular_name)
#slim_elements_df.head()

In [399]:
slim_elements_df['team'] = slim_elements_df.team.map(teams_df.set_index('id').name)

slim_elements_df['value'] = slim_elements_df.value_season.astype(float)

slim_elements_df['form'] = slim_elements_df.value_season.astype(float)

#slim_elements_df['games'] = slim_elements_df.games.astype(float)

slim_elements_df['total_points'] = slim_elements_df.total_points.astype(float)

slim_elements_df.sort_values('value',ascending=False)

Unnamed: 0,first_name,second_name,team,element_type,minutes,now_cost,minutes.1,value_season,value_form,form,total_points,points_per_game,threat,influence,creativity,news,position,value
33,Emiliano,Martínez,Aston Villa,1,2610,54,2610,29.1,0.4,29.1,157.0,5.4,0.0,784.0,20.0,,Goalkeeper,29.1
324,Illan,Meslier,Leeds,1,2610,47,2610,27.0,0.9,27.0,127.0,4.4,0.0,852.4,10.0,,Goalkeeper,27.0
311,Stuart,Dallas,Leeds,2,2690,51,2690,24.3,0.5,24.3,124.0,4.1,386.0,593.0,357.9,,Defender,24.3
261,Alphonse,Areola,Fulham,1,2730,46,2730,24.3,0.3,24.3,112.0,3.6,0.0,723.0,0.0,,Goalkeeper,24.3
313,Patrick,Bamford,Leeds,4,2499,66,2499,23.9,0.5,23.9,158.0,5.3,1252.0,699.0,293.2,,Forward,23.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,Sokratis,Papastathopoulos,Arsenal,2,0,48,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Left the club by mutual consent on 20/1,Defender,0.0
337,Gaetano,Berardi,Leeds,2,0,40,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Defender,0.0
37,Neil,Taylor,Aston Villa,2,15,42,15,-0.2,0.0,-0.2,-1.0,-1.0,0.0,0.8,0.7,,Defender,-0.2
544,Alexandre,Jankewitz,Southampton,3,2,45,2,-0.2,0.0,-0.2,-1.0,-0.5,0.0,4.4,0.3,,Midfielder,-0.2


In [400]:

# Function for creating cell-values
def unavailable(row):
    if row['news'] != '':
        return True
    else:
        return False
    


# Using function to create new column:
slim_elements_df['unavailable'] = slim_elements_df.apply(lambda row: unavailable(row), axis=1)

#slim_elements_df.head()

In [401]:


slim_elements_df['strength_overall_home'] = slim_elements_df.team.map(teams_df.set_index('name')['strength_overall_home'].to_dict())

slim_elements_df['strength_overall_away'] = slim_elements_df.team.map(teams_df.set_index('name')['strength_overall_away'].to_dict())

slim_elements_df.head()

Unnamed: 0,first_name,second_name,team,element_type,minutes,now_cost,minutes.1,value_season,value_form,form,...,points_per_game,threat,influence,creativity,news,position,value,unavailable,strength_overall_home,strength_overall_away
0,Mesut,Özil,Arsenal,3,0,67,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,Not included in Arsenal's 25-man Premier Leagu...,Midfielder,0.0,True,1250,1200
1,Sokratis,Papastathopoulos,Arsenal,2,0,48,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,Left the club by mutual consent on 20/1,Defender,0.0,True,1250,1200
2,David,Luiz Moreira Marinho,Arsenal,2,1344,54,1344,7.4,0.1,7.4,...,2.1,123.0,244.6,45.2,Knee injury - Unknown return date,Defender,7.4,True,1250,1200
3,Pierre-Emerick,Aubameyang,Arsenal,3,2007,114,2007,9.5,0.1,9.5,...,4.3,731.0,428.2,292.4,,Midfielder,9.5,False,1250,1200
4,Cédric,Soares,Arsenal,2,744,46,744,6.1,0.1,6.1,...,2.8,66.0,110.8,114.8,,Defender,6.1,False,1250,1200


In [402]:
grouped_se_df = slim_elements_df.groupby(slim_elements_df.position)

df_gk = grouped_se_df.get_group("Goalkeeper").copy()
df_df = grouped_se_df.get_group("Defender").copy()
df_mfd = grouped_se_df.get_group("Midfielder").copy()
df_fwd = grouped_se_df.get_group("Forward").copy()

df_gk.sort_values(['value'],inplace =True)
df_df.sort_values(['value'],inplace =True)
df_mfd.sort_values(['value'],inplace =True)
df_fwd.sort_values(['value'],inplace =True)


fig_ovr_val = px.scatter(slim_elements_df, x="now_cost", y="total_points", hover_data=['second_name'])


fig_gk = px.bar(df_gk, x = 'second_name', y = 'value',hover_data = ['team'])
fig_df = px.bar(df_df, x = 'second_name', y = 'value',hover_data = ['team'])
fig_mfd = px.bar(df_mfd, x = 'second_name', y = 'value',hover_data = ['team'])
fig_fwd = px.bar(df_fwd, x = 'second_name', y = 'value',hover_data = ['team'])

fig_gk_tp = px.scatter(df_gk, x="second_name", y="total_points", hover_data=['now_cost'])
fig_df_tp = px.scatter(df_df, x="second_name", y="total_points", hover_data=['now_cost'])
fig_mfd_tp = px.scatter(df_mfd, x="second_name", y="total_points", hover_data=['now_cost'])
fig_fwd_tp = px.scatter(df_fwd, x="second_name", y="total_points", hover_data=['now_cost'])

fig_gk.add_trace(fig_gk_tp.data[0])
fig_df.add_trace(fig_df_tp.data[0])
fig_mfd.add_trace(fig_mfd_tp.data[0])
fig_fwd.add_trace(fig_fwd_tp.data[0])

fig_ovr_val.show()

#fig_gk.show()
#fig_df.show()
#fig_mfd.show()
#fig_fwd.show()

In [403]:
Venues = {'Team':  ['West Brom','Sheffield Utd','Burnley','Southampton',
             'Fulham','Newcastle','Brighton','Crystal Palace',
            'Leeds','Wolves','Aston Villa','Arsenal',
             'Spurs','West Ham','Leicester','Everton',
            'Liverpool','Chelsea','Man Utd','Man City'],
        'Ground': ['The Hawthorns','Bramall Lane','Turf Moor',"St. Mary's Stadium",
                  'Craven Cottage',"St James' Park", 'Amex Stadium', 'Selhurst Park',
                   'Elland Road','Molineux Stadium','Villa Park','Emirates Stadium',
                  'Tottenham Hotspur Stadium','London Stadium','King Power Stadium',
                  'Goodison Park','Anfield','Stamford Bridge','Old Trafford','Etihad Stadium']}

Venues_df = pd.DataFrame (Venues, columns = ['Team','Ground'])

#Venues_df.head(20)

In [404]:
filename = "epl-2020-GMTStandardTime.csv"

fixtures = pd.read_csv("/Users/jamesdawson/Desktop/Data analysis /Python/" + filename) 

fixtures_filtered = fixtures.loc[fixtures['Round Number'] == 31].copy()



In [405]:
fixtures_flipped = fixtures.copy()

fixtures_flipped.rename(columns={'Home Team': 'Away Team', 'Away Team': 'Home Team'}, inplace=True)

fixtures_flipped_filtered = fixtures_flipped.loc[fixtures_flipped['Round Number'] == 31].copy()


combined_fixtures = fixtures_filtered.append(fixtures_flipped_filtered, ignore_index=True)


combined_fixtures['Ground'] = combined_fixtures['Home Team'].map(Venues_df.set_index('Team')['Ground'].to_dict())


def f(row):
    if row['Location'] == row['Ground']:
        val = 'Home'
    else:
        val = 'Away'
    return val


combined_fixtures['Home/Away'] = combined_fixtures.apply(f, axis=1)


In [406]:
slim_elements_df['opponent'] = slim_elements_df.team.map(combined_fixtures.set_index('Home Team')['Away Team'].to_dict())

slim_elements_df['HomeAway'] = slim_elements_df.team.map(combined_fixtures.set_index('Home Team')['Home/Away'].to_dict())

slim_elements_df['opponent_hso'] = slim_elements_df.opponent.map(teams_df.set_index('name')['strength_overall_home'].to_dict())

slim_elements_df['opponent_aso'] = slim_elements_df.opponent.map(teams_df.set_index('name')['strength_overall_away'].to_dict())


slim_elements_df['HomeAdv'] = np.where(slim_elements_df['HomeAway'] == 'Home',
                                        slim_elements_df['strength_overall_home']/slim_elements_df['opponent_aso'],
                                        slim_elements_df['strength_overall_away']/slim_elements_df['opponent_hso'])


slim_elements_df['form_adj'] = (1 / (100 -(slim_elements_df['form'])) +1)

slim_elements_df['Value_Adjusted'] = slim_elements_df['value'] * slim_elements_df['HomeAdv'] * slim_elements_df['form_adj']

slim_elements_df[200:210]

Unnamed: 0,first_name,second_name,team,element_type,minutes,now_cost,minutes.1,value_season,value_form,form,...,unavailable,strength_overall_home,strength_overall_away,opponent,HomeAway,opponent_hso,opponent_aso,HomeAdv,form_adj,Value_Adjusted
200,Fabian,Delph,Everton,3,211,48,211,1.2,0.0,1.2,...,True,1130,1250,Brighton,Away,1130,1130,1.106195,1.010121,1.340869
201,Gylfi,Sigurdsson,Everton,3,1555,68,1555,12.9,0.0,12.9,...,False,1130,1250,Brighton,Away,1130,1130,1.106195,1.011481,14.433745
202,Jonas,Lössl,Everton,1,0,43,0,0.0,0.0,0.0,...,True,1130,1250,Brighton,Away,1130,1130,1.106195,1.01,0.0
203,Seamus,Coleman,Everton,2,973,48,973,9.0,0.8,9.0,...,False,1130,1250,Brighton,Away,1130,1130,1.106195,1.010989,10.065156
204,Cenk,Tosun,Everton,4,44,60,44,0.7,0.0,0.7,...,True,1130,1250,Brighton,Away,1130,1130,1.106195,1.01007,0.782134
205,Bernard,Anício Caldeira Duarte,Everton,3,390,57,390,3.2,0.0,3.2,...,True,1130,1250,Brighton,Away,1130,1130,1.106195,1.010331,3.576391
206,Lucas,Digne,Everton,2,1871,62,1871,12.3,0.1,12.3,...,False,1130,1250,Brighton,Away,1130,1130,1.106195,1.011403,13.761339
207,Michael,Keane,Everton,2,2359,51,2359,19.6,0.2,19.6,...,False,1130,1250,Brighton,Away,1130,1130,1.106195,1.012438,21.951085
208,Jordan,Pickford,Everton,1,2022,48,2022,16.5,0.0,16.5,...,True,1130,1250,Brighton,Away,1130,1130,1.106195,1.011976,18.470802
209,André Filipe,Tavares Gomes,Everton,3,1386,53,1386,7.5,0.2,7.5,...,True,1130,1250,Brighton,Away,1130,1130,1.106195,1.010811,8.386152


In [407]:
slim_elements_df_stars = slim_elements_df.copy()

slim_elements_df_stars.sort_values(['total_points'],inplace = True, ascending = False)

In [408]:
#slim_elements_df_stars.head(5)

In [417]:
slim_elements_df_roi = slim_elements_df.copy()

slim_elements_df_roi.sort_values(['Value_Adjusted'], inplace = True, ascending = False)

slim_elements_df_roi.head()

Unnamed: 0,first_name,second_name,team,element_type,minutes,now_cost,minutes.1,value_season,value_form,form,...,unavailable,strength_overall_home,strength_overall_away,opponent,HomeAway,opponent_hso,opponent_aso,HomeAdv,form_adj,Value_Adjusted
33,Emiliano,Martínez,Aston Villa,1,2610,54,2610,29.1,0.4,29.1,...,False,1160,1100,Liverpool,Away,1210,1250,0.909091,1.014104,26.82767
324,Illan,Meslier,Leeds,1,2610,47,2610,27.0,0.9,27.0,...,False,1170,1160,Man City,Away,1360,1340,0.852941,1.013699,23.344883
311,Stuart,Dallas,Leeds,2,2690,51,2690,24.3,0.5,24.3,...,False,1170,1160,Man City,Away,1360,1340,0.852941,1.01321,21.000268
261,Alphonse,Areola,Fulham,1,2730,46,2730,24.3,0.3,24.3,...,False,1100,1090,Wolves,Home,1110,1080,1.018519,1.01321,25.076948
313,Patrick,Bamford,Leeds,4,2499,66,2499,23.9,0.5,23.9,...,False,1170,1160,Man City,Away,1360,1340,0.852941,1.013141,20.653169


In [410]:
#Theoretical best team 


def team_selecter():
    
    budget = 1000

    position_dict = {"Goalkeeper":2,"Defender":5,"Midfielder": 5, "Forward": 3}

    team_dict = {'West Brom':3,'Sheffield Utd':3,'Burnley':3,'Southampton':3,
             'Fulham':3,'Newcastle':3,'Brighton':3,'Crystal Palace':3,
            'Leeds':3,'Wolves':3,'Aston Villa':3,'Arsenal':3,
             'Spurs':3,'West Ham':3,'Leicester':3,'Everton':3,
            'Liverpool':3,'Chelsea':3,'Man Utd':3,'Man City':3}

    roi_team = [] #form an empty tuple for the team
    
    total_points = 0
    
    top_performer_limit = 3
    
    # Choosing 3 top performers from the "top players"-dataframe
    for idx, row in slim_elements_df_stars.iterrows():
        if budget >= row.now_cost and len(roi_team) < top_performer_limit and row.unavailable == False and position_dict[row.position] != 0 and team_dict[row.team] != 0:
            roi_team.append(row.second_name)
            budget -= row.now_cost #Deducting cost from budget
            position_dict[row.position] -= 1 # Deducting position from position dictionary
            team_dict[row.team] -= 1 # Deducting player from team dictionary
            total_points += row.total_points # adding to point score
            print("Player choosen from 'top players' " + str(row.second_name))
            
                   
        else:
            for idx, row in slim_elements_df_roi.iterrows():
                if row.second_name not in roi_team and budget >= row.now_cost and row.unavailable == False and position_dict[row.position] != 0 and team_dict[row.team] != 0:
                    roi_team.append(row.second_name)
                    budget -= row.now_cost
                    position_dict[row.position] -= 1 # Deducting position from position dictionary
                    team_dict[row.team] -= 1 # Deducting player from team dictionary
                    total_points += row.total_points # adding to point score
    
                   
    print("\nTeam chosen: " + str(roi_team))
    print("Remaining budget: " + str((budget/10)) + "M.")
    print("Total points from choosen team: " + str(total_points) + ".")
    print(position_dict)

In [414]:
team_selecter()

Player choosen from 'top players' Borges Fernandes
Player choosen from 'top players' Kane
Player choosen from 'top players' Salah

Team chosen: ['Borges Fernandes', 'Kane', 'Salah', 'Gündogan', 'Stones', 'Martínez', 'Santana de Moraes', 'Soucek', 'Cresswell', 'Frello Filho', 'Keane', 'Wan-Bissaka', 'Aina', 'Bamford']
Remaining budget: 3.8M.
Total points from choosen team: 1981.0.
{'Goalkeeper': 0, 'Defender': 0, 'Midfielder': 0, 'Forward': 1}
