In [1]:
import requests
import pandas as pd
import numpy as np
import scipy.stats as st

# Show every column when a DataFrame is rendered (None removes the column cap).
pd.set_option('display.max_columns', None)

In [2]:
# Download the FPL "bootstrap-static" payload and split it into three frames:
# players ('elements'), position types ('element_types') and clubs ('teams').
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
r = requests.get(url, timeout=30)  # never hang the notebook on a stalled connection
r.raise_for_status()               # fail loudly instead of parsing an HTTP error body
json = r.json()
elements_df = pd.DataFrame(json['elements'])
elements_types_df = pd.DataFrame(json['element_types'])
teams_df = pd.DataFrame(json['teams'])

# Keep only players who have played, have scored points and whose status is not 'u'.
# NOTE(review): 'u' presumably means "unavailable" -- confirm against the API.
# .copy() detaches the result from the unfiltered frame so the column assignments
# made in later cells do not trigger SettingWithCopyWarning.
elements_df = elements_df.loc[
    (elements_df['minutes'] > 0)
    & (elements_df['total_points'] > 0)
    & (elements_df['status'] != 'u')
].copy()

In [3]:
def changeName(team):
    """Return the alternative spelling used for a club name.

    'Man Utd' becomes 'Man United' and 'Spurs' becomes 'Tottenham';
    any other value is returned unchanged.
    """
    if team == 'Man Utd':
        return 'Man United'
    if team == 'Spurs':
        return 'Tottenham'
    return team


def statusName(x):
    """Expand a one-letter status code into its short label.

    'a' -> 'avail', 'd' -> 'doubt', 's' -> 'susp', 'i' -> 'inj'.
    Any other value is returned unchanged.
    """
    labels = (('a', 'avail'), ('d', 'doubt'), ('s', 'susp'), ('i', 'inj'))
    for code, label in labels:
        if x == code:
            return label
    return x

In [4]:
def category(x):
    """Bucket a value into a tier label by inclusive upper bounds.

    x <= 0.55 -> 'bronze', x <= 0.7 -> 'silver', x <= 0.85 -> 'gold';
    everything else (including values that compare False, such as NaN)
    -> 'platin'.
    """
    tiers = ((0.55, 'bronze'), (0.7, 'silver'), (0.85, 'gold'))
    for upper_bound, label in tiers:
        if x <= upper_bound:
            return label
    return 'platin'

In [5]:
# Download the per-fixture history of every remaining player (one HTTP request
# per player) and stack the results into a single frame, all_history_df.
# Each player's rows are tagged with the player's web_name, team id and
# element_type id so they can be labelled later.
history_frames = []
player_keys = elements_df[['id', 'web_name', 'team', 'element_type']]
for player in player_keys.itertuples(index=False):
    url = f'https://fantasy.premierleague.com/api/element-summary/{player.id}/'
    r = requests.get(url, timeout=30)  # never hang the notebook on a stalled connection
    r.raise_for_status()               # stop on HTTP errors instead of parsing an error body
    json_history_df = pd.DataFrame(r.json()['history'])
    json_history_df['name'] = player.web_name
    json_history_df['team'] = player.team
    json_history_df['element_type'] = player.element_type
    history_frames.append(json_history_df)

# Without this guard an empty player list would leave all_history_df undefined
# and the failure would only surface in a later cell.
if not history_frames:
    raise ValueError('elements_df contains no players; no history to download')

# Concatenate once: calling pd.concat inside the loop re-copies every
# previously collected row on each iteration (quadratic work).
all_history_df = pd.concat(history_frames)

In [6]:
# Replace numeric ids in the match history with readable labels, then export.
position_by_id = elements_types_df.set_index('id')['singular_name']
team_name_by_id = teams_df.set_index('id')['name']

all_history_df['position'] = all_history_df['element_type'].map(position_by_id)
# 'team' still holds the numeric team id here; it is overwritten on the next line.
all_history_df['teamName'] = all_history_df['team'].map(team_name_by_id)
all_history_df['team'] = all_history_df['teamName'].apply(changeName)
all_history_df['opponent'] = all_history_df['opponent_team'].map(team_name_by_id)
# The first 10 characters of kickoff_time are kept as the date part.
all_history_df['Date'] = all_history_df['kickoff_time'].str.slice(stop=10)

all_history_df.to_csv('all_history_df_current.csv')

In [7]:
# Derived per-player metrics.
#
# games_featured is estimated as round(total_points / points_per_game) and is
# the denominator of the two "*_per_90" columns. It is computed once here
# instead of being re-derived inside every row-wise lambda.
pts_total = elements_df['total_points'].astype(float)
pts_per_game = elements_df['points_per_game'].astype(float)
cost_now = elements_df['now_cost'].astype(float)
ict_total = elements_df['ict_index'].astype(float)
bps_with_bonus = elements_df['bps'].astype(float) + elements_df['bonus'].astype(float)

# A points_per_game of 0.0 (or an estimate that rounds to 0 games) made the
# row-wise version raise ZeroDivisionError; such rows now yield NaN instead.
games_estimate = (pts_total / pts_per_game.replace(0.0, np.nan)).round(0)
games_estimate = games_estimate.replace(0.0, np.nan)

# Python's round() is applied element-wise so the 2-decimal results match the
# built-in rounding exactly (numpy's vectorised rounding can differ in the
# last digit for some values).
elements_df['ict_index_per_90'] = (ict_total * 10 / games_estimate / cost_now).map(lambda v: round(v, 2))
elements_df['bps_per_90'] = (bps_with_bonus / games_estimate).map(lambda v: round(v, 2))
elements_df['value_bps'] = (bps_with_bonus / cost_now).map(lambda v: round(v, 2))

elements_df['games_featured'] = games_estimate

# Human-readable labels for position, team and availability status.
elements_df['position'] = elements_df['element_type'].map(elements_types_df.set_index('id')['singular_name'])
# 'team' still holds the numeric team id here; it is overwritten on the next line.
elements_df['teamName'] = elements_df['team'].map(teams_df.set_index('id')['name'])
elements_df['team'] = elements_df['teamName'].apply(changeName)
elements_df['statusFull'] = elements_df['status'].apply(statusName)
elements_df['description'] = "[" + elements_df['web_name'] + ", " + elements_df['team'] + "]"

In [8]:
### statistics
def _add_zscore_columns(frame, column, key):
    """Add clamped z-score, probability and tier columns for one metric.

    The metric column is cast to float in place. Its mean and standard
    deviation are taken over the strictly positive values only. Three
    columns are then written to ``frame``:

    * ``zScore_<key>`` -- (value - mean) / std, with every non-positive or
      NaN score replaced by 0.
    * ``p-<key>``      -- standard normal CDF of that z-score (so >= 0.5).
    * ``<key>-cat``    -- tier label returned by ``category`` for ``p-<key>``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame that holds ``column``; modified in place.
    column : str
        Name of the metric column to standardise.
    key : str
        Short suffix used to build the three output column names.
    """
    values = frame[column].astype(float)
    frame[column] = values

    positive = values[values > 0]
    z_scores = (values - positive.mean()) / positive.std()
    # Below-average (and undefined) scores contribute nothing downstream.
    z_scores = z_scores.where(z_scores > 0, 0.0)

    frame[f'zScore_{key}'] = z_scores
    frame[f'p-{key}'] = st.norm.cdf(z_scores.to_numpy())
    frame[f'{key}-cat'] = frame[f'p-{key}'].map(category)


# (source column, suffix) pairs; the order fixes the order of the new columns.
zscore_metrics = [
    ('form', 'form'),
    ('now_cost', 'cost'),
    ('value_season', 'vsea'),
    ('value_bps', 'vbps'),
    ('selected_by_percent', 'sbp'),
    ('games_featured', 'games'),
    ('ict_index_per_90', 'ict'),
    ('bps_per_90', 'bps'),
    ('points_per_game', 'gpts'),
]

for metric_column, metric_key in zscore_metrics:
    _add_zscore_columns(elements_df, metric_column, metric_key)

In [9]:
# Composite "strength": the sum of eight clamped z-scores, scaled by the
# maximum so the best player scores 1.0, then rounded to two decimals.
strength_inputs = ('zScore_form', 'zScore_bps', 'zScore_gpts', 'zScore_ict',
                   'zScore_vbps', 'zScore_vsea', 'zScore_cost', 'zScore_sbp')
running_total = elements_df[strength_inputs[0]]
for z_column in strength_inputs[1:]:
    # Accumulate left to right so the floating-point sum order is fixed.
    running_total = running_total + elements_df[z_column]
elements_df['strength'] = running_total

elements_df['strength'] = elements_df['strength'].map(lambda s: s if s > 0 else 0)
max_strength = elements_df['strength'].max()
elements_df['strength'] = elements_df['strength'] / max_strength
elements_df['strength'] = elements_df['strength'].map(lambda s: round(s, 2) if s > 0 else 0.00)

# Standardise the strength score against the players with a positive score,
# clamp non-positive z-scores to 0, and convert to a probability and a tier.
str_splice = elements_df[elements_df['strength'] > 0]
str_mean = str_splice['strength'].mean()
str_std = str_splice['strength'].std()
elements_df['zScore_str'] = (elements_df['strength'] - str_mean) / str_std
elements_df['zScore_str'] = elements_df['zScore_str'].map(lambda z: z if z > 0 else 0)
elements_df['p-str'] = elements_df['zScore_str'].map(st.norm.cdf)
elements_df['str-cat'] = elements_df['p-str'].map(category)

# Numeric encoding of the tier label.
value_map_d = {'bronze': 0, 'silver': 1, 'gold': 2, 'platin': 3}
elements_df['class'] = elements_df['str-cat'].apply(value_map_d.get)

elements_df.to_csv('elements.csv')