# Fantasy Premier League (FPL) Advisor

# Purpose
The purpose of this Jupyter notebook is to help with the selection of team members for the [Fantasy Premier League](https://fantasy.premierleague.com/) (FPL) by attempting to forecast how many points players will earn. It accesses the FPL API to download up-to-date stats, provides visual analysis and uses linear optimisation to recommend a team with the maximum expected points to improve the performance of your current team.

If you are not familiar with the Fantasy Premier League, you can watch this introduction:

<a href="http://www.youtube.com/watch?v=SV_F-cL8fC0" target="_blank"><img src="http://img.youtube.com/vi/SV_F-cL8fC0/0.jpg" 
alt="How to play FPL" width="600" height="400"/></a>

# Installation
To get started, run the following command to install all required dependencies.

In [None]:
 !pip install -q -r ./requirements.txt

# Import requirements
Here we import all external and local modules.

In [None]:
import pandas as pd, re, datetime as dt, numpy as np, ipywidgets as widgets, os, sys, unittest
from ipywidgets import interact, fixed
from fplpandas import FPLPandas
from datadict.jupyter import DataDict

import tensorflow as tf

# Load local modules
sys.path.append(os.getcwd())
from optimiser import get_optimal_squad
from common import *
from jupyter import *
from data import *
from nn import *

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

pd.set_option('display.max_columns', 100)

# Set variables
This section sets all important global variables.

In [None]:
# FPL API client created without credentials; it is used for the player, team and fixture downloads below.
fpl = FPLPandas()

# Load data dictionary
This section loads the data dictionary. The data dictionary contains default ordering of fields, for each field a description, default format and mapping of API field names to more readable ones. It is used to show data in a more user-friendly way.

In [None]:
# Data dictionary used throughout the notebook to reorder, remap and display fields.
dd = DataDict(data_dict_file='./data_dictionary.csv')

# Load player data
This section loads the player data and stats from the following FPL API endpoint: https://fantasy.premierleague.com/api/bootstrap-static/ and returns it as a panda data frame. **This can take a few seconds** because for each player the full history for the current season is downloaded.

In [None]:
# Download the raw player data sets; the fourth element of the returned tuple is not used in this notebook.
players_raw, players_history_past_raw, players_history_raw, _ = fpl.get_players()

In [None]:
# Pass each raw data set through its prepare_* helper together with the data dictionary.
players = players_raw.pipe(prepare_players, dd)
players_history_past = players_history_past_raw.pipe(prepare_players_history_past, dd)
players_history = players_history_raw.pipe(prepare_players_history, dd)

In [None]:
# Sorted array of the distinct 'Season' values found in the players' past-season history.
seasons = players_history_past.reset_index()['Season'].drop_duplicates().sort_values().values
# The last entry is taken as the current season and the one before it as the last season.
# NOTE(review): seasons is derived from the past-season history only - confirm that its
# last entry really is the current season.
current_season = seasons[-1]
last_season = seasons[-2]
# Uncomment to view data: dd.display(players, head=5, excel_file='players.xlsx', stats=True)

# Load team data

This section loads the team data and stats from the following endpoint: https://fantasy.premierleague.com/api/bootstrap-static/ and returns it as a panda data frame.

In [None]:
# Download the team data and pass it through prepare_teams together with the data dictionary.
teams = fpl.get_teams().pipe(prepare_teams, dd)
# Uncomment to view data: dd.display(teams, head=5, stats=True)

# Load fixture data

This section loads the fixture data and stats from the following endpoint: https://fantasy.premierleague.com/api/fixtures/ and returns it as a panda data frame.

In [None]:
# Download the fixture data and pass it through prepare_fixtures together with the data dictionary.
fixtures = fpl.get_fixtures().pipe(prepare_fixtures, dd)
# Uncomment to view data: dd.display(fixtures, head=5, stats=True)

# Create derived data
This section creates new datasets by combining the previously loaded ones.

## Players with team info

In [None]:
# Combine the player data with the team data via the get_player_teams helper.
player_teams = players.pipe(get_player_teams, teams, dd)
# Uncomment to view data: dd.display(player_teams, head=5)

## Fixtures with team info

In [None]:
# Combine the fixture data with the team data via the get_fixture_teams helper.
fixture_teams = fixtures.pipe(get_fixture_teams, teams, dd)
# Uncomment to view data: dd.display(fixture_teams, head=10)

## Player derived fields and metrics
The section below derives a few useful player attributes but most importantly, it calculates the total points earned by a player divided by his current cost. This can be an indicator for whether the player is undervalued or overpriced.

In [None]:
def calc_consistency(s: pd.Series):
    """Return the mean of the points expressed as a percentage of the maximum points.

    Missing values are ignored both when determining the maximum and when
    averaging, so a single NaN entry no longer turns the whole result into NaN.

    Args:
        s: Points earned per game.

    Returns:
        ``mean(s / max(s)) * 100``, or NaN if ``s`` has no non-missing values
        or if its maximum is 0 (the ratio is undefined in that case).
    """
    if s.count() == 0:
        return np.nan

    # Series.max skips NaN, unlike the built-in max, whose result depends on where the NaN sits.
    max_points = s.max()
    if max_points == 0:
        return np.nan

    return (s / max_points).mean() * 100

def calc_stats(df: pd.DataFrame, game_week: int = None):
    """Summarise the game points of one player's fixture rows.

    Args:
        df: Rows with at least the columns 'Player Team ID', 'Game Total Points'
            and, if ``game_week`` is given, 'Game Week'.
        game_week: If not None, only rows with 'Game Week' <= ``game_week`` are used.

    Returns:
        A Series with 'Total Points', 'Total Points Consistency' and 'Player Team ID'.
    """
    # The team ID is read from the first row before any game week filter is applied.
    player_team_id = df['Player Team ID'].iloc[0]

    if game_week is not None:
        df = df[df['Game Week'] <= game_week]

    game_points = df['Game Total Points']
    return pd.Series({
        'Total Points': game_points.sum(),
        'Total Points Consistency': calc_consistency(game_points),
        'Player Team ID': player_team_id})
    
# One row per player and played fixture: the player's game points, minutes and cost,
# joined with the fixture data and selected player/team attributes.
players_history_fixtures = (players_history[['Game Total Points', 'Game Minutes Played', 'Game Cost']]
    .reset_index()
    .merge(fixtures, left_on='Fixture ID', right_index=True)
    .merge(player_teams[['Player Team ID', 'Field Position', 'Minutes Percent', 'News And Date', 'Team Short Name', 'Name and Short Team']], left_on='Player ID', right_index=True)
    .set_index(['Player ID', 'Fixture ID']))

# Per-player summary (total points, consistency and team ID) computed by calc_stats over all fixtures.
players_history_stats = (players_history_fixtures
    .groupby(['Player ID'])
    .apply(lambda df: calc_stats(df)))

# Add the consistency metric and the points per cost ratio to the player/team data.
player_team_stats = (player_teams
    .reset_index()
    .merge(players_history_stats[['Total Points Consistency']], left_on='Player ID', right_on='Player ID')\
    .set_index('Player ID')
    .assign(**{'Points Per Cost': lambda df: df['Total Points']/df['Current Cost']}))

# Add the total points from the last season to the player stats so it can be used for the expected point calculation at the beginning of the season.
players_history_last_season = (players_history_past
    .reset_index()
    [['Player ID', 'Season', 'Season Total Points']]
    [lambda df: df['Season'] == last_season]
    .rename(columns={'Season Total Points': 'Last Season Total Points'}))

# Left join so that players without an entry for the last season are kept.
player_team_stats = (player_team_stats
    .merge(players_history_last_season.set_index('Player ID'), left_index=True, right_index=True, how='left')
    .pipe(dd.reorder))
# Uncomment to view data: dd.display(player_team_stats, head=15)

## Team metrics

In [None]:
# Derive per-team score statistics from the fixtures via the two project helpers.
team_score_stats = (fixture_teams
    .pipe(get_team_fixture_scores, teams)
    .pipe(get_team_score_stats))

In [None]:
# Sum the players' total points per team; the result is indexed by 'Player Team ID'.
team_points = (player_team_stats
    .reset_index()
    .groupby(['Player Team ID', 'Team Name', 'Team Short Name'])[['Total Points']]
    .sum()
    .reset_index()
    .set_index('Player Team ID'))
# Uncomment to view data:  dd.display(team_points, excel_file='team_points.xlsx', head=50)

## Fixture metrics
In order to calculate relative strengths of the teams, we aggregate the points that the team has earned so far. We later can use this information to adjust the expected points for each player.

In [None]:
# Attach the team point totals of the home and the away team to every fixture and
# derive the two relative strengths as the ratio of one total to the other.
fixture_teams_stats = (fixture_teams
    .merge(team_points[['Total Points']].rename(columns={'Total Points': 'Fixture Strength Home'}), left_on='Home Team ID', right_on='Player Team ID')
    .merge(team_points[['Total Points']].rename(columns={'Total Points': 'Fixture Strength Away'}), left_on='Away Team ID', right_on='Player Team ID')
    .assign(**{'Rel. Fixture Strength Home': lambda df: df['Fixture Strength Home']/df['Fixture Strength Away']})
    .assign(**{'Rel. Fixture Strength Away': lambda df: df['Fixture Strength Away']/df['Fixture Strength Home']})
    # Short label of the form '<home short name>-<away short name>'.
    .assign(**{'Fixture Short Name': lambda df: df['Team Short Name Home']+'-'+df['Team Short Name Away']})
    .set_index('Fixture ID')
    .pipe(dd.reorder))
# Uncomment to view data:  dd.display(fixture_teams_stats.sort_values(['Game Week']), excel_file='fixture_teams_stats.xlsx', head=50)

## Calculate relative fixture strengths
Calculates the relative fixture strengths for each team. The relative strength is a factor around 1 and is used in the expected point prediction below to adjust the predicted points based on the relative strengths of the upcoming game weeks. The simple idea here is that teams with more total points so far are stronger. A value above 1 indicates that the player's team is relatively stronger and a value below 1 indicates that the team is relatively weaker.

In [None]:
# Unfold the data frame so that there are two rows for each fixture: one for the home and one for the away team.
team_fixture_strength = (pd
    .melt(fixture_teams_stats
        .reset_index()
        [['Fixture ID', 'Home Team ID', 'Team Name Home', 'Away Team ID', 'Fixture Strength Home', 'Fixture Strength Away', 'Rel. Fixture Strength Home', 'Rel. Fixture Strength Away', 'Home Team Difficulty', 'Away Team Difficulty']],\
        id_vars=['Fixture ID', 'Fixture Strength Home', 'Fixture Strength Away', 'Rel. Fixture Strength Home', 'Rel. Fixture Strength Away', 'Home Team Difficulty', 'Away Team Difficulty'], 
        value_vars=['Home Team ID', 'Away Team ID'])
    .drop('variable', axis=1)
    .rename(columns={'value': 'Team ID'})
    # Bring back the home/away team IDs (consumed by the melt) and further fixture attributes.
    .merge(fixture_teams_stats[['Home Team ID', 'Away Team ID','Game Week', 'Started', 'Fixture Short Name', 'Kick Off Time']], left_on='Fixture ID', right_index=True, suffixes=(False, False))
    .assign(**{'Is Home?': (lambda df: df['Home Team ID'] == df['Team ID'])})
    # The opponent is the away team for the home row and the home team for the away row.
    .assign(**{'Opp Team ID': (lambda df: df.apply(lambda row: row['Away Team ID'] if row['Is Home?'] else row['Home Team ID'], axis=1))})    
    .sort_values(['Game Week'])
    # Get the correct strength based on whether the team is the home or away team.
    .assign(**{'Rel. Fixture Strength': lambda df: df.apply(lambda row: row['Rel. Fixture Strength Home'] if row['Is Home?'] else row['Rel. Fixture Strength Away'], axis=1)})
    .assign(**{'Team Difficulty': lambda df: df.apply(lambda row: row['Home Team Difficulty'] if row['Is Home?'] else row['Away Team Difficulty'], axis=1)})
    .assign(**{'Fixture Short Name Difficulty': lambda df: df['Fixture Short Name']+' ('+df['Team Difficulty'].astype('str')+')'})
    .merge(teams, left_on='Team ID', right_on='Team ID')
    [['Team ID', 'Fixture ID', 'Rel. Fixture Strength', 'Fixture Short Name', 'Team Difficulty', 'Opp Team ID', 'Is Home?', 'Game Week', 'Started', 'Fixture Short Name Difficulty', 'Kick Off Time']]
    .set_index(['Team ID', 'Fixture ID'])    
    # Per team: expanding mean of the relative strength of the preceding rows (shift() excludes the row itself).
    .assign(**{'Rel. Fixture Strength To GW': lambda df: df.groupby('Team ID')['Rel. Fixture Strength']
               .apply(lambda x: x.shift().expanding().mean())})
    .reset_index()
    .set_index(['Team ID'])
    .pipe(dd.reorder)) 

# Uncomment to view data for a specific game week: dd.display(team_fixture_strength[team_fixture_strength['Game Week'] == 5])
# Uncomment to view data: dd.display(team_fixture_strength, excel_file='team_fixture_strength.xlsx')

## Transfer relative fixture strengths from fixtures to players
This section joins the fixture strengths data set with the player data set so that expected points can be calculated on a player basis.

In [None]:
# Join every player with the fixture strength rows of his team; indexed by player and fixture.
player_team_fixture_strength = (player_team_stats
        .merge(team_fixture_strength, left_on='Player Team ID', right_index=True, suffixes=(False, False))
        .reset_index()
        .set_index(['Player ID', 'Fixture ID']))

# Explore players based on expected points for different time horizons
The forecast of the points earned by each player for a specific time horizon is the minimum of two numbers: the expected points based on the average points earned so far and the expected points calculated by the Neural Network.

$$ep_{th} = min(ep_{av,th}, ep_{nn,th} )$$

where

* $ep_{av,th}$: Expected points for time horizon $th$ based on the average points earned so far
* $ep_{nn,th}$: Expected points  for time horizon $th$ based on neural network prediction. For training the model with different data, see the [train_nn_model.ipynb](train_nn_model.ipynb) notebook.

The first number is the average points earned over the past fixtures this season, adjusted for the difficulty of the upcoming fixtures over a specific time horizon and the likelihood of playing. It is calculated for every player as:

$$ep_{av,th} = \frac{tp}{gw}\sum_{i=1}^{th} p_i\frac{\sum_{i=1}^{th} s_i}{s_{gw}} $$

where

* $ep_{av,th}$: Expected points for a player over a specific time horizon, e.g. next game week, next five game weeks, remaining game weeks of the current season half and remaining game weeks of the current season
* $tp$: Total points earned by the player this season so far
* $gw$: Number of game weeks completed so far
* $s_i$: Relative strength of a specific upcoming fixture for the team of the player. This value is usually just over 1 if the player's team is relatively stronger and less than 1 if it is weaker.
* $s_{gw}$: Relative strength of fixtures this season so far for the team of the player
* $p_i$: Chance of player playing in an upcoming fixture

This approach has the following limitations:
* Expected points are solely predicted based on the past performance and no other information such as recent news is taken into account.
* All past performance this season is considered equal, regardless of how long ago it was. I.e. there is no recency bias.
* Past performance does not take into account why a player has not earned any points during a game. It matters whether this is due to manager selection or due to injury.
* The relative strengths are calculated based on many assumptions and the full meaning of the raw strength numbers published by FPL is not clear.
* The adjustment for the chance of the player playing really should be made on a per game basis as opposed to the overall expected points (but given the accuracy of the chance of playing this is probably a minor issue).

## Create combined data for past and future fixtures for each player
This section concatenates two sets: one historical and one future fixture set. The reason for this is that for completed matches, we need to consider the team that the player actually played for, while for future games we can assume that the player will play for the same team that he is currently in.

In [None]:
# Get the current game week: one more than the highest game week with a started fixture, but at least 1.
# The number is therefore incremented as soon as the first game of a game week has started.
current_gw = max(1, team_fixture_strength[team_fixture_strength['Started'] == True]['Game Week'].max()+1)
# Total number of game weeks, computed from the number of teams as (teams - 1) * 2.
total_gws = (teams.shape[0]-1)*2

In [None]:
# Estimates the chance that a player is available for the future game weeks.
def est_chance_avail(df: pd.DataFrame)-> pd.Series:
    """Project a player's 'Chance Avail Next GW' over the rows of ``df``.

    The first row keeps its value. If that value is greater than 0 (and at
    most 1), all following rows are set to 1, i.e. the player is assumed to be
    available again after the next game week. A value of 0 or NaN is left
    unchanged in all rows.

    Args:
        df: Rows of a single player with a 'Chance Avail Next GW' column,
            ordered so that the next game week comes first.

    Returns:
        A new Series aligned with ``df``; ``df`` itself is not modified.
    """
    # Work on a copy so that the assignment below cannot write through to the caller's frame.
    chance_avail = df['Chance Avail Next GW'].copy()

    if chance_avail.empty:
        return chance_avail

    first_chance = chance_avail.iloc[0]
    if 0 < first_chance <= 1:
        chance_avail.iloc[1:] = 1

    return chance_avail

# Future fixtures are those team fixtures whose ID does not occur in the players' fixture history.
team_future_fixtures = (team_fixture_strength[team_fixture_strength['Fixture ID']
        .isin(players_history_fixtures.index.get_level_values(level='Fixture ID')) == False])[['Fixture ID', 'Game Week']]

# One row per player and future fixture of his current team, with the projected chance of being available.
players_future_fixture_team_strengths = (player_teams
    [['Player Team ID', 'Chance Avail Next GW']]
    .reset_index()
    .merge(team_future_fixtures, left_on=['Player Team ID'], right_index=True, suffixes=(False, False))
    .sort_values('Game Week')
    .set_index(['Player ID', 'Fixture ID'])
    # Projects the chance available forward based on the chance available for the next game week.
    .assign(**{'Chance Avail': lambda df: df[['Chance Avail Next GW', 'Game Week']]  
        .groupby('Player ID').apply(lambda df: est_chance_avail(df).droplevel('Player ID'))}) 
    .drop(columns=['Chance Avail Next GW', 'Game Week']))

In [None]:
# Stack the played fixtures and the future fixtures of every player and enrich each row with
# player, fixture, team and opponent attributes.
player_fixture_stats = (pd.concat([players_history_fixtures[['Player Team ID', 'Game Total Points', 'Game Minutes Played', 'Game Cost']], 
                                   players_future_fixture_team_strengths], sort=False)
    .merge(player_team_fixture_strength[['Minutes Percent', 'Chance Avail Next GW', 'Team Difficulty', 'Field Position ID', 'Field Position',
       'ICT Index', 'Minutes Played', 'Current Cost', 'Total Points', 'Game Week', 'Is Home?', 'Name', 'Team Short Name', 'Total Points Consistency',
       'Chance Avail This GW', 'Opp Team ID', 'Fixture Short Name Difficulty', 'Fixture Short Name', 'Rel. Fixture Strength', 'Rel. Fixture Strength To GW', 'Name and Short Team', 'News And Date', 'Kick Off Time']], left_index=True, right_index=True, suffixes=(False, False))
    # Goal statistics of the player's own team ...
    .merge(team_score_stats[['Total Team Goals Scored', 'Total Team Goals Conceded']], left_on='Player Team ID', right_index=True, suffixes=(False, False))
    # ... and of the opposing team, renamed with an 'Opp' prefix.
    .merge(team_score_stats[['Team Short Name', 'Total Team Goals Scored', 'Total Team Goals Conceded']]
           .rename(columns={'Team Short Name': 'Opp Team Short Name', 'Total Team Goals Scored': 'Total Opp Team Goals Scored', 'Total Team Goals Conceded': 'Total Opp Team Goals Conceded'})
           , left_on='Opp Team ID', right_index=True, suffixes=(False, False))
    .merge(team_points[['Total Points']].rename(columns={'Total Points': 'Team Total Points'}), left_on='Player Team ID', right_index=True, suffixes=(False, False))
    .merge(team_points[['Total Points']].rename(columns={'Total Points': 'Opp Team Total Points'}), left_on='Opp Team ID', right_index=True, suffixes=(False, False))
    # Rows from the fixture history carry no 'Chance Avail' value; missing values are set to 0.
    .assign(**{'Chance Avail': lambda df: df['Chance Avail'].fillna(0)})    
    .pipe(calc_player_fixture_stats)
    # Per player: expanding mean of the relative fixture strength of the preceding rows.
    .assign(**{'Avg Rel. Fixture Strength To GW': lambda df: df.groupby('Player ID')['Rel. Fixture Strength'].apply(lambda x: x.shift().expanding().mean())}))

## Calculates the expected points for the following time horizons
Calculates the expected points for the following time horizons:
* the next game week (mainly for Free Hit and team selection optimisation)
* the next x game weeks (mainly for individuals transfers)
* the remaining game weeks of the current season

The expected points for each time horizon are simply the sum of expected points for each game week within the time horizon.

In [None]:
# Load the saved Keras model that is used for the neural-network expected points below.
eps_model = tf.keras.models.load_model('models/expected_points')

In [None]:
def nn_eps_gw(player_fixture_stats: pd.DataFrame, model: tf.keras.Model) -> pd.DataFrame:
    """Predict the expected points of every complete player/fixture row with ``model``.

    Only rows without missing values in any of the model columns are
    predicted, so the result can have fewer rows than the input.

    Args:
        player_fixture_stats: Player/fixture rows containing the model columns.
        model: Model whose ``predict`` output is flattened to one value per row.

    Returns:
        The 'Expected Points NN' column (a Series, despite the annotation),
        indexed like the rows that were kept.
    """
    model_cols = ['Field Position', 'Avg Points Opp Points Adj To GW', 'Game Total Points', 'Is Home?',
                  'Avg Minutes Played Recently To GW', 'Total Opp Team Goals Scored Diff']
    complete_rows = player_fixture_stats[model_cols].dropna(how='any', axis=0)

    # NOTE(review): nn_prep_ds is a project helper; 'Game Total Points' is presumably the label column - confirm.
    model_input = complete_rows.pipe(nn_prep_ds, 'Game Total Points')
    predictions = model.predict(model_input).flatten()

    return complete_rows.assign(**{'Expected Points NN': predictions})['Expected Points NN']

Calculate expected points for each player and fixture combination.

In [None]:
# Expected points per player and fixture: the neural-network estimate, the calculated
# estimate and, as the final value, the row-wise minimum of the two.
players_fixture_team_eps = (player_fixture_stats
     .assign(**{'Expected Points NN': lambda df: df.pipe(nn_eps_gw, eps_model)})
     .assign(**{'Expected Points Calc': lambda df: df.pipe(calc_eps)})
     .assign(**{'Expected Points': lambda df: df[['Expected Points Calc', 'Expected Points NN']].min(axis=1)})
    )

Project the fixtures to game week level to deal with game weeks in which a team has no fixture or a double fixture.

In [None]:
# Project the per-fixture expected points to game week level with the proj_to_gw helper.
players_gw_team_eps = (players_fixture_team_eps
    .pipe(proj_to_gw))

Calculates the expected points for the different time horizons.

In [None]:
# Per player: expected points for the time horizons in next_gws, computed by calc_eps_for_next_gws.
player_gw_next_eps = (players_gw_team_eps
                    .reset_index(level=1)
                    .groupby('Player ID')
                    .apply(lambda df: df.pipe(calc_eps_for_next_gws, next_gws, current_gw, total_gws)))

# Keep only players with a 'Minutes Percent' above 50.
player_gw_next_eps_active = player_gw_next_eps[lambda df: df['Minutes Percent'] > 50] # Remove entries with 50% or less minutes played.

## Visualise players' cost vs their expected points

In [None]:
# Interactive view of the active players (player_strength_by_horizon) with toggle buttons for the time horizon.
_ = interact(player_strength_by_horizon, player_eps=fixed(player_gw_next_eps_active), dd=fixed(dd), current_gw=fixed(current_gw), horizon=widgets.ToggleButtons(description='Horizon: ', options=next_gws))

# Load user team data
This section loads the data of the user's team. **Note this requires your user credentials to be saved in fpl_credentials.csv in the same directory as this notebook.**

In [None]:
# Create an authenticated FPL client from the credentials file if it exists;
# otherwise show input widgets whose values are read in the next cell.
creds_file = 'fpl_credentials.csv'
# Remember the email address across re-runs of this cell.
if 'fpl_email' not in globals(): fpl_email = ''
user_widget, pass_widget = None, None
if os.path.exists(creds_file):
    # The first row of the file is passed as keyword arguments to FPLPandas.
    fpl_cred = pd.read_csv(creds_file)
    fpl = FPLPandas(**fpl_cred.iloc[0].to_dict())
else:
    user_widget = widgets.Text(value=fpl_email, placeholder='Enter FPL email address', description='Email')
    # Password widget so that the password is masked while it is typed.
    pass_widget = widgets.Password(value='', placeholder='Enter FPL password', description='Password')
    display(widgets.HBox([user_widget, pass_widget]))

In [None]:
# If the credentials were entered through the widgets, create the authenticated client from them.
if user_widget is not None and pass_widget is not None:
    fpl_email = user_widget.value
    fpl = FPLPandas(email=fpl_email, password=pass_widget.value)

# Download the user's team. Errors are only printed so that the cell itself does not fail.
try:
    user_team_raw, _, user_trans_info_raw = fpl.get_user_team()
except Exception as e:
    print(e)
finally:
    # Always clear the entered password from the widget, even if the download is interrupted.
    if pass_widget is not None:
        pass_widget.value = ''

In [None]:
# Remap the raw user team to the data dictionary names and add helper columns.
user_team = (user_team_raw
    .pipe(dd.remap, data_set='player')
    .assign(**{'In Team?': True})
    # Prices are divided by 10 - presumably the API reports them in tenths; TODO confirm.
    .assign(**{'Selling Price': lambda df: df['Selling Price']/10})
    .assign(**{'Purchase Price': lambda df: df['Purchase Price']/10})
    # Players on team positions 1 to 11 are flagged as selected.
    .assign(**{'Selected?': lambda df: df['Team Position'].map(lambda x: x <= 11)}) 
    .rename_axis('Player ID'))

# Transfer information: the row with index label 0 of the raw transfer data.
user_trans_info = user_trans_info_raw.loc[0]

## Current team

In [None]:
# Add the expected points to the user's team; the left join keeps every team member.
player_user_team = user_team.merge(player_gw_next_eps, left_on='Player ID', right_on='Player ID', how='left')
display_team(player_user_team, dd)

In [None]:
# Same interactive view as above, on the outer join of the user's team with the active players.
_ = interact(player_strength_by_horizon, player_eps=fixed(user_team.merge(player_gw_next_eps_active, left_on='Player ID', right_on='Player ID', how='outer')),
             dd=fixed(dd), current_gw=fixed(current_gw), horizon=widgets.ToggleButtons(description='Horizon: ', options=next_gws))

# Get best team for wildcard or season start
You can use the code below to get the best team for a wildcard or at the start of the season. It uses the [PuLP linear optimiser](https://pythonhosted.org/PuLP/) to find the team combination within the current money available with the highest total expected points over the next five game weeks.

In [None]:
# Total budget: the bank balance (divided by 10) plus the summed current cost of the user's team.
total_budget = (user_trans_info['bank']/10+player_user_team['Current Cost'].sum())
total_budget

In [None]:
# Optimise the squad on the expected points of the next 5 game weeks and the selection on the next game week.
# NOTE(review): the budget is reduced by 4.0 and the formation contains only one goal keeper -
# presumably the amount is reserved for a second goal keeper; confirm.
player_team_optimal = get_optimal_squad(player_gw_next_eps_active, 
                                        optimise_team_on='Expected Points Next 5 GWs',
                                        optimise_sel_on='Expected Points Next GW', 
                                        formation='1-5-5-3', 
                                        budget=total_budget-4.0)\
    .sort_values(['Field Position ID'])
player_team_optimal = dd.reorder(player_team_optimal)
display_team(player_team_optimal, dd)

# Recommend selection for next GW and transfers for next 5 GWs
Use this section to get a recommendation on what players to select to optimise the expected points of your team and to improve it by making transfers. You need to have provided your FPL credentials for this to work.

It uses the PuLP linear optimiser to find the team combination within the current budget available with the highest total expected points over the next five game weeks while taking your current team into account for a user defined number of transfers. Note that when executing more than one transfer on the FPL website, 4 points will be deducted from your balance for every transfer.

It uses the same PuLP linear optimiser to find the selection with the highest expected points for the next game week.

## Recommended team

In [None]:
# Gets the cost and player ID of the second goal keeper so that the optimiser does not recommend his replacement.
# The second goal keeper is taken to be the user's goal keeper with the lowest expected points for the next game week.
second_gk = player_user_team[player_user_team['Field Position'] == 'GK'].sort_values('Expected Points Next GW')[['Current Cost']].iloc[0]
second_gk_cost = second_gk.values[0]
second_gk_id = second_gk.name

# All active players; for players in the user's team the selling price replaces the current cost.
player_team_eps_user = (user_team
    .merge(player_gw_next_eps_active, left_on='Player ID', right_on='Player ID', how='right')
    .assign(**{'Current Cost': lambda df: df['Selling Price'].fillna(df['Current Cost'])}))

player_team_optimal = (get_optimal_squad(player_team_eps_user, 
                        optimise_team_on='Expected Points GWs To End',
                        optimise_sel_on='Expected Points Next GW', 
                        formation='1-5-5-3', # Not 2-5-5-3 if we want to avoid the transfer of the second goal keeper recommended.
                        budget=total_budget-second_gk_cost, # Not just total_budget if we want to avoid the transfer of the second goal keeper recommended.
                        recommend=2) # If set to 0, the optimiser will still recommend a team selection that maximises the expected points.
    .sort_values(['Field Position ID']))
player_team_optimal = dd.reorder(player_team_optimal)
display_team(player_team_optimal, dd, in_team=True)

In [None]:
# Players of the user's team that are not part of the recommended team, excluding the second goal keeper.
player_team_removed = player_user_team[(player_user_team['In Team?'] == True) 
                                       & (player_user_team.index.isin(player_team_optimal.index.values) == False)
                                      & (player_user_team.index.isin([second_gk_id]) == False)]
dd.display(player_team_removed[['Name', 'Current Cost', 'Field Position', 'Captain?', 'Vice Captain?', 'Minutes Percent', 'News And Date', 'Expected Points Next GW', 'Expected Points Next 5 GWs', 'Total Points Consistency']],
           index=False, footer=False, descriptions=False)

# Select a good week to play free hit
The idea is that there are game weeks in which the fixtures have larger differences in team strengths than in others. This is the case when stronger teams mainly play weaker teams. The assumption is that these game weeks have higher potential for earning points for the stronger teams. The list below shows the game weeks sorted by the average relative fixture strength (of the four fixtures with the highest relative fixture strength).

In [None]:
# Per game week: sum of the four largest relative fixture strengths divided by 4,
# showing the ten game weeks with the highest value first.
# Series.sum(level=...) was removed in pandas 2.0, hence the explicit groupby on the first index level.
(team_fixture_strength.groupby('Game Week')['Rel. Fixture Strength']
    .apply(lambda x: x.nlargest(4))
    .groupby(level=0).sum()
    .div(4)
    .sort_values(ascending=False)
    .to_frame()
    .head(10))

# How predictive is the ICT index?
The Fantasy Premier League website says:
>The ICT Index is a football statistical index developed specifically to assess a player as an FPL asset. It uses match event data to generate a single score for three key areas – Influence, Creativity and Threat. These figures then combine to create an individual’s ICT Index score. It condenses more than 40 match event statistics into four distinct scores. These offer a view on player performance for factors that are known to produce FPL points. See https://fantasy.premierleague.com/help

So as the season progresses the chart should converge to a line if the index is indeed predictive.

In [None]:
# Calculate a fitted straight line to show what the ideal would look like

# Count the number of players
player_count = player_teams.shape[0]

# Split the data into training/testing sets (total points are the feature, the ICT index is the target)
total_points = np.reshape(player_teams['Total Points'].values, (player_teams.shape[0], 1))
itc_index = np.reshape(player_teams['ICT Index'].values, (player_teams.shape[0], 1))
total_points_train, total_points_test, itc_index_train, itc_index_test = train_test_split(total_points, itc_index, test_size = 0.4, random_state = 1)

# Train linear regression model
regr = LinearRegression()
_ = regr.fit(total_points_train, itc_index_train)

# Make predictions using the testing set
itc_index_pred = regr.predict(total_points_test)

# The coefficients
print(f'Coefficients: {regr.coef_[0][0]:.2f} ')
# The mean squared error between the true and the predicted ICT index of the testing set
print(f'Mean squared error: {np.mean((itc_index_test - itc_index_pred) ** 2):.2f}')
# Coefficient of determination (R^2) on the testing set: 1 is perfect prediction.
# It must be scored against the true targets; scoring against the predictions always yields 1.
print(f'Variance score: {regr.score(total_points_test, itc_index_test):.2f}')

In [None]:
# Scatter plot of total points against the ICT index for every player, together with the
# fitted regression line evaluated on the testing set.
fig = {
    'data': [
        {'x': player_teams['Total Points'], 'y': player_teams['ICT Index'], 'text': player_teams['Name and Short Team'], 'mode': 'markers', 'name': 'Data'},
        # Both arrays have one column; ravel() turns them into the flat sequences needed for plotting.
        {'x': total_points_test.ravel(), 'y': regr.predict(total_points_test).ravel(), 'mode': 'lines', 'name': 'Ideal (fully predictive)'}
    ],
    'layout': {
        'xaxis': {'title': 'Total Points'},
        'yaxis': {'title': 'ICT Index'},
        'template': 'plotly_white'
    }
}
py.iplot(fig)

# Back test the expected points
The basic idea of testing the predictions is to look at each past game week, predict the expected points for the game week (both adjusted for relative team strengths and not adjusted), optimise the team based on the expected points and then calculate the total expected points for the optimised team (only for the selected player). For validation, we calculate the actual points of the players of the optimised team. We also calculate the points of the dream team, i.e. the total points of the team with highest actual points for each game week.

In [None]:
import timeit

def filter_gw(player_fixture_stats: pd.DataFrame, gw: int) -> pd.DataFrame:
    """Return the rows of one game week, priced at the cost of that game.

    Args:
        player_fixture_stats: Rows with the columns 'Player ID', 'Game Week',
            'Game Cost' and 'Current Cost'.
        gw: Game week to select.

    Returns:
        The rows of game week ``gw`` that have a 'Game Cost', indexed by
        'Player ID'. 'Current Cost' holds the former 'Game Cost'; the
        previous index is kept as a column by ``reset_index``.
    """
    is_gw = player_fixture_stats['Game Week'] == gw
    has_game_cost = player_fixture_stats['Game Cost'].notnull()
    gw_rows = player_fixture_stats[is_gw & has_game_cost]

    # Replace today's cost with the cost the player had in that game.
    gw_rows = gw_rows.drop(columns=['Current Cost']).rename(columns={'Game Cost': 'Current Cost'})

    return gw_rows.reset_index().set_index('Player ID')

def get_optimal_team_exp(player_team_exp_gw: pd.DataFrame, ep_column: str) -> pd.DataFrame:
    """Optimise a squad on an expected points column and return its key columns.

    Both the squad and the selection are optimised on ``ep_column`` with the
    formation '2-5-5-3' and a budget of 100.

    Args:
        player_team_exp_gw: Candidate players passed to get_optimal_squad.
        ep_column: Name of the expected points column to optimise on.

    Returns:
        The columns 'Game Total Points', ``ep_column``, 'Point Factor' and
        'Selected?' of the optimal squad.
    """
    # NOTE: this overwrites the notebook-level player_team_optimal variable.
    global player_team_optimal
    player_team_optimal = get_optimal_squad(player_team_exp_gw, 
                                    optimise_team_on=ep_column,
                                    optimise_sel_on=ep_column, 
                                    formation='2-5-5-3', 
                                    budget=100)
    return player_team_optimal[['Game Total Points', ep_column, 'Point Factor', 'Selected?']]
 
    
def get_optimal_team_act(players_history_fixtures_gw: pd.DataFrame) -> pd.DataFrame:
    """Return the squad with the highest actual points (the dream team).

    Both the squad and the selection are optimised on 'Game Total Points'
    with the formation '2-5-5-3' and a budget of 100.

    Args:
        players_history_fixtures_gw: Candidate players passed to get_optimal_squad.

    Returns:
        The columns 'Game Total Points' and 'Selected?' of the optimal squad.
    """
    points_col = 'Game Total Points'
    dream_team = get_optimal_squad(
        players_history_fixtures_gw,
        optimise_team_on=points_col,
        optimise_sel_on=points_col,
        formation='2-5-5-3',
        budget=100,
    )
    return dream_team[[points_col, 'Selected?']]


def calc_team_points(player_team: pd.DataFrame, points_col: str) -> float:
    """Sum the points of the selected players of a team.

    Args:
        player_team: Team with a 'Selected?' column, the ``points_col``
            column and, optionally, a 'Point Factor' column.
        points_col: Name of the column holding the points to sum.

    Returns:
        The sum of ``points_col`` over the rows where 'Selected?' is True,
        each value multiplied by 'Point Factor' if that column exists.
    """
    selected = player_team[player_team['Selected?'] == True]
    points = selected[points_col]

    if 'Point Factor' in selected.columns:
        points = points * selected['Point Factor']

    return points.sum()

def pred_gw(players_gw_next_eps_gw: pd.DataFrame, ep_column: str, player_teams: pd.DataFrame) -> pd.DataFrame:
    """Build the candidate players of one game week for an expected points column.

    Args:
        players_gw_next_eps_gw: Game week rows indexed by player with the
            columns 'Game Total Points', ``ep_column`` and 'Current Cost'.
        ep_column: Name of the expected points column; rows without a value are dropped.
        player_teams: Player attributes indexed by player.

    Returns:
        The joined rows with 'Game Total Points' > 0 and 'Minutes Percent' > 50,
        plus an empty 'News And Date' column.
    """
    team_cols = ['Name', 'Field Position ID', 'Field Position', 'Player Team ID', 'First Name', 'Last Name',
                 'ICT Index', 'Team Short Name', 'Name and Short Team', 'Minutes Percent']

    gw_eps = players_gw_next_eps_gw[['Game Total Points', ep_column, 'Current Cost']]
    gw_eps = gw_eps[gw_eps[ep_column].notnull()]

    candidates = gw_eps.merge(player_teams[team_cols], left_index=True, right_index=True, suffixes=(False, False))
    candidates['News And Date'] = None # Unfortunately, we don't have historic news information.

    # NOTE(review): the filter on 'Game Total Points' uses the actual result of the game week - confirm this is intended.
    scored = candidates['Game Total Points'] > 0
    played_enough = candidates['Minutes Percent'] > 50
    return candidates[scored & played_enough]
    
# NOTE: this definition replaces the calc_stats function defined earlier in the notebook,
# which has a different signature.
def calc_stats(player_team_optimal_exp: pd.DataFrame, pred_kind: str, ep_column: str) -> dict:
    """Compare the expected with the actual points of an optimised team.

    Args:
        player_team_optimal_exp: Optimised team with the columns
            'Game Total Points', ``ep_column`` and 'Selected?'.
        pred_kind: Prefix for the keys of the result, e.g. 'Calc' or 'NN'.
        ep_column: Name of the expected points column.

    Returns:
        A dict with the team's expected and actual points (as computed by
        calc_team_points) and the mean absolute and mean square error
        between 'Game Total Points' and ``ep_column`` over all rows.
    """
    errors = player_team_optimal_exp['Game Total Points'] - player_team_optimal_exp[ep_column]

    return {
        f'{pred_kind} Expected Points': calc_team_points(player_team_optimal_exp, ep_column),
        f'{pred_kind} Actual Points': calc_team_points(player_team_optimal_exp, 'Game Total Points'),
        f'{pred_kind} Mean Absolute Error': errors.abs().mean(),
        f'{pred_kind} Mean Square Error': (errors ** 2).mean(),
    }


def back_test_gw(players_gw_team_next_eps: pd.DataFrame, gw: int, player_teams: pd.DataFrame, team_fixture_strength: pd.DataFrame, eps_model: tf.keras.Model) -> dict:   
    """Back test the three expected points predictions for one game week.

    For each of 'Expected Points Calc', 'Expected Points NN' and
    'Expected Points', a squad is optimised on the prediction and its
    expected points, actual points and errors are collected.

    Args:
        players_gw_team_next_eps: Expected points per player and game week.
        gw: Game week to back test.
        player_teams: Player attributes indexed by player.
        team_fixture_strength: Not used in the body of this function.
        eps_model: Not used in the body of this function.

    Returns:
        A dict with the game week, the actual points of the dream team and
        the statistics of the three predictions.
    """
    # The intermediate frames are stored in notebook globals, so they can be inspected after a run.
    global players_gw_next_eps_gw
    players_gw_next_eps_gw = (players_gw_team_next_eps
                               .pipe(filter_gw, gw))
    
    # Dream team: the squad optimised on the actual points of the game week.
    player_team_optimal_act = (players_gw_next_eps_gw
                               .pipe(get_optimal_team_act))
    
    global calc_pred_gw
    calc_pred_gw = (players_gw_next_eps_gw
        .pipe(pred_gw, 'Expected Points Calc', player_teams))
    
    global nn_pred_gw
    nn_pred_gw = (players_gw_next_eps_gw
        .pipe(pred_gw, 'Expected Points NN', player_teams))
    
    global calc_nn_pred_gw
    calc_nn_pred_gw = (players_gw_next_eps_gw
        .pipe(pred_gw, 'Expected Points', player_teams))
                        
    # Merge the statistics of all predictions into one flat result row.
    results = {}
    results['Game Week'] = gw
    results['Actual Points Dream Team'] = player_team_optimal_act.pipe(calc_team_points, 'Game Total Points')
    results = {**results, **calc_pred_gw.pipe(get_optimal_team_exp, 'Expected Points Calc').pipe(calc_stats, 'Calc', 'Expected Points Calc') }
    results = {**results, **nn_pred_gw.pipe(get_optimal_team_exp, 'Expected Points NN').pipe(calc_stats, 'NN', 'Expected Points NN')}
    results = {**results, **calc_nn_pred_gw.pipe(get_optimal_team_exp, 'Expected Points').pipe(calc_stats, 'NN+Calc', 'Expected Points')}
                  
    return results

In [None]:
# Back test the game weeks from 2 up to (but excluding) the current one, one result row per game week.
# The rows are collected in a list and converted once, because DataFrame.append was removed in
# pandas 2.0 and copies the whole frame on every call.
backtest_rows = []
for gw in log_progress(range(2, current_gw), name='Game weeks'):
    backtest_rows.append(back_test_gw(players_gw_team_eps.reset_index(), gw, player_teams, team_fixture_strength, eps_model))
backtest_results = pd.DataFrame(backtest_rows)

In [None]:

# Plot the actual points per game week of the dream team and of the three optimised teams.
py.iplot([{'x': backtest_results['Game Week'], 
           'y':  backtest_results[col], 'name': col} for col in ('Actual Points Dream Team', 'Calc Actual Points', 'NN Actual Points', 'NN+Calc Actual Points')])

In [None]:
# Column means of the back-test results, skipping the first five rows.
backtest_results.iloc[5:].mean()