# Fantasy Premier League (FPL) Advisor Neural Network Model Builder

The purpose of this notebook is to train a neural network that can predict the expected points for a specific player and fixture combination. Currently, the training data is based on the 2018/2019 season FPL data.

# Installation
To get started, run the following command to install all required dependencies.

In [None]:
!pip install -q -r requirements.txt

# Import requirements
Here we import all external and local modules.

In [None]:
import pandas as pd, re, datetime as dt, numpy as np, plotly.offline as py, cufflinks as cf, ipywidgets as widgets, os, sys
from ipywidgets import interact, fixed
from fplpandas import FPLPandas
from datadict.jupyter import DataDict

import tensorflow as tf
from tensorflow import feature_column
from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras import layers
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import matplotlib.pyplot as plt

# Load local modules
sys.path.append(os.getcwd())
from common import *
from jupyter import *
from data import *
from nn import *

# Put cufflinks into offline plotting mode.
cf.go_offline()
# Allow up to 100 columns when a wide data frame is displayed.
pd.options.display.max_columns = 100

# Set variables
This section sets all important global variables.

In [None]:
# Season to train on; it is interpolated into the CSV paths read below (./{season}/...).
season = '2018-19'

# Load data dictionary
This section loads the data dictionary. The data dictionary contains default ordering of fields, for each field a description, default format and mapping of API field names to more readable ones. It is used to show data in a more user-friendly way.

In [None]:
# Build the data dictionary from the CSV file in the working directory.
dd = DataDict(data_dict_file='./data_dictionary.csv')

# Load data

In [None]:
# Read the season's raw player table (indexed by the `id` column) and hand it,
# together with the data dictionary, to the project helper prepare_players.
players = prepare_players(
    pd.read_csv(f'./{season}/players_raw.csv', index_col='id', encoding='utf-8'),
    dd)
# Uncomment to view data: dd.display(players, head=5, excel_file='players.xlsx', stats=True)

In [None]:
# Load the merged game-week file. The player id is taken from the trailing "_<digits>"
# of the `name` column, and the frame is indexed by (player, fixture) before being
# handed to the project helper prepare_players_history.
players_history = (pd.read_csv(f'./{season}/merged_gw.csv', encoding='utf-8')
    # expand=False makes str.extract return a Series for the single capture group, so a
    # plain column (not a one-column DataFrame) is assigned.
    .assign(**{'Player ID': lambda df: df['name'].str.extract(r'[^\d]+\_(\d+)$', expand=False).astype('int')})
    .rename(columns={'fixture': 'Fixture ID'})
    .set_index(['Player ID', 'Fixture ID'])
    .pipe(prepare_players_history, dd)
    .drop(columns=['name']))
# Uncomment to view data: dd.display(players_history, head=5, excel_file='players_history.xlsx', stats=True)

In [None]:
# Read the season's fixtures (indexed by the `id` column) and pass them through
# the project helper prepare_fixtures with the data dictionary.
fixtures = prepare_fixtures(pd.read_csv(f'./{season}/fixtures.csv', index_col='id'), dd)

In [None]:
# Read the season's teams, discard rows that have no `id`, index by `id`, and pass the
# result through the project helper prepare_teams with the data dictionary.
teams = prepare_teams(
    pd.read_csv(f'./{season}/teams.csv')
    .dropna(subset=['id'])
    .set_index('id'),
    dd)

# Create input features from derived data
This section creates new datasets by combining the previously loaded ones.

In [None]:
# Combine players with their teams via the project helper get_player_teams.
player_teams = get_player_teams(players, teams, dd)

In [None]:
# Combine fixtures with team data via the project helper get_fixture_teams.
fixture_teams = get_fixture_teams(fixtures, teams, dd)

In [None]:
# Derive per-team fixture scores via the project helper get_team_fixture_scores.
team_fixture_scores = get_team_fixture_scores(fixture_teams, teams)

In [None]:
# Aggregate the fixture scores per team via the project helper get_team_score_stats.
# Later cells read 'Total Team Goals Scored', 'Total Team Goals Conceded' and
# 'Team Short Name' from the result.
team_score_stats = get_team_score_stats(team_fixture_scores)

In [None]:
# Add the goal totals of each player's team, plus the summed 'Total Points' of all
# players belonging to that team.
player_team_stats = (player_teams
                     .merge(team_score_stats[['Total Team Goals Scored', 'Total Team Goals Conceded']], left_on='Player Team ID', right_index=True)
                     # transform('sum') returns one value per row, aligned to the frame's own index.
                     # A plain .sum() would be indexed by team id and, when assigned, would be
                     # aligned against the row index instead of being broadcast per team.
                     .assign(**{'Team Total Points': (lambda df: df.groupby('Player Team ID')['Total Points'].transform('sum'))}))

In [None]:
# One row per player and fixture: the player's game points, minutes and cost joined with
# the fixture details and the player's team-level attributes.
players_history_fixtures = (players_history[['Game Total Points', 'Game Minutes Played', 'Game Cost']].reset_index()
    .merge(fixtures, left_on='Fixture ID', right_index=True)
    .merge(player_team_stats[['Player Team ID', 'Field Position ID', 'Field Position', 'Name', 'Total Team Goals Scored', 'Total Team Goals Conceded']], left_on='Player ID', right_index=True)
    # The player is at home when the fixture's home team is the player's own team.
    .assign(**{'Is Home?': (lambda df: df['Home Team ID'] == df['Player Team ID'])})
    # Opponent = away team for home players, home team otherwise. Series.where keeps the
    # away id where 'Is Home?' is True and falls back to the home id elsewhere; this is
    # vectorised and also works on an empty frame, unlike a row-wise apply.
    .assign(**{'Opp Team ID': (lambda df: df['Away Team ID'].where(df['Is Home?'], df['Home Team ID']))}))

In [None]:
# Total game points collected by each team's players, split into home and away fixtures.
team_point_stats = (players_history_fixtures
    .groupby(['Player Team ID', 'Is Home?'])['Game Total Points']
    .sum()
    # Pivot the innermost level ('Is Home?') into columns; the index stays 'Player Team ID'.
    .unstack()
    .rename(columns={False: 'Team Total Points Away', True: 'Team Total Points Home'})
    .assign(**{
        'Team Total Points': lambda df: df['Team Total Points Away'] + df['Team Total Points Home'],
        # Computed as away points divided by home points.
        'Team Total Points Home Away Ratio': lambda df: df['Team Total Points Away'] / df['Team Total Points Home'],
    }))

In [None]:
# Game points that players scored against each team: rows are grouped by the opposing
# team's id, and 'Is Home?' refers to the scoring player, not to the team in the index.
team_point_opp_stats = (players_history_fixtures
    .groupby(['Opp Team ID', 'Is Home?'])['Game Total Points']
    .sum()
    # Pivot the innermost level ('Is Home?') into columns.
    .unstack()
    .rename(columns={False: 'Team Total Points Opps Away', True: 'Team Total Points Opps Home'})
    # The index holds the opposing team's id; expose it under the name 'Team ID'.
    .rename_axis('Team ID')
    .assign(**{
        'Team Total Points Opps': lambda df: df['Team Total Points Opps Away'] + df['Team Total Points Opps Home'],
        # Computed as away points divided by home points.
        'Team Total Points Opps Home Away Ratio': lambda df: df['Team Total Points Opps Away'] / df['Team Total Points Opps Home'],
    }))

In [None]:
# Home/away point totals per field position (the grouping key is 'Field Position ID',
# despite the variable name).
team_stats = (players_history_fixtures
    .groupby(['Field Position ID', 'Is Home?'])['Game Total Points']
    .sum()
    .unstack()
    .rename(columns={False: 'Total Points Away', True: 'Total Points Home'})
    # Halves the distance between the raw away/home ratio and 1.
    .assign(**{'Total Points Home Away Ratio': lambda df: 1-(1-df['Total Points Away']/df['Total Points Home'])/2})
    # Drop the leftover 'Is Home?' label from the columns axis.
    .rename_axis(None, axis='columns'))

In [None]:
# Attach the team-level aggregates to every player/fixture row. Every merge uses pandas'
# default inner join, so rows without a match in a lookup table are dropped.
player_fixtures_team_stats = (players_history_fixtures
    # Point total of the player's own team.
    .merge(team_point_stats[['Team Total Points']], left_on='Player Team ID', right_index=True)
    # The same total, looked up for the opposing team.
    .merge(team_point_stats[['Team Total Points']].rename(columns={'Team Total Points': 'Opp Team Total Points'}), left_on='Opp Team ID', right_index=True)
    # Points scored against the player's own team.
    .merge(team_point_opp_stats[['Team Total Points Opps']], left_on='Player Team ID', right_index=True)
    # Short name and goal totals of the opposing team. Only the three selected columns are
    # renamed; the selection contains no 'Team Total Points' column, so no mapping is needed for it.
    .merge(team_score_stats[['Team Short Name', 'Total Team Goals Scored', 'Total Team Goals Conceded']]
           .rename(columns={'Team Short Name': 'Opp Team Short Name',
                            'Total Team Goals Scored': 'Total Opp Team Goals Scored',
                            'Total Team Goals Conceded': 'Total Opp Team Goals Conceded'}),
           left_on='Opp Team ID', right_index=True)
    # Home/away ratio for the player's field position.
    .merge(team_stats[['Total Points Home Away Ratio']], left_on=['Field Position ID'], right_index=True)
    .set_index(['Player ID', 'Fixture ID']))

In [None]:
# Keep the columns needed downstream, restrict to appearances in which the player
# actually played, then compute the per-player statistics with the project helper
# calc_player_fixture_stats.
player_fixture_stats = calc_player_fixture_stats(
    player_fixtures_team_stats
    .loc[:, ['Game Total Points', 'Game Minutes Played',
             'Away Team ID', 'Home Team ID',
             'Away Team Score', 'Home Team Score',
             'Away Team Difficulty', 'Home Team Difficulty',
             'Game Week',
             'Total Team Goals Scored', 'Total Team Goals Conceded',
             'Total Opp Team Goals Scored', 'Total Opp Team Goals Conceded',
             'Is Home?', 'Field Position',
             'Team Total Points', 'Opp Team Total Points', 'Team Total Points Opps',
             'Total Points Home Away Ratio', 'Kick Off Time']]
    .loc[lambda df: df['Game Minutes Played'] > 0])


In [None]:
# Score two simple baseline columns against the actual 'Game Total Points' with the
# project helper calc_mse (presumably a mean squared error, judging by its name — confirm
# in the local module). These give reference values to compare the network against.
print(calc_mse(player_fixture_stats, 'Avg Points To GW', 'Game Total Points'))
print(calc_mse(player_fixture_stats, 'Avg Points Opp Points Adj To GW', 'Game Total Points'))

## Create training and test datasets 

In [None]:
# Use only rows where 'GWs Played To GW' is at least 10, keep the model inputs plus the
# label, drop incomplete rows, and split with the project helper nn_split (frac=0.8).
train_df, test_df = nn_split(
    player_fixture_stats
    .loc[lambda df: df['GWs Played To GW'] >= 10]
    .loc[:, ['Field Position', 'Avg Points Opp Points Adj To GW', 'Game Total Points',
             'Is Home?', 'Avg Minutes Played Recently To GW', 'Total Opp Team Goals Scored Diff']]
    .dropna(),
    frac=0.8)

# Turn both frames into model inputs with 'Game Total Points' as the label column.
train_ds = nn_prep_ds(train_df, 'Game Total Points')
test_ds = nn_prep_ds(test_df, 'Game Total Points')

In [None]:
# Field position is categorical and gets one-hot encoded.
field_pos = feature_column.categorical_column_with_vocabulary_list('field_position', ['GK', 'DEF', 'MID', 'FWD'])
field_pos_one_hot = feature_column.indicator_column(field_pos)

# The remaining inputs are fed in as plain numeric columns.
feature_columns = [field_pos_one_hot] + [
    feature_column.numeric_column(col)
    for col in ['avg_points_opp_points_adj_to_gw', 'avg_minutes_played_recently_to_gw', 'is_home_', 'total_opp_team_goals_scored_diff']
]


## Create model
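Here we create a neural network with four layers: a feature layer built from the feature columns, two small hidden dense layers with ReLU activation, and a single-unit output layer.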

In [None]:
# Fix TensorFlow's global random seed before the layers are created so that re-running
# this cell gives repeatable weight initialisation. Randomness outside TensorFlow's global
# seed (e.g. inside the data preparation helpers) is not controlled here.
tf.random.set_seed(42)

# Feature layer, two small ReLU hidden layers, and a single linear output unit that
# predicts the points for one player/fixture row.
model = tf.keras.Sequential([
  tf.keras.layers.DenseFeatures(feature_columns, dtype='float64'),
  layers.Dense(4, activation='relu', dtype='float64'),
  layers.Dense(3, activation='relu', dtype='float64'),
  layers.Dense(1, dtype='float64')
])

# Optimise mean squared error; MSE and MAE are also tracked as metrics.
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.RMSprop(0.001),
              metrics=['mse', 'mae'])

## Train model

In [None]:
# Stop training once val_loss has not improved for `patience` consecutive epochs.
# patience must be smaller than `epochs`, otherwise the callback can never trigger.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# NOTE(review): the test set doubles as the validation set here, so the early-stopping
# point is chosen on the same data that is used for evaluation below.
train_history = model.fit(train_ds,
                          validation_data=test_ds,
                          epochs=60,
                          callbacks=[early_stop])

## Evaluate model

In [None]:
# Plot the smoothed MSE recorded in the training history.
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)
plotter.plot({'Early Stopping': train_history}, metric='mse')
plt.gca().set_ylabel('MSE [Game Total Points]')

In [None]:
# Predict on the test dataset and plot predictions against the true points.
# NOTE(review): this assumes test_ds yields rows in the same order as test_df — confirm
# that nn_prep_ds does not shuffle.
test_predictions = model.predict(test_ds).flatten()

ax = plt.axes(aspect='equal')
ax.scatter(test_df['Game Total Points'], test_predictions)
ax.set_xlabel('True Values [Game Total Points]')
ax.set_ylabel('Predictions [Game Total Points]')

# Same range on both axes, plus the diagonal on which a perfect prediction would lie.
lims = [-3, 20]
ax.set_xlim(lims)
ax.set_ylim(lims)
_ = ax.plot(lims, lims)

In [None]:
# Show predictions next to the inputs. assign() builds a new frame instead of adding the
# column to test_df in place, so re-running earlier cells (e.g. rebuilding test_ds from
# test_df) never picks up a stray 'Predicted' column.
# NOTE(review): this assumes test_predictions is in test_df row order — confirm that
# nn_prep_ds does not shuffle.
test_results = test_df.assign(Predicted=test_predictions)
test_results

## Save model

In [None]:
# Persist the trained model under models/expected_points (path relative to the working directory).
model.save('models/expected_points')