# Imports & setup

In [1]:
import os
os.chdir("D:\PulpitE\FPL_ML")

In [2]:
import pandas as pd
import numpy as np
import pickle
from src.match_names import name_fbref_to_fpl, neutralize_name
from config import *

In [3]:
info = ["Name_original", "GW", "Season", "Team", "Opponent", "Was_home", "Team_rating", "Opp_rating", "FPL_pos", "Price"]

In [4]:
FEATURES_OUTFIELD = ['Was_home',
 'Rating_difference',
 'Avg_FPL_points',
 'xP_4',]

# Reading from files

In [5]:
df = pd.read_csv("data/upcoming/upcoming_fixtures.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


In [6]:
df["Was_home_xP"] = df["Avg_FPL_points"] * df["Was_home"]
df["RD_xP"] = df["Avg_FPL_points"] * df["Rating_difference"]

In [7]:
# df[FEATURES_OUTFIELD].isna().sum()

In [8]:
df = df[~df[FEATURES_OUTFIELD].isnull().any(axis=1)] # for some reason 45 rows are missing some feature values

In [9]:
X_test = df[df["Season"] == CURRENT_SEASON][FEATURES_OUTFIELD]

In [10]:
df.columns

Index(['Unnamed: 0', 'Date', 'Day', 'Venue', 'Team', 'Opponent', 'Name',
       'Start', 'Pos', 'Min',
       ...
       'Team_rating_30', 'Min_points_30', 'Team_result_30', 'DEF', 'FWD', 'GK',
       'MID', 'Finished', 'Was_home_xP', 'RD_xP'],
      dtype='object', length=276)

In [11]:
X_test.columns

Index(['Was_home', 'Rating_difference', 'Avg_FPL_points', 'xP_4'], dtype='object')

In [12]:
# df[df["FPL_pos"] == "GK"]

In [13]:
file = open("models/GBR.pkl",'rb')
model = pickle.load(file)

In [14]:
model

# Predicting

In [15]:
def adjust_for_injuries(df):
    active_players = pd.read_csv("data/misc/active_players.csv")
    injured_players = active_players[active_players["chance_of_playing_this_round"] == 0]["name"].to_list()
    df.loc[df["Name_original"].isin(injured_players),"Pred"] = 0
    return df

In [16]:
def adjust_goalkeepers(df):
    gks = pd.read_csv("data/misc/goalkeepers.csv")
    gk_names = gks["Name"].to_list()
    gk_names = [name_fbref_to_fpl(neutralize_name(n)) for n in gk_names]
    # print(gk_names)
    df.loc[(df["FPL_pos"] == "GK") & ~(df["Name_original"].apply(neutralize_name).isin(gk_names)), "Pred"] = 0
    return df

In [17]:
def get_predictions(model, df, X, all_remaining=False):
    # make predictions on the test data and glues them to the rest of the dataframe
    predictions = model.predict(X)
    df_predictions = df[(df["Season"] == CURRENT_SEASON)].reset_index(drop=True)
        
    df_predictions.loc[:, "Pred"] = predictions
    preds = df_predictions[info + ["Pred"]]
    
    preds = adjust_goalkeepers(preds)
    preds = adjust_for_injuries(preds).sort_values(by=["Pred"], ascending = False)
    
    return preds

In [18]:
preds = get_predictions(model, df, X_test)

Feature names unseen at fit time:
- Rating_difference
- Was_home
Feature names seen at fit time, yet now missing:
- RD_xP
- Was_home_xP

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [19]:
# preds[(preds["FPL_pos"] == "GK") & (preds["GW"] == NEXT_GAMEWEEK)].head(30)

In [20]:
# next gameweek
preds[preds["GW"] == NEXT_GAMEWEEK].head(30)

Unnamed: 0,Name_original,GW,Season,Team,Opponent,Was_home,Team_rating,Opp_rating,FPL_pos,Price,Pred
12105,Mohamed Salah,4.0,2023-24,Liverpool,Aston Villa,1.0,1946.521606,1825.741455,MID,12.5,6.359713
5514,Erling Haaland,4.0,2023-24,Manchester City,Fulham,1.0,2083.702148,1732.639404,FWD,14.0,6.305575
6316,Guglielmo Vicario,4.0,2023-24,Tottenham,Burnley,0.0,1829.239014,1722.952881,GK,5.0,6.015216
3314,Carlton Morris,4.0,2023-24,Luton,West Ham,1.0,1602.477417,1787.825562,FWD,5.5,5.943182
2930,Bruno Borges Fernandes,4.0,2023-24,Manchester Utd,Arsenal,0.0,1865.755249,1927.032104,MID,8.5,5.788198
3000,Bryan Mbeumo,4.0,2023-24,Brentford,Bournemouth,1.0,1835.573242,1660.290894,MID,6.7,5.644331
7784,Jarrod Bowen,4.0,2023-24,West Ham,Luton,0.0,1787.825562,1602.477417,MID,7.0,5.093213
13363,Phil Foden,4.0,2023-24,Manchester City,Fulham,1.0,2083.702148,1732.639404,MID,7.6,4.933043
14963,Solly March,4.0,2023-24,Brighton,Newcastle Utd,1.0,1843.758179,1884.512817,MID,6.6,4.798543
11059,Marcus Rashford,4.0,2023-24,Manchester Utd,Arsenal,0.0,1865.755249,1927.032104,MID,9.0,4.773205


In [21]:
# all preds
preds.head(20)

Unnamed: 0,Name_original,GW,Season,Team,Opponent,Was_home,Team_rating,Opp_rating,FPL_pos,Price,Pred
635,Mohamed Salah,1.0,2023-24,Liverpool,Chelsea,0.0,1946.89978,1788.09436,MID,12.5,7.03043
636,Mohamed Salah,2.0,2023-24,Liverpool,Bournemouth,1.0,1943.976074,1662.536621,MID,12.5,6.550406
12112,Mohamed Salah,11.0,2023-24,Liverpool,Luton,0.0,1946.521606,1602.477417,MID,12.5,6.426951
12116,Mohamed Salah,15.0,2023-24,Liverpool,Sheffield United,0.0,1946.521606,1631.764282,MID,12.5,6.42373
12122,Mohamed Salah,21.0,2023-24,Liverpool,Bournemouth,0.0,1946.521606,1660.290894,MID,12.5,6.420702
12128,Mohamed Salah,27.0,2023-24,Liverpool,Nott'ham Forest,0.0,1946.521606,1677.87793,MID,12.5,6.418887
12130,Mohamed Salah,29.0,2023-24,Liverpool,Everton,0.0,1946.521606,1691.463989,MID,12.5,6.41751
12106,Mohamed Salah,5.0,2023-24,Liverpool,Wolves,0.0,1946.521606,1704.537109,MID,12.5,6.416207
12120,Mohamed Salah,19.0,2023-24,Liverpool,Burnley,0.0,1946.521606,1722.952881,MID,12.5,6.414403
12135,Mohamed Salah,34.0,2023-24,Liverpool,Fulham,0.0,1946.521606,1732.639404,MID,12.5,6.41347


In [22]:
# points all season
pd.DataFrame(preds.groupby(["Name_original"])["Pred"].sum().sort_values(ascending=False)).head(30)

Unnamed: 0_level_0,Pred
Name_original,Unnamed: 1_level_1
Mohamed Salah,243.498854
Erling Haaland,233.343044
Guglielmo Vicario,224.334471
Carlton Morris,220.740209
Bruno Borges Fernandes,219.267812
Bryan Mbeumo,207.754938
Jarrod Bowen,191.290071
Phil Foden,182.543399
Solly March,182.082577
Marcus Rashford,180.761864


# Saving to file

In [23]:
preds.to_csv("predictions/preds_next_season.csv", index=False)

# Importance

In [24]:
# import shap
# explainer = shap.Explainer(model.predict, X_test[9000:10000])
# shap_values = explainer(X_test[9000:10000])

In [25]:
# shap.plots.bar(shap_values, max_display=15)

In [26]:
# # Salah vs Nottingham Forest
# shap.plots.waterfall(shap_values[502])

In [27]:
# shap.plots.waterfall(shap_values[470])