# Imports

In [1]:
import os
os.chdir("D:\PulpitE\FPL_ML")

In [2]:
import pandas as pd
import numpy as np
from vaastav.getters import *
from datetime import datetime

from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
import torchvision

import torch
import torch.nn as nn

import xgboost as xgb

# Constants

In [3]:
NEXT_GW = 25

# fixtures in sample
FIS = 8

# prediction range
FROM_GW = 25
TO_GW = 25

batch_size = 1

# Device

In [4]:
# device = torch.device("cuda")
# device = torch.device('mps')
device = torch.device('cpu')

# Reading data

In [5]:
upcoming_fixtures = pd.read_csv("data/upcoming_fixtures.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


In [6]:
# selecting needed rows
upcoming_fixtures = upcoming_fixtures[(upcoming_fixtures["Finished"] == True) | ((upcoming_fixtures["Finished"] == False) & (upcoming_fixtures["GW"].between(FROM_GW, TO_GW)))]

In [7]:
# upcoming_fixtures[upcoming_fixtures["Name"] == "Erling-Haaland"]

In [8]:
# upcoming_fixtures.loc[9315]

# Dataset

In [9]:
features = ["Min", "Gls", "Ast", "Sh", "SoT", "xG", "npxG", "xAG", "SCA", "GCA", "CS", "CrdY", "CrdR", "Opp rating", "Team Score", "Opp Score", "Was Home"]
to_predict = ["FPL"]

In [10]:
def init_index_table(df):
    names = df["Name"].unique()
    result_dict = {}
    
    current_index = 0
    player_index = 0
    
    for name in names:
        sample_index = 0
        samples_for_name = df[(df["Name"] == name) & (df["Finished"] == False)].shape[0]
        for i in range(samples_for_name):
            result_dict[current_index] = [player_index, sample_index]
            current_index += 1
            sample_index += 1
        player_index += 1
    
    return result_dict

In [11]:
class PandasDataset(Dataset):
    def __init__(self, dataframe, starting_gw, ending_gw):
        self.dataframe = dataframe
        
        self.starting_gw = starting_gw
        self.ending_gw = ending_gw
        self.names = self.dataframe["Name"].unique()
        self.length = self.calculate_length()

        self.index_table = init_index_table(self.dataframe)
        self.transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

    def calculate_length(self):
        length = 0
        for name in self.names:
            length += self.dataframe[(self.dataframe["Name"] == name) & self.dataframe["GW"].between(FROM_GW, TO_GW)].shape[0]
        return length
    
    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        # print(idx)
        player_id, sample_id = self.index_table[idx]
        name = self.names[player_id]

        rows_features = self.dataframe[(self.dataframe["Name"] == name) & (self.dataframe["Finished"] == True)]
        features_item = rows_features.tail(FIS)[features + to_predict].values[:, :-1]
        return features_item

In [12]:
df = upcoming_fixtures

df_general = df
df_gk = df[df["FPL position"] == "GK"]
df_def = df[df["FPL position"] == "DEF"]
df_mid = df[df["FPL position"] == "MID"]
df_fwd = df[df["FPL position"] == "FWD"]

In [13]:
dataset_general = PandasDataset(df_general, FROM_GW, TO_GW)

dataset_gk = PandasDataset(df_gk, FROM_GW, TO_GW)
dataset_def = PandasDataset(df_def, FROM_GW, TO_GW)
dataset_mid = PandasDataset(df_mid, FROM_GW, TO_GW)
dataset_fwd = PandasDataset(df_fwd, FROM_GW, TO_GW)

In [14]:
# df[df["Name"] == "Erling-Haaland"].tail(1)

In [15]:
dataset_general.__len__(), dataset_gk.__len__(), dataset_def.__len__(), dataset_mid.__len__(), dataset_fwd.__len__()

(438, 29, 154, 200, 49)

In [16]:
dataloader_general_test = DataLoader(dataset_general, batch_size=batch_size, shuffle=False)

dataloader_gk_test = DataLoader(dataset_gk, batch_size=batch_size, shuffle=False)
dataloader_def_test = DataLoader(dataset_def, batch_size=batch_size, shuffle=False)
dataloader_mid_test = DataLoader(dataset_mid, batch_size=batch_size, shuffle=False)
dataloader_fwd_test = DataLoader(dataset_fwd, batch_size=batch_size, shuffle=False)

In [17]:
len(dataloader_general_test), len(dataloader_gk_test), len(dataloader_def_test), len(dataloader_mid_test), len(dataloader_fwd_test)

(438, 29, 154, 200, 49)

In [18]:
# next(iter(dataloader_mid_test))

# Models

In [19]:
class LSTMNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.2):
        super(LSTMNetwork, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.transpose(x, 0, 1)
        # x = x.view(-1, batch_size)
        # x = torch.transpose(x, 0, 1)
        # print("FOR", x.shape)
        # print(x.shape)
        # print(x.shape)
        out, _ = self.lstm(x)
        # print(out.shape)
        out = self.fc(out[-1])
        return out

In [20]:
net = LSTMNetwork(1, 20, 2, 1, dropout=0.4).to(device)
net.load_state_dict(torch.load('models/best/best_lstm_general.pt'))
net.double()

LSTMNetwork(
  (lstm): LSTM(1, 20, num_layers=2, dropout=0.4)
  (fc): Linear(in_features=20, out_features=1, bias=True)
)

In [21]:
net_gk = LSTMNetwork(1, 20, 2, 1, dropout=0.4).to(device)
net_gk.load_state_dict(torch.load('models/best/best_lstm_gk.pt', map_location=torch.device(device)))
net_gk.double()

LSTMNetwork(
  (lstm): LSTM(1, 20, num_layers=2, dropout=0.4)
  (fc): Linear(in_features=20, out_features=1, bias=True)
)

In [22]:
net_def = LSTMNetwork(1, 8, 2, 1, dropout=0.4).to(device)
net_def.load_state_dict(torch.load('models/best/best_lstm_def.pt', map_location=torch.device(device)))
net_def.double()

LSTMNetwork(
  (lstm): LSTM(1, 8, num_layers=2, dropout=0.4)
  (fc): Linear(in_features=8, out_features=1, bias=True)
)

In [23]:
net_mid = LSTMNetwork(1, 15, 2, 1, dropout=0.4).to(device)
net_mid.load_state_dict(torch.load('models/best/best_lstm_mid.pt', map_location=torch.device(device)))
net_mid.double()

LSTMNetwork(
  (lstm): LSTM(1, 15, num_layers=2, dropout=0.4)
  (fc): Linear(in_features=15, out_features=1, bias=True)
)

In [24]:
net_fwd = LSTMNetwork(1, 8, 2, 1, dropout=0.4).to(device)
net_fwd.load_state_dict(torch.load('models/best/best_lstm_fwd.pt', map_location=torch.device(device)))
net_fwd.double()

LSTMNetwork(
  (lstm): LSTM(1, 8, num_layers=2, dropout=0.4)
  (fc): Linear(in_features=8, out_features=1, bias=True)
)

# Generating LSTM predictions

In [25]:
def generate_predictions(dataloader_pos, dataset_pos, net_pos, df_original, first_gw):
    index = 0
    predictions_pos = []
    for inputs in dataloader_pos:
        inputs = torch.flatten(inputs, 1, 2)
        inputs = inputs.unsqueeze(2).to(device)

        # print("Inputs", inputs)
        # print("Target", target)
        # print("INPUTS", inputs)
        # print(inputs)
        outputs = net_pos(inputs).to(device)
        # print("OUTPUTS", outputs)
        for output in outputs:
            player_id, sample_id = dataset_pos.index_table[index]
            name = dataset_pos.names[player_id]
            points = round(output.item(), 2)
            # print("DFORW NEXT", player_id, sample_id, name, points)
            df_row = df_original[(df_original["Name"] == name) & (df_original["GW"] >= first_gw)].iloc[sample_id]
            # print("DFROW", df_row)
            df_row["Points"] = points
            df_gw = df_row["GW"]
            df_opp = df_row["Opponent"]

            # print("GEN PRED", player_id, sample_id, name, points, df_gw, df_opp)
            
            gw = sample_id + FIS + 1 # wrong
            predictions_pos.append(df_row)
            # df.loc[(df["Name"] == name) & (df["GW"] == gw), "LSTM"] = points
            index += 1
    
    output_df = pd.DataFrame(predictions_pos)
    output_df = output_df.loc[:, ~output_df.columns.str.contains('^Unnamed')]
    return output_df

In [26]:
dataset_mid.__getitem__(199).shape

(8, 17)

In [27]:
for inp in dataloader_mid_test:
    print(inp.shape)

torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size([1, 8, 17])
torch.Size(

In [28]:
prediction_columns = ["Name", "GW", "Opponent", "Was Home", "FPL", "Points"]

In [29]:
df_predictions_general = generate_predictions(dataloader_general_test, dataset_general, net, df_general, FROM_GW)

df_predictions_gk = generate_predictions(dataloader_gk_test, dataset_gk, net_gk, df_gk, FROM_GW)
df_predictions_def = generate_predictions(dataloader_def_test, dataset_def, net_def, df_def, FROM_GW)
df_predictions_mid = generate_predictions(dataloader_mid_test, dataset_mid, net_mid, df_mid, FROM_GW)
df_predictions_fwd = generate_predictions(dataloader_fwd_test, dataset_fwd, net_fwd, df_fwd, FROM_GW)

In [30]:
df_predictions_general[prediction_columns]

Unnamed: 0,Name,GW,Opponent,Was Home,FPL,Points
8166,Brenden-Aaronson,25.0,Southampton,1.0,,2.82
8400,Che-Adams,25.0,Leeds United,0.0,,2.81
13567,Tyler-Adams,25.0,Southampton,1.0,,2.80
13507,Tosin-Adarabioyo,25.0,Wolves,1.0,,2.81
12560,Rayan-Ait-Nouri,25.0,Fulham,0.0,,2.81
...,...,...,...,...,...,...
12236,Oleksandr-Zinchenko,25.0,Leicester City,0.0,,2.82
9583,Hakim-Ziyech,25.0,Tottenham,0.0,,2.82
11003,Kurt-Zouma,25.0,Nott'ham Forest,1.0,,2.81
11611,Martin-Odegaard,25.0,Everton,1.0,,2.82


In [31]:
df_predictions_fwd

Unnamed: 0,Date,Day,Comp,Round,Venue,Squad,Opponent,Start,Pos,Min,...,xGAvgOverall,xAGAvgOverall,CSAvgOverall,Team ScoreAvgOverall,Opp ScoreAvgOverall,FPLAvgOverall,Finished,Squad H,Squad A,Points
8400,2023-02-25,,,,,Southampton,Leeds United,,,,...,0.23,0.115,0.05,0.85,1.75,3.2,False,Leeds United,Southampton,4.14
10708,2023-02-25,,,,,Manchester City,Bournemouth,,,,...,0.1,0.021053,0.421053,2.631579,1.0,2.684211,False,Bournemouth,Manchester City,1.98
11848,2023-02-25,,,,,West Ham,Nott'ham Forest,,,,...,0.121053,0.047368,0.210526,0.894737,1.210526,2.473684,False,West Ham,Nott'ham Forest,1.66
8312,2023-02-25,,,,,Aston Villa,Everton,,,,...,0.0,0.0,0.214286,1.071429,1.571429,1.214286,False,Everton,Aston Villa,1.49
7531,2023-02-25,,,,,Southampton,Leeds United,,,,...,0.065,0.04,0.05,0.85,1.75,1.95,False,Leeds United,Southampton,1.51
12457,2023-02-26,,,,,Chelsea,Tottenham,,,,...,0.042857,0.028571,0.428571,1.0,0.857143,2.071429,False,Tottenham,Chelsea,1.32
13313,2023-02-25,,,,,Nott'ham Forest,West Ham,,,,...,0.25,0.05,0.277778,0.722222,1.888889,2.888889,False,West Ham,Nott'ham Forest,1.82
12353,2023-02-25,,,,,Leeds United,Southampton,,,,...,0.383333,0.025,0.166667,1.083333,1.666667,2.083333,False,Leeds United,Southampton,1.43
7886,2023-02-26,,,,,Chelsea,Tottenham,,,,...,0.061538,0.007692,0.307692,1.307692,1.076923,1.846154,False,Tottenham,Chelsea,1.51
9050,2023-02-25,,,,,Everton,Aston Villa,,,,...,0.21,0.03,0.2,0.7,1.6,2.5,False,Everton,Aston Villa,1.42


# XGBoost models

In [32]:
xgb_features = ["Points", "Was Home", "Opp rating", 'ScoreForLast5', 'ScoreAgainstLast5']

# career averages
xgb_features += ['MinAvgOverall', 'GlsAvgOverall',
       'AstAvgOverall', 'CrdYAvgOverall', 'CrdRAvgOverall', 'xGAvgOverall',
       'xAGAvgOverall', 'CSAvgOverall', 'Team ScoreAvgOverall',
       'Opp ScoreAvgOverall', 'FPLAvgOverall']

info = ["Name", "GW", "Squad", "Opponent", "Was Home", "Opp rating"]

In [33]:
model_general = xgb.XGBRegressor()
model_general.load_model("models/xgb/model_general.json")

In [34]:
model_gk = xgb.XGBRegressor()
model_gk.load_model("models/xgb/model_gk.json")

In [35]:
model_def = xgb.XGBRegressor()
model_def.load_model("models/xgb/model_def.json")

In [36]:
model_mid = xgb.XGBRegressor()
model_mid.load_model("models/xgb/model_mid.json")

In [37]:
model_fwd = xgb.XGBRegressor()
model_fwd.load_model("models/xgb/model_fwd.json")

# Generating XGBoost predictions

In [38]:
X_test_general = df_predictions_general[xgb_features]

X_test_gk = df_predictions_gk[xgb_features]
X_test_def = df_predictions_def[xgb_features]
X_test_mid = df_predictions_mid[xgb_features]
X_test_fwd = df_predictions_fwd[xgb_features]

In [39]:
predictions_general = model_general.predict(X_test_general)

predictions_gk = model_gk.predict(X_test_gk)
predictions_def = model_def.predict(X_test_def)
predictions_mid = model_mid.predict(X_test_mid)
predictions_fwd = model_fwd.predict(X_test_fwd)

In [40]:
df_predictions_general["pred"] = predictions_general

df_predictions_gk["pred"] = predictions_gk
df_predictions_def["pred"] = predictions_def
df_predictions_mid["pred"] = predictions_mid
df_predictions_fwd["pred"] = predictions_fwd

# Printing predictions

In [41]:
df_predictions_general[info + ["pred"]].sort_values(by=["pred"], ascending = False).head(20)

Unnamed: 0,Name,GW,Squad,Opponent,Was Home,Opp rating,pred
11627,Martinelli,25.0,Arsenal,Leicester City,0.0,1746.499023,10.93376
9494,Gabriel-Jesus,25.0,Arsenal,Leicester City,0.0,1746.499023,10.366996
11923,Mohamed-Salah,25.0,Liverpool,Crystal Palace,0.0,1742.434937,8.937963
8992,Diego-Costa,25.0,Wolves,Fulham,0.0,1717.072754,8.705561
11626,Martinelli,25.0,Arsenal,Everton,1.0,1667.46228,8.549852
8993,Diego-Costa,25.0,Wolves,Liverpool,0.0,1963.46521,8.368153
12264,Ollie-Watkins,25.0,Aston Villa,Everton,0.0,1667.46228,8.359809
9568,Granit-Xhaka,25.0,Arsenal,Everton,1.0,1667.46228,8.332071
8254,Bukayo-Saka,25.0,Arsenal,Leicester City,0.0,1746.499023,8.238254
11612,Martin-Odegaard,25.0,Arsenal,Leicester City,0.0,1746.499023,7.784436


In [42]:
df_predictions_gk[info + ["pred"]].sort_values(by=["pred"], ascending = False).head(20)

Unnamed: 0,Name,GW,Squad,Opponent,Was Home,Opp rating,pred
11581,Mark-Travers,25.0,Bournemouth,Manchester City,1.0,2026.310791,7.433331
8784,Danny-Ward,25.0,Leicester City,Arsenal,1.0,1946.912109,5.671182
10603,Jordan-Pickford,25.0,Everton,Aston Villa,1.0,1766.583008,5.293813
9508,Gavin-Bazunu,25.0,Southampton,Leeds United,0.0,1708.115723,4.822929
11316,Lukasz-Fabianski,25.0,West Ham,Nott'ham Forest,1.0,1655.643188,4.779517
9715,Hugo-Lloris,25.0,Tottenham,Chelsea,1.0,1870.791016,4.482022
7488,Aaron-Ramsdale,25.0,Arsenal,Leicester City,0.0,1746.499023,4.243928
9169,Ederson,25.0,Manchester City,Bournemouth,0.0,1636.081665,4.094285
12191,Neto,25.0,Bournemouth,Manchester City,1.0,2026.310791,3.758244
8873,Dean-Henderson,25.0,Nott'ham Forest,West Ham,0.0,1748.30542,3.591743


In [43]:
df_predictions_def[info + ["pred"]].sort_values(by=["pred"], ascending = False).head(20)

Unnamed: 0,Name,GW,Squad,Opponent,Was Home,Opp rating,pred
12338,Pascal-Struijk,25.0,Leeds United,Southampton,1.0,1651.960449,5.372359
10856,Kenny-Tete,25.0,Fulham,Wolves,1.0,1695.581787,5.321419
12575,Reece-James,25.0,Chelsea,Tottenham,0.0,1858.896484,5.310695
7784,Andrew-Robertson,25.0,Liverpool,Wolves,1.0,1695.581787,5.267489
8049,Ben-White,25.0,Arsenal,Everton,1.0,1667.46228,5.117102
9700,Hugo-Bueno,25.0,Wolves,Fulham,0.0,1717.072754,4.470611
8725,Daniel-Amartey,25.0,Leicester City,Arsenal,1.0,1946.912109,4.458747
11196,Lloyd-Kelly,25.0,Bournemouth,Manchester City,1.0,2026.310791,4.152046
11434,Manuel-Akanji,25.0,Manchester City,Bournemouth,0.0,1636.081665,4.007499
13448,Timothy-Castagne,25.0,Leicester City,Arsenal,1.0,1946.912109,3.990758


In [45]:
df_predictions_mid[info + ["pred"]].sort_values(by=["pred"], ascending = False).head(20)

Unnamed: 0,Name,GW,Squad,Opponent,Was Home,Opp rating,pred
11627,Martinelli,25.0,Arsenal,Leicester City,0.0,1746.499023,8.540466
8166,Brenden-Aaronson,25.0,Leeds United,Southampton,1.0,1651.960449,8.092587
11626,Martinelli,25.0,Arsenal,Everton,1.0,1667.46228,8.016395
11922,Mohamed-Salah,25.0,Liverpool,Wolves,1.0,1695.581787,7.91406
11923,Mohamed-Salah,25.0,Liverpool,Crystal Palace,0.0,1742.434937,7.899199
12768,Rodri,25.0,Manchester City,Bournemouth,0.0,1636.081665,7.278067
11612,Martin-Odegaard,25.0,Arsenal,Leicester City,0.0,1746.499023,6.957283
11611,Martin-Odegaard,25.0,Arsenal,Everton,1.0,1667.46228,6.071989
10513,John-McGinn,25.0,Aston Villa,Everton,0.0,1667.46228,5.848506
11833,Michael-Olise,25.0,Crystal Palace,Liverpool,1.0,1963.46521,5.693627


In [None]:
df_predictions_fwd[info + ["pred"]].sort_values(by=["pred"], ascending = False).head(20)