# Imports

In [1]:
import os
os.chdir("D:\PulpitE\FPL_ML")

In [2]:
import pandas as pd
import numpy as np
from vaastav.getters import *
from datetime import datetime

from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
import torchvision

import torch
import torch.nn as nn

import xgboost as xgb

# Constants

In [3]:
NEXT_GW = 25

# fixtures in sample
FIS = 8

# prediction range
FROM_GW = 25
TO_GW = 26

batch_size = 1

# Device

In [4]:
# device = torch.device("cuda")
# device = torch.device('mps')
device = torch.device('cpu')

# Reading data

In [5]:
upcoming_fixtures = pd.read_csv("data/upcoming_fixtures.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


In [6]:
# selecting needed rows
upcoming_fixtures = upcoming_fixtures[(upcoming_fixtures["Finished"] == True) | ((upcoming_fixtures["Finished"] == False) & (upcoming_fixtures["GW"].between(FROM_GW, TO_GW)))]

In [7]:
# upcoming_fixtures[upcoming_fixtures["Name"] == "Erling-Haaland"]

In [8]:
# upcoming_fixtures.loc[9315]

# Dataset

In [9]:
features = ["Min", "Gls", "Ast", "Sh", "SoT", "xG", "npxG", "xAG", "SCA", "GCA", "CS", "CrdY", "CrdR", "Opp rating", "Team Score", "Opp Score", "Was Home"]
to_predict = ["FPL"]

In [10]:
def init_index_table(df):
    names = df["Name"].unique()
    result_dict = {}
    
    current_index = 0
    player_index = 0
    
    for name in names:
        sample_index = 0
        samples_for_name = df[(df["Name"] == name) & (df["Finished"] == False)].shape[0]
        for i in range(samples_for_name):
            result_dict[current_index] = [player_index, sample_index]
            current_index += 1
            sample_index += 1
        player_index += 1
    
    return result_dict

In [11]:
class PandasDataset(Dataset):
    def __init__(self, dataframe, starting_gw, ending_gw):
        self.dataframe = dataframe
        
        self.starting_gw = starting_gw
        self.ending_gw = ending_gw
        self.names = self.dataframe["Name"].unique()
        self.length = self.calculate_length()

        self.index_table = init_index_table(self.dataframe)
        self.transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

    def calculate_length(self):
        length = 0
        for name in self.names:
            length += self.dataframe[(self.dataframe["Name"] == name) & self.dataframe["GW"].between(FROM_GW, TO_GW)].shape[0]
        return length
    
    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        # print(idx)
        player_id, sample_id = self.index_table[idx]
        name = self.names[player_id]

        rows_features = self.dataframe[(self.dataframe["Name"] == name) & (self.dataframe["Finished"] == True)]
        features_item = rows_features.tail(FIS)[features + to_predict].values[:, :-1]
        return features_item

In [12]:
df = upcoming_fixtures

df_general = df
df_gk = df[df["FPL position"] == "GK"]
df_def = df[df["FPL position"] == "DEF"]
df_mid = df[df["FPL position"] == "MID"]
df_fwd = df[df["FPL position"] == "FWD"]

In [13]:
dataset_general = PandasDataset(df_general, FROM_GW, TO_GW)

dataset_gk = PandasDataset(df_gk, FROM_GW, TO_GW)
dataset_def = PandasDataset(df_def, FROM_GW, TO_GW)
dataset_mid = PandasDataset(df_mid, FROM_GW, TO_GW)
dataset_fwd = PandasDataset(df_fwd, FROM_GW, TO_GW)

In [14]:
# df[df["Name"] == "Erling-Haaland"].tail(1)

In [15]:
dataset_general.__len__(), dataset_gk.__len__(), dataset_def.__len__(), dataset_mid.__len__(), dataset_fwd.__len__()

(872, 57, 311, 396, 97)

In [16]:
dataloader_general_test = DataLoader(dataset_general, batch_size=batch_size, shuffle=False)

dataloader_gk_test = DataLoader(dataset_gk, batch_size=batch_size, shuffle=False)
dataloader_def_test = DataLoader(dataset_def, batch_size=batch_size, shuffle=False)
dataloader_mid_test = DataLoader(dataset_mid, batch_size=batch_size, shuffle=False)
dataloader_fwd_test = DataLoader(dataset_fwd, batch_size=batch_size, shuffle=False)

In [17]:
len(dataloader_general_test), len(dataloader_gk_test), len(dataloader_def_test), len(dataloader_mid_test), len(dataloader_fwd_test)

(872, 57, 311, 396, 97)

In [18]:
# next(iter(dataloader_mid_test))

# Models

In [19]:
class LSTMNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.2):
        super(LSTMNetwork, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.transpose(x, 0, 1)
        # x = x.view(-1, batch_size)
        # x = torch.transpose(x, 0, 1)
        # print("FOR", x.shape)
        # print(x.shape)
        # print(x.shape)
        out, _ = self.lstm(x)
        # print(out.shape)
        out = self.fc(out[-1])
        return out

In [20]:
net = LSTMNetwork(1, 20, 2, 1, dropout=0.4).to(device)
net.load_state_dict(torch.load('models/best/best_lstm_general.pt'))
net.double()

LSTMNetwork(
  (lstm): LSTM(1, 20, num_layers=2, dropout=0.4)
  (fc): Linear(in_features=20, out_features=1, bias=True)
)

In [21]:
net_gk = LSTMNetwork(1, 20, 2, 1, dropout=0.4).to(device)
net_gk.load_state_dict(torch.load('models/best/best_lstm_gk.pt', map_location=torch.device(device)))
net_gk.double()

LSTMNetwork(
  (lstm): LSTM(1, 20, num_layers=2, dropout=0.4)
  (fc): Linear(in_features=20, out_features=1, bias=True)
)

In [22]:
net_def = LSTMNetwork(1, 8, 2, 1, dropout=0.4).to(device)
net_def.load_state_dict(torch.load('models/best/best_lstm_def.pt', map_location=torch.device(device)))
net_def.double()

LSTMNetwork(
  (lstm): LSTM(1, 8, num_layers=2, dropout=0.4)
  (fc): Linear(in_features=8, out_features=1, bias=True)
)

In [23]:
net_mid = LSTMNetwork(1, 15, 2, 1, dropout=0.4).to(device)
net_mid.load_state_dict(torch.load('models/best/best_lstm_mid.pt', map_location=torch.device(device)))
net_mid.double()

LSTMNetwork(
  (lstm): LSTM(1, 15, num_layers=2, dropout=0.4)
  (fc): Linear(in_features=15, out_features=1, bias=True)
)

In [24]:
net_fwd = LSTMNetwork(1, 8, 2, 1, dropout=0.4).to(device)
net_fwd.load_state_dict(torch.load('models/best/best_lstm_fwd.pt', map_location=torch.device(device)))
net_fwd.double()

LSTMNetwork(
  (lstm): LSTM(1, 8, num_layers=2, dropout=0.4)
  (fc): Linear(in_features=8, out_features=1, bias=True)
)

# Generating LSTM predictions

In [25]:
def generate_predictions(dataloader_pos, dataset_pos, net_pos, df_original, first_gw):
    index = 0
    predictions_pos = []
    for inputs in dataloader_pos:
        inputs = torch.flatten(inputs, 1, 2)
        inputs = inputs.unsqueeze(2).to(device)

        # print("Inputs", inputs)
        # print("Target", target)
        # print("INPUTS", inputs)
        # print(inputs)
        outputs = net_pos(inputs).to(device)
        # print("OUTPUTS", outputs)
        for output in outputs:
            player_id, sample_id = dataset_pos.index_table[index]
            name = dataset_pos.names[player_id]
            points = round(output.item(), 2)
            # print("DFORW NEXT", player_id, sample_id, name, points)
            df_row = df_original[(df_original["Name"] == name) & (df_original["GW"] >= first_gw)].iloc[sample_id]
            # print("DFROW", df_row)
            df_row["Points"] = points
            df_gw = df_row["GW"]
            df_opp = df_row["Opponent"]

            # print("GEN PRED", player_id, sample_id, name, points, df_gw, df_opp)
            
            gw = sample_id + FIS + 1 # wrong
            predictions_pos.append(df_row)
            # df.loc[(df["Name"] == name) & (df["GW"] == gw), "LSTM"] = points
            index += 1
    
    output_df = pd.DataFrame(predictions_pos)
    output_df = output_df.loc[:, ~output_df.columns.str.contains('^Unnamed')]
    return output_df

In [26]:
prediction_columns = ["Name", "GW", "Opponent", "Was Home", "FPL", "Points"]

In [27]:
df_predictions_general = generate_predictions(dataloader_general_test, dataset_general, net, df_general, FROM_GW)

df_predictions_gk = generate_predictions(dataloader_gk_test, dataset_gk, net_gk, df_gk, FROM_GW)
df_predictions_def = generate_predictions(dataloader_def_test, dataset_def, net_def, df_def, FROM_GW)
df_predictions_mid = generate_predictions(dataloader_mid_test, dataset_mid, net_mid, df_mid, FROM_GW)
df_predictions_fwd = generate_predictions(dataloader_fwd_test, dataset_fwd, net_fwd, df_fwd, FROM_GW)

In [28]:
df_predictions_general[prediction_columns]

Unnamed: 0,Name,GW,Opponent,Was Home,FPL,Points
8166,Brenden-Aaronson,25.0,Southampton,1.0,,2.82
8167,Brenden-Aaronson,26.0,Chelsea,0.0,,2.81
8400,Che-Adams,25.0,Leeds United,0.0,,2.80
8401,Che-Adams,26.0,Leicester City,1.0,,2.80
13567,Tyler-Adams,25.0,Southampton,1.0,,2.82
...,...,...,...,...,...,...
11003,Kurt-Zouma,25.0,Nott'ham Forest,1.0,,2.81
11004,Kurt-Zouma,26.0,Brighton,0.0,,2.79
11611,Martin-Odegaard,25.0,Everton,1.0,,2.81
11612,Martin-Odegaard,25.0,Leicester City,0.0,,2.80


In [29]:
df_predictions_fwd

Unnamed: 0,Date,Day,Comp,Round,Venue,Squad,Opponent,Start,Pos,Min,...,xGAvgOverall,xAGAvgOverall,CSAvgOverall,Team ScoreAvgOverall,Opp ScoreAvgOverall,FPLAvgOverall,Finished,Squad H,Squad A,Points
8400,2023-02-25,,,,,Southampton,Leeds United,,,,...,0.230000,0.115000,0.050000,0.850000,1.750000,3.200000,False,Leeds United,Southampton,4.25
8401,2023-03-04,,,,,Southampton,Leicester City,,,,...,0.230000,0.115000,0.050000,0.850000,1.750000,3.200000,False,Southampton,Leicester City,3.06
10708,2023-02-25,,,,,Manchester City,Bournemouth,,,,...,0.100000,0.021053,0.421053,2.631579,1.000000,2.684211,False,Bournemouth,Manchester City,1.98
10709,2023-03-04,,,,,Manchester City,Newcastle Utd,,,,...,0.100000,0.021053,0.421053,2.631579,1.000000,2.684211,False,Manchester City,Newcastle Utd,2.07
11848,2023-02-25,,,,,West Ham,Nott'ham Forest,,,,...,0.121053,0.047368,0.210526,0.894737,1.210526,2.473684,False,West Ham,Nott'ham Forest,1.68
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12265,2023-03-04,,,,,Aston Villa,Crystal Palace,,,,...,0.315789,0.110526,0.263158,1.105263,1.368421,3.842105,False,Aston Villa,Crystal Palace,4.18
8799,2023-03-04,,,,,Brighton,West Ham,,,,...,0.286667,0.166667,0.333333,1.600000,1.333333,2.800000,False,Brighton,West Ham,1.80
8283,2023-03-04,,,,,Newcastle Utd,Manchester City,,,,...,0.480000,0.093333,0.666667,1.800000,0.466667,5.066667,False,Manchester City,Newcastle Utd,4.25
8490,2023-02-25,,,,,Nott'ham Forest,West Ham,,,,...,0.095000,0.015000,0.550000,1.700000,0.600000,2.300000,False,West Ham,Nott'ham Forest,1.51


# XGBoost models

In [30]:
xgb_features = ["Points", "Was Home", "Opp rating", 'ScoreForLast5', 'ScoreAgainstLast5']

# career averages
xgb_features += ['MinAvgOverall', 'GlsAvgOverall',
       'AstAvgOverall', 'CrdYAvgOverall', 'CrdRAvgOverall', 'xGAvgOverall',
       'xAGAvgOverall', 'CSAvgOverall', 'Team ScoreAvgOverall',
       'Opp ScoreAvgOverall', 'FPLAvgOverall']

info = ["Name", "GW", "Squad", "Opponent", "Was Home", "Opp rating"]

In [31]:
model_general = xgb.XGBRegressor()
model_general.load_model("models/xgb/model_general.json")

In [32]:
model_gk = xgb.XGBRegressor()
model_gk.load_model("models/xgb/model_gk.json")

In [33]:
model_def = xgb.XGBRegressor()
model_def.load_model("models/xgb/model_def.json")

In [34]:
model_mid = xgb.XGBRegressor()
model_mid.load_model("models/xgb/model_mid.json")

In [35]:
model_fwd = xgb.XGBRegressor()
model_fwd.load_model("models/xgb/model_fwd.json")

# Generating XGBoost predictions

In [36]:
X_test_general = df_predictions_general[xgb_features]

X_test_gk = df_predictions_gk[xgb_features]
X_test_def = df_predictions_def[xgb_features]
X_test_mid = df_predictions_mid[xgb_features]
X_test_fwd = df_predictions_fwd[xgb_features]

In [37]:
predictions_general = model_general.predict(X_test_general)

predictions_gk = model_gk.predict(X_test_gk)
predictions_def = model_def.predict(X_test_def)
predictions_mid = model_mid.predict(X_test_mid)
predictions_fwd = model_fwd.predict(X_test_fwd)

In [38]:
df_predictions_general["pred"] = predictions_general

df_predictions_gk["pred"] = predictions_gk
df_predictions_def["pred"] = predictions_def
df_predictions_mid["pred"] = predictions_mid
df_predictions_fwd["pred"] = predictions_fwd

# Printing predictions

In [39]:
df_predictions_general[info + ["pred"]].sort_values(by=["pred"], ascending = False).head(20)

Unnamed: 0,Name,GW,Squad,Opponent,Was Home,Opp rating,pred
8994,Diego-Costa,26.0,Wolves,Tottenham,1.0,1858.896484,11.478073
11627,Martinelli,25.0,Arsenal,Leicester City,0.0,1746.499023,10.93376
9494,Gabriel-Jesus,25.0,Arsenal,Leicester City,0.0,1746.499023,10.366996
7651,Aleksandar-Mitrovic,26.0,Fulham,Brentford,0.0,1779.055054,9.4202
11923,Mohamed-Salah,25.0,Liverpool,Crystal Palace,0.0,1742.434937,9.320785
11628,Martinelli,26.0,Arsenal,Bournemouth,1.0,1636.081665,9.320693
8992,Diego-Costa,25.0,Wolves,Fulham,0.0,1717.072754,9.309185
8993,Diego-Costa,25.0,Wolves,Liverpool,0.0,1963.46521,9.217393
9568,Granit-Xhaka,25.0,Arsenal,Everton,1.0,1667.46228,9.105757
11863,Miguel-Almiron,26.0,Newcastle Utd,Manchester City,0.0,2026.310791,9.04457


In [40]:
df_predictions_gk[info + ["pred"]].sort_values(by=["pred"], ascending = False).head(20)

Unnamed: 0,Name,GW,Squad,Opponent,Was Home,Opp rating,pred
11581,Mark-Travers,25.0,Bournemouth,Manchester City,1.0,2026.310791,7.210227
8830,David-Raya,26.0,Brentford,Fulham,1.0,1717.072754,6.97817
11582,Mark-Travers,26.0,Bournemouth,Arsenal,0.0,1946.912109,6.682509
8784,Danny-Ward,25.0,Leicester City,Arsenal,1.0,1946.912109,5.671182
11316,Lukasz-Fabianski,25.0,West Ham,Nott'ham Forest,1.0,1655.643188,4.779517
10603,Jordan-Pickford,25.0,Everton,Aston Villa,1.0,1766.583008,4.769704
9789,Illan-Meslier,26.0,Leeds United,Chelsea,0.0,1870.791016,4.758778
7489,Aaron-Ramsdale,26.0,Arsenal,Bournemouth,1.0,1636.081665,4.754684
7488,Aaron-Ramsdale,25.0,Arsenal,Leicester City,0.0,1746.499023,4.626956
9715,Hugo-Lloris,25.0,Tottenham,Chelsea,1.0,1870.791016,4.336326


In [41]:
df_predictions_def[info + ["pred"]].sort_values(by=["pred"], ascending = False).head(20)

Unnamed: 0,Name,GW,Squad,Opponent,Was Home,Opp rating,pred
12576,Reece-James,26.0,Chelsea,Leeds United,1.0,1708.115723,8.36762
13778,William-Saliba,26.0,Arsenal,Bournemouth,1.0,1636.081665,7.470566
8051,Ben-White,26.0,Arsenal,Bournemouth,1.0,1636.081665,6.832407
7784,Andrew-Robertson,25.0,Liverpool,Wolves,1.0,1695.581787,6.740488
10943,Kieran-Trippier,26.0,Newcastle Utd,Manchester City,0.0,2026.310791,6.629511
8019,Ben-Mee,26.0,Brentford,Fulham,1.0,1717.072754,5.924755
9700,Hugo-Bueno,25.0,Wolves,Fulham,0.0,1717.072754,5.91772
9480,Gabriel-Dos-Santos,26.0,Arsenal,Bournemouth,1.0,1636.081665,5.800217
12575,Reece-James,25.0,Chelsea,Tottenham,0.0,1858.896484,5.432866
10856,Kenny-Tete,25.0,Fulham,Wolves,1.0,1695.581787,5.321419


In [42]:
df_predictions_mid[info + ["pred"]].sort_values(by=["pred"], ascending = False).head(20)

Unnamed: 0,Name,GW,Squad,Opponent,Was Home,Opp rating,pred
11628,Martinelli,26.0,Arsenal,Bournemouth,1.0,1636.081665,13.636171
11627,Martinelli,25.0,Arsenal,Leicester City,0.0,1746.499023,12.719133
11923,Mohamed-Salah,25.0,Liverpool,Crystal Palace,0.0,1742.434937,9.146258
11863,Miguel-Almiron,26.0,Newcastle Utd,Manchester City,0.0,2026.310791,8.607864
12782,Rodrigo,25.0,Leeds United,Southampton,1.0,1651.960449,8.541669
11611,Martin-Odegaard,25.0,Arsenal,Everton,1.0,1667.46228,8.381827
11922,Mohamed-Salah,25.0,Liverpool,Wolves,1.0,1695.581787,8.332723
8166,Brenden-Aaronson,25.0,Leeds United,Southampton,1.0,1651.960449,8.08309
10513,John-McGinn,25.0,Aston Villa,Everton,0.0,1667.46228,8.062737
11626,Martinelli,25.0,Arsenal,Everton,1.0,1667.46228,8.008951


In [43]:
df_predictions_fwd[info + ["pred"]].sort_values(by=["pred"], ascending = False).head(20)

Unnamed: 0,Name,GW,Squad,Opponent,Was Home,Opp rating,pred
9494,Gabriel-Jesus,25.0,Arsenal,Leicester City,0.0,1746.499023,18.336842
9493,Gabriel-Jesus,25.0,Arsenal,Everton,1.0,1667.46228,11.701508
9495,Gabriel-Jesus,26.0,Arsenal,Bournemouth,1.0,1636.081665,11.658823
7651,Aleksandar-Mitrovic,26.0,Fulham,Brentford,0.0,1779.055054,10.750194
9156,Eddie-Nketiah,26.0,Arsenal,Bournemouth,1.0,1636.081665,9.265849
9155,Eddie-Nketiah,25.0,Arsenal,Leicester City,0.0,1746.499023,9.059929
9154,Eddie-Nketiah,25.0,Arsenal,Everton,1.0,1667.46228,8.986884
9831,Ivan-Toney,26.0,Brentford,Fulham,1.0,1717.072754,6.94805
9316,Erling-Haaland,26.0,Manchester City,Newcastle Utd,1.0,1855.404907,6.501473
9612,Harry-Kane,25.0,Tottenham,Chelsea,1.0,1870.791016,5.569001


# Assembly

In [44]:
df_predictions_positional_merged = pd.concat([df_predictions_gk, df_predictions_def, df_predictions_mid, df_predictions_fwd])

In [45]:
df_assembly = df_predictions_general

In [46]:
df_predictions_general.columns

Index(['Date', 'Day', 'Comp', 'Round', 'Venue', 'Squad', 'Opponent', 'Start',
       'Pos', 'Min', 'Gls', 'Ast', 'PK', 'PKatt', 'Sh', 'SoT', 'CrdY', 'CrdR',
       'Touches', 'Tkl', 'Int', 'Blocks', 'xG', 'npxG', 'xAG', 'SCA', 'GCA',
       'Cmp', 'Att', 'Cmp%', 'PrgP', 'Carries', 'PrgC', 'Att.1', 'Succ',
       'Match Report', 'Name', 'Season', 'WDL', 'GoalsTeams', 'Team Score',
       'Opp Score', 'CS', 'Was Home', 'GW', 'FPL', 'neutral_name',
       'Total Points', 'FPL position', 'Opp rating', 'Team rating',
       'ScoreForLast5', 'ScoreAgainstLast5', 'MinAvgOverall', 'GlsAvgOverall',
       'AstAvgOverall', 'CrdYAvgOverall', 'CrdRAvgOverall', 'xGAvgOverall',
       'xAGAvgOverall', 'CSAvgOverall', 'Team ScoreAvgOverall',
       'Opp ScoreAvgOverall', 'FPLAvgOverall', 'Finished', 'Squad H',
       'Squad A', 'Points', 'pred'],
      dtype='object')

In [47]:
for i, row in df_assembly.iterrows():
    name = row["Name"]
    gw = row["GW"]
    season = row["Season"]
    squadH = row["Squad H"]
    squadA = row["Squad A"]
    row_merged = df_predictions_positional_merged[(df_predictions_positional_merged["Name"] == name) & (df_predictions_positional_merged["GW"] == gw) & (df_predictions_positional_merged["Season"] == season) & (df_predictions_positional_merged["Squad H"] == squadH) & (df_predictions_positional_merged["Squad A"] == squadA)]
    
    if row_merged.shape[0] > 0:
        df_assembly.at[i, "general pred"] = row["pred"]
        df_assembly.at[i, "positional pred"] = row_merged["pred"].item()
        df_assembly.at[i, "assembly pred"] = round((row["pred"] + row_merged["pred"].item()) / 2, 2)
    
# dropping old pred column
df_assembly = df_assembly.drop(['pred'], axis=1)

In [48]:
df_assembly_sorted = df_assembly[info + ["assembly pred", "general pred", "positional pred"]].sort_values(by=["assembly pred"], ascending = False)
df_assembly_sorted.head(20)

Unnamed: 0,Name,GW,Squad,Opponent,Was Home,Opp rating,assembly pred,general pred,positional pred
9494,Gabriel-Jesus,25.0,Arsenal,Leicester City,0.0,1746.499023,14.35,10.366996,18.336842
11627,Martinelli,25.0,Arsenal,Leicester City,0.0,1746.499023,11.83,10.93376,12.719133
11628,Martinelli,26.0,Arsenal,Bournemouth,1.0,1636.081665,11.48,9.320693,13.636171
7651,Aleksandar-Mitrovic,26.0,Fulham,Brentford,0.0,1779.055054,10.09,9.4202,10.750194
9493,Gabriel-Jesus,25.0,Arsenal,Everton,1.0,1667.46228,9.39,7.086811,11.701508
9495,Gabriel-Jesus,26.0,Arsenal,Bournemouth,1.0,1636.081665,9.3,6.942421,11.658823
11923,Mohamed-Salah,25.0,Liverpool,Crystal Palace,0.0,1742.434937,9.23,9.320785,9.146258
11863,Miguel-Almiron,26.0,Newcastle Utd,Manchester City,0.0,2026.310791,8.83,9.04457,8.607864
11626,Martinelli,25.0,Arsenal,Everton,1.0,1667.46228,8.28,8.549852,8.008951
11611,Martin-Odegaard,25.0,Arsenal,Everton,1.0,1667.46228,7.65,6.920246,8.381827


In [49]:
df_assembly_sorted[df_assembly_sorted["Name"] == "Martinelli"]

Unnamed: 0,Name,GW,Squad,Opponent,Was Home,Opp rating,assembly pred,general pred,positional pred
11627,Martinelli,25.0,Arsenal,Leicester City,0.0,1746.499023,11.83,10.93376,12.719133
11628,Martinelli,26.0,Arsenal,Bournemouth,1.0,1636.081665,11.48,9.320693,13.636171
11626,Martinelli,25.0,Arsenal,Everton,1.0,1667.46228,8.28,8.549852,8.008951


# Assembly sumed up for double gameweeks

In [50]:
df_assembly_sumed = df_assembly.groupby(['GW', 'Name']).sum().reset_index()

In [51]:
df_assembly_sumed = df_assembly_sumed.sort_values(by=["assembly pred"], ascending = False)[["Name", "GW", "assembly pred", "general pred", "positional pred"]]

In [52]:
df_assembly_sumed

Unnamed: 0,Name,GW,assembly pred,general pred,positional pred
106,Gabriel-Jesus,25.0,23.74,17.453806,30.038349
226,Martinelli,25.0,20.11,19.483612,20.728085
240,Mohamed-Salah,25.0,15.43,13.391315,17.478981
225,Martin-Odegaard,25.0,14.88,14.451110,15.301036
79,Diego-Costa,25.0,12.36,18.526578,6.197187
...,...,...,...,...,...
22,Asmir-Begovic,25.0,-0.14,-0.556467,0.274901
753,Thilo-Kehrer,26.0,-0.16,-0.194338,-0.127178
94,Emiliano-Marcondes,25.0,-0.17,-0.195719,-0.142661
390,Ben-Pearson,26.0,-0.17,-0.811666,0.476730


In [53]:
df_assembly_sumed[df_assembly_sumed["Name"] == "Martinelli"][["Name", "GW", "assembly pred", "general pred", "positional pred"]]

Unnamed: 0,Name,GW,assembly pred,general pred,positional pred
226,Martinelli,25.0,20.11,19.483612,20.728085
633,Martinelli,26.0,11.48,9.320693,13.636171


# Assembly pivot

In [54]:
df_pivot = df_assembly_sumed.pivot(index='Name', columns='GW', values='assembly pred').reset_index()
df_pivot = df_pivot.rename(columns=lambda x: "GW" + str(x) if x != "Name" else x)
df_pivot = df_pivot.rename(columns=lambda x: x[:-2] if x.startswith('GW') else x)
df_pivot = df_pivot.fillna(0)
df_pivot = df_pivot.rename_axis(None, axis=1)
df_pivot['Sum'] = df_pivot.iloc[:, 1:].sum(axis=1)

In [55]:
df_pivot

Unnamed: 0,Name,GW25,GW26,Sum
0,Aaron-Cresswell,2.40,0.93,3.33
1,Aaron-Hickey,0.00,3.81,3.81
2,Aaron-Ramsdale,7.77,4.46,12.23
3,Aaron-Wan-Bissaka,0.00,0.98,0.98
4,Abdoulaye-Doucoure,2.28,2.16,4.44
...,...,...,...,...
429,Willy-Boly,1.41,1.26,2.67
430,Wout-Faes,1.76,1.40,3.16
431,Yoane-Wissa,0.00,1.31,1.31
432,Youri-Tielemans,2.97,4.17,7.14
