# Imports

In [1]:
import os
os.chdir("D:\PulpitE\FPL_ML")

In [2]:
import pandas as pd

import torch
import torch.nn as nn

import torchvision
import torchvision.transforms

from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

import matplotlib.pyplot as plt

import numpy as np
import random

# Constants and seeds

In [3]:
# FIXTURES_IN_SAMPLE
FIS = 8

# FIXTURES_FOR_PLAYER - number of gameweeks required
FFP = 15

NEXT_GAMEWEEK = 19

In [4]:
def set_all_seeds(seed):
    os.environ["PL_GLOBAL_SEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

set_all_seeds(42)

# Data

In [5]:
df = pd.read_csv('data/final_dataset.csv')

In [6]:
# remembering original df
df_original = df

In [1]:
df.iloc[1]

NameError: name 'df' is not defined

In [8]:
info = ["Name", "GW"]
other = ["LSTM"]
features = ["Min", "Gls", "Sh", "SoT", "xG", "npxG", "xAG", "CS", "Was Home"]
to_predict = ["FPL"]

In [9]:
df = df[info + features + to_predict]
df[features + to_predict] = df[features + to_predict].astype("float32")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [10]:
df.shape

(19, 12)

In [11]:
# removing NaN rows and players that appear less than 15 times
df = df.dropna()
grouped = df.groupby('Name')

# taking onyl players with at least FFP matches in logs
df = grouped.filter(lambda x: len(x) >= FFP)

In [12]:
df.shape

(19, 12)

In [13]:
df.head(20)

Unnamed: 0,Name,GW,Min,Gls,Sh,SoT,xG,npxG,xAG,CS,Was Home,FPL
0,Mohamed-Salah,1,90.0,1.0,2.0,1.0,0.4,0.4,0.1,0.0,0.0,7.0
1,Mohamed-Salah,2,90.0,0.0,3.0,1.0,0.3,0.3,0.8,0.0,1.0,2.0
2,Mohamed-Salah,3,90.0,1.0,3.0,1.0,0.3,0.3,0.3,0.0,0.0,7.0
3,Mohamed-Salah,4,90.0,0.0,4.0,1.0,1.2,1.2,0.1,1.0,1.0,3.0
4,Mohamed-Salah,5,90.0,0.0,2.0,0.0,0.2,0.2,0.4,0.0,1.0,8.0
5,Mohamed-Salah,6,90.0,0.0,3.0,1.0,0.2,0.2,0.2,1.0,0.0,3.0
6,Mohamed-Salah,9,90.0,0.0,5.0,2.0,0.6,0.6,0.1,0.0,1.0,5.0
7,Mohamed-Salah,10,68.0,0.0,1.0,0.0,0.1,0.1,0.0,0.0,0.0,2.0
8,Mohamed-Salah,11,89.0,1.0,3.0,1.0,0.9,0.9,0.0,1.0,1.0,8.0
9,Mohamed-Salah,12,90.0,0.0,6.0,2.0,0.4,0.4,0.0,1.0,1.0,3.0


In [14]:
last_names = df["Name"].unique()

# DF to DataLoader

In [None]:
# gk, def, mid, fwd


In [15]:
def init_index_table(df):
    names = df["Name"].unique()
    result_dict = {}
    
    current_index = 0
    player_index = 0
    for name in names:
        sample_index = 0
        samples_for_names = df[df["Name"] == name].shape[0] - FIS
        for i in range(samples_for_names):
            result_dict[current_index] = [player_index, sample_index]
            current_index += 1
            sample_index += 1
        player_index += 1
    
    return result_dict

In [16]:
index_table = init_index_table(df)

In [17]:
from torchvision.transforms import ToTensor

class PandasDataset(Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe
        self.names = dataframe["Name"].unique()
        self.transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

    def __len__(self):
        length = 0
        for name in self.names:
            length += (self.dataframe[self.dataframe["Name"] == name].shape[0] - FIS)
        return length

    def __getitem__(self, idx):
        # print(idx)
        player_id, sample_id = index_table[idx]
        name = self.names[player_id]
        # print(name)
        features = self.dataframe[self.dataframe["Name"] == name]
        # print(self.dataframe[self.dataframe["Name"] == name])
        # print(features[sample_id: (sample_id + 4)])
        features = features[sample_id: (sample_id + FIS - 1)].drop(columns=info).values[:, :-1]
        # print(features)
        # print(features)
        # print(type(features))
        # print(self.dataframe[self.dataframe["Name"] == name].values[:, -1])
        # print(type(self.dataframe[self.dataframe["Name"] == name].values[:, -1]))
        # target = self.dataframe[self.dataframe["Name"] == name].values[:, -1]
        # target = target.astype(np.float32)
        # target = torch.from_numpy(target)
        target = np.float32(self.dataframe[self.dataframe["Name"] == name].values[(sample_id + FIS), -1])
        # print(name, idx, target, sample_id)
        # print(type(target))
        return features, target

In [18]:
dataset = PandasDataset(df)
dataset.__getitem__(3307)

KeyError: 3307

In [None]:
dataset.__len__()

In [None]:
batch_size = 16

In [None]:
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

In [None]:
# next(iter(dataloader))[1]

# LSTM Model

In [None]:
class LSTMNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.2):
        super(LSTMNetwork, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.transpose(x, 0, 1)
        # x = x.view(-1, batch_size)
        # x = torch.transpose(x, 0, 1)
        # print("FOR", x.shape)
        # print(x.shape)
        # print(x.shape)
        out, _ = self.lstm(x)
        # print(out.shape)
        out = self.fc(out[-1])
        return out

In [None]:
len(dataloader)

In [None]:
# torch.transpose(next(iter(dataloader))[0], 0, 1).shape

In [None]:
net = LSTMNetwork(1, 20, 2, 1, dropout=0.4)

# Training loop

In [None]:
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.005)

In [None]:
%%time
num_epochs = 30
loss_history = []
for epoch in range(num_epochs):
    running_loss = 0
    for i, (inputs, targets) in enumerate(dataloader):
        inputs = torch.flatten(inputs, 1, 2)
        inputs = inputs.unsqueeze(2)
        # print("INPUTS", inputs.shape)
        # print("TARGETS", targets.shape)
        # print(targets)
        optimizer.zero_grad()
        outputs = net(inputs).flatten()
        # print("OUTPUTS", outputs.shape)
        loss = criterion(outputs, targets)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
    
    loss_history.append(running_loss / len(dataloader))
    # if epoch % 5 == 0:
    print(f'Epoch {epoch} Loss: {running_loss / len(dataloader)}')

In [None]:
plt.plot(loss_history)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

# Predictions

In [None]:
index = 0
predictions = []
for inputs, target in dataloader:
    inputs = torch.flatten(inputs, 1, 2)
    inputs = inputs.unsqueeze(2)
    # print("INPUTS", inputs)
    # print(inputs)
    outputs = net(inputs)
    # print("OUTPUTS", outputs)
    for output in outputs:
        player_id, sample_id = index_table[index]
        name = last_names[player_id]
        points = round(output.item(), 2)
        gw = sample_id + FIS + 1
        predictions.append([name, gw, points])
        # df.loc[(df["Name"] == name) & (df["GW"] == gw), "LSTM"] = points
        index += 1

In [None]:
# appending results to original df
for name, gw, points in predictions:
    df_original.loc[(df_original["Name"] == name) & (df["GW"] == gw), "LSTM"] = points

In [None]:
df_original

In [None]:
df_original.to_csv("data/fpl_fbref_elo_lstm.csv")

In [None]:
# results sorted
pd.DataFrame(predictions, columns=["Name", "GW", "Points"]).sort_values(by=["Points"], ascending = False).head(20)