# Imports

In [1]:
import os
os.chdir("D:\PulpitE\FPL_ML")

In [2]:
import pandas as pd

import torch
import torch.nn as nn

import torchvision
import torchvision.transforms

from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

import matplotlib.pyplot as plt

import numpy as np
import random

# Reading data

In [3]:
df = pd.read_csv('data/fpl_fbref_elo_players.csv')

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

In [5]:
df.columns

Index(['Date', 'Day', 'Comp', 'Round', 'Venue', 'Squad', 'Opponent', 'Start',
       'Pos', 'Min',
       ...
       'GW_y', 'xP', 'expected_assists', 'expected_goal_involvements',
       'expected_goals', 'expected_goals_conceded', 'starts', 'neutral_name_y',
       'Opp rating', 'Team rating'],
      dtype='object', length=104)

# New features

In [6]:
# average goals for and against team in last 5 matches
grouped = df.groupby('Name')

In [7]:
df['ScoreForLast5'] = grouped['Team Score'].rolling(5, min_periods=1).mean().reset_index(level=0, drop=True)

In [8]:
df['ScoreAgainstLast5'] = grouped['Opp Score'].rolling(5, min_periods=1).mean().reset_index(level=0, drop=True)

In [9]:
df['ScoreForLast5'] = df['ScoreForLast5'].shift(1)
df['ScoreAgainstLast5'] = df['ScoreAgainstLast5'].shift(1)

In [10]:
df["Min_points"] = (df["Min"] > 0) + (df["Min"] > 60)

In [11]:
%%time
# calculates the average of a feature for all fixtures that took place before that game
overall_average_columns = ["Min", "Gls", "Ast", "CrdY", "CrdR", "xG", "xAG", "CS", "Team Score", "Opp Score"]

for i, row in df.iterrows():
    name = row["Name"]
    date = row["Date"]
    for col in overall_average_columns:
        df.loc[i, col + "AvgOverall"] = df[(df["Date"] < date) & (df["Name"] == name)][col].mean()

CPU times: total: 13min 53s
Wall time: 14min 21s


In [12]:
df[df["Name"] == "Mohamed-Salah"][["ScoreForLast5", "ScoreAgainstLast5", "Team Score", "Opp Score", "GlsAvgOverall", "Gls"]]

Unnamed: 0,ScoreForLast5,ScoreAgainstLast5,Team Score,Opp Score,GlsAvgOverall,Gls
17289,1.80,3.200000,3,0,,1.0
17290,3.00,0.000000,2,0,1.000000,0.0
17291,2.50,0.000000,1,1,0.500000,1.0
17292,2.00,0.333333,3,0,0.666667,1.0
17293,2.25,0.250000,3,0,0.750000,1.0
...,...,...,...,...,...,...
17357,3.40,1.800000,1,0,0.588235,1.0
17358,3.20,1.400000,1,0,0.594203,1.0
17359,2.20,1.200000,3,0,0.600000,0.0
17360,2.20,0.800000,1,1,0.591549,0.0


In [13]:
df.columns

Index(['Date', 'Day', 'Comp', 'Round', 'Venue', 'Squad', 'Opponent', 'Start',
       'Pos', 'Min',
       ...
       'MinAvgOverall', 'GlsAvgOverall', 'AstAvgOverall', 'CrdYAvgOverall',
       'CrdRAvgOverall', 'xGAvgOverall', 'xAGAvgOverall', 'CSAvgOverall',
       'Team ScoreAvgOverall', 'Opp ScoreAvgOverall'],
      dtype='object', length=117)

# Saving df to file

In [14]:
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

In [15]:
df.to_csv('data/final_dataset.csv')

In [16]:
df

Unnamed: 0,Date,Day,Comp,Round,Venue,Squad,Opponent,Start,Pos,Min,...,MinAvgOverall,GlsAvgOverall,AstAvgOverall,CrdYAvgOverall,CrdRAvgOverall,xGAvgOverall,xAGAvgOverall,CSAvgOverall,Team ScoreAvgOverall,Opp ScoreAvgOverall
0,2021-08-15,Sun,Premier League,Matchweek 1,Away,West Ham,Newcastle Utd,Y,LB,90.0,...,,,,,,,,,,
1,2021-08-23,Mon,Premier League,Matchweek 2,Home,West Ham,Leicester City,Y,LB,90.0,...,90.000000,1.000000,0.000000,0.000000,0.0,0.100000,0.100000,0.000000,4.000000,2.000000
2,2021-08-28,Sat,Premier League,Matchweek 3,Home,West Ham,Crystal Palace,Y,LB,90.0,...,90.000000,0.500000,0.000000,0.000000,0.0,0.100000,0.050000,0.000000,4.000000,1.500000
3,2021-09-11,Sat,Premier League,Matchweek 4,Away,West Ham,Southampton,Y,LB,90.0,...,90.000000,0.333333,0.000000,0.000000,0.0,0.066667,0.066667,0.000000,3.333333,1.666667
4,2021-09-19,Sun,Premier League,Matchweek 5,Home,West Ham,Manchester Utd,Y,LB,90.0,...,90.000000,0.250000,0.000000,0.000000,0.0,0.075000,0.075000,0.250000,2.500000,1.250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24678,2023-02-05,Sun,Premier League,Matchweek 22,Home,Tottenham,Manchester City,N,"CM,AM",7.0,...,58.360000,0.020000,0.040000,0.300000,0.0,0.016000,0.024000,0.280000,1.400000,1.320000
24679,2023-05-06,Sat,Premier League,Matchweek 35,Home,Tottenham,Crystal Palace,N,0,0.0,...,57.352941,0.019608,0.039216,0.294118,0.0,0.015686,0.023529,0.294118,1.392157,1.294118
24680,2023-05-13,Sat,Premier League,Matchweek 36,Away,Tottenham,Aston Villa,N,CM,30.0,...,56.250000,0.019231,0.038462,0.288462,0.0,0.015385,0.023077,0.307692,1.384615,1.269231
24681,2023-05-20,Sat,Premier League,Matchweek 37,Home,Tottenham,Brentford,Y,CM,90.0,...,55.754717,0.018868,0.037736,0.283019,0.0,0.015094,0.022642,0.301887,1.377358,1.283019
