# Imports

In [1]:
import os
os.chdir("D:\PulpitE\FPL_ML")

In [2]:
import pandas as pd

import torch
import torch.nn as nn

import torchvision
import torchvision.transforms

from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

import matplotlib.pyplot as plt

import numpy as np
import random

# Reading data

In [3]:
df = pd.read_csv('data/fpl_fbref_elo_players.csv')

In [4]:
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

In [5]:
df.columns

Index(['Date', 'Day', 'Comp', 'Round', 'Venue', 'Squad', 'Opponent', 'Start',
       'Pos', 'Min', 'Gls', 'Ast', 'PK', 'PKatt', 'Sh', 'SoT', 'CrdY', 'CrdR',
       'Touches', 'Tkl', 'Int', 'Blocks', 'xG', 'npxG', 'xAG', 'SCA', 'GCA',
       'Cmp', 'Att', 'Cmp%', 'PrgP', 'Carries', 'PrgC', 'Att.1', 'Succ',
       'Match Report', 'Name', 'Season', 'WDL', 'GoalsTeams', 'Team Score',
       'Opp Score', 'CS', 'Was Home', 'GW', 'FPL', 'neutral_name',
       'Total Points', 'FPL position', 'Opp rating', 'Team rating'],
      dtype='object')

# New features

In [6]:
# average goals for and against team in last 5 matches
grouped = df.groupby('Name')

In [7]:
df['ScoreForLast5'] = grouped['Team Score'].rolling(5, min_periods=1).mean().reset_index(level=0, drop=True)

In [8]:
df['ScoreAgainstLast5'] = grouped['Opp Score'].rolling(5, min_periods=1).mean().reset_index(level=0, drop=True)

In [9]:
df['ScoreForLast5'] = df['ScoreForLast5'].shift(1)
df['ScoreAgainstLast5'] = df['ScoreAgainstLast5'].shift(1)

In [10]:
%%time
# calculates the average of a feature for all fixtures that took place before that game
overall_average_columns = ["Min", "Gls", "Ast", "CrdY", "CrdR", "xG", "xAG", "CS", "Team Score", "Opp Score", "FPL"]

for i, row in df.iterrows():
    name = row["Name"]
    date = row["Date"]
    for col in overall_average_columns:
        df.loc[i, col + "AvgOverall"] = df[(df["Date"] < date) & (df["Name"] == name)][col].mean()

CPU times: total: 2min 14s
Wall time: 2min 18s


In [11]:
df[df["Name"] == "Mohamed-Salah"][["ScoreForLast5", "ScoreAgainstLast5", "Team Score", "Opp Score", "GlsAvgOverall", "Gls"]]

Unnamed: 0,ScoreForLast5,ScoreAgainstLast5,Team Score,Opp Score,GlsAvgOverall,Gls
5896,2.4,1.0,2,2,,1.0
5897,2.0,2.0,1,1,1.0,0.0
5898,1.5,1.5,1,2,0.5,1.0
5899,1.333333,1.666667,9,0,0.666667,0.0
5900,3.25,1.25,2,1,0.5,0.0
5901,3.0,1.2,0,0,0.4,0.0
5902,2.6,0.8,3,3,0.333333,0.0
5903,3.0,1.2,2,3,0.285714,0.0
5904,3.2,1.4,1,0,0.25,1.0
5905,1.6,1.4,1,0,0.333333,0.0


In [12]:
df.columns

Index(['Date', 'Day', 'Comp', 'Round', 'Venue', 'Squad', 'Opponent', 'Start',
       'Pos', 'Min', 'Gls', 'Ast', 'PK', 'PKatt', 'Sh', 'SoT', 'CrdY', 'CrdR',
       'Touches', 'Tkl', 'Int', 'Blocks', 'xG', 'npxG', 'xAG', 'SCA', 'GCA',
       'Cmp', 'Att', 'Cmp%', 'PrgP', 'Carries', 'PrgC', 'Att.1', 'Succ',
       'Match Report', 'Name', 'Season', 'WDL', 'GoalsTeams', 'Team Score',
       'Opp Score', 'CS', 'Was Home', 'GW', 'FPL', 'neutral_name',
       'Total Points', 'FPL position', 'Opp rating', 'Team rating',
       'ScoreForLast5', 'ScoreAgainstLast5', 'MinAvgOverall', 'GlsAvgOverall',
       'AstAvgOverall', 'CrdYAvgOverall', 'CrdRAvgOverall', 'xGAvgOverall',
       'xAGAvgOverall', 'CSAvgOverall', 'Team ScoreAvgOverall',
       'Opp ScoreAvgOverall', 'FPLAvgOverall'],
      dtype='object')

# Saving df to file

In [13]:
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

In [14]:
df.to_csv('data/final_dataset.csv')

In [15]:
df

Unnamed: 0,Date,Day,Comp,Round,Venue,Squad,Opponent,Start,Pos,Min,...,GlsAvgOverall,AstAvgOverall,CrdYAvgOverall,CrdRAvgOverall,xGAvgOverall,xAGAvgOverall,CSAvgOverall,Team ScoreAvgOverall,Opp ScoreAvgOverall,FPLAvgOverall
0,2022-08-06,Sat,Premier League,Matchweek 1,Home,Leeds United,Wolves,Y,RW,83.0,...,,,,,,,,,,
1,2022-08-13,Sat,Premier League,Matchweek 2,Away,Leeds United,Southampton,Y,"RW,AM",90.0,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,2.000000,1.000000,2.000000
2,2022-08-21,Sun,Premier League,Matchweek 3,Home,Leeds United,Chelsea,Y,AM,82.0,...,0.000000,0.000000,0.500000,0.0,0.050000,0.050000,0.000000,2.000000,1.500000,2.000000
3,2022-08-27,Sat,Premier League,Matchweek 4,Away,Leeds United,Brighton,Y,"AM,RW",81.0,...,0.333333,0.000000,0.333333,0.0,0.366667,0.066667,0.333333,2.333333,1.000000,4.000000
4,2022-08-30,Tue,Premier League,Matchweek 5,Home,Leeds United,Everton,Y,"AM,RW",75.0,...,0.250000,0.000000,0.500000,0.0,0.275000,0.050000,0.250000,1.750000,1.000000,3.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7452,2022-12-26,Mon,Premier League,Matchweek 17,Home,Arsenal,West Ham,Y*,RM,90.0,...,0.461538,0.153846,0.153846,0.0,0.323077,0.200000,0.461538,2.307692,0.846154,5.230769
7453,2022-12-31,Sat,Premier League,Matchweek 18,Away,Arsenal,Brighton,Y*,RM,86.0,...,0.428571,0.285714,0.142857,0.0,0.328571,0.264286,0.428571,2.357143,0.857143,5.428571
7454,2023-01-03,Tue,Premier League,Matchweek 19,Home,Arsenal,Newcastle Utd,Y*,RM,90.0,...,0.466667,0.333333,0.133333,0.0,0.313333,0.300000,0.400000,2.466667,0.933333,5.733333
7455,2023-01-15,Sun,Premier League,Matchweek 20,Away,Arsenal,Tottenham,Y*,RM,89.0,...,0.437500,0.312500,0.187500,0.0,0.306250,0.300000,0.437500,2.312500,0.875000,5.562500
