In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

mpl.rcParams['axes.grid'] ="True"
mpl.rcParams['axes.spines.top'] = "True"
mpl.rcParams['axes.spines.right'] = "True"
mpl.rcParams['xtick.labelsize'] = 20
mpl.rcParams['ytick.labelsize'] = 20

df_epl = pd.read_csv("../data/EPL_1819_clean.csv")

home_teams = df_epl["HomeTeam"].unique()
away_teams = df_epl["AwayTeam"].unique()

all_teams = list(set(np.append(home_teams, away_teams)))

print(all_teams)

['Brighton', 'Crystal Palace', 'Huddersfield', 'Tottenham', 'Everton', 'Newcastle', 'Arsenal', 'Man United', 'Cardiff', 'Southampton', 'Watford', 'Burnley', 'Wolves', 'Leicester', 'Man City', 'West Ham', 'Chelsea', 'Bournemouth', 'Liverpool', 'Fulham']


In [2]:
team = all_teams[0]
df_team = df_epl[(df_epl["HomeTeam"]==team) | (df_epl["AwayTeam"]==team)]
df_team

Unnamed: 0.1,Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HS,AS,HST,AST,HC,AC
5,5,Watford,Brighton,2,0,H,19,6,5,0,8,2
16,16,Brighton,Man United,3,2,H,6,9,3,3,3,5
23,23,Liverpool,Brighton,1,0,H,22,6,8,2,8,5
30,30,Brighton,Fulham,2,2,D,15,10,5,5,7,1
49,49,Southampton,Brighton,2,2,D,14,12,5,4,1,4
50,50,Brighton,Tottenham,1,2,A,8,16,4,7,5,7
64,64,Man City,Brighton,2,0,H,28,4,8,1,10,3
70,70,Brighton,West Ham,1,0,H,9,17,4,4,2,9
85,85,Newcastle,Brighton,0,1,A,27,8,6,2,10,2
90,90,Brighton,Wolves,1,0,H,2,25,1,7,1,10


In [9]:
team_shots = df_team.apply(lambda x: x["HS"] if x["HomeTeam"] == team else x["AS"], axis=1).sum()
team_shots_ot = df_team.apply( lambda x: x["HST"] if x["HomeTeam"] == team else x["AST"], axis=1).sum()
team_shots_vs = df_team.apply( lambda x: x["AS"] if x["HomeTeam"] == team else x["HS"], axis=1).sum()
team_shots_ot_vs = df_team.apply( lambda x: x["AST"] if x["HomeTeam"] == team else x["HST"], axis=1).sum()

# team_shots
# team_shots_ot
# team_shots_vs
team_shots_ot_vs

175

In [16]:
def calc_feats(df, team):
    team_stats = dict()
    df_team = df_epl[(df_epl["HomeTeam"]==team) | (df_epl["AwayTeam"]==team)]
    team_shots = df_team.apply(lambda x: x["HS"] if x["HomeTeam"] == team else x["AS"], axis=1).sum()
    team_shots_ot = df_team.apply( lambda x: x["HST"] if x["HomeTeam"] == team else x["AST"], axis=1).sum()
    team_shots_vs = df_team.apply( lambda x: x["AS"] if x["HomeTeam"] == team else x["HS"], axis=1).sum()
    team_shots_ot_vs = df_team.apply( lambda x: x["AST"] if x["HomeTeam"] == team else x["HST"], axis=1).sum()
    team_points = df_team.apply(lambda x: 3 if ((x["HomeTeam"] == team and x["FTR"] == "H") or (x["AwayTeam"] == team and x["FTR"] == "A")) else (1 if x["FTR"] == "D" else 0), axis=1).sum()

    team_stats["team"] = team
    team_stats["shots"] = team_shots
    team_stats["shots_ot"] = team_shots_ot
    team_stats["shots_vs"] = team_shots_vs
    team_stats["shots_ot_vs"] = team_shots_ot_vs
    team_stats["points"] = team_points
    return team_stats

In [17]:
calc_feats(df_epl, "Liverpool")

{'team': 'Liverpool',
 'shots': 574,
 'shots_ot': 226,
 'shots_vs': 307,
 'shots_ot_vs': 97,
 'points': 97}

In [18]:
all_teams_stats = []

for team in all_teams:
    team_stats = calc_feats(df_epl, team)
    all_teams_stats.append(team_stats)

df_all_stats = pd.DataFrame(all_teams_stats)
df_all_stats

Unnamed: 0,team,shots,shots_ot,shots_vs,shots_ot_vs,points
0,Brighton,365,108,582,175,36
1,Crystal Palace,491,146,525,164,49
2,Huddersfield,400,118,520,191,16
3,Tottenham,536,189,463,159,71
4,Everton,495,169,402,137,54
5,Newcastle,444,142,489,148,45
6,Arsenal,466,170,494,183,70
7,Man United,526,225,493,173,66
8,Cardiff,417,126,571,212,34
9,Southampton,480,162,523,179,39


In [19]:
df_all_stats.to_csv("../data/EPL_1819_stats.csv")