# Imports

In [1]:
import os
# Switch the working directory to the project folder before the project-local
# imports run (presumably so that `vaastav` and `config` resolve — confirm).
# The path is a raw string: "\P" and "\F" are invalid escape sequences in a
# normal string literal, which newer Python versions warn about.
# NOTE(review): this is a machine-specific absolute path; adjust it per machine.
os.chdir(r"D:\PulpitE\FPL_ML")

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from vaastav.understat import *
from vaastav.getters import *
from config import *

# Getting data

In [3]:
# Load the fixture records through the project helper `get_fixtures_data`
# (presumably provided by the `vaastav.getters` star import — confirm).
fixtures = get_fixtures_data()

In [4]:
# Tabulate the fixture records, then keep only the rows with no missing value.
# NOTE(review): presumably this is what filters out fixtures that have not been
# played yet — confirm against the shape of the fixture data.
fixtures_frame = pd.DataFrame(fixtures)
df_fixtures = fixtures_frame.dropna()

In [5]:
def team_id_to_name(current_teams, team_id):
    """Translate a 1-based team id into the matching team name.

    Args:
        current_teams: Indexable collection of team names in which the team
            with id ``n`` is stored at position ``n - 1``.
        team_id: 1-based team identifier.

    Returns:
        The element of ``current_teams`` at position ``team_id - 1``.

    Raises:
        IndexError: If ``team_id`` is smaller than 1, or (for list-like
            ``current_teams``) larger than the number of teams.
    """
    # Without this guard an id of 0 (or a negative id) would wrap around via
    # Python's negative indexing and silently return a team from the end.
    if team_id < 1:
        raise IndexError(f"team_id must be >= 1, got {team_id!r}")
    return current_teams[team_id - 1]

In [6]:
# Add the readable club name for the home ("Squad H") and away ("Squad A")
# side of every fixture, resolved from the numeric team ids.
for id_column, name_column in (("team_h", "Squad H"), ("team_a", "Squad A")):
    df_fixtures[name_column] = df_fixtures[id_column].map(
        lambda team_id: team_id_to_name(current_teams, team_id)
    )

# The first 10 characters of `kickoff_time` are kept as the fixture's "date".
df_fixtures["date"] = df_fixtures["kickoff_time"].str.slice(stop=10)

In [7]:
# Only the first element returned by `get_epl_data` is used. Later cells treat
# it as a mapping whose values carry a 'title' and a 'history' list.
epl_data = get_epl_data()[0]

In [8]:
# epl_data

In [9]:
def team_understat_to_fpl(team_name):
    """Map an Understat team name onto the spelling used by the fixture data.

    Only the four clubs listed below are renamed; every other name is
    returned unchanged.
    """
    renamed_clubs = {
        "Wolverhampton Wanderers": "Wolves",
        "Newcastle United": "Newcastle Utd",
        "Manchester United": "Manchester Utd",
        "Nottingham Forest": "Nott'ham Forest",
    }
    return renamed_clubs.get(team_name, team_name)

# Basic DF

In [10]:
# Build one row per (team, match) from the Understat history.
#
# The rows are collected in a plain list and the DataFrame is created once at
# the end: growing a frame with `DataFrame.append` inside a loop copies the
# whole frame on every iteration, and `append` no longer exists in pandas 2.x.
match_rows = []

for team_entry in epl_data.values():
    title = team_understat_to_fpl(team_entry['title'])
    # "GW" is the 1-based position of the match within this team's history,
    # not a gameweek number read from the data.
    for gw, match in enumerate(team_entry['history'], start=1):
        match_rows.append({
            "Team": title,
            "xG": match["xG"],
            "xGA": match["xGA"],
            # Only the first 10 characters of the date field are kept.
            "Date": match["date"][:10],
            "GW": gw,
        })

# Passing `columns` fixes the column order and keeps the expected columns
# even when there are no rows at all.
df = pd.DataFrame(match_rows, columns=["Team", "xG", "xGA", "Date", "GW"])

In [11]:
def assign_opponent(x):
    """Return the name of the team that ``x["Team"]`` faced on ``x["Date"]``.

    The match is looked up in the notebook-level ``df_fixtures`` frame by
    date and team name, first as the home side and then as the away side.

    Args:
        x: Row-like object providing ``"Date"`` and ``"Team"``.

    Returns:
        The opposing team's name, or ``None`` when no fixture matches.

    Raises:
        ValueError: If more than one fixture matches (raised by ``.item()``).
    """
    on_match_day = df_fixtures["date"] == x["Date"]

    as_home = df_fixtures[on_match_day & (df_fixtures["Squad H"] == x["Team"])]
    if as_home.size > 0:
        return as_home["Squad A"].item()

    as_away = df_fixtures[on_match_day & (df_fixtures["Squad A"] == x["Team"])]
    if as_away.size > 0:
        return as_away["Squad H"].item()

    return None

In [12]:
# Fill the "Opponent" column one match row at a time by looking each row up
# in the fixture list.
for row_label, match_row in df.iterrows():
    opponent_name = assign_opponent(match_row)
    df.loc[row_label, "Opponent"] = opponent_name

In [13]:
df

Unnamed: 0,Team,xG,xGA,Date,GW,Opponent
0,Aston Villa,1.486,4.32208,2023-08-12,1,Newcastle Utd
1,Aston Villa,3.24336,0.721465,2023-08-20,2,Everton
2,Aston Villa,2.83691,0.630605,2023-08-27,3,Burnley
3,Everton,2.59001,1.58144,2023-08-12,1,Fulham
4,Everton,0.721465,3.24336,2023-08-20,2,Aston Villa
5,Everton,1.89792,1.40228,2023-08-26,3,Wolves
6,Bournemouth,1.51025,1.4834,2023-08-12,1,West Ham
7,Bournemouth,1.3867,3.25448,2023-08-19,2,Liverpool
8,Bournemouth,0.483582,2.26115,2023-08-26,3,Tottenham
9,Crystal Palace,2.27616,0.422704,2023-08-12,1,Sheffield United


# Pivot xG

In [14]:
# One row per team, one column per match number ("GW"), each cell holding xG.
# `aggfunc` is passed as the string "sum": handing over the NumPy callable
# `np.sum` is deprecated in recent pandas and emits a FutureWarning.
pivot_xG = pd.pivot_table(df, values='xG', index=['Team'],
                          columns=['GW'], aggfunc="sum")

# Label the match columns "GW1", "GW2", ...; splitting on "." also copes with
# float column labels such as 1.0.
pivot_xG.columns = ['GW' + str(col).split(".")[0] for col in pivot_xG.columns]

# Row total over all match columns (missing matches are skipped by `sum`).
pivot_xG['Summary'] = pivot_xG.sum(axis=1)

# Highest total xG first.
pivot_xG = pivot_xG.sort_values(by=["Summary"], ascending=False)
pivot_xG.head(20)

Unnamed: 0_level_0,GW1,GW2,GW3,Summary
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Brighton,4.36748,3.15562,1.3743,8.8974
Aston Villa,1.486,3.24336,2.83691,7.56627
Manchester City,2.40074,1.22528,3.84804,7.47406
Manchester Utd,1.97087,2.44604,3.02803,7.44494
Chelsea,1.93258,1.60916,3.41419,6.95593
Brentford,1.90756,3.46888,0.948011,6.324451
Arsenal,0.84262,2.20593,3.23088,6.27943
Tottenham,1.45316,2.53904,2.26115,6.25335
Newcastle Utd,4.32208,0.257154,1.62037,6.199604
West Ham,1.4834,1.71234,2.9527,6.14844


# Pivot xGA

In [15]:
# One row per team, one column per match number ("GW"), each cell holding xGA.
# `aggfunc` is passed as the string "sum": handing over the NumPy callable
# `np.sum` is deprecated in recent pandas and emits a FutureWarning.
pivot_xGA = pd.pivot_table(df, values='xGA', index=['Team'],
                           columns=['GW'], aggfunc="sum")

# Label the match columns "GW1", "GW2", ...; splitting on "." also copes with
# float column labels such as 1.0.
pivot_xGA.columns = ['GW' + str(col).split(".")[0] for col in pivot_xGA.columns]

# Row total over all match columns (missing matches are skipped by `sum`).
pivot_xGA['Summary'] = pivot_xGA.sum(axis=1)

# Highest total xGA first, i.e. the teams that conceded the most xG lead.
pivot_xGA = pivot_xGA.sort_values(by=["Summary"], ascending=False)
pivot_xGA.head(20)

Unnamed: 0_level_0,GW1,GW2,GW3,Summary
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fulham,2.59001,3.46888,3.23088,9.28977
Luton,4.36748,3.41419,,7.78167
Brighton,1.88594,2.68821,2.9527,7.52685
Sheffield United,2.27616,1.37181,3.84804,7.49601
Wolves,1.97087,3.15562,1.89792,7.02441
Bournemouth,1.4834,3.25448,2.26115,6.99903
Everton,1.58144,3.24336,1.40228,6.22708
Aston Villa,4.32208,0.721465,0.630605,5.67415
Burnley,2.40074,2.83691,,5.23765
Manchester Utd,1.72228,2.53904,0.960354,5.221674


# Summary

In [16]:
# Per-team totals: xG and xGA summed over all of the team's match rows.
# The columns are selected with a list (double brackets); the tuple-style
# ['xG', 'xGA'] selection is deprecated and raises on pandas 2.x.
summary_df = df.groupby('Team', as_index=False)[['xG', 'xGA']].sum()

  summary_df = df.groupby('Team', as_index=False)['xG', 'xGA'].sum()


In [17]:
# Tag every totals row with the marker "Summary" in the "Opponent" column so
# it can be told apart from real match rows.
summary_df = summary_df.assign(Opponent="Summary")

In [18]:
summary_df

Unnamed: 0,Team,xG,xGA,Opponent
0,Arsenal,6.27943,2.885282,Summary
1,Aston Villa,7.56627,5.67415,Summary
2,Bournemouth,3.380532,6.99903,Summary
3,Brentford,6.324451,2.913669,Summary
4,Brighton,8.8974,7.52685,Summary
5,Burnley,0.941637,5.23765,Summary
6,Chelsea,6.95593,3.478021,Summary
7,Crystal Palace,4.40593,3.576645,Summary
8,Everton,5.209395,6.22708,Summary
9,Fulham,2.831156,9.28977,Summary


# Summary per 90

In [19]:
# Per-team averages: mean xG and xGA over the team's match rows (one row per
# match, so this is an average per match).
# The columns are selected with a list (double brackets); the tuple-style
# ['xG', 'xGA'] selection is deprecated and raises on pandas 2.x.
summary_df_per_90 = df.groupby('Team', as_index=False)[['xG', 'xGA']].mean()
# Marker that distinguishes these rows from real match rows.
summary_df_per_90["Opponent"] = "Summary per 90"

  summary_df_per_90 = df.groupby('Team', as_index=False)['xG', 'xGA'].mean()


In [20]:
summary_df_per_90

Unnamed: 0,Team,xG,xGA,Opponent
0,Arsenal,2.093143,0.961761,Summary per 90
1,Aston Villa,2.52209,1.891383,Summary per 90
2,Bournemouth,1.126844,2.33301,Summary per 90
3,Brentford,2.10815,0.971223,Summary per 90
4,Brighton,2.9658,2.50895,Summary per 90
5,Burnley,0.470818,2.618825,Summary per 90
6,Chelsea,2.318643,1.15934,Summary per 90
7,Crystal Palace,1.468643,1.192215,Summary per 90
8,Everton,1.736465,2.075693,Summary per 90
9,Fulham,0.943719,3.09659,Summary per 90


# Weighting

In [21]:
# Opponent-adjusted ("weighted") figures for every match row:
#   Weighted_xG  = xG  / opponent's average xGA per match
#   Weighted_xGA = xGA / opponent's average xG  per match
# The opponent averages are looked up once through a Team-indexed table
# instead of re-filtering `summary_df_per_90` twice for every row.
per_90_by_team = summary_df_per_90.set_index("Team")[["xG", "xGA"]].astype(float)

# Only rows that carry a match date are treated as matches.
played = df["Date"].notna()
opponents = df.loc[played, "Opponent"]

# A match without a resolvable opponent is a data problem. Fail with a clear
# ValueError (the exception type the per-row `.item()` lookup raised) rather
# than producing silent NaN values.
unmatched = ~opponents.isin(per_90_by_team.index)
if unmatched.any():
    raise ValueError(
        "No per-90 summary found for opponent(s): "
        f"{sorted(opponents[unmatched].astype(str).unique())}"
    )

# Assignment aligns on the index; rows without a date are left as NaN.
df["Weighted_xG"] = df.loc[played, "xG"].astype(float) / opponents.map(per_90_by_team["xGA"])
df["Weighted_xGA"] = df.loc[played, "xGA"].astype(float) / opponents.map(per_90_by_team["xG"])

In [22]:
df.head(50)

Unnamed: 0,Team,xG,xGA,Date,GW,Opponent,Weighted_xG,Weighted_xGA
0,Aston Villa,1.486,4.32208,2023-08-12,1,Newcastle Utd,1.249649,2.091463
1,Aston Villa,3.24336,0.721465,2023-08-20,2,Everton,1.562543,0.415479
2,Aston Villa,2.83691,0.630605,2023-08-27,3,Burnley,1.083276,1.33938
3,Everton,2.59001,1.58144,2023-08-12,1,Fulham,0.836407,1.675754
4,Everton,0.721465,3.24336,2023-08-20,2,Aston Villa,0.381448,1.285981
5,Everton,1.89792,1.40228,2023-08-26,3,Wolves,0.810568,0.723724
6,Bournemouth,1.51025,1.4834,2023-08-12,1,West Ham,1.008243,0.723793
7,Bournemouth,1.3867,3.25448,2023-08-19,2,Liverpool,0.842185,1.804522
8,Bournemouth,0.483582,2.26115,2023-08-26,3,Tottenham,0.299916,1.084771
9,Crystal Palace,2.27616,0.422704,2023-08-12,1,Sheffield United,0.910949,0.73049


In [23]:
# Summary weighted

In [24]:
# Stack the per-team "Summary" and "Summary per 90" rows underneath the match
# rows. `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0;
# `pd.concat` is the supported equivalent.
# NOTE: this cell is not idempotent — re-running it stacks the summary rows
# onto `df` a second time.
df = pd.concat([df, summary_df, summary_df_per_90])

# A plain reset_index() (without drop=True) keeps the previous row labels in
# an "index" column, exactly as before.
df = df.reset_index()

In [25]:
# Per-team mean of the opponent-adjusted figures. Rows without weighted values
# hold NaN in these columns, and `mean` skips NaN.
# The columns are selected with a list (double brackets); the tuple-style
# selection is deprecated and raises on pandas 2.x.
summary_weighted_df = df.groupby('Team', as_index=False)[['Weighted_xG', 'Weighted_xGA']].mean()

  summary_weighted_df = df.groupby('Team', as_index=False)['Weighted_xG', "Weighted_xGA"].mean()


In [28]:
summary_weighted_df.sort_values(by=["Weighted_xGA"], ascending=False)

Unnamed: 0,Team,Weighted_xG,Weighted_xGA
9,Fulham,0.685746,1.560186
4,Brighton,1.129232,1.477338
11,Luton,0.576707,1.472555
16,Sheffield United,0.815733,1.447358
1,Aston Villa,1.298489,1.282107
8,Everton,0.676141,1.228486
2,Bournemouth,0.716781,1.204362
5,Burnley,0.517382,1.044227
13,Manchester Utd,1.471062,0.993473
19,Wolves,0.912173,0.98372


In [27]:
# summary_weighted_df["Weighted_xG"].sum() / 20