# This is based on a Markov model (memoryless)
A memoryless model does not take into account the actions before or after the current action.

# Import Libraries

In [1]:
import pandas as pd

# Get Data

In [6]:
# Columns of the raw event data that are kept for the xT computation.
cols_for_xt = [
    "minute", "second", "teamId", "teamName", "h_a",
    "x", "y", "expandedMinute", "period", "type", "outcomeType", "score",
    "playerId", "playerName", "endX", "endY",
    "isShot", "isOwnGoal",
]

In [7]:
# Load the season's event data, reading only the columns listed in cols_for_xt
# so the unused columns are never materialised in memory.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type columns.
xt_df = pd.read_csv(
    "../data/Premier League/2021-2022/event_data.csv",
    usecols=cols_for_xt,
    low_memory=False,
)

In [44]:
# header=None: the first line of the file is treated as data, so the frame
# gets default integer row and column labels.
xt_grid_df = pd.read_csv(filepath_or_buffer="../data/xT_Grid.csv", header=None)
xt_grid_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0.000828,0.002543,0.003652,0.007795,0.013633,0.020894,0.025403,0.023597,0.029002,0.054983,0.065006,0.087954,0.100036,0.092737,0.086888,0.280268
1,0.000841,0.003283,0.004089,0.008307,0.013324,0.018302,0.022191,0.017617,0.02407,0.038428,0.054961,0.07359,0.082512,0.065993,0.071766,0.055461
2,0.000703,0.003383,0.003456,0.008025,0.013326,0.01825,0.020604,0.018121,0.02299,0.037736,0.052428,0.075394,0.063609,0.032408,0.052167,0.048856
3,0.001702,0.005673,0.006295,0.012162,0.015491,0.022664,0.027817,0.027618,0.028053,0.044331,0.054082,0.06416,0.054753,0.028039,0.044991,0.06275
4,0.003844,0.006209,0.007815,0.010247,0.015109,0.022571,0.020372,0.017351,0.018179,0.030381,0.044285,0.04524,0.026979,0.053293,0.069674,0.077692
5,0.018321,0.01101,0.017235,0.007616,0.014741,0.019497,0.011526,0.028151,0.033493,0.013868,0.035316,0.036558,0.034913,0.078975,0.180704,0.33694
6,0.017724,0.00921,0.014099,0.007174,0.014697,0.021808,0.010956,0.028359,0.028664,0.012611,0.035017,0.036049,0.027309,0.069071,0.141684,0.319632
7,0.003501,0.004641,0.006628,0.00851,0.014032,0.019352,0.019798,0.016509,0.02046,0.02702,0.047117,0.040916,0.026694,0.03677,0.078882,0.058132
8,0.001488,0.004702,0.005325,0.010703,0.018179,0.021458,0.025836,0.023383,0.029237,0.04657,0.051638,0.066484,0.045567,0.017753,0.042561,0.067027
9,0.000615,0.002658,0.002978,0.006945,0.012873,0.016493,0.01651,0.016487,0.019279,0.03361,0.044688,0.06798,0.056511,0.034807,0.057829,0.04797


# Data Cleaning

## Keep only required columns
Dropping the columns that are not needed reduces the memory used by the event DataFrame.

In [5]:
# Keep only the columns listed in cols_for_xt (in that order).
xt_df = xt_df.loc[:, cols_for_xt]

# Compute xT for all players

## Keep all successful move actions (shots are inspected but not kept)

In [8]:
# Distinct outcomeType values that occur on shot events.
xt_df.loc[xt_df["isShot"], "outcomeType"].unique()

array(['Successful'], dtype=object)

In [23]:
# Event type of every shot that is not an own goal.
xt_df.loc[xt_df["isShot"] & xt_df["isOwnGoal"].ne(True), "type"]

22          SavedShot
197       MissedShots
206        ShotOnPost
283       MissedShots
376              Goal
             ...     
336533      SavedShot
336541      SavedShot
336656      SavedShot
336796    MissedShots
336835      SavedShot
Name: type, Length: 5560, dtype: object

In [25]:
# Event type of every pass, take-on, or shot that is not an own goal.
xt_df.loc[
    xt_df["type"].isin(["Pass", "TakeOn"])
    | (xt_df["isShot"] & xt_df["isOwnGoal"].ne(True)),
    "type",
]

2         Pass
3         Pass
4         Pass
5         Pass
6         Pass
          ... 
336968    Pass
336971    Pass
336972    Pass
336973    Pass
336974    Pass
Name: type, Length: 225638, dtype: object

In [32]:
# Successful passes and take-ons that have both a start and an end location.
# The result is an independent copy, so adding columns later does not touch xt_df.
is_move = xt_df["type"].isin(["Pass", "TakeOn"])
is_successful = xt_df["outcomeType"] == "Successful"
has_coords = xt_df[["x", "y", "endX", "endY"]].notnull().all(axis=1)

xt_action_df = xt_df.loc[is_move & is_successful & has_coords].copy(deep=True)

In [33]:
# Sanity check: lists any kept action that is still missing endX.
xt_action_df.loc[xt_action_df["endX"].isnull(), "type"]

Series([], Name: type, dtype: object)

## Compute the bins

In [45]:
# Grid dimensions as (number of rows, number of columns).
xt_grid_df.shape

(12, 16)

In [46]:
# Grid dimensions: xt_rows is used to bin y / endY, xt_cols to bin x / endX.
xt_rows, xt_cols = xt_grid_df.shape

In [47]:
def _pitch_bin(coords, n_bins, pitch_max=100.0):
    """Map pitch coordinates to zero-based grid bin indices.

    Bin edges are fixed and equally spaced over [0, pitch_max], so every
    coordinate column is binned against the same grid. Passing an integer
    to ``pd.cut`` instead derives the edges from the min/max of whichever
    column it is given, so start and end coordinates would be binned on
    slightly different grids tied to the observed data.

    Args:
        coords: Series of coordinates along one pitch axis.
        n_bins: Number of equal-width bins along that axis.
        pitch_max: Upper bound of the coordinate scale.
            NOTE(review): assumes coordinates run from 0 to 100 — confirm
            against the data provider's coordinate system.

    Returns:
        Series of bin indices in ``0 .. n_bins - 1``. Values outside
        [0, pitch_max] become NaN.
    """
    edges = [pitch_max * i / n_bins for i in range(n_bins + 1)]
    # include_lowest=True keeps a coordinate of exactly 0 in the first bin.
    return pd.cut(coords, bins=edges, labels=False, include_lowest=True)


# x / endX are binned along the grid columns, y / endY along the grid rows.
xt_action_df["x1_bin"] = _pitch_bin(xt_action_df["x"], xt_cols)
xt_action_df["y1_bin"] = _pitch_bin(xt_action_df["y"], xt_rows)
xt_action_df["x2_bin"] = _pitch_bin(xt_action_df["endX"], xt_cols)
xt_action_df["y2_bin"] = _pitch_bin(xt_action_df["endY"], xt_rows)

# Calculate final xT for each player

In [48]:
# Look up the grid value of the zone each action starts in and ends in.
# The grid is read with header=None, so its row/column labels are the default
# 0..n-1 integers and positional array indexing matches a label-based lookup.
# Indexing the array with the bin columns resolves every row at once; the
# previous row-wise apply used positional x[0] / x[1] access on a Series
# indexed by column names, which pandas has deprecated.
xt_grid_values = xt_grid_df.to_numpy()

xt_action_df["start_zone_xT"] = xt_grid_values[
    xt_action_df["y1_bin"].to_numpy(dtype=int),
    xt_action_df["x1_bin"].to_numpy(dtype=int),
]
xt_action_df["end_zone_xT"] = xt_grid_values[
    xt_action_df["y2_bin"].to_numpy(dtype=int),
    xt_action_df["x2_bin"].to_numpy(dtype=int),
]

In [49]:
# Preview the per-action difference: end-zone value minus start-zone value.
xt_action_df["end_zone_xT"] - xt_action_df["start_zone_xT"]

2        -0.016625
5         0.001032
6        -0.004150
7        -0.011240
8         0.024128
            ...   
336965    0.031858
336971   -0.022477
336972    0.046172
336973   -0.008829
336974   -0.018784
Length: 166975, dtype: float64

In [51]:
# xT of an action = value of the zone it ends in minus value of the zone it starts in.
xt_action_df["xT"] = xt_action_df["end_zone_xT"] - xt_action_df["start_zone_xT"]

In [52]:
# Total xT per playerId, labelled with the first playerName and teamName
# recorded for that player.
xt_action_df.groupby("playerId")[["playerName", "teamName", "xT"]].agg(
    {"playerName": "first", "teamName": "first", "xT": "sum"}
)

Unnamed: 0_level_0,playerName,teamName,xT
playerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4511.0,James Milner,Liverpool,0.045062
5583.0,Cristiano Ronaldo,Man Utd,-0.014231
5625.0,Aaron Lennon,Burnley,-0.194955
8166.0,Ashley Young,Aston Villa,0.739688
8247.0,Mark Noble,West Ham,0.314128
...,...,...,...
425603.0,Jeremy Sarmiento,Brighton,0.037457
425711.0,Lewis Dobbin,Everton,-0.032463
429531.0,Kasey McAteer,Leicester,-0.004138
429723.0,Jonathan Rowe,Norwich,-0.034420


In [55]:
# One row per playerId: first recorded name, team id and team name, plus the
# sum of xT over all of the player's kept actions.
all_players_xT = xt_action_df.groupby("playerId").agg(
    playerName=("playerName", "first"),
    teamId=("teamId", "first"),
    teamName=("teamName", "first"),
    xT=("xT", "sum"),
)

In [56]:
# The ten players with the highest total xT, in descending order.
top10_player_ids = all_players_xT["xT"].nlargest(10).index
all_players_xT.loc[top10_player_ids]

Unnamed: 0_level_0,playerName,teamId,teamName,xT
playerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
122117.0,Aymeric Laporte,167,Man City,10.725714
313171.0,Rúben Dias,167,Man City,9.529742
332325.0,Declan Rice,29,West Ham,9.008009
28550.0,Thiago Silva,15,Chelsea,8.948986
95408.0,Virgil van Dijk,26,Liverpool,8.357196
236519.0,Romain Saïss,161,Wolves,7.97587
104010.0,Antonio Rüdiger,15,Chelsea,7.013768
374631.0,Marc Guéhi,162,Crystal Palace,6.981843
117973.0,Eric Dier,30,Tottenham,6.54566
109922.0,Adam Webster,211,Brighton,6.494636


In [58]:
# Distinct (teamId, teamName) pairs, to find the id of a given team.
all_players_xT.loc[:, ["teamId", "teamName"]].drop_duplicates()

Unnamed: 0_level_0,teamId,teamName
playerId,Unnamed: 1_level_1,Unnamed: 2_level_1
4511.0,26,Liverpool
5583.0,32,Man Utd
5625.0,184,Burnley
8166.0,24,Aston Villa
8247.0,29,West Ham
11530.0,27,Watford
13796.0,18,Southampton
16161.0,161,Wolves
19119.0,167,Man City
19545.0,14,Leicester


In [63]:
# Top ten players by total xT within the team selected by t_id.
t_id = 15
t_df = all_players_xT.loc[all_players_xT["teamId"].eq(t_id)]
team_top10_ids = t_df["xT"].nlargest(10).index
t_df.loc[team_top10_ids]

Unnamed: 0_level_0,playerName,teamId,teamName,xT
playerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
28550.0,Thiago Silva,15,Chelsea,8.948986
104010.0,Antonio Rüdiger,15,Chelsea,7.013768
353292.0,Trevoh Chalobah,15,Chelsea,4.868295
130331.0,Andreas Christensen,15,Chelsea,4.594404
106968.0,Jorginho,15,Chelsea,3.738027
93894.0,Mateo Kovacic,15,Chelsea,2.341583
114075.0,N'Golo Kanté,15,Chelsea,1.832776
255777.0,Ruben Loftus-Cheek,15,Chelsea,1.284263
322747.0,Malang Sarr,15,Chelsea,0.472535
78498.0,Romelu Lukaku,15,Chelsea,0.42909


In [62]:
# Display the per-team frame built in the preceding cell.
# NOTE(review): the saved output below lists teamId 26 rows although t_id is
# set to 15 in the preceding cell, and this cell's execution count (62) is
# lower than that cell's (63) — the output predates the current t_id and
# should be refreshed by re-running the notebook top to bottom.
t_df

Unnamed: 0_level_0,playerName,teamId,teamName,xT
playerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4511.0,James Milner,26,Liverpool,0.045062
68659.0,Jordan Henderson,26,Liverpool,2.609723
74341.0,Joel Matip,26,Liverpool,6.320693
74939.0,Thiago,26,Liverpool,3.275098
84146.0,Alex Oxlade-Chamberlain,26,Liverpool,0.584781
95408.0,Virgil van Dijk,26,Liverpool,8.357196
96182.0,Roberto Firmino,26,Liverpool,-0.003258
108226.0,Mohamed Salah,26,Liverpool,-3.644913
109915.0,Sadio Mané,26,Liverpool,0.038089
114147.0,Alisson,26,Liverpool,1.155795
