In [192]:
from socceraction.data.wyscout import PublicWyscoutLoader
from socceraction.spadl.wyscout import convert_to_actions
from socceraction.spadl.config import actiontypes, bodyparts
import socceraction.vaep.labels as lab
import socceraction.vaep.features as fs
import socceraction.spadl as spadl
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta
from xgboost import XGBClassifier
import socceraction.vaep.formula as vaepformula

In [193]:
# Loader for the public Wyscout data set. `root` is the local data directory;
# it is a relative path, so it resolves against the notebook's working directory.
api = PublicWyscoutLoader(root="data/wyscout")

In [194]:
# Competitions offered by the loader (no arguments needed).
competitions_df = api.competitions()
# competitions_df  # uncomment to display the table

In [195]:
# Games of a single competition/season. The ids are passed by keyword so the
# call documents itself; 28 / 10078 presumably identify the 2018 World Cup
# (going by the variable name) -- confirm against competitions_df.
games_wc_2018_df = api.games(competition_id=28, season_id=10078)
# games_wc_2018_df  # uncomment to display the table

In [196]:
# Teams involved in the selected match, looked up by game id.
teams_wc_2018_df = api.teams(game_id=2057954)
teams_wc_2018_df.head(n=5)

Unnamed: 0,team_id,team_name_short,team_name
0,14358,Russia,Russia
1,16521,Saudi Arabia,Saudi Arabia


In [197]:
# Raw Wyscout events of the selected match, with the event timestamp also
# expressed in minutes (milliseconds / 60000) in a `minutes_played` column.
event_wc_2018_df = api.events(game_id=2057954).assign(
    minutes_played=lambda events: events['milliseconds'] / 60000
)
# event_wc_2018_df  # uncomment to display the table

In [198]:
# Convert the raw Wyscout events to SPADL actions.
# convert_to_actions takes (events dataframe, home team id). The id is given a
# name instead of being an inline literal because the very same id is also
# passed to fs.play_left_to_right in the game-state step below.
home_team_id = 14358  # Russia (see the teams table for this match)
spadl_events_wc_2018_df = convert_to_actions(event_wc_2018_df, home_team_id)
# Attach the readable type_name / result_name / bodypart_name columns.
spadl_events_wc_2018_df = spadl.add_names(spadl_events_wc_2018_df)
spadl_events_wc_2018_df

Unnamed: 0,game_id,period_id,time_seconds,team_id,player_id,start_x,start_y,end_x,end_y,original_event_id,bodypart_id,type_id,result_id,action_id,type_name,result_name,bodypart_name
0,2057954,1,1.656214,16521,122671,52.50,34.00,68.25,36.04,258612104,0,0,1,0,pass,success,foot
1,2057954,1,4.487814,16521,139393,68.25,36.04,26.25,12.92,258612106,0,0,1,1,pass,success,foot
2,2057954,1,6.524990,16521,122847,26.25,12.92,38.85,11.56,,0,21,1,2,dribble,success,foot
3,2057954,1,8.562167,16521,122847,38.85,11.56,30.45,10.20,258612110,0,0,1,3,pass,success,foot
4,2057954,1,10.991292,16521,122832,30.45,10.20,8.40,7.48,258612113,0,0,1,4,pass,success,foot
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1215,2057954,2,2868.496551,14358,103682,76.65,19.04,81.90,20.40,258613771,0,0,1,1215,pass,success,foot
1216,2057954,2,2870.980334,16521,122577,81.90,20.40,80.85,25.16,258614076,0,9,0,1216,tackle,fail,foot
1217,2057954,2,2872.843120,16521,122577,80.85,25.16,80.85,25.16,258614077,0,8,1,1217,foul,success,foot
1218,2057954,2,2939.438099,14358,257800,85.05,22.44,105.00,30.60,258613773,0,13,1,1218,shot_freekick,success,foot


In [199]:
# 1. convert actions to game states
# Same id that was given to convert_to_actions (14358 = Russia; 16521 would be
# Saudi Arabia, the other team in this match).
home_team_id = 14358
# nb_prev_actions=3 produces the a0 / a1 / a2 columns seen in the feature
# table; the states are then oriented with play_left_to_right.
gamestates = fs.play_left_to_right(
    fs.gamestates(spadl_events_wc_2018_df, nb_prev_actions=3),
    home_team_id,
)

In [200]:
# 2. compute features
# Feature generators, in the order their columns appear in X. Each one is
# called on `gamestates` and the results are joined column-wise.
xfns = [
    fs.actiontype, fs.result, fs.bodypart,
    fs.time,
    fs.startlocation, fs.endlocation,
    fs.startpolar, fs.endpolar,
    fs.movement,
    fs.team,
    fs.time_delta, fs.space_delta,
    fs.goalscore,
]
X = pd.concat([make_features(gamestates) for make_features in xfns], axis=1)
X

Unnamed: 0,type_id_a0,type_id_a1,type_id_a2,result_id_a0,result_id_a1,result_id_a2,bodypart_id_a0,bodypart_id_a1,bodypart_id_a2,period_id_a0,...,time_delta_2,dx_a01,dy_a01,mov_a01,dx_a02,dy_a02,mov_a02,goalscore_team,goalscore_opponent,goalscore_diff
0,0,0,0,1,1,1,0,0,0,1,...,0.000000,-15.75,-2.04,15.881565,-15.75,-2.04,15.881565,0,0,0
1,0,0,0,1,1,1,0,0,0,1,...,2.831600,0.00,0.00,0.000000,0.00,0.00,0.000000,0,0,0
2,21,0,0,1,1,1,0,0,0,1,...,4.868776,0.00,0.00,0.000000,-42.00,-23.12,47.943033,0,0,0
3,0,21,0,1,1,1,0,0,0,1,...,4.074353,0.00,0.00,0.000000,12.60,-1.36,12.673184,0,0,0
4,0,0,21,1,1,1,0,0,0,1,...,4.466302,0.00,0.00,0.000000,-8.40,-1.36,8.509383,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1215,0,0,21,1,1,1,0,0,0,2,...,3.948167,0.00,0.00,0.000000,-4.20,5.44,6.872671,4,0,4
1216,9,0,0,0,1,1,0,0,0,2,...,5.015466,0.00,0.00,0.000000,5.25,1.36,5.423292,0,4,-4
1217,8,9,0,1,0,1,0,0,0,2,...,4.346569,0.00,0.00,0.000000,-1.05,4.76,4.874433,0,4,-4
1218,13,8,9,1,1,0,0,0,0,2,...,68.457765,-4.20,2.72,5.003839,-4.20,2.72,5.003839,4,0,4


In [201]:
# 3. compute labels
# One label column per function (`scores`, `concedes`), computed from the SPADL
# actions and placed side by side.
yfns = [
    lab.scores,
    lab.concedes,
]
Y = pd.concat([make_label(spadl_events_wc_2018_df) for make_label in yfns], axis=1)
Y

Unnamed: 0,scores,concedes
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
1215,True,False
1216,False,True
1217,False,True
1218,True,False


In [202]:
# 4. load or train models
# One binary classifier per label column of Y, keyed by the column name.
# NOTE(review): each model is fit on X and then scored on that same X in step 5,
# so the probabilities below are in-sample estimates for this single match.
models = {}
for col in Y.columns:
    model = XGBClassifier(n_estimators=50, max_depth=3, n_jobs=-3, verbosity=1, enable_categorical=True)
    model.fit(X, Y[col])
    models[col] = model

# 5. predict scoring and conceding probabilities for each game state
# predict_proba yields one column per class; column 1 is the positive class, so
# it is sliced out directly instead of looping over the rows in Python.
# The frame is built in one step and indexed like X so the probabilities stay
# row-aligned with the actions when they are combined in the VAEP formula.
Y_hat = pd.DataFrame(
    {col: models[col].predict_proba(X)[:, 1] for col in Y.columns},
    index=X.index,
)
Y_hat

Unnamed: 0,scores,concedes
0,0.000274,0.000342
1,0.002797,0.000227
2,0.001822,0.001707
3,0.002615,0.000331
4,0.002520,0.000275
...,...,...
1215,0.944298,0.003094
1216,0.006105,0.928944
1217,0.006721,0.967945
1218,0.969627,0.004774


In [203]:
# 6. compute VAEP value
# Combine the SPADL actions with the predicted scoring / conceding
# probabilities; the result carries offensive_value, defensive_value and
# vaep_value columns.
values = vaepformula.value(
    spadl_events_wc_2018_df,
    Pscores=Y_hat["scores"],
    Pconcedes=Y_hat["concedes"],
)
values

Unnamed: 0,offensive_value,defensive_value,vaep_value
0,0.000000,-0.000000,0.000000
1,0.002523,0.000115,0.002638
2,-0.000975,-0.001480,-0.002455
3,0.000793,0.001376,0.002169
4,-0.000095,0.000056,-0.000039
...,...,...,...
1215,-0.003624,-0.001333,-0.004957
1216,0.003011,0.015354,0.018365
1217,0.000615,-0.039001,-0.038385
1218,0.969627,-0.004774,0.964854
