# Features
This notebook generates features and labels (goal/no goal) for all shots and stores them in an HDF file. Caching them this way is good practice: it saves computation time when you want to experiment with multiple pipelines.

In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import itertools

In [2]:
%load_ext autoreload
%autoreload 2
from soccer_xg.api import DataApi
import soccer_xg.xg as xg
import soccer_xg.features as fs

## Config

In [3]:
# dataset
# Root data directory; the per-league SPADL files and the feature store
# both live under f"{dir_data}/{provider}/".
dir_data = "../data"
provider = 'wyscout_opensource'
# Every (league, season) combination below is processed.
leagues = ['ENG', 'ESP', 'ITA', 'GER', 'FRA']
seasons = ['1718']

# features
# Built from dir_data (rather than a second hard-coded '../data') so that
# changing the data directory moves the feature store along with the inputs.
store_features = f'{dir_data}/{provider}/features.h5'

By default, all features defined in `soccer_xg.features.all_features` are computed. It is also possible to compute only a subset of these features or to add additional feature generators. Each feature generator is a function that expects either a DataFrame object containing individual actions or a list of DataFrame objects containing consecutive actions (i.e., game states), and returns the corresponding feature for the individual action or game state. Features that contain information about the shot's outcome are automatically removed.

In [4]:
# Default: compute every feature listed in soccer_xg.features.all_features.
# Assign a subset (or an extended collection of generator functions) here
# to change which features are computed.
feature_generators = fs.all_features

## Compute features and labels

In [5]:
# Compute the features and labels of every league/season combination and
# persist both in the same HDF store, each under its own key.
for league, season in itertools.product(leagues, seasons):
    print(league, season)
    spadl_path = f"{dir_data}/{provider}/spadl-{provider}-{league}-{season}.h5"
    api = DataApi([spadl_path])

    shot_features = xg.get_features(api, xfns=feature_generators)
    shot_features.to_hdf(store_features, key=f'{league}/{season}/features', format='table')

    shot_labels = xg.get_labels(api)
    shot_labels.to_hdf(store_features, key=f'{league}/{season}/labels', format='table')

ENG 1718


Generating features: 100%|██████████| 380/380 [01:40<00:00,  3.78it/s]
Generating labels: 100%|██████████| 380/380 [00:08<00:00, 43.51it/s]


ESP 1718


Generating features: 100%|██████████| 380/380 [01:40<00:00,  3.79it/s]
Generating labels: 100%|██████████| 380/380 [00:08<00:00, 43.70it/s]


ITA 1718


Generating features: 100%|██████████| 380/380 [01:40<00:00,  3.77it/s]
Generating labels: 100%|██████████| 380/380 [00:08<00:00, 43.62it/s]


GER 1718


Generating features: 100%|██████████| 306/306 [01:21<00:00,  3.77it/s]
Generating labels: 100%|██████████| 306/306 [00:06<00:00, 43.93it/s]


FRA 1718


Generating features: 100%|██████████| 380/380 [01:40<00:00,  3.78it/s]
Generating labels: 100%|██████████| 380/380 [00:08<00:00, 43.76it/s]


## Load features

In [6]:
# Read each league/season back from the HDF store, then stack the pieces
# into one features table and one labels object.
feature_frames, label_series = [], []
for league, season in itertools.product(leagues, seasons):
    feature_frames.append(pd.read_hdf(store_features, key=f'{league}/{season}/features'))
    label_series.append(pd.read_hdf(store_features, key=f'{league}/{season}/labels'))

features = pd.concat(feature_frames)
labels = pd.concat(label_series)

# Preview the first rows of both.
display(features.head())
display(labels.to_frame().head())

Unnamed: 0_level_0,Unnamed: 1_level_0,type_id_a0,type_id_a1,type_id_a2,bodypart_id_a0,bodypart_id_a1,bodypart_id_a2,result_id_a1,result_id_a2,start_x_a0,start_y_a0,start_x_a1,start_y_a1,start_x_a2,start_y_a2,end_x_a1,end_y_a1,end_x_a2,end_y_a2,dx_a1,dy_a1,movement_a1,dx_a2,dy_a2,movement_a2,dx_a01,dy_a01,mov_a01,dx_a02,dy_a02,mov_a02,start_dist_to_goal_a0,start_angle_to_goal_a0,start_dist_to_goal_a1,start_angle_to_goal_a1,start_dist_to_goal_a2,start_angle_to_goal_a2,end_dist_to_goal_a1,end_angle_to_goal_a1,end_dist_to_goal_a2,end_angle_to_goal_a2,team_1,team_2,time_delta_1,time_delta_2,speedx_a01,speedy_a01,speed_a01,speedx_a02,speedy_a02,speed_a02,shot_angle_a0,shot_angle_a1,shot_angle_a2,caley_zone_a0,caley_zone_a1,caley_zone_a2,angle_zone_a0,angle_zone_a1,angle_zone_a2
game_id,action_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1
2500098,17,shot,dribble,cross,foot,foot,foot,success,success,99.75,26.52,91.35,29.92,97.65,6.12,99.75,26.52,91.35,29.92,8.4,-3.4,9.062009,-6.3,23.8,24.619708,0.0,0.0,0.0,-8.4,3.4,9.062009,9.138539,0.958815,14.246715,0.290448,28.832567,1.313031,9.138539,0.958815,14.246715,0.290448,True,True,3.433228,6.866456,0.0,0.0,0.0,1.223339,0.495161,1.31975,0.499778,0.48378,0.0655,2,3,8,9,12,18
2500098,40,shot,corner_crossed,pass,foot,foot,foot,success,fail,91.35,35.36,105.0,0.0,96.6,23.8,91.35,35.36,0.0,53.72,-13.65,35.36,37.903194,-96.6,29.92,101.127476,0.0,0.0,0.0,-91.35,18.36,93.176779,13.717584,0.099306,34.0,1.570796,13.213629,0.881872,13.717584,0.099306,106.835754,0.185647,True,True,2.102531,21.927228,0.0,0.0,0.0,4.166053,0.837315,4.249364,0.517985,0.0,0.363334,3,8,4,12,21,12
2500098,77,shot,clearance,cross,foot,foot,foot,fail,fail,75.6,29.92,94.5,27.2,98.7,65.96,75.6,29.92,94.5,27.2,-18.9,2.72,19.094722,-4.2,-38.76,38.98689,0.0,0.0,0.0,18.9,-2.72,19.094722,29.681752,0.137895,12.509596,0.5747,32.575015,1.37617,29.681752,0.137895,12.509596,0.5747,False,True,2.629861,3.250682,0.0,0.0,0.0,5.814165,0.836747,5.874066,0.242481,0.491555,0.043863,6,3,0,18,12,18
2500098,140,shot,cross,dribble,foot,foot,foot,success,success,92.4,43.52,98.7,51.68,91.35,54.4,92.4,43.52,98.7,51.68,-6.3,-8.16,10.309006,7.35,-2.72,7.837149,0.0,0.0,0.0,6.3,8.16,10.309006,15.792099,0.647047,18.768921,1.228489,24.545519,0.981099,15.792099,0.647047,18.768921,1.228489,True,True,1.052499,5.000627,0.0,0.0,0.0,1.259842,1.631795,2.061543,0.371538,0.13486,0.167545,4,5,0,12,15,18
2500098,145,shot,pass,pass,foot,foot,foot,success,success,99.75,37.4,96.6,38.76,93.45,45.56,99.75,37.4,96.6,38.76,3.15,-1.36,3.431049,3.15,-6.8,7.494164,0.0,0.0,0.0,-3.15,1.36,3.431049,6.254798,0.5747,9.654926,0.515549,16.341239,0.785831,6.254798,0.5747,9.654926,0.515549,True,True,1.677755,2.659997,0.0,0.0,0.0,1.184212,0.511279,1.28987,0.978291,0.654611,0.320841,1,3,4,6,9,15


Unnamed: 0_level_0,Unnamed: 1_level_0,goal
game_id,action_id,Unnamed: 2_level_1
2500098,17,False
2500098,40,False
2500098,77,False
2500098,140,False
2500098,145,False
