In [1]:
import os
import warnings
import tqdm
import pandas as pd
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

In [2]:
%load_ext autoreload
%autoreload 2
import socceraction.spadl as spadl
import socceraction.vaep.features as fs
import socceraction.vaep.labels as lab

import edit.create as cre

## Select data

In [3]:
# Configure file and folder names.
# The data folder can be overridden with the DATA_FIFA_FOLDER environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset the original location is used.
datafolder = os.environ.get("DATA_FIFA_FOLDER", "/home/r-maejima/work/data-fifa")
# HDF5 store holding the games table and per-game actions (input).
spadl_h5 = os.path.join(datafolder, "spadl-statsbomb.h5")
# HDF5 store receiving one feature table per game (output).
features_h5 = os.path.join(datafolder, "features.h5")
# HDF5 store receiving one label table per game (output).
labels_h5 = os.path.join(datafolder, "labels.h5")

In [4]:
# Load the games table from the SPADL store and report its row count.
games = pd.read_hdf(spadl_h5, key="games")
print("nb of games:", games.shape[0])

nb of games: 1


## Compute features

In [5]:
# Feature transformers: each one is called on the game states of a game and
# the resulting frames are concatenated column-wise into one feature table.
xfns = [
    fs.actiontype,
    fs.actiontype_onehot,
    fs.bodypart,
    fs.bodypart_onehot,
    fs.result,
    fs.result_onehot,
    fs.goalscore,
    fs.startlocation,
    fs.endlocation,
    fs.movement,
    fs.space_delta,
    fs.startpolar,
    fs.endpolar,
    fs.team,
    fs.time,
    fs.time_delta,
    # fs.playerlocations  (disabled, together with the gamestates_loc steps below)
]

# The SPADL store is only read here, so it is opened read-only: a wrong path
# then fails immediately instead of silently creating an empty HDF5 file, and
# no write access is requested on the input data. The feature store keeps the
# default mode because tables are written to it.
with pd.HDFStore(spadl_h5, mode="r") as spadlstore, pd.HDFStore(features_h5) as featurestore:
    for game in tqdm.tqdm(list(games.itertuples()), desc=f"Generating and storing features in {features_h5}"):
        actions = spadlstore[f"actions/game_{game.game_id}"]

        # Not consumed by the active feature functions; it is loaded and sorted
        # for the disabled gamestates_loc pipeline and is inspected in a later cell.
        action_locations = spadlstore[f"action_locations/game_{game.game_id}"]
        action_locations = (
            action_locations
            .sort_values(["game_id", "period_id", "action_id", "team_id", "player_id"])
            .reset_index(drop=True)
        )

        # NOTE(review): the second argument (3) is presumably the number of
        # actions kept per game state -- confirm against fs.gamestates.
        gamestates = fs.gamestates(spadl.add_names(actions), 3)
        gamestates = fs.play_left_to_right(gamestates, game.home_team_id)

        # gamestates_loc = fs.gamestates_loc(action_locations, 20, 3)
        # gamestates_loc = fs.play_left_to_right_loc(gamestates_loc, game.home_team_id)

        X = pd.concat([fn(gamestates) for fn in xfns], axis=1)

        # Columns whose names start with the player-location prefixes; only
        # needed by the fillna step below, which is currently disabled.
        player_location_cols = [
            col for col in X.columns
            if col.startswith(("location_x_p", "location_y_p"))
        ]
        # X[player_location_cols] = X[player_location_cols].fillna(0)

        # One table per game, keyed by game id.
        featurestore.put(f"game_{game.game_id}", X, format='table')

Generating and storing features in /home/r-maejima/work/data-fifa/features.h5: 100%|██████████| 1/1 [00:00<00:00,  1.90it/s]


In [6]:
# Inspect the sorted action_locations frame left over from the last game
# processed by the feature loop.
# NOTE(review): this relies on a variable leaking out of that loop, so the
# cell only works after the feature cell has been run.
action_locations

Unnamed: 0,game_id,period_id,team_id,player_id,location_x,location_y,action_id
0,3753991,1,24,3471.0,39.327439,35.976570,0
1,3753991,1,24,3473.0,55.253723,15.058954,0
2,3753991,1,24,3493.0,48.677869,35.913658,0
3,3753991,1,24,3532.0,52.214297,28.705582,0
4,3753991,1,24,3535.0,59.567393,39.809733,0
...,...,...,...,...,...,...,...
43035,3753991,2,36,3598.0,,,2151
43036,3753991,2,36,3708.0,,,2151
43037,3753991,2,36,5596.0,,,2151
43038,3753991,2,36,6821.0,,,2151


## Compute labels

In [6]:
# Label functions: each one is called on the named actions of a game and the
# resulting frames are concatenated column-wise into one label table.
yfns = [
    lab.scores,
    lab.concedes,
    lab.recoveries,
    lab.losts,
    lab.attacked,
    lab.goal_from_shot,
    lab.attacks,
]

# The SPADL store is only read here, so it is opened read-only: a wrong path
# then fails immediately instead of silently creating an empty HDF5 file. The
# label store keeps the default mode because tables are written to it.
with pd.HDFStore(spadl_h5, mode="r") as spadlstore, pd.HDFStore(labels_h5) as labelstore:
    for game in tqdm.tqdm(list(games.itertuples()), desc=f"Computing and storing labels in {labels_h5}"):
        actions = spadlstore[f"actions/game_{game.game_id}"]
        # Resolve the names once per game instead of once per label function.
        # NOTE(review): assumes the label functions do not modify their input
        # frame -- confirm for the customised ones.
        named_actions = spadl.add_names(actions)
        Y = pd.concat([fn(named_actions) for fn in yfns], axis=1)
        # One table per game, keyed by game id.
        labelstore.put(f"game_{game.game_id}", Y, format='table')

Computing and storing labels in /home/r-maejima/work/data-fifa/labels.h5: 100%|██████████| 1/1 [00:00<00:00,  2.53it/s]
