In [1]:
%load_ext autoreload
%autoreload 2
import os
import warnings
import tqdm
import pandas as pd
# Silence pandas PerformanceWarning for the whole session so the progress-bar
# output of the cells below is not buried under warnings.
warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning)

import socceraction.atomic.vaep.features as fs
import socceraction.atomic.vaep.labels as lab

In [2]:
## Configure file and folder names
# Every HDF5 file used by this notebook lives in the same data folder; the
# four paths are built in one pass from their file names.
datafolder = "../data-fifa"
spadl_h5, features_h5, labels_h5, predictions_h5 = (
    os.path.join(datafolder, filename)
    for filename in (
        "atomic-spadl-statsbomb.h5",
        "atomic-features.h5",
        "atomic-labels.h5",
        "atomic-predictions.h5",
    )
)

In [3]:
# Read the game index and the two lookup tables from the SPADL store.
# `actiontypes` and `bodyparts` are merged into each game's actions further down.
games = pd.read_hdf(spadl_h5, key="games")
actiontypes = pd.read_hdf(spadl_h5, key="atomic_actiontypes")
bodyparts = pd.read_hdf(spadl_h5, key="bodyparts")

print("nb of games:", len(games))

nb of games: 64


In [4]:
# Feature generators. Each one is called on a game's game states and the
# results are concatenated column-wise into a single feature table per game.
xfns = [
    fs.actiontype,
    fs.actiontype_onehot,
    fs.bodypart,
    fs.bodypart_onehot,
    fs.goalscore,
    fs.location,
    fs.polar,
    fs.direction,
    fs.team,
    fs.time,
    fs.time_delta,
]

# The SPADL store is only read in this cell, so it is opened read-only:
# HDFStore defaults to append mode, which needs write access, allows accidental
# modification of the source data and creates an empty file when the path is
# wrong. The feature store keeps the default mode; assigning to a key replaces
# that game's table and leaves the other keys in the file as they are.
with pd.HDFStore(spadl_h5, mode="r") as spadlstore, pd.HDFStore(features_h5) as featurestore:
    for game in tqdm.tqdm(
        list(games.itertuples()),
        desc=f"Generating and storing features in {features_h5}",
    ):
        actions = spadlstore[f"atomic_actions/game_{game.game_id}"]
        # Left-join the two lookup tables on whatever columns they share with
        # the actions, then renumber the rows from 0.
        actions = (
            actions.merge(actiontypes, how="left")
            .merge(bodyparts, how="left")
            .reset_index(drop=True)
        )
        # NOTE(review): the second argument is presumably the number of
        # consecutive actions that make up one game state; confirm against the
        # socceraction documentation.
        gamestates = fs.gamestates(actions, 2)
        gamestates = fs.play_left_to_right(gamestates, game.home_team_id)

        X = pd.concat([fn(gamestates) for fn in xfns], axis=1)
        featurestore[f"game_{game.game_id}"] = X

Generating and storing features in ../data-fifa/atomic-features.h5: 100%|██████████| 64/64 [00:12<00:00,  5.22it/s]


In [5]:
# Label generators. Each one is called on a game's actions and the results are
# concatenated column-wise into a single label table per game.
yfns = [lab.scores, lab.concedes, lab.goal_from_shot]

# The SPADL store is only read in this cell, so it is opened read-only:
# HDFStore defaults to append mode, which needs write access, allows accidental
# modification of the source data and creates an empty file when the path is
# wrong. The label store keeps the default mode; assigning to a key replaces
# that game's table and leaves the other keys in the file as they are.
with pd.HDFStore(spadl_h5, mode="r") as spadlstore, pd.HDFStore(labels_h5) as labelstore:
    for game in tqdm.tqdm(
        list(games.itertuples()),
        desc=f"Computing and storing labels in {labels_h5}",
    ):
        actions = spadlstore[f"atomic_actions/game_{game.game_id}"]
        # Left-join the two lookup tables on whatever columns they share with
        # the actions, then renumber the rows from 0.
        actions = (
            actions.merge(actiontypes, how="left")
            .merge(bodyparts, how="left")
            .reset_index(drop=True)
        )
        Y = pd.concat([fn(actions) for fn in yfns], axis=1)
        labelstore[f"game_{game.game_id}"] = Y

Computing and storing labels in ../data-fifa/atomic-labels.h5: 100%|██████████| 64/64 [00:10<00:00,  6.36it/s]
