In [1]:
%load_ext autoreload
%autoreload 2
# Put the parent directory at the front of the module search path before the
# project imports that follow this setup.
# NOTE(review): presumably so the repository's own socceraction package is the
# one that gets imported — confirm.
import os; import sys; sys.path.insert(0,'../')
import pandas as pd
import tqdm
import warnings
# Silence pandas PerformanceWarning for the rest of the kernel session.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

import socceraction.atomic.features as fs
import socceraction.atomic.labels as lab

In [6]:
## File and folder locations
datafolder = "../data/private"

# All four HDF5 stores sit directly inside `datafolder`; build their paths in
# one pass, in the order: SPADL input, features, labels, predictions.
spadl_h5, features_h5, labels_h5, predictions_h5 = (
    os.path.join(datafolder, filename)
    for filename in (
        "spadl-statsbomb.h5",
        "atomic-features.h5",
        "atomic-labels.h5",
        "atomic-predictions.h5",
    )
)

In [7]:
# Read the games table and keep only the rows whose competition is the
# Premier League.
games = pd.read_hdf(spadl_h5, "games").loc[
    lambda all_games: all_games.competition_name == "Premier League"
]
print("nb of games:", len(games))

# Tables stored under the `atomic_actiontypes` and `bodyparts` keys of the
# same store.
actiontypes = pd.read_hdf(spadl_h5, "atomic_actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")

nb of games: 760


In [8]:
# Feature functions applied to each game's game states. Every function's
# output is concatenated column-wise into a single feature frame per game.
xfns = [
    fs.actiontype,
    fs.actiontype_onehot,
    fs.bodypart,
    fs.bodypart_onehot,
    fs.goalscore,
    fs.location,
    fs.polar,
    fs.direction,
    fs.team,
    fs.time,
    fs.time_delta,
]

for game in tqdm.tqdm(list(games.itertuples()),desc=f"Generating and storing features in {features_h5}"):
    # Load this game's atomic actions and left-join the action type and body
    # part tables (with no `on=`, merge joins on the columns the frames share).
    actions = pd.read_hdf(spadl_h5, key=f"atomic_actions/game_{game.game_id}")
    actions = (
        actions.merge(actiontypes, how="left")
        .merge(bodyparts, how="left")
        .reset_index(drop=True)
    )

    # NOTE(review): the literal 3 is the second positional argument of
    # fs.gamestates — presumably the number of actions per game state; confirm.
    gamestates = fs.gamestates(actions, 3)
    gamestates = fs.play_left_to_right(gamestates, game.home_team_id)

    X = pd.concat([fn(gamestates) for fn in xfns], axis=1)
    # Pass `key` by keyword: positional arguments after the path are deprecated
    # in DataFrame.to_hdf since pandas 2.1 and keyword-only from pandas 3.0.
    X.to_hdf(features_h5, key=f"game_{game.game_id}")

Generating and storing features in ../data/private/atomic-features.h5: 100%|██████████| 760/760 [04:04<00:00,  3.11it/s]


In [9]:
# Label functions applied to each game's actions. Every function's output is
# concatenated column-wise into a single label frame per game.
yfns = [lab.scores, lab.concedes, lab.goal_from_shot]

for game in tqdm.tqdm(list(games.itertuples()),desc=f"Computing and storing labels in {labels_h5}"):
    # Load this game's atomic actions and left-join the action type and body
    # part tables (with no `on=`, merge joins on the columns the frames share).
    actions = pd.read_hdf(spadl_h5, key=f"atomic_actions/game_{game.game_id}")
    actions = (
        actions.merge(actiontypes, how="left")
        .merge(bodyparts, how="left")
        .reset_index(drop=True)
    )

    Y = pd.concat([fn(actions) for fn in yfns], axis=1)
    # Pass `key` by keyword: positional arguments after the path are deprecated
    # in DataFrame.to_hdf since pandas 2.1 and keyword-only from pandas 3.0.
    Y.to_hdf(labels_h5, key=f"game_{game.game_id}")

Computing and storing labels in ../data/private/atomic-labels.h5: 100%|██████████| 760/760 [02:30<00:00,  5.05it/s]
