In [16]:
%load_ext autoreload
%autoreload 2

import os
from time import time
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from causallearn.search.ConstraintBased.PC import pc
from causallearn.utils.GraphUtils import GraphUtils
from causallearn.utils.cit import CIT

import vis
import softclustering as sc
import socceraction.spadl as spadl

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
datafolder = "data"
fifa2018h5 = os.path.join(datafolder, "spadl-fifa2018.h5")
games = pd.read_hdf(fifa2018h5, key="games")
with pd.HDFStore(fifa2018h5) as store:
    actions = []  #list of DataFrames
    for game in tqdm(games.itertuples()):
        game_action = store[f"actions/game_{game.game_id}"]
        game_action = spadl.play_left_to_right(game_action, game.home_team_id)
        game_action["is_home"] = game_action["team_id"] == game.home_team_id
        actions.append(game_action)
    actions = pd.concat(actions)
    actions.drop("original_event_id", axis=1, inplace=True)
    actions = pd.merge(actions, spadl.config.actiontypes_df(), how="left")
    actions = sc.consolidate(actions)
    actions = sc.add_noise(actions)
    actions = sc.remove_outliers(actions, True)
    actions["angle"] = np.arctan2(actions.end_y - actions.start_y, actions.end_x - actions.start_x)
    actions["cos_angle"] = np.cos(actions["angle"])
    actions["sin_angle"] = np.sin(actions["angle"])


0it [00:00, ?it/s][A
21it [00:00, 202.16it/s][A
64it [00:00, 209.98it/s][A


Remove 536 out of 128484 datapoints.


In [20]:
no_dir_action_types = set(actions[actions["angle"] == 0.0]["type_name"])
action_types = set(actions["type_name"]) - no_dir_action_types - {"shot_penalty"}

for action in ["pass"]:
    print(f"Processing {action}...\n")
    mask = (actions["type_name"]==action) & (actions["team_id"]==771)
    columns = ["start_x", "start_y","cos_angle", "sin_angle"]
    #columns = ["start_x", "start_y","angle"]
    data = actions[mask][columns]
    cg = pc(data.to_numpy(),
                alpha=0.05,
                indep_test="fastkci",
                kernelX='Gaussian',
                kernelY='Gaussian',
                est_width='empirical',
            )
    pyd = GraphUtils.to_pydot(cg.G, labels=columns)
    pyd.write_pdf(f'plots/DAG_aatest_{action}.pdf')

Processing pass...




  0%|          | 0/4 [00:00<?, ?it/s][A
  0%|          | 0/4 [00:00<?, ?it/s][A
Depth=0, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 928.56it/s][A
Depth=0, working on node 0:  50%|█████     | 2/4 [00:03<00:03,  1.63s/it] [A
Depth=0, working on node 1:  50%|█████     | 2/4 [00:03<00:03,  1.63s/it][A
Depth=0, working on node 1:  75%|███████▌  | 3/4 [00:04<00:01,  1.29s/it][A
Depth=0, working on node 2:  75%|███████▌  | 3/4 [00:04<00:01,  1.29s/it][A
Depth=0, working on node 2: 100%|██████████| 4/4 [00:04<00:00,  1.05it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:04<00:00,  1.05it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:04<00:00,  1.05it/s][A
Depth=0, working on node 3:   0%|          | 0/4 [00:00<?, ?it/s]        [A
Depth=1, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1510.92it/s][A
Depth=1, working on node 1:  50%|█████     | 2/4 [00:00<00:00, 1469.62it/s][A
Depth=1, working on node 1:  75%|███████▌  | 3/4 [00:01<00:00,  

In [21]:
no_dir_action_types = set(actions[actions["angle"] == 0.0]["type_name"])
action_types = set(actions["type_name"]) - no_dir_action_types - {"shot_penalty"}

for action in action_types:
    print(f"Processing {action}...\n")
    mask = (actions["type_name"]==action) & (actions["team_id"]==771)
    columns = ["start_x", "start_y","cos_angle", "sin_angle"]
    #columns = ["start_x", "start_y","angle"]
    data = actions[mask][columns]
    cg = pc(data.to_numpy(),
                alpha=0.05,
                indep_test="fastkci",
                kernelX='Gaussian',
                kernelY='Gaussian',
                est_width='empirical',
            )
    pyd = GraphUtils.to_pydot(cg.G, labels=columns)
    pyd.write_pdf(f'plots/DAG2_{action}.pdf')

Processing corner...




  0%|          | 0/4 [00:00<?, ?it/s][A
  0%|          | 0/4 [00:00<?, ?it/s][A
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, axis=0)
  data = stats.zscore(data, ddof=1, a

Processing pass...




  0%|          | 0/4 [00:00<?, ?it/s][A
  0%|          | 0/4 [00:00<?, ?it/s][A
Depth=0, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1003.18it/s][A
Depth=0, working on node 0:  50%|█████     | 2/4 [00:01<00:01,  1.12it/s]  [A
Depth=0, working on node 1:  50%|█████     | 2/4 [00:01<00:01,  1.12it/s][A
Depth=0, working on node 1:  75%|███████▌  | 3/4 [00:03<00:01,  1.11s/it][A
Depth=0, working on node 2:  75%|███████▌  | 3/4 [00:03<00:01,  1.11s/it][A
Depth=0, working on node 2: 100%|██████████| 4/4 [00:03<00:00,  1.18it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:03<00:00,  1.18it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:03<00:00,  1.18it/s][A
Depth=0, working on node 3:   0%|          | 0/4 [00:00<?, ?it/s]        [A
Depth=1, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1248.30it/s][A
Depth=1, working on node 0:  50%|█████     | 2/4 [00:03<00:03,  1.81s/it]  [A
Depth=1, working on node 1:  50%|█████     | 2/4 [00:03<00:03,

Processing freekick...




  0%|          | 0/4 [00:00<?, ?it/s][A
  0%|          | 0/4 [00:00<?, ?it/s][A
  data = stats.zscore(data, ddof=1, axis=0)

Depth=0, working on node 0:  50%|█████     | 2/4 [00:00<00:00,  6.79it/s]  [A
Depth=0, working on node 1:  50%|█████     | 2/4 [00:00<00:00,  6.79it/s][A
Depth=0, working on node 1:  75%|███████▌  | 3/4 [00:00<00:00,  6.55it/s][A
Depth=0, working on node 2:  75%|███████▌  | 3/4 [00:00<00:00,  6.55it/s][A
Depth=0, working on node 2: 100%|██████████| 4/4 [00:00<00:00,  7.52it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:00<00:00,  7.52it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:00<00:00,  7.52it/s][A
Depth=0, working on node 3:   0%|          | 0/4 [00:00<?, ?it/s]        [A
Depth=1, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1475.31it/s][A
Depth=1, working on node 0:  50%|█████     | 2/4 [00:00<00:00, 16.00it/s]  [A
Depth=1, working on node 1:  50%|█████     | 2/4 [00:00<00:00, 16.00it/s][A
Depth=1, working on

Processing goalkick...




  0%|          | 0/4 [00:00<?, ?it/s][A
  0%|          | 0/4 [00:00<?, ?it/s][A
Depth=0, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1228.20it/s][A
Depth=0, working on node 0:  50%|█████     | 2/4 [00:00<00:00,  9.46it/s]  [A
Depth=0, working on node 1:  50%|█████     | 2/4 [00:00<00:00,  9.46it/s][A
Depth=0, working on node 1:  75%|███████▌  | 3/4 [00:00<00:00,  8.87it/s][A
Depth=0, working on node 2:  75%|███████▌  | 3/4 [00:00<00:00,  8.87it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:00<00:00,  8.87it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:00<00:00,  8.87it/s][A
Depth=0, working on node 3:   0%|          | 0/4 [00:00<?, ?it/s]        [A
Depth=1, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1148.18it/s][A
Depth=1, working on node 1:  50%|█████     | 2/4 [00:00<00:00, 1301.97it/s][A
Depth=1, working on node 2:  75%|███████▌  | 3/4 [00:00<00:00, 1343.32it/s][A
Depth=1, working on node 3: 100%|██████████| 4/4 [00:00<00:0

Processing cross...




  0%|          | 0/4 [00:00<?, ?it/s][A
  0%|          | 0/4 [00:00<?, ?it/s][A
Depth=0, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1297.34it/s][A
Depth=0, working on node 0:  50%|█████     | 2/4 [00:00<00:00,  9.95it/s]  [A
Depth=0, working on node 1:  50%|█████     | 2/4 [00:00<00:00,  9.95it/s][A
Depth=0, working on node 1:  75%|███████▌  | 3/4 [00:00<00:00,  9.11it/s][A
Depth=0, working on node 2:  75%|███████▌  | 3/4 [00:00<00:00,  9.11it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:00<00:00,  9.11it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:00<00:00,  9.11it/s][A
Depth=0, working on node 3:   0%|          | 0/4 [00:00<?, ?it/s]        [A
Depth=1, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1142.86it/s][A
Depth=1, working on node 1:  50%|█████     | 2/4 [00:00<00:00, 22.52it/s]  [A
Depth=1, working on node 1:  75%|███████▌  | 3/4 [00:00<00:00, 10.89it/s][A
Depth=1, working on node 2:  75%|███████▌  | 3/4 [00:00<00:00,

Processing keeper_action...




  0%|          | 0/4 [00:00<?, ?it/s][A
  0%|          | 0/4 [00:00<?, ?it/s][A
Depth=0, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1276.03it/s][A
Depth=0, working on node 0:  50%|█████     | 2/4 [00:00<00:00, 12.27it/s]  [A
Depth=0, working on node 1:  50%|█████     | 2/4 [00:00<00:00, 12.27it/s][A
Depth=0, working on node 2:  75%|███████▌  | 3/4 [00:00<00:00, 12.27it/s][A
Depth=0, working on node 2: 100%|██████████| 4/4 [00:00<00:00, 13.03it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:00<00:00, 13.03it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:00<00:00, 13.03it/s][A
Depth=0, working on node 3:   0%|          | 0/4 [00:00<?, ?it/s]        [A
Depth=1, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1385.17it/s][A
Depth=1, working on node 1:  50%|█████     | 2/4 [00:00<00:00, 1436.90it/s][A
Depth=1, working on node 2:  75%|███████▌  | 3/4 [00:00<00:00, 1512.19it/s][A
Depth=1, working on node 3: 100%|██████████| 4/4 [00:00<00:0

Processing throw_in...




  0%|          | 0/4 [00:00<?, ?it/s][A
  0%|          | 0/4 [00:00<?, ?it/s][A
  data = stats.zscore(data, ddof=1, axis=0)

Depth=0, working on node 0:  50%|█████     | 2/4 [00:00<00:00,  8.19it/s]  [A
Depth=0, working on node 1:  50%|█████     | 2/4 [00:00<00:00,  8.19it/s][A
Depth=0, working on node 1:  75%|███████▌  | 3/4 [00:00<00:00,  7.81it/s][A
Depth=0, working on node 2:  75%|███████▌  | 3/4 [00:00<00:00,  7.81it/s][A
Depth=0, working on node 2: 100%|██████████| 4/4 [00:00<00:00,  8.32it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:00<00:00,  8.32it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:00<00:00,  8.32it/s][A
Depth=0, working on node 3:   0%|          | 0/4 [00:00<?, ?it/s]        [A
Depth=1, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1054.91it/s][A
Depth=1, working on node 1:  50%|█████     | 2/4 [00:00<00:00, 1208.56it/s][A
Depth=1, working on node 1:  75%|███████▌  | 3/4 [00:00<00:00, 22.48it/s]  [A
Depth=1, working 

Processing shot...




  0%|          | 0/4 [00:00<?, ?it/s][A
  0%|          | 0/4 [00:00<?, ?it/s][A
Depth=0, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1088.58it/s][A
Depth=0, working on node 0:  50%|█████     | 2/4 [00:00<00:00,  7.93it/s]  [A
Depth=0, working on node 1:  50%|█████     | 2/4 [00:00<00:00,  7.93it/s][A
Depth=0, working on node 1:  75%|███████▌  | 3/4 [00:00<00:00,  6.94it/s][A
Depth=0, working on node 2:  75%|███████▌  | 3/4 [00:00<00:00,  6.94it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:00<00:00,  6.94it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:00<00:00,  6.94it/s][A
Depth=0, working on node 3:   0%|          | 0/4 [00:00<?, ?it/s]        [A
Depth=1, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1021.51it/s][A
Depth=1, working on node 0:  50%|█████     | 2/4 [00:00<00:00, 17.68it/s]  [A
Depth=1, working on node 1:  50%|█████     | 2/4 [00:00<00:00, 17.68it/s][A
Depth=1, working on node 2:  75%|███████▌  | 3/4 [00:00<00:00,

Processing dribble...




  0%|          | 0/4 [00:00<?, ?it/s][A
  0%|          | 0/4 [00:00<?, ?it/s][A
Depth=0, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1153.55it/s][A
Depth=0, working on node 0:  50%|█████     | 2/4 [00:01<00:01,  1.10it/s]  [A
Depth=0, working on node 1:  50%|█████     | 2/4 [00:01<00:01,  1.10it/s][A
Depth=0, working on node 1:  75%|███████▌  | 3/4 [00:03<00:01,  1.12s/it][A
Depth=0, working on node 2:  75%|███████▌  | 3/4 [00:03<00:01,  1.12s/it][A
Depth=0, working on node 2: 100%|██████████| 4/4 [00:03<00:00,  1.18it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:03<00:00,  1.18it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:03<00:00,  1.18it/s][A
Depth=0, working on node 3:   0%|          | 0/4 [00:00<?, ?it/s]        [A
Depth=1, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1236.16it/s][A
Depth=1, working on node 0:  50%|█████     | 2/4 [00:08<00:08,  4.17s/it]  [A
Depth=1, working on node 1:  50%|█████     | 2/4 [00:08<00:08,

Processing clearance...




  0%|          | 0/4 [00:00<?, ?it/s][A
  0%|          | 0/4 [00:00<?, ?it/s][A
Depth=0, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 938.11it/s][A
Depth=0, working on node 0:  50%|█████     | 2/4 [00:00<00:00,  2.01it/s] [A
Depth=0, working on node 1:  50%|█████     | 2/4 [00:00<00:00,  2.01it/s][A
Depth=0, working on node 1:  75%|███████▌  | 3/4 [00:01<00:00,  2.59it/s][A
Depth=0, working on node 2:  75%|███████▌  | 3/4 [00:01<00:00,  2.59it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:01<00:00,  2.59it/s][A
Depth=0, working on node 3: 100%|██████████| 4/4 [00:01<00:00,  2.59it/s][A
Depth=0, working on node 3:   0%|          | 0/4 [00:00<?, ?it/s]        [A
Depth=1, working on node 0:  25%|██▌       | 1/4 [00:00<00:00, 1093.98it/s][A
Depth=1, working on node 1:  50%|█████     | 2/4 [00:00<00:00, 1325.42it/s][A
Depth=1, working on node 1:  75%|███████▌  | 3/4 [00:00<00:00,  6.82it/s]  [A
Depth=1, working on node 2:  75%|███████▌  | 3/4 [00:00<00:00,