In [2]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd

In [3]:
# Project root: the parent of the current working directory
# (the notebook is expected to be started from the "notebooks" folder).
project_root = (Path.cwd() / "..").resolve()
project_root

PosixPath('/Users/kat/Documents/Stackfuel/Biomech_Project_2025')

In [4]:
# Directory that contains "matches/<match_id>/*" inside the open-data checkout
sc_data_root = project_root.joinpath("data", "raw", "opendata-master", "data")

print(sc_data_root)

/Users/kat/Documents/Stackfuel/Biomech_Project_2025/data/raw/opendata-master/data


In [5]:
# Directory that contains "PhasesOfPlayAggregator.py"
pop_agg_path = (
    project_root
    / "data"
    / "raw"
    / "opendata-master"
    / "resources"
    / "Tutorials"
    / "[Advanced] Aggregating Phases of Play"
)

# Make the tutorial module importable. The append is guarded so that
# re-running this cell does not pile up duplicate entries on sys.path.
pop_agg_dir = str(pop_agg_path)
if pop_agg_dir not in sys.path:
    sys.path.append(pop_agg_dir)

# This import has to stay below the sys.path setup: the module is only
# discoverable once its directory is on the search path.
from PhasesOfPlayAggregator import PhasesOfPlayAggregator

print(pop_agg_path)
print("Import OK ")


/Users/kat/Documents/Stackfuel/Biomech_Project_2025/data/raw/opendata-master/resources/Tutorials/[Advanced] Aggregating Phases of Play
Import OK 


In [6]:
def aggregate_one_match(match_id: int, sc_data_root: Path) -> pd.DataFrame:
    """
    Build the team-level phases-of-play aggregates for a single match.

    Reads ``<sc_data_root>/matches/<match_id>/<match_id>_phases_of_play.csv``,
    hands it to ``PhasesOfPlayAggregator`` and merges the in-possession and
    out-of-possession aggregates on ``(match_id, team_id, team_name)``.

    Parameters
    ----------
    match_id : int
        Match identifier; used for the sub-directory and the file-name prefix.
    sc_data_root : Path
        Directory that contains the ``matches`` folder.

    Returns
    -------
    pd.DataFrame
        Merged aggregates, intended to hold one row per
        (match_id, team_id, team_name). Non-key columns present in both
        inputs receive the suffixes ``_ip`` and ``_oop``.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pd.read_csv`` when the CSV does not exist.
    """
    key_columns = ["match_id", "team_id", "team_name"]

    # Locate and load the phases-of-play CSV of this match.
    match_dir = sc_data_root / "matches" / str(match_id)
    phases_path = match_dir / f"{match_id}_phases_of_play.csv"
    print(f"Lade Match {match_id} von {phases_path}")
    aggregator = PhasesOfPlayAggregator(phases_of_play_df=pd.read_csv(phases_path))

    # In-possession aggregates, team columns renamed to the shared key names.
    in_possession = aggregator.get_in_possession_aggregates().rename(
        columns={
            "team_in_possession_id": "team_id",
            "team_in_possession_shortname": "team_name",
        }
    )

    # Out-of-possession aggregates, renamed the same way.
    out_of_possession = aggregator.get_out_of_possession_aggregates().rename(
        columns={
            "team_out_of_possession_id": "team_id",
            "team_out_of_possession_shortname": "team_name",
        }
    )

    # Combine both views of a team into a single row.
    return in_possession.merge(
        out_of_possession,
        on=key_columns,
        suffixes=("_ip", "_oop"),
    )


In [8]:
# IDs of the matches to aggregate; each one must have a folder
# "matches/<match_id>/" below sc_data_root.
match_ids = [
    1886347, 1899585, 1925299, 1953632, 1996435,
    2006229, 2011166, 2013725, 2015213, 2017461,
]

In [9]:
# Aggregate every requested match. A match whose phases-of-play file is
# missing is reported and skipped instead of aborting the whole run.
all_games_list = []

for mid in match_ids:
    try:
        df_match = aggregate_one_match(mid, sc_data_root)
    except FileNotFoundError:
        print(f"Datei für Match {mid} nicht gefunden – wird übersprungen.")
    else:
        all_games_list.append(df_match)

# pd.concat raises an opaque "No objects to concatenate" ValueError on an
# empty list, so fail early with a message that names the likely cause.
if not all_games_list:
    raise FileNotFoundError(
        f"No phases-of-play file found for any of the {len(match_ids)} "
        f"requested matches under {sc_data_root}"
    )

# Stack the per-match frames into one DataFrame
all_games_df = pd.concat(all_games_list, ignore_index=True)

all_games_df.head()


Lade Match 1886347 von /Users/kat/Documents/Stackfuel/Biomech_Project_2025/data/raw/opendata-master/data/matches/1886347/1886347_phases_of_play.csv
Lade Match 1899585 von /Users/kat/Documents/Stackfuel/Biomech_Project_2025/data/raw/opendata-master/data/matches/1899585/1899585_phases_of_play.csv
Lade Match 1925299 von /Users/kat/Documents/Stackfuel/Biomech_Project_2025/data/raw/opendata-master/data/matches/1925299/1925299_phases_of_play.csv
Lade Match 1953632 von /Users/kat/Documents/Stackfuel/Biomech_Project_2025/data/raw/opendata-master/data/matches/1953632/1953632_phases_of_play.csv
Lade Match 1996435 von /Users/kat/Documents/Stackfuel/Biomech_Project_2025/data/raw/opendata-master/data/matches/1996435/1996435_phases_of_play.csv
Lade Match 2006229 von /Users/kat/Documents/Stackfuel/Biomech_Project_2025/data/raw/opendata-master/data/matches/2006229/2006229_phases_of_play.csv
Lade Match 2011166 von /Users/kat/Documents/Stackfuel/Biomech_Project_2025/data/raw/opendata-master/data/matches

Unnamed: 0,match_id,team_id,team_name,count_build_up,total_time_build_up,count_player_possessions_build_up,count_possession_lost_in_phase_build_up,count_possession_lead_to_shot_build_up,count_possession_lead_to_goal_build_up,avg_start_width_build_up,...,count_into_transition_from_defending_set_play,count_into_quick_break_from_defending_set_play,count_into_direct_from_defending_set_play,count_into_chaotic_from_defending_set_play,count_into_low_block_from_defending_set_play,count_into_medium_block_from_defending_set_play,count_into_high_block_from_defending_set_play,count_into_defending_quick_break_from_defending_set_play,count_into_defending_transition_from_defending_set_play,count_into_defending_direct_from_defending_set_play
0,1886347,1805,Newcastle,39.0,353.7,129.0,4.0,3.0,0.0,41.585556,...,,,,2.0,2.0,3.0,,,,
1,1886347,4177,Auckland FC,24.0,219.0,72.0,3.0,3.0,0.0,45.182083,...,,,,1.0,1.0,2.0,,,,
2,1899585,867,Wellington P FC,33.0,280.5,102.0,6.0,1.0,0.0,45.071613,...,,,,1.0,2.0,1.0,,,,
3,1899585,4177,Auckland FC,16.0,149.1,45.0,2.0,0.0,0.0,45.026923,...,1.0,,,1.0,,2.0,,,,
4,1925299,871,Perth Glory,40.0,324.1,88.0,3.0,1.0,1.0,38.425,...,1.0,,,1.0,,,,,,


In [10]:
# Sanity check: overall size first, then the distinct (match_id, team_name) pairs
print(all_games_df.shape)
all_games_df.loc[:, ["match_id", "team_name"]].drop_duplicates()

(20, 371)


Unnamed: 0,match_id,team_name
0,1886347,Newcastle
1,1886347,Auckland FC
2,1899585,Wellington P FC
3,1899585,Auckland FC
4,1925299,Perth Glory
5,1925299,Brisbane FC
6,1953632,CC Mariners
7,1953632,Melbourne City
8,1996435,Adelaide United
9,1996435,Sydney FC


In [11]:
# Print (n_rows, n_columns) of the combined frame
print(all_games_df.shape)


(20, 371)


In [13]:
# "total_time_*" columns to average per team
phase_time_columns = [
    "total_time_build_up",
    "total_time_create",
    "total_time_finish",
    "total_time_low_block",
    "total_time_medium_block",
    "total_time_high_block",
]

# Mean of every selected column per team, taken over all loaded matches
mean_per_column = dict.fromkeys(phase_time_columns, "mean")
team_phase_summary = all_games_df.groupby("team_name").agg(mean_per_column)

team_phase_summary

Unnamed: 0_level_0,total_time_build_up,total_time_create,total_time_finish,total_time_low_block,total_time_medium_block,total_time_high_block
team_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide United,345.1,556.9,289.5,533.1,630.1,347.1
Auckland FC,192.55,552.175,341.7,258.125,559.475,292.525
Brisbane FC,270.9,832.2,720.5,253.4,482.0,324.1
CC Mariners,331.1,441.8,98.8,334.2,1314.2,240.4
Macarthur FC,315.3,552.7,153.7,447.0,830.5,301.5
Melbourne City,270.95,1072.35,390.6,126.25,497.25,323.2
Melbourne V FC,157.3,578.25,450.65,160.7,347.45,147.9
Newcastle,353.7,646.9,150.1,478.4,685.7,219.0
Perth Glory,324.1,482.0,253.4,720.5,832.2,270.9
Sydney FC,269.05,607.4,425.05,220.45,577.6,364.55
