In [1]:
# Add the parent directory to sys.path before importing the project modules below
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
import os
import shutil
import json
import pandas as pd

from tqdm.notebook import tqdm

from datatools.preprocess import (
    load_event_data, 
    load_position_data, 
    load_team_sheets,
    extract_match_id, 
    extract_event_pos,
    convert_locations,
    add_position_info,
    calc_player_velocities,
    extract_related_positions
)
from datatools.visualization import plot_event_count, plot_kde, plot_goal_positions

# Dataset root: the "data/DFL" folder inside the parent of the current working directory.
_parent_dir = os.path.dirname(os.getcwd())
path = os.path.join(_parent_dir, "data", "DFL")

In [2]:
# Move every XML file in the dataset root into a sub-folder named after its match ID.
# The folder name is taken from the last MATCH_ID_LENGTH characters of the file name
# (without the ".xml" suffix), e.g. "DFL-MAT-J03YKM".
MATCH_ID_LENGTH = 14

for filename in os.listdir(path):
    old_path = os.path.join(path, filename)

    # Only regular XML files are relocated; anything else in the folder is left alone.
    if not (filename.endswith(".xml") and os.path.isfile(old_path)):
        continue

    name_part = filename.rsplit(".xml", 1)[0]
    folder_name = name_part[-MATCH_ID_LENGTH:]
    if not folder_name:
        # A file literally named ".xml" has no stem to derive a folder from.
        continue

    folder_path = os.path.join(path, folder_name)
    # exist_ok=True replaces the separate "exists?" check and makes the cell safe to re-run.
    os.makedirs(folder_path, exist_ok=True)

    shutil.move(old_path, os.path.join(folder_path, filename))

In [3]:
# Build the list of match IDs from every entry of the dataset root whose name starts with "DFL".
match_ids = []
for entry in os.listdir(path):
    if entry.startswith("DFL"):
        match_ids.append(extract_match_id(entry))

print(len(match_ids))
match_ids

13


['DFL-MAT-J03YKM',
 'DFL-MAT-J03WOH',
 'DFL-MAT-J03YKB',
 'DFL-MAT-J03YLO',
 'DFL-MAT-J03WOY',
 'DFL-MAT-J03YHA',
 'DFL-MAT-J03WN1',
 'DFL-MAT-J03YIY',
 'DFL-MAT-J03WMX',
 'DFL-MAT-J03WQQ',
 'DFL-MAT-J03YKY',
 'DFL-MAT-J03WPY',
 'DFL-MAT-J03WR9']

### Schema

1. eID: 이벤트 타입
2. gameclock: seconds(전후반 모두 0s부터 시작)
3. tID: team ID
4. pID: player ID
5. timestamp, minute, second
6. qualifier: 다양한 정보(결과...)

In [5]:
# Column renames applied after position extraction (target format: SPADL).
TEAM_SHEET_RENAMES = {"tID": "team_id", "pID": "player_id"}
EVENT_RENAMES = {
    "tID": "team_id",
    "pID": "player_id",
    "eID": "type_name",
    "gameclock": "time_seconds",
}

# Author's notes on the tracking channels (xy_objects):
#   1. X, Y: x- and y-coordinates (in m)
#   2. D: distance covered since the preceding frame (in cm)
#   3. S: speed (in km/h)
#   4. A: acceleration (in m/s²)
#   5. M: minute of play
for match_id in tqdm(sorted(match_ids)):
    match_path = os.path.join(path, match_id)

    # Event data, tagged with the match it belongs to.
    events = load_event_data(match_path)
    events["game_id"] = match_id  # SPADL game_id

    # Position data: convert locations first, then derive player velocities.
    positions, team_sheets = load_position_data(match_path)
    positions = calc_player_velocities(convert_locations(positions), team_sheets)

    # Event positions are extracted while the tables still use the original column names.
    events = extract_event_pos(events, positions, team_sheets)

    # Switch both tables to the SPADL column names before the remaining steps.
    team_sheets = team_sheets.rename(columns=TEAM_SHEET_RENAMES)
    events = events.rename(columns=EVENT_RENAMES)

    events = add_position_info(events, team_sheets)
    events = extract_related_positions(events, positions, team_sheets)

    # Write the three tables next to the raw files of the match.
    for frame, csv_name in (
        (events, "events.csv"),
        (positions, "positions.csv"),
        (team_sheets, "teams.csv"),
    ):
        frame.to_csv(os.path.join(match_path, csv_name), index=False)

  0%|          | 0/13 [00:00<?, ?it/s]

In [2]:
# Reload the events/positions/teams CSV files of a single match for inspection.
# The match folder is resolved from the dataset root `path` rather than a
# machine-specific absolute path, so the cell works wherever the repository lives.
# NOTE(review): assumes `path` points at the same data/DFL folder the old absolute path did — confirm.
sample_match_path = os.path.join(path, "DFL-MAT-J03WQQ")

events = pd.read_csv(os.path.join(sample_match_path, "events.csv"))
positions = pd.read_csv(os.path.join(sample_match_path, "positions.csv"))
team_sheets = pd.read_csv(os.path.join(sample_match_path, "teams.csv"))
events

Unnamed: 0,type_name,time_seconds,team_id,player_id,outcome,timestamp,minute,second,qualifier,period_id,team,game_id,start_x,start_y,position,related_x,related_y,related_id
0,KickOff_Play_Pass,0.000,DFL-CLU-00000H,DFL-OBJ-002GNL,,2022-11-05 13:01:27.810000+01:00,0.0,0.0,"{'TeamLeft': 'DFL-CLU-00000H', 'GameSection': ...",1,Away,DFL-MAT-J03WQQ,52.910000,33.800000,IVZ,31.100000,28.480000,DFL-OBJ-002GN4
1,Play_Pass,2.097,DFL-CLU-00000H,DFL-OBJ-002GN4,,2022-11-05 13:01:29.907000+01:00,0.0,2.0,"{'Evaluation': 'unsuccessful', 'Distance': 'lo...",1,Away,DFL-MAT-J03WQQ,32.450088,30.279993,IVR,74.530034,49.000068,DFL-OBJ-0000NZ
2,TacklingGame,4.289,DFL-CLU-00000H,DFL-OBJ-0027QN,1.0,2022-11-05 13:01:32.099000+01:00,0.0,4.0,"{'WinnerTeam': 'DFL-CLU-00000P', 'LoserRole': ...",1,Away,DFL-MAT-J03WQQ,74.360040,57.580050,STR,76.029986,57.010068,DFL-OBJ-0000NZ
3,OtherBallAction,5.084,DFL-CLU-00000P,DFL-OBJ-0000NZ,,2022-11-05 13:01:32.894000+01:00,0.0,5.0,"{'Player': 'DFL-OBJ-0000NZ', 'Team': 'DFL-CLU-...",1,Home,DFL-MAT-J03WQQ,75.730002,59.640014,IVR,,,
4,Play_Pass,7.553,DFL-CLU-00000H,DFL-OBJ-0002EL,,2022-11-05 13:01:35.363000+01:00,0.0,7.0,"{'Evaluation': 'unsuccessful', 'Distance': 'me...",1,Away,DFL-MAT-J03WQQ,64.739978,51.950025,DLM,81.330014,37.270003,DFL-OBJ-002GMO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1674,OtherBallAction,2914.218,DFL-CLU-00000P,DFL-OBJ-J01L1F,,2022-11-05 14:52:26.028000+01:00,48.0,34.0,"{'Player': 'DFL-OBJ-J01L1F', 'Team': 'DFL-CLU-...",2,Home,DFL-MAT-J03WQQ,81.910108,15.449978,STL,,,
1675,Delete,2914.900,,,,2022-11-05 14:52:26.710000+01:00,48.0,34.0,{},2,Home,DFL-MAT-J03WQQ,,,,,,
1676,Delete,2914.900,,,,2022-11-05 14:52:26.710000+01:00,48.0,34.0,{},2,Away,DFL-MAT-J03WQQ,,,,,,
1677,FinalWhistle,2920.190,,,,2022-11-05 14:52:32+01:00,48.0,40.0,"{'BreakingOff': 'false', 'GameSection': 'secon...",2,Home,DFL-MAT-J03WQQ,,,,,,


### 시각화

In [None]:
# Pick one pass event and cut out the position frames around it.
# The events table uses the renamed columns ("type_name", "time_seconds"),
# so the original "eID" / "gameclock" names no longer exist here.
highlight = events[events["type_name"] == "Play_Pass"].iloc[76]  # index 76: arbitrary example pass

# Window around the event, in the same unit as the event's time_seconds.
before_margin = 3
after_margin = 0.1

period_id = highlight["period_id"]
start_time = highlight["time_seconds"] - before_margin
end_time = highlight["time_seconds"] + after_margin

# Keep only the frames of the same period that fall inside the window (inclusive on both ends).
in_window = (
    (positions["period_id"] == period_id)
    & (positions["time"] >= start_time)
    & (positions["time"] <= end_time)
)
trace = positions[in_window]
trace.iloc[:, 30:50]

In [None]:
from express.trace_snapshot import TraceSnapshot

# Build a snapshot from the selected position frames and draw it.
snapshot = TraceSnapshot(
    trace,
    play_left_to_right=False,
)
snapshot.plot()