In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from xgboost import XGBRegressor

# 1) 하이퍼파라미터 및 데이터 차원 정의
B = 500    # 배치 크기(샘플 수)
W = 10     # 윈도우 길이(프레임 수)
N = 22     # 에이전트(선수) 수
F = 4      # 입력 피처 수(x,y,vx,vy 등)
D = 2      # 출력 차원(x,y)

# 2) 더미 데이터 생성
#    실제론 train_dataset에서 가져온 X_train = np.array([...]) 형태를 쓰시면 됩니다.
X = np.random.rand(B, W, N, F).astype(np.float32)      # (B, W, N, F)
y_full = np.random.rand(B, W, N, D).astype(np.float32)  # (B, W, N, D)

# 3) 예측할 시점(여기선 중간 프레임)만 뽑아서 (B, N, D) 로 만듦
y = y_full[:, W // 2, :, :]    # (B, N, D)

# 4) X, y를 2D로 “펼치기”
X_flat = X.reshape(B, W * N * F)  # (B, W*N*F)
y_flat = y.reshape(B, N * D)      # (B, N*D)

# 5) 학습/테스트 분리
X_train, X_test, y_train, y_test = train_test_split(
    X_flat, y_flat, test_size=0.2, random_state=42
)

# 6) XGBoost + GPU 설정
xgb_params = {
    "tree_method":   "gpu_hist",
    "predictor":     "gpu_predictor",
    "gpu_id":        0,
    "objective":     "reg:squarederror",
    "eval_metric":   "rmse",
    # …그 외 파라미터…
}

base_reg = XGBRegressor(**xgb_params)

# 7) MultiOutputRegressor 래퍼
model = MultiOutputRegressor(base_reg)

# 8) (선택) 개별 eval_set + early stopping
#    내부 estimator들을 직접 초기화하고, 각 타깃별로 fit 해 줌
model.estimators_ = [
    XGBRegressor(**xgb_params) for _ in range(y_train.shape[1])
]
for i, est in enumerate(model.estimators_):
    est.fit(
        X_train, y_train[:, i],
        eval_set=[(X_train, y_train[:, i]), (X_test, y_test[:, i])],
        early_stopping_rounds=10,
        verbose=False
    )

# 9) 예측 & 원래 형태로 복원
y_pred_flat = model.predict(X_test)        # (B_test, N*D)
y_pred = y_pred_flat.reshape(-1, N, D)     # (B_test, N, D)

print("X_test:", X_test.shape)
print("y_pred:", y_pred.shape)  # -> (100, 22, 2) 처럼 나옵니다.




X_test: (100, 880)
y_pred: (100, 22, 2)


In [3]:
import numpy as np
from sklearn.multioutput import MultiOutputRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split

# ——————————————————————————————————
# 1) 더미 데이터 준비
# B: 배치 크기, W: 윈도우 길이, N: 에이전트 수, F: 입력 피처 수, D: 예측 차원
B, W, N, F, D = 500, 10, 22, 4, 2  
X = np.random.rand(B, W, N, F).astype(np.float32)     # (B, W, N, F)
y_full = np.random.rand(B, W, N, D).astype(np.float32) # (B, W, N, D)

# 중간 시점 프레임만 골라 (B, N, D) 로 만들고,
y = y_full[:, W//2, :, :]    # (B, N, D)

# 2) 2D로 “펼치기”  →  X: (B, W*N*F), y: (B, N*D)
X_flat = X.reshape(B, W * N * F)
y_flat = y.reshape(B, N * D)

# 3) 학습/검증 분리
X_train, X_test, y_train, y_test = train_test_split(
    X_flat, y_flat, test_size=0.2, random_state=42
)

# 4) XGBoost GPU 파라미터
xgb_params = {
    "tree_method": "gpu_hist",
    "predictor":   "gpu_predictor",
    "gpu_id":      0,
    "objective":   "reg:squarederror",
    "eval_metric": "rmse",
    # …그 외 파라미터…
}

# 5) MultiOutputRegressor 래퍼로 한 번에 학습
model = MultiOutputRegressor(XGBRegressor(**xgb_params))
model.fit(X_train, y_train)   # 내부적으로 N*D개의 XGBRegressor가 한 번에 학습됩니다.

# 6) 예측 & 원래 모양으로 복원
y_pred_flat = model.predict(X_test)        # (B_test, N*D)
y_pred = y_pred_flat.reshape(-1, N, D)     # (B_test, N, D)

print("X_test:", X_test.shape)  # -> (100, W*N*F)
print("y_pred:", y_pred.shape)  # -> (100, N, D)


X_test: (100, 880)
y_pred: (100, 22, 2)


In [5]:
y_pred_flat.shape

(100, 44)

In [1]:
# Deactivate distracting warnings
import warnings
warnings.filterwarnings("ignore")
import sys
sys.path.append('..')
from datatools.preprocess import display_data_summary, load_event_data, load_position_data, load_data, extract_match_id, load_team_sheets
from express import config

import os
import pandas as pd
import numpy as np
import torch


In [2]:
path = os.path.join(os.path.dirname(os.getcwd()), 'data/DFL')
print(path)
match_ids = [extract_match_id(filename) for filename in os.listdir(path) if filename.startswith('DFL')]
match_ids

/home/exPress/PlayerImputer/data/DFL


['DFL-MAT-J03WMX',
 'DFL-MAT-J03WR9',
 'DFL-MAT-J03WQQ',
 'DFL-MAT-J03YLO',
 'DFL-MAT-J03YKB',
 'DFL-MAT-J03WOY',
 'DFL-MAT-J03YIY',
 'DFL-MAT-J03YKM',
 'DFL-MAT-J03WOH',
 'DFL-MAT-J03YKY',
 'DFL-MAT-J03WN1',
 'DFL-MAT-J03YHA',
 'DFL-MAT-J03WPY']

In [147]:
events = pd.read_csv(os.path.join(path, match_ids[0], "events.csv"),index_col=0)
events = events.reset_index()
position = pd.read_csv(os.path.join(path, match_ids[0], "positions.csv"),index_col=0)
position = position.reset_index()
teams=pd.read_csv(os.path.join(path, match_ids[0], "teams.csv"),index_col=0)




In [4]:
position

Unnamed: 0,H00_x,H00_y,H01_x,H01_y,H02_x,H02_y,H03_x,H03_y,H04_x,H04_y,...,A17_speed,A18_vx,A18_vy,A18_speed,A19_vx,A19_vy,A19_speed,B00_vx,B00_vy,B00_speed
0,59.40,39.12,,,65.58,20.29,50.38,26.91,99.87,34.25,...,,,,,,,,,,
1,59.32,39.10,,,65.56,20.25,50.22,27.01,99.84,34.26,...,,,,,,,,,,
2,59.22,39.08,,,65.55,20.22,50.06,27.12,99.80,34.26,...,,,,,,,,,,
3,59.12,39.05,,,65.53,20.18,49.89,27.22,99.76,34.26,...,,,,,,,,,,
4,59.00,39.02,,,65.52,20.13,49.72,27.34,99.72,34.27,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145962,,,,,58.58,46.63,,,11.51,35.16,...,,-1.36,0.18,1.37,0.0,0.0,0.0,0.0,0.0,0.0
145963,,,,,58.62,46.60,,,11.56,35.15,...,,-1.32,0.14,1.33,0.0,0.0,0.0,0.0,0.0,0.0
145964,,,,,58.64,46.58,,,11.61,35.14,...,,-1.30,0.13,1.31,0.0,0.0,0.0,0.0,0.0,0.0
145965,,,,,58.67,46.56,,,11.65,35.14,...,,-1.29,0.13,1.29,0.0,0.0,0.0,0.0,0.0,0.0


1. prevAgentTime   
현재 시점 t에서, 해당 선수가 가장 최근에 공을 소유했던 시점   
t'는 M𝑛_𝑡' = 1 (즉, 공을 소유하고 있던 순간)이고 t' ≤ t인 가장 가까운 시간.   
만약 현재 선수가 공을 소유하고 있으면 (t' = t), 시간 차이는 0.   
prevAgentTime=t-t'

In [24]:
pd.set_option("display.max_rows",50) 

In [8]:
from express.config import on_ball_actions
time_diffs_dict = {}
agent_ids=events["player_id"].unique()
added_ids=set(teams['player_id'].unique())-set(events['player_id'].unique()) #events에 기록이 없는 team_sheets의 선수들
last_seen = {}
last_seen_xy = {}
next_seen = {}
next_seen_xy = {}

time_diffs_dict = {}  
last_seen = {}  


for idx, row in events.iterrows():
    current_pID = row["player_id"]
    current_time = row["time_seconds"]
    type_name = row["type_name"]

    # 현재 선수의 마지막 관측 시간과의 차이 계산
    time_diffs = {
        pid: 0 if pid == current_pID else (current_time - last_seen[pid] if pid in last_seen else None) 
        for pid in agent_ids
    }

    # 현재 선수가 공을 소유한 이벤트라면 마지막 관측 시간 업데이트
    if pd.notna(current_pID) and type_name in on_ball_actions:
        last_seen[current_pID] = current_time

    time_diffs_dict[idx] = time_diffs  # 결과 저장

# 데이터프레임 변환 및 NaN → 0으로 채우기
df = pd.DataFrame.from_dict(time_diffs_dict, orient="index").fillna(0)
df=df.reindex(columns=agent_ids.tolist()+list(added_ids), fill_value=0).astype("float")
df

Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-002GCR,DFL-OBJ-J01N65,DFL-OBJ-0000M0,DFL-OBJ-002GIC,DFL-OBJ-J01LJ2,DFL-OBJ-002G0R,DFL-OBJ-002GLJ,DFL-OBJ-0000LT,DFL-OBJ-J00USE,DFL-OBJ-J0117H
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1836,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1837,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1838,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
df[['DFL-OBJ-J01BGM']].iloc[1570:1600,:]

Unnamed: 0,DFL-OBJ-J01BGM
1570,0.0
1571,0.0
1572,0.0
1573,0.0
1574,0.0
1575,0.0
1576,0.0
1577,0.0
1578,0.0
1579,0.0


In [117]:
# 슬라이딩 윈도우 크기 설정 (양쪽 2개씩 포함)
window_size = 5
half_window = window_size // 2

# 데이터프레임을 NumPy 배열로 변환
numpy_data = result_df.to_numpy()
num_events, num_agents = numpy_data.shape

# Agent ID 리스트
agent_ids = list(result_df.columns)

# 최종 저장할 리스트 (N개의 Agent마다 따로 저장)
final_tensors = []

# 각 Agent(pID)별로 슬라이딩 윈도우 적용
for agent_id in agent_ids:
    tensor_list = []  # 해당 Agent의 모든 윈도우 저장

    # agent_id를 숫자로 변환 (Pandas의 컬럼 인덱스 가져오기)
    agent_idx = result_df.columns.get_loc(agent_id)

    for i in range(num_events):
        start_idx = max(0, i - half_window)  # 범위를 벗어나지 않도록 제한
        end_idx = min(num_events, i + half_window + 1)  # 범위를 벗어나지 않도록 제한

        # 해당 범위의 데이터를 슬라이싱 (Agent별로 개별 수집, iloc 사용)
        window_tensor = numpy_data[start_idx:end_idx, agent_idx].reshape(-1, 1)  # (L x I) 형태

        # 윈도우 크기가 부족할 경우 zero-padding 적용
        if window_tensor.shape[0] < window_size:
            padding = np.full((window_size - window_tensor.shape[0], 1), np.nan, dtype=np.float32)
            if start_idx == 0:
                window_tensor = np.vstack((padding, window_tensor))  # 앞쪽 패딩
            else:
                window_tensor = np.vstack((window_tensor, padding))  # 뒤쪽 패딩

        tensor_list.append(window_tensor)

    # 리스트를 NumPy 배열로 변환 (B x L x I)
    agent_tensor = np.array(tensor_list, dtype=np.float32)

    # 해당 Agent의 데이터를 최종 리스트에 추가
    final_tensors.append(agent_tensor)

# 최종 NumPy 배열 변환 (N x B x L x I 형태)
final_numpy_tensor = np.array(final_tensors, dtype=np.float32)

# NumPy 배열 형태 출력
final_numpy_tensor.shape


(32, 1840, 5, 1)

2. prevAgentX, prevAgentY

In [108]:
events

Unnamed: 0,type_name,time_seconds,team_id,player_id,outcome,timestamp,minute,second,qualifier,period_id,team,game_id,start_x,start_y,pID
0,KickOff_Play_Pass,0.000,DFL-CLU-00000G,DFL-OBJ-0027G6,,2023-05-27 15:30:12.230000+02:00,0.0,0.0,"{'TeamLeft': 'DFL-CLU-00000G', 'TeamRight': 'D...",1,Away,DFL-MAT-J03WMX,53.28000,33.46000,A07
1,Play_Pass,2.829,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:15.059000+02:00,0.0,2.0,"{'SemiField': 'false', 'Player': 'DFL-OBJ-0027...",1,Away,DFL-MAT-J03WMX,38.57175,33.30550,A12
2,ThrowIn_Play_Pass,21.551,DFL-CLU-000008,DFL-OBJ-0002BO,,2023-05-27 15:30:33.781000+02:00,0.0,21.0,"{'Team': 'DFL-CLU-000008', 'Side': 'right', 'D...",1,Home,DFL-MAT-J03WMX,68.63575,63.81800,H05
3,TacklingGame,22.268,DFL-CLU-000008,DFL-OBJ-J01BGM,1.0,2023-05-27 15:30:34.498000+02:00,0.0,22.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",1,Home,DFL-MAT-J03WMX,53.22700,59.25000,H14
4,OtherBallAction,22.733,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:34.963000+02:00,0.0,22.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",1,Away,DFL-MAT-J03WMX,48.33075,53.38125,A12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,OtherBallAction,2992.377,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 17:25:35.767000+02:00,49.0,52.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",2,Away,DFL-MAT-J03WMX,53.32425,68.39400,A12
1836,TacklingGame,2992.753,DFL-CLU-000008,DFL-OBJ-002GBW,1.0,2023-05-27 17:25:36.143000+02:00,49.0,52.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",2,Home,DFL-MAT-J03WMX,53.78825,62.79750,H10
1837,GoalKick_Play_Pass,3001.975,DFL-CLU-000008,DFL-OBJ-0002HE,,2023-05-27 17:25:45.365000+02:00,50.0,1.0,"{'Team': 'DFL-CLU-000008', 'DecisionTimestamp'...",2,Home,DFL-MAT-J03WMX,4.40250,38.98125,H04
1838,FinalWhistle,3004.030,,,,2023-05-27 17:25:47.420000+02:00,50.0,4.0,"{'GameSection': 'secondHalf', 'FinalResult': '...",2,Home,DFL-MAT-J03WMX,,,


In [None]:
from express.config import on_ball_actions
import pandas as pd
import numpy as np

last_seen_xy = {}
first_seen_xy = {}  # 처음 관측된 좌표 저장
first_seen_idx = {}  # 처음 관측된 인덱스 저장

x_positions_dict = {}
y_positions_dict = {}
added_ids=set(team_sheets['player_id'].unique())-set(events['player_id'].unique()) #events에 기록이 없는 team_sheets의 선수들


first_event_x = None
first_event_y = None

for idx, row in events.iterrows():
    current_pID = row["player_id"]
    current_time = row["time_seconds"]
    event_type = row["type_name"] 

    event_x = row["start_x"]
    event_y = row["start_y"]

    if first_event_x is None or first_event_y is None:
        first_event_x, first_event_y = event_x, event_y

    # 처음 관측되었을 경우, first_seen_xy와 first_seen_idx에 저장
    if current_pID not in first_seen_xy:
        first_seen_xy[current_pID] = (event_x, event_y)
        first_seen_idx[current_pID] = idx  #

        # 해당 선수가 처음 등장한 시점 이전까지의 모든 좌표를 업데이트
        for prev_idx in range(idx):
            x_positions_dict[prev_idx] = x_positions_dict.get(prev_idx, {})
            y_positions_dict[prev_idx] = y_positions_dict.get(prev_idx, {})
            
            x_positions_dict[prev_idx][current_pID] = event_x
            y_positions_dict[prev_idx][current_pID] = event_y

    # on_ball_actions에 해당하는 이벤트만 업데이트
    if event_type in on_ball_actions:
        last_seen_xy[current_pID] = (event_x, event_y)

    # 선수별 x, y 좌표 저장 (관측되지 않은 경우, 처음 관측된 값 or 첫 이벤트 값 사용)
    x_positions_dict[idx] = {
        pid: last_seen_xy.get(pid, first_seen_xy.get(pid, (first_event_x, first_event_y)))[0] for pid in agent_ids
    }
    y_positions_dict[idx] = {
        pid: last_seen_xy.get(pid, first_seen_xy.get(pid, (first_event_x, first_event_y)))[1] for pid in agent_ids
    }

# 
x_positions_df = pd.DataFrame.from_dict(x_positions_dict, orient="index").astype("float").fillna(pd.NA)
y_positions_df = pd.DataFrame.from_dict(y_positions_dict, orient="index").astype("float").fillna(pd.NA)

x_positions_df


Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-0027V2,DFL-OBJ-J01DVC,DFL-OBJ-J015S4,DFL-OBJ-002G89,DFL-OBJ-000191,DFL-OBJ-0000RP,DFL-OBJ-0026ZI,DFL-OBJ-002GBW,DFL-OBJ-002GCR,DFL-OBJ-J01N65
0,53.2800,38.57175,68.63575,53.2270,57.7800,52.454,48.9575,59.5005,39.19500,34.155,...,85.8355,63.9440,66.23625,99.4725,46.695,41.23875,52.5300,75.7800,46.87500,16.562
1,53.2800,38.57175,68.63575,53.2270,57.7800,52.454,48.9575,59.5005,39.19500,34.155,...,85.8355,63.9440,66.23625,99.4725,46.695,41.23875,52.5300,75.7800,46.87500,16.562
2,53.2800,38.57175,68.63575,53.2270,57.7800,52.454,48.9575,59.5005,39.19500,34.155,...,85.8355,63.9440,66.23625,99.4725,46.695,41.23875,52.5300,75.7800,46.87500,16.562
3,53.2800,38.57175,68.63575,53.2270,57.7800,52.454,48.9575,59.5005,39.19500,34.155,...,85.8355,63.9440,66.23625,99.4725,46.695,41.23875,52.5300,75.7800,46.87500,16.562
4,53.2800,48.33075,68.63575,53.2270,57.7800,52.454,48.9575,59.5005,39.19500,34.155,...,85.8355,63.9440,66.23625,99.4725,46.695,41.23875,52.5300,75.7800,46.87500,16.562
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,56.0895,53.32425,45.15550,94.9625,43.1875,81.493,29.5105,17.2955,31.57875,88.520,...,63.8300,17.7825,81.43000,34.3980,62.322,14.10850,20.6305,72.2215,28.26225,39.166
1836,56.0895,53.32425,45.15550,94.9625,43.1875,81.493,29.5105,17.2955,31.57875,88.520,...,63.8300,17.7825,81.43000,34.3980,62.322,14.10850,20.6305,72.2215,28.26225,39.166
1837,56.0895,53.32425,45.15550,94.9625,43.1875,81.493,29.5105,17.2955,31.57875,88.520,...,63.8300,17.7825,81.43000,34.3980,62.322,14.10850,20.6305,72.2215,28.26225,39.166
1838,56.0895,53.32425,45.15550,94.9625,43.1875,81.493,29.5105,17.2955,31.57875,88.520,...,63.8300,17.7825,81.43000,34.3980,62.322,14.10850,20.6305,72.2215,28.26225,39.166


In [82]:
x_positions_df.iloc[1820:,1:2]

Unnamed: 0,DFL-OBJ-0027KL
1820,71.112
1821,71.112
1822,71.112
1823,71.112
1824,71.112
1825,71.112
1826,71.112
1827,71.112
1828,71.112
1829,71.112


In [107]:
x_positions_df

Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-0027V2,DFL-OBJ-J01DVC,DFL-OBJ-J015S4,DFL-OBJ-002G89,DFL-OBJ-000191,DFL-OBJ-0000RP,DFL-OBJ-0026ZI,DFL-OBJ-002GBW,DFL-OBJ-002GCR,DFL-OBJ-J01N65
0,53.28,,,,,,,,,,...,,,,,,,,,,
1,53.28,38.58,,,,,,,,,...,,,,,,,,,,
2,53.28,38.58,68.62,,,,,,,,...,,,,,,,,,,
3,53.28,38.58,68.62,53.23,,,,,,,...,,,,,,,,,,
4,53.28,48.36,68.62,53.23,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1833,53.99,71.13,71.95,43.62,46.65,42.57,29.47,18.22,31.57,34.76,...,,17.79,,34.4,,14.11,20.62,69.09,28.26,39.19
1834,53.99,71.13,71.95,43.62,46.65,42.57,29.47,18.22,31.57,34.76,...,,17.79,,34.4,,14.11,20.62,69.09,28.26,39.19
1835,53.99,53.32,71.95,43.62,46.65,42.57,29.47,18.22,31.57,34.76,...,,17.79,,34.4,,14.11,20.62,69.09,28.26,39.19
1836,53.99,53.32,71.95,43.62,46.65,42.57,29.47,18.22,31.57,34.76,...,,17.79,,34.4,,14.11,20.62,53.79,28.26,39.19


In [99]:
prev_x_positions_df

Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-0027V2,DFL-OBJ-J01DVC,DFL-OBJ-J015S4,DFL-OBJ-002G89,DFL-OBJ-000191,DFL-OBJ-0000RP,DFL-OBJ-0026ZI,DFL-OBJ-002GBW,DFL-OBJ-002GCR,DFL-OBJ-J01N65
0,56.0895,53.32425,,,,,29.5105,18.205,31.57875,88.52,...,63.83,17.7825,81.43,34.398,,14.1085,20.6305,53.78825,28.26225,39.166
1,56.0895,53.32425,,,,,29.5105,18.205,31.57875,88.52,...,63.83,17.7825,81.43,34.398,,14.1085,20.6305,53.78825,28.26225,39.166
2,56.0895,53.32425,,,,,29.5105,18.205,31.57875,88.52,...,63.83,17.7825,81.43,34.398,,14.1085,20.6305,53.78825,28.26225,39.166
3,56.0895,53.32425,,,,,29.5105,18.205,31.57875,88.52,...,63.83,17.7825,81.43,34.398,,14.1085,20.6305,53.78825,28.26225,39.166
4,56.0895,53.32425,,,,,29.5105,18.205,31.57875,88.52,...,63.83,17.7825,81.43,34.398,,14.1085,20.6305,53.78825,28.26225,39.166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,56.0895,53.32425,,,,,29.5105,18.205,31.57875,88.52,...,63.83,17.7825,81.43,34.398,,14.1085,20.6305,53.78825,28.26225,39.166
1836,56.0895,53.32425,,,,,29.5105,18.205,31.57875,88.52,...,63.83,17.7825,81.43,34.398,,14.1085,20.6305,53.78825,28.26225,39.166
1837,56.0895,53.32425,,,,,29.5105,18.205,31.57875,88.52,...,63.83,17.7825,81.43,34.398,,14.1085,20.6305,53.78825,28.26225,39.166
1838,56.0895,53.32425,,,,,29.5105,18.205,31.57875,88.52,...,63.83,17.7825,81.43,34.398,,14.1085,20.6305,53.78825,28.26225,39.166


In [144]:
import numpy as np
import pandas as pd

class SlidingWindowProcessor:
    def __init__(self, window_size: int = 5):
        """
        슬라이딩 윈도우를 적용하는 클래스.

        Parameters:
        - window_size (int): 슬라이딩 윈도우 크기 (기본값: 5)
        """
        self.window_size = window_size
        self.half_window = window_size // 2

    def apply_sliding_window(self, df: pd.DataFrame):
        """
        데이터프레임에 슬라이딩 윈도우를 적용하여 NumPy 배열로 변환.

        Parameters:
        - df (pd.DataFrame): 입력 데이터프레임

        Returns:
        - final_numpy_tensor (np.ndarray): (N x B x L x I) 형태의 NumPy 배열
        """
        numpy_data = df.to_numpy()
        num_events, num_features = numpy_data.shape
        feature_ids = list(df.columns)
        final_tensors = []

        for feature_id in feature_ids:
            tensor_list = []
            feature_idx = df.columns.get_loc(feature_id)

            for i in range(num_events):
                start_idx = max(0, i - self.half_window)
                end_idx = min(num_events, i + self.half_window + 1)

                window_tensor = numpy_data[start_idx:end_idx, feature_idx].reshape(-1, 1)

                if window_tensor.shape[0] < self.window_size:
                    padding = np.full((self.window_size - window_tensor.shape[0], 1), np.nan, dtype=np.float32)
                    window_tensor = np.vstack((padding, window_tensor)) if start_idx == 0 else np.vstack((window_tensor, padding))

                tensor_list.append(window_tensor)

            feature_tensor = np.array(tensor_list, dtype=np.float32)
            final_tensors.append(feature_tensor)

        return np.array(final_tensors, dtype=np.float32)

    def process_multiple_features(self, feature_dfs: dict):
        """
        여러 개의 feature 데이터프레임을 슬라이딩 윈도우로 변환.

        Parameters:
        - feature_dfs (dict): { "feature_name": pd.DataFrame } 형태의 딕셔너리

        Returns:
        - transformed_features (dict): { "feature_name": np.ndarray } 형태의 딕셔너리
        """
        transformed_features = {}
        for feature_name, df in feature_dfs.items():
            transformed_features[feature_name] = self.apply_sliding_window(df)
        return transformed_features


4 nextAgentTime


In [58]:
from express.express.config import on_ball_actions
import pandas as pd

# ✅ 선수별 데이터를 저장할 딕셔너리 초기화 (0으로 초기화)
last_seen_time = {}  
last_seen_idx = {}  
next_time_diff_dict = {idx: {pid: 0 for pid in events["player_id"].dropna().unique()} for idx in events.index}

# ✅ 이벤트에 등장한 선수 리스트
agent_ids = events["player_id"].dropna().unique()

# ✅ 정방향 순회하여 데이터 업데이트
for idx, row in events.iterrows():  
    current_pID = row["player_id"]
    current_time = row["time_seconds"]  
    event_type = row["type_name"] 

    # ✅ 공을 잡은 경우, last_seen_idx부터 현재까지 값을 업데이트
    if event_type in on_ball_actions:
        if current_pID in last_seen_idx:
            prev_idx = last_seen_idx[current_pID]
            for fill_idx in range(prev_idx, idx+1):  # 현재 인덱스까지 채움
                next_time_diff_dict[fill_idx][current_pID] = (
                    current_time - events.loc[fill_idx, "time_seconds"]
                )
        else:  # 처음 등장한 경우
            for fill_idx in range(idx+1):
                next_time_diff_dict[fill_idx][current_pID] = (
                    current_time - events.loc[fill_idx, "time_seconds"]
                )
        
        last_seen_time[current_pID] = current_time
        last_seen_idx[current_pID] = idx  # 현재 인덱스 기록

for pid in agent_ids:
    if pid in last_seen_idx:
        last_idx = last_seen_idx[pid]
        last_value = next_time_diff_dict[last_idx-1][pid]  # 🔥 마지막 값 가져오기

        for idx in range(last_idx, events.index[-1] + 1):
            next_time_diff_dict[idx][pid] = last_value  # ✅ 마지막으로 기록된 값을 유지


# ✅ 결과 DataFrame 변환
result_df = pd.DataFrame.from_dict(next_time_diff_dict, orient="index").astype("float")
result_df


Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-0027V2,DFL-OBJ-J01DVC,DFL-OBJ-J015S4,DFL-OBJ-002G89,DFL-OBJ-000191,DFL-OBJ-0000RP,DFL-OBJ-0026ZI,DFL-OBJ-002GBW,DFL-OBJ-002GCR,DFL-OBJ-J01N65
0,342.911,2.829,21.551,99.010,24.660,102.336,26.370,28.806,32.060,38.740,...,1018.111,1046.304,1069.285,1180.815,1579.762,1591.505,2243.709,2276.428,2360.090,2365.868
1,340.082,19.904,18.722,96.181,21.831,99.507,23.541,25.977,29.231,35.911,...,1015.282,1043.475,1066.456,1177.986,1576.933,1588.676,2240.880,2273.599,2357.261,2363.039
2,321.360,1.182,240.840,77.459,3.109,80.785,4.819,7.255,10.509,17.189,...,996.560,1024.753,1047.734,1159.264,1558.211,1569.954,2222.158,2254.877,2338.539,2344.317
3,320.643,0.465,240.123,76.742,2.392,80.068,4.102,6.538,9.792,16.472,...,995.843,1024.036,1047.017,1158.547,1557.494,1569.237,2221.441,2254.160,2337.822,2343.600
4,320.178,11.929,239.658,76.277,1.927,79.603,3.637,6.073,9.327,16.007,...,995.378,1023.571,1046.552,1158.082,1557.029,1568.772,2220.976,2253.695,2337.357,2343.135
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,1.476,3.044,2.669,80.720,0.360,3.065,4.174,0.916,18.263,0.625,...,2.474,0.120,0.664,6.365,1.130,1.208,13.173,1.581,1.347,1.149
1836,1.476,3.044,2.669,80.720,0.360,3.065,4.174,0.916,18.263,0.625,...,2.474,0.120,0.664,6.365,1.130,1.208,13.173,1.581,1.347,1.149
1837,1.476,3.044,2.669,80.720,0.360,3.065,4.174,0.916,18.263,0.625,...,2.474,0.120,0.664,6.365,1.130,1.208,13.173,1.581,1.347,1.149
1838,1.476,3.044,2.669,80.720,0.360,3.065,4.174,0.916,18.263,0.625,...,2.474,0.120,0.664,6.365,1.130,1.208,13.173,1.581,1.347,1.149


In [83]:
events.iloc[1820:,[3,12]]

Unnamed: 0,player_id,start_x
1820,DFL-OBJ-0027G0,20.17525
1821,DFL-OBJ-0027G0,21.8815
1822,DFL-OBJ-0000RP,16.2925
1823,DFL-OBJ-0002AU,17.2955
1824,DFL-OBJ-0002AU,17.195
1825,DFL-OBJ-002GKO,14.77
1826,DFL-OBJ-002G4A,13.0575
1827,DFL-OBJ-0002AU,18.205
1828,DFL-OBJ-J01DVC,17.7825
1829,DFL-OBJ-002GKO,16.4825


5,6.nextAgentXY

In [61]:
import sys

from express.config import on_ball_actions
import pandas as pd
import numpy as np

last_seen_xy = {}
next_seen_xy = {}
last_seen = {}
next_seen = {}
last_seen_idx = {}  # 마지막으로 공을 잡았을 때의 인덱스
agent_ids = events["player_id"].dropna().unique()
added_ids=set(team_sheets['player_id'].unique())-set(events['player_id'].unique()) #events에 기록이 없는 team_sheets의 선수들
print(added_ids)
# ✅ 결과 저장용 딕셔너리 (초기값 0)
next_x_positions_dict = {idx: {pid: 0 for pid in agent_ids} for idx in events.index}
next_y_positions_dict = {idx: {pid: 0 for pid in agent_ids} for idx in events.index}


for idx, row in events.iterrows():
    current_pID = row["player_id"]
    event_type = row["type_name"]
    event_x = row["start_x"]
    event_y = row["start_y"]

    # 만약 on ball action이면 last_seen_xy 업데이트
    if event_type in on_ball_actions:
        if current_pID in last_seen_idx:
            prev_idx = last_seen_idx[current_pID]
            for fill_idx in range(prev_idx, idx):  # 현재 인덱스 이전까지 채움
                next_x_positions_dict[fill_idx][current_pID] = event_x
                next_y_positions_dict[fill_idx][current_pID] = event_y
        else:#current_pid가 처음 등장한 경우
            for fill_idx in range(idx):
                next_x_positions_dict[fill_idx][current_pID] = event_x
                next_y_positions_dict[fill_idx][current_pID] = event_y

        last_seen_xy[current_pID] = (event_x, event_y)
        last_seen_idx[current_pID] = idx  # 현재 인덱스 기록
for pid in agent_ids:
    if pid in last_seen_idx:
        last_x, last_y = last_seen_xy[pid]
        last_idx = last_seen_idx[pid]

        # 🔥 마지막으로 공을 잡았던 이후의 모든 이벤트 채우기
        for idx in range(last_idx, events.index[-1] + 1):
            next_x_positions_dict[idx][pid] = last_x
            next_y_positions_dict[idx][pid] = last_y

# ✅ 결과 데이터프레임 변환 및 정렬
next_x_positions_df = pd.DataFrame.from_dict(next_x_positions_dict, orient="index").astype("float").fillna(0).sort_index()
next_x_positions_df = next_x_positions_df.reindex(columns=agent_ids.tolist()+list(added_ids), fill_value=0).astype("float")

next_y_positions_df = pd.DataFrame.from_dict(next_y_positions_dict, orient="index").astype("float").fillna(0).sort_index()

# ✅ 최종 결과 확인
next_x_positions_df

#


{'DFL-OBJ-002GIC', 'DFL-OBJ-0000M0', 'DFL-OBJ-002GLJ', 'DFL-OBJ-J00USE', 'DFL-OBJ-002G0R', 'DFL-OBJ-J0117H', 'DFL-OBJ-J01LJ2', 'DFL-OBJ-0000LT'}


Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-002GCR,DFL-OBJ-J01N65,DFL-OBJ-002GIC,DFL-OBJ-0000M0,DFL-OBJ-002GLJ,DFL-OBJ-J00USE,DFL-OBJ-002G0R,DFL-OBJ-J0117H,DFL-OBJ-J01LJ2,DFL-OBJ-0000LT
0,56.8975,38.57175,68.63575,7.3025,57.7800,16.342,48.9575,59.5005,39.19500,34.155,...,45.50750,16.562,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,56.8975,48.33075,68.63575,7.3025,57.7800,16.342,48.9575,59.5005,39.19500,34.155,...,45.50750,16.562,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,56.8975,48.33075,66.11250,7.3025,57.7800,16.342,48.9575,59.5005,39.19500,34.155,...,45.50750,16.562,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,56.8975,48.33075,66.11250,7.3025,57.7800,16.342,48.9575,59.5005,39.19500,34.155,...,45.50750,16.562,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,56.8975,31.51250,66.11250,7.3025,57.7800,16.342,48.9575,59.5005,39.19500,34.155,...,45.50750,16.562,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,56.0895,53.32425,45.15550,94.9625,43.1875,81.493,29.5105,17.2955,31.57875,88.520,...,28.26225,39.166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1836,56.0895,53.32425,45.15550,94.9625,43.1875,81.493,29.5105,17.2955,31.57875,88.520,...,28.26225,39.166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1837,56.0895,53.32425,45.15550,94.9625,43.1875,81.493,29.5105,17.2955,31.57875,88.520,...,28.26225,39.166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1838,56.0895,53.32425,45.15550,94.9625,43.1875,81.493,29.5105,17.2955,31.57875,88.520,...,28.26225,39.166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [91]:
events[events["player_id"] == "DFL-OBJ-0027KL"][["start_x"]].tail(10)

Unnamed: 0,start_x
1708,87.439
1716,77.86475
1718,68.219
1722,67.005
1747,44.526
1762,77.459
1765,80.1155
1788,82.44025
1801,71.112
1835,53.32425


In [39]:
events[["player_id","time_seconds"]].iloc[1830:,:]


Unnamed: 0,player_id,time_seconds
1830,DFL-OBJ-0027G0,2974.233
1831,DFL-OBJ-0000RP,2975.441
1832,DFL-OBJ-0026ZI,2988.614
1833,DFL-OBJ-00012X,2989.29
1834,DFL-OBJ-00012X,2989.333
1835,DFL-OBJ-0027KL,2992.377
1836,DFL-OBJ-002GBW,2992.753
1837,DFL-OBJ-0002HE,3001.975
1838,,3004.03
1839,,3004.03


7,8. avAgentXY

In [201]:
events

Unnamed: 0,type_name,time_seconds,team_id,player_id,outcome,timestamp,minute,second,qualifier,period_id,team,game_id,start_x,start_y,pID
0,KickOff_Play_Pass,0.000,DFL-CLU-00000G,DFL-OBJ-0027G6,,2023-05-27 15:30:12.230000+02:00,0.0,0.0,"{'TeamLeft': 'DFL-CLU-00000G', 'TeamRight': 'D...",1,Away,DFL-MAT-J03WMX,53.28000,33.46000,A07
1,Play_Pass,2.829,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:15.059000+02:00,0.0,2.0,"{'SemiField': 'false', 'Player': 'DFL-OBJ-0027...",1,Away,DFL-MAT-J03WMX,38.57175,33.30550,A12
2,ThrowIn_Play_Pass,21.551,DFL-CLU-000008,DFL-OBJ-0002BO,,2023-05-27 15:30:33.781000+02:00,0.0,21.0,"{'Team': 'DFL-CLU-000008', 'Side': 'right', 'D...",1,Home,DFL-MAT-J03WMX,68.63575,63.81800,H05
3,TacklingGame,22.268,DFL-CLU-000008,DFL-OBJ-J01BGM,1.0,2023-05-27 15:30:34.498000+02:00,0.0,22.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",1,Home,DFL-MAT-J03WMX,53.22700,59.25000,H14
4,OtherBallAction,22.733,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:34.963000+02:00,0.0,22.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",1,Away,DFL-MAT-J03WMX,48.33075,53.38125,A12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,OtherBallAction,2992.377,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 17:25:35.767000+02:00,49.0,52.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",2,Away,DFL-MAT-J03WMX,53.32425,68.39400,A12
1836,TacklingGame,2992.753,DFL-CLU-000008,DFL-OBJ-002GBW,1.0,2023-05-27 17:25:36.143000+02:00,49.0,52.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",2,Home,DFL-MAT-J03WMX,53.78825,62.79750,H10
1837,GoalKick_Play_Pass,3001.975,DFL-CLU-000008,DFL-OBJ-0002HE,,2023-05-27 17:25:45.365000+02:00,50.0,1.0,"{'Team': 'DFL-CLU-000008', 'DecisionTimestamp'...",2,Home,DFL-MAT-J03WMX,4.40250,38.98125,H04
1838,FinalWhistle,3004.030,,,,2023-05-27 17:25:47.420000+02:00,50.0,4.0,"{'GameSection': 'secondHalf', 'FinalResult': '...",2,Home,DFL-MAT-J03WMX,,,


In [63]:
from express.config import on_ball_actions
import pandas as pd
import numpy as np

# 고유한 선수 목록
agent_ids = events["player_id"].dropna().unique()
added_ids=set(team_sheets['player_id'].unique())-set(events['player_id'].unique()) #events에 기록이 없는 team_sheets의 선수들
# 선수별 위치 합산 및 이벤트 개수 저장
coor_sum_dict = {pID: {"sum_x": 0, "sum_y": 0} for pID in agent_ids}
count_dict = {pID: 0 for pID in agent_ids}

# on-ball 이벤트만 필터링
on_ball_events_df = events[events["type_name"].isin(on_ball_actions)].copy()

for idx, row in on_ball_events_df.iterrows():
    pID = row["player_id"]
    if pd.isna(pID):
        continue  # 선수 ID가 없는 경우 스킵

    # ✅ 이벤트 위치를 바로 사용 (positions 탐색 X)
    event_x = row["start_x"]
    event_y = row["start_y"]

    # 🔹 X, Y 좌표 합산 (NaN 값 체크)
    if not np.isnan(event_x) and not np.isnan(event_y):
        coor_sum_dict[pID]["sum_x"] += event_x
        coor_sum_dict[pID]["sum_y"] += event_y
        count_dict[pID] += 1  # 이벤트 개수 증가

# 🔹 선수별 평균 좌표 계산
avg_positions_dict = {
    pID: {
        "avgAgentX": coor_sum_dict[pID]["sum_x"] / count_dict[pID] if count_dict[pID] > 0 else np.nan,
        "avgAgentY": coor_sum_dict[pID]["sum_y"] / count_dict[pID] if count_dict[pID] > 0 else np.nan
    }
    for pID in agent_ids
}

# 🔹 데이터프레임 변환 및 모든 이벤트에 동일한 평균 좌표 적용
avg_positions_df = pd.DataFrame.from_dict(avg_positions_dict, orient="index")
added_data = {pid: [0, 0] for pid in added_ids}
added_df = pd.DataFrame.from_dict(added_data, orient="index", columns=["avgAgentX", "avgAgentY"])
avg_positions_df = pd.concat([avg_positions_df, added_df]).astype(float)
# avgAgentX_df = pd.DataFrame([avg_positions_df["avgAgentX"]] * len(events), index=events.index)
# avgAgentY_df = pd.DataFrame([avg_positions_df["avgAgentY"]] * len(events), index=events.index)
avg_positions_df
# # ✅ 결과 확인
# print("✅ avgAgentX_df 생성 완료")
# print(avgAgentX_df.head())
# print("✅ avgAgentY_df 생성 완료")
# print(avgAgentY_df.head())


Unnamed: 0,avgAgentX,avgAgentY
DFL-OBJ-0027G6,47.640669,38.825669
DFL-OBJ-0027KL,45.258135,28.155838
DFL-OBJ-0002BO,52.915623,38.655291
DFL-OBJ-J01BGM,48.35261,30.430184
DFL-OBJ-J01B8N,53.63855,34.215613
DFL-OBJ-0027AX,53.01814,24.246573
DFL-OBJ-0002F5,49.508415,32.695573
DFL-OBJ-0002AU,49.176978,26.20337
DFL-OBJ-0027G0,52.04239,29.783103
DFL-OBJ-J017RE,50.94477,35.433155


In [62]:
added_data = {pid: [0, 0] for pid in added_ids}
added_df = pd.DataFrame.from_dict(added_data, orient="index", columns=["avgAgentX", "avgAgentY"])
added_df



Unnamed: 0,avgAgentX,avgAgentY
DFL-OBJ-002GIC,0,0
DFL-OBJ-0000M0,0,0
DFL-OBJ-002GLJ,0,0
DFL-OBJ-J00USE,0,0
DFL-OBJ-002G0R,0,0
DFL-OBJ-J0117H,0,0
DFL-OBJ-J01LJ2,0,0
DFL-OBJ-0000LT,0,0


In [207]:
avgAgentX_df

Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-0027V2,DFL-OBJ-J01DVC,DFL-OBJ-J015S4,DFL-OBJ-002G89,DFL-OBJ-000191,DFL-OBJ-0000RP,DFL-OBJ-0026ZI,DFL-OBJ-002GBW,DFL-OBJ-002GCR,DFL-OBJ-J01N65
0,47.640669,45.258135,52.915623,48.35261,53.63855,53.01814,49.508415,49.176978,52.04239,50.94477,...,61.336803,65.347273,64.9844,67.6905,59.101607,38.658955,55.027661,85.752625,34.418812,36.052625
1,47.640669,45.258135,52.915623,48.35261,53.63855,53.01814,49.508415,49.176978,52.04239,50.94477,...,61.336803,65.347273,64.9844,67.6905,59.101607,38.658955,55.027661,85.752625,34.418812,36.052625
2,47.640669,45.258135,52.915623,48.35261,53.63855,53.01814,49.508415,49.176978,52.04239,50.94477,...,61.336803,65.347273,64.9844,67.6905,59.101607,38.658955,55.027661,85.752625,34.418812,36.052625
3,47.640669,45.258135,52.915623,48.35261,53.63855,53.01814,49.508415,49.176978,52.04239,50.94477,...,61.336803,65.347273,64.9844,67.6905,59.101607,38.658955,55.027661,85.752625,34.418812,36.052625
4,47.640669,45.258135,52.915623,48.35261,53.63855,53.01814,49.508415,49.176978,52.04239,50.94477,...,61.336803,65.347273,64.9844,67.6905,59.101607,38.658955,55.027661,85.752625,34.418812,36.052625
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,47.640669,45.258135,52.915623,48.35261,53.63855,53.01814,49.508415,49.176978,52.04239,50.94477,...,61.336803,65.347273,64.9844,67.6905,59.101607,38.658955,55.027661,85.752625,34.418812,36.052625
1836,47.640669,45.258135,52.915623,48.35261,53.63855,53.01814,49.508415,49.176978,52.04239,50.94477,...,61.336803,65.347273,64.9844,67.6905,59.101607,38.658955,55.027661,85.752625,34.418812,36.052625
1837,47.640669,45.258135,52.915623,48.35261,53.63855,53.01814,49.508415,49.176978,52.04239,50.94477,...,61.336803,65.347273,64.9844,67.6905,59.101607,38.658955,55.027661,85.752625,34.418812,36.052625
1838,47.640669,45.258135,52.915623,48.35261,53.63855,53.01814,49.508415,49.176978,52.04239,50.94477,...,61.336803,65.347273,64.9844,67.6905,59.101607,38.658955,55.027661,85.752625,34.418812,36.052625


9. agentRole

In [32]:
team_sheets

Unnamed: 0_level_0,position,team,jID,player_id,team_id,xID
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
F. Kainz,ZO,Home,11,DFL-OBJ-0027AX,DFL-CLU-000008,0
Timo Horn,,Home,1,DFL-OBJ-0000M0,DFL-CLU-000008,1
Jonas Hector,LV,Home,14,DFL-OBJ-00012X,DFL-CLU-000008,2
D. Selke,STZ,Home,27,DFL-OBJ-000270,DFL-CLU-000008,3
M. Schwäbe,TW,Home,20,DFL-OBJ-0002HE,DFL-CLU-000008,4
B. Schmitz,RV,Home,2,DFL-OBJ-0002BO,DFL-CLU-000008,5
K. Schindler,RV,Home,17,DFL-OBJ-0027V2,DFL-CLU-000008,6
Timo Hübers,IVR,Home,4,DFL-OBJ-0027B6,DFL-CLU-000008,7
Kristian Pedersen,LV,Home,3,DFL-OBJ-0026ZI,DFL-CLU-000008,8
L. Maina,OLM,Home,37,DFL-OBJ-0028BD,DFL-CLU-000008,9


In [31]:
import pandas as pd
for match_id in tqdm(match_ids):
    match_path = os.path.join(path, match_id)
    events = load_event_data(match_path)
    # 🔹 team_sheets에서 필요한 컬럼만 선택 (player_id, position)
    team_position_mapping = team_sheets[['player_id', 'position']]

    # 🔹 events에 team_sheets의 position을 병합 (player_id 기준)
    events = events.merge(team_position_mapping, on='player_id', how='left')

events_copy

Unnamed: 0,type_name,time_seconds,team_id,player_id,outcome,timestamp,minute,second,qualifier,period_id,team,game_id,start_x,start_y,position
0,KickOff_Play_Pass,0.000,DFL-CLU-00000G,DFL-OBJ-0027G6,,2023-05-27 15:30:12.230000+02:00,0.0,0.0,"{'TeamLeft': 'DFL-CLU-00000G', 'TeamRight': 'D...",1,Away,DFL-MAT-J03WMX,53.28000,33.46000,STZ
1,Play_Pass,2.829,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:15.059000+02:00,0.0,2.0,"{'SemiField': 'false', 'Player': 'DFL-OBJ-0027...",1,Away,DFL-MAT-J03WMX,38.57175,33.30550,IVR
2,ThrowIn_Play_Pass,21.551,DFL-CLU-000008,DFL-OBJ-0002BO,,2023-05-27 15:30:33.781000+02:00,0.0,21.0,"{'Team': 'DFL-CLU-000008', 'Side': 'right', 'D...",1,Home,DFL-MAT-J03WMX,68.63575,63.81800,RV
3,TacklingGame,22.268,DFL-CLU-000008,DFL-OBJ-J01BGM,1.0,2023-05-27 15:30:34.498000+02:00,0.0,22.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",1,Home,DFL-MAT-J03WMX,53.22700,59.25000,ORM
4,OtherBallAction,22.733,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:34.963000+02:00,0.0,22.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",1,Away,DFL-MAT-J03WMX,48.33075,53.38125,IVR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,OtherBallAction,2992.377,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 17:25:35.767000+02:00,49.0,52.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",2,Away,DFL-MAT-J03WMX,53.32425,68.39400,IVR
1836,TacklingGame,2992.753,DFL-CLU-000008,DFL-OBJ-002GBW,1.0,2023-05-27 17:25:36.143000+02:00,49.0,52.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",2,Home,DFL-MAT-J03WMX,53.78825,62.79750,STZ
1837,GoalKick_Play_Pass,3001.975,DFL-CLU-000008,DFL-OBJ-0002HE,,2023-05-27 17:25:45.365000+02:00,50.0,1.0,"{'Team': 'DFL-CLU-000008', 'DecisionTimestamp'...",2,Home,DFL-MAT-J03WMX,4.40250,38.98125,TW
1838,FinalWhistle,3004.030,,,,2023-05-27 17:25:47.420000+02:00,50.0,4.0,"{'GameSection': 'secondHalf', 'FinalResult': '...",2,Home,DFL-MAT-J03WMX,,,


In [5]:
import pandas as pd

# 🔹 team_sheets에서 필요한 컬럼만 선택 (player_id, position)
team_position_mapping = team_sheets[['player_id', 'position']]

# 🔹 events에 team_sheets의 position을 병합 (player_id 기준)
events_copy = events.merge(team_position_mapping, on='player_id', how='left')
events_copy


Unnamed: 0,type_name,time_seconds,team_id,player_id,outcome,timestamp,minute,second,qualifier,period_id,team,game_id,start_x,start_y,position
0,KickOff_Play_Pass,0.000,DFL-CLU-00000G,DFL-OBJ-0027G6,,2023-05-27 15:30:12.230000+02:00,0.0,0.0,"{'TeamLeft': 'DFL-CLU-00000G', 'TeamRight': 'D...",1,Away,DFL-MAT-J03WMX,53.28000,33.46000,STZ
1,Play_Pass,2.829,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:15.059000+02:00,0.0,2.0,"{'SemiField': 'false', 'Player': 'DFL-OBJ-0027...",1,Away,DFL-MAT-J03WMX,38.57175,33.30550,IVR
2,ThrowIn_Play_Pass,21.551,DFL-CLU-000008,DFL-OBJ-0002BO,,2023-05-27 15:30:33.781000+02:00,0.0,21.0,"{'Team': 'DFL-CLU-000008', 'Side': 'right', 'D...",1,Home,DFL-MAT-J03WMX,68.63575,63.81800,RV
3,TacklingGame,22.268,DFL-CLU-000008,DFL-OBJ-J01BGM,1.0,2023-05-27 15:30:34.498000+02:00,0.0,22.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",1,Home,DFL-MAT-J03WMX,53.22700,59.25000,ORM
4,OtherBallAction,22.733,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:34.963000+02:00,0.0,22.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",1,Away,DFL-MAT-J03WMX,48.33075,53.38125,IVR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,OtherBallAction,2992.377,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 17:25:35.767000+02:00,49.0,52.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",2,Away,DFL-MAT-J03WMX,53.32425,68.39400,IVR
1836,TacklingGame,2992.753,DFL-CLU-000008,DFL-OBJ-002GBW,1.0,2023-05-27 17:25:36.143000+02:00,49.0,52.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",2,Home,DFL-MAT-J03WMX,53.78825,62.79750,STZ
1837,GoalKick_Play_Pass,3001.975,DFL-CLU-000008,DFL-OBJ-0002HE,,2023-05-27 17:25:45.365000+02:00,50.0,1.0,"{'Team': 'DFL-CLU-000008', 'DecisionTimestamp'...",2,Home,DFL-MAT-J03WMX,4.40250,38.98125,TW
1838,FinalWhistle,3004.030,,,,2023-05-27 17:25:47.420000+02:00,50.0,4.0,"{'GameSection': 'secondHalf', 'FinalResult': '...",2,Home,DFL-MAT-J03WMX,,,


In [70]:
from express.config import PLAYER_ROLE_MAPPING, EVENT_LABEL_MAPPING
agent_ids=events["player_id"].dropna().unique()
added_ids=set(team_sheets['player_id'].unique())-set(events['player_id'].unique()) #events에 기록이 없는 team_sheets의 선수들

positions_mapping = (
    events.drop_duplicates(subset="player_id")
    .set_index("player_id")["position"]
    .map(PLAYER_ROLE_MAPPING)
    .to_dict()
)

events_positions_df = pd.DataFrame(
    [{pid: positions_mapping.get(pid, None) for pid in agent_ids}] * len(events),
    index=events.index
)
events_positions_df = events_positions_df.reindex(columns=agent_ids.tolist()+list(added_ids), fill_value=0)
events_positions_df.astype(int)


Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-002GCR,DFL-OBJ-J01N65,DFL-OBJ-002GIC,DFL-OBJ-0000M0,DFL-OBJ-002GLJ,DFL-OBJ-J00USE,DFL-OBJ-002G0R,DFL-OBJ-J0117H,DFL-OBJ-J01LJ2,DFL-OBJ-0000LT
0,3,6,10,2,9,4,12,2,10,7,...,4,13,0,0,0,0,0,0,0,0
1,3,6,10,2,9,4,12,2,10,7,...,4,13,0,0,0,0,0,0,0,0
2,3,6,10,2,9,4,12,2,10,7,...,4,13,0,0,0,0,0,0,0,0
3,3,6,10,2,9,4,12,2,10,7,...,4,13,0,0,0,0,0,0,0,0
4,3,6,10,2,9,4,12,2,10,7,...,4,13,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,3,6,10,2,9,4,12,2,10,7,...,4,13,0,0,0,0,0,0,0,0
1836,3,6,10,2,9,4,12,2,10,7,...,4,13,0,0,0,0,0,0,0,0
1837,3,6,10,2,9,4,12,2,10,7,...,4,13,0,0,0,0,0,0,0,0
1838,3,6,10,2,9,4,12,2,10,7,...,4,13,0,0,0,0,0,0,0,0


In [20]:
events_positions_df.columns

Index(['DFL-OBJ-0027G6', 'DFL-OBJ-0027KL', 'DFL-OBJ-0002BO', 'DFL-OBJ-J01BGM',
       'DFL-OBJ-J01B8N', 'DFL-OBJ-0027AX', 'DFL-OBJ-0002F5', 'DFL-OBJ-0002AU',
       'DFL-OBJ-0027G0', 'DFL-OBJ-J017RE', 'DFL-OBJ-0026PM', 'DFL-OBJ-0027B6',
       'DFL-OBJ-0002DR',              nan, 'DFL-OBJ-J01D1W', 'DFL-OBJ-002GKO',
       'DFL-OBJ-00012X', 'DFL-OBJ-002G4A', 'DFL-OBJ-J01APO', 'DFL-OBJ-0028BD',
       'DFL-OBJ-0002HE', 'DFL-OBJ-0000IA', 'DFL-OBJ-000270', 'DFL-OBJ-0027V2',
       'DFL-OBJ-J01DVC', 'DFL-OBJ-J015S4', 'DFL-OBJ-002G89', 'DFL-OBJ-000191',
       'DFL-OBJ-0000RP', 'DFL-OBJ-0026ZI', 'DFL-OBJ-002GBW', 'DFL-OBJ-002GCR',
       'DFL-OBJ-J01N65'],
      dtype='object')

In [12]:
events

Unnamed: 0,index,time_seconds,team_id,player_id,outcome,timestamp,minute,second,qualifier,period_id,team,game_id,start_x,start_y,position
0,0,0.000,DFL-CLU-00000G,DFL-OBJ-0027G6,,2023-05-27 15:30:12.230000+02:00,0.0,0.0,"{'TeamLeft': 'DFL-CLU-00000G', 'TeamRight': 'D...",1,Away,DFL-MAT-J03WMX,53.28000,33.46000,STZ
1,1,2.829,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:15.059000+02:00,0.0,2.0,"{'SemiField': 'false', 'Player': 'DFL-OBJ-0027...",1,Away,DFL-MAT-J03WMX,38.57175,33.30550,IVR
2,2,21.551,DFL-CLU-000008,DFL-OBJ-0002BO,,2023-05-27 15:30:33.781000+02:00,0.0,21.0,"{'Team': 'DFL-CLU-000008', 'Side': 'right', 'D...",1,Home,DFL-MAT-J03WMX,68.63575,63.81800,RV
3,3,22.268,DFL-CLU-000008,DFL-OBJ-J01BGM,1.0,2023-05-27 15:30:34.498000+02:00,0.0,22.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",1,Home,DFL-MAT-J03WMX,53.22700,59.25000,ORM
4,4,22.733,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:34.963000+02:00,0.0,22.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",1,Away,DFL-MAT-J03WMX,48.33075,53.38125,IVR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,1835,2992.377,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 17:25:35.767000+02:00,49.0,52.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",2,Away,DFL-MAT-J03WMX,53.32425,68.39400,IVR
1836,1836,2992.753,DFL-CLU-000008,DFL-OBJ-002GBW,1.0,2023-05-27 17:25:36.143000+02:00,49.0,52.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",2,Home,DFL-MAT-J03WMX,53.78825,62.79750,STZ
1837,1837,3001.975,DFL-CLU-000008,DFL-OBJ-0002HE,,2023-05-27 17:25:45.365000+02:00,50.0,1.0,"{'Team': 'DFL-CLU-000008', 'DecisionTimestamp'...",2,Home,DFL-MAT-J03WMX,4.40250,38.98125,TW
1838,1838,3004.030,,,,2023-05-27 17:25:47.420000+02:00,50.0,4.0,"{'GameSection': 'secondHalf', 'FinalResult': '...",2,Home,DFL-MAT-J03WMX,,,


10.agentSide 

In [26]:
match_ids = [extract_match_id(filename) for filename in os.listdir(path)]
match_ids[3]

'DFL-MAT-J03WN1'

In [72]:
import pandas as pd
import json

team_ids = events['team_id'].unique()  
home_team_id = team_ids[0]
away_team_id = team_ids[1]

home_players = events[events["team_id"] == home_team_id]["player_id"].dropna().unique()
away_players = events[events["team_id"] == away_team_id]["player_id"].dropna().unique()

agent_ids = events["player_id"].dropna().unique()

agent_side_dict = {}

for idx, row in events.iterrows():
    team_id = row["team_id"]  # 현재 이벤트 수행한 팀 (Home or Away)
    player_id = row["player_id"]  # 이벤트 수행한 선수
    type_id = row["type_name"]  # 이벤트 ID

    event_binary = {}  # 현재 이벤트에서 모든 선수의 binary 값 저장

    if type_id in on_ball_actions:
        if team_id == home_team_id:
            # 같은 팀 (Home) 선수들에게 1 할당, 상대 팀(Away) 선수들에게 2 할당
            for pid in home_players:
                event_binary[pid] = 1
            for pid in away_players:
                event_binary[pid] = 2
        else:
            # 같은 팀 (Away) 선수들에게 1 할당, 상대 팀(Home) 선수들에게 2 할당
            for pid in away_players:
                event_binary[pid] = 1
            for pid in home_players:
                event_binary[pid] = 2
    else:
        # on-ball 이벤트가 아닌 경우, 모든 선수에 대해 0 할당
        for pid in agent_ids:
            event_binary[pid] = 0

    agent_side_dict[idx] = event_binary  # 결과 저장

# 🏁 결과를 DataFrame으로 변환
agent_side_df = pd.DataFrame.from_dict(agent_side_dict, orient="index")
agent_side_df=agent_side_df.reindex(columns=agent_ids.tolist()+list(added_ids), fill_value=0).astype(int)
agent_side_df


Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-002GCR,DFL-OBJ-J01N65,DFL-OBJ-002GIC,DFL-OBJ-0000M0,DFL-OBJ-002GLJ,DFL-OBJ-J00USE,DFL-OBJ-002G0R,DFL-OBJ-J0117H,DFL-OBJ-J01LJ2,DFL-OBJ-0000LT
0,1,1,2,2,1,2,1,1,1,1,...,1,1,0,0,0,0,0,0,0,0
1,1,1,2,2,1,2,1,1,1,1,...,1,1,0,0,0,0,0,0,0,0
2,2,2,1,1,2,1,2,2,2,2,...,2,2,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,1,2,2,1,2,1,1,1,1,...,1,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,1,1,2,2,1,2,1,1,1,1,...,1,1,0,0,0,0,0,0,0,0
1836,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1837,2,2,1,1,2,1,2,2,2,2,...,2,2,0,0,0,0,0,0,0,0
1838,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


11. agentObserved

In [75]:
agent_ids = events["player_id"].dropna().unique()
added_ids=set(team_sheets['player_id'].unique())-set(events['player_id'].unique()) #events에 기록이 없는 team_sheets의 선수들
on_ball_dict={}
for idx, row in events.iterrows():
    event_on_ball = {}  # 한 이벤트에서 모든 pID의 position 저장
    for pid in agent_ids:
        if row['type_name'] in on_ball_actions and row['player_id']==pid:
            if_onball = 1
        else:
            if_onball = 2  # off-ball action이면 False 처리

        event_on_ball[pid] = if_onball  # 선수 ID별로 on-ball 여부 저장

    on_ball_dict[idx] = event_on_ball  # 이벤트 인덱스별 저장

# 데이터프레임 변환
on_ball_df = pd.DataFrame.from_dict(on_ball_dict, orient="index")
on_ball_df=on_ball_df.reindex(columns=agent_ids.tolist()+list(added_ids), fill_value=0).astype(int)
    
on_ball_df


Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-002GCR,DFL-OBJ-J01N65,DFL-OBJ-002GIC,DFL-OBJ-0000M0,DFL-OBJ-002GLJ,DFL-OBJ-J00USE,DFL-OBJ-002G0R,DFL-OBJ-J0117H,DFL-OBJ-J01LJ2,DFL-OBJ-0000LT
0,1,2,2,2,2,2,2,2,2,2,...,2,2,0,0,0,0,0,0,0,0
1,2,1,2,2,2,2,2,2,2,2,...,2,2,0,0,0,0,0,0,0,0
2,2,2,1,2,2,2,2,2,2,2,...,2,2,0,0,0,0,0,0,0,0
3,2,2,2,2,2,2,2,2,2,2,...,2,2,0,0,0,0,0,0,0,0
4,2,1,2,2,2,2,2,2,2,2,...,2,2,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,2,1,2,2,2,2,2,2,2,2,...,2,2,0,0,0,0,0,0,0,0
1836,2,2,2,2,2,2,2,2,2,2,...,2,2,0,0,0,0,0,0,0,0
1837,2,2,2,2,2,2,2,2,2,2,...,2,2,0,0,0,0,0,0,0,0
1838,2,2,2,2,2,2,2,2,2,2,...,2,2,0,0,0,0,0,0,0,0


12. goalDiff/
직접 찾아서 만들어야 함
KickOff_Play_Pass 가 두개는 기본이니, 나머지는 득점                     


In [76]:
team_ids = events['team_id'].unique()
home_team_id = team_ids[0]
away_team_id = team_ids[1]
added_ids=set(team_sheets['player_id'].unique())-set(events['player_id'].unique()) #events에 기록이 없는 team_sheets의 선수들

home_players = [pid for pid in home_players]
away_players = [pid for pid in away_players]

goal_record = pd.DataFrame(index=events.index, columns=["home_score", "away_score"])
goal_record.fillna(0, inplace=True)  # 초기 값 0으로 설정

current_home_score = 0
current_away_score = 0

for idx, row in events.iterrows():
    if (row['type_name'] == "KickOff_Play_Pass") and (row['time_seconds'] != 0):
        if row['team_id'] == home_team_id:
            current_away_score += 1  # Home 팀이면 Away 점수 증가
        else:
            current_home_score += 1  # Away 팀이면 Home 점수 증가

    goal_record.loc[idx, "home_score"] = current_home_score
    goal_record.loc[idx, "away_score"] = current_away_score

goal_df_dict = {}

for idx, row in events.iterrows():
    goal_diff = {}  # 한 이벤트에서 모든 pID의 골 차이 저장

    # 현재까지의 골 차이 계산
    home_goal_diff = goal_record.loc[idx, "home_score"] - goal_record.loc[idx, "away_score"]
    away_goal_diff = goal_record.loc[idx, "away_score"] - goal_record.loc[idx, "home_score"]

    for pid in home_players:
        goal_diff[pid] = home_goal_diff
        
    for pid in away_players:
        goal_diff[pid] = away_goal_diff

    goal_df_dict[idx] = goal_diff  # 이벤트별 골 차이 저장

# 데이터프레임 변환
goal_diff_df = pd.DataFrame.from_dict(goal_df_dict, orient="index")
goal_diff_df= goal_diff_df.reindex(columns=agent_ids.tolist()+list(added_ids), fill_value=0).astype(int)
goal_diff_df
# 🛠️ 결측값 처리 (NaN → 0)
# goal_diff_df.fillna(0, inplace=True)
# goal_diff_df


Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-002GCR,DFL-OBJ-J01N65,DFL-OBJ-002GIC,DFL-OBJ-0000M0,DFL-OBJ-002GLJ,DFL-OBJ-J00USE,DFL-OBJ-002G0R,DFL-OBJ-J0117H,DFL-OBJ-J01LJ2,DFL-OBJ-0000LT
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,1,1,-1,-1,1,-1,1,1,1,1,...,1,1,0,0,0,0,0,0,0,0
1836,1,1,-1,-1,1,-1,1,1,1,1,...,1,1,0,0,0,0,0,0,0,0
1837,1,1,-1,-1,1,-1,1,1,1,1,...,1,1,0,0,0,0,0,0,0,0
1838,1,1,-1,-1,1,-1,1,1,1,1,...,1,1,0,0,0,0,0,0,0,0


13,14 eventXY

In [40]:
events

Unnamed: 0,type_name,time_seconds,team_id,player_id,outcome,timestamp,minute,second,qualifier,period_id,team,game_id,start_x,start_y,position
0,KickOff_Play_Pass,0.000,DFL-CLU-00000H,DFL-OBJ-002GNL,,2022-11-05 13:01:27.810000+01:00,0.0,0.0,"{'TeamLeft': 'DFL-CLU-00000H', 'GameSection': ...",1,Away,DFL-MAT-J03WQQ,52.91000,33.80000,IVZ
1,Play_Pass,2.097,DFL-CLU-00000H,DFL-OBJ-002GN4,,2022-11-05 13:01:29.907000+01:00,0.0,2.0,"{'Evaluation': 'unsuccessful', 'Distance': 'lo...",1,Away,DFL-MAT-J03WQQ,32.50525,30.27575,IVR
2,TacklingGame,4.289,DFL-CLU-00000H,DFL-OBJ-0027QN,1.0,2022-11-05 13:01:32.099000+01:00,0.0,4.0,"{'WinnerTeam': 'DFL-CLU-00000P', 'LoserRole': ...",1,Away,DFL-MAT-J03WQQ,74.38475,57.61150,STR
3,OtherBallAction,5.084,DFL-CLU-00000P,DFL-OBJ-0000NZ,,2022-11-05 13:01:32.894000+01:00,0.0,5.0,"{'Player': 'DFL-OBJ-0000NZ', 'Team': 'DFL-CLU-...",1,Home,DFL-MAT-J03WQQ,75.73100,59.64900,IVR
4,Play_Pass,7.553,DFL-CLU-00000H,DFL-OBJ-0002EL,,2022-11-05 13:01:35.363000+01:00,0.0,7.0,"{'Evaluation': 'unsuccessful', 'Distance': 'me...",1,Away,DFL-MAT-J03WQQ,64.72775,51.96750,DLM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1674,OtherBallAction,2914.218,DFL-CLU-00000P,DFL-OBJ-J01L1F,,2022-11-05 14:52:26.028000+01:00,48.0,34.0,"{'Player': 'DFL-OBJ-J01L1F', 'Team': 'DFL-CLU-...",2,Home,DFL-MAT-J03WQQ,81.97750,15.43650,STL
1675,Delete,2914.900,,,,2022-11-05 14:52:26.710000+01:00,48.0,34.0,{},2,Home,DFL-MAT-J03WQQ,,,
1676,Delete,2914.900,,,,2022-11-05 14:52:26.710000+01:00,48.0,34.0,{},2,Away,DFL-MAT-J03WQQ,,,
1677,FinalWhistle,2920.190,,,,2022-11-05 14:52:32+01:00,48.0,40.0,"{'BreakingOff': 'false', 'GameSection': 'secon...",2,Home,DFL-MAT-J03WQQ,,,


15. eventType

In [6]:
eID_df = events[["type_name"]].copy()
eID_df


Unnamed: 0,type_name
0,KickOff_Play_Pass
1,Play_Pass
2,ThrowIn_Play_Pass
3,TacklingGame
4,OtherBallAction
...,...
1835,OtherBallAction
1836,TacklingGame
1837,GoalKick_Play_Pass
1838,FinalWhistle


03.31 새로 추가된 features

16.prevAvgX

In [56]:
from imputer.config import on_ball_actions
import pandas as pd
import numpy as np

# 기본 준비
agent_ids = events["player_id"].dropna().unique()
added_ids = set(teams['player_id'].unique()) - set(agent_ids)
first_event_x, first_event_y = events.loc[0, ["start_x", "start_y"]]

# 누적 정보 저장용 dict
cumulative_x = {pid: 0.0 for pid in agent_ids}
cumulative_y = {pid: 0.0 for pid in agent_ids}
count_seen = {pid: 0 for pid in agent_ids}
first_seen_xy = {}

# 평균 좌표 저장 dict
mean_x_dict = {}
mean_y_dict = {}

for idx, row in events.iterrows():
    pID = row["player_id"]
    x, y = row["start_x"], row["start_y"]
    typ = row["type_name"]

    # 처음 관측된 선수라면 이전 인덱스들 채우기
    if pd.notna(pID) and pID not in first_seen_xy:
        first_seen_xy[pID] = (x, y)
        for prev_idx in range(idx):
            mean_x_dict.setdefault(prev_idx, {})[pID] = x
            mean_y_dict.setdefault(prev_idx, {})[pID] = y

    # 현재 시점의 평균 좌표 계산
    mean_x_row = {
        pid: (cumulative_x[pid] / count_seen[pid]) if count_seen[pid] > 0
             else first_seen_xy.get(pid, (first_event_x, first_event_y))[0]
        for pid in agent_ids
    }
    mean_y_row = {
        pid: (cumulative_y[pid] / count_seen[pid]) if count_seen[pid] > 0
             else first_seen_xy.get(pid, (first_event_x, first_event_y))[1]
        for pid in agent_ids
    }
    mean_x_dict[idx] = mean_x_row
    mean_y_dict[idx] = mean_y_row

    # on-ball이면 누적
    if pd.notna(pID) and typ in on_ball_actions:
        cumulative_x[pID] += x
        cumulative_y[pID] += y
        count_seen[pID] += 1

# DataFrame 변환 및 정렬
mean_x_df = pd.DataFrame.from_dict(mean_x_dict, orient="index")
mean_y_df = pd.DataFrame.from_dict(mean_y_dict, orient="index")

mean_x_df = mean_x_df.reindex(columns=list(agent_ids) + list(added_ids), fill_value=0).astype(float)
mean_y_df = mean_y_df.reindex(columns=list(agent_ids) + list(added_ids), fill_value=0).astype(float)

# 확인
mean_x_df


Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-002GCR,DFL-OBJ-J01N65,DFL-OBJ-002GIC,DFL-OBJ-J0117H,DFL-OBJ-0000M0,DFL-OBJ-J01LJ2,DFL-OBJ-002GLJ,DFL-OBJ-0000LT,DFL-OBJ-002G0R,DFL-OBJ-J00USE
0,53.280000,38.571750,68.635750,53.227000,57.78000,52.454000,48.957500,59.500500,39.19500,34.15500,...,46.875000,16.562000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,53.280000,38.571750,68.635750,53.227000,57.78000,52.454000,48.957500,59.500500,39.19500,34.15500,...,46.875000,16.562000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,53.280000,38.571750,68.635750,53.227000,57.78000,52.454000,48.957500,59.500500,39.19500,34.15500,...,46.875000,16.562000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,53.280000,38.571750,68.635750,53.227000,57.78000,52.454000,48.957500,59.500500,39.19500,34.15500,...,46.875000,16.562000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,53.280000,38.571750,68.635750,53.227000,57.78000,52.454000,48.957500,59.500500,39.19500,34.15500,...,46.875000,16.562000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,47.640669,45.168511,52.915623,48.373015,53.63855,53.102384,49.505122,49.276337,52.04239,50.94477,...,34.418812,36.052625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1836,47.640669,45.258135,52.915623,48.373015,53.63855,53.102384,49.505122,49.276337,52.04239,50.94477,...,34.418812,36.052625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1837,47.640669,45.258135,52.915623,48.373015,53.63855,53.102384,49.505122,49.276337,52.04239,50.94477,...,34.418812,36.052625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1838,47.640669,45.258135,52.915623,48.373015,53.63855,53.102384,49.505122,49.276337,52.04239,50.94477,...,34.418812,36.052625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


17.nextAvgXY

In [164]:
agent_ids = events["player_id"].dropna().unique()
added_ids = set(teams["player_id"].unique()) - set(agent_ids)
last_event_x, last_event_y = events.iloc[-1][["start_x", "start_y"]]

n = len(events)
cumulative_x = {pid: 0.0 for pid in agent_ids}
cumulative_y = {pid: 0.0 for pid in agent_ids}
count_seen = {pid: 0 for pid in agent_ids}
last_seen_xy = {}

mean_x_positions_dict = {}
mean_y_positions_dict = {}

for idx in reversed(range(n)):
    row = events.iloc[idx]
    current_pID = row["player_id"]
    event_x = row["start_x"]
    event_y = row["start_y"]
    event_type = row["type_name"]


    mean_x_row = {
        pid: (cumulative_x[pid] / count_seen[pid]) if count_seen[pid] > 0
             else last_seen_xy.get(pid, (last_event_x, last_event_y))[0]
        for pid in agent_ids
    }
    mean_y_row = {
        pid: (cumulative_y[pid] / count_seen[pid]) if count_seen[pid] > 0
             else last_seen_xy.get(pid, (last_event_x, last_event_y))[1]
        for pid in agent_ids
    }

    mean_x_positions_dict[idx] = mean_x_row
    mean_y_positions_dict[idx] = mean_y_row

    # 누적은 이벤트 이후부터 반영
    if pd.notna(current_pID) and event_type in on_ball_actions:
        cumulative_x[current_pID] += event_x
        cumulative_y[current_pID] += event_y
        count_seen[current_pID] += 1



# 결과 DataFrame 변환
mean_x_df = pd.DataFrame.from_dict(mean_x_positions_dict, orient="index").sort_index()
mean_x_df = mean_x_df.reindex(columns=agent_ids.tolist() + list(added_ids), fill_value=0).astype(float)

mean_y_df = pd.DataFrame.from_dict(mean_y_positions_dict, orient="index").sort_index()
mean_y_df = mean_y_df.reindex(columns=agent_ids.tolist() + list(added_ids), fill_value=0).astype(float)
mean_x_df = mean_x_df.ffill().bfill()
mean_y_df = mean_y_df.ffill().bfill()

18.possessRatio

In [168]:
from imputer.config import on_ball_actions
import pandas as pd

home_possession = 0.0
away_possession = 0.0
last_time = None
last_team = None

home_ratio_list = []
away_ratio_list = []

pending_start_time = None  # 🔥 델타 계산을 보류할 때 필요한 변수
valid_events = events[events["time_seconds"] >= 0].copy()
last_period=events.loc[0,'period_id']

player_team_map = teams.set_index("player_id")["team"].to_dict()
agent_ids = events["player_id"].dropna().unique()

added_ids = set(teams["player_id"].unique()) - set(agent_ids)

possession_by_player = {}
for i, (idx, row) in enumerate(valid_events.iterrows()):
    curr_time = row["time_seconds"]
    curr_team = row["team"]
    event_type = row["type_name"]
    cur_period= row["period_id"]

    if cur_period!=last_period:
        pending_start_time = None
        last_team = None
    last_period = cur_period  # 마지막 period 업데이트
    # 이전에 누가 점유하고 있었는지에 따라 possession time 누적
    if event_type in on_ball_actions:
        if last_team in ["Home", "Away"] and pending_start_time is not None:
            delta_time = curr_time - pending_start_time
            if last_team == "Home":
                home_possession += delta_time
            elif last_team == "Away":
                away_possession += delta_time

        last_team = curr_team
        pending_start_time = curr_time

    # 해당 시점까지 점유율 계산
    total_time = home_possession + away_possession
    if total_time > 0:
        home_ratio = home_possession / total_time
        away_ratio = away_possession / total_time
    else:
        home_ratio = 0.5  # 초기엔 50:50으로 가정
        away_ratio = 0.5

    home_ratio_list.append(home_ratio)
    away_ratio_list.append(away_ratio)

    # 마지막 점유 팀 업데이트
    if event_type in on_ball_actions:
        last_team = curr_team
        last_time = curr_time

    row_dict = {}
    for pid in agent_ids:
        team = player_team_map.get(pid, None)
        if team == "Home":
            row_dict[pid] = home_ratio
        elif team == "Away":
            row_dict[pid] = away_ratio
        else:
            row_dict[pid] = None
    possession_by_player[idx] = row_dict

possession_df = pd.DataFrame.from_dict(possession_by_player, orient="index")
possession_df

Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-0027V2,DFL-OBJ-J01DVC,DFL-OBJ-J015S4,DFL-OBJ-002G89,DFL-OBJ-000191,DFL-OBJ-0000RP,DFL-OBJ-0026ZI,DFL-OBJ-002GBW,DFL-OBJ-002GCR,DFL-OBJ-J01N65
0,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,...,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000
1,1.000000,1.000000,0.000000,0.000000,1.000000,0.000000,1.000000,1.000000,1.000000,1.000000,...,0.000000,0.000000,1.000000,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000,1.000000
2,1.000000,1.000000,0.000000,0.000000,1.000000,0.000000,1.000000,1.000000,1.000000,1.000000,...,0.000000,0.000000,1.000000,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000,1.000000
3,1.000000,1.000000,0.000000,0.000000,1.000000,0.000000,1.000000,1.000000,1.000000,1.000000,...,0.000000,0.000000,1.000000,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000,1.000000
4,0.948005,0.948005,0.051995,0.051995,0.948005,0.051995,0.948005,0.948005,0.948005,0.948005,...,0.051995,0.051995,0.948005,0.051995,0.948005,0.948005,0.051995,0.051995,0.948005,0.948005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,0.565327,0.565327,0.434673,0.434673,0.565327,0.434673,0.565327,0.565327,0.565327,0.565327,...,0.434673,0.434673,0.565327,0.434673,0.565327,0.565327,0.434673,0.434673,0.565327,0.565327
1836,0.565327,0.565327,0.434673,0.434673,0.565327,0.434673,0.565327,0.565327,0.565327,0.565327,...,0.434673,0.434673,0.565327,0.434673,0.565327,0.565327,0.434673,0.434673,0.565327,0.565327
1837,0.566044,0.566044,0.433956,0.433956,0.566044,0.433956,0.566044,0.566044,0.566044,0.566044,...,0.433956,0.433956,0.566044,0.433956,0.566044,0.566044,0.433956,0.433956,0.566044,0.566044
1838,0.566044,0.566044,0.433956,0.433956,0.566044,0.433956,0.566044,0.566044,0.566044,0.566044,...,0.433956,0.433956,0.566044,0.433956,0.566044,0.566044,0.433956,0.433956,0.566044,0.566044


19.elapsedTime

In [170]:
agent_ids = events["player_id"].dropna().unique()
added_ids = set(teams['player_id'].unique()) - set(agent_ids)
all_ids = list(agent_ids) + list(added_ids)

# ⏱ 이벤트 간 시간 차이 계산
delta_times = events["time_seconds"].diff().fillna(0).values

# 📌 모든 player_id에 대해 복제해서 DataFrame 생성
elapsed_time_df = pd.DataFrame({pid: delta_times for pid in all_ids}, index=events.index).astype(float)
elapsed_time_df

Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-002GCR,DFL-OBJ-J01N65,DFL-OBJ-002GIC,DFL-OBJ-J0117H,DFL-OBJ-0000M0,DFL-OBJ-J01LJ2,DFL-OBJ-002GLJ,DFL-OBJ-0000LT,DFL-OBJ-002G0R,DFL-OBJ-J00USE
0,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
1,2.829,2.829,2.829,2.829,2.829,2.829,2.829,2.829,2.829,2.829,...,2.829,2.829,2.829,2.829,2.829,2.829,2.829,2.829,2.829,2.829
2,18.722,18.722,18.722,18.722,18.722,18.722,18.722,18.722,18.722,18.722,...,18.722,18.722,18.722,18.722,18.722,18.722,18.722,18.722,18.722,18.722
3,0.717,0.717,0.717,0.717,0.717,0.717,0.717,0.717,0.717,0.717,...,0.717,0.717,0.717,0.717,0.717,0.717,0.717,0.717,0.717,0.717
4,0.465,0.465,0.465,0.465,0.465,0.465,0.465,0.465,0.465,0.465,...,0.465,0.465,0.465,0.465,0.465,0.465,0.465,0.465,0.465,0.465
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,3.044,3.044,3.044,3.044,3.044,3.044,3.044,3.044,3.044,3.044,...,3.044,3.044,3.044,3.044,3.044,3.044,3.044,3.044,3.044,3.044
1836,0.376,0.376,0.376,0.376,0.376,0.376,0.376,0.376,0.376,0.376,...,0.376,0.376,0.376,0.376,0.376,0.376,0.376,0.376,0.376,0.376
1837,9.222,9.222,9.222,9.222,9.222,9.222,9.222,9.222,9.222,9.222,...,9.222,9.222,9.222,9.222,9.222,9.222,9.222,9.222,9.222,9.222
1838,2.055,2.055,2.055,2.055,2.055,2.055,2.055,2.055,2.055,2.055,...,2.055,2.055,2.055,2.055,2.055,2.055,2.055,2.055,2.055,2.055


In [182]:
events

Unnamed: 0,type_name,time_seconds,team_id,player_id,outcome,timestamp,minute,second,qualifier,period_id,team,game_id,start_x,start_y,position
0,KickOff_Play_Pass,0.000,DFL-CLU-00000G,DFL-OBJ-0027G6,,2023-05-27 15:30:12.230000+02:00,0.0,0.0,"{'TeamLeft': 'DFL-CLU-00000G', 'TeamRight': 'D...",1,Away,DFL-MAT-J03WMX,53.28000,33.46000,STZ
1,Play_Pass,2.829,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:15.059000+02:00,0.0,2.0,"{'SemiField': 'false', 'Player': 'DFL-OBJ-0027...",1,Away,DFL-MAT-J03WMX,38.57175,33.30550,IVR
2,ThrowIn_Play_Pass,21.551,DFL-CLU-000008,DFL-OBJ-0002BO,,2023-05-27 15:30:33.781000+02:00,0.0,21.0,"{'Team': 'DFL-CLU-000008', 'Side': 'right', 'D...",1,Home,DFL-MAT-J03WMX,68.63575,63.81800,RV
3,TacklingGame,22.268,DFL-CLU-000008,DFL-OBJ-J01BGM,1.0,2023-05-27 15:30:34.498000+02:00,0.0,22.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",1,Home,DFL-MAT-J03WMX,53.22700,59.25000,ORM
4,OtherBallAction,22.733,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:34.963000+02:00,0.0,22.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",1,Away,DFL-MAT-J03WMX,48.33075,53.38125,IVR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,OtherBallAction,2992.377,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 17:25:35.767000+02:00,49.0,52.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",2,Away,DFL-MAT-J03WMX,53.32425,68.00000,IVR
1836,TacklingGame,2992.753,DFL-CLU-000008,DFL-OBJ-002GBW,1.0,2023-05-27 17:25:36.143000+02:00,49.0,52.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",2,Home,DFL-MAT-J03WMX,53.78825,62.79750,STZ
1837,GoalKick_Play_Pass,3001.975,DFL-CLU-000008,DFL-OBJ-0002HE,,2023-05-27 17:25:45.365000+02:00,50.0,1.0,"{'Team': 'DFL-CLU-000008', 'DecisionTimestamp'...",2,Home,DFL-MAT-J03WMX,4.40250,38.98125,TW
1838,FinalWhistle,3004.030,,,,2023-05-27 17:25:47.420000+02:00,50.0,4.0,"{'GameSection': 'secondHalf', 'FinalResult': '...",2,Home,DFL-MAT-J03WMX,,,


20.prevDeltaAngle

In [176]:
import numpy as np
import pandas as pd


# 전체 player ID 목록
agent_ids = events["player_id"].dropna().unique()
added_ids = set(teams["player_id"].unique()) - set(agent_ids)
all_ids = list(agent_ids) + list(added_ids)


# on-ball 이벤트만 추출해서 벡터 계산
onball_events = events[events["type_name"].isin(on_ball_actions)].copy()

onball_events["prev_x"], onball_events["prev_y"] = onball_events["start_x"].shift(1), onball_events["start_y"].shift(1)
onball_events["delta_x"] = onball_events["start_x"] - onball_events["prev_x"]
onball_events["delta_y"] = onball_events["start_y"] - onball_events["prev_y"]
onball_events["delta_dist"] = np.sqrt(onball_events["delta_x"]**2 + onball_events["delta_y"]**2)
onball_events["delta_angle_rad"] = np.arctan2(onball_events["delta_y"], onball_events["delta_x"])

# delta_angle_rad 확장 (NaN은 0.0으로 채움)
delta_angle_full = onball_events["delta_angle_rad"].reindex(events.index).fillna(0.0)

expanded_delta_angle = pd.DataFrame(
    np.repeat(delta_angle_full.values[:, np.newaxis], len(all_ids), axis=1),
    index=events.index,
    columns=all_ids
).astype(float)


Unnamed: 0,DFL-OBJ-0027G6,DFL-OBJ-0027KL,DFL-OBJ-0002BO,DFL-OBJ-J01BGM,DFL-OBJ-J01B8N,DFL-OBJ-0027AX,DFL-OBJ-0002F5,DFL-OBJ-0002AU,DFL-OBJ-0027G0,DFL-OBJ-J017RE,...,DFL-OBJ-002GCR,DFL-OBJ-J01N65,DFL-OBJ-002GIC,DFL-OBJ-J0117H,DFL-OBJ-0000M0,DFL-OBJ-J01LJ2,DFL-OBJ-002GLJ,DFL-OBJ-0000LT,DFL-OBJ-002G0R,DFL-OBJ-J00USE
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,-3.131089,-3.131089,-3.131089,-3.131089,-3.131089,-3.131089,-3.131089,-3.131089,-3.131089,-3.131089,...,-3.131089,-3.131089,-3.131089,-3.131089,-3.131089,-3.131089,-3.131089,-3.131089,-3.131089,-3.131089
2,0.792802,0.792802,0.792802,0.792802,0.792802,0.792802,0.792802,0.792802,0.792802,0.792802,...,0.792802,0.792802,0.792802,0.792802,0.792802,0.792802,0.792802,0.792802,0.792802,0.792802
3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,-2.666809,-2.666809,-2.666809,-2.666809,-2.666809,-2.666809,-2.666809,-2.666809,-2.666809,-2.666809,...,-2.666809,-2.666809,-2.666809,-2.666809,-2.666809,-2.666809,-2.666809,-2.666809,-2.666809,-2.666809
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,0.096284,0.096284,0.096284,0.096284,0.096284,0.096284,0.096284,0.096284,0.096284,0.096284,...,0.096284,0.096284,0.096284,0.096284,0.096284,0.096284,0.096284,0.096284,0.096284,0.096284
1836,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1837,-2.606213,-2.606213,-2.606213,-2.606213,-2.606213,-2.606213,-2.606213,-2.606213,-2.606213,-2.606213,...,-2.606213,-2.606213,-2.606213,-2.606213,-2.606213,-2.606213,-2.606213,-2.606213,-2.606213,-2.606213
1838,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


21.freeze_frame

In [None]:

def add_freeze_frames(events, teams, position):
    freeze_frame_list = []

    agent_ids = events["player_id"].dropna().unique()

    for idx, row in events.iterrows():
        current_pID = row["player_id"]
        current_team = row["team"]
        actor_x, actor_y = row["start_x"], row["start_y"]

        if pd.isna(actor_x) or pd.isna(actor_y):
            freeze_frame_list.append([])  # 빈 freeze frame
            continue

        index = int(row["time_seconds"] // 0.04)
        x_min, x_max = actor_x - 25, actor_x + 25
        y_min, y_max = actor_y - 25, actor_y + 25

        freeze_frame_event = []

        for pid in agent_ids:
            try:
                team_info = teams.loc[teams["player_id"] == pid, "team"].values[0]
                position_info = teams.loc[teams["player_id"] == pid, "position"].values[0]
                xid_raw = teams.loc[teams["player_id"] == pid, "xID"].values[0]
            except IndexError:
                continue

            xid = str(int(xid_raw)).zfill(2)
            prefix = "H" if team_info == "Home" else "A"
            key = f"{prefix}{xid}_"

            try:
                x = position.iloc[index][key + "x"]
                y = position.iloc[index][key + "y"]
            except (KeyError, IndexError):
                continue

            if not (x_min <= x <= x_max and y_min <= y <= y_max):
                continue

            freeze_frame = {
                "x": x,
                "y": y,
                "actor": pid == current_pID,
                "teammate": team_info == current_team,
                "keeper": position_info == "TW"
            }

            if pid == current_pID:
                freeze_frame["teammate"] = True  # actor는 항상 teammate
                freeze_frame["actor"] = True

            freeze_frame_event.append(freeze_frame)

        freeze_frame_list.append(freeze_frame_event)
    events = events.copy()
    events["freeze_frame"] = freeze_frame_list
    return events


In [180]:
events_with_ff = add_freeze_frames(events, teams, position)
events_with_ff

Unnamed: 0,type_name,time_seconds,team_id,player_id,outcome,timestamp,minute,second,qualifier,period_id,team,game_id,start_x,start_y,position,freeze_frame
0,KickOff_Play_Pass,0.000,DFL-CLU-00000G,DFL-OBJ-0027G6,,2023-05-27 15:30:12.230000+02:00,0.0,0.0,"{'TeamLeft': 'DFL-CLU-00000G', 'TeamRight': 'D...",1,Away,DFL-MAT-J03WMX,53.28000,33.46000,STZ,"[{'x': 53.28, 'y': 33.46, 'actor': True, 'team..."
1,Play_Pass,2.829,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:15.059000+02:00,0.0,2.0,"{'SemiField': 'false', 'Player': 'DFL-OBJ-0027...",1,Away,DFL-MAT-J03WMX,38.57175,33.30550,IVR,"[{'x': 38.55, 'y': 33.32, 'actor': True, 'team..."
2,ThrowIn_Play_Pass,21.551,DFL-CLU-000008,DFL-OBJ-0002BO,,2023-05-27 15:30:33.781000+02:00,0.0,21.0,"{'Team': 'DFL-CLU-000008', 'Side': 'right', 'D...",1,Home,DFL-MAT-J03WMX,68.63575,63.81800,RV,"[{'x': 66.69, 'y': 49.4, 'actor': False, 'team..."
3,TacklingGame,22.268,DFL-CLU-000008,DFL-OBJ-J01BGM,1.0,2023-05-27 15:30:34.498000+02:00,0.0,22.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",1,Home,DFL-MAT-J03WMX,53.22700,59.25000,ORM,"[{'x': 64.13, 'y': 48.97, 'actor': False, 'tea..."
4,OtherBallAction,22.733,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 15:30:34.963000+02:00,0.0,22.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",1,Away,DFL-MAT-J03WMX,48.33075,53.38125,IVR,"[{'x': 62.55, 'y': 48.57, 'actor': False, 'tea..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,OtherBallAction,2992.377,DFL-CLU-00000G,DFL-OBJ-0027KL,,2023-05-27 17:25:35.767000+02:00,49.0,52.0,"{'Player': 'DFL-OBJ-0027KL', 'Team': 'DFL-CLU-...",2,Away,DFL-MAT-J03WMX,53.32425,68.00000,IVR,"[{'x': 44.8, 'y': 44.86, 'actor': False, 'team..."
1836,TacklingGame,2992.753,DFL-CLU-000008,DFL-OBJ-002GBW,1.0,2023-05-27 17:25:36.143000+02:00,49.0,52.0,"{'WinnerTeam': 'DFL-CLU-00000G', 'Winner': 'DF...",2,Home,DFL-MAT-J03WMX,53.78825,62.79750,STZ,"[{'x': 43.53, 'y': 45.41, 'actor': False, 'tea..."
1837,GoalKick_Play_Pass,3001.975,DFL-CLU-000008,DFL-OBJ-0002HE,,2023-05-27 17:25:45.365000+02:00,50.0,1.0,"{'Team': 'DFL-CLU-000008', 'DecisionTimestamp'...",2,Home,DFL-MAT-J03WMX,4.40250,38.98125,TW,"[{'x': 18.78, 'y': 32.68, 'actor': False, 'tea..."
1838,FinalWhistle,3004.030,,,,2023-05-27 17:25:47.420000+02:00,50.0,4.0,"{'GameSection': 'secondHalf', 'FinalResult': '...",2,Home,DFL-MAT-J03WMX,,,,[]


In [181]:
events_with_ff.iloc[0,:]['freeze_frame']

[{'x': 53.28, 'y': 33.46, 'actor': True, 'teammate': True, 'keeper': False},
 {'x': 36.45, 'y': 35.04, 'actor': False, 'teammate': True, 'keeper': False},
 {'x': 69.76, 'y': 54.85, 'actor': False, 'teammate': False, 'keeper': False},
 {'x': 53.9, 'y': 47.05, 'actor': False, 'teammate': False, 'keeper': False},
 {'x': 53.02, 'y': 44.26, 'actor': False, 'teammate': True, 'keeper': False},
 {'x': 59.4, 'y': 39.12, 'actor': False, 'teammate': False, 'keeper': False},
 {'x': 43.41, 'y': 32.04, 'actor': False, 'teammate': True, 'keeper': False},
 {'x': 52.61, 'y': 12.13, 'actor': False, 'teammate': True, 'keeper': False},
 {'x': 39.7, 'y': 15.15, 'actor': False, 'teammate': True, 'keeper': False},
 {'x': 31.73, 'y': 43.07, 'actor': False, 'teammate': True, 'keeper': False},
 {'x': 72.66, 'y': 41.08, 'actor': False, 'teammate': False, 'keeper': False},
 {'x': 71.34, 'y': 29.51, 'actor': False, 'teammate': False, 'keeper': False},
 {'x': 63.87, 'y': 39.18, 'actor': False, 'teammate': False, 'k

In [120]:
freeze_frame=
[{'teammate': True,
  'actor': False,
  'keeper': False,
  'x': 70.68516235399647,
  'y': 38.06182105940856
  },
 {'teammate': True,
  'actor': False,
  'keeper': False,
  'x': 70.67938380761842,
  'y': 25.07232033860518
  },
 {'teammate': True,
  'actor': False,
  'keeper': False,
  'x': 67.74541132985456,
  'y': 57.39517948339672,
  'player_id': 3026},
 {'teammate': True,
  'actor': False,
  'keeper': False,
  'x': 64.26952015776081,
  'y': 10.721266141117013
},
 {'teammate': False,
  'actor': False,
  'keeper': False,
  'x': 27.79502957564756,
  'y': 19.76975508410478
}]

[{'teammate': True,
  'actor': False,
  'keeper': False,
  'x': 70.68516235399647,
  'y': 38.06182105940856},
 {'teammate': True,
  'actor': False,
  'keeper': False,
  'x': 70.67938380761842,
  'y': 25.07232033860518},
 {'teammate': True,
  'actor': False,
  'keeper': False,
  'x': 67.74541132985456,
  'y': 57.39517948339672,
  'player_id': 3026},
 {'teammate': True,
  'actor': False,
  'keeper': False,
  'x': 64.26952015776081,
  'y': 10.721266141117013},
 {'teammate': False,
  'actor': False,
  'keeper': False,
  'x': 27.79502957564756,
  'y': 19.76975508410478}]