## 1. Import

In [4]:
# file/path uilities
import os
import glob
from pathlib import Path

# for data manipulation/math
import pandas as pd
import numpy as np
import random

# encoding (type_name to number) / split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GroupKFold

# progress bar
from tqdm import tqdm

# deep learning framework
import torch
from torch import nn
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence # padding to handle variable length sequences
from torch.utils.data import Dataset, DataLoader

## 2. 하이퍼파라미터 세팅

In [5]:
#--- hyperparameter ---

SEED = 42

# cross-validation
N_SPLITS = 5 # number of folds
FOLD = 0 # which fold for validation

# sequence length
K = 50 # number of events to consider before the target event if smaller than K, pad with zeros
MIN_EVENTS = 2

# training parameters
EPOCHS = 100
BATCH_SIZE = 256
LR = 1e-3
WEIGHT_DECAY = 1e-5

# model parameters
HIDDEN_SIZE = 256 # LSTM hidden size
NUM_LAYERS = 2 # number of LSTM layers
DROPOUT = 0.4  # increased from 0.2 for regularization
NUM_HEADS = 4 # number of attention heads for multi-head attention

# augmentation parameters
NOISE_STD = 0.5  # std for coordinate noise augmentation (meters)

# data loader parameters
NUM_WORKERS = 0

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

print("Using device:", DEVICE)





Using device: cuda


## 3. 데이터 로드 및 전처리

In [6]:
TRAIN_PATH = "../data/train.csv"

df = pd.read_csv(TRAIN_PATH)

# sort events inside each episode by time, then action_id
# action_id is used for duplicate time_seconds
df = df.sort_values(["game_episode", "time_seconds", "action_id"]).reset_index(drop=True)

# fill missing category text
df["type_name"] = df["type_name"].fillna("__NA_TYPE__")
df["result_name"] = df["result_name"].fillna("__NA_RES__")

# change category text to idx(number)
# mapping number is just name, no matter with performance
le_type = LabelEncoder()
le_res  = LabelEncoder()
# id: number idx, name: text category
df["type_id"] = le_type.fit_transform(df["type_name"]) + 1 # start from 1 to avoid 0 for padding
df["res_id"]  = le_res.fit_transform(df["result_name"]) + 1

# stadium (105 x 68), attacking direction: left -> right
STADIUM_X, STADIUM_Y = 105.0, 68.0
CENTER_Y = STADIUM_Y / 2.0  # 34.0
HALF_X   = STADIUM_X / 2.0  # 52.5

# goal position
GOAL_X, GOAL_Y = STADIUM_X, CENTER_Y  # (105.0, 34.0)

# goal segment (FIFA goal width 7.32m => +/-3.66m from center)
GOAL_POST_HALF = 3.66
GOAL_Y_L = CENTER_Y - GOAL_POST_HALF  # 30.34
GOAL_Y_R = CENTER_Y + GOAL_POST_HALF  # 37.66


# opponent penalty box (FIFA: 16.5m deep, 40.32m wide => +/-20.16m from center)
P_BOX_X_MIN = STADIUM_X - 16.5        # 88.5
P_BOX_Y_MIN = CENTER_Y - 20.16      # 13.84
P_BOX_Y_MAX = CENTER_Y + 20.16      # 54.16

episodes = [] # list of episodes' event sequences
targets  = [] # list of target (x,y) per episode
episode_keys = [] # ex) 12345_7 used for game_episode rematching, debug
episode_game_ids = [] # gmae_id list

# build sequences per game_episode
for key, g in tqdm(df.groupby("game_episode")): # key: {game_id}_{episode_id}, g: features in the episode + type_id, res_id
    g = g.reset_index(drop=True) # realign index [0 ... T-1]
    if len(g) < 2:
        continue

    # target data is the last Pass event's end point
    # if not pass event in the episode, skip, actually only one case of carry
    if g.iloc[-1]["type_name"] != "Pass":
        pass_idxs = g.index[g["type_name"] == "Pass"]
        if len(pass_idxs) == 0:
            continue
        g = g.loc[:pass_idxs[-1]].reset_index(drop=True)

        # after cutting, ensure enough length
        if len(g) < 2:
            continue

    # target is the last event's end point
    tx, ty = float(g.loc[len(g)-1, "end_x"]), float(g.loc[len(g)-1, "end_y"])
    if np.isnan(tx) or np.isnan(ty):
        continue

    # compute dt inside episode
    t = g["time_seconds"].astype("float32").values
    dt = np.zeros_like(t, dtype="float32")
    dt[1:] = t[1:] - t[:-1]
    dt[dt < 0] = 0.0  # time-reversal safe-guard

    # extract start/end positions
    sx = g["start_x"].astype("float32").values
    sy = g["start_y"].astype("float32").values
    ex = g["end_x"].astype("float32").values
    ey = g["end_y"].astype("float32").values

    # leak-safe masking for last event's end
    ex_mask = ex.copy() 
    ey_mask = ey.copy()
    ex_mask[-1] = 0.0 # leak-safe
    ey_mask[-1] = 0.0 # leak-safe

    # goal geometry features
    dxg = GOAL_X - sx  # delta x to goal line
    dy_goal = np.maximum(0.0, np.maximum(GOAL_Y_L - sy, sy - GOAL_Y_R)) # delta y to goal segment, if inside segment = 0, else minimum y distance
    dist_to_goal = np.sqrt(dxg**2 + dy_goal**2).astype("float32") # distance to goal segment

    # angles to goal posts
    alpha_L = np.arctan2(GOAL_Y_L - sy, dxg).astype("float32")
    alpha_R = np.arctan2(GOAL_Y_R - sy, dxg).astype("float32")

    # goal view features
    theta_view = np.abs(alpha_R - alpha_L).astype("float32")

    # half line
    in_own_half = (sx < HALF_X).astype("float32") # (1): own half, (0): opponent half

    # penalty box (only dist_p_box, removed in_p_box)
    dx_box = np.maximum(0.0, P_BOX_X_MIN - sx) # inside: 0, outside: delta x to penalty box
    dy_box = np.maximum(0.0, np.maximum(P_BOX_Y_MIN - sy, sy - P_BOX_Y_MAX)) # inside 0, outside: delta y to penalty box
    dist_p_box = np.sqrt(dx_box**2 + dy_box**2).astype("float32") # distance to penalty box

    # previous event features
    # prev_dx[t] = end_x[t-1] - start_x[t-1], prev_dy[t] = end_y[t-1] - start_y[t-1]
    # [when t = 0 (no prev case)] 0, prev_valid=0
    T = len(g)
    prev_dx = np.zeros(T, dtype="float32")
    prev_dy = np.zeros(T, dtype="float32")
    prev_valid = np.zeros(T, dtype="float32")

    if T > 1:
        # movement for prev event (t-1's end - start)
        dx_prev_raw = ex[:-1] - sx[:-1]  # shape (T-1,)
        dy_prev_raw = ey[:-1] - sy[:-1]

        # assign on t>=1
        prev_dx[1:] = dx_prev_raw
        prev_dy[1:] = dy_prev_raw
        prev_valid[1:] = 1.0

    # categorical idx per event
    type_id = g["type_id"].astype("int64").values
    res_id  = g["res_id"].astype("int64").values

    # continuous features per event (T, F_cont)
    # x,y -> start_x,start_y
    # end_x,end_y -> masked for last event
    # dt -> time gap
    # is_start,is_end -> flags
    # dist_to_goal, angle_to_goal -> geometry
    cont = np.stack(
        [
            sx,            # 1
            sy,            # 2
            ex_mask,       # 3
            ey_mask,       # 4
            dt,            # 5
            dist_to_goal,  # 6
            theta_view,    # 7
            in_own_half,   # 8
            dist_p_box,    # 9
            prev_dx,       # 10
            prev_dy,       # 11
            prev_valid     # 12
        ],
        axis=1
    ).astype("float32")  # (T, F_cont)

    episodes.append({
        "cont": cont,               # continuous features
        "type_id": type_id,         # categorical type idx
        "res_id": res_id            # categorical result idx
    })

    targets.append(np.array([tx, ty], dtype="float32"))  # target (x,y)
    episode_keys.append(key)  # for rematching
    episode_game_ids.append(key.split("_")[0])  # game_id only


print("num episodes:", len(episodes))
print("example cont shape:", episodes[0]["cont"].shape, "| example target:", targets[0])

# debug
sample_cont = episodes[0]["cont"]
print("\\nfeatures check:")
print(f"  cont_dim (should be 12): {sample_cont.shape[1]}")
print(f"  prev_valid[0] (should be 0): {sample_cont[0, 11]}")
if sample_cont.shape[0] > 1:
    print(f"  prev_valid[1] (should be 1): {sample_cont[1, 11]}")
    print(f"  prev_dx[1]: {sample_cont[1, 9]:.4f}, prev_dy[1]: {sample_cont[1, 10]:.4f}")

100%|██████████| 15435/15435 [00:09<00:00, 1656.29it/s]

num episodes: 15428
example cont shape: (49, 12) | example target: [97.13403 41.79307]
\nfeatures check:
  cont_dim (should be 12): 12
  prev_valid[0] (should be 0): 0.0
  prev_valid[1] (should be 1): 1.0
  prev_dx[1]: -21.0958, prev_dy[1]: 4.7893





## 4. Custom Dataset / DataLoader 정의 및 Validation 분할

In [7]:
class EpisodeDataset(Dataset):
    # store sequences and targets, give tuple(묶음) by idx

    def __init__(self, episodes, targets, keys, augment=False, noise_std=0.0):
        self.episodes = episodes # episodes: list of dict(cont, type_id, res_id)
        self.targets = targets # targets: list of (x,y)
        self.keys = keys  # ex) 12345_7 used for game_episode rematching, debug
        self.augment = augment  # whether to apply augmentation
        self.noise_std = noise_std  # noise std for coordinates

    # number of episodes
    def __len__(self):
        return len(self.episodes)

    # return one episode as tensor [cont, type_id, res_id, y, key]
    def __getitem__(self, idx):
        ep = self.episodes[idx]                             # episode dict, includes below
        cont = ep["cont"].copy()                            # copy to avoid modifying original
        
        # augmentation: add noise to coordinates (sx, sy, ex_mask, ey_mask)
        if self.augment and self.noise_std > 0:
            noise = np.random.randn(cont.shape[0], 4).astype("float32") * self.noise_std
            cont[:, 0:4] += noise  # sx, sy, ex_mask, ey_mask
            # clamp to valid stadium range
            cont[:, 0] = np.clip(cont[:, 0], 0, 105)  # sx
            cont[:, 1] = np.clip(cont[:, 1], 0, 68)   # sy
            cont[:, 2] = np.clip(cont[:, 2], 0, 105)  # ex_mask
            cont[:, 3] = np.clip(cont[:, 3], 0, 68)   # ey_mask
        
        cont = torch.from_numpy(cont)                       # features tensor
        type_id = torch.from_numpy(ep["type_id"])           # type_name tensor
        res_id  = torch.from_numpy(ep["res_id"])            # res_id tensor
        y = torch.from_numpy(self.targets[idx])             # target tensor
        key = self.keys[idx]                                # for debug
        return cont, type_id, res_id, y, key                # tuple: (cont, type_id, res_id, y, key)

# collate(합치다) variable-length samples into a padded batch
def collate_fn(batch):
    # unpack tuple list
    conts, type_ids, res_ids, ys, keys = zip(*batch)

    # lengths before padding
    lengths = torch.tensor([c.shape[0] for c in conts], dtype=torch.long)

    # pad to max length in batch
    cont_pad = pad_sequence(conts, batch_first=True, padding_value=0.0)       # (B, T_max, F)
    type_pad = pad_sequence(type_ids, batch_first=True, padding_value=0)      # (B, T_max)
    res_pad  = pad_sequence(res_ids,  batch_first=True, padding_value=0)      # (B, T_max)
    y = torch.stack(ys, dim=0).float()                                        # (B, 2)

    return cont_pad.float(), type_pad.long(), res_pad.long(), lengths, y, keys # to verify padded length

# split train/valid
episode_game_ids = np.array(episode_game_ids, dtype=np.int64) # convert game_id list to numpy array

# ex) 10 game_ids -> 2,2,2,2,2 for 5-fold
gkf = GroupKFold(n_splits=N_SPLITS)

# FOLDth idx -> for validation, rest for training
tr_idx, va_idx = None, None
for fold_i, (tr, va) in enumerate(
    gkf.split(
        np.zeros(len(episodes)),  # dummy X
        np.zeros(len(episodes)),  # dummy y
        groups=episode_game_ids   # group key
    )
):
    if fold_i == FOLD:
        tr_idx, va_idx = tr, va
        break

# Defensive check for fold selection.
assert tr_idx is not None and va_idx is not None, "Fold selection failed. Check FOLD and N_SPLITS."

# build train split datasets from tr_idx
train_eps = [episodes[i] for i in tr_idx]
train_tg  = [targets[i]  for i in tr_idx]
train_keys= [episode_keys[i] for i in tr_idx]

# build valid split datasets from va_idx
valid_eps = [episodes[i] for i in va_idx]
valid_tg  = [targets[i]  for i in va_idx]
valid_keys= [episode_keys[i] for i in va_idx]

# Dataset: 문제집
train_ds = EpisodeDataset(train_eps, train_tg, train_keys, augment=True, noise_std=NOISE_STD)
valid_ds = EpisodeDataset(valid_eps, valid_tg, valid_keys, augment=False)  # no augment for valid

# DataLoader: 문제집의 문제
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  collate_fn=collate_fn) # train: shuffle
valid_loader = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn) # valid: no shuffle

# print dataset sizes
print("train episodes:", len(train_ds), "| valid episodes:", len(valid_ds))
# get one batch for debug
cont_pad, type_pad, res_pad, lengths, y, keys = next(iter(train_loader))
print("batch cont_pad:", tuple(cont_pad.shape)) # [B, T_max, F]
print("batch type_pad:", tuple(type_pad.shape)) # [B, T_max]
print("batch res_pad:", tuple(res_pad.shape))   # [B, T_max]
print("lengths:", tuple(lengths.shape))         # [B]
print("y:", tuple(y.shape))                     # [B, 2]
print("example key:", keys[0])                  # ex) 12345_7


train episodes: 12320 | valid episodes: 3108
batch cont_pad: (256, 173, 12)
batch type_pad: (256, 173)
batch res_pad: (256, 173)
lengths: (256,)
y: (256, 2)
example key: 126357_49


## 5. LSTM 베이스라인 모델 정의

In [None]:
class PassLSTM(nn.Module):
    # lstm for sequence regression with multi-head attention pooling
    def __init__(self, cont_dim, n_type, n_res, emb_dim=16, hidden=256, num_layers=NUM_LAYERS, dropout=DROPOUT, num_heads=NUM_HEADS):
        super().__init__()

        # number to learnable vectors(embeddings)
        self.type_emb = nn.Embedding(n_type, emb_dim, padding_idx=0)
        self.res_emb  = nn.Embedding(n_res,  emb_dim, padding_idx=0)

        # in_dim = cont + type_emb + res_emb
        in_dim = cont_dim + emb_dim + emb_dim
        
        # Batch normalization for input features
        self.input_bn = nn.BatchNorm1d(in_dim)

        # lstm backbone
        self.lstm = nn.LSTM(
            input_size=in_dim,
            hidden_size=hidden, # memory
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0, # no dropout if single layer
            bidirectional=False
        )
        
        # Batch normalization after LSTM
        self.lstm_bn = nn.BatchNorm1d(hidden)
        
        self.hidden = hidden
        self.num_heads = num_heads
        self.head_dim = hidden // num_heads
        
        assert hidden % num_heads == 0, "hidden must be divisible by num_heads"
        
        # multi-head attention: each head learns different importance patterns
        self.attn_heads = nn.ModuleList([
            nn.Linear(hidden, 1, bias=False) for _ in range(num_heads)
        ])
        
        # projection after concatenating heads
        self.head_proj = nn.Linear(hidden * num_heads, hidden)
        
        # Batch normalization after head projection
        self.head_proj_bn = nn.BatchNorm1d(hidden)
        
        # layer norm for stability
        self.layer_norm = nn.LayerNorm(hidden)

        # regression head -> (x,y)
        # receive pooled hidden state and output (x,y)
        self.head = nn.Sequential(
            nn.Linear(hidden, hidden),
            nn.BatchNorm1d(hidden),  # Added BN
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, 2) # (x,y)
        )

    # forward pass with multi-head attention pooling
    # embed -> concat -> pack -> lstm -> unpack -> multi-head attention pool -> head
    def forward(self, cont_pad, type_pad, res_pad, lengths):
        # embed categories
        te = self.type_emb(type_pad)  # (B,T,emb)
        re = self.res_emb(res_pad)    # (B,T,emb)

        # concat all features
        x = torch.cat([cont_pad, te, re], dim=-1)  # (B,T,in_dim)
        
        # Apply batch normalization to input features
        # Reshape for BatchNorm1d: (B, T, F) -> (B*T, F) -> BN -> (B, T, F)
        B, T, F = x.shape
        x_flat = x.view(B*T, F)
        x_flat = self.input_bn(x_flat)
        x = x_flat.view(B, T, F)

        # pack padded sequence to ignore padding steps
        packed = pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)
        packed_out, (h_n, c_n) = self.lstm(packed)
        
        # unpack to get all timestep outputs for pooling
        outputs, _ = pad_packed_sequence(packed_out, batch_first=True)  # (B, T, H)
        
        # Apply batch normalization to LSTM outputs
        B, T, H = outputs.shape
        outputs_flat = outputs.contiguous().view(B*T, H)
        outputs_flat = self.lstm_bn(outputs_flat)
        outputs = outputs_flat.view(B, T, H)
        
        # mask: True for valid timesteps, False for padding
        idx = torch.arange(T, device=outputs.device).unsqueeze(0)       # (1, T)
        mask = (idx < lengths.unsqueeze(1))                             # (B, T) bool
        
        # multi-head attention pooling
        head_outputs = []
        for attn_layer in self.attn_heads:
            # attention scores for this head
            scores = attn_layer(outputs).squeeze(-1)                    # (B, T)
            
            # mask padding positions with -inf before softmax
            scores = scores.masked_fill(~mask, float('-inf'))           # (B, T)
            
            # attention weights (softmax over valid timesteps)
            attn_weights = torch.softmax(scores, dim=1)                 # (B, T)
            
            # handle all-padding edge case
            attn_weights = torch.nan_to_num(attn_weights, nan=0.0)
            
            # weighted sum for this head
            pooled_head = torch.bmm(attn_weights.unsqueeze(1), outputs).squeeze(1)  # (B, H)
            head_outputs.append(pooled_head)
        
        # concatenate all heads and project
        multi_head = torch.cat(head_outputs, dim=-1)  # (B, H * num_heads)
        pooled = self.head_proj(multi_head)           # (B, H)
        
        # Apply batch normalization to head projection
        pooled = self.head_proj_bn(pooled)
        
        # layer normalization
        pooled = self.layer_norm(pooled)
        
        # predict from multi-head attention-pooled representation
        out = self.head(pooled)  # (B, 2)
        return out

# sizes for embeddings
n_type = len(le_type.classes_) + 1  # +1 for padding idx=0
n_res  = len(le_res.classes_)  + 1  # +1 for padding idx=0
cont_dim = int(episodes[0]["cont"].shape[1])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = PassLSTM(
                cont_dim=cont_dim, 
                n_type=n_type, 
                n_res=n_res, 
                emb_dim=16, 
                hidden=HIDDEN_SIZE, 
                num_layers=NUM_LAYERS, 
                dropout=DROPOUT,
                num_heads=NUM_HEADS
                ).to(device)

print("device:", device)
print("n_type:", n_type, "n_res:", n_res, "cont_dim:", cont_dim, "num_heads:", NUM_HEADS)


device: cuda
n_type: 27 n_res: 10 cont_dim: 12 num_heads: 4


## 6. 모델 학습 및 검증

In [9]:
def euclidean_sum_and_count(pred, true):
    # EN: Return sum of Euclidean distances and sample count.
    # KR: 유클리드 거리 합과 샘플 수를 반환해서 “샘플 기준 평균”을 정확히 계산한다.
    d = torch.sqrt(((pred - true) ** 2).sum(dim=1))  # (B,)
    return d.sum().item(), d.numel()

optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
criterion = nn.SmoothL1Loss()

# LR Scheduler: ReduceLROnPlateau (valid 기준)
# valid가 patience epoch 동안 개선 안 되면 lr을 factor만큼 줄임
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='min',       # minimize valid distance
    factor=0.5,       # lr *= 0.5 when plateau
    patience=10,      # wait 10 epochs before reducing
    min_lr=1e-6       # minimum learning rate
)

best_val = 1e9
best_state = None

for epoch in range(1, EPOCHS + 1):
    model.train()

    # EN: Accumulators for weighted means.
    # KR: 샘플 수로 가중 평균을 내기 위한 누적 변수들이다.
    tr_loss_sum = 0.0
    tr_loss_cnt = 0
    tr_euc_sum  = 0.0
    tr_euc_cnt  = 0

    train_pbar = tqdm(train_loader, desc=f"Train {epoch}/{EPOCHS}", leave=False)

    for cont_pad, type_pad, res_pad, lengths, y, keys in train_pbar:
        cont_pad = cont_pad.to(device)
        type_pad = type_pad.to(device)
        res_pad  = res_pad.to(device)
        lengths  = lengths.to(device)
        y        = y.to(device)

        optimizer.zero_grad()
        pred = model(cont_pad, type_pad, res_pad, lengths)

        loss = criterion(pred, y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        # EN: Weighted mean for SmoothL1.
        # KR: SmoothL1Loss는 배치 평균 스칼라이므로, 샘플 수로 다시 가중해 누적한다.
        bsz = y.size(0)
        tr_loss_sum += loss.item() * bsz
        tr_loss_cnt += bsz

        # EN: Also log train Euclidean distance (metric-aligned).
        # KR: train에서도 대회 지표인 유클리드 거리를 함께 누적해서 원인 분석이 가능해진다.
        e_sum, e_cnt = euclidean_sum_and_count(pred.detach(), y)
        tr_euc_sum  += e_sum
        tr_euc_cnt  += e_cnt

        train_pbar.set_postfix(loss=f"{loss.item():.4f}")

    model.eval()

    val_euc_sum = 0.0
    val_euc_cnt = 0

    valid_pbar = tqdm(valid_loader, desc=f"Valid {epoch}/{EPOCHS}", leave=False)

    with torch.no_grad():
        for cont_pad, type_pad, res_pad, lengths, y, keys in valid_pbar:
            cont_pad = cont_pad.to(device)
            type_pad = type_pad.to(device)
            res_pad  = res_pad.to(device)
            lengths  = lengths.to(device)
            y        = y.to(device)

            pred = model(cont_pad, type_pad, res_pad, lengths)
            e_sum, e_cnt = euclidean_sum_and_count(pred, y)
            val_euc_sum += e_sum
            val_euc_cnt += e_cnt

            valid_pbar.set_postfix(dist=f"{(e_sum / max(e_cnt, 1)):.4f}")

    tr_loss  = tr_loss_sum / max(tr_loss_cnt, 1)
    tr_euc   = tr_euc_sum  / max(tr_euc_cnt, 1)
    val_dist = val_euc_sum / max(val_euc_cnt, 1)
    
    # step scheduler based on validation metric
    scheduler.step(val_dist)
    current_lr = optimizer.param_groups[0]['lr']
    
    # gap: valid - train euclid dist -> minus: good, plus: overfit
    print(f"[epoch {epoch}] lr={current_lr:.2e} | train_loss={tr_loss:.4f} | train_euclid_dist={tr_euc:.4f} | valid_euclid_dist={val_dist:.4f} | gap = {(val_dist - tr_euc):.4f}")

    if val_dist < best_val:
        best_val = val_dist
        best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}

if best_state is not None:
    model.load_state_dict(best_state)

print("best valid_euclid_dist:", best_val)


                                                                          

[epoch 1] lr=1.00e-03 | train_loss=33.5499 | train_euclid_dist=53.5278 | valid_euclid_dist=32.3586 | gap = -21.1691


                                                                           

[epoch 2] lr=1.00e-03 | train_loss=19.0856 | train_euclid_dist=30.4629 | valid_euclid_dist=27.2628 | gap = -3.2001


                                                                           

[epoch 3] lr=1.00e-03 | train_loss=16.3784 | train_euclid_dist=26.3420 | valid_euclid_dist=22.6148 | gap = -3.7271


                                                                           

[epoch 4] lr=1.00e-03 | train_loss=11.9031 | train_euclid_dist=19.5270 | valid_euclid_dist=16.6403 | gap = -2.8867


                                                                           

[epoch 5] lr=1.00e-03 | train_loss=10.6457 | train_euclid_dist=17.6291 | valid_euclid_dist=15.9122 | gap = -1.7169


                                                                           

[epoch 6] lr=1.00e-03 | train_loss=10.3321 | train_euclid_dist=17.1774 | valid_euclid_dist=15.7400 | gap = -1.4374


                                                                           

[epoch 7] lr=1.00e-03 | train_loss=10.1565 | train_euclid_dist=16.8880 | valid_euclid_dist=15.6325 | gap = -1.2555


                                                                           

[epoch 8] lr=1.00e-03 | train_loss=9.9876 | train_euclid_dist=16.6126 | valid_euclid_dist=15.1342 | gap = -1.4784


                                                                           

[epoch 9] lr=1.00e-03 | train_loss=9.9255 | train_euclid_dist=16.5371 | valid_euclid_dist=14.7263 | gap = -1.8108


                                                                            

[epoch 10] lr=1.00e-03 | train_loss=9.8349 | train_euclid_dist=16.3697 | valid_euclid_dist=14.8833 | gap = -1.4864


                                                                            

[epoch 11] lr=1.00e-03 | train_loss=9.7159 | train_euclid_dist=16.2093 | valid_euclid_dist=14.9557 | gap = -1.2537


                                                                            

[epoch 12] lr=1.00e-03 | train_loss=9.6310 | train_euclid_dist=16.0900 | valid_euclid_dist=14.9766 | gap = -1.1134


                                                                            

[epoch 13] lr=1.00e-03 | train_loss=9.5679 | train_euclid_dist=15.9620 | valid_euclid_dist=14.6931 | gap = -1.2690


                                                                            

[epoch 14] lr=1.00e-03 | train_loss=9.5717 | train_euclid_dist=15.9844 | valid_euclid_dist=14.4076 | gap = -1.5768


                                                                            

[epoch 15] lr=1.00e-03 | train_loss=9.4340 | train_euclid_dist=15.7920 | valid_euclid_dist=14.8347 | gap = -0.9573


                                                                            

[epoch 16] lr=1.00e-03 | train_loss=9.4521 | train_euclid_dist=15.7861 | valid_euclid_dist=14.6356 | gap = -1.1504


                                                                            

[epoch 17] lr=1.00e-03 | train_loss=9.4050 | train_euclid_dist=15.7096 | valid_euclid_dist=14.6256 | gap = -1.0840


                                                                            

[epoch 18] lr=1.00e-03 | train_loss=9.3766 | train_euclid_dist=15.6696 | valid_euclid_dist=14.4739 | gap = -1.1956


                                                                            

[epoch 19] lr=1.00e-03 | train_loss=9.3599 | train_euclid_dist=15.6493 | valid_euclid_dist=14.6080 | gap = -1.0413


                                                                           

[epoch 20] lr=1.00e-03 | train_loss=9.3111 | train_euclid_dist=15.5809 | valid_euclid_dist=14.4408 | gap = -1.1401


                                                                           

[epoch 21] lr=1.00e-03 | train_loss=9.1969 | train_euclid_dist=15.3899 | valid_euclid_dist=14.1714 | gap = -1.2185


                                                                           

[epoch 22] lr=1.00e-03 | train_loss=9.2282 | train_euclid_dist=15.4401 | valid_euclid_dist=14.2671 | gap = -1.1730


                                                                           

[epoch 23] lr=1.00e-03 | train_loss=9.0956 | train_euclid_dist=15.2366 | valid_euclid_dist=14.3070 | gap = -0.9295


                                                                           

[epoch 24] lr=1.00e-03 | train_loss=9.1989 | train_euclid_dist=15.3881 | valid_euclid_dist=14.1465 | gap = -1.2416


                                                                           

[epoch 25] lr=1.00e-03 | train_loss=9.0890 | train_euclid_dist=15.2190 | valid_euclid_dist=14.2878 | gap = -0.9311


                                                                           

[epoch 26] lr=1.00e-03 | train_loss=9.0818 | train_euclid_dist=15.2419 | valid_euclid_dist=14.4248 | gap = -0.8172


                                                                          

[epoch 27] lr=1.00e-03 | train_loss=9.0655 | train_euclid_dist=15.2131 | valid_euclid_dist=14.2906 | gap = -0.9224


                                                                           

[epoch 28] lr=1.00e-03 | train_loss=9.0332 | train_euclid_dist=15.1604 | valid_euclid_dist=14.4345 | gap = -0.7259


                                                                 

KeyboardInterrupt: 

## 7. 평가 데이터셋 추론

In [None]:
if val_dist < best_val:
        best_val = val_dist
        best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}

if best_state is not None:
    model.load_state_dict(best_state)

print("best valid_euclid_dist:", best_val)

best valid_euclid_dist: 13.886074679852145


In [None]:
# read -> feature -> predict -> submit

TEST_META_PATH = "../data/test.csv"
SUBMISSION_PATH = "../data/sample_submission.csv"
DATA_ROOT = "../data"

# use the same geometry constants as training
STADIUM_X, STADIUM_Y = 105.0, 68.0

CENTER_Y = STADIUM_Y / 2.0  # 34.0
HALF_X   = STADIUM_X / 2.0  # 52.5

GOAL_X, GOAL_Y = STADIUM_X, CENTER_Y  # (105.0, 34.0)

GOAL_POST_HALF = 3.66
GOAL_Y_L = CENTER_Y - GOAL_POST_HALF  # 30.34
GOAL_Y_R = CENTER_Y + GOAL_POST_HALF  # 37.66

P_BOX_X_MIN = STADIUM_X - 16.5        # 88.5
P_BOX_Y_MIN = CENTER_Y - 20.16        # 13.84
P_BOX_Y_MAX = CENTER_Y + 20.16        # 54.16

# device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# inference = eval mode
model.eval()

# load test episode list and submission template.
test_meta = pd.read_csv(TEST_META_PATH)
submission = pd.read_csv(SUBMISSION_PATH)

# build one episode features using the same preprocessing as training
def build_episode_from_df(g):
    # sort inside episode
    g = g.sort_values(["time_seconds", "action_id"]).reset_index(drop=True)

    # fill categories
    g["type_name"] = g["type_name"].fillna("__NA_TYPE__")
    g["result_name"] = g["result_name"].fillna("__NA_RES__")

    # handle unseen labels safely
    g.loc[~g["type_name"].isin(le_type.classes_), "type_name"] = "__NA_TYPE__"
    g.loc[~g["result_name"].isin(le_res.classes_), "result_name"] = "__NA_RES__"

    # Transform category strings into integer indices.
    type_id = le_type.transform(g["type_name"]).astype("int64") + 1
    res_id  = le_res.transform(g["result_name"]).astype("int64") + 1

    # dt
    t = g["time_seconds"].astype("float32").values
    dt = np.zeros_like(t, dtype="float32")
    dt[1:] = t[1:] - t[:-1]
    dt[dt < 0] = 0.0  # time-reversal safe-guard

    # coordinates
    sx = g["start_x"].astype("float32").values
    sy = g["start_y"].astype("float32").values
    ex = g["end_x"].astype("float32").values
    ey = g["end_y"].astype("float32").values

    # replace nan to 0.0
    sx = np.nan_to_num(sx, nan=0.0)
    sy = np.nan_to_num(sy, nan=0.0)
    ex = np.nan_to_num(ex, nan=0.0)
    ey = np.nan_to_num(ey, nan=0.0)

    # mask last end for leak-safe
    ex_mask = ex.copy()
    ey_mask = ey.copy()
    ex_mask[-1] = 0.0
    ey_mask[-1] = 0.0

    # goal segment distance
    dxg = GOAL_X - sx
    dy_goal = np.maximum(0.0, np.maximum(GOAL_Y_L - sy, sy - GOAL_Y_R)).astype("float32")
    dist_to_goal = np.sqrt(dxg**2 + dy_goal**2).astype("float32")

    # goal view angle
    alpha_L = np.arctan2(GOAL_Y_L - sy, GOAL_X - sx).astype("float32")
    alpha_R = np.arctan2(GOAL_Y_R - sy, GOAL_X - sx).astype("float32")
    theta_view = np.abs(alpha_R - alpha_L).astype("float32")

    # half line features
    in_own_half = (sx < HALF_X).astype("float32")

    # penalty box features
    dx_box = np.maximum(0.0, P_BOX_X_MIN - sx).astype("float32")
    dy_box = np.maximum(0.0, np.maximum(P_BOX_Y_MIN - sy, sy - P_BOX_Y_MAX)).astype("float32")
    dist_p_box = np.sqrt(dx_box**2 + dy_box**2).astype("float32")

    # previous event features
    T = len(g)
    prev_dx = np.zeros(T, dtype="float32")
    prev_dy = np.zeros(T, dtype="float32")
    prev_valid = np.zeros(T, dtype="float32")

    if T > 1:
        # movement for prev event (t-1's end - start)
        dx_prev_raw = ex[:-1] - sx[:-1]  # shape (T-1,)
        dy_prev_raw = ey[:-1] - sy[:-1]

        # assign on t>=1
        prev_dx[1:] = dx_prev_raw
        prev_dy[1:] = dy_prev_raw
        prev_valid[1:] = 1.0

    # continuous features (must match training order)
    cont = np.stack(
        [
            sx,            # 1
            sy,            # 2
            ex_mask,       # 3
            ey_mask,       # 4
            dt,            # 5
            dist_to_goal,  # 6
            theta_view,    # 7
            in_own_half,   # 8
            dist_p_box,    # 9
            prev_dx,       # 10
            prev_dy,       # 11
            prev_valid     # 12
        ],
        axis=1
    ).astype("float32")

    return cont, type_id, res_id

# predict for each episode and store results by key.
pred_map = {}

with torch.no_grad():
    for _, row in test_meta.iterrows():
        game_episode = row["game_episode"]

        # test.csv has a column "path" like "./test/153363/153363_1.csv"
        rel_path = str(row["path"])
        rel_path = rel_path[2:] if rel_path.startswith("./") else rel_path
        full_path = os.path.join(DATA_ROOT, rel_path)

        # read one episode event file.
        g = pd.read_csv(full_path)

        # build features (cont, type_id, res_id).
        cont, type_id, res_id = build_episode_from_df(g)

        # convert arrays to tensors and add batch dim
        cont_t = torch.from_numpy(cont).unsqueeze(0).to(device)     # (T,F) -> (1,T,F)
        type_t = torch.from_numpy(type_id).unsqueeze(0).to(device)  # (T) -> (1,T)
        res_t  = torch.from_numpy(res_id).unsqueeze(0).to(device)   # (T) -> (1,T)
        lengths = torch.tensor([cont.shape[0]], dtype=torch.long).to(device) # true length
        pred = model(cont_t.float(), type_t.long(), res_t.long(), lengths)  # (1,2)

        pred_xy = pred.squeeze(0).detach().cpu().numpy().astype("float32")

        pred_map[game_episode] = pred_xy # prediction

# align predictions to sample_submission order
preds_x = []
preds_y = []
missing = []

for ge in submission["game_episode"].tolist():
    # look up prediction by game_episode key.
    if ge not in pred_map:
        # handle missing predictions (should not happen)
        missing.append(ge)
        preds_x.append(0.0)
        preds_y.append(0.0)
        continue
    px, py = pred_map[ge]
    preds_x.append(float(px))
    preds_y.append(float(py))

if len(missing) > 0:
    # Warn if any episodes are missing.
    print("warning: missing episodes in pred_map:", len(missing))

# Assign predicted
submission["end_x"] = preds_x
submission["end_y"] = preds_y

# Done inference for all rows.
print("inference done:", len(submission))


inference done: 2414


## 8. 제출 Submission 생성

In [None]:
base = "LSTM_2_submit"
ext = ".csv"

i = 0
while True:
    out_name = f"{base}_{i}{ext}"
    if not os.path.exists(out_name):
        break
    i += 1

submission[["game_episode", "end_x", "end_y"]].to_csv(out_name, index=False)
print("saved:", out_name)


saved: LSTM_2_submit_5.csv
