In [1]:
import os
import pandas as pd
import numpy as np

# Input locations used throughout the notebook.
# BASE_02 is listed for per-athlete sub-directories; the two BASE_03 roots are
# where the static / dynamic label parquet files are looked up further down.
BASE_02 = "/workspace/data/02/players"
BASE_03_STATIC = "/workspace/data/03/static"
BASE_03_DYNAMIC = "/workspace/data/03/dynamic"

# Fail fast on a fresh kernel: every root that later cells read from must be a
# directory. Checking all three here (not only BASE_02) avoids a late, unrelated
# error further down when one of the label roots is absent.
for _required_dir in (BASE_02, BASE_03_STATIC, BASE_03_DYNAMIC):
    assert os.path.isdir(_required_dir), (
        "required data directory not found: " + _required_dir
    )


In [2]:
# Names of all sub-directories directly under BASE_02, in sorted order.
# Plain files are ignored; presumably each directory name is an athlete ID.
athlete_ids = [
    name
    for name in os.listdir(BASE_02)
    if os.path.isdir(os.path.join(BASE_02, name))
]
athlete_ids.sort()

print("ÈÅ∏ÊâãÊï∞:", len(athlete_ids))
# Last expression: show the first five entries as the cell output.
athlete_ids[:5]


ÈÅ∏ÊâãÊï∞: 30


['121b05df-f5f6-4029-92a7-5420dea45e4d',
 '13bb34b4-8c38-4c86-86b8-bbe8574988c8',
 '15d36f96-6a91-4787-96f8-5fdf8565006b',
 '223a7cbc-a76b-4e36-ab5c-215fc9492e84',
 '3ded61ff-c67b-4776-a1ef-5050bb5c7fd3']

In [3]:
# Label roots, restated here with the same values as in the first cell.
BASE_03_STATIC = "/workspace/data/03/static"
BASE_03_DYNAMIC = "/workspace/data/03/dynamic"

# Athletes to process: sub-directories of the static-label root, sorted by name.
# Only the static root is listed; the dynamic root is not consulted here.
athlete_dirs = [
    name
    for name in os.listdir(BASE_03_STATIC)
    if os.path.isdir(os.path.join(BASE_03_STATIC, name))
]
athlete_dirs.sort()

print("ÂØæË±°ÈÅ∏ÊâãÊï∞:", len(athlete_dirs))
print("ÂÖàÈ†≠5‰∫∫:", athlete_dirs[:5])


ÂØæË±°ÈÅ∏ÊâãÊï∞: 30
ÂÖàÈ†≠5‰∫∫: ['121b05df-f5f6-4029-92a7-5420dea45e4d', '13bb34b4-8c38-4c86-86b8-bbe8574988c8', '15d36f96-6a91-4787-96f8-5fdf8565006b', '223a7cbc-a76b-4e36-ab5c-215fc9492e84', '3ded61ff-c67b-4776-a1ef-5050bb5c7fd3']


In [4]:
# Build one summary row per athlete from the static and dynamic label files:
# the row count of each file and the mean of its anomaly column.
SUMMARY_COLUMNS = [
    "athlete_id",
    "n_static_days",
    "n_dynamic_days",
    "static_rate",
    "dynamic_rate",
]

rows = []
skipped_ids = []  # athletes for which at least one label file is missing

for athlete_id in athlete_dirs:

    stat_path = os.path.join(BASE_03_STATIC, athlete_id, "static_labels.parquet")
    dyn_path  = os.path.join(BASE_03_DYNAMIC, athlete_id, "dynamic_labels.parquet")

    if not (os.path.exists(stat_path) and os.path.exists(dyn_path)):
        # Still best-effort (the athlete is left out of the summary), but the
        # omission is recorded so it can be reported instead of passing silently.
        skipped_ids.append(athlete_id)
        continue

    df_stat = pd.read_parquet(stat_path)
    df_dyn  = pd.read_parquet(dyn_path)

    # Mean of the anomaly column = fraction of flagged rows
    # (assumes the columns hold 0/1 or boolean flags — TODO confirm).
    static_rate  = df_stat["static_anomaly"].mean()
    dynamic_rate = df_dyn["dyn_anomaly"].mean()

    rows.append({
        "athlete_id": athlete_id,
        # Row counts; presumably one row per day, as the key names suggest.
        "n_static_days": len(df_stat),
        "n_dynamic_days": len(df_dyn),
        "static_rate": static_rate,
        "dynamic_rate": dynamic_rate,
    })

# Explicit columns keep the schema stable even when no athlete produced a row,
# so the sort below cannot fail with a KeyError on a column-less frame.
df_summary = pd.DataFrame(rows, columns=SUMMARY_COLUMNS)

if skipped_ids:
    print("skipped athletes (missing label files):", len(skipped_ids), skipped_ids)

print("summary shape:", df_summary.shape)
display(df_summary.sort_values("dynamic_rate", ascending=False).head(10))


summary shape: (30, 5)


Unnamed: 0,athlete_id,n_static_days,n_dynamic_days,static_rate,dynamic_rate
23,d35e1edc-5a93-4c2a-bd6b-a4fbe25297aa,83,54,0.036145,0.018519
9,6eda50d0-970c-44ab-b470-de9ebc71ae52,248,219,0.032258,0.018265
19,a231087b-4ba6-43a3-b96a-8d2d912cf7e0,476,447,0.031513,0.01566
20,a2562343-249b-4971-94fd-0c17b6b38e52,234,205,0.029915,0.014634
29,f38be9da-a872-495a-8d3e-626f9c75ae89,98,69,0.030612,0.014493
22,c77b7e70-5c83-47a3-ab39-e9165aa3e586,236,207,0.033898,0.014493
1,13bb34b4-8c38-4c86-86b8-bbe8574988c8,237,208,0.033755,0.014423
13,83a85906-44bd-4976-8906-53faed1684f3,237,208,0.033755,0.014423
28,f0dfd8b9-7cb1-4a3a-991a-4fb9b1e605d3,238,209,0.033613,0.014354
3,223a7cbc-a76b-4e36-ab5c-215fc9492e84,448,419,0.03125,0.01432


In [5]:
# Persist the per-athlete summary table as a parquet file.
# index=False: the DataFrame's row index is not written to the file.
OUT_PATH = "/workspace/data/03/player_overview_summary.parquet"
df_summary.to_parquet(path=OUT_PATH, index=False)
print("üíæ saved summary ‚Üí", OUT_PATH)


üíæ saved summary ‚Üí /workspace/data/03/player_overview_summary.parquet


In [6]:
# ===========================================
# Player candidate selection for comparison
# ===========================================

import pandas as pd

# Work on a copy of the summary table; df_summary is assumed to have been
# created already by an earlier cell.
df = df_summary.copy()

# State the selection criteria explicitly: a rate counts as "low" when it is at
# or below this quantile of the corresponding column across all athletes.
# Kept as a single constant so both thresholds always use the same cut-off.
LOW_RATE_QUANTILE = 0.3

LOW_DYNAMIC = df["dynamic_rate"].quantile(LOW_RATE_QUANTILE)
LOW_STATIC  = df["static_rate"].quantile(LOW_RATE_QUANTILE)

print("LOW_DYNAMIC threshold:", LOW_DYNAMIC)
print("LOW_STATIC threshold :", LOW_STATIC)

# Candidate A: "stable" type — athletes that are low on BOTH rates,
# ordered by ascending dynamic rate.
stable_players = df[
    (df["dynamic_rate"] <= LOW_DYNAMIC) &
    (df["static_rate"] <= LOW_STATIC)
].sort_values("dynamic_rate")

# Last expression: display the selected athletes as the cell output.
stable_players


LOW_DYNAMIC threshold: 0.010960613081447157
LOW_STATIC threshold : 0.031203968903436987


Unnamed: 0,athlete_id,n_static_days,n_dynamic_days,static_rate,dynamic_rate
2,15d36f96-6a91-4787-96f8-5fdf8565006b,328,299,0.030488,0.010033
7,4759d9d8-9e0e-44b8-9c70-874bf974c4ba,527,498,0.030361,0.01004
16,923787b2-19ea-4cf2-ace3-d9680d482cfb,623,594,0.030498,0.010101
5,44eea4b6-3614-4ca2-b8d7-098b6120c1fb,519,490,0.030829,0.010204
15,8fdd27ba-fabe-4df0-a4ec-a53e7cda8383,611,582,0.031097,0.010309
25,d605c7e5-a678-44ce-8bd9-fa66594edd49,595,566,0.030252,0.010601
