In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

# --- paths
# Resolve the project root whether the kernel was started from the project
# root itself or from its "notebooks" sub-folder.
HERE = Path.cwd()
PROJECT_ROOT = HERE.parent if HERE.name == "notebooks" else HERE

in_path = PROJECT_ROOT / "data_processed" / "tableau" / "v_tableau_final_analysis.csv"

df = pd.read_csv(in_path)

# --- metric configuration
# Maps each output percentile column to (raw source column, higher_better).
# higher_better tells the ranking step whether larger raw values should
# receive the higher percentile (True) or the lower one (False).
metric_map = {
    "pct_possession":       ("possession", True),
    "pct_field_tilt":       ("field_tilt", True),
    "pct_xt":               ("xt", True),
    "pct_xgd":              ("xgd", True),
    "pct_passes_into_box":  ("passes_into_box", True),
    "pct_avg_pass_height":  ("avg_pass_height", True),
    "pct_set_piece_xg":     ("set_piece_xg", True),
    "pct_points":           ("points", True),

    # PPDA: a lower value means higher pressing, so higher_better=False
    "pct_ppda":  ("ppda", False),
}

# --- force numeric
# Fail early and name every missing raw column at once, instead of stopping
# at the first one with a bare KeyError from the column lookup.
raw_cols = [raw_col for raw_col, _higher_better in metric_map.values()]
missing_cols = [name for name in raw_cols if name not in df.columns]
if missing_cols:
    raise KeyError(f"Missing raw metric columns in {in_path.name}: {missing_cols}")

# errors="coerce" turns unparseable entries into NaN rather than raising.
# Looping over the prepared list (not `for _, (col, _) in ...`) avoids binding
# `_` at cell level, which would shadow IPython's last-output variable.
for raw_col in raw_cols:
    df[raw_col] = pd.to_numeric(df[raw_col], errors="coerce")

# percentile per season
def pct_within_season(series: pd.Series, higher_better: bool) -> pd.Series:
    """Convert raw metric values into percentile ranks on a 0-100 scale.

    The ranking is computed over exactly the values in ``series``; the
    function itself does no grouping, so callers that want a per-season
    percentile must pass one season's values at a time.

    Parameters
    ----------
    series : pd.Series
        Raw metric values to rank.
    higher_better : bool
        True  -> larger raw values receive higher percentiles.
        False -> smaller raw values receive higher percentiles (e.g. PPDA).

    Returns
    -------
    pd.Series
        Percentile ranks (fractional rank * 100) aligned to ``series.index``.
        Tie and NaN handling follow the defaults of ``Series.rank``
        (ties share their average rank; NaN inputs stay NaN).
    """
    # ascending=True gives the largest value the top rank; ascending=False
    # flips the order so the smallest value ends up with the top percentile.
    return series.rank(pct=True, ascending=bool(higher_better)) * 100

# calculate percentiles per season (each team vs. the league in that season)
for pct_col, (raw_col, higher_better) in metric_map.items():
    # higher_better is forwarded through transform's keyword arguments rather
    # than captured by a lambda: a lambda would close over the loop variable
    # (late binding) and only be correct because it is invoked immediately.
    df[pct_col] = (
        df.groupby("season")[raw_col]
        .transform(pct_within_season, higher_better=higher_better)
        .round(2)
    )

# save file
out_dir = PROJECT_ROOT / "data_processed" / "tableau"
out_dir.mkdir(parents=True, exist_ok=True)  # create the folder tree if absent

out_path = out_dir / "v_tableau_final_analysis_pct.csv"
df.to_csv(out_path, index=False)  # index=False: do not write the row index

print("✅ Saved:", out_path)
print("Cols added:", list(metric_map.keys()))


✅ Saved: C:\Users\tozes\Documents\IronHack\pl_21-25_analysis\data_processed\tableau\v_tableau_final_analysis_pct.csv
Cols added: ['pct_possession', 'pct_field_tilt', 'pct_xt', 'pct_xgd', 'pct_passes_into_box', 'pct_avg_pass_height', 'pct_set_piece_xg', 'pct_points', 'pct_ppda']
