In [None]:
from pathlib import Path
import pandas as pd
import os

# ================== SETUP ==================
# Make the notebook run from the project root: when the kernel was started
# inside a folder named "notebooks", step up one level before changing
# the working directory.
launch_dir = Path.cwd()
project_root = launch_dir.parent if launch_dir.name == "notebooks" else launch_dir
os.chdir(project_root)

# ================== PATHS ==================
# Input side: the raw injury report CSV.
DATA_RAW = project_root.joinpath("data_raw")
INJURY_DIR = DATA_RAW.joinpath("injury_reports_raw")
INJURY_CSV = INJURY_DIR.joinpath("latest_injury.csv")

# Output side: the processed CSV written at the end of the notebook.
DATA_INTERIM = project_root.joinpath("data_interim")
OUT_CSV = DATA_INTERIM.joinpath("latest_injury_team_status.csv")

# Create the output folder (the parent of OUT_CSV) if it does not exist yet.
DATA_INTERIM.mkdir(parents=True, exist_ok=True)

# ================== LOAD ==================
# Read the injury report; if the file is absent, stop immediately with an
# explicit error instead of letting pandas fail.
if INJURY_CSV.exists():
    df = pd.read_csv(INJURY_CSV)
else:
    raise FileNotFoundError(
        f"❌ latest_injury.csv bulunamadı: {INJURY_CSV.resolve()}\n"
        "Önce download_injury_report.py çalıştırılmalı."
    )

# Quick confirmation of what was loaded and how many rows it has.
print(f"✅ Injury CSV yüklendi: {INJURY_CSV}")
print("Rows:", df.shape[0])

✅ Injury CSV yüklendi: ..\data_raw\injury_reports_raw\latest_injury.csv
Rows: 60


In [None]:
# ================== CLEAN ==================
# Keep player rows only: rows without a player_name are dropped.
df = df[df["player_name"].notna()].copy()

# Normalize the status text: upper-case and trim surrounding whitespace.
df["status"] = df["status"].str.upper().str.strip()

# Only these statuses are retained; any other value (including missing) is dropped.
VALID_STATUSES = {"OUT", "QUESTIONABLE", "DOUBTFUL"}
# .copy() makes the filtered frame independent of the frame it was sliced
# from, so the column assignments below do not trigger pandas'
# SettingWithCopyWarning (same pattern as the player_name filter above).
df = df[df["status"].isin(VALID_STATUSES)].copy()

# ================== ONE-HOT → BINARY ==================
# One 0/1 indicator column per retained status.
df["inj_out"] = (df["status"] == "OUT").astype(int)
df["inj_questionable"] = (df["status"] == "QUESTIONABLE").astype(int)
df["inj_doubtful"] = (df["status"] == "DOUBTFUL").astype(int)

In [None]:
# ================== DEDUP ==================
# Keep a single row per player + game date + matchup.
# Rows are ordered first (missing values last) and the first row of each
# duplicate group is the one that survives.
# NOTE(review): "status" is sorted alphabetically, so when the other sort
# keys tie, DOUBTFUL is kept over OUT and QUESTIONABLE; "report_datetime" is
# not part of the ordering. Confirm this tie-break is the intended one.
sort_keys = ["player_name", "game_date", "game_time_et", "status"]
dedup_keys = ["player_name", "game_date", "matchup"]

df_sorted = df.sort_values(by=sort_keys, na_position="last")
df = df_sorted.drop_duplicates(subset=dedup_keys, keep="first")

In [None]:
# ================== SELECT ==================
# Columns written to the output file, in output order.
KEEP_COLS = [
    # report / game identifiers
    "report_datetime", "game_date", "game_time_et", "matchup",
    # team and player
    "team", "player_name",
    # 0/1 status indicator columns
    "inj_out", "inj_questionable", "inj_doubtful",
]

# Project the frame onto exactly those columns (raises KeyError if any is missing).
df_out = df.loc[:, KEEP_COLS]

In [None]:
# ================== SAVE ==================
# Write the player-level table to OUT_CSV without the index column,
# then print the resolved path of the written file.
df_out.to_csv(path_or_buf=OUT_CSV, index=False)

print(f"✅ Oyuncu bazlı injury CSV kaydedildi:\n{OUT_CSV.resolve()}")

NameError: name 'OUT_CSV' is not defined