In [None]:
from pathlib import Path
import pandas as pd

# ================== PATHS ==================
# Input report and output file both live in the raw injury-report folder.
DATA_RAW = Path("../data_raw")
INJURY_DIR = DATA_RAW / "injury_reports_raw"
INJURY_CSV = INJURY_DIR / "latest_injury.csv"
OUT_CSV = INJURY_DIR / "latest_injury_team_status.csv"

# ================== LOAD ==================
# Stop early with an actionable message when the report file is not there.
if not INJURY_CSV.exists():
    missing_report_msg = (
        f"❌ latest_injury.csv bulunamadı: {INJURY_CSV.resolve()}\n"
        "Önce download_injury_report.py çalıştırılmalı."
    )
    raise FileNotFoundError(missing_report_msg)

df = pd.read_csv(INJURY_CSV)
# Remove stray whitespace around the header names.
df.columns = df.columns.str.strip()

loaded_rows = df.shape[0]
print(f"✅ Injury CSV yüklendi: {INJURY_CSV}")
print("Rows:", loaded_rows)

✅ Injury CSV yüklendi: ..\data_raw\injury_reports_raw\latest_injury.csv
Rows: 70


In [9]:
# ================== CLEAN ==================
# Map lower-cased header names to the actual header names so the lookups
# below are case-insensitive.
col_map = {c.lower().strip(): c for c in df.columns}
if "player_name" not in col_map:
    raise ValueError(f"player_name kolonu yok: {df.columns}")
# Fail with the same explicit error when the status column is absent: a
# fallback to the literal name "status" could only end in an opaque KeyError,
# because no header lower-cases to "status" in that case.
if "status" not in col_map:
    raise ValueError(f"status kolonu yok: {df.columns}")

# Pin the actual names of the player, status and team columns.
player_col = col_map["player_name"]
status_col = col_map["status"]
# A missing team column is tolerated here; "team" is then only a placeholder.
team_col = col_map.get("team", "team")

# Keep only rows that carry a player name.
df = df[df[player_col].notna()].copy()

# Normalise the status text. Casting to str first keeps the .str accessor
# usable even if the column was parsed as non-string (e.g. entirely empty);
# missing values become "NAN" and are dropped by the filter below.
df[status_col] = df[status_col].astype(str).str.upper().str.strip()

VALID_STATUSES = {"OUT", "QUESTIONABLE", "DOUBTFUL"}
# .copy() detaches the filtered rows so the column assignments below write to
# an independent frame instead of a slice (avoids SettingWithCopyWarning).
df = df[df[status_col].isin(VALID_STATUSES)].copy()

# ================== ONE-HOT → BINARY ==================
# One 0/1 indicator column per retained status.
df["inj_out"] = (df[status_col] == "OUT").astype(int)
df["inj_questionable"] = (df[status_col] == "QUESTIONABLE").astype(int)
df["inj_doubtful"] = (df[status_col] == "DOUBTFUL").astype(int)

In [10]:
# ================== DEDUP ==================
# Keep a single row per player and game.
# Only columns that are actually present are used as keys, so a report that
# lacks one of the game columns does not raise KeyError here (the SELECT step
# further down fills absent output columns with empty values).
sort_keys = [
    key
    for key in (player_col, "game_date", "game_time_et", status_col)
    if key in df.columns
]
dedup_keys = [
    key for key in (player_col, "game_date", "matchup") if key in df.columns
]

# NOTE(review): within a player/game group the first row after sorting wins,
# i.e. smallest game_time_et, then alphabetical status (DOUBTFUL before OUT
# before QUESTIONABLE) — confirm this tie-break is the intended one.
df = (
    df.sort_values(sort_keys, na_position="last")
    .drop_duplicates(subset=dedup_keys, keep="first")
)

# Give the player/team columns their canonical names. Identity mappings and
# labels that are not present are no-ops for rename.
df = df.rename(columns={player_col: "player_name", team_col: "team"})

In [12]:
# ================== SELECT ==================
# Output schema, listed in the order the columns are written.
KEEP_COLS = [
    "report_datetime", "game_date", "game_time_et", "matchup",
    "team", "player_name",
    "inj_out", "inj_questionable", "inj_doubtful",
]

# Any schema column absent from the frame is added as an all-empty column.
absent_cols = [name for name in KEEP_COLS if name not in df.columns]
for name in absent_cols:
    df[name] = None

df_out = df[KEEP_COLS]

In [13]:
# ================== SAVE ==================
# Write the selected columns to OUT_CSV without the index column.
df_out.to_csv(OUT_CSV, index=False)

saved_rows = df_out.shape[0]
print(f"✅ Oyuncu bazlı injury CSV kaydedildi:\n{OUT_CSV.resolve()}")
print("Rows:", saved_rows)

✅ Oyuncu bazlı injury CSV kaydedildi:
C:\Users\ersan\OneDrive\Masaüstü\ders\ceng481\NBA-Game-Prediction-using-Artificial-Neural-Networks\data_raw\injury_reports_raw\latest_injury_team_status.csv
Rows: 93
