In [17]:
import json
import re
import pandas as pd

# --- 0) Inputs ---
# Path of the event metadata JSON; it is loaded once and turned into the
# event id -> "Home vs Away" lookup.
EVENT_PATH = "event_response.json"  # NEW: your event metadata file

# Accordion files to process. The list order matters: the 1-based position of
# each entry becomes its "source_order", which is the primary sort key of the
# combined output. Only "accordion_path" is read by the code visible here.
MARKETS_IN_ORDER = [
    {"name": "fouls", "accordion_path": "accordion_response_fouls.json"},
    {"name": "shots", "accordion_path": "accordion_response_shots.json"},
    {"name": "shotsOnTarget", "accordion_path": "accordion_response_shotsOnTarget.json"},
    {"name": "assists", "accordion_path": "accordion_response_assists.json"},
]

# Destination path of the combined CSV.
OUT_CSV = "player_props_all.csv"

# Columns (and their order) of the exported CSV.
COLUMNS = [
    "event",              # NEW: readable match name
    "player",
    "selectionLabel",
    "odds_decimal",
    "status_selection",
    "marketLabel",
    "deadline",
]

# --- 1) Helpers ---
def load_json(path: str) -> dict:
    """Read the UTF-8 encoded file at *path* and return its parsed JSON content."""
    with open(path, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def parse_line_from_marketid(market_id: str):
    """
    Extract the numeric line embedded near the end of a market id.

    The id must end with "-<number>-<digits>", where <number> is an integer
    or a decimal; <number> is returned as a float. Returns None when
    *market_id* is not a string or does not end with that pattern.
    """
    if isinstance(market_id, str):
        found = re.search(r"-([0-9]+(?:\.[0-9]+)?)-\d+$", market_id)
        if found is not None:
            return float(found.group(1))
    return None

def selection_decimal(sel: dict):
    """
    Return the price of a selection from its price-format mapping.

    The mapping is taken from "marketSelectionPriceFormats", or from
    "oddsFormats" when the former is missing or empty. Within that mapping
    the value under key "1" wins over the value under key "decimal".
    Returns None when the mapping is not a dict or has neither key.
    """
    formats = sel.get("marketSelectionPriceFormats") or sel.get("oddsFormats") or {}
    if not isinstance(formats, dict):
        return None
    # Key priority is significant: "1" is checked before "decimal".
    for key in ("1", "decimal"):
        if key in formats:
            return formats[key]
    return None

def extract_player_and_prop(market: dict):
    """
    Return (player, prop) for a market.

    Both values are read from marketSpecifics.groupLabels ("2" -> player,
    "1" -> prop). If either is missing, the market's "label" (or
    "marketFriendlyName") is split on its first "|": the left part fills a
    missing prop and the right part fills a missing player.
    """
    specifics = market.get("marketSpecifics") or {}
    labels = specifics.get("groupLabels") or {}
    player, prop = labels.get("2"), labels.get("1")

    if player and prop:
        return player, prop

    # Fallback: "<prop> | <player>" encoded in the display label.
    text = market.get("label") or market.get("marketFriendlyName") or ""
    if "|" in text:
        head, _, tail = text.partition("|")
        prop = prop or head.strip()
        player = player or tail.strip()
    return player, prop

def build_event_map(event_json: dict) -> dict:
    """
    Build a mapping: event id -> "Home vs Away".

    Supported input shapes:
      - {"data": {"event": {...}}}          single event
      - {"data": {"events": [{...}, ...]}}  list of events
      - [{...}, ...]                        the JSON itself is a list of events
      - {...}                               the JSON itself is one event dict

    An event is registered only when it has an id ("eventId", else "id") and
    both a home and an away label can be determined. Non-dict entries in an
    event list or in "participants" are ignored. Any other input (including
    None) yields an empty mapping.
    """
    mapping = {}

    def name_from_event(evt: dict):
        # Skip malformed (non-dict) participant entries instead of crashing on .get.
        participants = [
            p for p in (evt.get("participants") or []) if isinstance(p, dict)
        ]
        home = None
        away = None
        for p in participants:
            if p.get("side") == 1:
                home = p.get("label")
            elif p.get("side") == 2:
                away = p.get("label")
        # Fallback when "side" is missing: use list position (first = home, second = away).
        if not home and len(participants) > 0:
            home = participants[0].get("label")
        if not away and len(participants) > 1:
            away = participants[1].get("label")
        if home and away:
            return f"{home} vs {away}"
        return None

    def register(evt):
        # Add one event to the mapping if it is a dict with both an id and a name.
        if not isinstance(evt, dict):
            return
        eid = evt.get("eventId") or evt.get("id")
        ename = name_from_event(evt)
        if eid and ename:
            mapping[eid] = ename

    # The JSON itself is a list of events.
    if isinstance(event_json, list):
        for evt in event_json:
            register(evt)
        return mapping

    if not isinstance(event_json, dict):
        return mapping

    # The JSON itself is a single event. A response wrapper has no
    # id/participants of its own, so this is a no-op for the wrapped shapes.
    # Registered first so the wrapped shapes below keep precedence on a clash.
    register(event_json)

    data = event_json.get("data")
    if isinstance(data, dict):
        # { data: { event: {...} } }
        register(data.get("event"))
        # { data: { events: [ {...}, ... ] } }
        events = data.get("events")
        if isinstance(events, list):
            for evt in events:
                register(evt)

    return mapping

def get_event_id_from_accordion(accordion: dict):
    """
    Return the "eventId" of the first market found in the accordion.

    Groups under data.accordions are scanned in order; the first group with
    a non-empty "markets" list decides the result. Returns None when no
    group has any markets.
    """
    payload = accordion.get("data") or {}
    groups = payload.get("accordions") or {}
    for group in groups.values():
        market_list = group.get("markets") or []
        if not market_list:
            continue
        return market_list[0].get("eventId")
    return None

# --- 2) Parse one accordion file ---
def parse_one_accordion(accordion: dict, source_order: int, event_map: dict) -> pd.DataFrame:
    """
    Flatten one accordion payload into one row per selection.

    Markets and selections are collected from every group under
    data.accordions; selections are then left-joined to their market on
    "marketId".

    Args:
        accordion: Parsed accordion JSON.
        source_order: Value stored in the "source_order" column of every row.
        event_map: Mapping of event id -> readable event name.

    Returns:
        DataFrame with COLUMNS plus "source_order". It is empty (but has
        those columns) when the payload contains no markets or no selections.
    """
    accordions = (accordion.get("data", {}) or {}).get("accordions", {}) or {}

    # Accordion-level event id (first market's), used for markets without their own eventId.
    default_event_id = get_event_id_from_accordion(accordion)

    def event_label(event_id):
        # Prefer the readable name; fall back to the raw id so the column is never silently blank.
        name = event_map.get(event_id) if event_id else None
        return name or event_id

    market_rows = []
    selection_rows = []

    for group in accordions.values():
        # "or []" tolerates keys that are present but null, like get_event_id_from_accordion does.
        for m in group.get("markets") or []:
            market_id = m.get("id")
            player, _ = extract_player_and_prop(m)

            market_rows.append({
                # Resolve the event per market so a file spanning several events is labelled correctly.
                "event": event_label(m.get("eventId") or default_event_id),
                "marketId": market_id,
                "marketLabel": m.get("label"),
                "deadline": m.get("deadline"),
                "player": player,
                "line": parse_line_from_marketid(market_id),  # kept internally; not part of COLUMNS
            })

        for s in group.get("selections") or []:
            selection_rows.append({
                "marketId": s.get("marketId"),
                "selectionLabel": s.get("label"),
                "odds_decimal": selection_decimal(s),
                "status_selection": s.get("status"),
            })

    df_markets = pd.DataFrame(market_rows)
    df_selections = pd.DataFrame(selection_rows)

    if df_markets.empty or df_selections.empty:
        return pd.DataFrame(columns=COLUMNS + ["source_order"])

    # Left join keeps every selection, even one whose market is missing.
    df_all = df_selections.merge(df_markets, on="marketId", how="left")
    # Unparseable odds become NaN instead of raising.
    df_all["odds_decimal"] = pd.to_numeric(df_all["odds_decimal"], errors="coerce")

    df_all["source_order"] = source_order
    return df_all[COLUMNS + ["source_order"]].copy()

# --- 3) Main ---
# Build the event id -> "Home vs Away" lookup from the event metadata file.
event_json = load_json(EVENT_PATH)
event_map = build_event_map(event_json)

# Parse each accordion file; source_order is its 1-based position in MARKETS_IN_ORDER.
parts = []
for i, item in enumerate(MARKETS_IN_ORDER, start=1):
    accordion = load_json(item["accordion_path"])
    parts.append(parse_one_accordion(accordion, source_order=i, event_map=event_map))

# Concatenate only the non-empty frames: passing empty frames to pd.concat is
# deprecated dtype behaviour in recent pandas (FutureWarning) and they add no rows.
non_empty_parts = [part for part in parts if not part.empty]
if non_empty_parts:
    df_combined = pd.concat(non_empty_parts, ignore_index=True)
else:
    # pd.concat raises on an empty list, so build the empty result explicitly.
    df_combined = pd.DataFrame(columns=COLUMNS + ["source_order"])

# Sorting: first fouls, then shots, etc. (source_order), then tidy ordering inside each block.
df_combined = df_combined.sort_values(
    ["source_order", "event", "player", "selectionLabel"],
    kind="stable",
    na_position="last"
)

# source_order is only a sort helper; it is not exported.
df_out = df_combined.drop(columns=["source_order"], errors="ignore")
df_out.to_csv(OUT_CSV, index=False, encoding="utf-8")
print(f"Saved {len(df_out)} rows to {OUT_CSV}")

Saved 465 rows to player_props_all.csv
