In [10]:
import json
import re
import pandas as pd
from datetime import datetime, timezone

# Path of the JSON dump that main() reads via load_json().
IN_PATH = "betano.txt"
# Path of the CSV file that main() writes.
OUT_CSV = "betano_player_props.csv"

# Column names, in order, of the DataFrame built by parse_betano_player_props().
COLUMNS = [
    "event",
    "player",
    "selectionLabel",
    "odds_decimal",
    "status_selection",
    "marketLabel",
    "deadline",
]

def load_json(path: str) -> dict:
    """Read the UTF-8 text file at ``path`` and return its parsed JSON content."""
    with open(path, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def ms_to_iso_utc(ms):
    """Convert a millisecond Unix timestamp to an ISO-8601 string in UTC.

    Args:
        ms: Anything ``int()`` accepts (int, float, numeric string), or None.

    Returns:
        The ``isoformat()`` of the timezone-aware UTC datetime, or None when
        ``ms`` is None, cannot be converted to an integer, or lies outside the
        range the platform can represent.
    """
    if ms is None:
        return None
    try:
        millis = int(ms)
        return datetime.fromtimestamp(millis / 1000, tz=timezone.utc).isoformat()
    except (TypeError, ValueError, OverflowError, OSError):
        # TypeError/ValueError/OverflowError: int() rejected the value
        # (wrong type, non-numeric text, inf/nan).
        # ValueError/OverflowError/OSError: fromtimestamp() was given an
        # out-of-range timestamp. Anything else is a real bug and propagates.
        return None

def normalize_selection_label(label: str):
    """Rewrite an "N+" selection label as the equivalent "Over N-0.5" label.

    Examples: "1+" -> "Over 0.5", "2+" -> "Over 1.5", "3+" -> "Over 2.5".

    Non-string input is returned untouched. Strings that are not exactly
    digits followed by "+" are returned with surrounding whitespace removed.
    """
    if not isinstance(label, str):
        return label

    cleaned = label.strip()
    threshold = re.fullmatch(r"(\d+)\+", cleaned)
    if threshold is None:
        return cleaned

    # "at least N" is the same bet as "over N - 0.5".
    return f"Over {int(threshold.group(1)) - 0.5}"

def _participant_sort_key(participant: dict) -> float:
    """Return a numeric sort key for a participant; unusable values sort last."""
    try:
        order = float(participant.get("sortOrder"))
    except (TypeError, ValueError):
        # Missing, None, or non-numeric sortOrder.
        return 9999.0
    # NaN compares false with everything and would make the sort unpredictable.
    return 9999.0 if order != order else order


def build_event_name(evt: dict) -> str:
    """Return a display name for the event dict ``evt``.

    Sources are tried in this order, the first usable one wins:
      1. ``name`` or ``shortName`` (non-blank string, stripped).
      2. The first two non-blank participant labels, ordered by ``sortOrder``,
         joined as "A - B".
      3. ``slug`` (non-blank string, stripped).
      4. ``id`` or ``eventId`` converted to a string, else "unknown_event".

    Participants that are not dicts are ignored, and a ``sortOrder`` that is
    missing, None, or non-numeric is treated as 9999, so malformed participant
    entries cannot raise during sorting.
    """
    name = evt.get("name") or evt.get("shortName")
    if isinstance(name, str) and name.strip():
        return name.strip()

    parts = evt.get("participants") or []
    if isinstance(parts, list) and len(parts) >= 2:
        ordered = sorted(
            (p for p in parts if isinstance(p, dict)),
            key=_participant_sort_key,
        )
        labels = [
            lab.strip()
            for lab in (p.get("label") for p in ordered)
            if isinstance(lab, str) and lab.strip()
        ]
        if len(labels) >= 2:
            return f"{labels[0]} - {labels[1]}"

    slug = evt.get("slug")
    if isinstance(slug, str) and slug.strip():
        return slug.strip()

    return str(evt.get("id") or evt.get("eventId") or "unknown_event")

def pick_market_header(market: dict) -> str:
    """Return the market's header text, or None when it has none.

    Prefers ``tableLayout.headerTitle`` and falls back to ``name``. A candidate
    is used only if it is a non-blank string; the result is stripped.
    """
    layout = market.get("tableLayout") or {}
    for candidate in (layout.get("headerTitle"), market.get("name")):
        if isinstance(candidate, str):
            trimmed = candidate.strip()
            if trimmed:
                return trimmed
    return None

def selection_price_decimal(sel: dict):
    """Extract the price value of a selection dict, without converting its type.

    Lookup order:
      * ``price`` as a number -> returned as is.
      * ``price`` as a string -> returned stripped.
      * ``price`` as a dict -> value of the first present key among
        "decimal", "odds", "value".
      * otherwise ``oddsFormats["decimal"]`` when ``oddsFormats`` is a dict
        containing that key.

    Returns None when nothing is found.
    """
    price = sel.get("price")

    if isinstance(price, str):
        return price.strip()
    if isinstance(price, (int, float)):
        return price
    if isinstance(price, dict):
        found = next(
            (key for key in ("decimal", "odds", "value") if key in price),
            None,
        )
        if found is not None:
            return price[found]

    formats = sel.get("oddsFormats") or {}
    if isinstance(formats, dict) and "decimal" in formats:
        return formats["decimal"]
    return None

def strip_leading_player_from_header(header: str, players_in_market):
    """Remove a leading player name from a market header.

    The longest names are tried first so a full name wins over a shorter name
    that is also a prefix. Matching is case-insensitive and requires a space
    right after the name. A header that is not a string, or is blank, is
    returned unchanged; otherwise the (possibly shortened) header is returned
    stripped.
    """
    if not isinstance(header, str) or not header.strip():
        return header

    title = header.strip()
    lowered_title = title.lower()

    candidates = [
        name for name in players_in_market
        if isinstance(name, str) and name.strip()
    ]
    candidates.sort(key=len, reverse=True)

    for raw_name in candidates:
        name = raw_name.strip()
        if lowered_title.startswith(f"{name.lower()} "):
            return title[len(name):].strip()
    return title

def normalize_market_header_to_betsson(header: str):
    """Map a market header onto a fixed market label via keyword rules.

    Rules are checked in order and the first matching one wins. A header that
    matches no rule is returned stripped. A header that is not a string, or is
    blank, is returned unchanged.
    """
    if not isinstance(header, str) or not header.strip():
        return header
    text = header.strip()

    # Order matters: "skud på mål" (shots on target) must be tested before the
    # generic "skud" (shots) rule, which would otherwise swallow it.
    rules = (
        (r"\bskud\s+p[åa]\s+m[åa]l\b", "Antal afslutninger på mål"),
        (r"\bskud\b", "Spillers samlede antal skud"),
        (r"\bassist", "Spillers samlede antal assister"),
        (r"\bfrispark\b", "Spiller Frispark Begået"),
        (r"\bfoul", "Spiller Frispark Begået"),
    )
    for pattern, label in rules:
        if re.search(pattern, text, flags=re.IGNORECASE):
            return label
    return text

def is_full_match_market(market: dict, raw_header: str) -> bool:
    """Return True only if the market appears to cover the whole match.

    The header and the market's ``name`` are searched, case-insensitively, for
    substrings that mention halves, periods, quarters or overtime. Any hit
    makes the result False.
    """
    # Substrings that typically mean "not the full match".
    partial_scope_tokens = [
        "1. halvleg", "2. halvleg", "halvleg",
        "1st half", "2nd half", "first half", "second half",
        "periode", "period", "quarter", "q1", "q2", "q3", "q4",
        "overtime", "over time", "ekstra tid", "forlænget",
        "inkl. overtid", "incl. overtime",
    ]

    pieces = [
        value for value in (raw_header, market.get("name"))
        if isinstance(value, str)
    ]
    haystack = " | ".join(pieces).lower()

    for token in partial_scope_tokens:
        if token in haystack:
            return False
    return True

def parse_betano_player_props(doc: dict) -> pd.DataFrame:
    """Flatten the player-prop markets of one event into a DataFrame.

    The event is read from ``doc["data"]["event"]``, falling back to
    ``doc["event"]``. For every market that has ``tableLayout.rows`` and passes
    ``is_full_match_market``, each named row yields one record per selection
    found under ``groupSelections[*].selections[*]``.

    Returns:
        A DataFrame with the columns listed in ``COLUMNS``. Rows whose odds are
        not numeric are dropped, and only one row is kept per
        (event, marketLabel, selectionLabel).
    """
    event = ((doc.get("data") or {}).get("event")) or doc.get("event") or {}
    event_name = build_event_name(event)

    market_list = event.get("markets") or []
    if not isinstance(market_list, list):
        market_list = []

    records = []
    for market in market_list:
        layout = market.get("tableLayout") or {}
        table_rows = layout.get("rows") or []
        if not (isinstance(table_rows, list) and table_rows):
            continue

        raw_header = pick_market_header(market)

        # Keep full-match markets only.
        if not is_full_match_market(market, raw_header):
            continue

        # Pair every row with its trimmed player name; rows without a usable
        # name produce no output.
        named_rows = []
        for row in table_rows:
            title = row.get("title") or row.get("name")
            if isinstance(title, str) and title.strip():
                named_rows.append((title.strip(), row))

        market_type = normalize_market_header_to_betsson(
            strip_leading_player_from_header(
                raw_header, [name for name, _ in named_rows]
            )
        )

        close_ms = (
            market.get("marketCloseTimeMillis")
            or market.get("closeTimeMillis")
            or market.get("deadlineMillis")
        )
        deadline = ms_to_iso_utc(close_ms)
        status_market = market.get("status") or market.get("marketStatus") or "Open"

        for player, row in named_rows:
            market_label = f"{market_type} | {player}" if market_type else player

            groups = row.get("groupSelections") or []
            if not isinstance(groups, list):
                continue

            for group in groups:
                selections = group.get("selections") or []
                if not isinstance(selections, list):
                    continue

                for sel in selections:
                    raw_label = sel.get("name") or sel.get("label")
                    if not isinstance(raw_label, str) or not raw_label.strip():
                        continue

                    records.append({
                        "event": event_name,
                        "player": player,
                        "selectionLabel": normalize_selection_label(raw_label),
                        "odds_decimal": selection_price_decimal(sel),
                        # The market-level status is recorded for every selection.
                        "status_selection": status_market,
                        "marketLabel": market_label,
                        "deadline": deadline,
                    })

    df = pd.DataFrame(records, columns=COLUMNS)
    df["odds_decimal"] = pd.to_numeric(df["odds_decimal"], errors="coerce")
    df = df.dropna(subset=["odds_decimal"]).reset_index(drop=True)

    # Dedupe after the scope filter: keep one row per key. If duplicates
    # remain, take the first after sorting on odds. This is not a "best price"
    # strategy, it only makes the choice deterministic.
    dedupe_key = ["event", "marketLabel", "selectionLabel"]
    df = df.sort_values(dedupe_key + ["odds_decimal"], kind="stable")
    df = df.drop_duplicates(subset=dedupe_key, keep="first").reset_index(drop=True)

    return df

def main():
    """Parse IN_PATH, write the sorted result to OUT_CSV and print a preview."""
    props = parse_betano_player_props(load_json(IN_PATH))

    ordered = props.sort_values(
        ["marketLabel", "player", "selectionLabel"], kind="stable"
    )
    ordered.to_csv(OUT_CSV, index=False, encoding="utf-8")

    print(f"Saved {len(ordered)} rows to {OUT_CSV}")
    print(ordered.head(25))

# Run the export only when this module is executed as the main program.
if __name__ == "__main__":
    main()

Saved 1132 rows to betano_player_props.csv
                        event                 player selectionLabel  \
0   Arsenal FC - Liverpool FC    Alexis Mac Allister       Over 0.5   
1   Arsenal FC - Liverpool FC    Alexis Mac Allister       Over 1.5   
2   Arsenal FC - Liverpool FC    Alexis Mac Allister       Over 2.5   
3   Arsenal FC - Liverpool FC            Amara Nallo       Over 0.5   
4   Arsenal FC - Liverpool FC            Amara Nallo       Over 1.5   
5   Arsenal FC - Liverpool FC  Andre Harriman-Annous       Over 0.5   
6   Arsenal FC - Liverpool FC  Andre Harriman-Annous       Over 1.5   
7   Arsenal FC - Liverpool FC  Andre Harriman-Annous       Over 2.5   
8   Arsenal FC - Liverpool FC  Andre Harriman-Annous       Over 3.5   
9   Arsenal FC - Liverpool FC  Andre Harriman-Annous       Over 4.5   
10  Arsenal FC - Liverpool FC       Andrew Robertson       Over 0.5   
11  Arsenal FC - Liverpool FC       Andrew Robertson       Over 1.5   
12  Arsenal FC - Liverpool FC     