In [6]:
import json
import re
import pandas as pd
from datetime import datetime, timezone

# Path of the JSON document read by main(), and of the CSV it writes.
IN_PATH = "betano.txt"
OUT_CSV = "betano_player_props.csv"

# Column names (and order) of the DataFrame built by parse_betano_player_props().
COLUMNS = [
    "event",
    "player",
    "selectionLabel",
    "odds_decimal",
    "status_selection",
    "marketLabel",
    "deadline",
]

# Hard-coded event name, as requested; written to the "event" column of every row.
FORCED_EVENT_NAME = "Eintracht Frankfurt vs Borussia Dortmund"

# =========================
# HELPERS
# =========================
def load_json(path: str) -> dict:
    """Read the UTF-8 encoded file at ``path`` and return its parsed JSON content."""
    with open(path, mode="r", encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed

def ms_to_iso_utc(ms):
    """Convert a Unix timestamp in milliseconds to an ISO 8601 string in UTC.

    Args:
        ms: Milliseconds since the epoch, as an int or any value ``int()``
            accepts (e.g. a numeric string). May be None.

    Returns:
        The ``datetime.isoformat()`` text of the timezone-aware UTC datetime,
        or None when ``ms`` is None, cannot be converted to an int, or lies
        outside the range ``datetime.fromtimestamp`` supports.
    """
    if ms is None:
        return None
    try:
        seconds = int(ms) / 1000
        return datetime.fromtimestamp(seconds, tz=timezone.utc).isoformat()
    except (TypeError, ValueError, OverflowError, OSError):
        # Only conversion/range failures are treated as "no deadline";
        # anything else is a real bug and should surface.
        return None

def selection_price_decimal(sel: dict):
    """Extract the raw decimal price of a selection.

    Lookup order:
      1. ``sel["price"]`` when it is a number (returned as is) or a string
         (returned stripped).
      2. ``sel["price"]`` when it is a dict: the first non-None value among
         the keys ``"decimal"``, ``"odds"``, ``"value"``.
      3. ``sel["oddsFormats"]["decimal"]``.

    Args:
        sel: Selection dict.

    Returns:
        The raw price (number, string or whatever the payload holds), or None
        when no price is found. No numeric conversion is performed here.
    """
    price = sel.get("price")
    if isinstance(price, (int, float)):
        return price
    if isinstance(price, str):
        return price.strip()
    if isinstance(price, dict):
        for key in ("decimal", "odds", "value"):
            candidate = price.get(key)
            # A key that is present but null must not hide the later
            # candidates or the oddsFormats fallback.
            if candidate is not None:
                return candidate

    odds_formats = sel.get("oddsFormats")
    if isinstance(odds_formats, dict):
        return odds_formats.get("decimal")
    return None

def is_full_match_market(market: dict, raw_header: str) -> bool:
    """Tell whether a market covers the whole match.

    The header and the market's ``"name"`` (each only if it is a string) are
    joined and lower-cased; the market is rejected as soon as that text
    contains any half/period/quarter/overtime marker.
    """
    excluded_markers = (
        "1. halvleg", "2. halvleg", "halvleg",
        "1st half", "2nd half", "first half", "second half",
        "periode", "period", "quarter", "q1", "q2", "q3", "q4",
        "overtime", "over time", "ekstra tid", "forlænget",
        "inkl. overtid", "incl. overtime",
    )

    candidates = (raw_header, market.get("name"))
    haystack = " | ".join(c for c in candidates if isinstance(c, str)).lower()

    for marker in excluded_markers:
        if marker in haystack:
            return False
    return True

def pick_market_header(market: dict) -> str:
    """Return the first usable header text of a market.

    Candidates are tried in this order: ``tableLayout["title"]`` (which may
    be e.g. "Målscorer"), ``tableLayout["headerTitle"]``, then the market's
    ``"name"``. The first one that is a non-blank string is returned
    stripped. Note that None is returned when no candidate qualifies, despite
    the ``str`` return annotation.
    """
    layout = market.get("tableLayout") or {}
    candidates = (
        layout.get("title"),
        layout.get("headerTitle"),
        market.get("name"),
    )
    for text in candidates:
        if not isinstance(text, str):
            continue
        cleaned = text.strip()
        if cleaned:
            return cleaned
    return None

def strip_leading_player_from_header(header: str, players_in_market):
    """Drop a leading player name from a market header.

    Player names are tried longest first so that a short name which is a
    prefix of a longer one cannot win. The comparison is case-insensitive and
    the name must be followed by a space. A non-string or blank ``header`` is
    returned unchanged; otherwise the stripped header (with or without the
    name removed) is returned.
    """
    if not isinstance(header, str):
        return header
    trimmed = header.strip()
    if not trimmed:
        return header

    lowered = trimmed.lower()
    names = [n for n in players_in_market if isinstance(n, str) and n.strip()]
    names.sort(key=len, reverse=True)

    for name in names:
        name = name.strip()
        if lowered.startswith(name.lower() + " "):
            return trimmed[len(name):].strip()
    return trimmed

def normalize_selection_label_count(label: str):
    """Turn an "N+" label into the equivalent "Over N-0.5" line.

    Examples: "1+" -> "Over 0.5", "2+" -> "Over 1.5", "3+" -> "Over 2.5".
    Returns None when ``label`` is not a string or, once stripped, is not
    exactly digits followed by a plus sign.
    """
    if not isinstance(label, str):
        return None
    match = re.fullmatch(r"(\d+)\+", label.strip())
    if match is None:
        return None
    threshold = int(match.group(1)) - 0.5
    return "Over " + str(threshold)

def normalize_market_type(header: str):
    """Map a market header to one of the wanted output market labels.

    Matching is done on the stripped, lower-cased header and the first rule
    that applies wins: goalscorer, cards, shots (on target vs. total),
    assists, goalkeeper saves. Returns None for a non-string or blank header
    and for headers that match no rule.
    """
    if not isinstance(header, str):
        return None
    text = header.strip().lower()
    if not text:
        return None

    def mentions(*needles):
        return any(needle in text for needle in needles)

    # Goalscorer
    if mentions("målscorer", "scorer"):
        return "Spiller scorer"

    # Cards
    if mentions("kort", "card"):
        return "Spiller får kort"

    # Shots: "skud på mål" (on target) is told apart from plain "skud" (total)
    if mentions("skud"):
        if re.search(r"\bskud\s+p[åa]\s+m[åa]l\b", text):
            return "Antal afslutninger på mål"
        return "Spillers samlede antal skud"

    # Assists
    if mentions("assist"):
        return "Spillers samlede antal assister"

    # Saves
    if mentions("redning", "saves"):
        return "Målmand Redninger"

    return None

def get_column_titles(market: dict):
    """Collect the non-blank column titles of a market's table layout.

    Reads ``market["tableLayout"]["columnTitles"]`` and returns the stripped
    ``"title"`` of every entry that has a non-blank string title.

    Args:
        market: Market dict.

    Returns:
        A list of stripped title strings; empty when the layout or the column
        list is missing or malformed. Entries that are not dicts are skipped
        instead of raising.
    """
    layout = market.get("tableLayout")
    if not isinstance(layout, dict):
        return []
    columns = layout.get("columnTitles")
    if not isinstance(columns, list):
        return []

    titles = []
    for column in columns:
        # Malformed entries (None, bare strings, ...) have no .get(); skip them.
        if not isinstance(column, dict):
            continue
        title = column.get("title")
        if isinstance(title, str) and title.strip():
            titles.append(title.strip())
    return titles

# =========================
# PARSER
# =========================
# Market types that are exported; every other market is dropped.
_WANTED_MARKET_TYPES = frozenset({
    "Antal afslutninger på mål",
    "Spillers samlede antal skud",
    "Målmand Redninger",
    "Spillers samlede antal assister",
    "Spiller får kort",
    "Spiller scorer",
})

# The assists market keeps only these normalised lines.
_ASSIST_LINES = frozenset({"Over 0.5", "Over 1.5"})


def _row_player(row: dict):
    """Return the stripped player name of a table row, or None if it has none."""
    name = row.get("title") or row.get("name")
    if isinstance(name, str) and name.strip():
        return name.strip()
    return None


def _selection_label_for(market_type: str, raw_label: str):
    """Return the output selectionLabel for one selection, or None to drop it."""
    # Goalscorer and card markets are exported as a plain "Yes".
    if market_type in ("Spiller scorer", "Spiller får kort"):
        return "Yes"
    # Count markets (shots, shots on target, saves, assists): "N+" -> "Over N-0.5".
    normalized = normalize_selection_label_count(raw_label)
    if market_type == "Spillers samlede antal assister" and normalized not in _ASSIST_LINES:
        return None
    return normalized


def parse_betano_player_props(doc: dict) -> pd.DataFrame:
    """Flatten the player-prop markets of an event document into a DataFrame.

    The event is read from ``doc["data"]["event"]`` or ``doc["event"]``. For
    each full-match market whose header maps to a wanted market type, one row
    is produced per priced selection of each player row. Malformed (non-dict)
    markets, rows, groups and selections are skipped rather than raising.

    Args:
        doc: Parsed JSON document.

    Returns:
        A DataFrame with the columns listed in ``COLUMNS``, de-duplicated on
        (event, marketLabel, selectionLabel) and sorted by marketLabel,
        player, selectionLabel. Empty (but with the right columns) when
        nothing qualifies.
    """
    data = doc.get("data")
    evt = (data.get("event") if isinstance(data, dict) else None) or doc.get("event") or {}
    markets = evt.get("markets") if isinstance(evt, dict) else None
    if not isinstance(markets, list):
        markets = []

    rows_out = []

    for market in markets:
        if not isinstance(market, dict):
            continue
        tl = market.get("tableLayout")
        if not isinstance(tl, dict):
            continue
        rows = tl.get("rows")
        if not isinstance(rows, list):
            continue
        rows = [r for r in rows if isinstance(r, dict)]
        if not rows:
            continue

        raw_header = pick_market_header(market)
        if not is_full_match_market(market, raw_header):
            continue

        # Player names in the market, used to strip a leading name off the header.
        players_in_market = [p for p in map(_row_player, rows) if p is not None]

        header_no_player = strip_leading_player_from_header(raw_header, players_in_market)
        market_type = normalize_market_type(header_no_player)

        # Only the wanted markets (None, i.e. an unmapped header, is dropped too).
        if market_type not in _WANTED_MARKET_TYPES:
            continue

        # Special case: a goalscorer market is only used when it has a
        # "Når som helst" (anytime) column, as specifically requested.
        if market_type == "Spiller scorer":
            col_titles = get_column_titles(market)
            if not any(ct.lower() == "når som helst" for ct in col_titles):
                continue

        deadline = ms_to_iso_utc(
            market.get("marketCloseTimeMillis")
            or market.get("closeTimeMillis")
            or market.get("deadlineMillis")
        )
        status_market = market.get("status") or market.get("marketStatus") or "Open"

        for r in rows:
            player = _row_player(r)
            if player is None:
                continue

            group_selections = r.get("groupSelections")
            if not isinstance(group_selections, list):
                continue

            for gs in group_selections:
                if not isinstance(gs, dict):
                    continue
                sels = gs.get("selections")
                if not isinstance(sels, list):
                    continue

                for sel in sels:
                    if not isinstance(sel, dict):
                        continue

                    raw_label = sel.get("name") or sel.get("label")
                    if not (isinstance(raw_label, str) and raw_label.strip()):
                        continue

                    # Market-specific filtering / label normalisation.
                    selection_label = _selection_label_for(market_type, raw_label.strip())
                    if selection_label is None:
                        continue

                    odds_dec = pd.to_numeric(selection_price_decimal(sel), errors="coerce")
                    if pd.isna(odds_dec) or odds_dec <= 0:
                        continue

                    rows_out.append({
                        "event": FORCED_EVENT_NAME,
                        "player": player,
                        "selectionLabel": selection_label,
                        "odds_decimal": float(odds_dec),
                        "status_selection": status_market,
                        "marketLabel": f"{market_type} | {player}",
                        "deadline": deadline,
                    })

    df = pd.DataFrame(rows_out, columns=COLUMNS)
    if df.empty:
        return df

    df["odds_decimal"] = pd.to_numeric(df["odds_decimal"], errors="coerce")
    df = df.dropna(subset=["odds_decimal"]).reset_index(drop=True)

    # De-duplicate per key: sorting by ascending odds and keeping the first row
    # retains the lowest price for each (event, marketLabel, selectionLabel).
    # NOTE(review): every selection of a goalscorer/card row is labelled "Yes",
    # so for those markets this step is what picks one price per player --
    # confirm the lowest-priced selection is the intended one.
    df = df.sort_values(["event", "marketLabel", "selectionLabel", "odds_decimal"], kind="stable")
    df = df.drop_duplicates(subset=["event", "marketLabel", "selectionLabel"], keep="first").reset_index(drop=True)

    # Final presentation order.
    df = df.sort_values(["marketLabel", "player", "selectionLabel"], kind="stable").reset_index(drop=True)

    return df

def main():
    """Parse the document at IN_PATH, write the props to OUT_CSV and print a preview."""
    props = parse_betano_player_props(load_json(IN_PATH))
    props.to_csv(OUT_CSV, index=False, encoding="utf-8")

    print(f"Saved {len(props)} rows to {OUT_CSV}")
    print(props.head(50))


if __name__ == "__main__":
    main()

Saved 599 rows to betano_player_props.csv
                                       event                player  \
0   Eintracht Frankfurt vs Borussia Dortmund         Ansgar Knauff   
1   Eintracht Frankfurt vs Borussia Dortmund         Ansgar Knauff   
2   Eintracht Frankfurt vs Borussia Dortmund         Ansgar Knauff   
3   Eintracht Frankfurt vs Borussia Dortmund         Ansgar Knauff   
4   Eintracht Frankfurt vs Borussia Dortmund         Arthur Theate   
5   Eintracht Frankfurt vs Borussia Dortmund         Arthur Theate   
6   Eintracht Frankfurt vs Borussia Dortmund         Arthur Theate   
7   Eintracht Frankfurt vs Borussia Dortmund         Aurele Amenda   
8   Eintracht Frankfurt vs Borussia Dortmund         Aurele Amenda   
9   Eintracht Frankfurt vs Borussia Dortmund          Aurelio Buta   
10  Eintracht Frankfurt vs Borussia Dortmund          Aurelio Buta   
11  Eintracht Frankfurt vs Borussia Dortmund          Aurelio Buta   
12  Eintracht Frankfurt vs Borussia Dortmund    