In [4]:
import json
import re
import pandas as pd

# =========================
# CONFIG
# =========================
# Input files to read; main() loads and parses each entry in turn.
IN_PATH = ["unibet.txt"]
# Destination CSV for the combined, de-duplicated rows written by main().
OUT_CSV = "unibet_player_props.csv"

# Column names, in order, of the DataFrame built by the parser.
COLUMNS = [
    "event",
    "player",
    "selectionLabel",
    "odds_decimal",
    "status_selection",
    "marketLabel",
    "deadline",
]

# =========================
# HELPERS
# =========================
def load_json(path: str) -> dict:
    """Read *path* as UTF-8 text and return the decoded JSON value.

    Surrounding whitespace is stripped before decoding.

    Raises:
        ValueError: if the file is empty or contains only whitespace.
            Malformed JSON surfaces as ``json.JSONDecodeError``.
    """
    with open(path, encoding="utf-8") as handle:
        payload = handle.read().strip()

    if payload:
        return json.loads(payload)
    raise ValueError(f"Fil er tom: {path}")

def to_float(x):
    """Convert *x* with ``pd.to_numeric``; values that cannot be parsed become NaN."""
    converted = pd.to_numeric(x, errors="coerce")
    return converted

def iso_from_unibet_time(ts):
    """Return *ts* stripped, with a trailing ``Z`` rewritten as ``+00:00``.

    Non-string input yields ``None``; strings without a trailing ``Z`` are
    returned stripped but otherwise unchanged.
    """
    if isinstance(ts, str):
        cleaned = ts.strip()
        return cleaned[:-1] + "+00:00" if cleaned.endswith("Z") else cleaned
    return None

def normalize_event_name(name: str) -> str:
    """Return a canonical ``"Home vs Away"`` style event name.

    The standalone word ``FC`` (any case) is removed, the separators
    ``" vs. "``, ``" v "`` and ``" - "`` are rewritten to ``" vs "``, and
    whitespace runs are collapsed. Non-string or effectively empty input
    yields ``"unknown_event"``.
    """
    fallback = "unknown_event"
    if not isinstance(name, str):
        return fallback

    text = re.sub(r"\bFC\b", "", name.strip(), flags=re.IGNORECASE)

    # Applied in this fixed order, one after another.
    for separator in (" vs. ", " v ", " - "):
        text = text.replace(separator, " vs ")

    text = re.sub(r"\s+", " ", text).strip()
    return text if text else fallback

def status_to_open(s):
    """Map a status value to ``"Open"`` when it denotes an open market.

    ``None`` and the case-insensitive values OPEN, ACTIVE, TRADING and
    ONGOING all become ``"Open"``; anything else is returned as its stripped
    string form.
    """
    if s is None:
        return "Open"
    text = str(s).strip()
    if text.upper() in ("OPEN", "ACTIVE", "TRADING", "ONGOING"):
        return "Open"
    return text

def outcome_is_bettable_unibet(o: dict) -> bool:
    """Return True when *o* is a dict whose status is missing, blank or OPEN.

    The status is compared after ``str()``, stripping and upper-casing, so an
    explicit ``None`` status (which becomes ``"NONE"``) is rejected.
    """
    if isinstance(o, dict):
        status = str(o.get("status", "")).strip().upper()
        return status in ("", "OPEN")
    return False

def betoffer_is_open(bo: dict) -> bool:
    """Return True when the bet offer's status is missing, blank or OPEN.

    The comparison is case-insensitive and ignores surrounding whitespace.
    *bo* must support ``.get``; no type check is performed here.
    """
    status = str(bo.get("status", "")).strip().upper()
    return not status or status == "OPEN"

def odds_to_decimal_unibet(odds_int):
    """Convert Unibet odds to decimal odds by dividing by 1000.

    Unibet odds are typically integers such as 4200 for 4.20. Returns
    ``None`` when the value cannot be converted to a number.
    """
    numeric = to_float(odds_int)
    return None if pd.isna(numeric) else float(numeric) / 1000.0

def line_to_over_label(line_int):
    """Build an ``"Over <line>"`` label from a Unibet line value.

    The Unibet line is typically an integer such as 2500 for 2.5. Returns
    ``None`` when the value cannot be converted to a number.
    """
    numeric = to_float(line_int)
    if pd.isna(numeric):
        return None
    threshold = float(numeric) / 1000.0
    return f"Over {threshold}"

def pick_event_info(doc: dict):
    """Return ``(event_name, deadline)`` taken from the first entry of ``doc["events"]``.

    The name is passed through ``normalize_event_name`` and the ``start``
    value through ``iso_from_unibet_time``. When ``events`` is missing, not a
    list, empty, or its first entry is not a dict, the result is
    ``("unknown_event", None)``.
    """
    events = doc.get("events") or []
    first = events[0] if isinstance(events, list) and events else None
    if not isinstance(first, dict):
        return "unknown_event", None

    name = normalize_event_name(first.get("name"))
    start = iso_from_unibet_time(first.get("start"))
    return name, start

# Unibet criterion ``englishLabel`` -> standard market label.
_CRITERION_TO_MARKET = {
    # Shots
    "Player's shots (Settled using Opta data)": "Spillers samlede antal skud",
    # Shots on target
    "Player's shots on target (Settled using Opta data)": "Antal afslutninger på mål",
    # Assists
    "To Assist (Settled using Opta data)": "Spillers samlede antal assister",
    # Cards
    "To Get a Card": "Spiller får kort",
    # Goalscorer
    "To Score": "Spiller scorer",
    # Fouls conceded
    "Player's fouls conceded (Settled using Opta data)": "Spiller Frispark Begået",
    # Goalkeeper saves
    "Goalkeeper Saves": "Målmand Redninger",
}


def market_type_from_criterion(crit: dict):
    """Map a Unibet criterion's ``englishLabel`` to the standard market label.

    The label is matched exactly after stripping surrounding whitespace.
    Returns ``None`` when *crit* is not a dict or the label is not one of the
    supported markets.
    """
    if not isinstance(crit, dict):
        return None

    english_label = str(crit.get("englishLabel") or "").strip()
    return _CRITERION_TO_MARKET.get(english_label)

# =========================
# PARSER
# =========================
# Markets whose selection label is derived from the outcome's ``line`` value.
_LINE_MARKETS = frozenset({
    "Spillers samlede antal skud",
    "Antal afslutninger på mål",
    "Spiller Frispark Begået",
    "Målmand Redninger",
})

# Yes/No markets mapped to the selection label emitted for the "yes" outcome.
# Assists are mapped to "Over 0.5" so they line up with the count markets.
_YES_MARKET_LABELS = {
    "Spillers samlede antal assister": "Over 0.5",
    "Spiller får kort": "Yes",
    "Spiller scorer": "Yes",
}

# Columns that identify one selection when de-duplicating.
_DEDUPE_KEY = ["event", "marketLabel", "selectionLabel"]


def _outcome_text(o: dict, key: str) -> str:
    """Return ``o[key]`` as a stripped, lower-cased string ("" when missing)."""
    return str(o.get(key, "")).strip().lower()


def _is_yes_outcome(o: dict) -> bool:
    """Return True when the outcome is the "yes" side (English "yes" or Danish "ja")."""
    return _outcome_text(o, "englishLabel") == "yes" or _outcome_text(o, "label") == "ja"


def _is_under_outcome(o: dict) -> bool:
    """Return True when the outcome identifies itself as the "under" side."""
    if _outcome_text(o, "englishLabel").startswith("under"):
        return True
    if _outcome_text(o, "label").startswith("under"):
        return True
    # NOTE(review): assumes the feed may also tag the under side with
    # type "OT_UNDER" — confirm against a real payload.
    return str(o.get("type", "")).strip().upper() == "OT_UNDER"


def parse_unibet_player_props(doc: dict) -> pd.DataFrame:
    """Extract player-prop selections from one Unibet document.

    Walks ``doc["betOffers"]``, keeps open offers whose criterion maps to a
    supported market, and emits one row per bettable outcome that names a
    participant and has positive odds.

    Selection labels:
      * line markets (shots, shots on target, fouls, saves): ``"Over <line>"``.
        Outcomes that identify themselves as the under side are skipped,
        because the label is always built as "Over" and would otherwise
        misreport them.
      * assists: the yes outcome becomes ``"Over 0.5"``.
      * card / goalscorer: the yes outcome becomes ``"Yes"``.

    Args:
        doc: Parsed Unibet JSON document.

    Returns:
        DataFrame with the columns in ``COLUMNS``. Rows are unique per
        (event, marketLabel, selectionLabel), keeping the lowest odds, and are
        sorted by marketLabel, player and selectionLabel. Empty when nothing
        matched.
    """
    event_name, deadline = pick_event_info(doc)

    betoffers = doc.get("betOffers") or []
    if not isinstance(betoffers, list):
        betoffers = []

    rows_out = []

    for bo in betoffers:
        if not isinstance(bo, dict) or not betoffer_is_open(bo):
            continue

        mtype = market_type_from_criterion(bo.get("criterion") or {})
        if mtype is None:
            continue

        status_market = status_to_open(bo.get("status"))

        outcomes = bo.get("outcomes") or []
        if not isinstance(outcomes, list):
            continue

        for o in outcomes:
            if not outcome_is_bettable_unibet(o):
                continue

            # Without a participant name the row cannot be matched reliably
            # against other datasets, so it is dropped.
            player = o.get("participant")
            if not isinstance(player, str) or not player.strip():
                continue
            player = player.strip()

            odds_dec = odds_to_decimal_unibet(o.get("odds"))
            if odds_dec is None or pd.isna(odds_dec) or odds_dec <= 0:
                continue

            if mtype in _LINE_MARKETS:
                if _is_under_outcome(o):
                    continue
                selection_label = line_to_over_label(o.get("line"))
                if selection_label is None:
                    continue
            elif mtype in _YES_MARKET_LABELS:
                if not _is_yes_outcome(o):
                    continue
                selection_label = _YES_MARKET_LABELS[mtype]
            else:
                continue

            rows_out.append({
                "event": event_name,
                "player": player,
                "selectionLabel": selection_label,
                "odds_decimal": float(odds_dec),
                "status_selection": status_market,
                "marketLabel": f"{mtype} | {player}",
                "deadline": deadline,
            })

    df = pd.DataFrame(rows_out, columns=COLUMNS)
    if df.empty:
        return df

    df["odds_decimal"] = pd.to_numeric(df["odds_decimal"], errors="coerce")
    df = df.dropna(subset=["odds_decimal"]).reset_index(drop=True)

    # De-duplicate: ascending odds + keep="first" retains the lowest price.
    df = df.sort_values(_DEDUPE_KEY + ["odds_decimal"], kind="stable")
    df = df.drop_duplicates(subset=_DEDUPE_KEY, keep="first").reset_index(drop=True)

    # Final presentation order.
    df = df.sort_values(
        ["marketLabel", "player", "selectionLabel"],
        kind="stable"
    ).reset_index(drop=True)

    return df

def main():
    """Parse every file in ``IN_PATH``, merge the rows and write ``OUT_CSV``.

    Rows are de-duplicated across files per (event, marketLabel,
    selectionLabel), keeping the lowest odds. The row count and the first 50
    rows are printed.

    Raises:
        ValueError: if no file produced any rows.
    """
    frames = []
    for path in IN_PATH:
        print(f"Loading {path}")
        parsed = parse_unibet_player_props(load_json(path))
        if parsed.empty:
            continue
        frames.append(parsed)

    if not frames:
        raise ValueError("Ingen data blev parsed fra nogen filer.")

    # Extra de-duplication across files.
    dedupe_key = ["event", "marketLabel", "selectionLabel"]
    combined = (
        pd.concat(frames, ignore_index=True)
        .sort_values(dedupe_key + ["odds_decimal"], kind="stable")
        .drop_duplicates(subset=dedupe_key, keep="first")
        .reset_index(drop=True)
    )

    combined.to_csv(OUT_CSV, index=False, encoding="utf-8")
    print(f"Saved {len(combined)} rows to {OUT_CSV}")
    print(combined.head(50))

# Run the export only when this module is executed as the main program.
if __name__ == "__main__":
    main()

Loading unibet.txt
Saved 244 rows to unibet_player_props.csv
               event                       player selectionLabel  \
0   Roma vs Sassuolo             Alessandro Arena       Over 0.5   
1   Roma vs Sassuolo             Alessandro Arena       Over 1.5   
2   Roma vs Sassuolo                 Alieu Fadera       Over 0.5   
3   Roma vs Sassuolo             Andrea Pinamonti       Over 0.5   
4   Roma vs Sassuolo             Andrea Pinamonti       Over 1.5   
5   Roma vs Sassuolo             Armand Laurienté       Over 0.5   
6   Roma vs Sassuolo             Armand Laurienté       Over 1.5   
7   Roma vs Sassuolo                Aster Vranckx       Over 0.5   
8   Roma vs Sassuolo                Daniel Boloca       Over 0.5   
9   Roma vs Sassuolo             Daniele Ghilardi       Over 0.5   
10  Roma vs Sassuolo                Devyne Rensch       Over 0.5   
11  Roma vs Sassuolo                Devyne Rensch       Over 1.5   
12  Roma vs Sassuolo                Evan Ferguson      