In [4]:
import json
import re
import pandas as pd

# =========================
# CONFIG
# =========================
# Input: raw Expekt event JSON dump (read by main()).
IN_PATH = "expekt.txt"
# Output: flattened player-prop rows written by main().
OUT_CSV = "expekt_player_props.csv"

# Column order of the resulting DataFrame / CSV.
COLUMNS = [
    "event", "player", "selectionLabel", "odds_decimal",
    "status_selection", "marketLabel", "deadline",
]

# =========================
# HELPERS
# =========================
def load_json(path: str) -> dict:
    """Read the UTF-8 text file at *path* and return it parsed as JSON.

    Surrounding whitespace is stripped before parsing.

    Raises:
        ValueError: if the file contains nothing but whitespace.
    """
    with open(path, "r", encoding="utf-8") as handle:
        raw = handle.read().strip()
    if raw:
        return json.loads(raw)
    raise ValueError(f"Fil er tom: {path}")

def to_float(x):
    """Convert *x* to a number with pandas; values that cannot be parsed become NaN."""
    numeric = pd.to_numeric(x, errors="coerce")
    return numeric

def iso_from_expekt_time(ts):
    """Return *ts* stripped, with a trailing "Z" rewritten as "+00:00".

    Expekt often uses timestamps such as "2026-01-08T20:00:00Z".
    Anything that is not a string yields None.
    """
    if isinstance(ts, str):
        cleaned = ts.strip()
        return cleaned[:-1] + "+00:00" if cleaned.endswith("Z") else cleaned
    return None

def normalize_event_name(name: str) -> str:
    """Normalize an event name: drop the word "FC" and use "vs" as separator.

    " v " and " - " are both rewritten to " vs ", and runs of whitespace are
    collapsed to a single space. Returns None when *name* is not a string.
    """
    if not isinstance(name, str):
        return None
    text = re.sub(r"\bFC\b", "", name.strip(), flags=re.IGNORECASE)
    for separator in (" v ", " - "):
        text = text.replace(separator, " vs ")
    # Removing "FC" can leave double or trailing spaces behind.
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()

def player_name_from_outcome(name: str):
    """Turn an Expekt "Last, First" outcome name into "First Last".

    Example: "Mac Allister, Alexis" -> "Alexis Mac Allister". Only the first
    comma is used as the split point. Names without a comma, or with an empty
    part on either side of it, are returned stripped but otherwise unchanged.
    Returns None when *name* is not a string.
    """
    if not isinstance(name, str):
        return None
    cleaned = name.strip()
    surname, comma, given = cleaned.partition(",")
    surname = surname.strip()
    given = given.strip()
    if comma and given and surname:
        return f"{given} {surname}"
    return cleaned

def selection_label_from_x_plus(x: int) -> str:
    """Express an "X+" market as the equivalent "Over (X - 0.5)" label."""
    threshold = x - 0.5
    return f"Over {threshold}"

def status_to_open(s):
    """Map a status value to "Open" when it denotes a tradable state.

    OPEN, ACTIVE, TRADING and ONGOING (case-insensitive, surrounding
    whitespace ignored) all become "Open", as does None. Any other value is
    returned as its stripped string form.
    """
    if s is None:
        return "Open"
    text = str(s).strip()
    if text.upper() in ("OPEN", "ACTIVE", "TRADING", "ONGOING"):
        return "Open"
    return text

# =========================
# PARSER
# =========================
def parse_expekt_player_props(doc: dict) -> pd.DataFrame:
    """Flatten an Expekt event document into one row per player-prop selection.

    Only markets whose stripped name matches one of the four "X+" patterns
    below are kept. Every outcome of such a market yields one row; the "X+"
    threshold is expressed as the selection label "Over (X - 0.5)".

    Args:
        doc: Parsed event JSON. The keys read are "markets", "name",
            "participants" and "startTime"; per market "name", "status" and
            "outcomes"; per outcome "name" and "formatDecimal".

    Returns:
        A DataFrame with the columns listed in COLUMNS. Rows sharing the same
        (event, marketLabel, selectionLabel) are collapsed to the one with the
        lowest odds, and the result is sorted by marketLabel, player and
        selectionLabel. The frame is empty when no market matches.
    """
    markets = doc.get("markets") or []
    if not isinstance(markets, list):
        markets = []

    event_raw = doc.get("name")
    if not isinstance(event_raw, str) or not event_raw.strip():
        # Fallback when the event name is missing: build it from the first two
        # participants, but only if both actually carry a usable name, so the
        # result is never something like "None vs None".
        event_raw = "unknown_event"
        parts = doc.get("participants") or []
        if isinstance(parts, list) and len(parts) >= 2:
            names = [
                p.get("name") if isinstance(p, dict) else None
                for p in parts[:2]
            ]
            if all(isinstance(n, str) and n.strip() for n in names):
                event_raw = f"{names[0]} vs {names[1]}"

    event_name = normalize_event_name(event_raw)
    deadline = iso_from_expekt_time(doc.get("startTime"))

    # Only the exact whole-match markets we want; compiled once, outside the
    # market loop. Each entry maps a market-name pattern to the market type
    # used in the output label.
    patterns = [
        (re.compile(r"^Spiller har (\d+)\+ skud på mål$"), "Antal afslutninger på mål"),
        (re.compile(r"^Spiller har (\d+)\+ skud$"), "Spillers samlede antal skud"),
        (re.compile(r"^Spiller laver (\d+)\+ assists$"), "Spillers samlede antal assister"),
        (re.compile(r"^Spiller begår (\d+)\+ forseelser$"), "Spiller Frispark Begået"),
    ]

    rows_out = []

    for m in markets:
        # Skip malformed entries (e.g. null) instead of failing on .get().
        if not isinstance(m, dict):
            continue

        market_name = m.get("name")
        if not isinstance(market_name, str) or not market_name.strip():
            continue
        market_name = market_name.strip()

        market_type = None
        x_val = None

        for pat, mtype in patterns:
            mm = pat.match(market_name)
            if mm:
                market_type = mtype
                x_val = int(mm.group(1))
                break

        if market_type is None or x_val is None:
            continue

        selection_label = selection_label_from_x_plus(x_val)

        # The market-level status is applied to every selection of the market.
        status_market = status_to_open(m.get("status"))

        outcomes = m.get("outcomes") or []
        if not isinstance(outcomes, list) or len(outcomes) == 0:
            continue

        for o in outcomes:
            # Same guard as for markets: ignore non-dict outcome entries.
            if not isinstance(o, dict):
                continue

            player = player_name_from_outcome(o.get("name"))
            if not player:
                continue

            odds_dec = to_float(o.get("formatDecimal"))

            # Outcomes without parseable decimal odds are dropped.
            if pd.isna(odds_dec):
                continue

            market_label = f"{market_type} | {player}"

            rows_out.append({
                "event": event_name,
                "player": player,
                "selectionLabel": selection_label,
                "odds_decimal": odds_dec,
                "status_selection": status_market,
                "marketLabel": market_label,
                "deadline": deadline,
            })

    df = pd.DataFrame(rows_out, columns=COLUMNS)
    if df.empty:
        return df

    # Dedupe: if Expekt has duplicates of the same key, keep the first one
    # deterministically (after sorting, that is the row with the lowest odds).
    df = df.sort_values(
        ["event", "marketLabel", "selectionLabel", "odds_decimal"],
        kind="stable"
    )
    df = df.drop_duplicates(
        subset=["event", "marketLabel", "selectionLabel"],
        keep="first"
    ).reset_index(drop=True)

    # Tidy final ordering.
    df = df.sort_values(
        ["marketLabel", "player", "selectionLabel"],
        kind="stable"
    ).reset_index(drop=True)

    return df

def main():
    """Parse IN_PATH, write the player-prop rows to OUT_CSV and print a preview."""
    props = parse_expekt_player_props(load_json(IN_PATH))
    props.to_csv(OUT_CSV, index=False, encoding="utf-8")

    # Short summary plus the first 25 rows for a quick visual check.
    print(f"Saved {len(props)} rows to {OUT_CSV}")
    preview = props.head(25)
    print(preview)

if __name__ == "__main__":
    main()

Saved 520 rows to expekt_player_props.csv
                   event               player selectionLabel  odds_decimal  \
0   Arsenal vs Liverpool  Alexis Mac Allister       Over 0.5          2.40   
1   Arsenal vs Liverpool  Alexis Mac Allister       Over 1.5          8.00   
2   Arsenal vs Liverpool  Alexis Mac Allister       Over 2.5         34.00   
3   Arsenal vs Liverpool  Alexis Mac Allister       Over 3.5        261.00   
4   Arsenal vs Liverpool       Andy Robertson       Over 0.5          5.50   
5   Arsenal vs Liverpool       Andy Robertson       Over 1.5         34.00   
6   Arsenal vs Liverpool       Andy Robertson       Over 2.5        651.00   
7   Arsenal vs Liverpool       Andy Robertson       Over 3.5       1001.00   
8   Arsenal vs Liverpool            Ben White       Over 0.5          4.50   
9   Arsenal vs Liverpool            Ben White       Over 1.5         23.00   
10  Arsenal vs Liverpool            Ben White       Over 2.5        301.00   
11  Arsenal vs Liverpo