In [4]:
import json
import re
import pandas as pd
from datetime import datetime, timezone

# =========================
# CONFIG
# =========================
# Input files to process. main() loads each path with load_json() and parses
# the resulting document with parse_betinia_player_props().
# NOTE(review): the names mix "Betinia" and "betinia" casing; on a
# case-sensitive file system they must match the files on disk exactly.
IN_PATH = [
    "Betinia.txt", "betinia1.txt", "betinia2.txt", "betinia3.txt", "betinia4.txt",
    "Betinia5.txt", "betinia6.txt", "betinia7.txt", "betinia8.txt", "betinia9.txt",
    "betinia10.txt", "betinia11.txt"
]
# Destination of the combined, de-duplicated CSV written by main().
OUT_CSV = "betinia_player_props.csv"

# Column names (and order) of the DataFrame built by
# parse_betinia_player_props(); every emitted row dict uses exactly these keys.
COLUMNS = [
    "event",
    "player",
    "selectionLabel",
    "odds_decimal",
    "status_selection",
    "marketLabel",
    "deadline",
]

# =========================
# HELPERS
# =========================
def load_json(path: str) -> dict:
    """Read a UTF-8 text file and decode its contents as JSON.

    Leading and trailing whitespace is stripped before decoding.

    Raises:
        ValueError: if the file contains nothing but whitespace. Malformed
            JSON surfaces as whatever ``json.loads`` raises.
    """
    with open(path, "r", encoding="utf-8") as handle:
        payload = handle.read().strip()

    if payload:
        return json.loads(payload)
    raise ValueError(f"Fil er tom: {path}")

def to_float(x):
    """Convert ``x`` to a number via pandas, yielding NaN when it cannot be parsed."""
    coerced = pd.to_numeric(x, errors="coerce")
    return coerced

def normalize_event_name_betinia(name: str) -> str:
    """Normalize an event title to the canonical ``"<home> vs <away>"`` form.

    Steps, in order:
      1. Rewrite the separators ``" vs. "``, ``" v "`` and ``" - "`` to ``" vs "``.
      2. If the title splits into exactly two teams, replace each team that has
         an entry in the alias table with its canonical name.
      3. Remove the standalone word "FC" (case-insensitive) and collapse
         repeated whitespace.

    Args:
        name: Raw event name; anything that is not a string is treated as missing.

    Returns:
        The normalized name, or ``"unknown_event"`` when the input is not a
        string or nothing is left after normalization.
    """
    if not isinstance(name, str):
        return "unknown_event"

    s = name.strip()
    for separator in (" vs. ", " v ", " - "):
        s = s.replace(separator, " vs ")

    team_map = {
        "Frankfurt": "Eintracht Frankfurt",
        "Borussia Dortmund": "Borussia Dortmund",
    }

    # Split on the whole word "vs" only. A plain substring split would also cut
    # inside team names that merely contain "vs" (e.g. "Levski Sofia"), yielding
    # three parts and silently skipping the alias mapping below.
    parts = [p.strip() for p in re.split(r"\bvs\b", s)]
    if len(parts) == 2:
        home, away = parts
        home = team_map.get(home, home)
        away = team_map.get(away, away)
        s = f"{home} vs {away}"

    s = re.sub(r"\bFC\b", "", s, flags=re.IGNORECASE)
    s = re.sub(r"\s+", " ", s).strip()
    return s or "unknown_event"

def ms_to_iso_utc(ms):
    """Render a millisecond epoch timestamp as an ISO-8601 string in UTC.

    ``ms`` is passed through ``int()`` first, so numeric strings are accepted.
    Returns None when ``ms`` is None or when conversion fails for any reason.
    """
    if ms is not None:
        try:
            seconds = int(ms) / 1000
            moment = datetime.fromtimestamp(seconds, tz=timezone.utc)
            return moment.isoformat()
        except Exception:
            pass
    return None

def status_from_oddstatus(odd_status):
    """Translate an ``oddStatus`` value into ``"Open"`` or ``"Closed"``.

    Only a value whose ``int()`` conversion equals 0 counts as open; any other
    value, including one that cannot be converted, is reported as closed.
    """
    try:
        is_open = int(odd_status) == 0
    except Exception:
        is_open = False
    return "Open" if is_open else "Closed"

def parse_line_from_sv(sv: str):
    """Parse the text before the first ``|`` of an ``sv`` string as a float.

    Returns None when ``sv`` is not a string, is blank, or when its leading
    segment is not a valid float literal.
    """
    if isinstance(sv, str) and sv.strip():
        head, _, _ = sv.partition("|")
        try:
            return float(head.strip())
        except Exception:
            return None
    return None

def normalize_player_name(name: str) -> str:
    """
    Strip accents and keep only the first two whitespace-separated names:
    'Ramon Terrats Espacio' -> 'Ramon Terrats'
    'Haïssam Hassan Ali' -> 'Haissam Hassan'

    A name with fewer than two words is returned stripped of surrounding
    whitespace. Non-string input yields None (despite the ``str`` annotation).
    """
    if not isinstance(name, str):
        return None

    import unicodedata

    # Decompose characters (NFKD), then drop the combining marks, which is
    # what removes the accents.
    decomposed = unicodedata.normalize("NFKD", name)
    unaccented = "".join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )

    tokens = unaccented.split()
    if len(tokens) < 2:
        return unaccented.strip()
    return " ".join(tokens[:2])

def pick_player_from_childmarket(cm: dict):
    """Pick the player name out of a child-market dict.

    Sources are tried in this order:
      1. ``childName`` - used as-is (stripped) when it is a non-blank string.
      2. ``name`` - when it contains a ``-``, the text after the first ``-``
         with one trailing parenthesised group removed. This branch is final:
         an empty result returns None without consulting ``shortName``.
      3. ``shortName`` - with one trailing parenthesised group removed,
         falling back to the stripped ``shortName`` if that leaves nothing.

    Returns None when no source yields a name.
    """
    trailing_parens = r"\s*\([^)]*\)\s*$"

    child_name = cm.get("childName")
    if isinstance(child_name, str):
        trimmed_child = child_name.strip()
        if trimmed_child:
            return trimmed_child

    full_name = cm.get("name")
    if isinstance(full_name, str) and "-" in full_name:
        _, _, tail = full_name.partition("-")
        candidate = re.sub(trailing_parens, "", tail.strip()).strip()
        return candidate if candidate else None

    short_name = cm.get("shortName")
    if isinstance(short_name, str):
        trimmed_short = short_name.strip()
        if trimmed_short:
            without_suffix = re.sub(trailing_parens, "", short_name).strip()
            return without_suffix or trimmed_short

    return None

def extract_over_line_from_odd(odd: dict):
    """Find the numeric "over" line of an odd.

    The odd's ``name`` is searched first for ``over <number>``
    (case-insensitive). If that finds nothing, the whole ``sv`` string is
    parsed as a float. Returns None when neither source gives a number.
    """
    label = odd.get("name")
    hit = None
    if isinstance(label, str):
        hit = re.search(
            r"\bover\s+([0-9]+(?:\.[0-9]+)?)\b", label, flags=re.IGNORECASE
        )
    if hit is not None:
        try:
            return float(hit.group(1))
        except Exception:
            pass

    raw_sv = odd.get("sv")
    if isinstance(raw_sv, str):
        try:
            return float(raw_sv.strip())
        except Exception:
            return None
    return None

def normalize_selection_label(label: str):
    """Bring a selection label into the ``"Over <line>"`` convention.

    * ``"N+"`` becomes ``"Over <N - 0.5>"`` (e.g. ``"2+"`` -> ``"Over 1.5"``).
    * ``"over <number>"`` in any casing becomes ``"Over <number>"``.
    * A bare ``"over"`` in any casing becomes ``"Over"``.
    * Every other string is returned stripped; non-strings yield None.
    """
    if not isinstance(label, str):
        return None
    text = label.strip()

    at_least = re.fullmatch(r"(\d+)\+", text)
    if at_least is not None:
        threshold = int(at_least.group(1))
        return f"Over {threshold - 0.5}"

    explicit_over = re.fullmatch(
        r"Over\s+([0-9]+(?:\.[0-9]+)?)", text, flags=re.IGNORECASE
    )
    if explicit_over is not None:
        return f"Over {explicit_over.group(1)}"

    return "Over" if text.lower() == "over" else text

# =========================
# MARKET MATCHING
# =========================
def market_type_from_parent_name(parent_name: str):
    """Classify a parent market by keywords in its (Danish) name.

    The name is stripped and lower-cased, then checked against an ordered list
    of keyword rules; the first rule that matches decides the label. Order
    matters: "skud på mål" must win over the plain shots rule, and the
    "mod spiller" rule must win over the generic "frispark" rule.

    Returns the canonical market label, or None for non-strings and for names
    that match no rule.
    """
    if not isinstance(parent_name, str):
        return None
    key = parent_name.strip().lower()

    def has(fragment):
        return fragment in key

    rules = (
        (has("skud på mål"), "Antal afslutninger på mål"),
        (has("spiller skud") and not has("på mål"), "Spillers samlede antal skud"),
        (has("tacklinger"), "Spillers samlede antal tacklinger"),
        (
            (has("frispark") or has("forseelser")) and has("mod spiller"),
            "Spiller Frispark Tildelt",
        ),
        (has("frispark"), "Spiller Frispark Begået"),
        (has("begået") and has("forseelser"), "Spiller Frispark Begået"),
        (has("assist"), "Spillers samlede antal assister"),
        (has("advarsler") or has("kort"), "Spiller får kort"),
        (has("målscorer"), "Spiller scorer"),
        (has("redninger"), "Målmand Redninger"),
        (has("offside"), "Spiller Offsides"),
    )
    return next((label for matched, label in rules if matched), None)

def market_type_from_child_name(child_name: str):
    """Classify a child market by keywords in its (Danish) name.

    The name is stripped and lower-cased, then checked against an ordered list
    of keyword rules; the first rule that matches decides the label. Unlike
    the parent-name classifier, the shots rule here matches the standalone
    word "skud", and the goalscorer rule accepts any name containing "scorer".

    Returns the canonical market label, or None for non-strings and for names
    that match no rule.
    """
    if not isinstance(child_name, str):
        return None
    key = child_name.strip().lower()

    def has(fragment):
        return fragment in key

    standalone_skud = re.search(r"\bskud\b", key) is not None

    rules = (
        (has("skud på mål"), "Antal afslutninger på mål"),
        (standalone_skud and not has("på mål"), "Spillers samlede antal skud"),
        (has("tacklinger"), "Spillers samlede antal tacklinger"),
        (
            (has("frispark") or has("forseelser")) and has("mod spiller"),
            "Spiller Frispark Tildelt",
        ),
        (has("frispark"), "Spiller Frispark Begået"),
        (has("begået") and has("forseelser"), "Spiller Frispark Begået"),
        (has("assist"), "Spillers samlede antal assister"),
        (has("kort") or has("advarsel"), "Spiller får kort"),
        (has("målscorer") or has("scorer"), "Spiller scorer"),
        (has("redninger"), "Målmand Redninger"),
        (has("offside"), "Spiller Offsides"),
    )
    return next((label for matched, label in rules if matched), None)

# =========================
# PARSER
# =========================
def parse_betinia_player_props(doc: dict) -> pd.DataFrame:
    """Flatten one Betinia event document into a table of player-prop selections.

    The document is read through these keys:
      * ``name`` / ``startDate`` - event title and start time in epoch milliseconds.
      * ``odds`` - list of odd dicts (``id``, ``name``, ``price``, ``oddStatus``, ``sv``).
      * ``childMarkets`` - list of per-player market dicts (``id``, naming fields,
        ``sv``, ``desktopOddIds`` / ``mobileOddIds``).
      * ``markets`` - list of parent markets, each listing ``childMarketIds``.

    For every parent market, each referenced child market is classified (by the
    parent name first, then by the child's own name), its player is extracted
    and normalized, and every non-"under" odd with a positive price becomes
    one output row.

    Args:
        doc: Decoded event document.

    Returns:
        DataFrame with the columns in ``COLUMNS``. Rows are unique per
        (event, marketLabel, selectionLabel); when duplicates exist, the one
        with the lowest ``odds_decimal`` is kept. Sorted by marketLabel,
        player, selectionLabel. Empty (but with the right columns) when
        nothing could be extracted.
    """
    event_name = normalize_event_name_betinia(doc.get("name"))
    deadline = ms_to_iso_utc(doc.get("startDate"))

    # Index odds by integer id so child markets can look them up directly.
    odds_list = doc.get("odds") or []
    odds_map = {}
    if isinstance(odds_list, list):
        for o in odds_list:
            oid = o.get("id")
            if oid is None:
                continue
            odds_map[int(oid)] = o

    # Index child markets by integer id for the same reason.
    child_markets = doc.get("childMarkets") or []
    child_map = {}
    if isinstance(child_markets, list):
        for cm in child_markets:
            cid = cm.get("id")
            if cid is None:
                continue
            child_map[int(cid)] = cm

    markets = doc.get("markets") or []
    if not isinstance(markets, list):
        markets = []

    rows_out = []

    for parent in markets:
        parent_name = parent.get("shortName") or parent.get("name")
        mtype_parent = market_type_from_parent_name(parent_name)

        child_ids = parent.get("childMarketIds") or []
        if not isinstance(child_ids, list) or not child_ids:
            continue

        for cid in child_ids:
            # A missing or non-numeric id is skipped rather than aborting the
            # whole document, mirroring how odd ids are handled further down.
            try:
                cm = child_map.get(int(cid))
            except (TypeError, ValueError):
                continue
            if not isinstance(cm, dict):
                continue

            # The parent's classification wins; the child name is the fallback.
            mtype = mtype_parent or market_type_from_child_name(cm.get("name"))
            if not mtype:
                continue

            player_raw = pick_player_from_childmarket(cm)
            if not player_raw:
                continue

            # ---------- IMPORTANT: NORMALIZE THE PLAYER ----------
            # Uses normalize_player_name, the helper defined in this file. The
            # previous call targeted `normalize_player_name_two_words`, which is
            # not defined here and raised NameError on a fresh run.
            player = normalize_player_name(player_raw)
            if not player:
                continue

            line_val = parse_line_from_sv(cm.get("sv"))

            odd_id_groups = cm.get("desktopOddIds") or cm.get("mobileOddIds") or []
            if not isinstance(odd_id_groups, list) or not odd_id_groups:
                continue

            # The id list may be flat or a list of lists; flatten one level.
            flat_odd_ids = []
            for g in odd_id_groups:
                if isinstance(g, list):
                    flat_odd_ids.extend([x for x in g if x is not None])
                elif g is not None:
                    flat_odd_ids.append(g)

            for oid in flat_odd_ids:
                try:
                    oid_int = int(oid)
                except Exception:
                    continue

                odd = odds_map.get(oid_int)
                if not isinstance(odd, dict):
                    continue

                odd_name = odd.get("name")
                if not isinstance(odd_name, str):
                    odd_name = ""

                price = to_float(odd.get("price"))
                if pd.isna(price) or price <= 0:
                    continue

                # Only "over"-style selections are exported.
                if "under" in odd_name.strip().lower():
                    continue

                sel_label_raw = normalize_selection_label(odd_name)

                # A bare "Over" needs the line from the child market's sv.
                if sel_label_raw == "Over":
                    if line_val is None:
                        continue
                    sel_label_raw = f"Over {line_val}"

                if mtype == "Spiller scorer":
                    # Goalscorer: only a known 0.5 line is kept, emitted as "Yes".
                    over_line = extract_over_line_from_odd(odd)
                    if over_line is None and line_val is not None:
                        over_line = line_val
                    if over_line is None or abs(float(over_line) - 0.5) > 1e-9:
                        continue
                    selection_label = "Yes"

                elif mtype == "Spiller får kort":
                    # Cards: an unknown line is accepted; a known line must be 0.5.
                    over_line = extract_over_line_from_odd(odd)
                    if over_line is None and line_val is not None:
                        over_line = line_val
                    if over_line is not None and abs(float(over_line) - 0.5) > 1e-9:
                        continue
                    selection_label = "Yes"

                else:
                    selection_label = sel_label_raw

                status_selection = status_from_oddstatus(odd.get("oddStatus"))

                # ---------- MARKET LABEL MUST USE THE NORMALIZED PLAYER ----------
                market_label = f"{mtype} | {player}"

                rows_out.append({
                    "event": event_name,
                    "player": player,
                    "selectionLabel": selection_label,
                    "odds_decimal": float(price),
                    "status_selection": status_selection,
                    "marketLabel": market_label,
                    "deadline": deadline,
                })

    df = pd.DataFrame(rows_out, columns=COLUMNS)
    if df.empty:
        return df

    # Sort ascending by price within each selection so that keep="first"
    # retains the lowest price among duplicates.
    df = df.sort_values(
        ["event", "marketLabel", "selectionLabel", "odds_decimal"],
        kind="stable"
    )
    df = df.drop_duplicates(
        subset=["event", "marketLabel", "selectionLabel"],
        keep="first"
    ).reset_index(drop=True)

    df = df.sort_values(
        ["marketLabel", "player", "selectionLabel"],
        kind="stable"
    ).reset_index(drop=True)

    return df

def main():
    """Parse every file in IN_PATH and write the combined result to OUT_CSV.

    Each file is loaded and parsed on its own; empty results are dropped. The
    remaining frames are concatenated and de-duplicated per
    (event, marketLabel, selectionLabel), keeping the row with the lowest
    odds_decimal. The table is written as UTF-8 CSV and its first 50 rows are
    printed.

    Raises:
        ValueError: if no file produced any rows.
    """
    frames = []
    for source_path in IN_PATH:
        print(f"Loading {source_path}")
        parsed = parse_betinia_player_props(load_json(source_path))
        if parsed.empty:
            continue
        frames.append(parsed)

    if len(frames) == 0:
        raise ValueError("Ingen data blev parsed fra nogen Betinia-filer.")

    dedup_keys = ["event", "marketLabel", "selectionLabel"]

    combined = pd.concat(frames, ignore_index=True)
    # Ascending price order makes keep="first" retain the lowest price.
    combined = combined.sort_values(dedup_keys + ["odds_decimal"], kind="stable")
    combined = combined.drop_duplicates(subset=dedup_keys, keep="first")
    combined = combined.reset_index(drop=True)

    combined.to_csv(OUT_CSV, index=False, encoding="utf-8")
    print(f"Saved {len(combined)} rows to {OUT_CSV}")
    print(combined.head(50))


if __name__ == "__main__":
    main()

Loading Betinia.txt
Loading betinia1.txt
Loading betinia2.txt
Loading betinia3.txt
Loading betinia4.txt
Loading Betinia5.txt
Loading betinia6.txt
Loading betinia7.txt
Loading betinia8.txt
Loading betinia9.txt
Loading betinia10.txt
Loading betinia11.txt
Saved 5382 rows to betinia_player_props.csv
                  event                 player selectionLabel  odds_decimal  \
0   AS Roma vs Sassuolo      Alessandro Gianni       Over 0.5        1.6154   
1   AS Roma vs Sassuolo      Alessandro Gianni       Over 1.5        4.2500   
2   AS Roma vs Sassuolo           Alieu Fadera       Over 0.5        2.7500   
3   AS Roma vs Sassuolo           Alieu Fadera       Over 1.5       12.0000   
4   AS Roma vs Sassuolo       Andrea Pinamonti       Over 0.5        1.7143   
5   AS Roma vs Sassuolo       Andrea Pinamonti       Over 1.5        4.7500   
6   AS Roma vs Sassuolo               Angelino       Over 0.5        3.2000   
7   AS Roma vs Sassuolo               Angelino       Over 1.5       17.