In [25]:
import json
import re
import pandas as pd
from datetime import datetime, timezone

# =========================
# CONFIG
# =========================
# Betsson event payload, used to map event ids to readable event names.
EVENT_PATH = "event_response.json"

# Betsson market groups in the order they should appear in the output.
# Each accordion file is named after its market: accordion_response_<name>.json
_BETSSON_MARKET_NAMES = ("fouls", "shots", "shotsOnTarget", "assists")
BETSSON_MARKETS_IN_ORDER = [
    {"name": market_name, "accordion_path": f"accordion_response_{market_name}.json"}
    for market_name in _BETSSON_MARKET_NAMES
]

# Betano payload (JSON content despite the .txt extension) and the merged output.
BETANO_IN_PATH = "betano.txt"
OUT_CSV = "betsson_betano_combined.csv"

# Column order of the combined CSV: shared descriptors first, then one
# status/odds pair per bookmaker.
OUT_COLUMNS = [
    "event", "player", "selectionLabel", "marketLabel", "deadline",
    "status_selection_betsson", "odds_decimal_betsson",
    "status_selection_betano", "odds_decimal_betano",
]

# When True the deadline is part of the merge key as well.
MERGE_WITH_DEADLINE = False

# =========================
# GENERIC HELPERS
# =========================
def load_json(path: str):
    """Read a UTF-8 text file and return its parsed JSON content.

    Raises:
        ValueError: if the file contains nothing but whitespace.
    """
    with open(path, "r", encoding="utf-8") as handle:
        content = handle.read().strip()
    if content:
        return json.loads(content)
    raise ValueError(f"Fil er tom: {path}")

def to_float(x):
    """Convert a scalar or array-like to numeric; unparseable values become NaN."""
    numeric = pd.to_numeric(x, errors="coerce")
    return numeric

def ms_to_iso_utc(ms):
    """Convert epoch milliseconds to an ISO-8601 string in UTC.

    Args:
        ms: epoch timestamp in milliseconds (anything ``int()`` accepts),
            or None.

    Returns:
        A string such as ``"2026-01-08T20:00:00+00:00"``, or None when ``ms``
        is None, not convertible to an integer, or outside the range the
        platform can represent.
    """
    if ms is None:
        return None
    try:
        seconds = int(ms) / 1000
        return datetime.fromtimestamp(seconds, tz=timezone.utc).isoformat()
    except (TypeError, ValueError, OverflowError, OSError):
        # Only the failures int() and fromtimestamp() can produce are treated
        # as "no deadline"; anything else is a real bug and should surface.
        return None

def normalize_event_name(name: str) -> str:
    """Normalize an event name into the "Home vs Away" form.

    Example: "Arsenal FC - Liverpool FC" -> "Arsenal vs Liverpool".
    Standalone "FC" tokens are removed (case-insensitive), whitespace is
    collapsed and " - " is replaced by " vs ". Non-string input yields None.
    """
    if not isinstance(name, str):
        return None
    without_fc = re.sub(r"\bFC\b", "", name.strip(), flags=re.IGNORECASE)
    collapsed = re.sub(r"\s+", " ", without_fc).strip()
    with_vs = collapsed.replace(" - ", " vs ")
    return re.sub(r"\s+", " ", with_vs).strip()

# =========================
# BETSSON PARSER
# =========================
def parse_line_from_marketid(market_id: str):
    """Extract the numeric line from a market id ending in "-<line>-<digits>".

    Returns the line as a float, or None when the id is not a string or does
    not end with that pattern.
    """
    if isinstance(market_id, str):
        found = re.search(r"-([0-9]+(?:\.[0-9]+)?)-\d+$", market_id)
        if found:
            return float(found.group(1))
    return None

def selection_decimal_betsson(sel: dict):
    """Return the decimal price of a Betsson selection, or None.

    The price table is read from "marketSelectionPriceFormats", falling back
    to "oddsFormats"; inside it key "1" is preferred over "decimal".
    """
    formats = sel.get("marketSelectionPriceFormats") or sel.get("oddsFormats") or {}
    if not isinstance(formats, dict):
        return None
    for key in ("1", "decimal"):
        if key in formats:
            return formats[key]
    return None

def extract_player_from_market_betsson(market: dict):
    """Return the player name attached to a Betsson market.

    Prefers marketSpecifics.groupLabels["2"]; otherwise takes the text to the
    right of the first "|" in the market label (or marketFriendlyName).
    Returns the (falsy) group label value when neither source yields a name.
    """
    specifics = market.get("marketSpecifics", {}) or {}
    labels = specifics.get("groupLabels", {}) or {}
    player = labels.get("2")
    if player:
        return player

    text = market.get("label") or market.get("marketFriendlyName") or ""
    if "|" in text:
        return text.split("|", 1)[1].strip()
    return player

def build_event_map(event_json: dict) -> dict:
    """Build a mapping of event id -> "Home vs Away" from a Betsson payload.

    Three payload shapes are handled: ``{"data": {"event": {...}}}``,
    ``{"data": {"events": [...]}}`` and a bare list of event dicts. Events
    lacking an id or two participant labels are left out.
    """

    def name_from_event(evt: dict):
        # side 1 is taken as home and side 2 as away; when a side is missing
        # the participant's position in the list is used instead.
        participants = evt.get("participants") or []
        home = away = None
        for participant in participants:
            side = participant.get("side")
            if side == 1:
                home = participant.get("label")
            elif side == 2:
                away = participant.get("label")
        if not home and len(participants) > 0:
            home = participants[0].get("label")
        if not away and len(participants) > 1:
            away = participants[1].get("label")
        return f"{home} vs {away}" if home and away else None

    def id_from_event(evt: dict):
        return evt.get("eventId") or evt.get("id")

    # Collect candidate events in the same order they would be registered:
    # data.event first, then data.events, then a top-level list.
    candidates = []
    data = event_json.get("data") if isinstance(event_json, dict) else None
    if isinstance(data, dict):
        single = data.get("event")
        if isinstance(single, dict):
            candidates.append(single)
        many = data.get("events")
        if isinstance(many, list):
            candidates.extend(many)
    if isinstance(event_json, list):
        candidates.extend(event_json)

    mapping = {}
    for evt in candidates:
        if not isinstance(evt, dict):
            continue
        eid = id_from_event(evt)
        ename = name_from_event(evt)
        if eid and ename:
            mapping[eid] = ename
    return mapping

def get_event_id_from_accordion(accordion: dict):
    """Return the eventId of the first market found in any accordion group.

    Groups are scanned in dict order; groups without markets are skipped.
    Returns None when no group has a market.
    """
    groups = (accordion.get("data", {}) or {}).get("accordions", {}) or {}
    for group in groups.values():
        group_markets = group.get("markets") or []
        if group_markets:
            return group_markets[0].get("eventId")
    return None

def parse_betsson_one_accordion(accordion: dict, source_order: int, event_map: dict) -> pd.DataFrame:
    """Flatten one Betsson accordion payload into one row per selection.

    Args:
        accordion: parsed accordion JSON; groups are read from
            ``data.accordions``.
        source_order: position of this accordion in the configured market
            order; stored on every row so the output can be sorted by it.
        event_map: event id -> event name, as built by ``build_event_map``.

    Returns:
        Selections left-joined with their market on ``marketId``, with
        ``odds_decimal_betsson`` coerced to numeric. An empty DataFrame is
        returned when the payload has no markets or no selections.
    """
    accordions = (accordion.get("data", {}) or {}).get("accordions", {}) or {}

    # Fall back to the raw event id when the id is unknown to event_map.
    event_id = get_event_id_from_accordion(accordion)
    event_text = (event_map.get(event_id) if event_id else None) or event_id
    # Identical for every market of this accordion, so computed once.
    event_norm = normalize_event_name(event_text)

    market_rows = []
    selection_rows = []

    for group in accordions.values():
        # `or []` guards against a key that is present but null, matching
        # the handling in get_event_id_from_accordion.
        for m in group.get("markets") or []:
            market_id = m.get("id")
            market_rows.append({
                "source_order": source_order,
                "event": event_text,
                "event_norm": event_norm,
                "marketId": market_id,
                "marketLabel": m.get("label"),
                "deadline": m.get("deadline"),
                "player": extract_player_from_market_betsson(m),
                "line": parse_line_from_marketid(market_id),
            })

        for s in group.get("selections") or []:
            selection_rows.append({
                "marketId": s.get("marketId"),
                "selectionLabel": s.get("label"),
                "odds_decimal_betsson": selection_decimal_betsson(s),
                "status_selection_betsson": s.get("status"),
            })

    df_markets = pd.DataFrame(market_rows)
    df_selections = pd.DataFrame(selection_rows)

    if df_markets.empty or df_selections.empty:
        return pd.DataFrame()

    df_all = df_selections.merge(df_markets, on="marketId", how="left")
    df_all["odds_decimal_betsson"] = to_float(df_all["odds_decimal_betsson"])
    return df_all

def parse_betsson_all() -> pd.DataFrame:
    """Parse every configured Betsson accordion file into one DataFrame.

    Reads the event payload at EVENT_PATH for event names, then each file in
    BETSSON_MARKETS_IN_ORDER; ``source_order`` is the 1-based position of the
    file in that list.

    Returns:
        One row per selection with the columns listed in ``keep`` below, or
        an empty DataFrame when no accordion produced any rows.
    """
    event_map = build_event_map(load_json(EVENT_PATH))

    keep = [
        "source_order", "event", "event_norm", "player",
        "selectionLabel", "marketLabel", "deadline",
        "status_selection_betsson", "odds_decimal_betsson"
    ]

    parts = []
    for i, item in enumerate(BETSSON_MARKETS_IN_ORDER, start=1):
        accordion = load_json(item["accordion_path"])
        df_part = parse_betsson_one_accordion(accordion, source_order=i, event_map=event_map)
        if not df_part.empty:
            # Project before concatenating so unused helper columns
            # (marketId, line) never take part in the concat. A column that
            # is entirely None in one part is presumably what triggers the
            # pandas concat warning seen for this call - TODO confirm.
            parts.append(df_part[keep])

    if not parts:
        return pd.DataFrame()

    df = pd.concat(parts, ignore_index=True)

    # Merge keys are compared as stripped strings.
    # NOTE: astype(str) turns missing values into the literal "None"/"nan".
    for c in ["event_norm", "player", "selectionLabel", "marketLabel", "deadline"]:
        df[c] = df[c].astype(str).str.strip()

    return df

# =========================
# BETANO PARSER
# =========================
def normalize_selection_label_betano(label: str):
    """Rewrite a Betano "N+" label as "Over N-0.5" (e.g. "2+" -> "Over 1.5").

    Any other string is returned stripped; non-strings are returned as-is.
    """
    if not isinstance(label, str):
        return label
    stripped = label.strip()
    at_least = re.fullmatch(r"(\d+)\+", stripped)
    if at_least is None:
        return stripped
    return f"Over {int(at_least.group(1)) - 0.5}"

def build_event_name_betano(evt: dict) -> str:
    """Return a display name for a Betano event.

    Sources are tried in this order: name/shortName, the first two
    participant labels (ordered by sortOrder) joined with " - ", the slug,
    and finally the stringified id / eventId, or "unknown_event".
    """
    title = evt.get("name") or evt.get("shortName")
    if isinstance(title, str) and title.strip():
        return title.strip()

    participants = evt.get("participants") or []
    if isinstance(participants, list) and len(participants) >= 2:
        ordered = sorted(participants, key=lambda entry: entry.get("sortOrder", 9999))
        labels = [
            label.strip()
            for label in (entry.get("label") for entry in ordered)
            if isinstance(label, str) and label.strip()
        ]
        if len(labels) >= 2:
            return f"{labels[0]} - {labels[1]}"

    slug = evt.get("slug")
    if isinstance(slug, str) and slug.strip():
        return slug.strip()

    return str(evt.get("id") or evt.get("eventId") or "unknown_event")

def pick_market_header_betano(market: dict) -> str:
    """Return the market's header text: tableLayout.headerTitle, else name.

    The first non-blank string wins and is returned stripped; None when
    neither is usable.
    """
    layout = market.get("tableLayout") or {}
    for candidate in (layout.get("headerTitle"), market.get("name")):
        if isinstance(candidate, str) and candidate.strip():
            return candidate.strip()
    return None

def selection_price_decimal_betano(sel: dict):
    """Return the raw decimal price of a Betano selection, or None.

    "price" may be a number (returned as-is), a string (returned stripped) or
    a dict, in which case the first of "decimal", "odds", "value" present is
    used. When none of that applies, oddsFormats["decimal"] is the fallback.
    """
    price = sel.get("price")
    if isinstance(price, (int, float)):
        return price
    if isinstance(price, str):
        return price.strip()
    if isinstance(price, dict):
        present = next((key for key in ("decimal", "odds", "value") if key in price), None)
        if present is not None:
            return price.get(present)

    fallback = sel.get("oddsFormats") or {}
    if isinstance(fallback, dict) and "decimal" in fallback:
        return fallback.get("decimal")
    return None

def strip_leading_player_from_header(header: str, players_in_market):
    """Remove a leading player name from a market header.

    Player names are tried longest first (case-insensitive) and must be
    followed by a space. Returns the stripped remainder, the stripped header
    when no name matches, or the input unchanged when it is not a non-blank
    string.
    """
    if not isinstance(header, str) or not header.strip():
        return header

    text = header.strip()
    lowered = text.lower()

    named = [p for p in players_in_market if isinstance(p, str) and p.strip()]
    # Ordering uses the raw (unstripped) length, longest first.
    for raw in sorted(named, key=len, reverse=True):
        candidate = raw.strip()
        if lowered.startswith(candidate.lower() + " "):
            return text[len(candidate):].strip()
    return text

def normalize_market_header_to_betsson(header: str):
    """Map a Betano market header onto the matching Betsson market label.

    The first matching rule wins (case-insensitive); "skud på mål" must be
    tested before plain "skud". Unrecognized headers are returned stripped,
    and non-string or blank input is returned unchanged.
    """
    if not isinstance(header, str) or not header.strip():
        return header
    text = header.strip()

    rules = (
        (r"\bskud\s+p[åa]\s+m[åa]l\b", "Antal afslutninger på mål"),
        (r"\bskud\b", "Spillers samlede antal skud"),
        (r"\bassist", "Spillers samlede antal assister"),
        (r"\bfrispark\b", "Spiller Frispark Begået"),
        (r"\bfoul", "Spiller Frispark Begået"),
    )
    for pattern, betsson_label in rules:
        if re.search(pattern, text, flags=re.IGNORECASE):
            return betsson_label

    return text

def is_full_match_market_betano(market: dict, raw_header: str) -> bool:
    """
    Keep only full-match markets.
    Returns False when the header or market["name"] mentions a half, period,
    quarter or overtime (substring match on the lowercased text).
    """
    excluded_tokens = (
        "1. halvleg", "2. halvleg", "halvleg",
        "1st half", "2nd half", "first half", "second half",
        "periode", "period", "quarter", "q1", "q2", "q3", "q4",
        "overtime", "over time", "ekstra tid", "forlænget",
        "inkl. overtid", "incl. overtime",
    )

    texts = [t for t in (raw_header, market.get("name")) if isinstance(t, str)]
    haystack = " | ".join(texts).lower()

    for token in excluded_tokens:
        if token in haystack:
            return False
    return True

def parse_betano_all() -> pd.DataFrame:
    """Parse the Betano payload at BETANO_IN_PATH into one row per selection.

    Only full-match markets that carry table rows are used. Every selection
    receives the status of its market. Rows without a numeric price are
    dropped, and rows sharing (event_norm, marketLabel, selectionLabel) are
    reduced to the one with the lowest odds.
    """
    doc = load_json(BETANO_IN_PATH)
    evt = ((doc.get("data") or {}).get("event")) or doc.get("event") or {}
    event_name = build_event_name_betano(evt)
    event_norm = normalize_event_name(event_name)

    markets = evt.get("markets") or []
    if not isinstance(markets, list):
        markets = []

    records = []
    for market in markets:
        layout = market.get("tableLayout") or {}
        table_rows = layout.get("rows") or []
        if not (isinstance(table_rows, list) and table_rows):
            continue

        raw_header = pick_market_header_betano(market)

        # Keep only full-match markets.
        if not is_full_match_market_betano(market, raw_header):
            continue

        # One title per table row; usable titles double as player names.
        row_titles = [row.get("title") or row.get("name") for row in table_rows]
        players_in_market = [
            title.strip()
            for title in row_titles
            if isinstance(title, str) and title.strip()
        ]

        market_type = normalize_market_header_to_betsson(
            strip_leading_player_from_header(raw_header, players_in_market)
        )
        deadline = ms_to_iso_utc(
            market.get("marketCloseTimeMillis")
            or market.get("closeTimeMillis")
            or market.get("deadlineMillis")
        )
        status_market = market.get("status") or market.get("marketStatus") or "Open"

        for title, table_row in zip(row_titles, table_rows):
            if not (isinstance(title, str) and title.strip()):
                continue
            player = title.strip()
            market_label = f"{market_type} | {player}" if market_type else player

            selection_groups = table_row.get("groupSelections") or []
            if not isinstance(selection_groups, list):
                continue

            for selection_group in selection_groups:
                selections = selection_group.get("selections") or []
                if not isinstance(selections, list):
                    continue

                for sel in selections:
                    raw_label = sel.get("name") or sel.get("label")
                    if not (isinstance(raw_label, str) and raw_label.strip()):
                        continue

                    records.append({
                        "event_betano": event_name,
                        "event_norm": event_norm,
                        "player": player,
                        "selectionLabel": normalize_selection_label_betano(raw_label),
                        "marketLabel": market_label,
                        "deadline": deadline,
                        "status_selection_betano": status_market,
                        "odds_decimal_betano": selection_price_decimal_betano(sel),
                    })

    df = pd.DataFrame(records)
    if df.empty:
        return df

    df["odds_decimal_betano"] = to_float(df["odds_decimal_betano"])
    df = df.dropna(subset=["odds_decimal_betano"]).reset_index(drop=True)

    # Merge keys are compared as stripped strings.
    for c in ["event_norm", "player", "selectionLabel", "marketLabel", "deadline"]:
        df[c] = df[c].astype(str).str.strip()

    # After the full-match filter, deduplicate per key: the ascending sort on
    # odds means keep="first" retains the lowest price for each key.
    dedupe_key = ["event_norm", "marketLabel", "selectionLabel"]
    df = df.sort_values(dedupe_key + ["odds_decimal_betano"], kind="stable")
    df = df.drop_duplicates(subset=dedupe_key, keep="first").reset_index(drop=True)

    return df

# =========================
# MERGE + OUTPUT
# =========================
def main():
    """Merge Betsson and Betano selections and write the combined CSV.

    Betsson rows are the left side of the join, so every Betsson selection is
    kept and the Betano columns are filled in where the merge key matches.

    Raises:
        ValueError: if either bookmaker produced no rows.
    """
    df_betsson = parse_betsson_all()
    df_betano = parse_betano_all()

    if df_betsson.empty:
        raise ValueError("Betsson-data er tom. Tjek dine accordion_response_*.json og event_response.json")
    if df_betano.empty:
        raise ValueError("Betano-data er tom. Tjek betano.txt")

    # Merge keys; the deadline takes part only when configured.
    keys = ["event_norm", "player", "selectionLabel", "marketLabel"]
    if MERGE_WITH_DEADLINE:
        keys = keys + ["deadline"]

    df_merged = df_betsson.merge(df_betano, on=keys, how="left")
    merged_columns = df_merged.columns

    # Make sure there is an event column.
    if "event" not in merged_columns:
        for candidate in ("event_x", "event_betsson"):
            if candidate in merged_columns:
                df_merged["event"] = df_merged[candidate]
                break
        else:
            df_merged["event"] = df_merged.get("event_betano")

    # When the deadline was not a merge key it comes back suffixed; use the
    # left-hand (Betsson) one.
    if "deadline" not in df_merged.columns and "deadline_x" in df_merged.columns:
        df_merged["deadline"] = df_merged["deadline_x"]

    n_total = len(df_merged)
    if "odds_decimal_betano" in df_merged.columns:
        n_match = df_merged["odds_decimal_betano"].notna().sum()
    else:
        n_match = 0
    print(f"Merge matches: {n_match}/{n_total}")

    # Any output column still absent is added as all-NA.
    for col in OUT_COLUMNS:
        if col not in df_merged.columns:
            df_merged[col] = pd.NA

    df_out = df_merged[OUT_COLUMNS].copy()

    # Sort in Betsson block order; source_order is only a temporary sort key.
    if "source_order" in df_merged.columns:
        sort_keys = ["source_order", "event", "player", "marketLabel", "selectionLabel"]
        with_order = df_out.assign(source_order=df_merged["source_order"].values)
        ordered = with_order.sort_values(sort_keys, kind="stable", na_position="last")
        df_out = ordered.drop(columns=["source_order"])

    df_out.to_csv(OUT_CSV, index=False, encoding="utf-8")
    print(f"Saved {len(df_out)} rows to {OUT_CSV}")
    print(df_out.head(20))

if __name__ == "__main__":
    main()

Merge matches: 275/465
Saved 465 rows to betsson_betano_combined.csv
                    event                player selectionLabel  \
40   Arsenal vs Liverpool   Alexis Mac Allister       Over 0.5   
41   Arsenal vs Liverpool   Alexis Mac Allister       Over 1.5   
39   Arsenal vs Liverpool   Alexis Mac Allister       Over 2.5   
126  Arsenal vs Liverpool           Amara Nallo       Over 0.5   
127  Arsenal vs Liverpool           Amara Nallo       Over 1.5   
125  Arsenal vs Liverpool           Amara Nallo       Over 2.5   
94   Arsenal vs Liverpool  Andre HarrimanAnnous       Over 0.5   
96   Arsenal vs Liverpool  Andre HarrimanAnnous       Over 1.5   
95   Arsenal vs Liverpool  Andre HarrimanAnnous       Over 2.5   
65   Arsenal vs Liverpool      Andrew Robertson       Over 0.5   
66   Arsenal vs Liverpool      Andrew Robertson       Over 1.5   
55   Arsenal vs Liverpool             Ben White       Over 0.5   
56   Arsenal vs Liverpool             Ben White       Over 1.5   
115  Ar

  df = pd.concat(parts, ignore_index=True)


In [26]:
import pandas as pd
import numpy as np

# --- 1) Input (the combined CSV produced by the merge step) ---
IN_MERGED_CSV = "betsson_betano_combined.csv"   # CHANGE if your file has a different name
OUT_ALL_WITH_RATIOS = "merged_with_ratios.csv"
OUT_VALUEBETS = "valuebets_ratio_ge_2.csv"

THRESHOLD = 2.0


def add_ratio_columns(frame):
    """Return a copy of ``frame`` with odds ratios and best-bookmaker columns.

    Args:
        frame: DataFrame holding ``odds_decimal_betsson`` and
            ``odds_decimal_betano``.

    Returns:
        A new DataFrame with both odds columns coerced to numeric and these
        columns appended: ``ratio_betsson_over_betano``,
        ``ratio_betano_over_betsson``, ``ratio_max``, ``best_bookmaker``,
        ``best_odds_decimal``, ``other_bookmaker``, ``other_odds_decimal``.
        A row is only filled in when both odds are present and positive;
        otherwise every derived column stays empty for that row.

    Raises:
        ValueError: if either odds column is missing.
    """
    required = ["odds_decimal_betsson", "odds_decimal_betano"]
    missing = [c for c in required if c not in frame.columns]
    if missing:
        raise ValueError(f"Mangler kolonner i input: {missing}. Tjek filnavn/kolonnenavne.")

    out = frame.copy()

    # Clean the odds.
    betsson = pd.to_numeric(out["odds_decimal_betsson"], errors="coerce")
    betano = pd.to_numeric(out["odds_decimal_betano"], errors="coerce")
    out["odds_decimal_betsson"] = betsson
    out["odds_decimal_betano"] = betano

    # Avoid dividing by zero or NaN.
    valid = betsson.notna() & betano.notna() & (betsson > 0) & (betano > 0)

    # Ratios in both directions, left as NaN for rows that are not valid.
    out["ratio_betsson_over_betano"] = (betsson / betano).where(valid)
    out["ratio_betano_over_betsson"] = (betano / betsson).where(valid)
    out["ratio_max"] = out[["ratio_betsson_over_betano", "ratio_betano_over_betsson"]].max(axis=1)

    # A comparison against NaN is False, so without the `valid` guard every
    # row lacking usable odds would be labelled "Betano". Ties go to Betsson.
    betsson_best = valid & (out["ratio_betsson_over_betano"] >= out["ratio_betano_over_betsson"])
    betano_best = valid & ~betsson_best

    out["best_bookmaker"] = None
    out.loc[betsson_best, "best_bookmaker"] = "Betsson"
    out.loc[betano_best, "best_bookmaker"] = "Betano"

    out["best_odds_decimal"] = betsson.where(betsson_best, betano).where(valid)

    out["other_bookmaker"] = None
    out.loc[betsson_best, "other_bookmaker"] = "Betano"
    out.loc[betano_best, "other_bookmaker"] = "Betsson"

    out["other_odds_decimal"] = betano.where(betsson_best, betsson).where(valid)

    return out


# The file I/O runs only when this is executed as a script or notebook cell
# (where __name__ is "__main__"), so add_ratio_columns can be imported safely.
if __name__ == "__main__":
    # --- 2) Load and compute ---
    df = add_ratio_columns(pd.read_csv(IN_MERGED_CSV))

    # ratio_max is filled only for rows where both odds were usable.
    valid = df["ratio_max"].notna()

    # --- 3) Filter valuebets, largest ratios first ---
    df_value = df[valid & (df["ratio_max"] >= THRESHOLD)].copy()
    df_value = df_value.sort_values(["ratio_max"], ascending=False, kind="stable")

    # --- 4) Save outputs ---
    df.to_csv(OUT_ALL_WITH_RATIOS, index=False, encoding="utf-8")
    df_value.to_csv(OUT_VALUEBETS, index=False, encoding="utf-8")

    print(f"Gemte: {OUT_ALL_WITH_RATIOS} ({len(df)} rækker)")
    print(f"Valuebets ratio >= {THRESHOLD}: {OUT_VALUEBETS} ({len(df_value)} rækker)")

    # Show the top 20 in the console; descriptor columns only when present.
    cols_show = [
        c
        for c in ["event", "player", "selectionLabel", "marketLabel", "deadline"]
        if c in df_value.columns
    ]
    cols_show += [
        "odds_decimal_betsson",
        "odds_decimal_betano",
        "best_bookmaker",
        "best_odds_decimal",
        "other_bookmaker",
        "other_odds_decimal",
        "ratio_max",
    ]

    print("\nTOP 20 valuebets:")
    print(df_value[cols_show].head(20).to_string(index=False))

Gemte: merged_with_ratios.csv (465 rækker)
Valuebets ratio >= 2.0: valuebets_ratio_ge_2.csv (12 rækker)

TOP 20 valuebets:
               event             player selectionLabel                                      marketLabel             deadline  odds_decimal_betsson  odds_decimal_betano best_bookmaker  best_odds_decimal other_bookmaker  other_odds_decimal  ratio_max
Arsenal vs Liverpool     Michael Laffey       Over 2.5     Spillers samlede antal skud | Michael Laffey 2026-01-08T20:00:00Z                  3.20                14.50         Betano              14.50         Betsson                3.20   4.531250
Arsenal vs Liverpool      Tommy Pilling       Over 2.5      Spillers samlede antal skud | Tommy Pilling 2026-01-08T20:00:00Z                  3.20                11.75         Betano              11.75         Betsson                3.20   3.671875
Arsenal vs Liverpool     Michael Laffey       Over 1.5     Spillers samlede antal skud | Michael Laffey 2026-01-08T20:00:00Z      