In [5]:
import pandas as pd
import numpy as np

# =========================
# CONFIG
# =========================
# Input CSV paths, one per bookmaker.
BETINIA_CSV: str = "betinia_player_props.csv"
EXPEKT_CSV: str = "expekt_player_props.csv"

# Path the merged comparison table is written to.
OUT_COMBINED: str = "betinia_expekt_combined.csv"

# Set to True to additionally require the deadline to match when merging.
MERGE_WITH_DEADLINE: bool = False

# =========================
# LOAD
# =========================
# Read both bookmaker files up front, then refuse to continue if either one
# contains no rows (Betinia is checked first, then Expekt).
df_betinia, df_expekt = (
    pd.read_csv(csv_path) for csv_path in (BETINIA_CSV, EXPEKT_CSV)
)

for loaded, empty_message in (
    (df_betinia, "Betinia CSV er tom."),
    (df_expekt, "Expekt CSV er tom."),
):
    if loaded.empty:
        raise ValueError(empty_message)

# =========================
# NORMALIZE / TYPES
# =========================
for frame in (df_betinia, df_expekt):
    # Cast the text columns that are present to str and trim surrounding
    # whitespace, so the merge keys compare equal across both files.
    text_columns = [
        col
        for col in ("event", "player", "selectionLabel", "marketLabel", "deadline")
        if col in frame.columns
    ]
    for col in text_columns:
        frame[col] = frame[col].astype(str).str.strip()

    # Odds as numbers; values that cannot be parsed become NaN.
    frame["odds_decimal"] = pd.to_numeric(frame["odds_decimal"], errors="coerce")

# Give the odds/status columns a per-bookmaker name so they stay apart
# after the merge.
df_betinia = df_betinia.rename(
    columns=dict(
        odds_decimal="odds_decimal_betinia",
        status_selection="status_selection_betinia",
    )
)
df_expekt = df_expekt.rename(
    columns=dict(
        odds_decimal="odds_decimal_expekt",
        status_selection="status_selection_expekt",
    )
)

# =========================
# MERGE
# =========================
# Merge keys: the deadline is only part of the key when explicitly requested.
keys = ["event", "player", "selectionLabel", "marketLabel"]
if MERGE_WITH_DEADLINE:
    keys.append("deadline")

# Betinia is the left side and Expekt is mapped onto it, so every Betinia row
# is kept even when Expekt has no counterpart.
# Explicit suffixes: any non-key column present in both files (e.g. "deadline"
# when it is not a merge key) would otherwise be renamed with pandas' default
# "_x"/"_y", which hides which bookmaker it came from. The suffixes follow the
# same "_betinia"/"_expekt" convention as the odds and status columns.
# NOTE: if Expekt holds several rows for one key, the matching Betinia row is
# repeated once per Expekt row.
df_merged = df_betinia.merge(
    df_expekt,
    on=keys,
    how="left",
    suffixes=("_betinia", "_expekt"),
)

# =========================
# PAYOUT + RATIOS
# =========================
# Payout per 1 unit staked is simply the decimal odds.
payout_betinia = df_merged["odds_decimal_betinia"]
payout_expekt = df_merged["odds_decimal_expekt"]
df_merged["payout_betinia"] = payout_betinia
df_merged["payout_expekt"] = payout_expekt

# A row is comparable only when both bookmakers have a positive price.
valid = (
    payout_betinia.notna()
    & payout_expekt.notna()
    & (payout_betinia > 0)
    & (payout_expekt > 0)
)

# Ratios in both directions; rows that are not comparable stay NaN.
df_merged["ratio_betinia_over_expekt"] = (payout_betinia / payout_expekt).where(valid)
df_merged["ratio_expekt_over_betinia"] = (payout_expekt / payout_betinia).where(valid)

df_merged["ratio_max"] = df_merged[
    ["ratio_betinia_over_expekt", "ratio_expekt_over_betinia"]
].max(axis=1)

# For positive odds, "Betinia ratio >= Expekt ratio" is the same as
# "Betinia payout >= Expekt payout"; ties go to Betinia.
betinia_is_best = payout_betinia >= payout_expekt

df_merged["best_bookmaker"] = np.where(betinia_is_best, "Betinia", "Expekt")
df_merged["best_payout"] = np.where(betinia_is_best, payout_betinia, payout_expekt)
df_merged["other_bookmaker"] = np.where(betinia_is_best, "Expekt", "Betinia")
df_merged["other_payout"] = np.where(betinia_is_best, payout_expekt, payout_betinia)

# Comparisons against NaN are False, so without this mask every row lacking
# a usable Expekt price would be labelled best_bookmaker == "Expekt" with a
# NaN best_payout. Where no comparison is possible, leave the comparison
# columns missing instead of reporting a winner.
for comparison_column in (
    "best_bookmaker",
    "best_payout",
    "other_bookmaker",
    "other_payout",
):
    df_merged[comparison_column] = df_merged[comparison_column].where(valid)

# =========================
# OUTPUT
# =========================
# Persist the full merged table, including rows without an Expekt match.
df_merged.to_csv(OUT_COMBINED, index=False, encoding="utf-8")

n_total = len(df_merged)
n_match = df_merged["payout_expekt"].notna().sum()
print(f"Merge matches (Expekt found): {n_match}/{n_total}")
print(f"Saved combined CSV: {OUT_COMBINED}")

# Number of rows in the console preview. One constant drives both the heading
# and the slice, so they cannot drift apart (the heading used to say 20 while
# 100 rows were printed).
TOP_N = 20

# Only rows where both odds exist, largest ratio first.
df_top = df_merged[valid].sort_values("ratio_max", ascending=False, kind="stable")

cols_show = ["event", "player", "selectionLabel", "marketLabel"]

# "deadline" keeps its plain name only when it was a merge key (or exists in
# just one file); otherwise the merge suffixes it per side. Show whichever
# variant is present instead of silently dropping it.
cols_show += [
    col
    for col in df_top.columns
    if col == "deadline" or col.startswith("deadline_")
]

cols_show += [
    "payout_betinia", "payout_expekt",
    "best_bookmaker", "best_payout",
    "other_bookmaker", "other_payout",
    "ratio_max",
]

print(f"\nTOP {TOP_N} ratios (kun hvor begge odds findes):")
print(df_top[cols_show].head(TOP_N).to_string(index=False))

Merge matches (Expekt found): 304/606
Saved combined CSV: betinia_expekt_combined.csv

TOP 20 ratios (kun hvor begge odds findes):
                                   event             player selectionLabel                                          marketLabel  payout_betinia  payout_expekt best_bookmaker  best_payout other_bookmaker  other_payout  ratio_max
Eintracht Frankfurt vs Borussia Dortmund      Ansgar Knauff       Over 2.5    Spillers samlede antal tacklinger | Ansgar Knauff          7.5000           2.38        Betinia       7.5000          Expekt        2.3800   3.151261
Eintracht Frankfurt vs Borussia Dortmund      Karim Adeyemi       Over 2.5    Spillers samlede antal tacklinger | Karim Adeyemi          6.6667           2.40        Betinia       6.6667          Expekt        2.4000   2.777792
Eintracht Frankfurt vs Borussia Dortmund      Ansgar Knauff       Over 1.5    Spillers samlede antal tacklinger | Ansgar Knauff          3.6667           1.40        Betinia       3.666