In [7]:
import pandas as pd
import numpy as np

# =========================
# CONFIG
# =========================
# Input files, one per bookmaker.
BETINIA_CSV = "betinia_player_props.csv"
EXPEKT_CSV  = "expekt_player_props.csv"
BETANO_CSV  = "betano_player_props.csv"

# Destination of the merged result.
OUT_COMBINED = "betinia_expekt_betano_combined.csv"

# Set to True to also require the deadline to match when merging.
MERGE_WITH_DEADLINE = False

# =========================
# LOAD
# =========================
# Read all three inputs first (in this order), then validate them.
df_betinia, df_expekt, df_betano = (
    pd.read_csv(_path) for _path in (BETINIA_CSV, EXPEKT_CSV, BETANO_CSV)
)

# Abort early on the first frame that has no rows.
for _frame, _message in (
    (df_betinia, "Betinia CSV er tom."),
    (df_expekt, "Expekt CSV er tom."),
    (df_betano, "Betano CSV er tom."),
):
    if _frame.empty:
        raise ValueError(_message)

# =========================
# NORMALIZE / TYPES
# =========================
def clean_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of a bookmaker frame; the input is not modified.

    Each of the columns ``event``, ``player``, ``selectionLabel``,
    ``marketLabel`` and ``deadline`` that is present is converted to text
    with surrounding whitespace removed. Missing values in those columns
    stay missing instead of becoming the literal strings ``"nan"`` or
    ``"None"``.

    ``odds_decimal``, when present, is converted to numeric; values that
    cannot be parsed become NaN.

    Args:
        df: Raw frame as loaded from one bookmaker CSV.

    Returns:
        A new DataFrame with the normalisation applied.
    """
    df = df.copy()

    # Text columns: string-typed and stripped of surrounding whitespace.
    for col in ("event", "player", "selectionLabel", "marketLabel", "deadline"):
        if col not in df.columns:
            continue
        missing = df[col].isna()
        # astype(str) would turn NaN/None into "nan"/"None"; mask them back
        # to missing so they are not written out as if they were real labels.
        df[col] = df[col].astype(str).str.strip().mask(missing)

    # Odds -> float; unparseable entries become NaN.
    if "odds_decimal" in df.columns:
        df["odds_decimal"] = pd.to_numeric(df["odds_decimal"], errors="coerce")

    return df

# Normalise each frame, then suffix its odds/status columns with the
# bookmaker name so the three sources stay distinguishable after merging.
df_betinia = clean_df(df_betinia).rename(
    columns={
        "odds_decimal": "odds_decimal_betinia",
        "status_selection": "status_selection_betinia",
    }
)

df_expekt = clean_df(df_expekt).rename(
    columns={
        "odds_decimal": "odds_decimal_expekt",
        "status_selection": "status_selection_expekt",
    }
)

df_betano = clean_df(df_betano).rename(
    columns={
        "odds_decimal": "odds_decimal_betano",
        "status_selection": "status_selection_betano",
    }
)

# =========================
# MERGE KEYS
# =========================
# The deadline only takes part in the join when explicitly requested.
keys = ["event", "player", "selectionLabel", "marketLabel"]
if MERGE_WITH_DEADLINE:
    keys.append("deadline")

# =========================
# OUTER MERGE (so that no rows are lost)
# =========================
df_merged = (
    df_betinia
    .merge(df_expekt, on=keys, how="outer")
    .merge(df_betano, on=keys, how="outer")
)

# =========================
# PAYOUT COLS
# =========================
# One numeric payout column per bookmaker, taken from its odds column.
# DataFrame.get is used so an absent odds column is passed on as None
# rather than raising KeyError at the lookup.
for _payout_col, _odds_col in (
    ("payout_betinia", "odds_decimal_betinia"),
    ("payout_expekt", "odds_decimal_expekt"),
    ("payout_betano", "odds_decimal_betano"),
):
    df_merged[_payout_col] = pd.to_numeric(df_merged.get(_odds_col), errors="coerce")

# =========================
# PAIRWISE RATIOS
# (only where both values are present)
# =========================
def ratio(a, b):
    """Return ``a / b`` element-wise, NaN wherever the quotient is not usable.

    A position is usable only when both ``a`` and ``b`` are non-missing and
    strictly positive. The result is a float64 Series on ``a``'s index.
    """
    usable = (a > 0) & (b > 0) & a.notna() & b.notna()
    # Division by zero yields inf here, but those positions are masked out.
    quotient = np.where(usable, a / b, np.nan)
    return pd.Series(quotient, index=a.index, dtype="float64")

# Both directions for every bookmaker pair: (target column, numerator, denominator).
for _target, _numerator, _denominator in (
    ("ratio_betinia_over_expekt", "payout_betinia", "payout_expekt"),
    ("ratio_expekt_over_betinia", "payout_expekt", "payout_betinia"),
    ("ratio_betinia_over_betano", "payout_betinia", "payout_betano"),
    ("ratio_betano_over_betinia", "payout_betano", "payout_betinia"),
    ("ratio_expekt_over_betano", "payout_expekt", "payout_betano"),
    ("ratio_betano_over_expekt", "payout_betano", "payout_expekt"),
):
    df_merged[_target] = ratio(df_merged[_numerator], df_merged[_denominator])

# =========================
# BEST + SECOND BEST (across all three bookmakers)
# =========================
# payout_cols and bookmakers are parallel lists: bookmakers[i] is the display
# name for payout_cols[i]. Keep them in the same order.
payout_cols = ["payout_betinia", "payout_expekt", "payout_betano"]
bookmakers = ["Betinia", "Expekt", "Betano"]

# Non-positive payouts are invalid and must not count: turn them into NaN.
payout_matrix = df_merged[payout_cols].copy()
payout_matrix = payout_matrix.where(payout_matrix > 0)

df_merged["best_payout"] = payout_matrix.max(axis=1)

# Best bookmaker per row. DataFrame.idxmax(axis=1) is deprecated for rows
# that are entirely NaN (pandas >= 2.1) and is slated to raise, so the
# position is computed with a NaN-safe argmax instead. NaN cells are
# replaced by -inf, which can never beat a valid (strictly positive) payout.
# Ties resolve to the first column, as idxmax does.
_payout_values = payout_matrix.to_numpy(dtype=float)
_has_payout = ~np.isnan(_payout_values).all(axis=1)
_best_pos = np.argmax(np.where(np.isnan(_payout_values), -np.inf, _payout_values), axis=1)

# Map the column position to the display name; rows without any valid payout
# get a missing best bookmaker.
df_merged["best_bookmaker"] = pd.Series(
    np.array(bookmakers, dtype=object)[_best_pos],
    index=df_merged.index,
).where(_has_payout)

# Second best: sort each row's payouts and pick the second largest valid one.
# np.sort is ascending and places NaN last, so a row with k valid values holds
# them in positions 0..k-1 and its second largest sits at position k-2.
sorted_vals = np.sort(payout_matrix.to_numpy(dtype=float), axis=1)
_n_valid = (~np.isnan(sorted_vals)).sum(axis=1)

# Rows with fewer than two valid payouts keep NaN.
second_best = np.full(len(df_merged), np.nan, dtype=float)
_rows_with_two = np.flatnonzero(_n_valid >= 2)
second_best[_rows_with_two] = sorted_vals[_rows_with_two, _n_valid[_rows_with_two] - 2]

df_merged["second_best_payout"] = second_best

# Ratio between best and second best, only where both exist and are positive.
_best = df_merged["best_payout"]
_second = df_merged["second_best_payout"]

valid_best = _best.notna() & _second.notna() & (_best > 0) & (_second > 0)

# Rows that do not qualify are left as NaN.
df_merged["ratio_best_over_second"] = (_best / _second).where(valid_best)

# =========================
# OUTPUT
# =========================
df_merged.to_csv(OUT_COMBINED, index=False, encoding="utf-8")

# Row counts: total, per bookmaker with a parsed payout, and rows where at
# least two bookmakers have a valid (positive) payout.
n_total = len(df_merged)
n_betinia, n_expekt, n_betano = (
    df_merged[_col].notna().sum()
    for _col in ("payout_betinia", "payout_expekt", "payout_betano")
)
n_two_plus = payout_matrix.count(axis=1).ge(2).sum()

print(
    "\n".join(
        [
            f"Rows total: {n_total}",
            f"Rows with Betinia odds: {n_betinia}",
            f"Rows with Expekt odds:  {n_expekt}",
            f"Rows with Betano odds:  {n_betano}",
            f"Rows with >= 2 bookmakers (best/second ratio possible): {n_two_plus}",
            f"Saved combined CSV: {OUT_COMBINED}",
        ]
    )
)

# Top 100 among the rows where best vs. second best can be computed,
# largest ratio first (stable sort keeps the merge order among ties).
df_top = df_merged.loc[valid_best].sort_values(
    "ratio_best_over_second", ascending=False, kind="stable"
)

# Key columns first (deadline only when it was part of the merge keys),
# followed by the payouts and the best/second-best summary.
_deadline_cols = (
    ["deadline"] if MERGE_WITH_DEADLINE and "deadline" in df_top.columns else []
)
cols_show = [
    "event",
    "player",
    "selectionLabel",
    "marketLabel",
    *_deadline_cols,
    "payout_betinia",
    "payout_expekt",
    "payout_betano",
    "best_bookmaker",
    "best_payout",
    "second_best_payout",
    "ratio_best_over_second",
]

print("\nTOP (best vs second) hvor mindst 2 bookmakere findes:")
print(df_top[cols_show].head(100).to_string(index=False))

Rows total: 1328
Rows with Betinia odds: 606
Rows with Expekt odds:  734
Rows with Betano odds:  599
Rows with >= 2 bookmakers (best/second ratio possible): 440
Saved combined CSV: betinia_expekt_betano_combined.csv

TOP (best vs second) hvor mindst 2 bookmakere findes:
                                   event               player selectionLabel                                            marketLabel  payout_betinia  payout_expekt  payout_betano best_bookmaker  best_payout  second_best_payout  ratio_best_over_second
Eintracht Frankfurt vs Borussia Dortmund      Nathaniel Brown       Over 2.5          Spillers samlede antal skud | Nathaniel Brown             NaN           4.00          16.50         Betano      16.5000              4.0000                4.125000
Eintracht Frankfurt vs Borussia Dortmund            Yan Couto       Over 2.5                Spillers samlede antal skud | Yan Couto             NaN           3.40          11.00         Betano      11.0000              3.4000    