In [1]:
import os
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests

# Credentials must never live in the notebook: the token is read from the
# SPORTMONKS_API_TOKEN environment variable at run time. Export it before starting
# the kernel. NOTE(review): the token that used to be hard-coded here has been
# exposed and should be rotated.
API_TOKEN = os.environ.get("SPORTMONKS_API_TOKEN", "")
# Root of the football endpoints (fixtures, pre-match odds).
BASE_URL = "https://api.sportmonks.com/v3/football"
BOOKMAKER_URL = "https://api.sportmonks.com/v3/odds/bookmakers"  # bookmaker lookup is under /v3/odds/...

# -------------------------
# Fixtures (next 12 hours)
# -------------------------
def fetch_fixtures_next_12h() -> pd.DataFrame:
    """Return fixtures starting in the next 12 hours (UTC).

    Requests ``/fixtures/between/{today}/{tomorrow}`` (UTC dates), follows the
    pagination metadata until the last page, then trims the result to the
    ``[now, now + 12h]`` window.

    Returns:
        DataFrame with columns ``FixtureID``, ``Fixture`` and ``StartingAt``
        (``StartingAt`` is parsed to a UTC datetime when any row exists).

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    now_utc = datetime.now(timezone.utc)
    end_utc = now_utc + timedelta(hours=12)

    # The URL takes whole dates, so ask for today..tomorrow and trim to the
    # exact 12-hour window after parsing the timestamps.
    start_date = now_utc.date()
    end_date = (now_utc + timedelta(days=1)).date()
    endpoint = f"/fixtures/between/{start_date:%Y-%m-%d}/{end_date:%Y-%m-%d}"

    fixtures = []
    page = 1

    # Context manager so the pooled connections are released even on errors.
    with requests.Session() as session:
        while True:
            r = session.get(
                f"{BASE_URL}{endpoint}",
                params={"api_token": API_TOKEN, "page": page},
                timeout=30,
            )
            r.raise_for_status()
            payload = r.json()

            fixtures.extend(payload.get("data") or [])

            # Pagination handling: be defensive across endpoints/plans
            pagination = ((payload.get("meta") or {}).get("pagination")) or {}
            has_more = pagination.get("has_more")
            total_pages = pagination.get("total_pages")
            current_page = pagination.get("current_page")

            if has_more is False:
                break
            if total_pages is not None and current_page is not None and int(current_page) >= int(total_pages):
                break
            if has_more is None and (total_pages is None or current_page is None):
                # If the API didn't return pagination metadata, assume single page.
                break

            page += 1

    df = pd.DataFrame(
        {
            "FixtureID": [f.get("id") for f in fixtures],
            "Fixture": [f.get("name") for f in fixtures],
            "StartingAt": [f.get("starting_at") for f in fixtures],
        }
    )

    if df.empty:
        return df

    # Unparseable timestamps become NaT and are dropped by the window filter below.
    df["StartingAt"] = pd.to_datetime(df["StartingAt"], utc=True, errors="coerce")
    df = df[df["StartingAt"].between(now_utc, end_utc)].reset_index(drop=True)
    return df


# -------------------------
# Bookmaker ID -> Name lookup (cached)
# -------------------------
def _get_bookmaker_name(bookmaker_id: int | None, session: requests.Session, cache: dict[int, str | None]) -> str | None:
    """
    Resolve bookmaker_id to bookmaker name via /v3/odds/bookmakers/{ID}.

    Every lookup outcome is stored in ``cache`` (mutated in place), including
    "not found" results, which are stored as ``None``. Without negative caching
    an unknown bookmaker would cost one HTTP request per odd that references it.

    Args:
        bookmaker_id: Identifier from the odds payload; ``None`` or a value that
            cannot be converted to ``int`` yields ``None`` without a request.
        session: Session used for the HTTP lookup.
        cache: Mapping of bookmaker id -> resolved name (or ``None``).

    Returns:
        The stripped bookmaker name, or ``None`` when it cannot be resolved.

    Raises:
        requests.HTTPError: for error responses other than 404.
    """
    if bookmaker_id is None:
        return None

    try:
        bookmaker_id_int = int(bookmaker_id)
    except (TypeError, ValueError):
        return None

    if bookmaker_id_int in cache:
        return cache[bookmaker_id_int]

    r = session.get(
        f"{BOOKMAKER_URL}/{bookmaker_id_int}",
        params={"api_token": API_TOKEN},
        timeout=30,
    )
    if r.status_code == 404:
        # Remember the miss so the same unknown id is not requested again.
        cache[bookmaker_id_int] = None
        return None
    r.raise_for_status()

    data = (r.json() or {}).get("data") or {}
    name = data.get("name")
    resolved = name.strip() if isinstance(name, str) and name.strip() else None

    cache[bookmaker_id_int] = resolved
    return resolved


# -------------------------
# Odds (pre-match; keeps only the columns used by the analysis below)
# -------------------------
def fetch_odds_prematch_for_fixture(
    fixture_id: int,
    session: requests.Session,
    bookmaker_cache: dict[int, str],
) -> list[dict]:
    """Fetch the pre-match odds of one fixture as a list of flat row dicts.

    A 404 response is treated as "no odds" and yields an empty list; any other
    error status raises ``requests.HTTPError``. The bookmaker name of each row
    is resolved from ``bookmaker_id`` through ``_get_bookmaker_name``, which
    shares ``bookmaker_cache`` across calls.
    """
    response = session.get(
        f"{BASE_URL}/odds/pre-match/fixtures/{fixture_id}",
        params={"api_token": API_TOKEN},
        timeout=30,
    )
    if response.status_code == 404:
        return []
    response.raise_for_status()

    def _to_row(entry: dict) -> dict:
        # Resolve the name first so the lookup (and its caching) happens once per odd.
        resolved_name = _get_bookmaker_name(entry.get("bookmaker_id"), session, bookmaker_cache)
        return {
            "OddID": entry.get("id"),
            "FixtureID": entry.get("fixture_id"),
            "MarketID": entry.get("market_id"),
            "Bookmaker": resolved_name,
            "Market": entry.get("market_description"),
            "Label": entry.get("label"),
            "Name": entry.get("name"),
            "Value": entry.get("value"),
            "DP3": entry.get("dp3"),
            "Total": entry.get("total"),
            "Handicap": entry.get("handicap"),
            "LatestBookmakerUpdate": entry.get("latest_bookmaker_update"),
        }

    entries = response.json().get("data") or []
    return [_to_row(entry) for entry in entries]


def fetch_odds_next_12h() -> pd.DataFrame:
    """Collect pre-match odds for every fixture starting in the next 12 hours.

    Fetches the upcoming fixtures, pulls the odds of each one through a shared
    HTTP session and bookmaker-name cache, joins the fixture name / kick-off
    time onto each odd and drops rows that cannot be compared.

    Returns:
        DataFrame with the odds columns, ``Fixture``, ``StartingAt`` and a
        numeric ``Odds`` column. When there are no fixtures or no odds, an empty
        frame with the same columns is returned, so column selection downstream
        keeps working.

    Raises:
        requests.HTTPError: propagated from the underlying API calls.
    """
    # Column layout of the populated result; reused for the empty results.
    output_columns = [
        "OddID", "FixtureID", "MarketID", "Bookmaker", "Market", "Label", "Name",
        "Value", "DP3", "Total", "Handicap", "LatestBookmakerUpdate",
        "Fixture", "StartingAt", "Odds",
    ]

    fixtures_df = fetch_fixtures_next_12h()
    if fixtures_df.empty:
        return pd.DataFrame(columns=output_columns)

    bookmaker_cache: dict[int, str] = {}
    rows: list[dict] = []

    # A fixture without an id cannot be queried; skip it instead of crashing in astype(int).
    fixture_ids = fixtures_df["FixtureID"].dropna().astype(int)

    # Context manager so the pooled connections are released even on errors.
    with requests.Session() as session:
        for fid in fixture_ids:
            rows.extend(fetch_odds_prematch_for_fixture(fid, session, bookmaker_cache))

    if not rows:
        return pd.DataFrame(columns=output_columns)

    df = pd.DataFrame(rows)
    df = df.merge(fixtures_df, on="FixtureID", how="left")

    # Normalize types
    df["Odds"] = pd.to_numeric(df["Value"], errors="coerce")
    df["Total"] = pd.to_numeric(df["Total"], errors="coerce")
    df["Handicap"] = pd.to_numeric(df["Handicap"], errors="coerce")

    # Drop odds that lack a price, market, bookmaker, label or kick-off time.
    df = df.dropna(subset=["Odds", "MarketID", "Bookmaker", "Label", "StartingAt"]).reset_index(drop=True)

    return df


# -------------------------
# Example usage
# -------------------------
# Pull the odds once; later cells reuse `df`.
df = fetch_odds_next_12h()

# Columns worth eyeballing, in display order.
columns_to_show = [
    "StartingAt", "Fixture", "Bookmaker", "Market", "Label",
    "Odds", "Name", "Total", "Handicap", "LatestBookmakerUpdate",
]

# Preview the first 50 rows of the selected columns.
df.loc[:, columns_to_show].head(50)

Unnamed: 0,StartingAt,Fixture,Bookmaker,Market,Label,Odds,Name,Total,Handicap,LatestBookmakerUpdate
0,2026-01-06 19:45:00+00:00,Sassuolo vs Juventus,bet365,Player Headed Shots on Target,2.5,67.0,Walid Cheddira,,,2026-01-06 13:41:20
1,2026-01-06 19:45:00+00:00,Sassuolo vs Juventus,bet365,Player Shots,7.5,21.0,Armand Lauriente,,,2026-01-06 15:31:50
2,2026-01-06 19:45:00+00:00,Sassuolo vs Juventus,bet365,Goalscorers,Anytime,9.5,Juan Cabal,,,2026-01-06 19:41:42
3,2026-01-06 19:45:00+00:00,Sassuolo vs Juventus,bet365,1st Player Booked,1st Card,29.0,Daniele Rugani,,,2026-01-06 19:21:09
4,2026-01-06 19:45:00+00:00,Sassuolo vs Juventus,bet365,Alternative Handicap Result,Tie,9.0,,,-3.0,2026-01-06 19:41:42
5,2026-01-06 19:45:00+00:00,Sassuolo vs Juventus,bet365,Player Shots On Target,1.5,21.0,Aster Vranckx,,,2026-01-06 19:41:42
6,2026-01-06 19:45:00+00:00,Sassuolo vs Juventus,bet365,Player Shots On Target,1.5,19.0,Josh Doig,,,2026-01-06 19:41:42
7,2026-01-06 19:45:00+00:00,Sassuolo vs Juventus,bet365,1st Player Booked,1st Card,29.0,Dusan Vlahovic,,,2026-01-06 19:21:09
8,2026-01-06 19:45:00+00:00,Sassuolo vs Juventus,bet365,1st Player Booked,1st Card,41.0,Giacomo Satalino,,,2026-01-06 19:21:09
9,2026-01-06 19:45:00+00:00,Sassuolo vs Juventus,bet365,Player to be booked,Booked,3.5,Armand Lauriente,,,2026-01-06 19:41:42


In [2]:
import pandas as pd
import numpy as np

def find_valuebets(
    df: pd.DataFrame,
    pct_threshold: float = 0.30,
    min_bookmakers: int = 3,
    group_cols: list[str] | None = None,
    odds_col: str = "Odds",
    bookmaker_col: str = "Bookmaker",
    # normalization so 1.5 vs 1.50 don't split groups:
    line_round: int = 3,
    na_sentinel: str = "__NA__",
    # optional: avoid multiple rows per bookmaker for same selection
    dedupe_per_bookmaker: bool = True,
    dedupe_keep: str = "best",  # "best" (max odds) or "latest"
    latest_col: str = "LatestBookmakerUpdate",
) -> pd.DataFrame:
    """
    Valuebet if Odds >= (1 + pct_threshold) * median(Odds) within the same selection group.

    Grouping default (as you requested):
      FixtureID, MarketID, Label, Name, Total, Handicap

    Adds:
      MedianOdds, ValueThreshold, IsValueBet, ComparedCount, ComparedBookmakers, ComparedOdds
    """
    out = df.copy()

    if group_cols is None:
        group_cols = ["FixtureID", "MarketID", "Label", "Name", "Total", "Handicap"]

    # --- Required columns check ---
    required = [odds_col, bookmaker_col] + group_cols
    missing = [c for c in required if c not in out.columns]
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # --- Normalize core types ---
    out[odds_col] = pd.to_numeric(out[odds_col], errors="coerce")
    out = out[out[odds_col] <= 10].copy()
    out[bookmaker_col] = out[bookmaker_col].astype("string")

    # Total/Handicap should be numeric for stable rounding/grouping
    # (we'll later convert to a string key with sentinel)
    out["Total"] = pd.to_numeric(out["Total"], errors="coerce").round(line_round)
    out["Handicap"] = pd.to_numeric(out["Handicap"], errors="coerce").round(line_round)

    # Name can be missing; keep as string
    out["Name"] = out["Name"].astype("string")

    # --- Drop only truly unusable rows ---
    # We DO NOT drop on Total/Handicap/Name; those can be NaN.
    out = out.dropna(subset=[odds_col, bookmaker_col, "FixtureID", "MarketID", "Label"]).copy()

    # --- Build safe grouping keys (so NaNs don't delete rows) ---
    # Create grouping versions of Name/Total/Handicap that are never null.
    out["_NameKey"] = out["Name"].fillna(na_sentinel)
    out["_TotalKey"] = out["Total"].map(lambda x: f"{x:.{line_round}f}" if pd.notna(x) else na_sentinel)
    out["_HandicapKey"] = out["Handicap"].map(lambda x: f"{x:.{line_round}f}" if pd.notna(x) else na_sentinel)

    # Use exactly the grouping you want, but with safe keys for nullable cols
    group_key_cols = ["FixtureID", "MarketID", "Label", "_NameKey", "_TotalKey", "_HandicapKey"]

    # --- Optional: dedupe (group + bookmaker) to avoid duplicates blowing up comparisons ---
    if dedupe_per_bookmaker:
        dedupe_keys = group_key_cols + [bookmaker_col]

        if dedupe_keep.lower() == "latest" and latest_col in out.columns:
            out[latest_col] = pd.to_datetime(out[latest_col], utc=True, errors="coerce")
            out = out.sort_values(by=dedupe_keys + [latest_col], ascending=True, kind="mergesort")
            out = out.drop_duplicates(subset=dedupe_keys, keep="last").copy()
        else:
            # keep "best" odds per bookmaker per group
            out = out.sort_values(by=dedupe_keys + [odds_col], ascending=True, kind="mergesort")
            out = out.drop_duplicates(subset=dedupe_keys, keep="last").copy()

    # --- Core valuebet stats ---
    g = out.groupby(group_key_cols, dropna=False)
    out["MedianOdds"] = g[odds_col].transform("median")
    out["ValueThreshold"] = out["MedianOdds"] * (1.0 + pct_threshold)
    out["_GroupBookmakersN"] = g[bookmaker_col].transform("nunique")
    out["IsValueBet"] = (out[odds_col] >= out["ValueThreshold"]) & (out["_GroupBookmakersN"] >= min_bookmakers)

    # --- Compared vs lists WITHOUT apply(axis=1) ---
    out = out.reset_index(drop=False).rename(columns={"index": "_RowID"})

    left = out[["_RowID"] + group_key_cols + [bookmaker_col, odds_col]]
    right = out[["_RowID"] + group_key_cols + [bookmaker_col, odds_col]].rename(
        columns={"_RowID": "_RowID_other", bookmaker_col: "Bookmaker_other", odds_col: "Odds_other"}
    )

    m = left.merge(right, on=group_key_cols, how="left")
    m = m[(m["_RowID"] != m["_RowID_other"]) & (m[bookmaker_col] != m["Bookmaker_other"])]

    agg = m.groupby("_RowID", sort=False).agg(
        ComparedBookmakers=("Bookmaker_other", lambda s: sorted(pd.unique(s.dropna()))),
        ComparedOdds=("Odds_other", lambda s: list(pd.to_numeric(s, errors="coerce").dropna().astype(float))),
        ComparedCount=("Bookmaker_other", lambda s: int(pd.Series(s.dropna()).nunique())),
    )

    out = out.merge(agg, on="_RowID", how="left")
    out["ComparedBookmakers"] = out["ComparedBookmakers"].apply(lambda x: x if isinstance(x, list) else [])
    out["ComparedOdds"] = out["ComparedOdds"].apply(lambda x: x if isinstance(x, list) else [])
    out["ComparedCount"] = out["ComparedCount"].fillna(0).astype(int)

    # Cleanup temp columns
    out = out.drop(columns=["_GroupBookmakersN", "_NameKey", "_TotalKey", "_HandicapKey"])

    return out


In [6]:
# Selection identity used for the median comparison.
selection_cols = ["FixtureID", "MarketID", "Label", "Name", "Total", "Handicap"]
value_df = find_valuebets(df, pct_threshold=0.30, min_bookmakers=3, group_cols=selection_cols)

# Columns to display for the flagged rows.
cols = [
    "StartingAt",
    "Fixture",
    "Bookmaker",
    "Market",
    "Label",
    "Name",
    "Total",
    "Handicap",
    "Odds",
    "MedianOdds",
    "ValueThreshold",
    "IsValueBet",
    "ComparedCount",
    "ComparedBookmakers",
]
value_df.loc[value_df["IsValueBet"], cols].head(50)

Unnamed: 0,StartingAt,Fixture,Bookmaker,Market,Label,Name,Total,Handicap,Odds,MedianOdds,ValueThreshold,IsValueBet,ComparedCount,ComparedBookmakers
101,2026-01-06 19:45:00+00:00,Sassuolo vs Juventus,MelBet,Asian Handicap,Away,Away,,-1.5,2.85,1.03,1.339,True,2,"[1xbet, Marathonbet]"
862,2026-01-06 19:45:00+00:00,Sassuolo vs Juventus,Pinnacle,Handicap Result,Draw,Draw,,3.0,9.85,7.5,9.75,True,3,"[10Bet, Marathonbet, Unibet]"
2630,2026-01-06 20:00:00+00:00,West Ham United vs Nottingham Forest,CloudBet,Asian Handicap,Away,Away,,-0.25,2.14,1.56,2.028,True,6,"[1xbet, HKJC, MansionBet, Marathonbet, Pinnacl..."
2631,2026-01-06 20:00:00+00:00,West Ham United vs Nottingham Forest,HKJC,Asian Handicap,Away,Away,,-0.25,2.08,1.56,2.028,True,6,"[1xbet, CloudBet, MansionBet, Marathonbet, Pin..."
5164,2026-01-06 19:45:00+00:00,Livingston vs St. Mirren,CloudBet,Asian Handicap,Away,Away,,-0.25,1.94,1.47,1.911,True,4,"[1xbet, MansionBet, Marathonbet, Pinnacle]"
5165,2026-01-06 19:45:00+00:00,Livingston vs St. Mirren,MansionBet,Asian Handicap,Away,Away,,-0.25,2.38,1.47,1.911,True,4,"[1xbet, CloudBet, Marathonbet, Pinnacle]"
7645,2026-01-06 20:00:00+00:00,Rangers vs Aberdeen,Pinnacle,Asian Handicap,Away,Away,,-0.5,3.35,2.5,3.25,True,2,"[10Bet, Unibet]"
7698,2026-01-06 20:00:00+00:00,Rangers vs Aberdeen,1xbet,Asian Handicap,Away,Away,,0.75,9.3,5.73,7.449,True,3,"[CloudBet, MansionBet, Marathonbet]"
7701,2026-01-06 20:00:00+00:00,Rangers vs Aberdeen,Marathonbet,Asian Handicap,Away,Away,,0.75,9.3,5.73,7.449,True,3,"[1xbet, CloudBet, MansionBet]"
7705,2026-01-06 20:00:00+00:00,Rangers vs Aberdeen,Marathonbet,Asian Handicap,Away,Away,,1.0,9.0,2.04,2.652,True,3,"[CloudBet, Dafabet, MansionBet]"


In [7]:
# only show valuebets:
# Keep only the flagged rows and hide the technical columns
# (DP3, OddID, _RowID, MarketID); columns that are absent are skipped silently.
hidden_columns = ["DP3", "OddID", "_RowID", "MarketID"]
value_bets = value_df.loc[value_df["IsValueBet"]].drop(columns=hidden_columns, errors="ignore")
value_bets.head(50)

Unnamed: 0,FixtureID,Bookmaker,Market,Label,Name,Value,Total,Handicap,LatestBookmakerUpdate,Fixture,StartingAt,Odds,MedianOdds,ValueThreshold,IsValueBet,ComparedBookmakers,ComparedOdds,ComparedCount
101,19425069,MelBet,Asian Handicap,Away,Away,2.85,,-1.5,2026-01-06 19:31:14,Sassuolo vs Juventus,2026-01-06 19:45:00+00:00,2.85,1.03,1.339,True,"[1xbet, Marathonbet]","[1.03, 1.0]",2
862,19425069,Pinnacle,Handicap Result,Draw,Draw,9.85,,3.0,2026-01-06 11:19:41,Sassuolo vs Juventus,2026-01-06 19:45:00+00:00,9.85,7.5,9.75,True,"[10Bet, Marathonbet, Unibet]","[7.5, 5.0, 7.5]",3
2630,19427665,CloudBet,Asian Handicap,Away,Away,2.14,,-0.25,2026-01-06 19:30:12,West Ham United vs Nottingham Forest,2026-01-06 20:00:00+00:00,2.14,1.56,2.028,True,"[1xbet, HKJC, MansionBet, Marathonbet, Pinnacl...","[1.51, 2.08, 2.02, 1.51, 1.55, 1.56]",6
2631,19427665,HKJC,Asian Handicap,Away,Away,2.08,,-0.25,2026-01-06 19:30:46,West Ham United vs Nottingham Forest,2026-01-06 20:00:00+00:00,2.08,1.56,2.028,True,"[1xbet, CloudBet, MansionBet, Marathonbet, Pin...","[1.51, 2.14, 2.02, 1.51, 1.55, 1.56]",6
5164,19428102,CloudBet,Asian Handicap,Away,Away,1.94,,-0.25,2026-01-06 19:31:14,Livingston vs St. Mirren,2026-01-06 19:45:00+00:00,1.94,1.47,1.911,True,"[1xbet, MansionBet, Marathonbet, Pinnacle]","[1.41, 2.38, 1.41, 1.47]",4
5165,19428102,MansionBet,Asian Handicap,Away,Away,2.38,,-0.25,2026-01-04 18:01:08,Livingston vs St. Mirren,2026-01-06 19:45:00+00:00,2.38,1.47,1.911,True,"[1xbet, CloudBet, Marathonbet, Pinnacle]","[1.41, 1.94, 1.41, 1.47]",4
7645,19590624,Pinnacle,Asian Handicap,Away,Away,3.35,,-0.5,2026-01-06 11:31:41,Rangers vs Aberdeen,2026-01-06 20:00:00+00:00,3.35,2.5,3.25,True,"[10Bet, Unibet]","[2.5, 2.4]",2
7698,19590624,1xbet,Asian Handicap,Away,Away,9.3,,0.75,2026-01-06 11:34:10,Rangers vs Aberdeen,2026-01-06 20:00:00+00:00,9.3,5.73,7.449,True,"[CloudBet, MansionBet, Marathonbet]","[1.99, 2.16, 9.3]",3
7701,19590624,Marathonbet,Asian Handicap,Away,Away,9.3,,0.75,2026-01-06 11:35:43,Rangers vs Aberdeen,2026-01-06 20:00:00+00:00,9.3,5.73,7.449,True,"[1xbet, CloudBet, MansionBet]","[9.3, 1.99, 2.16]",3
7705,19590624,Marathonbet,Asian Handicap,Away,Away,9.0,,1.0,2026-01-06 11:35:43,Rangers vs Aberdeen,2026-01-06 20:00:00+00:00,9.0,2.04,2.652,True,"[CloudBet, Dafabet, MansionBet]","[1.97, 2.03, 2.05]",3


In [9]:
# 1) Do you have enough rows after cleaning?
# 1) Do you have enough rows after cleaning?
print("Rows:", len(value_df))
print("Valuebets:", value_df["IsValueBet"].sum())

# 2) How many groups have >= 3 bookmakers?
# Name/Total/Handicap are often missing, so they are NOT part of the dropna:
# dropping on them removes most rows and the group count collapses towards zero.
# dropna=False keeps a missing value as its own group key, the same way
# find_valuebets treats it.
tmp = df.copy()
tmp["Odds"] = pd.to_numeric(tmp["Odds"], errors="coerce")
tmp = tmp.dropna(subset=["Odds", "Bookmaker", "FixtureID", "MarketID", "Label"])

grp = tmp.groupby(
    ["FixtureID", "MarketID", "Label", "Name", "Total", "Handicap"],
    dropna=False,
)["Bookmaker"].nunique()
print("Groups total:", len(grp))
print("Groups with >=3 books:", (grp >= 3).sum())

# 3) What's the maximum "value ratio" you even see?
# ratio = Odds / MedianOdds; valuebet at 30% is ratio >= 1.30
mx = (value_df["Odds"] / value_df["MedianOdds"]).max()
print("Max Odds/MedianOdds ratio:", mx)

Rows: 0
Valuebets: 0
Groups total: 0
Groups with >=3 books: 0
Max Odds/MedianOdds ratio: nan
