In [3]:

import os
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests

# SECURITY: a credential is committed in this file. Supply the token through the
# SPORTMONKS_API_TOKEN environment variable instead; the literal below is kept only
# as a backward-compatible fallback and should be rotated and removed.
API_TOKEN = (
    os.environ.get("SPORTMONKS_API_TOKEN")
    or "KuQ4Rt1ypOCvXfcm3cZXdPhOUlbuOrpHBgJkFm1MWTvtRR8TLhgEI02hjDxz"
)
# Root for the football endpoints used below (fixtures, pre-match odds).
BASE_URL = "https://api.sportmonks.com/v3/football"
BOOKMAKER_URL = "https://api.sportmonks.com/v3/odds/bookmakers"  # bookmaker lookup is under /v3/odds/...

# -------------------------
# Fixtures (next 12 hours)
# -------------------------
def fetch_fixtures_next_12h() -> pd.DataFrame:
    """Return fixtures starting in the next 12 hours (UTC).

    Requests the "fixtures between dates" endpoint for today and tomorrow (UTC),
    follows pagination, and then narrows the result to kick-offs that fall
    between now and now + 12 hours.

    Returns:
        A DataFrame with columns ``FixtureID``, ``Fixture`` and ``StartingAt``.
        ``StartingAt`` is converted to timezone-aware UTC timestamps unless no
        fixtures were returned at all, in which case the empty frame is
        returned as built.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    now_utc = datetime.now(timezone.utc)
    end_utc = now_utc + timedelta(hours=12)

    # The endpoint works on whole dates, so query today..tomorrow and filter
    # down to the exact 12-hour window afterwards.
    start_date = now_utc.date()
    end_date = (now_utc + timedelta(days=1)).date()
    endpoint = f"/fixtures/between/{start_date:%Y-%m-%d}/{end_date:%Y-%m-%d}"

    fixtures = []
    page = 1

    # Context manager guarantees the session's pooled connections are released,
    # including when a request raises.
    with requests.Session() as session:
        while True:
            r = session.get(
                f"{BASE_URL}{endpoint}",
                params={"api_token": API_TOKEN, "page": page},
                timeout=30,
            )
            r.raise_for_status()
            payload = r.json()

            fixtures.extend(payload.get("data") or [])

            # Pagination handling: be defensive across endpoints/plans
            pagination = ((payload.get("meta") or {}).get("pagination")) or {}
            has_more = pagination.get("has_more")
            total_pages = pagination.get("total_pages")
            current_page = pagination.get("current_page")

            if has_more is False:
                break
            if (
                total_pages is not None
                and current_page is not None
                and int(current_page) >= int(total_pages)
            ):
                break
            if has_more is None and (total_pages is None or current_page is None):
                # If the API didn't return pagination metadata, assume single page.
                break

            page += 1

    df = pd.DataFrame(
        {
            "FixtureID": [f.get("id") for f in fixtures],
            "Fixture": [f.get("name") for f in fixtures],
            "StartingAt": [f.get("starting_at") for f in fixtures],
        }
    )

    if df.empty:
        return df

    # Unparseable timestamps become NaT and are dropped by the window filter.
    df["StartingAt"] = pd.to_datetime(df["StartingAt"], utc=True, errors="coerce")
    df = df[df["StartingAt"].between(now_utc, end_utc)].reset_index(drop=True)
    return df


# -------------------------
# Bookmaker ID -> Name lookup (cached)
# -------------------------
def _get_bookmaker_name(bookmaker_id: int | None, session: requests.Session, cache: dict[int, str]) -> str | None:
    """
    Resolve bookmaker_id to bookmaker name via /v3/odds/bookmakers/{ID}.

    Results are cached in ``cache`` (mutated in place) to avoid repeated calls.
    Lookups that fail definitively (HTTP 404, or a response without a usable
    name) are cached as an empty string so the same unknown bookmaker is not
    requested again for every odd row; an empty cached value is reported to the
    caller as ``None``.

    Args:
        bookmaker_id: Bookmaker identifier; ``None`` or a value that cannot be
            converted to ``int`` yields ``None`` without any request.
        session: Session used for the HTTP lookup.
        cache: Mapping of bookmaker id to stripped name ("" marks a known miss).

    Returns:
        The stripped bookmaker name, or ``None`` if it could not be resolved.

    Raises:
        requests.HTTPError: for error responses other than 404.
    """
    if bookmaker_id is None:
        return None

    try:
        bookmaker_id_int = int(bookmaker_id)
    except (TypeError, ValueError):
        return None

    if bookmaker_id_int in cache:
        # "" is the negative-cache sentinel; surface it as None.
        return cache[bookmaker_id_int] or None

    r = session.get(
        f"{BOOKMAKER_URL}/{bookmaker_id_int}",
        params={"api_token": API_TOKEN},
        timeout=30,
    )
    if r.status_code == 404:
        cache[bookmaker_id_int] = ""
        return None
    r.raise_for_status()

    data = (r.json() or {}).get("data") or {}
    name = data.get("name") if isinstance(data, dict) else None
    resolved = name.strip() if isinstance(name, str) else ""

    cache[bookmaker_id_int] = resolved
    return resolved or None


# -------------------------
# Odds (keep ONLY the fields used in your first version)
# -------------------------
def fetch_odds_prematch_for_fixture(
    fixture_id: int,
    session: requests.Session,
    bookmaker_cache: dict[int, str],
) -> list[dict]:
    """Fetch pre-match odds for one fixture and reshape them into flat rows.

    Returns an empty list when the endpoint answers 404 or carries no data.
    Each row holds the output columns listed in ``column_sources`` below; the
    ``Bookmaker`` column is resolved from the odd's ``bookmaker_id`` through
    ``_get_bookmaker_name`` (sharing ``bookmaker_cache`` across calls).
    Other HTTP error statuses raise via ``raise_for_status``.
    """
    # Output column -> key in the API odd object. ``None`` marks the one column
    # that is not copied verbatim but looked up by bookmaker id.
    column_sources = (
        ("OddID", "id"),
        ("FixtureID", "fixture_id"),
        ("MarketID", "market_id"),
        ("Bookmaker", None),
        ("Market", "market_description"),
        ("Label", "label"),
        ("Name", "name"),
        ("Value", "value"),
        ("DP3", "dp3"),
        ("Total", "total"),
        ("Handicap", "handicap"),
        ("LatestBookmakerUpdate", "latest_bookmaker_update"),
    )

    url = f"{BASE_URL}/odds/pre-match/fixtures/{fixture_id}"
    response = session.get(url, params={"api_token": API_TOKEN}, timeout=30)
    if response.status_code == 404:
        return []
    response.raise_for_status()

    shaped: list[dict] = []
    for entry in response.json().get("data") or []:
        resolved_name = _get_bookmaker_name(
            entry.get("bookmaker_id"), session, bookmaker_cache
        )
        shaped.append(
            {
                column: resolved_name if source is None else entry.get(source)
                for column, source in column_sources
            }
        )
    return shaped


def fetch_odds_next_12h() -> pd.DataFrame:
    """Collect pre-match odds for every fixture starting in the next 12 hours.

    Fetches the upcoming fixtures, pulls the odds for each one over a single
    shared HTTP session (with a shared bookmaker-name cache), joins the fixture
    name and kick-off time onto every odd row, and normalizes numeric columns.

    Returns:
        One row per odd, with a numeric ``Odds`` column derived from ``Value``.
        Rows missing any of ``Odds``, ``MarketID``, ``Bookmaker``, ``Label`` or
        ``StartingAt`` are dropped. An empty DataFrame is returned when there
        are no fixtures or no odds.
    """
    fixtures_df = fetch_fixtures_next_12h()
    if fixtures_df.empty:
        return pd.DataFrame()

    bookmaker_cache: dict[int, str] = {}
    rows: list[dict] = []

    # Skip fixtures without an id instead of crashing in astype(int).
    fixture_ids = fixtures_df["FixtureID"].dropna().astype(int)

    # Context manager releases the pooled connections even if a request raises.
    with requests.Session() as session:
        for fid in fixture_ids:
            rows.extend(fetch_odds_prematch_for_fixture(fid, session, bookmaker_cache))

    df = pd.DataFrame(rows)
    if df.empty:
        return df

    df = df.merge(fixtures_df, on="FixtureID", how="left")

    # Normalize types
    df["Odds"] = pd.to_numeric(df["Value"], errors="coerce")
    df["Total"] = pd.to_numeric(df["Total"], errors="coerce")
    df["Handicap"] = pd.to_numeric(df["Handicap"], errors="coerce")

    # Drop unusable odds (same spirit as your original)
    df = df.dropna(subset=["Odds", "MarketID", "Bookmaker", "Label", "StartingAt"]).reset_index(drop=True)

    return df


# -------------------------
# Example usage + valuebet scan
# -------------------------
df = fetch_odds_next_12h()

VALUEBET_THRESHOLD_PCT = 0.30  # 30% premium vs median => valuebet; tweak as needed

# Base columns shown in the result frames (comparison columns are appended below).
columns_to_show = [
    "StartingAt",
    "Fixture",
    "Bookmaker",
    "Market",
    "Label",
    "Odds",
    "Name",
    "Total",
    "Handicap",
    "LatestBookmakerUpdate",
]

if df.empty:
    results = pd.DataFrame()
    value_bets = pd.DataFrame()
else:
    # A "selection" is one outcome of one market of one fixture; odds from
    # different bookmakers are compared within the same selection.
    group_keys = ["FixtureID", "MarketID", "Label", "Name", "Total", "Handicap"]
    selection_keys = group_keys + ["Bookmaker"]

    # Keep one line per bookmaker per selection (latest row wins) so the median
    # isn't skewed by multiple prices from the same bookmaker.
    # Missing update timestamps would otherwise sort last and win under
    # keep="last"; the helper column orders them before any real timestamp.
    df = (
        df.assign(_HasUpdate=df["LatestBookmakerUpdate"].notna())
        .sort_values(selection_keys + ["_HasUpdate", "LatestBookmakerUpdate"])
        .drop_duplicates(subset=selection_keys, keep="last")
        .drop(columns="_HasUpdate")
    )

    # dropna=False keeps selections whose Name/Total/Handicap are missing.
    grouped = df.groupby(group_keys, dropna=False)

    stats_median = grouped["Odds"].median().rename("OddsMedian")
    stats_bookmaker_count = grouped["Bookmaker"].nunique().rename("ComparedBookmakerCount")
    stats_bookmaker_list = grouped["Bookmaker"].agg(
        lambda x: sorted({b for b in x if isinstance(b, str) and b.strip()})
    ).rename("ComparedBookmakers")

    df = df.merge(stats_median, on=group_keys, how="left")
    df = df.merge(stats_bookmaker_count, on=group_keys, how="left")
    df = df.merge(stats_bookmaker_list, on=group_keys, how="left")

    # Every compared bookmaker except the row's own. The isinstance guard
    # tolerates a non-list cell (e.g. NaN from an unmatched merge).
    df["OtherBookmakersCompared"] = [
        [b for b in (compared if isinstance(compared, list) else []) if b != own]
        for compared, own in zip(df["ComparedBookmakers"], df["Bookmaker"])
    ]
    df["OtherBookmakersCount"] = df["OtherBookmakersCompared"].apply(len)

    # Assign the cleaned ratio back explicitly: an in-place replace on the
    # selected column is a chained assignment and is not guaranteed to update
    # df. NaN keeps the column float so the threshold comparison is False.
    df["OddsVsMedian"] = (df["Odds"] / df["OddsMedian"]).replace(
        [float("inf"), -float("inf")], float("nan")
    )
    df["ValueBetThresholdPct"] = VALUEBET_THRESHOLD_PCT
    # A value bet needs at least two distinct bookmakers in the comparison and
    # odds at or above the median by the configured premium.
    df["IsValueBet"] = (df["ComparedBookmakerCount"] > 1) & (
        df["OddsVsMedian"] >= (1 + VALUEBET_THRESHOLD_PCT)
    )

    df["ComparedBookmakersStr"] = df["ComparedBookmakers"].apply(
        lambda lst: ", ".join(lst) if isinstance(lst, list) else None
    )
    df["OtherBookmakersComparedStr"] = df["OtherBookmakersCompared"].apply(
        lambda lst: ", ".join(lst) if lst else ""
    )

    display_cols = columns_to_show + [
        "OddsMedian",
        "OddsVsMedian",
        "ValueBetThresholdPct",
        "IsValueBet",
        "ComparedBookmakerCount",
        "OtherBookmakersCount",
        "OtherBookmakersComparedStr",
        "ComparedBookmakersStr",
    ]

    results = df[display_cols].copy()
    value_bets = df[df["IsValueBet"]][display_cols].copy()

# Inspect these in the notebook as needed:
# - results.head()
# - value_bets.head()


First 50 odds rows (with value flags and comparisons):
               StartingAt       Fixture   Bookmaker                             Market              Label   Odds              Name  Total  Handicap LatestBookmakerUpdate  OddsMedian  OddsVsMedian  ValueBetThresholdPct  IsValueBet  ComparedBookmakerCount  OtherBookmakersCount                                                                                                                                         OtherBookmakersComparedStr                                                                                                                                                        ComparedBookmakersStr
2026-01-06 17:00:00+00:00 Lecce vs Roma      bet365                         Team Shots                  1   1.83              None    NaN       NaN   2026-01-04 19:40:37       1.830      1.000000                   0.3       False                       1                     0                                                          