In [2]:
import os
import re
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests

# SECURITY: supply the token through the SPORTMONKS_API_TOKEN environment
# variable. The literal below is kept only as a backward-compatible fallback;
# it is a credential committed to source and should be rotated and removed.
API_TOKEN = (
    os.environ.get("SPORTMONKS_API_TOKEN")
    or "guthxAv1rvKshVf1XgEeNbVGvXX2V1aDzDVhlmj98EaaF6akBm6fEts7IbEJ"
)
BASE_URL = "https://api.sportmonks.com/v3/football"
BOOKMAKER_URL = "https://api.sportmonks.com/v3/odds/bookmakers"  # bookmaker lookup is under /v3/odds/...

# Compared against the output of _norm(), which lower-cases its input, so this
# value must stay lower-case for the match to succeed.
BET365_NAME = "bet365"


# -------------------------
# Text helpers
# -------------------------
def _norm(s) -> str:
    if s is None:
        return ""
    s = str(s).lower().strip()
    s = re.sub(r"\s+", " ", s)
    return s


# -------------------------
# Fixtures (next X hours)
# -------------------------
def fetch_fixtures_next_hours(hours: int = 20) -> pd.DataFrame:
    """Return the fixtures that start within the next ``hours`` hours (UTC).

    Fetches every page of ``/fixtures/between/{start}/{end}`` and then keeps
    only the fixtures whose ``starting_at`` lies between "now" and
    "now + hours" (both taken in UTC).

    Args:
        hours: Length of the look-ahead window, in hours.

    Returns:
        A DataFrame with the columns ``FixtureID``, ``Fixture`` and
        ``StartingAt`` (timezone-aware UTC timestamps). When the API returns
        no fixtures at all, the empty frame is returned unparsed.

    Raises:
        requests.HTTPError: If any page request returns an error status.
    """
    now_utc = datetime.now(timezone.utc)
    end_utc = now_utc + timedelta(hours=hours)

    start_date = now_utc.date()
    # The requested date range must reach the day on which the window ends,
    # otherwise windows longer than about a day silently lose fixtures. It is
    # never narrower than the previous fixed "today .. tomorrow" range.
    # NOTE(review): the API may cap the length of a date range - confirm
    # before using very large ``hours`` values.
    end_date = max(end_utc, now_utc + timedelta(days=1)).date()
    endpoint = f"/fixtures/between/{start_date:%Y-%m-%d}/{end_date:%Y-%m-%d}"

    fixtures = []
    page = 1

    # The context manager releases the pooled connections once paging is done.
    with requests.Session() as session:
        while True:
            r = session.get(
                f"{BASE_URL}{endpoint}",
                params={"api_token": API_TOKEN, "page": page},
                timeout=30,
            )
            r.raise_for_status()
            payload = r.json()

            fixtures.extend(payload.get("data") or [])

            pagination = (payload.get("meta") or {}).get("pagination") or {}
            has_more = pagination.get("has_more")
            total_pages = pagination.get("total_pages")
            current_page = pagination.get("current_page")
            pages_known = total_pages is not None and current_page is not None

            # Stop on an explicit "no more pages" flag ...
            if has_more is False:
                break
            # ... or when the page counters say the last page was reached ...
            if pages_known and int(current_page) >= int(total_pages):
                break
            # ... or when the response carries no usable pagination info.
            if has_more is None and not pages_known:
                break

            page += 1

    df = pd.DataFrame(
        {
            "FixtureID": [f.get("id") for f in fixtures],
            "Fixture": [f.get("name") for f in fixtures],
            "StartingAt": [f.get("starting_at") for f in fixtures],
        }
    )

    if df.empty:
        return df

    # Unparseable timestamps become NaT and are dropped by the window filter.
    df["StartingAt"] = pd.to_datetime(df["StartingAt"], utc=True, errors="coerce")
    df = df[df["StartingAt"].between(now_utc, end_utc)].reset_index(drop=True)
    return df


# -------------------------
# Bookmaker ID -> Name lookup (cached)
# -------------------------
def _get_bookmaker_name(
    bookmaker_id: int | None,
    session: requests.Session,
    cache: dict[int, str | None],
) -> str | None:
    """Resolve a bookmaker ID to its name, using ``cache`` to avoid repeat calls.

    Args:
        bookmaker_id: The bookmaker ID; ``None`` or a value that cannot be
            converted with ``int()`` yields ``None`` without any request.
        session: Session used for the lookup request.
        cache: Mapping of bookmaker ID to resolved name. It is updated in
            place. Definitive misses (HTTP 404 or a response without a usable
            name) are stored as ``None`` so the same unknown bookmaker is not
            requested again for every odd.

    Returns:
        The stripped bookmaker name, or ``None`` when it cannot be resolved.

    Raises:
        requests.HTTPError: For error statuses other than 404. Such failures
            are not cached, so a later call retries the lookup.
    """
    if bookmaker_id is None:
        return None

    try:
        bookmaker_id_int = int(bookmaker_id)
    except (TypeError, ValueError):
        return None

    if bookmaker_id_int in cache:
        return cache[bookmaker_id_int]

    r = session.get(
        f"{BOOKMAKER_URL}/{bookmaker_id_int}",
        params={"api_token": API_TOKEN},
        timeout=30,
    )
    if r.status_code == 404:
        cache[bookmaker_id_int] = None
        return None
    r.raise_for_status()

    data = (r.json() or {}).get("data") or {}
    name = data.get("name")
    resolved = name.strip() if isinstance(name, str) and name.strip() else None

    cache[bookmaker_id_int] = resolved
    return resolved


# -------------------------
# Odds fetch (bet365 only)
# -------------------------
def fetch_odds_prematch_for_fixture_bet365_only(
    fixture_id: int,
    session: requests.Session,
    bookmaker_cache: dict[int, str],
) -> list[dict]:
    """Fetch one fixture's pre-match odds and keep the bet365 entries.

    Args:
        fixture_id: Fixture whose odds are requested.
        session: Session used for the odds request and bookmaker lookups.
        bookmaker_cache: ID-to-name cache handed to ``_get_bookmaker_name``.

    Returns:
        One dict per bet365 odd, in the order the API returned them. An empty
        list is returned when the endpoint answers 404.

    Raises:
        requests.HTTPError: For error statuses other than 404.
    """
    response = session.get(
        f"{BASE_URL}/odds/pre-match/fixtures/{fixture_id}",
        params={"api_token": API_TOKEN},
        timeout=30,
    )
    if response.status_code == 404:
        return []
    response.raise_for_status()

    # (output column, API field) pairs placed before / after the bookmaker
    # columns, so the resulting key order is the same for every row.
    leading_fields = (
        ("OddID", "id"),
        ("FixtureID", "fixture_id"),
        ("MarketID", "market_id"),
    )
    trailing_fields = (
        ("Market", "market_description"),
        ("Label", "label"),
        ("Name", "name"),
        ("Value", "value"),
        ("DP3", "dp3"),
        ("Total", "total"),
        ("Handicap", "handicap"),
        ("Participants", "participants"),
        ("LatestBookmakerUpdate", "latest_bookmaker_update"),
    )

    bet365_rows = []
    for entry in response.json().get("data") or []:
        bm_id = entry.get("bookmaker_id")
        bm_name = _get_bookmaker_name(bm_id, session, bookmaker_cache)

        # Only bet365 survives; the comparison is case-insensitive via _norm.
        if _norm(bm_name) == BET365_NAME:
            record = {column: entry.get(field) for column, field in leading_fields}
            record["BookmakerID"] = bm_id
            record["Bookmaker"] = bm_name
            record.update((column, entry.get(field)) for column, field in trailing_fields)
            bet365_rows.append(record)

    return bet365_rows


def fetch_bet365_odds_next_hours(hours: int = 20) -> pd.DataFrame:
    """Collect bet365 pre-match odds for fixtures starting in the next ``hours``.

    Args:
        hours: Length of the look-ahead window passed to
            ``fetch_fixtures_next_hours``.

    Returns:
        One row per bet365 odd, joined with the fixture name and start time.
        ``Odds`` is the numeric form of ``Value``; ``Total`` and ``Handicap``
        are converted to numbers (unparseable values become NaN). Rows without
        a numeric ``Odds``, a ``Bookmaker`` or a ``StartingAt`` are dropped.
        An empty DataFrame is returned when there are no fixtures or no odds.

    Raises:
        requests.HTTPError: Propagated from the underlying API requests.
    """
    fixtures_df = fetch_fixtures_next_hours(hours=hours)
    if fixtures_df.empty:
        return pd.DataFrame()

    # A fixture without an ID cannot be queried (and would make the integer
    # cast raise); a duplicated ID would be fetched twice and would multiply
    # rows in the merge below.
    fixtures_df = (
        fixtures_df.dropna(subset=["FixtureID"])
        .drop_duplicates(subset="FixtureID")
        .astype({"FixtureID": "int64"})
    )

    bookmaker_cache: dict[int, str] = {}
    rows: list[dict] = []

    # One session for all fixtures; closed as soon as fetching is finished.
    with requests.Session() as session:
        for fid in fixtures_df["FixtureID"]:
            rows.extend(
                fetch_odds_prematch_for_fixture_bet365_only(fid, session, bookmaker_cache)
            )

    df = pd.DataFrame(rows)
    if df.empty:
        return df

    df = df.merge(fixtures_df, on="FixtureID", how="left")

    # Normalize numeric fields
    df["Odds"] = pd.to_numeric(df["Value"], errors="coerce")
    df["Total"] = pd.to_numeric(df["Total"], errors="coerce")
    df["Handicap"] = pd.to_numeric(df["Handicap"], errors="coerce")

    # Drop unusable odds rows
    df = df.dropna(subset=["Odds", "Bookmaker", "StartingAt"]).reset_index(drop=True)

    return df


# -------------------------
# Example usage
# -------------------------
df_bet365 = fetch_bet365_odds_next_hours(hours=20)

print(f"Rows from bet365: {len(df_bet365)}")

# When nothing was found the frame has no columns at all, so only select the
# preview columns that actually exist instead of raising a KeyError.
preview_cols = [
    "StartingAt", "Fixture", "Bookmaker", "Market", "Label", "Name",
    "Odds", "Total", "Handicap", "LatestBookmakerUpdate",
]
print(df_bet365[[c for c in preview_cols if c in df_bet365.columns]].head(50))

# Identifier / raw columns that are not wanted in the exported file.
cols_to_drop = [
    "OddID",
    "FixtureID",
    "MarketID",
    "BookmakerID",
    "DP3",
    "Participants",
    "Value",
]

df_bet365 = df_bet365.drop(
    columns=[c for c in cols_to_drop if c in df_bet365.columns],
    errors="ignore"
)

# Save to CSV if you want
df_bet365.to_csv("bet365_odds.csv", index=False, encoding="utf-8")
print("Saved bet365_odds.csv")

Rows from bet365: 28230
                  StartingAt              Fixture Bookmaker  \
0  2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
1  2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
2  2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
3  2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
4  2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
5  2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
6  2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
7  2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
8  2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
9  2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
10 2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
11 2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
12 2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
13 2026-01-07 17:30:00+00:00  Bologna vs Atalanta    bet365   
14 2026-01-07 17:30:00+00:00  B