In [1]:
# Stats:  https://www.sofascore.com/api/v1/category/1843/scheduled-events/2025-01-01
# Odds:   https://api.sofascore.com/api/v1/sport/tennis/odds/1/2025-05-06

import requests
import datetime
import json
import pandas as pd


In [6]:
def fetch_events_for_date(date_str: str, category_id: int = 1843) -> dict:
    url = f"https://www.sofascore.com/api/v1/category/{category_id}/scheduled-events/{date_str}"
    response = requests.get(url)
    response.raise_for_status()
    return response.json()
def fetch_odds_for_date(date_str: str) -> dict:
    url = f"https://api.sofascore.com/api/v1/sport/tennis/odds/1/{date_str}"
    response = requests.get(url)
    response.raise_for_status()
    return response.json()

In [7]:
start_date = datetime.date(2025, 1, 1)
end_date = datetime.date(2025, 1, 31)
dt = datetime.timedelta(days=1)

all_events = []
all_odds = {}
current = start_date
print(f"Fetching Events and Odds")
while current <= end_date:
    date_str = current.isoformat()
    print(f"Fetching events and odds for {date_str}...")
    data = fetch_events_for_date(date_str)

    # Extract events (adjust key if different)
    events = data.get("events", [])
    for ev in events:
        # Tag each event with its date
        ev["scheduledDate"] = date_str
        all_events.append(ev)
    
    od_data = fetch_odds_for_date(date_str)
    all_odds[date_str] = od_data
    
    current += dt

print(f"Done")

Fetching Events and Odds
Fetching events for 2025-01-01...
Fetching events for 2025-01-02...
Fetching events for 2025-01-03...
Fetching events for 2025-01-04...
Fetching events for 2025-01-05...
Fetching events for 2025-01-06...
Fetching events for 2025-01-07...
Fetching events for 2025-01-08...
Fetching events for 2025-01-09...
Fetching events for 2025-01-10...
Fetching events for 2025-01-11...
Fetching events for 2025-01-12...
Fetching events for 2025-01-13...
Fetching events for 2025-01-14...
Fetching events for 2025-01-15...
Fetching events for 2025-01-16...
Fetching events for 2025-01-17...
Fetching events for 2025-01-18...
Fetching events for 2025-01-19...
Fetching events for 2025-01-20...
Fetching events for 2025-01-21...
Fetching events for 2025-01-22...
Fetching events for 2025-01-23...
Fetching events for 2025-01-24...
Fetching events for 2025-01-25...
Fetching events for 2025-01-26...
Fetching events for 2025-01-27...
Fetching events for 2025-01-28...
Fetching events for 202

In [8]:
print("Writing JSON Files")
with open("january_2025_events.json", "w", encoding="utf-8") as f:
    json.dump(all_events, f, ensure_ascii=False, indent=2)
with open("january_2025_tennis_odds.json", "w", encoding="utf-8") as f:
    json.dump(all_odds, f, ensure_ascii=False, indent=2)
print("Done")

Writing JSON Files
Done


In [15]:
events_df = pd.json_normalize(all_events)
events_df = events_df.rename(columns={"id": "match_id"})

events_df["match_start_ts"] = events_df["startTimestamp"]
events_df["period_start_ts"] = events_df.get("time.currentPeriodStartTimestamp")

odds_records = []
for date_str, od_obj in all_odds.items():
    odds_by_match = od_obj.get("odds", {})
    for match_id_str, market in odds_by_match.items():
        rec = {
            "match_id": int(match_id_str),
            "scheduledDate": date_str,
            "odds_marketId":   market.get("marketId"),
            "odds_marketName": market.get("marketName"),
            "odds_isLive":     market.get("isLive"),
        }
        for i, choice in enumerate(market.get("choices", []), start=1):
            rec[f"odds_choice_{i}_name"] = choice.get("name")
            rec[f"odds_choice_{i}_frac"] = choice.get("fractionalValue")
        odds_records.append(rec)

odds_df = pd.DataFrame(odds_records)

merged = events_df.merge(
    odds_df,
    on=["match_id", "scheduledDate"],
    how="left",
    validate="one_to_one"
)

# 4) Create a unique row‑ID for each “game” (here using the period_start timestamp)
merged["game_id"] = (
    merged["match_id"].astype(str)
    + "_"
    + merged["period_start_ts"].fillna(merged["match_start_ts"]).astype(int).astype(str)
)


In [18]:
merged = merged.sort_values(["match_id", "match_start_ts"])
front_cols = ["match_id", "match_start_ts", "period_start_ts", "game_id"]
other_cols = [c for c in merged.columns if c not in front_cols]
merged = merged[front_cols + other_cols]
merged.head()[front_cols + other_cols]

Unnamed: 0,match_id,match_start_ts,period_start_ts,game_id,firstToServe,customId,winnerCode,hasGlobalHighlights,crowdsourcingDataDisplayEnabled,slug,...,awayTeam.fieldTranslations.shortNameTranslation.ar,awayTeam.fieldTranslations.shortNameTranslation.hi,awayTeam.fieldTranslations.shortNameTranslation.bn,odds_marketId,odds_marketName,odds_isLive,odds_choice_1_name,odds_choice_1_frac,odds_choice_2_name,odds_choice_2_frac
22,13273166,1735769100,1735776000.0,13273166_1735775728,1.0,ODmcsFdTd,1.0,False,False,e-van-loben-sels-qian-sun,...,,,,1.0,Full time,False,1,1/14,2,13/2
95,13273166,1735769100,1735776000.0,13273166_1735775728,1.0,ODmcsFdTd,1.0,False,False,e-van-loben-sels-qian-sun,...,,,,1.0,Full time,False,1,1/14,2,13/2
15,13273168,1735682400,1735685000.0,13273168_1735685159,1.0,XNKcsucFd,2.0,False,False,martin-mazev-alexander-hoogmartens,...,,,,1.0,Full time,False,1,2/1,2,4/11
102,13273170,1735855200,1735858000.0,13273170_1735857830,2.0,DhMbsXibd,2.0,False,False,dominique-rolland-peter-makk,...,,,,1.0,Full time,False,1,10/3,2,1/5
173,13273170,1735855200,1735858000.0,13273170_1735857830,2.0,DhMbsXibd,2.0,False,False,dominique-rolland-peter-makk,...,,,,1.0,Full time,False,1,10/3,2,1/5


In [25]:
merged_live = merged[ merged["odds_isLive"] == True ]
merged_live = merged_live.reset_index(drop=True)
print(merged_live.shape)
merged_live.head()

(0, 158)


Unnamed: 0,match_id,match_start_ts,period_start_ts,game_id,firstToServe,customId,winnerCode,hasGlobalHighlights,crowdsourcingDataDisplayEnabled,slug,...,awayTeam.fieldTranslations.shortNameTranslation.ar,awayTeam.fieldTranslations.shortNameTranslation.hi,awayTeam.fieldTranslations.shortNameTranslation.bn,odds_marketId,odds_marketName,odds_isLive,odds_choice_1_name,odds_choice_1_frac,odds_choice_2_name,odds_choice_2_frac
