In [37]:
import os
from dotenv import load_dotenv
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time

# Load API key
load_dotenv(override=True)
API_KEY = os.getenv("ODDS_API_KEY")


In [None]:
# Odds from a date in 2024 NFL season

test_date = "2024-12-15T14:00:00Z"  # Mid-December 2024

print(f"Testing if 2024 data available: {test_date}\n")

url = "https://api.the-odds-api.com/v4/sports/americanfootball_nfl/odds-history"
params = {
    "apiKey": API_KEY,
    "date": test_date,
    "regions": "us",
    "markets": "h2h",
    "oddsFormat": "american"
}

response = requests.get(url, params=params)

print(f"Status Code: {response.status_code}")

if response.status_code == 200:
    data = response.json()
    print(f"2024 data IS available!")
    print(f"   Found {len(data['data'])} games")
else:
    print(f"2024 data NOT available")
    print(f"   Error: {response.text}")

Testing if 2024 data available: 2024-12-15T14:00:00Z

Status Code: 200
2024 data IS available!
   Found 31 games


In [39]:
# Test multiple years to see how far back data goes
test_dates = [
    "2024-12-15",  # 2024
    "2023-12-15",  # 2023
    "2022-12-15",  # 2022
    "2021-12-15",  # 2021
    "2020-12-15",  # 2020
    "2019-12-15",  # 2019
    "2018-12-15",  # 2018
    "2017-12-15",  # 2017
    "2016-12-15",  # 2016
]

print("Testing data availability by year:\n")
print("=" * 60)

available_years = []

for date_str in test_dates:
    year = date_str[:4]
    formatted_date = date_str + "T14:00:00Z"
    
    print(f"{year}: ", end="")
    
    url = "https://api.the-odds-api.com/v4/sports/americanfootball_nfl/odds-history"
    params = {
        "apiKey": API_KEY,
        "date": formatted_date,
        "regions": "us",
        "markets": "h2h",
        "oddsFormat": "american"
    }
    
    response = requests.get(url, params=params)
    
    if response.status_code == 200:
        data = response.json()
        num_games = len(data['data'])
        print(f"Available ({num_games} games)")
        available_years.append(year)
    elif response.status_code == 429:
        print(f"Rate limited")
    else:
        print(f"Not available ({response.status_code})")

print("=" * 60)
print(f"\nAvailable years: {', '.join(available_years)}")

Testing data availability by year:

2024: Available (31 games)
2023: Available (31 games)
2022: Available (16 games)
2021: Available (16 games)
2020: Available (16 games)
2019: Available (0 games)
2018: Available (0 games)
2017: Available (0 games)
2016: Available (0 games)

Available years: 2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016


In [None]:
#For the 2024 NFL season
def get_2024_season_odds_weekly(api_key):
    """
    Get odds from 2024 NFL season - ONCE PER WEEK
    Only games happening within 7 days of snapshot date
    """
    
    all_records = []
    
    # 2024 NFL season: August 31, 2024 - February 7, 2025
    start_date = datetime(2024, 8,31)
    end_date = datetime(2025, 2, 7)
    
    current_date = start_date
    date_count = 0
    
    while current_date <= end_date:
        formatted_date = current_date.strftime("%Y-%m-%dT14:00:00Z")
        date_str = current_date.strftime("%Y-%m-%d")
        
        print(f"{date_str}...", end=" ")
        
        url = "https://api.the-odds-api.com/v4/sports/americanfootball_nfl/odds-history"
        params = {
            "apiKey": api_key,
            "date": formatted_date,
            "regions": "us",
            "markets": "h2h",
            "oddsFormat": "american"
        }
        
        response = requests.get(url, params=params)
        
        if response.status_code == 200:
            data = response.json()
            games = data['data']
            snapshot_time = pd.to_datetime(data['timestamp'])
            
            # Filter: only games within 7 days of snapshot
            games_in_window = 0
            
            for game in games:
                game_commence = pd.to_datetime(game["commence_time"])
                
                # Check if game is within 7 days of snapshot
                days_until_game = (game_commence - snapshot_time).days

                if 0 <= days_until_game <= 7:  # 0 to 7 days from now
                    games_in_window += 1
                    
                    for bookmaker in game["bookmakers"]:
                        for market in bookmaker["markets"]:
                            if market["key"] == "h2h":
                                for outcome in market["outcomes"]:
                                    all_records.append({
                                        "game_id": game["id"],
                                        "home_team": game["home_team"],
                                        "away_team": game["away_team"],
                                        "commence_time": game["commence_time"],
                                        "sportsbook": bookmaker["key"],
                                        "team": outcome["name"],
                                        "odds": outcome["price"],
                                        "snapshot_date": data['timestamp'],
                                        "days_until_game": days_until_game
                                    })
            
            print(f"{games_in_window} games (within 7 days)")
            date_count += 1
        
        elif response.status_code == 429:
            print("Rate limited - waiting...")
            time.sleep(60)
        
        else:
            print(f"{response.status_code}")
        
        # Jump forward 7 days
        current_date += timedelta(days=7)
    
    print(f"\nollected from {date_count} dates (weekly snapshots)")
    return all_records

print("Collecting 2024 NFL Season odds - WEEKLY, 7-DAY WINDOW...")
print("(This will take 1-2 minutes)\n")

odds_2024_records = get_2024_season_odds_weekly(API_KEY)

Collecting 2024 NFL Season odds - WEEKLY, 9-DAY WINDOW...
(This will take 1-2 minutes)

2024-08-31... ✅ 2 games (within 9 days)
2024-09-07... ✅ 15 games (within 9 days)
2024-09-14... ✅ 16 games (within 9 days)
2024-09-21... ✅ 16 games (within 9 days)
2024-09-28... ✅ 17 games (within 9 days)
2024-10-05... ✅ 15 games (within 9 days)
2024-10-12... ✅ 15 games (within 9 days)
2024-10-19... ✅ 15 games (within 9 days)
2024-10-26... ✅ 16 games (within 9 days)
2024-11-02... ✅ 15 games (within 9 days)
2024-11-09... ✅ 14 games (within 9 days)
2024-11-16... ✅ 14 games (within 9 days)
2024-11-23... ✅ 16 games (within 9 days)
2024-11-30... ✅ 13 games (within 9 days)
2024-12-07... ✅ 13 games (within 9 days)
2024-12-14... ✅ 18 games (within 9 days)
2024-12-21... ✅ 21 games (within 9 days)
2024-12-28... ✅ 13 games (within 9 days)
2025-01-04... ✅ 16 games (within 9 days)
2025-01-11... ✅ 7 games (within 9 days)
2025-01-18... ✅ 4 games (within 9 days)
2025-01-25... ✅ 2 games (within 9 days)
2025-02-01... 

In [41]:
# Convert to DataFrame
odds_2024_df = pd.DataFrame(odds_2024_records)

# Clean dates
odds_2024_df["commence_time"] = pd.to_datetime(odds_2024_df["commence_time"])
odds_2024_df["snapshot_date"] = pd.to_datetime(odds_2024_df["snapshot_date"])

# Add game_id
odds_2024_df["game_id_simple"] = odds_2024_df["home_team"] + " vs " + odds_2024_df["away_team"]

print(f"Total records: {len(odds_2024_df):,}")
print(f"Unique games: {odds_2024_df['game_id_simple'].nunique():,}")
print(f"Date range: {odds_2024_df['snapshot_date'].min()} to {odds_2024_df['snapshot_date'].max()}")




Total records: 5,420
Unique games: 278
Date range: 2024-08-31 13:55:38+00:00 to 2025-01-25 13:55:38+00:00


In [42]:
# Convert to DataFrame
odds_2024_df = pd.DataFrame(odds_2024_records)

# Clean dates
odds_2024_df["commence_time"] = pd.to_datetime(odds_2024_df["commence_time"])
odds_2024_df["snapshot_date"] = pd.to_datetime(odds_2024_df["snapshot_date"])

# Add game_id
odds_2024_df["game_id_simple"] = odds_2024_df["home_team"] + " vs " + odds_2024_df["away_team"]

print(f" Total records: {len(odds_2024_df):,}")
print(f" Unique games: {odds_2024_df['game_id_simple'].nunique():,}")
print(f"Date range: {odds_2024_df['snapshot_date'].min()} to {odds_2024_df['snapshot_date'].max()}")



 Total records: 5,420
 Unique games: 278
Date range: 2024-08-31 13:55:38+00:00 to 2025-01-25 13:55:38+00:00


In [30]:
# Save the cleaned 2024 odds
odds_2024_df.to_csv("odds_2024_filtered.csv", index=False)

print("✅ Saved to odds_2024_filtered.csv")
print(f"✅ Records: {len(odds_2024_df):,}")
print(f"✅ Games: {odds_2024_df['game_id_simple'].nunique():,}")

✅ Saved to odds_2024_filtered.csv
✅ Records: 5,420
✅ Games: 278


In [None]:
def get_multi_year_odds_weekly(api_key, start_year=2016, end_year=2023):
    """
    Get odds from multiple NFL seasons (2016-2023)
    WEEKLY snapshots, only games within 9 days
    """
    
    all_records = []
    
    for year in range(start_year, end_year + 1):
        print(f"\n{'='*60}")
        print(f"Collecting {year} NFL Season (Weekly, 9-day window)")
        print(f"{'='*60}\n")
        
        # Sept 1 to Feb 7 of next year
        start_date = datetime(year, 9, 1)
        end_date = datetime(year + 1, 2, 7)
        
        current_date = start_date
        
        while current_date <= end_date:
            formatted_date = current_date.strftime("%Y-%m-%dT14:00:00Z")
            date_str = current_date.strftime("%Y-%m-%d")
            
            print(f"{date_str}...", end=" ")
            
            url = "https://api.the-odds-api.com/v4/sports/americanfootball_nfl/odds-history"
            params = {
                "apiKey": api_key,
                "date": formatted_date,
                "regions": "us",
                "markets": "h2h",
                "oddsFormat": "american"
            }
            
            response = requests.get(url, params=params)
            
            if response.status_code == 200:
                data = response.json()
                games = data['data']
                snapshot_time = pd.to_datetime(data['timestamp'])
                
                games_in_window = 0
                
                for game in games:
                    game_commence = pd.to_datetime(game["commence_time"])
                    
                    # Only games within 7 days of snapshot
                    days_until_game = (game_commence - snapshot_time).days
                    
                    if 0 <= days_until_game <= 9:
                        games_in_window += 1
                        
                        for bookmaker in game["bookmakers"]:
                            for market in bookmaker["markets"]:
                                if market["key"] == "h2h":
                                    for outcome in market["outcomes"]:
                                        all_records.append({
                                            "game_id": game["id"],
                                            "home_team": game["home_team"],
                                            "away_team": game["away_team"],
                                            "commence_time": game["commence_time"],
                                            "sportsbook": bookmaker["key"],
                                            "team": outcome["name"],
                                            "odds": outcome["price"],
                                            "snapshot_date": data['timestamp'],
                                            "season": year,
                                            "days_until_game": days_until_game
                                        })
                
                print(f"{games_in_window} games")
            
            elif response.status_code == 429:
                print(" Rate limited - waiting...")
                time.sleep(60)
            
            else:
                print(f" {response.status_code}")
            
            # Jump 7 days
            current_date += timedelta(days=7)
    
    print(f"\n{'='*60}")
    print(f"Collected {len(all_records):,} total records")
    print(f"{'='*60}")
    
    return all_records

print("Starting multi-year collection (2016-2023)...")
print("⏳ This will take 30-45 minutes...\n")

odds_2016_2023_records = get_multi_year_odds_weekly(API_KEY, start_year=2016, end_year=2023)

Starting multi-year collection (2016-2023)...
⏳ This will take 30-45 minutes...


Collecting 2016 NFL Season (Weekly, 9-day window)

2016-09-01... ✅ 0 games
2016-09-08... ✅ 0 games
2016-09-15... ✅ 0 games
2016-09-22... ✅ 0 games
2016-09-29... ✅ 0 games
2016-10-06... ✅ 0 games
2016-10-13... ✅ 0 games
2016-10-20... ✅ 0 games
2016-10-27... ✅ 0 games
2016-11-03... ✅ 0 games
2016-11-10... ✅ 0 games
2016-11-17... ✅ 0 games
2016-11-24... ✅ 0 games
2016-12-01... ✅ 0 games
2016-12-08... ✅ 0 games
2016-12-15... ✅ 0 games
2016-12-22... ✅ 0 games
2016-12-29... ✅ 0 games
2017-01-05... ✅ 0 games
2017-01-12... ✅ 0 games
2017-01-19... ✅ 0 games
2017-01-26... ✅ 0 games
2017-02-02... ✅ 0 games

Collecting 2017 NFL Season (Weekly, 9-day window)

2017-09-01... ✅ 0 games
2017-09-08... ✅ 0 games
2017-09-15... ✅ 0 games
2017-09-22... ✅ 0 games
2017-09-29... ✅ 0 games
2017-10-06... ✅ 0 games
2017-10-13... ✅ 0 games
2017-10-20... ✅ 0 games
2017-10-27... ✅ 0 games
2017-11-03... ✅ 0 games
2017-11-10... ✅ 0 games

In [43]:
# Convert to DataFrame
odds_2016_2023_df = pd.DataFrame(odds_2016_2023_records)

# Clean dates
odds_2016_2023_df["commence_time"] = pd.to_datetime(odds_2016_2023_df["commence_time"])
odds_2016_2023_df["snapshot_date"] = pd.to_datetime(odds_2016_2023_df["snapshot_date"])

# Add game_id
odds_2016_2023_df["game_id_simple"] = odds_2016_2023_df["home_team"] + " vs " + odds_2016_2023_df["away_team"]

print(f"Total records: {len(odds_2016_2023_df):,}")
print(f" Unique games: {odds_2016_2023_df['game_id_simple'].nunique():,}")
print(f" Seasons: {sorted(odds_2016_2023_df['season'].unique())}")
print(f"Date range: {odds_2016_2023_df['snapshot_date'].min()} to {odds_2016_2023_df['snapshot_date'].max()}")

print("\nRecords per season:")
print(odds_2016_2023_df['season'].value_counts().sort_index())



Total records: 38,980
 Unique games: 695
 Seasons: [np.int64(2020), np.int64(2021), np.int64(2022), np.int64(2023)]
Date range: 2020-09-01 13:55:00+00:00 to 2024-02-02 13:55:38+00:00

Records per season:
season
2020     6298
2021    10050
2022    12006
2023    10626
Name: count, dtype: int64


In [44]:
# Save
odds_2016_2023_df.to_csv("odds_2020_2023_filtered.csv", index=False)

