This code will:

Create a date range from July 6, 2025 to July 18, 2025
Loop through each day and fetch game data
Save individual JSON and CSV files for each day
Combine all games into one master CSV file
Handle errors gracefully if API calls fail

In [5]:
import requests
import pandas as pd
import json
import os
from datetime import datetime, timedelta

# Define date range
start_date = datetime(2025, 3, 27)
end_date = datetime(2025, 3, 31)

# Generate list of dates
date_range = []
current_date = start_date
while current_date <= end_date:
    date_range.append(current_date.strftime('%Y-%m-%d'))
    current_date += timedelta(days=1)

print(f"Fetching data for dates: {date_range}")

all_games_data = []

for date_str in date_range:
    print(f"Processing {date_str}...")
    
    # MLB API endpoint for daily schedule
    url = f'https://statsapi.mlb.com/api/v1/schedule?sportId=1&date={date_str}'
    
    try:
        response = requests.get(url)
        data = response.json()
        
        # Create directories
        os.makedirs('data/json/games', exist_ok=True)
        os.makedirs('data/csv/games', exist_ok=True)
        
        # Save individual day JSON
        with open(f'data/json/games/mlb_games_{date_str}.json', 'w') as f:
            json.dump(data, f, indent=2)
        
        # Extract games list
        games = data.get('dates', [{}])[0].get('games', [])
        
        if games:
            df = pd.json_normalize(games)
            # Filter games where status.detailedState is 'Final'
            df = df[df['status.detailedState'] == 'Final']
            df = df[['gamePk', 'gameDate', 'teams.away.team.id', 'teams.home.team.id']]
            
            # Add date column for tracking
            df['date'] = date_str
            
            # Save individual day CSV
            df.to_csv(f'data/csv/games/mlb_games_{date_str}.csv', index=False)
            print(f"  - Saved {len(df)} games for {date_str}")
            
            # Add to combined dataset
            all_games_data.append(df)
        else:
            print(f"  - No games found for {date_str}")
            
    except Exception as e:
        print(f"  - Error fetching data for {date_str}: {e}")

# Combine all dates into one DataFrame
if all_games_data:
    combined_df = pd.concat(all_games_data, ignore_index=True)
    combined_df.to_csv('data/csv/games/mlb_games_2025-07-06_to_2025-07-18.csv', index=False)
    print(f"\nTotal games saved: {len(combined_df)}")
    print(f"Combined file saved: data/csv/games/mlb_games_2025-07-06_to_2025-07-18.csv")
else:
    print("No game data found for the date range")

Fetching data for dates: ['2025-03-27', '2025-03-28', '2025-03-29', '2025-03-30', '2025-03-31']
Processing 2025-03-27...
  - Saved 14 games for 2025-03-27
Processing 2025-03-28...
  - Saved 9 games for 2025-03-28
Processing 2025-03-29...
  - Saved 15 games for 2025-03-29
Processing 2025-03-30...
  - Saved 13 games for 2025-03-30
Processing 2025-03-31...
  - Saved 14 games for 2025-03-31

Total games saved: 65
Combined file saved: data/csv/games/mlb_games_2025-07-06_to_2025-07-18.csv
