In [None]:
import requests
import pandas as pd
import json
import os
from datetime import datetime, timedelta

def fetch_and_save_boxscore_json(date_str, timeout=30):
    """
    Fetch boxscore JSON for every game listed for a date and save it to one file.

    Game IDs are read from the ``gamePk`` column of
    ``data/csv/games/mlb_games_{date_str}.csv``. Each game's boxscore is
    requested from ``statsapi.mlb.com`` and the results are written to
    ``data/json/boxscores/mlb_boxscores_{date_str}.json`` as a list of
    ``{'gamePk': ..., 'boxscore': ...}`` records, in CSV order.

    Args:
        date_str: Date string inserted verbatim into the input and output
            file names.
        timeout: Seconds to wait on each HTTP request before giving up.
            Passed straight to ``requests``; without it a stalled connection
            would block the whole run indefinitely.

    Returns:
        False if the games CSV does not exist (nothing is written).
        True once the JSON file has been written. Individual games that
        failed to download are still included, with ``'boxscore': None``.
    """
    csv_path = f'data/csv/games/mlb_games_{date_str}.csv'
    json_path = f'data/json/boxscores/mlb_boxscores_{date_str}.json'

    if not os.path.exists(csv_path):
        print(f"CSV file not found: {csv_path}")
        return False

    games_df = pd.read_csv(csv_path)
    all_boxscores = []

    print(f"  - Fetching boxscores for {len(games_df)} games on {date_str}")

    # One session per date so the connection to the API host is reused
    # across games instead of being re-established for every request.
    with requests.Session() as session:
        for game_pk in games_df['gamePk']:
            url = f'https://statsapi.mlb.com/api/v1/game/{game_pk}/boxscore'
            # Stays None unless the request succeeds and the body parses.
            boxscore_data = None
            try:
                response = session.get(url, timeout=timeout)
                if response.status_code == 200:
                    boxscore_data = response.json()
                else:
                    print(f"    - Failed to fetch game {game_pk}: {response.status_code}")
            except Exception as e:
                # Best effort: one bad game must not abort the rest of the date.
                print(f"    - Error fetching game {game_pk}: {e}")
            all_boxscores.append({'gamePk': game_pk, 'boxscore': boxscore_data})

    # Ensure directory exists
    os.makedirs(os.path.dirname(json_path), exist_ok=True)

    # Save JSON
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(all_boxscores, f, indent=2)

    print(f"  - Saved JSON data to {json_path}")
    return True

In [None]:
# Inclusive window of calendar days whose boxscores will be fetched
start_date = datetime(2025, 9, 1)
end_date = datetime(2025, 9, 30)

# One 'YYYY-MM-DD' string per day, from start_date through end_date
date_range = [
    (start_date + timedelta(days=day_offset)).strftime('%Y-%m-%d')
    for day_offset in range((end_date - start_date).days + 1)
]

print(f"Fetching boxscore JSON data for {len(date_range)} dates")
print(f"Date range: {date_range[0]} to {date_range[-1]}")

In [None]:
# Run the fetch for every date, sorting each date into a success or failure list
successful_dates = []
failed_dates = []

for date_str in date_range:
    print(f"Processing {date_str}...")

    try:
        success = fetch_and_save_boxscore_json(date_str)
    except Exception as e:
        # An unexpected error counts as a failure for this date only;
        # the remaining dates are still attempted.
        print(f"  - Error processing {date_str}: {e}")
        success = False

    (successful_dates if success else failed_dates).append(date_str)

# Report how the run went
print(f"\nFetch Summary:")
print(f"Successfully processed: {len(successful_dates)} dates")
print(f"Failed: {len(failed_dates)} dates")

if failed_dates:
    print(f"Failed dates: {failed_dates}")