This code will:

Loop through each date from March 27, 2025 to May 31, 2025 (the start_date and end_date set in the code below)
For each date, call your existing fetch_and_save_final_boxscores() function
Save individual files for each day:
JSON: data/json/boxscores/mlb_boxscores_{date}.json
CSV: data/csv/boxscores/mlb_boxscores_{date}.csv
Create a combined CSV with all batting data from the entire date range
Handle errors gracefully if any dates fail to process

In [3]:
import requests
import pandas as pd
import json
import os
from datetime import datetime, timedelta

# Seconds to wait on a single boxscore request before giving up on that game.
_BOXSCORE_REQUEST_TIMEOUT = 30

# Batting stat keys copied verbatim from each player's `stats.batting` dict.
_BATTING_STAT_FIELDS = (
    'gamesPlayed',
    'atBats',
    'hits',
    'runs',
    'doubles',
    'triples',
    'homeRuns',
    'rbi',
    'baseOnBalls',
    'strikeOuts',
    'leftOnBase',
)

# Column order of the per-day batting CSV.
_BATTING_COLUMNS = (
    'gamePk',
    'team',
    'team_type',
    'player_id',
    'player_name',
    'batting_order',
) + _BATTING_STAT_FIELDS


def _flatten_batting_rows(all_boxscores):
    """Return one dict per batter for every game that has a boxscore."""
    rows = []
    for game in all_boxscores:
        boxscore = game['boxscore']
        if not boxscore:
            # The fetch failed for this game; nothing to flatten.
            continue

        for team_type in ('home', 'away'):
            # Direct indexing on purpose: a boxscore without this structure
            # should surface as an error rather than be silently dropped.
            team = boxscore['teams'][team_type]
            players = team.get('players', {})

            for batter_id in team.get('batters', []):
                # Players are keyed as "ID<numeric id>" in the payload.
                player = players.get(f"ID{batter_id}", {})
                if not player:
                    continue

                person = player.get('person', {})
                stats = player.get('stats', {}).get('batting', {})

                row = {
                    'gamePk': game['gamePk'],
                    'team': team['team']['name'],
                    'team_type': team_type,
                    'player_id': person.get('id'),
                    'player_name': person.get('fullName'),
                    'batting_order': player.get('battingOrder'),
                }
                row.update(
                    (field, stats.get(field)) for field in _BATTING_STAT_FIELDS
                )
                rows.append(row)
    return rows


def fetch_and_save_final_boxscores(date_str):
    """Fetch boxscores for every game on *date_str* and save them to disk.

    Reads the game IDs from ``data/csv/games/mlb_games_{date_str}.csv``
    (column ``gamePk``), requests each game's boxscore from the MLB Stats
    API, and writes:

    * ``data/json/boxscores/mlb_boxscores_{date_str}.json`` - a list of
      ``{'gamePk': ..., 'boxscore': ...}`` entries, with ``boxscore`` set to
      ``None`` for any game whose request failed.
    * ``data/csv/boxscores/mlb_boxscores_{date_str}.csv`` - one row per
      batter. The header row is always written, even when there are no
      batting rows, so the file can be read back with ``pd.read_csv``.

    Args:
        date_str: Date used to build the input and output file names
            (the caller in this file passes ``YYYY-MM-DD``).

    Returns:
        None. If the games CSV does not exist, a message is printed and
        nothing is written.
    """
    csv_path = f'data/csv/games/mlb_games_{date_str}.csv'
    json_path = f'data/json/boxscores/mlb_boxscores_{date_str}.json'
    csv_output_path = f'data/csv/boxscores/mlb_boxscores_{date_str}.csv'

    if not os.path.exists(csv_path):
        print(f"CSV file not found: {csv_path}")
        return

    games_df = pd.read_csv(csv_path)
    all_boxscores = []

    for game_pk in games_df['gamePk']:
        url = f'https://statsapi.mlb.com/api/v1/game/{game_pk}/boxscore'
        boxscore_data = None
        try:
            # A timeout keeps one stalled connection from hanging the run.
            response = requests.get(url, timeout=_BOXSCORE_REQUEST_TIMEOUT)
            if response.status_code == 200:
                boxscore_data = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Treat a network or decode failure like a non-200 response so
            # the remaining games for this date are still fetched and saved.
            print(f"  - Failed to fetch boxscore for game {game_pk}: {exc}")
        all_boxscores.append({'gamePk': game_pk, 'boxscore': boxscore_data})

    # Ensure directories exist
    os.makedirs(os.path.dirname(json_path), exist_ok=True)
    os.makedirs(os.path.dirname(csv_output_path), exist_ok=True)

    # Save JSON
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(all_boxscores, f, indent=2)

    # Explicit columns guarantee a header row even with zero batting rows;
    # a header-less file cannot be read back with pd.read_csv.
    batting_rows = _flatten_batting_rows(all_boxscores)
    df = pd.DataFrame(batting_rows, columns=list(_BATTING_COLUMNS))
    df.to_csv(csv_output_path, index=False)
    print(f"Saved boxscore data to {csv_output_path}")

# Define the inclusive date range to process.
start_date = datetime(2025, 3, 27)
end_date = datetime(2025, 5, 31)

# One 'YYYY-MM-DD' string per day, both endpoints included.
date_range = [
    (start_date + timedelta(days=offset)).strftime('%Y-%m-%d')
    for offset in range((end_date - start_date).days + 1)
]

print(f"Processing boxscores for dates: {date_range}")

# Fetch each day, then collect that day's CSV for the combined dataset.
all_batting_data = []
for date_str in date_range:
    print(f"Processing boxscores for {date_str}...")

    try:
        fetch_and_save_final_boxscores(date_str)

        # The fetch returns without writing when the games CSV is missing,
        # so only read the daily file if it actually exists.
        csv_path = f'data/csv/boxscores/mlb_boxscores_{date_str}.csv'
        if os.path.exists(csv_path):
            daily_df = pd.read_csv(csv_path)
            daily_df['date'] = date_str
            all_batting_data.append(daily_df)
            print(f"  - Processed {len(daily_df)} batting records for {date_str}")

    except Exception as e:
        # Deliberately broad: one bad day must not stop the rest of the range.
        print(f"  - Error processing {date_str}: {e}")

# Combine all batting data into one master CSV.
if all_batting_data:
    # Name the file after the range actually processed, so the label cannot
    # drift from start_date/end_date.
    combined_path = (
        'data/csv/boxscores/'
        f'mlb_boxscores_{start_date:%Y-%m-%d}_to_{end_date:%Y-%m-%d}.csv'
    )
    combined_df = pd.concat(all_batting_data, ignore_index=True)
    combined_df.to_csv(combined_path, index=False)
    print(f"\nCombined boxscore data saved: {len(combined_df)} total batting records")
    print(f"File: {combined_path}")
else:
    print("No boxscore data found for the date range")

Processing boxscores for dates: ['2025-03-27', '2025-03-28', '2025-03-29', '2025-03-30', '2025-03-31', '2025-04-01', '2025-04-02', '2025-04-03', '2025-04-04', '2025-04-05', '2025-04-06', '2025-04-07', '2025-04-08', '2025-04-09', '2025-04-10', '2025-04-11', '2025-04-12', '2025-04-13', '2025-04-14', '2025-04-15', '2025-04-16', '2025-04-17', '2025-04-18', '2025-04-19', '2025-04-20', '2025-04-21', '2025-04-22', '2025-04-23', '2025-04-24', '2025-04-25', '2025-04-26', '2025-04-27', '2025-04-28', '2025-04-29', '2025-04-30', '2025-05-01', '2025-05-02', '2025-05-03', '2025-05-04', '2025-05-05', '2025-05-06', '2025-05-07', '2025-05-08', '2025-05-09', '2025-05-10', '2025-05-11', '2025-05-12', '2025-05-13', '2025-05-14', '2025-05-15', '2025-05-16', '2025-05-17', '2025-05-18', '2025-05-19', '2025-05-20', '2025-05-21', '2025-05-22', '2025-05-23', '2025-05-24', '2025-05-25', '2025-05-26', '2025-05-27', '2025-05-28', '2025-05-29', '2025-05-30', '2025-05-31']
Processing boxscores for 2025-03-27...
Save