In [4]:
import requests
import pandas as pd
import json
import os
from datetime import datetime, timedelta

# Per-player batting stat keys copied verbatim from each player's
# ``stats['batting']`` mapping, in output-column order.
_BATTING_STAT_KEYS = (
    'gamesPlayed', 'atBats', 'hits', 'runs', 'doubles', 'triples',
    'homeRuns', 'rbi', 'baseOnBalls', 'strikeOuts', 'leftOnBase',
)

# Full, fixed column order of the batting CSV. Pinning it keeps the header
# present (and stable) even when no batting rows were collected.
_BATTING_COLUMNS = (
    'gamePk', 'team', 'team_type', 'player_id', 'player_name', 'batting_order',
) + _BATTING_STAT_KEYS


def _fetch_boxscore(game_pk, timeout):
    """Return the decoded boxscore JSON for one game, or None on any failure."""
    url = f'https://statsapi.mlb.com/api/v1/game/{game_pk}/boxscore'
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts must not abort the whole batch.
        print(f"Request failed for game {game_pk}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Unexpected status {response.status_code} for game {game_pk}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # requests' JSON decode errors derive from ValueError.
        print(f"Invalid JSON for game {game_pk}: {exc}")
        return None


def _flatten_batting_rows(all_boxscores):
    """Build one flat dict per listed batter from the fetched boxscores."""
    rows = []
    for game in all_boxscores:
        boxscore = game['boxscore']
        if not boxscore:
            # Fetch failed for this game; nothing to flatten.
            continue

        teams = boxscore.get('teams', {})
        for team_type in ('home', 'away'):
            team = teams.get(team_type)
            if not team:
                continue

            team_name = team.get('team', {}).get('name')
            players = team.get('players', {})

            # 'batters' holds player ids; 'players' is keyed by "ID<id>".
            for batter_id in team.get('batters', []):
                player = players.get(f"ID{batter_id}")
                if not player:
                    continue

                person = player.get('person', {})
                stats = player.get('stats', {}).get('batting', {})

                row = {
                    'gamePk': game['gamePk'],
                    'team': team_name,
                    'team_type': team_type,
                    'player_id': person.get('id'),
                    'player_name': person.get('fullName'),
                    'batting_order': player.get('battingOrder'),
                }
                row.update((key, stats.get(key)) for key in _BATTING_STAT_KEYS)
                rows.append(row)
    return rows


def fetch_and_save_final_boxscores(date_str, timeout=10):
    """Download boxscores for every game listed for a date and save them.

    Reads game ids from ``data/csv/games/mlb_games_{date_str}.csv`` (column
    ``gamePk``), requests each game's boxscore, writes the raw responses to
    ``data/json/boxscores/mlb_boxscores_{date_str}.json`` and a flattened
    per-batter table to ``data/csv/boxscores/mlb_boxscores_{date_str}.csv``.

    A game whose boxscore cannot be fetched (network error, non-200 status,
    invalid JSON) is stored with ``'boxscore': None`` in the JSON file and
    contributes no rows to the CSV.

    Args:
        date_str: Date fragment used in the input and output file names.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. If the games CSV does not exist, a message is printed and
        nothing is written.
    """
    csv_path = f'data/csv/games/mlb_games_{date_str}.csv'
    json_path = f'data/json/boxscores/mlb_boxscores_{date_str}.json'
    csv_output_path = f'data/csv/boxscores/mlb_boxscores_{date_str}.csv'

    if not os.path.exists(csv_path):
        print(f"CSV file not found: {csv_path}")
        return

    games_df = pd.read_csv(csv_path)
    all_boxscores = [
        {'gamePk': game_pk, 'boxscore': _fetch_boxscore(game_pk, timeout)}
        for game_pk in games_df['gamePk']
    ]

    # Ensure output directories exist
    os.makedirs(os.path.dirname(json_path), exist_ok=True)
    os.makedirs(os.path.dirname(csv_output_path), exist_ok=True)

    # Save the raw responses as JSON
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(all_boxscores, f, indent=2)

    # Flatten batting data for all players and save as CSV. Passing explicit
    # columns guarantees a header row even when there are no batting rows.
    batting_rows = _flatten_batting_rows(all_boxscores)
    df = pd.DataFrame(batting_rows, columns=list(_BATTING_COLUMNS))
    df.to_csv(csv_output_path, index=False)
    print(f"Saved boxscore data to {csv_output_path}")

# Example usage: fetch boxscores for the day before today (local clock).
yesterday = (datetime.now().date() - timedelta(days=1)).isoformat()
fetch_and_save_final_boxscores(yesterday)

Saved boxscore data to data/csv/boxscores/mlb_boxscores_2025-07-19.csv
