In [3]:
import json
import os
from datetime import datetime, timedelta

import pandas as pd
import requests

In [4]:
# Yesterday's calendar date (local clock) as a YYYY-MM-DD string
yesterday = (datetime.now().date() - timedelta(days=1)).isoformat()

In [5]:
# MLB Stats API endpoint for the daily schedule, queried with sportId=1 and
# restricted to the date held in `yesterday` (a YYYY-MM-DD string).
url = f'https://statsapi.mlb.com/api/v1/schedule?sportId=1&date={yesterday}'
# Echo the resolved URL so the queried date is visible in the cell output.
print(url)

https://statsapi.mlb.com/api/v1/schedule?sportId=1&date=2025-07-05


In [14]:
# Fetch yesterday's schedule, archive the raw JSON, and save the finished games as CSV.
SCHEDULE_COLUMNS = ['gamePk', 'gameDate', 'teams.away.team.id', 'teams.home.team.id']

# A timeout keeps the cell from hanging indefinitely, and raise_for_status() stops
# here on an HTTP error instead of archiving and parsing an error payload.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

json_path = f'assets/json/games/mlb_games_{yesterday}.json'
csv_path = f'assets/csv/games/mlb_games_{yesterday}.csv'
# Create the output folders if they are missing so the writes below cannot fail
# with FileNotFoundError on a fresh checkout.
for out_path in (json_path, csv_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

with open(json_path, 'w') as f:
    json.dump(data, f, indent=2)

# Extract games list. `dates` may be missing or an empty list (presumably on days
# with nothing scheduled -- confirm against the API); fall back to a placeholder
# entry so indexing [0] cannot raise IndexError.
dates = data.get('dates') or [{}]
games = dates[0].get('games', [])

if games:
    df = pd.json_normalize(games)
    # Keep only games whose status.detailedState is 'Final', and only the columns we need
    df = df.loc[df['status.detailedState'] == 'Final', SCHEDULE_COLUMNS]
else:
    # Keep the expected columns even when there are no games, so later cells can
    # still read df['gamePk'] and the CSV still gets a header row.
    df = pd.DataFrame(columns=SCHEDULE_COLUMNS)
df.to_csv(csv_path, index=False)

In [7]:
# Get list of gamePk values to fetch boxscores for each game.
# The schedule frame can be completely empty (no columns at all) when no games
# were returned, so guard the lookup instead of raising KeyError.
game_pks = df['gamePk'].tolist() if 'gamePk' in df.columns else []
print(game_pks)

[777230, 777229, 777231, 777228, 777233, 777234, 777237, 777241, 777238, 777240, 777221, 777232, 777223, 777227, 777225]


In [8]:
# Iterate through game_pks to fetch boxscore data and append it to boxscore.csv.
# NOTE: the file is opened in append mode, so re-running this cell for the same
# games adds the same rows again.
BOXSCORE_CSV = 'boxscore.csv'


def _team_row(game_pk, team_type, team_info):
    """Flatten one side's boxscore entry into a single CSV row.

    Args:
        game_pk: Game identifier the row belongs to.
        team_type: 'home' or 'away'.
        team_info: The boxscore's per-team dict; may be empty.

    Returns:
        dict with gamePk, team_type, team_id, team_name, runs, hits and errors.
        Any key missing from team_info resolves to None instead of raising.
    """
    team = team_info.get('team', {})
    team_stats = team_info.get('teamStats', {})
    batting = team_stats.get('batting', {})
    return {
        'gamePk': game_pk,
        'team_type': team_type,
        'team_id': team.get('id'),
        'team_name': team.get('name'),
        'runs': batting.get('runs'),
        'hits': batting.get('hits'),
        'errors': team_stats.get('fielding', {}).get('errors')
    }


boxscore_rows = []
for game_pk in game_pks:
    boxscore_url = f'https://statsapi.mlb.com/api/v1/game/{game_pk}/boxscore'
    # Fail fast on a hung connection or an HTTP error rather than recording a
    # row of Nones built from an error payload.
    boxscore_response = requests.get(boxscore_url, timeout=30)
    boxscore_response.raise_for_status()
    boxscore_data = boxscore_response.json()
    # Flatten the boxscore data for teams
    teams_data = boxscore_data.get('teams', {})
    for team_type in ['home', 'away']:
        team_info = teams_data.get(team_type, {})
        row = _team_row(game_pk, team_type, team_info)
        boxscore_rows.append(row)

# Append everything in one write instead of reopening the file once per row.
# dtype=object keeps integer stats rendered as integers even if another row in
# the same column is None. The header is written only when the file is new.
if boxscore_rows:
    pd.DataFrame(boxscore_rows, dtype=object).to_csv(
        BOXSCORE_CSV,
        mode='a',
        header=not os.path.exists(BOXSCORE_CSV),
        index=False,
    )

In [9]:
# Debug inspection: display the `teams` payload left over from the last boxscore
# fetched by the loop above (the loop variable persists after the loop ends).
# The name is undefined if game_pks was empty and the loop body never ran.
teams_data

{'away': {'team': {'springLeague': {'id': 115,
    'name': 'Grapefruit League',
    'link': '/api/v1/league/115',
    'abbreviation': 'GL'},
   'allStarStatus': 'N',
   'id': 134,
   'name': 'Pittsburgh Pirates',
   'link': '/api/v1/teams/134',
   'season': 2025,
   'venue': {'id': 31, 'name': 'PNC Park', 'link': '/api/v1/venues/31'},
   'springVenue': {'id': 2526, 'link': '/api/v1/venues/2526'},
   'teamCode': 'pit',
   'fileCode': 'pit',
   'abbreviation': 'PIT',
   'teamName': 'Pirates',
   'locationName': 'Pittsburgh',
   'firstYearOfPlay': '1882',
   'league': {'id': 104,
    'name': 'National League',
    'link': '/api/v1/league/104'},
   'division': {'id': 205,
    'name': 'National League Central',
    'link': '/api/v1/divisions/205'},
   'sport': {'id': 1,
    'link': '/api/v1/sports/1',
    'name': 'Major League Baseball'},
   'shortName': 'Pittsburgh',
   'record': {'gamesPlayed': 90,
    'wildCardGamesBack': '-',
    'leagueGamesBack': '-',
    'springLeagueGamesBack': '-',