In [1]:
import requests
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# Previous calendar day according to the local clock, rendered as YYYY-MM-DD
previous_day = datetime.now() - timedelta(days=1)
yesterday = previous_day.strftime('%Y-%m-%d')

In [3]:
# Build the MLB Stats API daily-schedule URL for the date held in `yesterday`
# (sportId=1 is Major League Baseball), then echo it as a quick sanity check.
url = f'https://statsapi.mlb.com/api/v1/schedule?sportId=1&date={yesterday}'
print(url)

https://statsapi.mlb.com/api/v1/schedule?sportId=1&date=2025-07-19


In [4]:
import json
import os

# Download the schedule. The timeout keeps the cell from hanging indefinitely,
# and raise_for_status() stops us from saving/parsing an HTTP error payload.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# Keep the raw payload on disk (paths are relative to the current working directory)
os.makedirs('data/json/games', exist_ok=True)
with open(f'data/json/games/mlb_games_{yesterday}.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, indent=2)

# Extract the games list. `dates` can be present but empty, in which case
# indexing [0] on it would raise IndexError -- fall back to an empty entry.
dates = data.get('dates') or [{}]
games = dates[0].get('games', [])

# Columns kept for the downstream boxscore lookup
game_columns = ['gamePk', 'gameDate', 'teams.away.team.id', 'teams.home.team.id']

if games:
    df = pd.json_normalize(games)
    # Keep only games whose status.detailedState is 'Final'
    df = df.loc[df['status.detailedState'] == 'Final', game_columns]
else:
    # Preserve the schema so later cells can still read df['gamePk'] (as an empty column)
    df = pd.DataFrame(columns=game_columns)

# Create CSV directory and save file
os.makedirs('data/csv/games', exist_ok=True)
df.to_csv(f'data/csv/games/mlb_games_{yesterday}.csv', index=False)
print(f"Created file: data/csv/games/mlb_games_{yesterday}.csv with {len(df)} games")

Created file: data/csv/games/mlb_games_2025-07-19.csv with 15 games


In [5]:
# Pull the gamePk identifiers out of the games frame as a plain Python list;
# the boxscore requests are issued once per identifier.
game_ids = df.loc[:, 'gamePk']
game_pks = game_ids.to_list()
print(game_pks)

[777094, 777092, 777093, 777089, 777090, 777091, 777095, 777088, 777104, 777096, 777081, 777097, 777080, 777085, 777086]


In [6]:
# Fetch the boxscore of every game in game_pks and append one summary row per
# team (home and away) to boxscore.csv.
# Rows are collected first and written in a single append at the end, so a
# request that fails part-way through does not leave a half-written batch that
# a re-run would then duplicate.
boxscore_rows = []
for game_pk in game_pks:
    boxscore_url = f'https://statsapi.mlb.com/api/v1/game/{game_pk}/boxscore'
    # Timeout avoids hanging forever; raise_for_status() surfaces HTTP errors
    # instead of silently producing rows full of None.
    boxscore_response = requests.get(boxscore_url, timeout=30)
    boxscore_response.raise_for_status()
    boxscore_data = boxscore_response.json()
    # Flatten the boxscore data for teams
    teams_data = boxscore_data.get('teams', {})
    for team_type in ['home', 'away']:
        team_info = teams_data.get(team_type, {})
        team = team_info.get('team', {})
        team_stats = team_info.get('teamStats', {})
        batting = team_stats.get('batting', {})
        fielding = team_stats.get('fielding', {})
        boxscore_rows.append({
            'gamePk': game_pk,
            'team_type': team_type,
            'team_id': team.get('id'),
            'team_name': team.get('name'),
            'runs': batting.get('runs'),
            'hits': batting.get('hits'),
            'errors': fielding.get('errors')
        })

if boxscore_rows:
    # Write the header only when the file is being created by this append
    write_header = not os.path.exists('boxscore.csv')
    # dtype=object keeps integers written as integers even if another row has a
    # missing value in the same column (otherwise the column becomes float).
    pd.DataFrame(boxscore_rows, dtype=object).to_csv(
        'boxscore.csv', mode='a', header=write_header, index=False
    )

In [7]:
# Inspect the `teams` section of the last boxscore fetched (the value left in
# `teams_data` after the final iteration of the boxscore loop).
teams_data

{'away': {'team': {'springLeague': {'id': 115,
    'name': 'Grapefruit League',
    'link': '/api/v1/league/115',
    'abbreviation': 'GL'},
   'allStarStatus': 'N',
   'id': 117,
   'name': 'Houston Astros',
   'link': '/api/v1/teams/117',
   'season': 2025,
   'venue': {'id': 2392, 'name': 'Daikin Park', 'link': '/api/v1/venues/2392'},
   'springVenue': {'id': 5000, 'link': '/api/v1/venues/5000'},
   'teamCode': 'hou',
   'fileCode': 'hou',
   'abbreviation': 'HOU',
   'teamName': 'Astros',
   'locationName': 'Houston',
   'firstYearOfPlay': '1962',
   'league': {'id': 103,
    'name': 'American League',
    'link': '/api/v1/league/103'},
   'division': {'id': 200,
    'name': 'American League West',
    'link': '/api/v1/divisions/200'},
   'sport': {'id': 1,
    'link': '/api/v1/sports/1',
    'name': 'Major League Baseball'},
   'shortName': 'Houston',
   'record': {'gamesPlayed': 98,
    'wildCardGamesBack': '-',
    'leagueGamesBack': '-',
    'springLeagueGamesBack': '-',
    's