In [12]:
import requests
from datetime import date, timedelta, datetime
import time
import sys
import statsapi
from pprint import pprint
import json
import os
import csv
# from google.cloud import bigquery



In [2]:
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.expanduser("~/Documents/jupyter/MLB/credentials/mlb-analysis-463501-869e729270f3.json")

# client = bigquery.Client()
# print("Project:", client.project)

Project: mlb-analysis-463501


In [14]:
# Fresh accumulators for this run: the collected game rows and the gamePks already handled.
games, seen_games = list(), set()

In [15]:
#Helper function for team stats by season
def team_stat_data(team, group, season, stats='season', session=None, timeout=30):
    """Fetch the first stat split for one team from the MLB Stats API.

    Args:
        team: Team id interpolated into the ``/teams/{id}/stats`` URL.
        group: Value sent as the ``group`` query parameter.
        season: Value sent as the ``season`` query parameter.
        stats: Value sent as the ``stats`` query parameter (default ``'season'``).
        session: Optional object with a ``requests``-compatible ``get`` method
            (for example a ``requests.Session``). When omitted, the module-level
            ``requests`` API is used, exactly as before.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The first element of ``splits`` inside the first ``stats`` entry of the
        decoded JSON response.

    Raises:
        Whatever ``raise_for_status()`` raises for a non-success HTTP status.
        KeyError / IndexError: if the response has no ``stats`` or ``splits`` entries.
    """
    # The URL must use the ``team`` parameter; the previous ``team_id`` name was
    # not defined inside this function.
    url = f"https://statsapi.mlb.com/api/v1/teams/{team}/stats"
    params = {
        "season": season,
        "group": group,
        "stats": stats
    }
    http = requests if session is None else session
    r = http.get(url, params=params, timeout=timeout)
    r.raise_for_status()
    return r.json()["stats"][0]["splits"][0]

In [16]:
# Capture the current date, plus the previous day rendered as an ISO-8601 string
today = date.today()
yesterday = (today - timedelta(days=1)).isoformat()

In [17]:
# Date window for the schedule query (a single day: today), echoed for the run log
start_date = end_date = today
print(start_date, end_date, sep="\n")

2025-07-04
2025-07-04


In [31]:
# Build one row per scheduled game (ids, start time, weather) and save the rows as a CSV.
#
# The date range is passed by keyword so each value lands in the parameter it is
# meant for regardless of the positional order of statsapi.schedule's signature.
# str() turns date objects into ISO strings and leaves strings untouched.
schedule = statsapi.schedule(
    start_date=str(start_date),
    end_date=str(end_date),
    sportId=1,
    leagueId=104,
)
games = []
seen_games = set()

# Each dictionary appended to `games` becomes one CSV row.
for game in schedule:
    gamePk = game['game_id']
    if gamePk in seen_games:
        # The schedule listed this game more than once; keep only the first entry.
        continue
    seen_games.add(gamePk)

    # Reset per game so a boxscore without a "Weather" entry is recorded as
    # empty instead of inheriting the previous game's value.
    weather = None
    boxscore = statsapi.get("game_boxscore", {'gamePk': gamePk})
    for item in boxscore.get('info', []):
        if item.get('label') == "Weather":
            weather = item.get('value')

    games.append({
        "gamePk": gamePk,
        "gametime": game['game_datetime'],
        "awayId": game['away_id'],
        "homeId": game['home_id'],
        "weather": weather
    })

if games:
    # The file is named after the first ten characters of the first game's
    # `gametime`; the output directory is created on first use.
    os.makedirs("games", exist_ok=True)
    with open(f"games/{games[0]['gametime'][:10]}_games.csv", mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=list(games[0].keys()))
        writer.writeheader()
        writer.writerows(games)
else:
    # An empty schedule previously crashed on games[0]; report it instead.
    print(f"No games found between {start_date} and {end_date}; no games CSV written.")

In [174]:
# Empty lookup tables for teams and players.
teams, players = dict(), dict()

In [35]:
# Collect team totals and per-player stats for every game in `games`, then write
# each collection to its own CSV. Rows are dictionaries keyed by _STAT_COLUMNS so
# csv.DictWriter can consume them directly.
_STAT_COLUMNS = ["attribute", "statistic", "value", "gamePk", "playerId"]
_PLAYER_REQUEST_TIMEOUT = 30  # seconds; keeps one stalled request from hanging the cell


def _stat_rows(attribute, stat_values, game_pk, player_id=""):
    """Return one CSV row dict per (statistic, value) pair in ``stat_values``."""
    return [
        {
            "attribute": attribute,
            "statistic": stat,
            "value": val,
            "gamePk": game_pk,
            "playerId": player_id,
        }
        for stat, val in stat_values.items()
    ]


def _is_pitcher(positions):
    """Return True when any entry of a ``positionsPlayed`` list denotes a pitcher."""
    # NOTE(review): the original compared pos['code'] to 'P'. The API appears to
    # report pitcher as code '1' with abbreviation 'P' -- confirm; both spellings
    # are accepted here so the original match still holds.
    return any(
        pos.get("abbreviation") == "P" or pos.get("code") in ("P", "1")
        for pos in positions
    )


def _write_stat_rows(path, rows):
    """Write ``rows`` to ``path`` as CSV with a header, creating the parent directory."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=_STAT_COLUMNS)
        writer.writeheader()
        writer.writerows(rows)


team_game_stats = []
player_game_stats = []

for g in games:
    gamePk = g['gamePk']
    boxscore = statsapi.get("game_boxscore", {'gamePk': gamePk})
    matchup = f"{boxscore['teams']['away']['team']['abbreviation']} vs. {boxscore['teams']['home']['team']['abbreviation']}"
    for side in ["home", "away"]:
        side_box = boxscore["teams"][side]

        # Team totals for this game: one row per statistic, playerId left empty.
        # NOTE(review): these rows carry no team identifier, so the home and away
        # rows of one game cannot be told apart in the CSV -- consider adding one.
        team_stats = side_box["teamStats"]
        for stat_group in ("batting", "pitching", "fielding"):
            team_game_stats.extend(
                _stat_rows(f"stat_team_{stat_group}", team_stats[stat_group], gamePk)
            )
        print(f"Loaded {side} team data from game: {matchup}")

        # Player stats: one HTTP request per player listed in the boxscore.
        # NOTE(review): confirm the gameLog endpoint honours the gamePk filter;
        # the first returned split is used as-is.
        for player_data in side_box["players"].values():
            player_id = player_data["person"]["id"]
            player_name = player_data['person']['fullName']
            url = f"https://statsapi.mlb.com/api/v1/people/{player_id}/stats"
            params = {"gamePk": gamePk, "stats": "gameLog"}
            try:
                response = requests.get(url, params=params, timeout=_PLAYER_REQUEST_TIMEOUT)
                response.raise_for_status()
                payload = response.json()
            except (requests.RequestException, ValueError) as exc:
                # One failed player lookup should not discard the whole run.
                print(f"Skipped player data for {player_name} ({player_id}): {exc}")
                continue

            stat_groups = payload.get('stats') or []
            if not stat_groups:
                continue
            splits = stat_groups[0].get('splits') or []
            if not splits:
                continue
            split = splits[0]
            # Only players with recorded positions are kept.
            if 'positionsPlayed' not in split:
                continue

            if _is_pitcher(split['positionsPlayed']):
                attribute = "stat_player_pitching"
            else:
                attribute = "stat_player_hitting"
            player_game_stats.extend(
                _stat_rows(attribute, split['stat'], gamePk, player_id)
            )
            print(f"Loaded player data for {player_name} in game: {matchup}")

if games:
    # Both files are named after the first ten characters of the first game's `gametime`.
    game_date = games[0]['gametime'][:10]
    _write_stat_rows(f"team_stats/{game_date}_game_stats.csv", team_game_stats)
    _write_stat_rows(f"player_stats/{game_date}_game_stats.csv", player_game_stats)
else:
    print("No games loaded; no team or player stats CSV written.")

Loaded home team data from game: BOS vs. WSH
Loaded player data for Brady House in game: BOS vs. WSH
Loaded player data for CJ Abrams in game: BOS vs. WSH
Loaded player data for Riley Adams in game: BOS vs. WSH
Loaded player data for Nathaniel Lowe in game: BOS vs. WSH
Loaded player data for Amed Rosario in game: BOS vs. WSH
Loaded player data for Josh Bell in game: BOS vs. WSH
Loaded player data for Paul DeJong in game: BOS vs. WSH
Loaded player data for Daylen Lile in game: BOS vs. WSH
Loaded player data for James Wood in game: BOS vs. WSH
Loaded player data for Jacob Young in game: BOS vs. WSH
Loaded player data for Alex Call in game: BOS vs. WSH
Loaded player data for Keibert Ruiz in game: BOS vs. WSH
Loaded player data for Luis García Jr. in game: BOS vs. WSH
Loaded away team data from game: BOS vs. WSH
Loaded player data for Roman Anthony in game: BOS vs. WSH
Loaded player data for Romy Gonzalez in game: BOS vs. WSH
Loaded player data for Connor Wong in game: BOS vs. WSH
Loaded p

KeyboardInterrupt: 