In [9]:
import pandas as pd
import numpy as np
from pybaseball import statcast, playerid_lookup, batting_stats, pitching_stats_range, schedule_and_record
from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings("ignore")

In [10]:
# Notebook-wide configuration. These values are captured as default arguments
# when get_qualified_team_batters is defined, so after editing them the cell
# that defines that function must be re-run for the new defaults to apply.

# Season passed to batting_stats by default.
YEAR = 2024
# Plate-appearance threshold, forwarded as `qual=` to batting_stats by default.
MIN_PA = 100
# Date window derived from YEAR.
# NOTE(review): presumably the start/end range for the per-player Statcast
# queries further down -- confirm against the call site.
START_DATE = f"{YEAR}-03-20"
END_DATE = f"{YEAR}-10-15"
# Team code compared for equality against the "Team" column of the batting table.
TEAM = "BOS"

In [21]:
def get_qualified_team_batters(year=YEAR, min_pa=MIN_PA, team=TEAM):
    """Return a summary table of one team's batters for a season.

    Parameters
    ----------
    year : season forwarded to ``batting_stats``.
    min_pa : plate-appearance threshold, forwarded as ``qual=``.
    team : value matched exactly against the ``"Team"`` column.

    Returns
    -------
    DataFrame with columns Name, IDfg, PA, AVG, OBP, SLG for the matching
    rows. Row labels are those of the full season table; they are not reset.

    Note: the defaults are bound when this ``def`` executes, so later changes
    to YEAR / MIN_PA / TEAM only take effect after re-running this cell.
    """
    print(f"Fetching {team} batters with > {min_pa} PA in {year}...")

    summary_columns = ["Name", "IDfg", "PA", "AVG", "OBP", "SLG"]

    # One call fetches the season-wide table; the team filter is applied locally.
    season_stats = batting_stats(year, qual=min_pa)
    plays_for_team = season_stats["Team"] == team

    return season_stats[plays_for_team][summary_columns]

In [22]:
# Build the batter table using the function's default year / PA threshold / team.
qualified_batters = get_qualified_team_batters()
print(f"Found {len(qualified_batters)} qualified {TEAM} batters")
# Last expression of the cell, so the first rows render as a table.
qualified_batters.head()

Fetching BOS batters with > 100 PA in 2024...
Found 15 qualified BOS batters


Unnamed: 0,Name,IDfg,PA,AVG,OBP,SLG
41,Jarren Duran,24617,735,0.285,0.342,0.492
30,Rafael Devers,17350,601,0.272,0.354,0.516
84,Wilyer Abreu,23772,447,0.253,0.322,0.459
35,Tyler O'Neill,15711,473,0.241,0.336,0.511
214,David Hamilton,27531,317,0.248,0.303,0.395


In [None]:
from pybaseball import statcast_batter

def get_player_game_data(player_id, player_name, start_date, end_date):
    """Fetch one batter's Statcast rows and tag them with a display name.

    Parameters
    ----------
    player_id : identifier passed to ``statcast_batter`` and compared against
        the ``"batter"`` column of its result.
        NOTE(review): presumably this must be the MLBAM id rather than the
        FanGraphs ``IDfg`` -- confirm against the pybaseball documentation.
    player_name : value written into the ``"player_name"`` column of every
        returned row (replacing that column if the result already has one).
    start_date, end_date : forwarded as ``start_dt`` / ``end_dt``.

    Returns
    -------
    A new DataFrame holding only the rows whose ``"batter"`` equals
    ``player_id``, with the ``"player_name"`` column set.
    """
    statcast_rows = statcast_batter(
        start_dt=start_date,
        end_dt=end_date,
        player_id=player_id,
    )

    # Keep only rows where this player is the batter, then attach the name.
    # Boolean indexing and .assign both return new frames, so the fetched
    # frame itself is never modified.
    is_this_batter = statcast_rows["batter"] == player_id
    return statcast_rows[is_this_batter].assign(player_name=player_name)

def collect_all_player_data(qualified_batters, start_date, end_date):
    all_player_data = []

    for _, player in tqdm(qualified_batters.iterrows(), total=len(qualified_batters), des="Processing players"):
        print(f"Fetching data for {player["Name"]}...")
        player_data = get_player_game_data(player)