In [None]:
import statsapi
import numpy as np
from datetime import datetime, timedelta

# Keys looked up (via dict.get) in each hitting stats dict. The order of this
# list fixes the order of the values in the feature vectors built below.
BATTER_STATS = [
    "gamesPlayed",
    "flyOuts",
    "groundOuts",
    "airOuts",
    "runs",
    "doubles",
    "triples",
    "homeRuns",
    "strikeOuts",
    "baseOnBalls",
    "intentionalWalks",
    "hits",
    "hitByPitch",
    "avg",
    "atBats",
    "obp",
    "slg",
    "ops",
    "caughtStealing",
    "stolenBases",
    "stolenBasePercentage",
    "groundIntoDoublePlay",
    "groundIntoTriplePlay",
    "numberOfPitches",
    "plateAppearances",
    "totalBases",
    "rbi",
    "leftOnBase",
    "sacBunts",
    "sacFlies",
    "babip",
    "groundOutsToAirouts",
    "catchersInterference",
    "atBatsPerHomeRun",
]

# Placeholder strings that are replaced with 0.0 before float() conversion.
# NOTE(review): presumably what the API returns for undefined rate stats - confirm.
ZERO_VALUES = ['.---', '-.--']

def get_batter_stats_v2(player_id, game_date, year=2024, last_x=5):
    """Build a flat hitting feature vector for one player as of ``game_date``.

    Two ``byDateRange`` hitting queries are issued through
    ``statsapi.player_stat_data``: one from January 1st of ``year`` through
    ``game_date`` and one from ``last_x`` days before ``game_date`` through
    ``game_date``. Only the first stat group of each response is used.

    Args:
        player_id: Player identifier passed straight to ``statsapi``.
        game_date: End date of both ranges, formatted ``MM/DD/YYYY``.
        year: Season year used to build the ``01/01/<year>`` start date.
        last_x: Size of the recent window in calendar days (not games).

    Returns:
        list[float]: ``2 * len(BATTER_STATS)`` values - the season-to-date
        stats in ``BATTER_STATS`` order followed by the recent-window stats
        in the same order. Missing stats, the ``ZERO_VALUES`` placeholders and
        any other value ``float()`` cannot parse all become ``0.0``.

    Raises:
        ValueError: If ``game_date`` does not match ``%m/%d/%Y``.
    """

    def _as_float(raw):
        # Known placeholders first; anything else float() rejects (another
        # placeholder string, None, ...) is also treated as "no value" so a
        # single odd stat cannot abort the whole feature build.
        if raw in ZERO_VALUES:
            return 0.0
        try:
            return float(raw)
        except (TypeError, ValueError):
            return 0.0

    def _first_split(stat_groups):
        # An empty response means the player has no stats in the range.
        return stat_groups[0]["stats"] if stat_groups else {}

    season_start_date = f"01/01/{year}"
    end_date = datetime.strptime(game_date, "%m/%d/%Y")
    last_x_start_date = (end_date - timedelta(days=last_x)).strftime("%m/%d/%Y")

    # NOTE(review): both ranges end ON game_date, so stats from that day's game
    # may be included in the features - confirm this is intended.
    player_season_stats = statsapi.player_stat_data(
        player_id, group="[hitting]", type=f"[byDateRange],startDate={season_start_date},endDate={game_date},currentTeam"
    )["stats"]
    player_last_x_dates_stats = statsapi.player_stat_data(
        player_id, group="[hitting]", type=f"[byDateRange],startDate={last_x_start_date},endDate={game_date},currentTeam"
    )["stats"]

    season = _first_split(player_season_stats)
    last_x_batting = _first_split(player_last_x_dates_stats)

    # Season block first, then the recent-window block, both in BATTER_STATS order.
    features = [_as_float(season.get(key, 0.0)) for key in BATTER_STATS]
    features.extend(_as_float(last_x_batting.get(key, 0.0)) for key in BATTER_STATS)
    return features

# Player IDs noted as raising ValueError.
# NOTE(review): the failing call is not recorded here; presumably
# get_batter_stats_v2 - confirm.
#ValueError: [514888, 608324, 673237, 665161, 605170, 592325, 701305, 676694, 676801]

# Smoke call using the first ID from the list above.
get_batter_stats_v2(514888, "04/10/2024")

In [None]:
import statsapi
from datetime import datetime, timedelta
import numpy as np

# Keys looked up (via dict.get) in each pitching stats dict. The order of this
# list fixes the order of the values in the feature vectors built below.
PITCHER_STATS = ['gamesPlayed', 'gamesStarted', 'flyOuts', 'groundOuts', 'airOuts', 'runs', 'doubles', 'triples', 'homeRuns', 'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch', 'avg', 'atBats', 'obp', 'slg', 'ops', 'caughtStealing', 'stolenBases', 'stolenBasePercentage', 'groundIntoDoublePlay', 'numberOfPitches', 'era', 'inningsPitched', 'wins', 'losses', 'saves', 'saveOpportunities', 'holds', 'blownSaves', 'earnedRuns', 'whip', 'battersFaced', 'outs', 'gamesPitched', 'completeGames', 'shutouts', 'strikes', 'strikePercentage', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffs', 'totalBases', 'groundOutsToAirouts', 'winPercentage', 'pitchesPerInning', 'gamesFinished', 'strikeoutWalkRatio', 'strikeoutsPer9Inn', 'walksPer9Inn', 'hitsPer9Inn', 'runsScoredPer9', 'homeRunsPer9', 'inheritedRunners', 'inheritedRunnersScored', 'catchersInterference', 'sacBunts', 'sacFlies']

# Placeholder strings that are replaced with 0.0 before float() conversion.
# Same value as the ZERO_VALUES defined in the first cell; re-running this cell rebinds it.
ZERO_VALUES = ['.---', '-.--']

def get_pitcher_stats_v2(player_id, game_date, year, last_x=5):
    """Build a flat pitching feature vector for one player as of ``game_date``.

    Two ``byDateRange`` pitching queries are issued through
    ``statsapi.player_stat_data``: one from January 1st of ``year`` through
    ``game_date`` and one from ``last_x`` days before ``game_date`` through
    ``game_date``. Only the first stat group of each response is used.

    Args:
        player_id: Player identifier passed straight to ``statsapi``.
        game_date: End date of both ranges, formatted ``MM/DD/YYYY``.
        year: Season year used to build the ``01/01/<year>`` start date.
        last_x: Size of the recent window in calendar days (not games).

    Returns:
        list[float]: ``2 * len(PITCHER_STATS)`` values - the season-to-date
        stats in ``PITCHER_STATS`` order followed by the recent-window stats
        in the same order. Missing stats, the ``ZERO_VALUES`` placeholders and
        any other value ``float()`` cannot parse all become ``0.0``.

    Raises:
        ValueError: If ``game_date`` does not match ``%m/%d/%Y``.
    """

    def _as_float(raw):
        # Known placeholders first; anything else float() rejects (another
        # placeholder string, None, ...) is also treated as "no value" so a
        # single odd stat cannot abort the whole feature build.
        if raw in ZERO_VALUES:
            return 0.0
        try:
            return float(raw)
        except (TypeError, ValueError):
            return 0.0

    def _first_split(stat_groups):
        # An empty response means the player has no stats in the range.
        return stat_groups[0]["stats"] if stat_groups else {}

    season_start_date = f"01/01/{year}"
    end_date = datetime.strptime(game_date, "%m/%d/%Y")
    last_x_start_date = (end_date - timedelta(days=last_x)).strftime("%m/%d/%Y")

    # NOTE(review): both ranges end ON game_date, so stats from that day's game
    # may be included in the features - confirm this is intended.
    player_season_stats = statsapi.player_stat_data(
        player_id, group="[pitching]", type=f"[byDateRange],startDate={season_start_date},endDate={game_date},currentTeam"
    )["stats"]
    player_last_x_dates_stats = statsapi.player_stat_data(
        player_id, group="[pitching]", type=f"[byDateRange],startDate={last_x_start_date},endDate={game_date},currentTeam"
    )["stats"]

    season = _first_split(player_season_stats)
    last_x_pitching = _first_split(player_last_x_dates_stats)

    # Season block first, then the recent-window block, both in PITCHER_STATS order.
    features = [_as_float(season.get(key, 0.0)) for key in PITCHER_STATS]
    features.extend(_as_float(last_x_pitching.get(key, 0.0)) for key in PITCHER_STATS)
    return features

# Smoke check: the vector must hold one season block plus one recent-window
# block, each len(PITCHER_STATS) long.
# NOTE(review): 579328 is presumably the player ID of the pitcher the variable is named after - confirm.
kikuchi = 579328
assert len(get_pitcher_stats_v2(kikuchi, "04/10/2024", "2024")) == len(PITCHER_STATS)*2 

In [None]:
import statsapi
import numpy as np

# Keys looked up (via dict.get) in each hitting stats dict; list order fixes
# the feature order. Same contents as the BATTER_STATS defined in the first
# cell - re-running this cell rebinds the name, so keep the two in sync.
BATTER_STATS = [
    "gamesPlayed",
    "flyOuts",
    "groundOuts",
    "airOuts",
    "runs",
    "doubles",
    "triples",
    "homeRuns",
    "strikeOuts",
    "baseOnBalls",
    "intentionalWalks",
    "hits",
    "hitByPitch",
    "avg",
    "atBats",
    "obp",
    "slg",
    "ops",
    "caughtStealing",
    "stolenBases",
    "stolenBasePercentage",
    "groundIntoDoublePlay",
    "groundIntoTriplePlay",
    "numberOfPitches",
    "plateAppearances",
    "totalBases",
    "rbi",
    "leftOnBase",
    "sacBunts",
    "sacFlies",
    "babip",
    "groundOutsToAirouts",
    "catchersInterference",
    "atBatsPerHomeRun",
]

# Placeholder strings that are replaced with 0.0 before float() conversion.
ZERO_VALUES = ['.---', '-.--']

def get_batter_stats(player_id):
    """Build a flat hitting feature vector from season and last-5-games stats.

    Issues one ``statsapi.player_stat_data`` request for the ``season`` and
    ``lastXGames`` (``limit=5``) hitting stat types.

    Args:
        player_id: Player identifier passed straight to ``statsapi``.

    Returns:
        list[float]: ``2 * len(BATTER_STATS)`` values - the season stats in
        ``BATTER_STATS`` order followed by the last-X-games stats in the same
        order. A stat type that is absent from the response contributes zeros;
        missing stats, the ``ZERO_VALUES`` placeholders and any other value
        ``float()`` cannot parse become ``0.0``.
    """

    def _as_float(raw):
        # Known placeholders first; anything else float() rejects (another
        # placeholder string, None, ...) is also treated as "no value".
        if raw in ZERO_VALUES:
            return 0.0
        try:
            return float(raw)
        except (TypeError, ValueError):
            return 0.0

    player = statsapi.player_stat_data(
        player_id, group="[hitting]", type="[season,lastXGames],limit=5"
    )

    # Default to empty dicts so a player without hitting splits yields an
    # all-zero block instead of failing on ``None.get``.
    season = {}
    last_x_batting = {}
    for stat in player["stats"]:
        if stat["group"] != "hitting":
            continue
        # When several splits of the same type are present the LAST one wins.
        # NOTE(review): get_pitcher_stats picks index 1 in that situation -
        # confirm which split is wanted for batters.
        if stat["type"] == "season":
            season = stat["stats"]
        elif stat["type"] == "lastXGames":
            last_x_batting = stat["stats"]

    # Season block first, then the last-X-games block, both in BATTER_STATS order.
    features = [_as_float(season.get(key, 0.0)) for key in BATTER_STATS]
    features.extend(_as_float(last_x_batting.get(key, 0.0)) for key in BATTER_STATS)
    return features

In [None]:
import statsapi
import numpy as np

# Keys looked up (via dict.get) in each pitching stats dict; list order fixes
# the feature order. Same contents as the PITCHER_STATS defined in the earlier
# cell - re-running this cell rebinds the name, so keep the two in sync.
PITCHER_STATS = ['gamesPlayed', 'gamesStarted', 'flyOuts', 'groundOuts', 'airOuts', 'runs', 'doubles', 'triples', 'homeRuns', 'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch', 'avg', 'atBats', 'obp', 'slg', 'ops', 'caughtStealing', 'stolenBases', 'stolenBasePercentage', 'groundIntoDoublePlay', 'numberOfPitches', 'era', 'inningsPitched', 'wins', 'losses', 'saves', 'saveOpportunities', 'holds', 'blownSaves', 'earnedRuns', 'whip', 'battersFaced', 'outs', 'gamesPitched', 'completeGames', 'shutouts', 'strikes', 'strikePercentage', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffs', 'totalBases', 'groundOutsToAirouts', 'winPercentage', 'pitchesPerInning', 'gamesFinished', 'strikeoutWalkRatio', 'strikeoutsPer9Inn', 'walksPer9Inn', 'hitsPer9Inn', 'runsScoredPer9', 'homeRunsPer9', 'inheritedRunners', 'inheritedRunnersScored', 'catchersInterference', 'sacBunts', 'sacFlies']

# Placeholder strings that are replaced with 0.0 before float() conversion.
ZERO_VALUES = ['.---', '-.--']

def get_pitcher_stats(player_id):
    """Build a flat pitching feature vector from season and last-5-games stats.

    Issues one ``statsapi.player_stat_data`` request for the ``season`` and
    ``lastXGames`` (``limit=5``) pitching stat types.

    Args:
        player_id: Player identifier passed straight to ``statsapi``.

    Returns:
        list[float]: ``2 * len(PITCHER_STATS)`` values - the season stats in
        ``PITCHER_STATS`` order followed by the last-X-games stats in the same
        order. A stat type that is absent from the response contributes zeros;
        missing stats, the ``ZERO_VALUES`` placeholders and any other value
        ``float()`` cannot parse become ``0.0``.
    """

    def _as_float(raw):
        # Known placeholders first; anything else float() rejects (another
        # placeholder string, None, ...) is also treated as "no value".
        if raw in ZERO_VALUES:
            return 0.0
        try:
            return float(raw)
        except (TypeError, ValueError):
            return 0.0

    def _current_team_split(splits):
        # Several splits of one type indicate a trade; index values are
        # 0=total, 1=new_team, 2=old_team and only the current team matters.
        # No split at all yields an empty dict (all-zero block) rather than
        # an IndexError.
        if len(splits) > 1:
            return splits[1]
        return splits[0] if splits else {}

    player = statsapi.player_stat_data(
        player_id, group="[pitching]", type="[season,lastXGames],limit=5"
    )

    season_stats = []
    last_x_pitching_stats = []
    for stat in player["stats"]:
        if stat["group"] != "pitching":
            continue
        if stat["type"] == "season":
            season_stats.append(stat["stats"])
        elif stat["type"] == "lastXGames":
            last_x_pitching_stats.append(stat["stats"])

    season = _current_team_split(season_stats)
    last_x_pitching = _current_team_split(last_x_pitching_stats)

    # Season block first, then the last-X-games block, both in PITCHER_STATS order.
    features = [_as_float(season.get(key, 0.0)) for key in PITCHER_STATS]
    features.extend(_as_float(last_x_pitching.get(key, 0.0)) for key in PITCHER_STATS)
    return features

# Smoke check: the vector must hold one season block plus one last-X-games
# block, each len(PITCHER_STATS) long.
# NOTE(review): 579328 is presumably the player ID of the pitcher the variable is named after - confirm.
kikuchi = 579328
assert len(get_pitcher_stats(kikuchi)) == len(PITCHER_STATS)*2 

In [None]:
import statsapi

def get_lineup_player_stats(game_id):
    """Return ``(runs, features)`` for the home and the away side of one game.

    The game payload is fetched with ``statsapi.get("game", ...)``. For each
    side the feature list is the concatenated ``get_batter_stats`` vectors of
    its batting order followed by the ``get_pitcher_stats`` vector of the
    first pitcher listed for the opposing side.

    Args:
        game_id: Value sent as ``gamePk`` in the request.

    Returns:
        tuple: ``((home_runs, home_features), (away_runs, away_features))``.
    """
    live_data = statsapi.get("game", {"gamePk": game_id})["liveData"]

    def read_side(side):
        # Batting order, first listed pitcher and runs for "away" / "home".
        box = live_data["boxscore"]["teams"][side]
        batting_order = box["battingOrder"]
        first_pitcher = box["pitchers"][0]
        runs_scored = live_data["linescore"]["teams"][side]["runs"]
        return batting_order, first_pitcher, runs_scored

    def build_features(batting_order, opposing_pitcher_id):
        # Feature set layout: [ROSTER..., OPPONENT_STARTING_PITCHER]
        pitcher_part = get_pitcher_stats(opposing_pitcher_id)
        batter_part = [
            value
            for batter_id in batting_order
            for value in get_batter_stats(batter_id)
        ]
        return batter_part + pitcher_part

    away_order, away_pitcher, away_total = read_side("away")
    home_order, home_pitcher, home_total = read_side("home")

    # Away side is assembled first, then home.
    away_vector = build_features(away_order, home_pitcher)
    home_vector = build_features(home_order, away_pitcher)

    return (home_total, home_vector), (away_total, away_vector)


# Example usage: build the (runs, features) pair for both sides of one
# hard-coded game and unpack them into notebook-level names.
game_id = 746994  # Replace with your game ID
(home_runs, home_features), (away_runs, away_features) = get_lineup_player_stats(game_id)

In [None]:
import statsapi

# Seasons iterated by the loop below.
years = ['2021', '2022', '2023']
# Not used anywhere in this cell.
validation_years = ['2024']

# For every scheduled game from April 1st to September 30th of each year,
# build both sides' features and print them.
# NOTE(review): get_lineup_player_stats calls get_batter_stats / get_pitcher_stats,
# which take no date argument, so the features are not tied to the date of the
# historical game - confirm this is intended.
for year in years:
    games = statsapi.schedule(start_date=f"04/01/{year}", end_date=f"09/30/{year}")
    for game in games:
        game_id = game['game_id']
        (home_runs, home_features), (away_runs, away_features) = get_lineup_player_stats(game_id)
        # Prints the full feature lists for every game.
        print(home_runs, home_features, away_runs, away_features)

In [None]:
import statsapi
import time

def season_data(year):
    """Collect the ``game_id`` of every scheduled game from April to September.

    The schedule is requested one calendar month at a time through
    ``statsapi.schedule``.

    Args:
        year: Year interpolated into the ``MM/DD/<year>`` request dates.

    Returns:
        list: Game ids in the order the monthly schedules returned them.
    """
    # (month, last day of that month) pairs, both as two-digit strings.
    month_bounds = (
        ("04", "30"),
        ("05", "31"),
        ("06", "30"),
        ("07", "31"),
        ("08", "31"),
        ("09", "30"),
    )

    collected_ids = []
    for month_number, last_day in month_bounds:
        monthly_schedule = statsapi.schedule(
            start_date=f"{month_number}/01/{year}",
            end_date=f"{month_number}/{last_day}/{year}",
        )
        collected_ids.extend(entry["game_id"] for entry in monthly_schedule)
    return collected_ids

# Time how long collecting the game ids for the 2021 season takes (seconds).
start =  time.time()
game_ids = season_data('2021')
print(f"Time taken: {time.time()-start}")


In [None]:
import torch
from torchsummary import summary

class BaseballModel(torch.nn.Module):
    """Fully connected feed-forward network: Linear+ReLU blocks, then a Linear output.

    Args:
        in_features: Size of each input sample.
        out_features: Size of each output sample.
        layers: Widths of the hidden layers, in order. Any sequence (list or
            tuple) is accepted; an empty sequence produces a single
            ``in_features -> out_features`` linear layer.
    """

    def __init__(self, in_features, out_features, layers=(256, 512, 1024, 2048)):
        super().__init__()
        # Copy into a fresh list so tuples work and the caller's (or the
        # default) sequence is never shared with the instance.
        layer_sizes = [in_features, *layers]

        model_layers = []
        for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:]):
            model_layers.append(torch.nn.Linear(fan_in, fan_out))
            model_layers.append(torch.nn.ReLU())

        # Output layer. Uses layer_sizes[-1] (not layers[-1]) so that an empty
        # ``layers`` falls back to ``in_features`` instead of raising IndexError.
        model_layers.append(torch.nn.Linear(layer_sizes[-1], out_features))

        self.network = torch.nn.Sequential(*model_layers)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the result."""
        return self.network(x)

# Example usage: build a model with placeholder sizes and pass it to
# torchsummary's summary() with a matching input size.
model = BaseballModel(in_features=100, out_features=10)  # Replace with your actual in_features and out_features
summary(model, input_size=(1, 100))  # Replace (1, 100) with the actual input size