## Adapting functions for 2022 stats

In [8]:
#Imports
import statsapi
import datetime 
import pandas as pd
import json
from functools import reduce
import numpy as np

In [4]:
#Used in the player stats function
def add_dicts(d1, d2):
    """Merge two stat dicts, summing the numeric values of keys present in both.

    Parameters
    ----------
    d1, d2 : dict
        Stat dictionaries to combine. Neither input is modified.

    Returns
    -------
    dict
        A new dict holding every key of d1 and d2. For a key found in both
        dicts whose two values are numbers (int/float) the result is their
        sum; for any other shared key the value from d2 is kept, which is the
        same outcome ``dict.update`` gives.
    """
    d3 = dict(d1)
    d3.update(d2)

    #Each shared key is checked on its own, so one non-numeric value (for
    #example a nested dict) can no longer stop the remaining keys from being
    #summed - the previous blanket try/except aborted the whole loop
    for key in d1.keys() & d2.keys():
        left, right = d1[key], d2[key]
        if isinstance(left, (int, float)) and isinstance(right, (int, float)):
            d3[key] = left + right
    return d3

In [5]:
#Function to return player stats - hitting, pitching, and fielding

#Inputs are the id, start date and end date and the season year

def player_stats(id,start_date, end_date, year):
    """Collect a player's hitting, pitching and fielding stats for a date range.

    One request per stat group is sent to the "people" endpoint; they are
    fetched separately because (per the original note) the API switches their
    order when they are requested together.

    Parameters
    ----------
    id :
        Player id, sent as ``personIds``.
    start_date, end_date :
        Range bounds, interpolated into the hydrate string as
        ``startDate`` / ``endDate``.
    year :
        Season, interpolated into the hydrate string as ``season``.

    Returns
    -------
    dict
        Up to three keys. 'hitting' and 'pitching' hold the 'stat' dict of the
        last split in the respective response (treated as the cumulative line
        for the range). 'fielding' holds the ``add_dicts`` combination of the
        'stat' dicts of every split whose sport abbreviation is 'All'. A group
        is left out when its response does not have the expected structure.
    """
    #Errors that mean "this part of the response is missing"; anything else
    #(including KeyboardInterrupt) is allowed to propagate
    missing_data = (KeyError, IndexError, TypeError)

    def fetch(group):
        hydrate = f"stats(group=[{group}],type=[byDateRange],startDate={start_date},endDate={end_date},season={year})"
        return statsapi.get("people", {"personIds": id, "hydrate": hydrate})

    def splits_of(response):
        return response['people'][0]['stats'][0]['splits']

    #Getting player stats - separately because the API switches their order when fetched at the same time
    hitting_stats = fetch("hitting")
    pitching_stats = fetch("pitching")
    fielding_stats = fetch("fielding")

    #dict where all the collected stats are added
    stat_dict = {}

    #Taking last entry of hitting and pitching stats, as that is the cumulative for the date range
    for group, response in (('hitting', hitting_stats), ('pitching', pitching_stats)):
        try:
            stat_dict[group] = splits_of(response)[-1]['stat']
        except missing_data:
            pass

    #Summing cumulative fielding stats for each position played (Team changes now accounted for)
    try:
        fielding_list = [
            split['stat']
            for split in splits_of(fielding_stats)
            if split['sport']['abbreviation'] == 'All'
        ]
    except missing_data:
        return stat_dict

    all_fielding = {}
    for position_stats in fielding_list:
        all_fielding = add_dicts(all_fielding, position_stats)
    stat_dict['fielding'] = all_fielding

    return stat_dict

In [51]:
#Regular-season start date for 2022, read from the seasons endpoint (sportId 1)
season_info = statsapi.get("seasons", dict(sportId=1, season=2022))
start_date = season_info['seasons'][0]['regularSeasonStartDate']

In [27]:
#Ask for both team names and run each through the team lookup
home_team = input('Enter the name of the home team')
away_team = input('Enter the name of the away team')

home_info = statsapi.lookup_team(home_team)
away_info = statsapi.lookup_team(away_team)

#Later cells read home_info[0] / away_info[0]; stop here with a clear message
#instead of a bare IndexError further down when a lookup found nothing
for _side, _typed, _matches in (('home', home_team, home_info), ('away', away_team, away_info)):
    if not _matches:
        raise ValueError(f"No team found for the {_side} team input {_typed!r}")


In [None]:
#Display the lookup result for the home team; later cells read home_info[0]['id']
home_info

In [97]:
#Home team's 2022 season hitting totals
#sportIds=1 is included to only return data for MLB
h_params = dict(season=2022, stats='season', group='hitting', sportIds=1, teamId=home_info[0]['id'])
h_hit_stats = statsapi.get('team_stats', h_params, force=True)

In [126]:
#Away team's 2022 season hitting totals
#sportIds=1 is included to only return data for MLB
a_params = dict(season=2022, stats='season', group='hitting', sportIds=1, teamId=away_info[0]['id'])
a_hit_stats = statsapi.get('team_stats', a_params, force=True)

In [116]:
#Spot check: season runs from the first split of the home team's hitting stats
h_hit_stats['stats'][0]['splits'][0]['stat']['runs']

536

In [69]:
#Spot check: id of the first player returned by a name lookup
statsapi.lookup_player('Shohei Ohtani')[0]['id']

660271

In [68]:
#Spot check: 'hits' from the first stats entry for player 660271 (the id shown by the Shohei Ohtani lookup cell)
statsapi.player_stat_data(660271)['stats'][0]['stats']['hits']

125

In [47]:
#Roster response for each team from the team_roster endpoint
h_roster_dict = statsapi.get("team_roster", dict(teamId=home_info[0]['id']))
a_roster_dict = statsapi.get("team_roster", dict(teamId=away_info[0]['id']))

In [48]:
#Player ids pulled out of each roster response, kept as plain lists
h_roster_list = [entry['person']['id'] for entry in h_roster_dict['roster']]
a_roster_list = [entry['person']['id'] for entry in a_roster_dict['roster']]


In [75]:
#Function that returns the outcomes for each game, as a dict

def season_match_outcomes(season_start, season_end):
    """List the winner and loser of every scheduled game in a date range.

    Parameters
    ----------
    season_start, season_end :
        Passed to ``statsapi.schedule`` as ``start_date`` and ``end_date``.

    Returns
    -------
    list of dict
        One ``{'game_id', 'win', 'loss'}`` dict per game, in schedule order.
        Games whose schedule entry lacks 'game_id', 'winning_team' or
        'losing_team' are skipped (presumably games without a final result -
        TODO confirm against the statsapi schedule output).
    """
    dict_list = []
    for game in statsapi.schedule(start_date=season_start, end_date=season_end):
        try:
            match_dict = {
                'game_id': game['game_id'],
                'win': game['winning_team'],
                'loss': game['losing_team'],
            }
        except KeyError:
            #No recorded outcome for this game - leave it out
            continue
        dict_list.append(match_dict)
    return dict_list

In [70]:
#Function that calculates a team's win % up to a specific date

#Takes the team_id as an input, as well as the start and end date of the period

def team_record(team_id, start_date, end_date):
    """Return a team's winning percentage over a date range.

    Parameters
    ----------
    team_id :
        Value passed to ``statsapi.lookup_team``; the 'name' of the first
        match is the name compared against game results.
    start_date, end_date :
        Range handed to ``season_match_outcomes``.

    Returns
    -------
    float
        wins / (wins + losses) rounded to 3 decimals, or 0.0 when the team has
        no decided games in the range (previously a ZeroDivisionError).
    """
    results = season_match_outcomes(start_date, end_date)
    team_name = statsapi.lookup_team(team_id)[0]['name']

    #Results for Cleveland are also matched under the name 'Cleveland Indians'
    names = {team_name}
    if team_name == 'Cleveland Guardians':
        names.add('Cleveland Indians')

    wins = 0
    losses = 0
    for game in results:
        if game['win'] in names:
            wins += 1
        elif game['loss'] in names:
            losses += 1

    decided = wins + losses
    if decided == 0:
        return 0.0
    return round(wins/decided, 3)

In [71]:
def log_5(home_team_id, away_team_id, start_date, end_date):
    """Log5 estimate of the home team beating the away team.

    Both winning percentages come from ``team_record`` over the same date
    range and are combined as

        (home - home*away) / (home + away - 2*home*away)

    Parameters
    ----------
    home_team_id, away_team_id :
        Team identifiers forwarded to ``team_record``.
    start_date, end_date :
        Date range forwarded to ``team_record``.

    Returns
    -------
    float
        The log5 value. When the denominator is zero - which only happens when
        both percentages are 0 or both are 1 - 0.5 is returned, the value the
        formula gives for any two equal percentages, instead of raising
        ZeroDivisionError.
    """
    home_per = team_record(home_team_id, start_date, end_date)
    away_per = team_record(away_team_id, start_date, end_date)

    denominator = home_per + away_per - (2*home_per*away_per)
    if denominator == 0:
        return 0.5
    return (home_per - (home_per * away_per))/denominator

In [127]:
def _safe_ratio(numerator, denominator):
    #numerator/denominator rounded to 3 decimals, or NaN when the denominator is zero
    if denominator == 0:
        return float('nan')
    return round(numerator/denominator, 3)


def _on_base_pct(stat):
    #(hits + walks + hit-by-pitch) / plate appearances
    return _safe_ratio(stat['hits'] + stat['baseOnBalls'] + stat['hitByPitch'], stat['plateAppearances'])


def _slugging_pct(stat):
    #Total bases / at bats; every hit already counts one base, so doubles,
    #triples and home runs add 1, 2 and 3 extra bases respectively
    total_bases = stat['hits'] + stat['doubles'] + stat['triples']*2 + stat['homeRuns']*3
    return _safe_ratio(total_bases, stat['atBats'])


def _rolling_hit_totals(roster, range_start, range_end, year, stat_names):
    #Sums the named hitting stats over every roster player that has all of them for the date range
    totals = dict.fromkeys(stat_names, 0)
    for player_id in roster:
        try:
            #One player_stats call per player (it was previously repeated for every stat name)
            player_hitting = player_stats(player_id, range_start, range_end, year)['hitting']
            values = [player_hitting[name] for name in stat_names]
            for name, value in zip(stat_names, values):
                totals[name] += value
        except Exception:
            #Best effort: players without hitting stats (or a failed request) are skipped
            continue
    return totals


def all_hit_stats_df(df_home, df_away, end_date=None):
    """Append one row of hitting stats per team and return home minus away.

    Reads these notebook-level variables: ``home_info``, ``away_info``,
    ``start_date``, ``h_hit_stats``, ``a_hit_stats``, ``h_roster_list`` and
    ``a_roster_list``, and calls ``log_5`` and ``player_stats``.

    Parameters
    ----------
    df_home, df_away : pandas.DataFrame
        Frames with the columns avg, obp, slug, obp_10, slug_10, log_5,
        h_runs, a_runs (in that order). A new row is appended to each
        in place, so calling again adds a further row.
    end_date : str, optional
        Last day of the period as 'YYYY-MM-DD'. Defaults to today's date,
        which is what the function always used before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        ``df_home.subtract(df_away, fill_value=0)``. The away row stores its
        runs negated in 'a_runs', so the difference shows both run totals as
        positive numbers. A ratio whose denominator is zero is stored as NaN
        instead of raising ZeroDivisionError.
    """
    #End date - date of the game input
    if end_date is None:
        end_date = datetime.datetime.today().strftime('%Y-%m-%d')

    #Season passed to player_stats (fixed, matching the 2022 team stats fetched above)
    year = 2022

    #Ids of the home and away teams
    home_id = home_info[0]['id']
    away_id = away_info[0]['id']

    #Getting start date for rolling 10 day stats - 9 days before the end_date
    rolling_start_date = (datetime.datetime.strptime(end_date, "%Y-%m-%d") + datetime.timedelta(days=-9)).strftime('%Y-%m-%d')

    #List of stats being pulled from API
    hitting_stats = ['hits', 'baseOnBalls', 'atBats', 'hitByPitch','doubles', 'triples', 'homeRuns', 'plateAppearances', 'runs']

    #Log5 estimate for the home team, from season start to end_date
    log5 = log_5(home_id, away_id, start_date, end_date)

    #Season totals and rolling 10 day totals for the home team
    home_season = h_hit_stats['stats'][0]['splits'][0]['stat']
    home_rolling = _rolling_hit_totals(h_roster_list, rolling_start_date, end_date, year, hitting_stats)

    #Season totals and rolling 10 day totals for the away team
    away_season = a_hit_stats['stats'][0]['splits'][0]['stat']
    away_rolling = _rolling_hit_totals(a_roster_list, rolling_start_date, end_date, year, hitting_stats)

    #appending stats to their respective dfs
    df_home.loc[len(df_home)] = [
        _safe_ratio(home_season['hits'], home_season['atBats']),
        _on_base_pct(home_season),
        _slugging_pct(home_season),
        _on_base_pct(home_rolling),
        _slugging_pct(home_rolling),
        log5,
        home_season['runs'],
        0,
    ]
    df_away.loc[len(df_away)] = [
        _safe_ratio(away_season['hits'], away_season['atBats']),
        _on_base_pct(away_season),
        _slugging_pct(away_season),
        _on_base_pct(away_rolling),
        _slugging_pct(away_rolling),
        0,
        0,
        (away_season['runs']*-1),
    ]

    #Returning the difference of home and away team stats
    return df_home.subtract(df_away, fill_value =0)



#### Testing function

In [128]:
#Empty home and away frames that all_hit_stats_df appends a row to

#Column names shared by both frames
team_hit_stats = ['avg', 'obp', 'slug', 'obp_10', 'slug_10', 'log_5', 'h_runs', 'a_runs']

#One separate empty frame per side
df_h_h, df_a_h = (pd.DataFrame(columns = team_hit_stats) for _ in range(2))

In [129]:
#Takes 38s
#Appends one row to df_h_h and to df_a_h, then displays the home-minus-away difference;
#running this cell again appends a further row to each frame
all_hit_stats_df(df_h_h,df_a_h)

Unnamed: 0,avg,obp,slug,obp_10,slug_10,log_5,h_runs,a_runs
0,-0.018,-0.002,0.005,-0.074,-0.076,0.437779,536.0,540.0
