In [2]:
from pybaseball import batting_stats, playerid_reverse_lookup
import pandas as pd

In [3]:
def get_hitters_from_year(year, qual):
    """Fetch one season of batting stats and keep only hitters with a lookup entry.

    Parameters
    ----------
    year
        Season forwarded to ``batting_stats``.
    qual
        Forwarded to ``batting_stats`` as its ``qual`` argument.
        NOTE(review): presumably the minimum plate appearances — confirm
        against the pybaseball documentation.

    Returns
    -------
    tuple
        ``(lookup_table, trimmed_hitter_data)`` where ``lookup_table`` is the
        result of ``playerid_reverse_lookup`` for the season's ``IDfg`` values
        and ``trimmed_hitter_data`` holds the batting rows whose ``IDfg``
        appears in ``lookup_table["key_fangraphs"]``.
    """
    hitter_data = batting_stats(year, qual=qual)

    lookup_table = playerid_reverse_lookup(
        hitter_data["IDfg"].tolist(), key_type="fangraphs"
    )

    # Keep a hitter only when the lookup table knows their FanGraphs ID.
    # A single vectorized isin() replaces the per-ID scan of the lookup column.
    has_lookup_entry = hitter_data["IDfg"].isin(lookup_table["key_fangraphs"])

    # .copy() hands callers an independent frame, so later column inserts or
    # in-place writes never act on a slice of the raw batting data.
    trimmed_hitter_data = hitter_data[has_lookup_entry].copy()

    return lookup_table, trimmed_hitter_data

# Pull the 2024 batting stats (qual=100) together with the matching player-ID lookup table.
# NOTE(review): `qual` is presumably the minimum plate appearances — confirm against pybaseball docs.
lookup_table, trimmed_hitter_data = get_hitters_from_year(2024, 100)

Gathering player lookup table. This may take a moment.


In [4]:
# Preview the first rows of the player-ID lookup table.
lookup_table.head()

Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last
0,edman,tommy,669242,edmat001,edmanto01,19470,2019.0,2024.0
1,schneider,davis,676914,schnd001,schneda03,23565,2023.0,2024.0
2,lopez,nicky,670032,lopen001,lopezni01,19339,2019.0,2024.0
3,lowe,brandon,664040,loweb001,lowebr01,18882,2018.0,2024.0
4,goodman,hunter,696100,goodh001,goodmhu01,29715,2023.0,2024.0


In [None]:
# Preview the first rows of the batting stats that survived the lookup filter.
trimmed_hitter_data.head()

Unnamed: 0,IDfg,Season,Name,Team,Age,G,AB,PA,H,1B,...,maxEV,HardHit,HardHit%,Events,CStr%,CSW%,xBA,xSLG,xwOBA,L-WAR
0,15640,2024,Aaron Judge,NYY,32,158,559,704,180,85,...,117.5,238,0.609,391,0.146,0.267,0.31,0.723,0.479,11.5
5,25764,2024,Bobby Witt Jr.,KCR,24,161,636,709,211,123,...,116.9,259,0.481,538,0.138,0.236,0.315,0.577,0.407,10.4
1,19755,2024,Shohei Ohtani,LAD,29,159,636,731,197,98,...,119.2,287,0.595,482,0.144,0.269,0.314,0.66,0.442,9.2
2,20123,2024,Juan Soto,NYY,25,157,576,713,166,90,...,115.7,262,0.568,461,0.189,0.263,0.316,0.646,0.462,7.9
16,26289,2024,Gunnar Henderson,BAL,23,159,630,719,177,102,...,113.1,256,0.539,475,0.167,0.265,0.283,0.492,0.374,8.2


In [36]:
# The hitting stats that we want to keep for calculations
hitting_stats = ['IDfg', 'AVG+', 'OBP+', 'SLG+', 'BABIP+', 'BB%+', 'HardHit%', 'wFB', 'wSL', 'wCT', 'wCB', 'wCH', 'wSF']

# .copy() makes this an independent frame: it is mutated in place below and
# again by get_all_player_splits, which should never touch trimmed_hitter_data.
needed_hitter_data = trimmed_hitter_data[hitting_stats].copy()

# Placeholder platoon-split columns, filled in per player by get_all_player_splits.
# Appending at the current column count (instead of hard-coded positions 13/14)
# keeps this cell working if hitting_stats gains or loses entries.
for split_column in ('vs RHP', 'vs LHP'):
    needed_hitter_data.insert(len(needed_hitter_data.columns), split_column, None)

needed_hitter_data.head()

Unnamed: 0,IDfg,AVG+,OBP+,SLG+,BABIP+,BB%+,HardHit%,wFB,wSL,wCT,wCB,wCH,wSF,vs RHP,vs LHP
0,15640,134,148,178,129,233,0.609,48.4,18.8,6.2,1.3,5.2,5.9,,
5,25764,138,126,149,124,99,0.481,28.1,20.3,4.1,1.3,-0.5,8.2,,
1,19755,125,124,160,114,134,0.595,31.7,14.2,1.1,9.2,8.2,8.2,,
2,20123,120,136,144,104,223,0.568,39.2,4.3,7.3,7.8,7.1,2.0,,
16,26289,117,118,134,112,134,0.539,8.5,13.3,7.1,8.7,5.5,0.5,,


In [24]:
from pybaseball import get_splits

def get_player_splits(bbref_id, year):
    """Return a hitter's H/AB ratio against right- and left-handed pitchers.

    The player's splits are fetched with ``get_splits(bbref_id, year=year)``
    and narrowed to the 'Platoon Splits' group. For the 'vs RHP' and 'vs LHP'
    rows, the 'H' value is divided by the 'AB' value.

    Returns
    -------
    tuple
        ``(vs_rhp, vs_lhp)``, each rounded to three decimal places.
    """
    all_splits = get_splits(bbref_id, year=year)
    platoon = all_splits.xs('Platoon Splits', level=0, axis=0)

    # Compute both raw ratios first, then round, in RHP -> LHP order.
    raw_averages = [
        platoon.loc[matchup, 'H'] / platoon.loc[matchup, 'AB']
        for matchup in ('vs RHP', 'vs LHP')
    ]
    against_right, against_left = raw_averages

    return round(against_right, 3), round(against_left, 3)

# Try the helper on a single player ('judgeaa01') for the 2024 season.
# NOTE(review): `avg_lph` looks like a typo for `avg_lhp`; left unchanged in case other cells use it.
avg_rhp, avg_lph = get_player_splits('judgeaa01', 2024)

In [48]:
def get_all_player_splits(hitter_data, lookup_table, year):
    """Fill the 'vs RHP' and 'vs LHP' columns of ``hitter_data`` in place.

    For every row, the 'IDfg' value is translated to a 'key_bbref' through
    ``lookup_table`` (matching on 'key_fangraphs'), and ``get_player_splits``
    is called with that key and ``year``. Rows with no usable key are left
    untouched.

    Parameters
    ----------
    hitter_data
        DataFrame with an 'IDfg' column; its 'vs RHP' and 'vs LHP' cells are
        written by this function.
    lookup_table
        DataFrame with 'key_fangraphs' and 'key_bbref' columns.
    year
        Season forwarded to ``get_player_splits``.

    Returns
    -------
    None
        ``hitter_data`` is modified in place.
    """
    # Only the ID column is needed, so iterate it directly instead of
    # materializing every full row with iterrows().
    for index, idfg in hitter_data['IDfg'].items():
        bbref_keys = lookup_table.loc[
            lookup_table['key_fangraphs'] == idfg, 'key_bbref'
        ].values

        # Test the length explicitly: truth-testing a NumPy array is deprecated
        # (an error on recent NumPy) when it is empty and raises ValueError when
        # it holds more than one element. A falsy first key (e.g. an empty
        # string) is skipped, as before.
        if len(bbref_keys) == 0 or not bbref_keys[0]:
            continue

        # When several lookup rows match, the first one wins.
        vs_rhp, vs_lhp = get_player_splits(bbref_keys[0], year)

        hitter_data.at[index, 'vs RHP'] = vs_rhp
        hitter_data.at[index, 'vs LHP'] = vs_lhp


In [49]:
# Fills the 'vs RHP' / 'vs LHP' columns of needed_hitter_data in place (the function returns nothing).
# NOTE(review): get_player_splits is called once per hitter, so this cell is presumably slow — consider caching.
get_all_player_splits(needed_hitter_data, lookup_table, 2024)
needed_hitter_data.head()

Unnamed: 0,IDfg,AVG+,OBP+,SLG+,BABIP+,BB%+,HardHit%,wFB,wSL,wCT,wCB,wCH,wSF,vs RHP,vs LHP
0,15640,134,148,178,129,233,0.609,48.4,18.8,6.2,1.3,5.2,5.9,0.326,0.311
5,25764,138,126,149,124,99,0.481,28.1,20.3,4.1,1.3,-0.5,8.2,0.336,0.316
1,19755,125,124,160,114,134,0.595,31.7,14.2,1.1,9.2,8.2,8.2,0.322,0.288
2,20123,120,136,144,104,223,0.568,39.2,4.3,7.3,7.8,7.1,2.0,0.293,0.278
16,26289,117,118,134,112,134,0.539,8.5,13.3,7.1,8.7,5.5,0.5,0.291,0.257


In [50]:
# Write the final table to disk; index=False leaves the DataFrame's row labels out of the file.
needed_hitter_data.to_csv('hitter_data.csv', index=False)