Welcome to the Analysis File. All explainations are listed in comments.

In [3]:
import pandas as pd

In [4]:
filename = "allPlays1625.parquet"

In [6]:
#Get Metrics of Desired Range
j = pd.read_parquet(filename)

#Get all at bats in order
j = j.sort_values(['game_pk', 'inning','half_inning_int','at_bat_index',  'start_time'])

#Get half_id unique id per half inning
j['half_id'] = j['game_pk'].astype(str) + "-" + j['inning'].astype(str) + "-" + j['half_inning']

#drop duplicates
j = (
    j.drop_duplicates(subset=['game_pk', 'half_id', 'at_bat_index', 'start_time'], keep='last')
     .reset_index(drop=True)
)

#State before and after columns
j['state_before'] =j['before_1B'].astype(str) + j['before_2B'].astype(str) + j['before_3B'].astype(str) +  j['outs_before'].astype(str)
j['state_after'] =j['after_1B'].astype(str) + j['after_2B'].astype(str) + j['after_3B'].astype(str) +  j['outs_after'].astype(str)


#Calculate total runs scrored in each half inning
j['runs_cum_in_half'] = j.groupby('half_id')['runs_scored'].cumsum()

#Get runs remaining before and after play columns
j['runs_in_half_total'] = j.groupby('half_id')['runs_scored'].transform('sum')
j['runs_remaining_after_play'] = j['runs_in_half_total'] - j['runs_cum_in_half']
j['runs_remaining_before_play'] = j['runs_remaining_after_play'] + j['runs_scored']

#only inlcude batter decisions (no steals, wild pitch, etc)
BATTER_EVENTS = {
    "single","double","triple","home_run","strikeout","walk","intent_walk","hit_by_pitch",
    "field_out","force_out","double_play","triple_play","sac_fly","sac_bunt",
    "field_error","other_out","catcher_interf"
}
j_pa = j[j['event_type'].str.lower().isin(BATTER_EVENTS)].copy()

#Assign runs remaining in each inning to each state
re_table = (j_pa.groupby('state_before', as_index = False).agg(RE = ('runs_remaining_before_play', 'mean'), n = ('runs_remaining_before_play', 'size')).sort_values('state_before'))
re_map = re_table.set_index('state_before')['RE']

#List runs expected before and after at bat
j['RE_before'] = j['state_before'].map(re_map)
j['RE_after']  = j['state_after'].map(re_map).fillna(0.0)

#Value of each play is runs scored on each play plus the change in expected
j['run_value'] = j['runs_scored'] + j['RE_after'] - j['RE_before']


In [7]:
#RE Values for each state
re_map



state_before
0000    0.510041
0001    0.271986
0002    0.104252
0010    1.384892
0011    0.985410
0012    0.373765
0100    1.149324
0101    0.700681
0102    0.333000
0110    2.004488
0111    1.442566
0112    0.602875
1000    0.980505
1001    0.589522
1002    0.233595
1010    1.857106
1011    1.338644
1012    0.513997
1100    1.611970
1101    1.044387
1102    0.452587
1110    2.412617
1111    1.757251
1112    0.778435
Name: RE, dtype: float64

In [8]:
#Get difference in bunt vs no bunt at each state
overall = j_pa.groupby(['state_before','is_bunt'], as_index=False).agg(n=('run_value', 'size'), rv_mean=('run_value','mean'),
          rv_median=('run_value','median'),
          rv_std=('run_value','std'))


overall[['state_before', 'is_bunt','n', 'rv_mean', 'rv_median', 'rv_std']]

Unnamed: 0,state_before,is_bunt,n,rv_mean,rv_median,rv_std
0,0,False,422487,-0.000188,-0.229821,0.367165
1,0,True,2436,0.172056,0.384723,0.399963
2,1,False,305661,-0.001095,-0.163696,0.276819
3,1,True,1189,0.044239,-0.159096,0.225611
4,2,False,242835,0.000367,-0.098773,0.210418
5,2,True,728,0.009022,-0.098088,0.117685
6,10,False,2770,0.032407,-0.157774,0.479753
7,10,True,10,0.180604,0.473561,0.546213
8,11,False,12178,0.015949,0.138157,0.542579
9,11,True,91,0.196515,0.204241,0.445332


In [9]:
#This cell is for easier viewing

by_state = (
    j.groupby(['state_before','is_bunt'], as_index=False)
      .agg(rv_mean=('run_value','mean'))
)

# Pivot to wide
cmp = by_state.pivot(index='state_before', columns='is_bunt', values='rv_mean')
cmp = cmp.rename(columns={False:'No', True:'Yes'})

# Difference (bunt âˆ’ no bunt)
cmp['difference'] = cmp.get('Yes', 0) - cmp.get('No', 0)
cmp

is_bunt,No,Yes,difference
state_before,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.018781,0.147739,0.128958
1,0.012349,0.059546,0.047196
2,0.0012,0.009404,0.008204
10,0.033106,0.055871,0.022766
11,0.014519,0.181205,0.166686
12,-0.00358,0.157226,0.160806
100,0.02476,-0.030516,-0.055276
101,0.017036,-0.001347,-0.018383
102,-0.002314,0.022954,0.025268
110,0.015108,0.275775,0.260666
