# Strike 'em Out Computations

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json

In [28]:
# import data
df = pd.read_csv('CleanedUpForDB.csv')

# clean df by eliminating rows where game_pk and pitch_type is missing
df.dropna(subset=['game_pk', 'pitch_type'], inplace=True)
df.drop(columns=['Unnamed: 0'])

# rename columns
df.rename(columns = {'events':'pitch_event', 'description':'pitch_result', 'batter':'batter_id' ,'pitcher':'pitcher_id'}, inplace=True) 

# only keep pitches when batter swung
df = df[(df['pitch_result'] == 'swinging_strike') | (df['pitch_result'] == 'swinging_strike_blocked') |
        (df['pitch_result'] == 'foul') | (df['pitch_result'] == 'foul_tip') |
        (df['pitch_result'] == 'hit_into_play') | (df['pitch_result'] == 'hit_into_play_no_out') | (df['pitch_result'] == 'hit_into_play_score')]

# switch from catcher's perspective to pitcher's perspective
# Catcher's POV: (plate_x,plate_z)
# Pitcher's POV: (plate_-x,plate_z)
df['plate_-x'] = -df['plate_x']

# create unique pitch_id
df['game_pk'] = df['game_pk'].astype(str)
df['sv_id'] = df['sv_id'].astype(str)
df['pitch_id'] = df['game_pk'] + "-" + df['sv_id']

# if no player_id, then no runner on base 
df['on_1b'] = df['on_1b'].notna()
df['on_2b'] = df['on_2b'].notna()
df['on_3b'] = df['on_3b'].notna()

# make new df as per Tony's email
df_tony = df[['pitch_id','game_date','batter_id','pitcher_id','pitch_type',
              'plate_-x','plate_z','sz_top','sz_bot','pitch_result','pitch_event','on_3b','on_2b','on_1b']]

df_tony.insert(5, 'cluster', 0, True)

In [43]:
# df_tony.shape # (342877, 15)
# df_tony.size # 5143155 elements
# df_tony.dtypes

In [30]:
df_tony.head()
# df_tony.tail()

Unnamed: 0,pitch_id,game_date,batter_id,pitcher_id,pitch_type,cluster,plate_-x,plate_z,sz_top,sz_bot,pitch_result,pitch_event,on_3b,on_2b,on_1b
0,564734-190405_055708,4/4/19,600303,664871,FF,0,-0.2531,1.8586,2.9253,1.1519,hit_into_play,field_out,False,True,False
2,564734-190405_055607,4/4/19,405395,664871,FF,0,-0.1461,2.2176,2.8994,1.3073,hit_into_play_score,field_out,True,True,False
3,564734-190405_055540,4/4/19,405395,664871,FF,0,0.7999,1.9176,2.9511,1.2556,foul,,True,True,False
5,564734-190405_055441,4/4/19,664058,664871,FF,0,1.0338,3.0197,3.0029,1.2297,hit_into_play,field_out,False,True,True
8,564734-190405_055313,4/4/19,571506,664871,CU,0,0.3677,1.5926,3.1577,1.2556,hit_into_play_no_out,field_error,False,False,True


## Calculate Zone of Pitch

In [46]:
def det_batter_zone(x,z,sz_top,sz_bot):
    sz_mid = ((sz_top - sz_bot)/2) + sz_bot
    sz_third = (sz_top - sz_bot)/3
    # non-competitive pitches
    if (z < 1) or (z > 4) or (x < -1.5) or (x > 1.5):
        return 0 
    # inner strikezone (1-9)
    elif (sz_bot <= z) and (z <= sz_top) and (-0.7 <= x) and (x <= 0.7):
        if (sz_top >= z) and (z > (sz_top-sz_third)):
            if (x <= -0.23):
                return 1
            elif (x <= 0.23):
                return 2
            elif (x <= 0.7):
                  return 3
        elif ((sz_top-sz_third) >= z) and (z > (sz_bot+sz_third)):
            if (x <= -0.23):
                return 4
            elif (x <= 0.23):
                return 5
            elif (x <= 0.7):
                return 6
        elif ((sz_bot+sz_third) >= z) and (z >= sz_bot):
            if (x <= -0.23):
                return 7
            elif (x <= 0.23):
                return 8
            elif (x <= 0.7):
                return 9
    # outer strike zone
    # top row (10-13)
    elif (sz_top < z) and (z <= 4) and (-1.5 <= x) and (x <= 1.5):
        if (x <= -0.7):
            return 10
        elif (x <= 0):
            return 11
        elif (x <= 0.7):
            return 12
        elif (x <= 1.5):
            return 13
    # top middle row (14,15)
    elif (sz_mid < z) and (z <= sz_top):
        if (-1.5 <= x) and (-0.7 > x):
            return 14
        elif (0.7 < x) and (x <= 1.5):
            return 15
    # bottom middle row (16,17)
    elif (sz_bot < z) and (z <= sz_mid):
        if (-1.5 <= x) and (-0.7 > x):
            return 16
        elif (0.7 < x) and (x <= 1.5):
            return 17
    # bottom row (18-21)
    elif (1 <= z) and (z <= sz_bot) and (-1.5 <= x) and (x <= 1.5):
        if (x <= -0.7):
            return 18
        elif (x <= 0):
            return 19
        elif (x <= 0.7):
            return 20
        elif (x <= 1.5):
            return 21
    # null or invalid
    return -1 

## Calculate Cluster for Pitch Types of Pitcher

## Create Structure for JSON

In [60]:
half_computed_db = {}
for row in range(df_tony.shape[0]):
    pitch_details = df_tony.iloc[row].values.tolist()
    
    pitch_id = pitch_details[0]
    half_computed_db[pitch_id] = []
    
    half_computed_db[pitch_id].append(int(pitch_details[2])) # batter_id 
    half_computed_db[pitch_id].append(pitch_details[1]) # game_date
    half_computed_db[pitch_id].append(int(pitch_details[3])) # pitcher_id
    half_computed_db[pitch_id].append(pitch_details[4]) # pitch_type
    half_computed_db[pitch_id].append(int(pitch_details[5])) # cluster
    
    pitch_zone = det_batter_zone(pitch_details[6],pitch_details[7],
                                 pitch_details[8],pitch_details[9])
    half_computed_db[pitch_id].append(pitch_zone)
    
    half_computed_db[pitch_id].append(pitch_details[10]) # pitch_result
    half_computed_db[pitch_id].append(pitch_details[11]) # pitch_event
    half_computed_db[pitch_id].append(str(pitch_details[14])) # on_1b
    half_computed_db[pitch_id].append(str(pitch_details[13])) # on_2b
    half_computed_db[pitch_id].append(str(pitch_details[12])) # on_3b
    

In [61]:
print(half_computed_db['564734-190405_055708']) # first one
print(half_computed_db['565107-190929_190735']) # last one

[600303, '4/4/19', 664871, 'FF', 0, 4, 'hit_into_play', 'field_out', 'False', 'True', 'False']
[593643, '9/29/19', 593958, 'FF', 0, 12, 'swinging_strike', nan, 'False', 'False', 'False']


In [62]:
with open("all_2019_pitches.json", "w") as hitters_file:
    json.dump(half_computed_db, hitters_file, sort_keys=True, indent=2)