In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 500)
pd.options.mode.chained_assignment = None

In [2]:
play_filenames = [r'C:\\Users\mendo\Desktop\Retro Data\play_data\all2009.csv',
                  r'C:\\Users\mendo\Desktop\Retro Data\play_data\all2010.csv',
                  r'C:\\Users\mendo\Desktop\Retro Data\play_data\all2011.csv',
                  r'C:\\Users\mendo\Desktop\Retro Data\play_data\all2012.csv',
                  r'C:\\Users\mendo\Desktop\Retro Data\play_data\all2013.csv',
                  r'C:\\Users\mendo\Desktop\Retro Data\play_data\all2014.csv',
                  r'C:\\Users\mendo\Desktop\Retro Data\play_data\all2015.csv',
                  r'C:\\Users\mendo\Desktop\Retro Data\play_data\all2016.csv',
                  r'C:\\Users\mendo\Desktop\Retro Data\play_data\all2017.csv']

play_headers = ['game_id', 'visiting_team', 'inning', 'batting_team', 'outs', 
                'balls', 'strikes', 'pitch_sequence', 'visitor_score', 'home_score',
                'batter', 'batter_hand', 'result_batter', 'result_batter_hand', 'pitcher',
                'pitcher_hand', 'result_pitcher', 'result_pitcher_hand', 'catcher',
                'first_baseman', 'second_baseman', 'third_baseman', 'shortstop',
                'left_fielder', 'center_fielder', 'right_fielder', 'runner_on_first',
                'runner_on_second', 'runner_on_third', 'event_text', 'leadoff_flag',
                'pinch_hit_flag', 'batter_def_position', 'batter_lineup_position',
                'event_type', 'batter_event_flag', 'time_at_bat_flag', 'hit_value',
                'sac_hit_flag', 'sac_fly_flag', 'outs_on_play', 'double_play_flag',
                'triple_play_flag', 'rbi_on_play', 'wild_pitch', 'passed_ball', 
                'fielded_by', 'batted_ball_type', 'bunt', 'foul', 'hit_location', 
                'errors', 'first_error_player','first_error_type', 'second_error_player', 
                'second_error_type', 'third_error_player', 'third_error_type',
                'batter_destination', 'runner1_destination', 'runner2_destination',
                'runner3_destination', 'play_on_batter','play_on_runner1', 'play_on_runner2',
                'play_on_runner3','stolen_base_runner1', 'stolen_base_runner2', 
                'stolen_base_runner3','caught_stealing_runner1', 'caught_stealing_runner2',
                'caught_stealing_runner3','pickoff_runner1', 'pickoff_runner2', 
                'pickoff_runner3', 'pitcher_charged_runner1','pitcher_charged_runner2',
                'pitcher_charged_runner3', 'new_game_flag', 'end_game_flag','pinch_runner1', 
                'pinch_runner2', 'pinch_runner3', 'runner1_removed_for_pinch',
                'runner2_removed_for_pinch', 'runner3_removed_for_pinch','batter_removed_for_pinch',
                'position_of_batter_removed', 'fielder_output1', 'fielder_output2',
                'fielder_output3', 'fielder_assist1', 'fielder_assist2', 'fielder_assist3', 
                'fielder_assist4','fielder_assist5', 'event_id', 'home_team_id', 'batting_team_id',
                'fielding_team_id', 'half_inning', 'start_half_inning_flag', 'end_half_inning_flag',
                'offense_score','defense_score', 'runs_scored_half_inning', 'plate_appearances_off',
                'plate_appearances_def','start_plate_app_flag', 'trunc_plate_app_flag', 
                'base_state_start', 'base_state_end','batter_starter_flag', 'batter_on_deck',
                'batter_in_hold', 'pitcher_starter_flag','result_pitcher_starter_flag',
                'def_position_runner1', 'lineup_position_runner1', 'event_number_runner1',
                'def_position_runner2', 'lineup_position_runner2','event_number_runner2',
                'def_position_runner3', 'lineup_position_runner3','event_number_runner3',
                'responsible_runner1', 'responsible_runner2', 'responsible_runner3',
                'balls_in_plate_app', 'called_balls_plate_app', 'int_balls_plate_app', 
                'pitchout_plate_app','hit_batter_plate_app', 'other_balls_plate_app', 
                'strikes_in_plate_app', 'called_strikes_plate_app','swinging_strike_plate_app',
                'foul_strike_plate_app', 'other_strikes_plate_app','runs_on_play', 'batted_ball_fielder',
                'force_play2', 'force_play3', 'force_play_h','batter_safe_on_error', 'batter_fate',
                'fate_runner1', 'fate_runner2', 'fate_runner3', 'runs_scored_half_inning_after',
                'assist6', 'assist7', 'assist8', 'assist9', 'assist10', 'unknown_fielding_credit',
                'uncertain_play']

In [3]:
play_data = []
for filename in play_filenames:
    if not play_data:
        all_plays = pd.DataFrame(pd.read_csv(filename, low_memory=False, 
                        header=0, names=play_headers, index_col=False))
        play_data = [0,1]
    else:
        all_plays_tmp = pd.DataFrame(pd.read_csv(filename, low_memory=False, 
                        header=0, names=play_headers, index_col=False))
        all_plays = all_plays.append(all_plays_tmp)
        del(all_plays_tmp)
del(play_data)

In [4]:
event_dict = {0:'unknown', 1:'none', 2:'generic out', 3:'strikeout', 4:'stolen base',
             5:'defensive indifference', 6:'caught stealing', 7:'pickoff error', 8:'pickoff',
             9:'wild pitch', 10:'passed ball', 11:'balk', 12:'out advancing', 13:'foul error',
             14:'walk', 15:'intentional walk', 16:'hit by pitch', 17:'interference',
             18:'error', 19:'fielders choice', 20:'single', 21:'double', 22:'triple',
             23:'home run', 24:'missing play'}
    
all_plays['event_desc'] = all_plays.loc[:, ('event_type')].map(event_dict)
all_plays['outs'] = all_plays['generic_out'] + all_plays['strikeout'] + all_plays['caught_stealing'] + all_plays['pickoff']

In [5]:
player_dict = {1:'pitcher', 2:'catcher', 3:'1B', 4:'2B', 5:'3B', 6:'SS', 7:'LF', 8:'CF', 9:'RF', 10:'DH'}
all_plays['fielded_desc'] = all_plays.loc[:, ('fielded_by')].map(player_dict)

hit_dict = {'F':'flyball', 'G':'groundball', 'L':'linedrive', 'P':'popup'}
all_plays['batted_ball_desc'] = all_plays.loc[:, ('batted_ball_type')].map(hit_dict)

In [6]:
all_plays['at_bat_id'] = all_plays['game_id'] + '-' + all_plays['inning'].astype(str) + '-' + all_plays['pitcher'] + '-' + all_plays['batter']
all_plays['date'] = pd.to_datetime(all_plays['game_id'].str[3:11], format='%Y-%m-%d')
all_plays.sort_values(['date', 'game_id', 'inning', 'batting_team'])

Unnamed: 0,game_id,visiting_team,inning,batting_team,outs,balls,strikes,pitch_sequence,visitor_score,home_score,batter,batter_hand,result_batter,result_batter_hand,pitcher,pitcher_hand,result_pitcher,result_pitcher_hand,catcher,first_baseman,second_baseman,third_baseman,shortstop,left_fielder,center_fielder,right_fielder,runner_on_first,runner_on_second,runner_on_third,event_text,leadoff_flag,pinch_hit_flag,batter_def_position,batter_lineup_position,event_type,batter_event_flag,time_at_bat_flag,hit_value,sac_hit_flag,sac_fly_flag,outs_on_play,double_play_flag,triple_play_flag,rbi_on_play,wild_pitch,passed_ball,fielded_by,batted_ball_type,bunt,foul,hit_location,errors,first_error_player,first_error_type,second_error_player,second_error_type,third_error_player,third_error_type,batter_destination,runner1_destination,runner2_destination,runner3_destination,play_on_batter,play_on_runner1,play_on_runner2,play_on_runner3,stolen_base_runner1,stolen_base_runner2,stolen_base_runner3,caught_stealing_runner1,caught_stealing_runner2,caught_stealing_runner3,pickoff_runner1,pickoff_runner2,pickoff_runner3,pitcher_charged_runner1,pitcher_charged_runner2,pitcher_charged_runner3,new_game_flag,end_game_flag,pinch_runner1,pinch_runner2,pinch_runner3,runner1_removed_for_pinch,runner2_removed_for_pinch,runner3_removed_for_pinch,batter_removed_for_pinch,position_of_batter_removed,fielder_output1,fielder_output2,fielder_output3,fielder_assist1,fielder_assist2,fielder_assist3,fielder_assist4,fielder_assist5,event_id,home_team_id,batting_team_id,fielding_team_id,half_inning,start_half_inning_flag,end_half_inning_flag,offense_score,defense_score,runs_scored_half_inning,plate_appearances_off,plate_appearances_def,start_plate_app_flag,trunc_plate_app_flag,base_state_start,base_state_end,batter_starter_flag,batter_on_deck,batter_in_hold,pitcher_starter_flag,result_pitcher_starter_flag,def_position_runner1,lineup_position_runner1,event_number_runner1,def_position_runner2,lineup_position_runner2,event_number_runner2,def_position_runner3,lineup_position_runner3,event_number_runner3,responsible_runner1,responsible_runner2,responsible_runner3,balls_in_plate_app,called_balls_plate_app,int_balls_plate_app,pitchout_plate_app,hit_batter_plate_app,other_balls_plate_app,strikes_in_plate_app,called_strikes_plate_app,swinging_strike_plate_app,foul_strike_plate_app,other_strikes_plate_app,runs_on_play,batted_ball_fielder,force_play2,force_play3,force_play_h,batter_safe_on_error,batter_fate,fate_runner1,fate_runner2,fate_runner3,runs_scored_half_inning_after,assist6,assist7,assist8,assist9,assist10,unknown_fielding_credit,uncertain_play,event_desc,fielded_desc,batted_ball_desc,at_bat_id,date
129314,PHI200904050,ATL,1,0,0,1,1,BCX,0,0,johnk003,L,johnk003,L,myerb001,R,myerb001,R,ruizc001,howar001,utlec001,felip001,rollj001,ibanr001,victs001,wertj001,,,,8/F,T,F,4,1,2,T,T,0,F,F,1,F,F,0,F,F,8,F,F,F,,0,0,N,0,N,0,N,0,0,0,0,8.0,,,,F,F,F,F,F,F,F,F,F,,,,T,F,F,F,F,,,,,0,8,0,0,0,0,0,0,0,1,PHI,ATL,PHI,0,T,F,0,0,0,0,0,T,F,0,0,T,T,T,T,0,0,0,0,0,0,0,0,0,,,,1,1,0,0,0,0,2,1,0,0,1,0,0,victs001,F,F,F,F,0,0,0,0,2,0,0,0,0,0,F,F,generic out,CF,flyball,PHI200904050-1-myerb001-johnk003,2009-04-05
129315,PHI200904050,ATL,1,0,1,0,0,X,0,0,escoy001,R,escoy001,R,myerb001,R,myerb001,R,ruizc001,howar001,utlec001,felip001,rollj001,ibanr001,victs001,wertj001,,,,63/G,F,F,6,2,2,T,T,0,F,F,1,F,F,0,F,F,6,G,F,F,,0,0,N,0,N,0,N,0,0,0,0,63.0,,,,F,F,F,F,F,F,F,F,F,,,,F,F,F,F,F,,,,,0,3,0,0,6,0,0,0,0,2,PHI,ATL,PHI,0,F,F,0,0,0,1,1,T,F,0,0,T,T,T,T,0,0,0,0,0,0,0,0,0,,,,0,0,0,0,0,0,1,0,0,0,1,0,0,rollj001,F,F,F,F,0,0,0,0,2,0,0,0,0,0,F,F,generic out,SS,groundball,PHI200904050-1-myerb001-escoy001,2009-04-05
129316,PHI200904050,ATL,1,0,2,3,2,BBCSBX,0,0,jonec004,L,jonec004,L,myerb001,R,myerb001,R,ruizc001,howar001,utlec001,felip001,rollj001,ibanr001,victs001,wertj001,,,,S7/L,F,F,5,3,20,T,T,1,F,F,0,F,F,0,F,F,7,L,F,F,,0,0,N,0,N,0,N,1,0,0,0,,,,,F,F,F,F,F,F,F,F,F,,,,F,F,F,F,F,,,,,0,0,0,0,0,0,0,0,0,3,PHI,ATL,PHI,0,F,F,0,0,0,2,2,T,F,0,1,T,T,T,T,0,0,0,0,0,0,0,0,0,,,,3,3,0,0,0,0,3,1,1,0,1,0,0,ibanr001,F,F,F,F,4,0,0,0,2,0,0,0,0,0,F,F,single,LF,linedrive,PHI200904050-1-myerb001-jonec004,2009-04-05
129317,PHI200904050,ATL,1,0,2,2,0,B1BX,0,0,mccab002,L,mccab002,L,myerb001,R,myerb001,R,ruizc001,howar001,utlec001,felip001,rollj001,ibanr001,victs001,wertj001,jonec004,,,HR/9/F.1-H,F,F,2,4,23,T,T,4,F,F,0,F,F,2,F,F,0,F,F,F,9,0,0,N,0,N,0,N,4,4,0,0,,,,,F,F,F,F,F,F,F,F,F,myerb001,,,F,F,F,F,F,,,,,0,0,0,0,0,0,0,0,0,4,PHI,ATL,PHI,0,F,F,0,0,0,3,3,T,F,1,0,T,T,T,T,5,3,3,0,0,0,0,0,0,ruizc001,,,2,2,0,0,0,0,1,0,0,0,1,0,2,,F,F,F,F,4,4,0,0,0,0,0,0,0,0,F,F,home run,,flyball,PHI200904050-1-myerb001-mccab002,2009-04-05
129318,PHI200904050,ATL,1,0,2,0,2,CFX,2,0,andeg001,L,andeg001,L,myerb001,R,myerb001,R,ruizc001,howar001,utlec001,felip001,rollj001,ibanr001,victs001,wertj001,,,,3/G,F,F,7,5,2,T,T,0,F,F,1,F,F,0,F,F,3,G,F,F,,0,0,N,0,N,0,N,0,0,0,0,3.0,,,,F,F,F,F,F,F,F,F,F,,,,F,F,F,F,F,,,,,0,3,0,0,0,0,0,0,0,5,PHI,ATL,PHI,0,F,T,2,0,2,4,4,T,F,0,0,T,T,T,T,0,0,0,0,0,0,0,0,0,,,,0,0,0,0,0,0,3,1,0,1,1,0,0,howar001,F,F,F,F,0,0,0,0,0,0,0,0,0,0,F,F,generic out,1B,groundball,PHI200904050-1-myerb001-andeg001,2009-04-05
129319,PHI200904050,ATL,1,1,0,1,1,BFX,2,0,rollj001,L,rollj001,L,lowed001,R,lowed001,R,mccab002,kotcc001,johnk003,jonec004,escoy001,andeg001,schaj002,franj004,,,,3/G+,T,F,6,1,2,T,T,0,F,F,1,F,F,0,F,F,3,G,F,F,,0,0,N,0,N,0,N,0,0,0,0,3.0,,,,F,F,F,F,F,F,F,F,F,,,,F,F,F,F,F,,,,,0,3,0,0,0,0,0,0,0,6,PHI,PHI,ATL,1,T,F,0,2,0,0,0,T,F,0,0,T,T,T,T,0,0,0,0,0,0,0,0,0,,,,1,1,0,0,0,0,2,0,0,1,1,0,0,kotcc001,F,F,F,F,0,0,0,0,0,0,0,0,0,0,F,F,generic out,1B,groundball,PHI200904050-1-lowed001-rollj001,2009-04-05
129320,PHI200904050,ATL,1,1,1,1,2,CCFFBX,2,0,wertj001,R,wertj001,R,lowed001,R,lowed001,R,mccab002,kotcc001,johnk003,jonec004,escoy001,andeg001,schaj002,franj004,,,,4/P,F,F,9,2,2,T,T,0,F,F,1,F,F,0,F,F,4,P,F,F,,0,0,N,0,N,0,N,0,0,0,0,4.0,,,,F,F,F,F,F,F,F,F,F,,,,F,F,F,F,F,,,,,0,4,0,0,0,0,0,0,0,7,PHI,PHI,ATL,1,F,F,0,2,0,1,1,T,F,0,0,T,T,T,T,0,0,0,0,0,0,0,0,0,,,,1,1,0,0,0,0,5,2,0,2,1,0,0,johnk003,F,F,F,F,0,0,0,0,0,0,0,0,0,0,F,F,generic out,2B,popup,PHI200904050-1-lowed001-wertj001,2009-04-05
129321,PHI200904050,ATL,1,1,2,2,2,BCBCX,2,0,utlec001,L,utlec001,L,lowed001,R,lowed001,R,mccab002,kotcc001,johnk003,jonec004,escoy001,andeg001,schaj002,franj004,,,,9/F,F,F,4,3,2,T,T,0,F,F,1,F,F,0,F,F,9,F,F,F,,0,0,N,0,N,0,N,0,0,0,0,9.0,,,,F,F,F,F,F,F,F,F,F,,,,F,F,F,F,F,,,,,0,9,0,0,0,0,0,0,0,8,PHI,PHI,ATL,1,F,T,0,2,0,2,2,T,F,0,0,T,T,T,T,0,0,0,0,0,0,0,0,0,,,,2,2,0,0,0,0,3,2,0,0,1,0,0,franj004,F,F,F,F,0,0,0,0,0,0,0,0,0,0,F,F,generic out,RF,flyball,PHI200904050-1-lowed001-utlec001,2009-04-05
129322,PHI200904050,ATL,2,0,0,0,0,X,2,0,franj004,R,franj004,R,myerb001,R,myerb001,R,ruizc001,howar001,utlec001,felip001,rollj001,ibanr001,victs001,wertj001,,,,HR/7/L,T,F,9,6,23,T,T,4,F,F,0,F,F,1,F,F,0,L,F,F,7,0,0,N,0,N,0,N,4,0,0,0,,,,,F,F,F,F,F,F,F,F,F,,,,F,F,F,F,F,,,,,0,0,0,0,0,0,0,0,0,9,PHI,ATL,PHI,0,T,F,2,0,0,5,0,T,F,0,0,T,T,T,T,0,0,0,0,0,0,0,0,0,,,,0,0,0,0,0,0,1,0,0,0,1,0,1,,F,F,F,F,4,0,0,0,1,0,0,0,0,0,F,F,home run,,linedrive,PHI200904050-2-myerb001-franj004,2009-04-05
129323,PHI200904050,ATL,2,0,0,2,2,CBFBS,3,0,kotcc001,L,kotcc001,L,myerb001,R,myerb001,R,ruizc001,howar001,utlec001,felip001,rollj001,ibanr001,victs001,wertj001,,,,K,F,F,3,7,3,T,T,0,F,F,1,F,F,0,F,F,0,,F,F,,0,0,N,0,N,0,N,0,0,0,0,2.0,,,,F,F,F,F,F,F,F,F,F,,,,F,F,F,F,F,,,,,0,2,0,0,0,0,0,0,0,10,PHI,ATL,PHI,0,F,F,3,0,1,6,1,T,F,0,0,T,T,T,T,0,0,0,0,0,0,0,0,0,,,,2,2,0,0,0,0,3,1,1,1,0,0,0,,F,F,F,F,0,0,0,0,1,0,0,0,0,0,F,F,strikeout,,,PHI200904050-2-myerb001-kotcc001,2009-04-05


In [7]:
batters = all_plays[['date', 'game_id', 'home_team_id', 'visiting_team', 'inning', 'outs', 'at_bat_id',
                      'batting_team_id','batter', 'batter_hand', 
                      'pitcher', 'pitcher_hand', 'batter_lineup_position', 'event_desc', 'fielded_desc', 
                      'batted_ball_desc', 'batted_ball_type', 'bunt', 
                      'foul', 'errors','batter_destination', 'batter_fate', 'balls_in_plate_app',
                      'called_balls_plate_app', 'int_balls_plate_app','pitchout_plate_app', 
                      'hit_batter_plate_app','other_balls_plate_app', 'strikes_in_plate_app',
                      'called_strikes_plate_app', 'swinging_strike_plate_app','foul_strike_plate_app', 
                      'other_strikes_plate_app', 'runs_on_play','pitcher_starter_flag', 'event_text']]

In [8]:
def event_sum(df, column, string):
    event = []
    for row in df.loc[:, (column)].astype(str):
        if row == string:
            e = 1
        else:
            e = 0
        event.append(e)
    return event

In [9]:
batters['unknown'] = event_sum(batters, 'event_desc', 'unknown')
batters['none'] = event_sum(batters, 'event_desc', 'none')
batters['generic_out'] = event_sum(batters, 'event_desc', 'generic out')
batters['strikeout'] = event_sum(batters, 'event_desc', 'strikeout')
batters['stolen_base'] = event_sum(batters, 'event_desc', 'stolen base')
batters['defensive_indifference'] = event_sum(batters, 'event_desc', 'defensive indifference')
batters['caught_stealing'] = event_sum(batters, 'event_desc', 'caught stealing')
batters['pickoff_error'] = event_sum(batters, 'event_desc', 'pickoff error')
batters['pickoff'] = event_sum(batters, 'event_desc', 'pickoff')
batters['wild_pitch'] = event_sum(batters, 'event_desc', 'wild pitch')
batters['passed_ball'] = event_sum(batters, 'event_desc', 'passed ball')
batters['balk'] = event_sum(batters, 'event_desc', 'balk')
batters['out_advancing'] = event_sum(batters, 'event_desc', 'out advancing')
batters['foul_error'] = event_sum(batters, 'event_desc', 'foul error')
batters['walk'] = event_sum(batters, 'event_desc', 'walk')
batters['intentional_walk'] = event_sum(batters, 'event_desc', 'intentional walk')
batters['hit_by_pitch'] = event_sum(batters, 'event_desc', 'hit by pitch')
batters['interference'] = event_sum(batters, 'event_desc', 'interference')
batters['error'] = event_sum(batters, 'event_desc', 'error')
batters['fielders_choice'] = event_sum(batters, 'event_desc', 'fielders choice')
batters['single'] = event_sum(batters, 'event_desc', 'single')
batters['double'] = event_sum(batters, 'event_desc', 'double')
batters['triple'] = event_sum(batters, 'event_desc', 'triple')
batters['home_run'] = event_sum(batters, 'event_desc', 'home run')
batters['missing_play'] = event_sum(batters, 'event_desc', 'missing play')

batters['groundball'] = event_sum(batters, 'batted_ball_desc', 'groundball')
batters['flyball'] = event_sum(batters, 'batted_ball_desc', 'flyball')
batters['popup'] = event_sum(batters, 'batted_ball_desc', 'popup')
batters['linedrive'] = event_sum(batters, 'batted_ball_desc', 'linedrive')

In [10]:
sac_fly = []
sac_hit = []

for row in batters['event_text']:
    if 'SF' in row:
        f = 1
    elif 'SH' in row:
        h = 1
    else:
        f=0
        h=0
    sac_fly.append(f)
    sac_hit.append(h)
    
batters['sac_fly'] = sac_fly
batters['sac_hit'] = sac_hit

In [12]:
batter_scored = []
for row in batters['batter_fate']:
    if row == 4:
        b = 1
    else:
        b = 0
    batter_scored.append(b)
batters['batter_scored'] = batter_scored
batters['ab'] = 1

In [13]:
batters = batters.groupby(['game_id', 'batting_team_id', 'batter'], 
                          as_index=False)['balls_in_plate_app', 'called_balls_plate_app','int_balls_plate_app',
                                          'pitchout_plate_app', 'hit_batter_plate_app','other_balls_plate_app', 
                                          'strikes_in_plate_app','called_strikes_plate_app', 'swinging_strike_plate_app',
                                          'foul_strike_plate_app', 'other_strikes_plate_app', 'runs_on_play','unknown', 
                                          'none', 'generic_out','strikeout', 'stolen_base', 'defensive_indifference',
                                          'caught_stealing', 'pickoff_error', 'pickoff', 'wild_pitch','passed_ball', 
                                          'balk', 'out_advancing', 'foul_error', 'walk','intentional_walk', 'hit_by_pitch',
                                          'interference', 'error','fielders_choice', 'single', 'double', 'triple', 'home_run',
                                          'missing_play', 'groundball', 'flyball', 'popup', 'linedrive','batter_scored',
                                          'sac_fly', 'sac_hit', 'ab'].sum()

batters.to_csv(r'C:\\Users\\mendo\\Desktop\\batters.csv', sep=',')

batters

Unnamed: 0,game_id,batting_team_id,batter,balls_in_plate_app,called_balls_plate_app,int_balls_plate_app,pitchout_plate_app,hit_batter_plate_app,other_balls_plate_app,strikes_in_plate_app,called_strikes_plate_app,swinging_strike_plate_app,foul_strike_plate_app,other_strikes_plate_app,runs_on_play,unknown,none,generic_out,strikeout,stolen_base,defensive_indifference,caught_stealing,pickoff_error,pickoff,wild_pitch,passed_ball,balk,out_advancing,foul_error,walk,intentional_walk,hit_by_pitch,interference,error,fielders_choice,single,double,triple,home_run,missing_play,groundball,flyball,popup,linedrive,batter_scored,sac_fly,sac_hit,ab
0,ANA200904060,ANA,abreb001,8,0,0,0,0,10,5,2,1,2,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,4
1,ANA200904060,ANA,aybae001,5,0,0,0,0,5,1,0,1,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,0,3
2,ANA200904060,ANA,figgc001,8,0,0,0,0,14,2,0,10,2,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,4
3,ANA200904060,ANA,guerv001,1,0,0,0,0,7,1,0,2,4,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,3,0,1,0,0,0,0,4
4,ANA200904060,ANA,huntt001,4,0,0,0,0,8,0,0,4,4,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,2,0,1,0,0,0,4
5,ANA200904060,ANA,kendh001,5,0,0,0,0,10,3,2,2,3,0,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,4
6,ANA200904060,ANA,mathj001,1,0,0,0,0,10,1,1,6,2,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,3
7,ANA200904060,ANA,morak001,3,0,0,0,0,8,3,2,0,3,0,0,0,0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,0,4
8,ANA200904060,ANA,rivej001,4,0,0,0,0,10,3,0,3,4,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0,2,0,0,0,4
9,ANA200904060,OAK,cabro001,5,0,0,0,0,10,2,0,4,4,0,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2,0,2,0,0,0,0,5


In [14]:
batter_summary = batters.groupby(['game_id', 'batting_team_id'], 
                          as_index=False)['balls_in_plate_app', 'called_balls_plate_app','int_balls_plate_app',
                                          'pitchout_plate_app', 'hit_batter_plate_app','other_balls_plate_app', 
                                          'strikes_in_plate_app','called_strikes_plate_app', 'swinging_strike_plate_app',
                                          'foul_strike_plate_app', 'other_strikes_plate_app', 'runs_on_play','unknown', 
                                          'none', 'generic_out','strikeout', 'stolen_base', 'defensive_indifference',
                                          'caught_stealing', 'pickoff_error', 'pickoff', 'wild_pitch','passed_ball', 
                                          'balk', 'out_advancing', 'foul_error', 'walk','intentional_walk', 'hit_by_pitch',
                                          'interference', 'error','fielders_choice', 'single', 'double', 'triple', 'home_run',
                                          'missing_play', 'groundball', 'flyball', 'popup', 'linedrive','batter_scored',
                                          'sac_fly', 'sac_hit', 'ab'].sum()

batter_summary.columns = ['bat_' + str(col) for col in batter_summary.columns]
batter_summary.to_csv(r'C:\\Users\\mendo\\Desktop\\batter_summary.csv', sep=',')

batter_summary

Unnamed: 0,bat_game_id,bat_batting_team_id,bat_balls_in_plate_app,bat_called_balls_plate_app,bat_int_balls_plate_app,bat_pitchout_plate_app,bat_hit_batter_plate_app,bat_other_balls_plate_app,bat_strikes_in_plate_app,bat_called_strikes_plate_app,bat_swinging_strike_plate_app,bat_foul_strike_plate_app,bat_other_strikes_plate_app,bat_runs_on_play,bat_unknown,bat_none,bat_generic_out,bat_strikeout,bat_stolen_base,bat_defensive_indifference,bat_caught_stealing,bat_pickoff_error,bat_pickoff,bat_wild_pitch,bat_passed_ball,bat_balk,bat_out_advancing,bat_foul_error,bat_walk,bat_intentional_walk,bat_hit_by_pitch,bat_interference,bat_error,bat_fielders_choice,bat_single,bat_double,bat_triple,bat_home_run,bat_missing_play,bat_groundball,bat_flyball,bat_popup,bat_linedrive,bat_batter_scored,bat_sac_fly,bat_sac_hit,bat_ab
0,ANA200904060,ANA,39,0,0,0,0,82,19,7,29,27,0,3,0,0,18,6,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,7,1,0,1,0,8,10,3,6,3,0,0,34
1,ANA200904060,OAK,47,0,0,1,0,71,21,6,18,26,0,0,0,0,21,2,0,0,1,0,0,0,0,0,0,0,2,0,1,0,1,1,2,1,0,0,0,11,8,5,2,0,0,0,32
2,ANA200904070,ANA,73,1,0,0,0,96,30,16,21,29,0,4,0,0,19,6,1,1,1,0,0,1,0,0,0,0,5,1,0,0,1,1,4,4,0,0,0,12,12,2,3,3,0,1,45
3,ANA200904070,OAK,56,0,0,0,0,112,34,10,30,38,0,6,0,0,22,5,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,12,4,0,0,0,13,13,6,6,6,0,0,45
4,ANA200904080,ANA,58,0,0,0,0,103,29,16,31,27,0,4,0,0,17,9,1,0,0,0,0,1,0,0,0,0,3,0,0,0,1,0,8,1,0,0,0,17,4,3,3,3,2,0,41
5,ANA200904080,OAK,65,0,0,0,0,108,40,13,22,33,0,6,0,0,17,9,1,0,0,0,0,2,0,0,0,0,4,0,0,0,0,1,15,0,0,0,0,13,12,0,8,6,0,0,49
6,ANA200904100,ANA,62,4,0,1,0,78,30,10,10,28,0,6,0,0,18,5,4,0,0,0,0,0,0,0,0,0,5,1,1,0,0,0,8,2,0,0,0,13,9,4,2,6,1,0,44
7,ANA200904100,BOS,48,0,0,0,0,87,22,11,32,22,0,3,0,0,15,10,2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,6,1,0,0,0,7,10,1,4,1,1,0,37
8,ANA200904110,ANA,56,0,0,0,0,99,22,11,36,30,0,4,0,0,22,4,1,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,2,3,0,3,0,15,11,2,2,4,0,0,39
9,ANA200904110,BOS,49,0,0,0,0,69,23,2,14,30,0,5,0,0,22,2,1,0,0,0,1,0,0,0,0,0,3,0,0,0,0,0,4,1,0,3,0,11,16,1,2,5,0,0,37
