In [1]:
from baseball_scraper import playerid_lookup
from baseball_scraper import statcast_pitcher
from baseball_scraper import pitching_stats
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import plotly.offline as py
from plotly.offline import init_notebook_mode, iplot
from plotly.subplots import make_subplots
import plotly.graph_objs as go
from plotly import tools
init_notebook_mode(connected=True)  
import plotly.figure_factory as ff


In [2]:
playerid_lookup('cole', 'gerrit')

Gathering player lookup table. This may take a moment.


Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last
0,cole,gerrit,543037,coleg001,colege01,13125,2013.0,2020.0


In [3]:
cole_stats = statcast_pitcher('2020-07-22', '2020-10-18',543037)

Gathering Player Data


In [4]:
cole_stats.columns

Index(['pitch_type', 'game_date', 'release_speed', 'release_pos_x',
       'release_pos_z', 'player_name', 'batter', 'pitcher', 'events',
       'description', 'spin_dir', 'spin_rate_deprecated',
       'break_angle_deprecated', 'break_length_deprecated', 'zone', 'des',
       'game_type', 'stand', 'p_throws', 'home_team', 'away_team', 'type',
       'hit_location', 'bb_type', 'balls', 'strikes', 'game_year', 'pfx_x',
       'pfx_z', 'plate_x', 'plate_z', 'on_3b', 'on_2b', 'on_1b',
       'outs_when_up', 'inning', 'inning_topbot', 'hc_x', 'hc_y',
       'tfs_deprecated', 'tfs_zulu_deprecated', 'fielder_2', 'umpire', 'sv_id',
       'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az', 'sz_top', 'sz_bot',
       'hit_distance_sc', 'launch_speed', 'launch_angle', 'effective_speed',
       'release_spin_rate', 'release_extension', 'game_pk', 'pitcher.1',
       'fielder_2.1', 'fielder_3', 'fielder_4', 'fielder_5', 'fielder_6',
       'fielder_7', 'fielder_8', 'fielder_9', 'release_pos_y',
       'estima

In [5]:
cole_stats.head()

Unnamed: 0,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,player_name,batter,pitcher,events,description,...,home_score,away_score,bat_score,fld_score,post_away_score,post_home_score,post_bat_score,post_fld_score,if_fielding_alignment,of_fielding_alignment
0,KC,2020-10-09,83.0,-1.91,5.79,Gerrit Cole,668227,543037,field_out,hit_into_play,...,1,1,1,1,1,1,1,1,Standard,Standard
1,CH,2020-10-09,91.2,-1.94,5.45,Gerrit Cole,664040,543037,field_out,hit_into_play,...,1,1,1,1,1,1,1,1,Infield shift,Standard
2,FF,2020-10-09,98.3,-2.11,5.54,Gerrit Cole,664040,543037,,ball,...,1,1,1,1,1,1,1,1,Infield shift,Standard
3,FF,2020-10-09,96.4,-2.06,5.45,Gerrit Cole,640457,543037,home_run,hit_into_play_score,...,0,1,0,1,1,0,0,1,Infield shift,Standard
4,FF,2020-10-09,97.6,-1.93,5.58,Gerrit Cole,640457,543037,,ball,...,0,1,0,1,1,0,0,1,Infield shift,Standard


### Gerrit Cole Pitch types and outcomes (description column) when it is 2 strikes

In [6]:
strike2 = cole_stats.strikes == 2
cole_strike2 = cole_stats[strike2]

strike2_pct = cole_strike2.pitch_type.value_counts() / len(cole_strike2)*100

print('Pitch Percentage when count is at 2 strikes\n{}'.format(strike2_pct))

Pitch Percentage when count is at 2 strikes
FF    42.168675
SL    28.313253
KC    17.068273
CH     4.417671
Name: pitch_type, dtype: float64


In [7]:
trace0 = go.Scatter(x = cole_strike2.plate_x[cole_strike2.pitch_type == 'FF'],
                   y = cole_strike2.plate_z[cole_strike2.pitch_type == 'FF'],
                   name = 'Four Seam Fastball',
                   mode = 'markers',
                   hovertext=cole_strike2.description,
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

trace1 = go.Scatter(x = cole_strike2.plate_x[cole_strike2.pitch_type == 'SL'],
                   y = cole_strike2.plate_z[cole_strike2.pitch_type == 'SL'],
                   name = 'Slider',
                   mode = 'markers',
                   hovertext=cole_strike2.description,
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

trace2 = go.Scatter(x = cole_strike2.plate_x[cole_strike2.pitch_type == 'KC'],
                   y = cole_strike2.plate_z[cole_strike2.pitch_type == 'KC'],
                   name = 'Knuckle Curve',
                   mode = 'markers',
                   hovertext=cole_strike2.description,
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

trace3 = go.Scatter(x = cole_strike2.plate_x[cole_strike2.pitch_type == 'CH'],
                   y = cole_strike2.plate_z[cole_strike2.pitch_type == 'CH'],
                   name = 'Changeup',
                   mode = 'markers',
                   hovertext=cole_strike2.description,
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

data = [trace0, trace1, trace2, trace3]

layout = dict(title = 'Pitch types of Gerrit Cole on 2 Strike Count 2020 ',
              yaxis = dict(zeroline = False),
              xaxis = dict(zeroline = False),
              height=700, width=700
             )

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='styled-scatter')

### Pitch Types Gerrit Cole Threw for a strikeout

In [8]:
strikeout = cole_stats.events == 'strikeout'
cole_k = cole_stats[strikeout]

k_pitch_pct = cole_k.pitch_type.value_counts() / len(cole_k)*100
print('Strikeout Pitch %\n{}'.format(k_pitch_pct))

Strikeout Pitch %
FF    40.322581
SL    26.612903
KC    21.774194
CH     4.032258
Name: pitch_type, dtype: float64


### Gerrit Cole Pitch Location on Every Strikeout 2020

In [9]:
trace0 = go.Scatter(x = cole_k.plate_x[cole_k.pitch_type == 'FF'],
                   y = cole_k.plate_z[cole_k.pitch_type == 'FF'],
                   name = 'Four Seam Fastball',
                   mode = 'markers',
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

trace1 = go.Scatter(x = cole_k.plate_x[cole_k.pitch_type == 'SL'],
                   y = cole_k.plate_z[cole_k.pitch_type == 'SL'],
                   name = 'Slider',
                   mode = 'markers',
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

trace2 = go.Scatter(x = cole_k.plate_x[cole_k.pitch_type == 'KC'],
                   y = cole_k.plate_z[cole_k.pitch_type == 'KC'],
                   name = 'Knuckle Curve',
                   mode = 'markers',
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

trace3 = go.Scatter(x = cole_k.plate_x[cole_k.pitch_type == 'CH'],
                   y = cole_k.plate_z[cole_k.pitch_type == 'CH'],
                   name = 'Changeup',
                   mode = 'markers',
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

fig = make_subplots(rows=2, cols=2)
fig.add_trace(trace0, row=1, col=1)
fig.add_trace(trace1, row=1, col=2)
fig.add_trace(trace2, row=2, col=1)
fig.add_trace(trace3, row=2, col=2)

fig.update_layout(title_text='Pitch Location on Gerrit Cole Strikeouts 2020',
                 yaxis=dict(zeroline=False),
                 xaxis=dict(zeroline=False),
                 height=900)

py.iplot(fig, filename='styled-scatter')

In [10]:
trace0 = go.Scatter(x = cole_k.plate_x[cole_k.pitch_type == 'FF'],
                   y = cole_k.plate_z[cole_k.pitch_type == 'FF'],
                   name = 'Four Seam Fastball',
                   mode = 'markers',
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

trace1 = go.Scatter(x = cole_k.plate_x[cole_k.pitch_type == 'SL'],
                   y = cole_k.plate_z[cole_k.pitch_type == 'SL'],
                   name = 'Slider',
                   mode = 'markers',
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

trace2 = go.Scatter(x = cole_k.plate_x[cole_k.pitch_type == 'KC'],
                   y = cole_k.plate_z[cole_k.pitch_type == 'KC'],
                   name = 'Knuckle Curve',
                   mode = 'markers',
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

trace3 = go.Scatter(x = cole_k.plate_x[cole_k.pitch_type == 'CH'],
                   y = cole_k.plate_z[cole_k.pitch_type == 'CH'],
                   name = 'Changeup',
                   mode = 'markers',
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

data = [trace0, trace1, trace2, trace3]

layout = dict(title = 'Pitch types of Gerrit Cole Strikeouts 2020 ',
              yaxis = dict(zeroline = False),
              xaxis = dict(zeroline = False),
              height=700, width=700
             )

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='styled-scatter')


In [11]:
cole_stats.description.value_counts()

ball                       481
foul                       269
called_strike              241
swinging_strike            217
hit_into_play              145
hit_into_play_no_out        45
blocked_ball                30
foul_tip                    28
hit_into_play_score         24
swinging_strike_blocked     15
hit_by_pitch                 3
foul_bunt                    1
Name: description, dtype: int64

In [12]:
start_days = cole_stats.game_date.unique()
start_days

array(['2020-10-09', '2020-10-05', '2020-09-29', '2020-09-22',
       '2020-09-16', '2020-09-11', '2020-09-05', '2020-08-31',
       '2020-08-26', '2020-08-19', '2020-08-14', '2020-08-08',
       '2020-08-03', '2020-07-29', '2020-07-23'], dtype=object)

In [13]:
cole_stats.pitch_type.value_counts() / len(cole_stats)*100

FF    48.565710
SL    20.947298
KC    16.944630
CH     5.937292
Name: pitch_type, dtype: float64

In [14]:
cole_stats.events.value_counts()

field_out                    137
strikeout                    124
single                        32
walk                          20
home_run                      18
double                        16
force_out                      4
grounded_into_double_play      3
field_error                    3
hit_by_pitch                   3
fielders_choice_out            1
Name: events, dtype: int64

## Homeruns Hit Against Gerrit Cole

In [15]:
hr = cole_stats.events == 'home_run'
cole_hr = cole_stats[hr]
inning_hr = cole_hr.inning.value_counts()
print('Innings of HR Allowed by Gerrit Cole\n{}'.format(inning_hr))

Innings of HR Allowed by Gerrit Cole
4    4
3    4
1    4
5    2
2    2
7    1
6    1
Name: inning, dtype: int64


### Pitch Pie Chart of HRs Allowed

In [16]:
hr_pitch = cole_hr.pitch_type.value_counts() / len(cole_hr)*100
print('Pitch Type on HR Allowed\n{}'.format(hr_pitch))

fig = go.Figure(data=[go.Pie(labels=hr_pitch.index, 
                             values=hr_pitch,
                             title='Pitch Type on HR Allowed')])
fig.show()

Pitch Type on HR Allowed
FF    66.666667
SL    16.666667
KC    11.111111
CH     5.555556
Name: pitch_type, dtype: float64


### Pitch Locations of HRs Allowed

In [17]:
trace0 = go.Scatter(x = cole_hr.plate_x[cole_hr.pitch_type == 'FF'],
                   y = cole_hr.plate_z[cole_hr.pitch_type == 'FF'],
                   name = 'Four Seam Fastball',
                   mode = 'markers',
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

trace1 = go.Scatter(x = cole_hr.plate_x[cole_hr.pitch_type == 'SL'],
                   y = cole_hr.plate_z[cole_hr.pitch_type == 'SL'],
                   name = 'Slider',
                   mode = 'markers',
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

trace2 = go.Scatter(x = cole_hr.plate_x[cole_hr.pitch_type == 'KC'],
                   y = cole_hr.plate_z[cole_hr.pitch_type == 'KC'],
                   name = 'Knuckle Curve',
                   mode = 'markers',
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

trace3 = go.Scatter(x = cole_hr.plate_x[cole_hr.pitch_type == 'CH'],
                   y = cole_hr.plate_z[cole_hr.pitch_type == 'CH'],
                   name = 'Changeup',
                   mode = 'markers',
                   marker = dict(size=10,
                                line=dict(width=2)
                                )
                   )

data = [trace0, trace1, trace2, trace3]

layout = dict(title = 'Pitch types and Location of Gerrit Cole Homeruns Allowed 2020 ',
              yaxis = dict(zeroline = False),
              xaxis = dict(zeroline = False),
              height=700, width=700
             )

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='styled-scatter')


In [18]:
cole_hr[['pitch_type', 'release_speed', 'batter', 'inning', 'balls', 'strikes']]

Unnamed: 0,pitch_type,release_speed,batter,inning,balls,strikes
3,FF,96.4,640457,5,1,1
141,FF,95.8,596847,4,1,1
180,FF,96.4,668227,1,2,0
246,SL,87.8,647304,4,0,1
356,FF,95.6,624415,4,1,1
646,FF,96.0,621466,6,1,0
793,FF,97.0,595281,2,1,1
803,SL,89.3,596847,1,1,2
859,KC,83.0,542303,3,1,1
865,SL,90.0,621020,3,2,1


In [19]:
all_events = cole_stats.loc[cole_stats['events'].notnull(),['pitch_type', 'release_speed','events', 'inning', 'balls', 'strikes', 'inning', 'at_bat_number', 'pitch_number']]
all_events.head()

Unnamed: 0,pitch_type,release_speed,events,inning,balls,strikes,inning.1,at_bat_number,pitch_number
0,KC,83.0,field_out,6,0,0,6,44,1
1,CH,91.2,field_out,5,1,0,5,38,2
3,FF,96.4,home_run,5,1,1,5,37,3
6,FF,98.7,strikeout,5,2,2,5,36,6
12,KC,82.9,field_out,5,1,1,5,35,3


## Event Percentages on every pitch type

In [20]:
FF_events = all_events[all_events.pitch_type == 'FF']

FF_events['events'].value_counts() / len(FF_events)

field_out                    0.430108
strikeout                    0.268817
single                       0.102151
home_run                     0.064516
walk                         0.064516
double                       0.053763
grounded_into_double_play    0.005376
force_out                    0.005376
hit_by_pitch                 0.005376
Name: events, dtype: float64

In [21]:
KC_events = all_events[all_events.pitch_type == 'KC']

KC_events['events'].value_counts() / len(KC_events)

strikeout              0.473684
field_out              0.280702
single                 0.087719
double                 0.035088
home_run               0.035088
field_error            0.035088
force_out              0.017544
fielders_choice_out    0.017544
walk                   0.017544
Name: events, dtype: float64

In [22]:
SL_events = all_events[all_events.pitch_type == 'SL']

SL_events['events'].value_counts() / len(SL_events)

strikeout                    0.4125
field_out                    0.3125
walk                         0.0750
single                       0.0625
double                       0.0375
home_run                     0.0375
grounded_into_double_play    0.0250
hit_by_pitch                 0.0250
force_out                    0.0125
Name: events, dtype: float64

In [23]:
CH_events = all_events[all_events.pitch_type == 'CH']

CH_events['events'].value_counts() / len(CH_events)

field_out    0.384615
strikeout    0.384615
double       0.076923
home_run     0.076923
single       0.076923
Name: events, dtype: float64