In [1]:
# Import dependencies
import pandas as pd
import plotly.graph_objects as go

## Data Processing

In [2]:
# Load Gerrit Cole's 2022 pitching logs from the project's CSV export and
# preview the first five rows to sanity-check the columns.
pitching_logs_csv = 'Resources/Data/pitching_logs.csv'
cole_df = pd.read_csv(pitching_logs_csv)
cole_df.head(n=5)

Unnamed: 0,Rk,Gcar,Gtm,Date,Tm,At,Opp,Rslt,Inngs,Dec,...,Entered,Exited,1stInnR,1stInnER,1stInnBB,1stInnSO,1stInn1B,1stInn2B,1stInn3B,1stInnHR
0,1,235.0,1.0,Apr 8,NYY,,BOS,"W,6-5",GS-4,,...,1t start tie,4t 3 out d1,3,3,1,1,1,1,0,1
1,2,236.0,6.0,Apr 13,NYY,,TOR,"L,4-6",GS-6,,...,1t start tie,6t --3 2 out tie,1,1,0,1,0,0,0,1
2,3,237.0,11.0,Apr 19,NYY,@,DET,"W,4-2",GS-2,,...,1b start a 2,2b 123 2 out a1,0,0,1,3,0,0,0,0
3,4,238.0,16.0,Apr 24,NYY,,CLE,"W,10-2",GS-7,W(1-0),...,1t start tie,7t 1-- 2 out a8,0,0,1,2,0,0,0,0
4,5,239.0,21.0,Apr 30,NYY,@,KCR,"W,3-0",GS-6,W(2-0),...,1b start tie,6b 3 out a3,0,0,0,0,0,0,0,0


In [3]:
# Keep only the rows whose 'Rk' value is below the cutoff, then preview the
# last rows that survive the filter.
# NOTE(review): the reason for cutting at 22 is not stated in the notebook;
# document it (or derive it from a date) so the cutoff is reproducible.
rk_cutoff = 22
keep_row = cole_df['Rk'] < rk_cutoff
cole_df = cole_df[keep_row]
cole_df.tail(n=5)

Unnamed: 0,Rk,Gcar,Gtm,Date,Tm,At,Opp,Rslt,Inngs,Dec,...,Entered,Exited,1stInnR,1stInnER,1stInnBB,1stInnSO,1stInn1B,1stInn2B,1stInn3B,1stInnHR
16,17,251.0,83.0,Jul 7,NYY,@,BOS,"W,6-5",GS-6,W(8-2),...,1b start tie,6b 3 out a1,0,0,1,2,0,0,0,0
17,18,252.0,87.0,Jul 12,NYY,,CIN,"L,3-4",GS-7,,...,1t start tie,7t 3 out a3,0,0,1,1,0,0,0,0
18,19,253.0,92.0,Jul 17,NYY,,BOS,"W,13-2",GS-7,W(9-2),...,1t start tie,7t 3 out a9,0,0,0,2,0,0,0,0
19,20,254.0,96.0,Jul 23,NYY,@,BAL,"L,3-6",GS-7,L(9-3),...,1b start a 1,7b 1-- 0 out tie,0,0,1,1,1,0,0,0
20,21,255.0,101.0,Jul 29,NYY,,KCR,"W,11-5",GS-6,,...,1t start tie,6t 3 out d2,0,0,0,0,0,0,0,0


In [4]:
# Derive two columns that are not present in the raw logs:
#   '1B'      - hits left over after removing doubles, triples and home runs
#   '1stInnH' - first-inning hits, as the sum of the four first-inning hit types
cole_df['1B'] = (
    cole_df['H']
    .sub(cole_df['2B'])
    .sub(cole_df['3B'])
    .sub(cole_df['HR'])
)
cole_df['1stInnH'] = (
    cole_df['1stInn1B']
    .add(cole_df['1stInn2B'])
    .add(cole_df['1stInn3B'])
    .add(cole_df['1stInnHR'])
)

In [5]:
# Narrow the frame to the counting stats used in the rest of the analysis:
# each whole-game stat followed by its first-inning ('1stInn'-prefixed) twin.
whole_game_cols = ['R', 'ER', 'BB', 'SO', 'H', '1B', '2B', '3B', 'HR']
first_inning_cols = [f'1stInn{stat}' for stat in whole_game_cols]
cole_df = cole_df.loc[:, whole_game_cols + first_inning_cols]
cole_df.head()


Unnamed: 0,R,ER,BB,SO,H,1B,2B,3B,HR,1stInnR,1stInnER,1stInnBB,1stInnSO,1stInnH,1stInn1B,1stInn2B,1stInn3B,1stInnHR
0,3.0,3.0,1.0,3.0,4.0,2.0,1.0,0.0,1.0,3,3,1,1,3,1,1,0,1
1,3.0,3.0,1.0,6.0,4.0,0.0,2.0,0.0,2.0,1,1,0,1,1,0,0,0,1
2,2.0,2.0,5.0,3.0,1.0,1.0,0.0,0.0,0.0,0,0,1,3,0,0,0,0,0
3,0.0,0.0,1.0,9.0,4.0,4.0,0.0,0.0,0.0,0,0,1,2,0,0,0,0,0
4,0.0,0.0,2.0,6.0,5.0,4.0,1.0,0.0,0.0,0,0,0,0,0,0,0,0,0


In [6]:
# For every counting stat, add a 'Rem<stat>' column holding what happened
# after the first inning: the whole-game value minus the '1stInn<stat>' value.
for stat in ['R', 'ER', 'BB', 'SO', 'H', '1B', '2B', '3B', 'HR']:
    cole_df[f'Rem{stat}'] = cole_df[stat] - cole_df[f'1stInn{stat}']

cole_df.head()

Unnamed: 0,R,ER,BB,SO,H,1B,2B,3B,HR,1stInnR,...,1stInnHR,RemR,RemER,RemBB,RemSO,RemH,Rem1B,Rem2B,Rem3B,RemHR
0,3.0,3.0,1.0,3.0,4.0,2.0,1.0,0.0,1.0,3,...,1,0.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0
1,3.0,3.0,1.0,6.0,4.0,0.0,2.0,0.0,2.0,1,...,1,2.0,2.0,1.0,5.0,3.0,0.0,2.0,0.0,1.0
2,2.0,2.0,5.0,3.0,1.0,1.0,0.0,0.0,0.0,0,...,0,2.0,2.0,4.0,0.0,1.0,1.0,0.0,0.0,0.0
3,0.0,0.0,1.0,9.0,4.0,4.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0.0,7.0,4.0,4.0,0.0,0.0,0.0
4,0.0,0.0,2.0,6.0,5.0,4.0,1.0,0.0,0.0,0,...,0,0.0,0.0,2.0,6.0,5.0,4.0,1.0,0.0,0.0


In [7]:
# Build a two-row summary frame: row 0 holds first-inning totals, row 1 holds
# totals for everything after the first inning.
#
# NOTE(review): the innings pitched are typed in by hand, not derived from
# cole_df (which no longer carries an innings column at this point). The
# first-inning value is 22.0, while the earlier `Rk < 22` filter leaves rows
# with Rk 1-21 -- confirm both figures cover the same set of starts.
# NOTE(review): 109.33 presumably stands for 109 1/3 innings -- confirm.
season_stats = ['R', 'ER', 'BB', 'SO', 'H', '1B', '2B', '3B', 'HR']

cole_season_df = pd.DataFrame({
    'IP': [22.0, 109.33],
    **{
        stat: [cole_df[f'1stInn{stat}'].sum(), cole_df[f'Rem{stat}'].sum()]
        for stat in season_stats
    },
})

cole_season_df

Unnamed: 0,IP,R,ER,BB,SO,H,1B,2B,3B,HR
0,22.0,7.0,7.0,12.0,29.0,14.0,6.0,3.0,0.0,5.0
1,109.33,40.0,39.0,20.0,133.0,83.0,53.0,17.0,0.0,13.0


In [8]:
def per_nine(count_col):
    """Return the `count_col` totals of cole_season_df scaled to a per-nine-innings rate."""
    return 9 * cole_season_df[count_col] / cole_season_df['IP']


# Rate stats derived from the totals; WHIP is (walks + hits) per inning and
# the others are per-nine-innings rates. The dict order fixes the column order.
derived_rates = {
    'ERA': per_nine('ER'),
    'BB/9': per_nine('BB'),
    'SO/9': per_nine('SO'),
    'WHIP': (cole_season_df['BB'] + cole_season_df['H']) / cole_season_df['IP'],
    'HR/9': per_nine('HR'),
}
for rate_name, rate_values in derived_rates.items():
    cole_season_df[rate_name] = rate_values

cole_season_df

Unnamed: 0,IP,R,ER,BB,SO,H,1B,2B,3B,HR,ERA,BB/9,SO/9,WHIP,HR/9
0,22.0,7.0,7.0,12.0,29.0,14.0,6.0,3.0,0.0,5.0,2.863636,4.909091,11.863636,1.181818,2.045455
1,109.33,40.0,39.0,20.0,133.0,83.0,53.0,17.0,0.0,13.0,3.210464,1.646392,10.948505,0.942102,1.070155


In [10]:
# Preview row 0's ERA, BB/9 and HR/9 as a plain Python list (the same values
# the chart cell uses for its first trace).
cole_season_df.loc[0, ['ERA', 'BB/9', 'HR/9']].tolist()

[2.8636363636363638, 4.909090909090909, 2.0454545454545454]

In [60]:
# Colour palette for the chart.
navy = '#0C2340'
red = '#E4002C'  # NOTE(review): defined but never referenced in this cell
gray = '#C4CED3'


# Data for the grouped bars: the three rate stats on the x-axis, with row 0 of
# cole_season_df (first inning) and row 1 (rest of game) as the two series.
x = ['ERA', 'BB/9', 'HR/9']
y1 = list(cole_season_df.loc[0, x])
y2 = list(cole_season_df.loc[1, x])

# One bar trace per series; `text` is the label drawn inside each bar.
# NOTE(review): both series use the same navy fill and the legend is hidden,
# so only the in-bar label tells them apart -- confirm this is intended.
fig = go.Figure()
bar_specs = [
    (y1, "1st Inning", "<b>1st<br>Inning</b>"),
    (y2, "Other", "<b>Other</b>"),
]
for bar_heights, trace_name, bar_label in bar_specs:
    fig.add_trace(
        go.Bar(
            x=x,
            y=bar_heights,
            name=trace_name,
            marker_color=navy,
            text=bar_label,
            insidetextanchor='middle',
        )
    )


# White in-bar labels on every trace.
fig.update_traces(textfont={'color': 'white', 'size': 16})


# X-axis: bold tick labels for the three stats, no vertical grid lines.
fig.update_xaxes(
    tickfont={'size': 22, 'color': navy},
    tickmode='array',
    ticktext=['<b>ERA</b>', '<b>BB/9</b>', '<b>HR/9</b>'],
    tickvals=['ERA', 'BB/9', 'HR/9'],
    showgrid=False,
)

# Y-axis: same tick font, white horizontal grid lines.
fig.update_yaxes(
    tickfont={'size': 22, 'color': navy},
    gridcolor='white',
    gridwidth=2,
)


# Layout: centred two-line title, no legend, gray paper and plot backgrounds.
fig.update_layout(
    title={
        'text': '<b>1st Is The Worst</b><br>Gerrit Cole Struggles "Right Off The Bat"',
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': {'size': 25, 'color': navy},
    },
    showlegend=False,
    paper_bgcolor=gray,
    plot_bgcolor=gray,
    margin={'pad': 10},
)


fig.show()