In [1]:
# Import dependencies
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Load the Yankees' 2022 batting game logs from the project's data folder
batting_logs_path = 'Resources/Data/yankees_batting_logs.csv'
yankees_df = pd.read_csv(batting_logs_path)

# Preview the first rows to confirm the file parsed as expected
yankees_df.head()

Unnamed: 0,Rk,Gtm,Date,At,Opp,Rslt,PA,AB,R,H,...,SB,CS,BA,OBP,SLG,OPS,LOB,#,Thr,Opp. Starter (GmeSc)
0,1,1,4/8/22,,BOS,"W,6-5",46,39,6,9,...,0,0,0.231,0.326,0.487,0.813,12,12,R,N.Eovaldi(51)
1,2,2,4/9/22,,BOS,"W,4-2",30,27,4,4,...,0,0,0.197,0.289,0.439,0.729,2,10,R,N.Pivetta(46)
2,3,3,4/10/22,,BOS,"L,3-4",41,36,3,11,...,1,0,0.235,0.325,0.412,0.737,11,11,R,T.Houck(36)
3,4,4,4/11/22,,TOR,"L,0-3",35,29,0,4,...,0,0,0.214,0.316,0.351,0.667,8,10,R,A.Manoah(73)
4,5,5,4/12/22,,TOR,"W,4-0",33,28,4,7,...,0,0,0.22,0.319,0.377,0.696,5,10,L,Y.Kikuchi(40)


In [3]:
# Keep only the identifying fields and counting stats needed by later cells
columns_of_need = [
    'Rk', 'Date', 'At', 'Opp', 'Rslt',
    'PA', 'AB', 'R', 'H', '2B', '3B', 'HR', 'RBI',
    'BB', 'SO', 'HBP', 'SF', 'GDP', 'LOB', 'Thr',
]
yankees_df = yankees_df.loc[:, columns_of_need]
yankees_df.head()


Unnamed: 0,Rk,Date,At,Opp,Rslt,PA,AB,R,H,2B,3B,HR,RBI,BB,SO,HBP,SF,GDP,LOB,Thr
0,1,4/8/22,,BOS,"W,6-5",46,39,6,9,1,0,3,6,4,15,2,1,0,12,R
1,2,4/9/22,,BOS,"W,4-2",30,27,4,4,0,0,2,4,3,5,0,0,0,2,R
2,3,4/10/22,,BOS,"L,3-4",41,36,3,11,2,0,0,3,4,9,1,0,2,11,R
3,4,4/11/22,,TOR,"L,0-3",35,29,0,4,0,0,0,0,5,9,1,0,2,8,R
4,5,4/12/22,,TOR,"W,4-0",33,28,4,7,4,0,1,3,3,5,1,1,1,5,L


In [4]:
# Tally games per opponent: count the non-null 'Rk' entries within each 'Opp'
# group and expose the result as a one-column frame named 'G'
games_against_df = (
    yankees_df
    .groupby('Opp')['Rk']
    .count()
    .to_frame(name='G')
)
games_against_df

Unnamed: 0_level_0,G
Opp,Unnamed: 1_level_1
BAL,16
BOS,10
CHC,3
CHW,7
CIN,3
CLE,6
DET,6
HOU,7
KCR,7
LAA,3


In [5]:
# Total each counting stat over all games played against each opponent
counting_stats = ['PA', 'AB', 'R', 'H', '2B', '3B', 'HR', 'RBI', 'BB', 'SO', 'HBP', 'SF', 'GDP', 'LOB']
opp_df = yankees_df.groupby('Opp')[counting_stats].sum()

# Place games played ('G') as the first column; values align on the shared 'Opp' index
opp_df.insert(0, 'G', games_against_df['G'])
opp_df

Unnamed: 0_level_0,G,PA,AB,R,H,2B,3B,HR,RBI,BB,SO,HBP,SF,GDP,LOB
Opp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
BAL,16,622,558,82,148,25,1,26,75,51,140,8,2,14,114
BOS,10,421,362,73,101,18,2,19,71,46,89,10,2,9,80
CHC,3,137,119,28,34,5,0,11,28,16,26,1,1,0,27
CHW,7,268,233,40,59,8,1,11,39,31,54,0,4,4,42
CIN,3,123,104,16,21,5,0,4,15,17,33,2,0,2,24
CLE,6,224,195,38,51,8,0,12,36,24,48,2,3,7,31
DET,6,224,195,30,45,9,1,8,28,23,49,2,3,3,38
HOU,7,255,219,22,33,1,0,10,22,31,67,4,0,2,44
KCR,7,269,229,47,54,9,0,15,44,34,53,3,3,3,44
LAA,3,112,99,17,33,8,0,6,17,11,20,0,2,2,23


In [6]:
# Derive rate stats (BA, OBP, SLG, OPS, SO/G) from the per-opponent totals
at_bats, hits = opp_df['AB'], opp_df['H']
walks, hit_by_pitch = opp_df['BB'], opp_df['HBP']

# Batting average: hits per at-bat
opp_df['BA'] = hits / at_bats

# On-base percentage: (H + BB + HBP) over (AB + BB + HBP + SF)
times_on_base = hits + walks + hit_by_pitch
opp_df['OBP'] = times_on_base / (at_bats + walks + hit_by_pitch + opp_df['SF'])

# Singles are the hits that were not doubles, triples or home runs; they are
# only needed for slugging, so keep them as a local Series instead of a column
singles = hits - opp_df['2B'] - opp_df['3B'] - opp_df['HR']

# Slugging: total bases (1B + 2*2B + 3*3B + 4*HR) per at-bat
total_bases = singles + opp_df['2B']*2 + opp_df['3B']*3 + opp_df['HR']*4
opp_df['SLG'] = total_bases / at_bats

# OPS is on-base plus slugging; SO/G is strikeouts per game played
opp_df['OPS'] = opp_df['OBP'] + opp_df['SLG']
opp_df['SO/G'] = opp_df['SO'] / opp_df['G']

opp_df

Unnamed: 0_level_0,G,PA,AB,R,H,2B,3B,HR,RBI,BB,SO,HBP,SF,GDP,LOB,BA,OBP,SLG,OPS,SO/G
Opp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
BAL,16,622,558,82,148,25,1,26,75,51,140,8,2,14,114,0.265233,0.33441,0.453405,0.787815,8.75
BOS,10,421,362,73,101,18,2,19,71,46,89,10,2,9,80,0.279006,0.37381,0.497238,0.871047,8.9
CHC,3,137,119,28,34,5,0,11,28,16,26,1,1,0,27,0.285714,0.372263,0.605042,0.977305,8.666667
CHW,7,268,233,40,59,8,1,11,39,31,54,0,4,4,42,0.253219,0.335821,0.437768,0.773589,7.714286
CIN,3,123,104,16,21,5,0,4,15,17,33,2,0,2,24,0.201923,0.325203,0.365385,0.690588,11.0
CLE,6,224,195,38,51,8,0,12,36,24,48,2,3,7,31,0.261538,0.34375,0.487179,0.830929,8.0
DET,6,224,195,30,45,9,1,8,28,23,49,2,3,3,38,0.230769,0.313901,0.410256,0.724158,8.166667
HOU,7,255,219,22,33,1,0,10,22,31,67,4,0,2,44,0.150685,0.267717,0.292237,0.559954,9.571429
KCR,7,269,229,47,54,9,0,15,44,34,53,3,3,3,44,0.235808,0.33829,0.471616,0.809906,7.571429
LAA,3,112,99,17,33,8,0,6,17,11,20,0,2,2,23,0.333333,0.392857,0.59596,0.988817,6.666667


In [20]:
# Scatter plot of BA and SO/G, one text label per opponent
# Colours are assigned by opponent name rather than by position in the index,
# so the highlighted team stays correct if opponents are added, dropped or
# reordered (a fixed-length positional list would also cycle and highlight a
# second team once there are more opponents than list entries).
highlight_opp = 'HOU'
colors = ['#EB6E1F' if opp == highlight_opp else '#291c57' for opp in opp_df.index]
color_map = dict(zip(opp_df.index, colors))
fig = px.scatter(opp_df, x='SO/G', y='BA', text=opp_df.index, color=opp_df.index,
                 color_discrete_map=color_map)

# Match text color with marker color and centre each label on its data point
fig.for_each_trace(lambda t: t.update(textfont_color=t.marker.color, textposition='middle center'))


# Format figure
fig.update_xaxes(
    title=dict(text="<b>Strikeouts per Game</b>",
               font=dict(size=20)),
    tickfont=dict(size=18),
    zerolinecolor='#c6f7f7',
    gridcolor='#c6f7f7',
    gridwidth=2
)

fig.update_yaxes(
    title=dict(text="<b>Batting Average</b>",
               font=dict(size=20)),
    tickfont=dict(size=18),
    tickformat=".3f",  # show averages to three decimals, e.g. 0.265
    zeroline=False,
    gridcolor='#c6f7f7',
    gridwidth=2
)

# Markers are fully transparent, so the text labels act as the plotted points
fig.update_traces(marker=dict(size=15, opacity=0),
                  textfont_size=18)

fig.update_layout(title=dict(text="<b>Astros Woes</b><br>Yankees Offense by Opponent",
                             y=0.9,
                             x=0.5,
                             xanchor='center',
                             yanchor='top',
                             font=dict(size=25)),
                  margin=dict(t=100),
                  # Larger bound is listed first, so the x-axis runs high-to-low
                  # (fewer strikeouts per game toward the right).
                  # NOTE(review): the range is hard-coded; points outside
                  # 5.75-11.25 SO/G would not be visible.
                  xaxis_range=[11.25, 5.75],
                  showlegend=False,
                  paper_bgcolor='white',
                  plot_bgcolor='white')

fig.show()