In [1]:
# Import dependencies
import pandas as pd
import plotly.graph_objects as go

## Data Processing

In [2]:
# Load the Yankees game logs from the project's CSV export and preview the first rows
game_logs_path = 'Resources/Data/game_logs.csv'
games_df = pd.read_csv(game_logs_path)
games_df.head()

Unnamed: 0,Gtm,Date,At,Opp,Rslt,R
0,1,8-Apr,,BOS,"W,6-5",6
1,2,9-Apr,,BOS,"W,4-2",4
2,3,10-Apr,,BOS,"L,3-4",3
3,4,11-Apr,,TOR,"L,0-3",0
4,5,12-Apr,,TOR,"W,4-0",4


In [3]:
# Create columns for runs allowed and win margin
# 'Rslt' values look like "W,6-5" (outcome, runs scored, runs allowed). Pull the
# runs-allowed figure with a pattern match rather than fixed character positions,
# so the parse does not depend on how many digits the score has or on stray
# trailing characters (positional slicing would silently read "L,3-10 " as 0).
runs_allowed = games_df['Rslt'].str.extract(r'\d+-(\d+)', expand=False)

# Fail loudly on rows that do not contain a "<runs>-<runs>" score instead of
# letting a NaN flow into the margin arithmetic.
unparsed_results = games_df.loc[runs_allowed.isna(), 'Rslt']
if not unparsed_results.empty:
    raise ValueError(
        f"Could not parse runs allowed from Rslt values: {unparsed_results.unique().tolist()}"
    )

games_df['RA'] = runs_allowed.astype('int64')
# Positive margin = win, negative = loss
games_df['WinMargin'] = games_df['R'] - games_df['RA']
games_df.head()

Unnamed: 0,Gtm,Date,At,Opp,Rslt,R,RA,WinMargin
0,1,8-Apr,,BOS,"W,6-5",6,5,1
1,2,9-Apr,,BOS,"W,4-2",4,2,2
2,3,10-Apr,,BOS,"L,3-4",3,4,-1
3,4,11-Apr,,TOR,"L,0-3",0,3,-3
4,5,12-Apr,,TOR,"W,4-0",4,0,4


In [4]:
# Create column where win margin is capped at absolute value of 5
# Every margin of MARGIN_CAP or more (in either direction) is collapsed into a
# single bucket. Series.clip applies both bounds in one vectorized call and
# leaves values already inside the range untouched.
MARGIN_CAP = 5
games_df['Margin'] = games_df['WinMargin'].clip(lower=-MARGIN_CAP, upper=MARGIN_CAP)
games_df.head()

Unnamed: 0,Gtm,Date,At,Opp,Rslt,R,RA,WinMargin,Margin
0,1,8-Apr,,BOS,"W,6-5",6,5,1,1
1,2,9-Apr,,BOS,"W,4-2",4,2,2,2
2,3,10-Apr,,BOS,"L,3-4",3,4,-1,-1
3,4,11-Apr,,TOR,"L,0-3",0,3,-3,-3
4,5,12-Apr,,TOR,"W,4-0",4,0,4,4


In [5]:
# Count how many games fall into each capped margin bucket.
# The result has one row per margin that occurs, with columns 'Margin' and 'Freq'.
games_per_margin = games_df.groupby(['Margin']).size()
diffs_df = games_per_margin.rename('Freq').reset_index()
diffs_df

Unnamed: 0,Margin,Freq
0,-5,4
1,-4,1
2,-3,7
3,-2,11
4,-1,13
5,1,21
6,2,11
7,3,8
8,4,6
9,5,24


## Visualization

In [8]:
# Create variables for use in chart
# Margin buckets in display order, paired one-to-one with the tick labels in `x`.
# There is no 0 bucket: the frequency table contains no zero-margin games.
margins = [-5, -4, -3, -2, -1, 1, 2, 3, 4, 5]
x = ['-5<br>or worse', '-4', '-3', '-2', '-1', '1', '2', '3', '4', '5<br>or better']

# Look up each bucket's count by margin value rather than by row position, so a
# margin with no games plots as 0 instead of shifting every later bar onto the
# wrong label.
freq_by_margin = dict(zip(diffs_df['Margin'], diffs_df['Freq']))
unexpected_margins = sorted(set(freq_by_margin) - set(margins))
assert not unexpected_margins, f"Margins with no chart bucket: {unexpected_margins}"
y = pd.Series([freq_by_margin.get(margin, 0) for margin in margins], name='Freq')

# Chart palette: background colour and axis/annotation colour
navy = '#0C2340'
gray = '#C4CED3'


# Build the figure with a single white bar trace (one bar per margin bucket)
margin_bars = go.Bar(x=x, y=y, marker=dict(color='white'))
fig = go.Figure(data=[margin_bars])

# Dashed divider between the loss and win buckets. On a categorical axis the
# categories are numbered from 0 in order of appearance, so 4.5 falls between
# the fifth label ('-1') and the sixth ('1').
divider_style = dict(line_width=2, line_dash="longdash", line_color=gray)
fig.add_vline(x=4.5, **divider_style)


# Format axes
# Both axes use the same title and tick typography; only the y-axis draws grid lines.
axis_title_font = dict(size=22, color=gray)
axis_tick_font = dict(size=18, color=gray)
grid_rgb = 'rgb(20,45,75)'

fig.update_xaxes(
    title=dict(
        text="<b>(Loss)             Run Margin             (Win)</b>",
        font=axis_title_font),
    tickfont=axis_tick_font,
    showgrid=False,
)

fig.update_yaxes(
    title=dict(
        text="<br><b>Games</b>",
        font=axis_title_font),
    tickfont=axis_tick_font,
    gridcolor=grid_rgb,
    gridwidth=2,
    zerolinecolor=grid_rgb,
    zerolinewidth=2,
)


# Format layout
# Centered two-line title in white, anchored by its top edge near the top of the figure
chart_title = dict(
    text="<b>Built Different</b><br>Breaking Down NYY's Run Differential",
    x=0.5,
    y=0.9,
    xanchor='center',
    yanchor='top',
    font=dict(size=25, color='white'),
)

# Paper and plot area share the navy background; the bottom margin is set to 100
fig.update_layout(
    title=chart_title,
    paper_bgcolor=navy,
    plot_bgcolor=navy,
    margin=dict(b=100, pad=2),
)


# Render the finished chart
fig.show()