In [1]:
# Import dependencies
import pandas as pd
import plotly.graph_objects as go

## Data Processing

In [2]:
# Read in standard batting stats from baseball reference
std_bat_df = pd.read_csv('Resources/Data/standard_batting.csv')

In [3]:
# Only grab rows that group by team
std_bat_df = std_bat_df.loc[(std_bat_df['Year'] == 'TEX (4 yrs)') | (std_bat_df['Year'] == 'NYY (1 yr)')]
std_bat_df.set_index('Year', inplace=True)
std_bat_df

Unnamed: 0_level_0,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,...,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos,Awards
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TEX (4 yrs),,,,156,519,489,51,120,31,0,...,0.634,69,178,21,1,4,7,1,,
NYY (1 yr),,,,75,217,204,29,54,8,1,...,0.765,116,94,3,1,0,1,0,,


In [4]:
# Create HR%, SO%, BB% columns
std_bat_df['HR%'] = std_bat_df['HR'] / std_bat_df['PA']
std_bat_df['SO%'] = std_bat_df['SO'] / std_bat_df['PA']
std_bat_df['BB%'] = std_bat_df['BB'] / std_bat_df['PA']

In [5]:
# Remove unnecessary columns
std_bat_df = std_bat_df[['BA', 'OBP', 'SLG', 'SO%', 'BB%', 'HR%']]
std_bat_df

Unnamed: 0_level_0,BA,OBP,SLG,SO%,BB%,HR%
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
TEX (4 yrs),0.245,0.27,0.364,0.192678,0.034682,0.017341
NYY (1 yr),0.265,0.304,0.461,0.170507,0.050691,0.046083


In [6]:
# Calculate percent improvement between rows and create new row in df
std_bat_df.loc['Pct. Improvement'] = abs(
    (std_bat_df.loc['NYY (1 yr)'] - std_bat_df.loc['TEX (4 yrs)']) / std_bat_df.loc['TEX (4 yrs)']
)

std_bat_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


Unnamed: 0_level_0,BA,OBP,SLG,SO%,BB%,HR%
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
TEX (4 yrs),0.245,0.27,0.364,0.192678,0.034682,0.017341
NYY (1 yr),0.265,0.304,0.461,0.170507,0.050691,0.046083
Pct. Improvement,0.081633,0.125926,0.266484,0.115069,0.461598,1.65745


In [22]:
# Create variables for use in chart
x = list(std_bat_df.columns)
y = list(std_bat_df.loc[['Pct. Improvement']].values[0])
text = ['<b>' + "{:.0%}".format(pct) + '</b>' for pct in y]

navy = '#0C2340'
gray = '#C4CED3'


# Create traces
fig = go.Figure()

fig.add_trace(go.Bar(x=x, y=y, marker_color='white', text=text, insidetextanchor='middle'))


# Format traces
fig.update_traces(
    textfont=dict(
        color=navy,
        size=14)
)


# Format axes
fig.update_xaxes(
    title=dict(
        text="<b>Batting Category</b>",
        font=dict(
            size=22,
            color=gray)),
    tickfont=dict(
        size=18,
        color=gray),
    showgrid=False
)

fig.update_yaxes(
    title=dict(
        text="<br><b>Improvement<br>TEX → NYY</b>",
        font=dict(
            size=22,
            color=gray)),
    tickfont=dict(
        size=18,
        color=gray),
    tickmode='array',
    ticktext=['0%', '20%', '40%', '60%', '80%', '>100%'],
    tickvals=[0, 0.2, 0.4, 0.6, 0.8, 1],
    zeroline=False,
    gridcolor='rgb(20,45,75)',
    gridwidth=2
)


# Format layout
fig.update_layout(
    title=dict(
        text="<b>No Way, Jose!</b><br>Trevino's Breakout All-Star Season",
        y=0.9,
        x=0.5,
        xanchor='center',
        yanchor='top',
        font=dict(
            size=25,
            color='white')),
    yaxis_range=[0,1.003],
    paper_bgcolor='#0C2340',
    plot_bgcolor='#0C2340',
    margin=dict(pad=5)
)


fig.show()