In [22]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Total home runs per year

In [None]:

# Total home runs hit per season, with reference lines marking notable eras.
df = pd.read_csv('core/Batting.csv')

# Rows with zero home runs add nothing to a yearly sum; the filter is kept so
# `batting_df` holds the same rows as before.
batting_df = df[df['HR'] > 0]
home_runs_per_year = batting_df.groupby('yearID')['HR'].sum().reset_index()

fig, ax = plt.subplots()
sns.lineplot(x='yearID', y='HR', data=home_runs_per_year, ax=ax)

# (legend label, colour, years to mark). Each era is drawn in one colour and
# only its first line carries the legend label, so the legend has one entry
# per era instead of one per line.
era_markers = (
    ('Dead-ball era (1900-1920)', 'red', (1900, 1920)),
    ('DH rule (1973)', 'green', (1973,)),
    ('Steroid era (1994-2005)', 'purple', (1994, 2005)),
)
for era_label, era_color, era_years in era_markers:
    for position, era_year in enumerate(era_years):
        ax.axvline(
            x=era_year,
            color=era_color,
            linestyle='--',
            # Labels starting with an underscore are skipped by the legend.
            label=era_label if position == 0 else '_nolegend_',
        )

ax.set_title('Yearly Trends in Home Runs')
ax.set_xlabel('Year')
ax.set_ylabel('Total Home Runs')
ax.legend()
plt.show()

### Regular season vs postseason batting averages

In [None]:
# Compare the league-wide batting average (hits / at-bats, summed per year)
# between the regular season and the postseason.
df = pd.read_csv('core/Batting.csv')
post_df = pd.read_csv('core/BattingPost.csv')

# Keep only rows with at least one at-bat.
batting_df = df.loc[df['AB'].gt(0)]
batting_post_df = post_df.loc[post_df['AB'].gt(0)]

# Yearly totals of H and AB, plus BA = H / AB, indexed by yearID.
batting_avg = (
    batting_df.groupby('yearID')[['H', 'AB']]
    .sum()
    .assign(BA=lambda totals: totals['H'] / totals['AB'])
)
batting_post_avg = (
    batting_post_df.groupby('yearID')[['H', 'AB']]
    .sum()
    .assign(BA=lambda totals: totals['H'] / totals['AB'])
)

fig, ax = plt.subplots()
for yearly_ba, series_label, series_color in (
    (batting_avg, 'Regular Season BA', 'blue'),
    (batting_post_avg, 'Postseason BA', 'red'),
):
    ax.plot(yearly_ba.index, yearly_ba['BA'], label=series_label, color=series_color)

ax.set_xlabel('Year')
ax.set_ylabel('Batting Average')
ax.set_title('Regular Season vs Postseason Batting Averages by Year')
ax.legend()
plt.show()

### Regular Season vs Post Season HBP per Game by Year

In [None]:

# Compare hit-by-pitch rates between the regular season and the postseason.
#
# The pitching tables are loaded here so the cell runs on a fresh kernel; no
# visible cell defines `pitching_df` or `pitching_post_df`.
# NOTE(review): assumes the pitching tables sit next to the batting CSVs as
# core/Pitching.csv and core/PitchingPost.csv, unfiltered -- confirm.
pitching_df = pd.read_csv('core/Pitching.csv')
pitching_post_df = pd.read_csv('core/PitchingPost.csv')

# Yearly totals of HBP and G, the ratio HBP / G, then season-specific names.
# NOTE(review): G is summed over every pitching row; if G counts each
# pitcher's appearances, the ratio is HBP per pitcher appearance rather than
# per game played -- confirm the intended denominator.
pitching_regular_hbp = (
    pitching_df.groupby('yearID')
    .agg({'HBP': 'sum', 'G': 'sum'})
    .reset_index()
    .assign(HBP_per_game=lambda totals: totals['HBP'] / totals['G'])
    .rename(columns={'HBP': 'RegularSeason_HBP', 'G': 'RegularSeason_Games'})
)
pitching_post_hbp = (
    pitching_post_df.groupby('yearID')
    .agg({'HBP': 'sum', 'G': 'sum'})
    .reset_index()
    .assign(HBP_per_game=lambda totals: totals['HBP'] / totals['G'])
    .rename(columns={'HBP': 'PostSeason_HBP', 'G': 'PostSeason_Games'})
)

# The outer join keeps years present in only one table; the shared
# HBP_per_game column is disambiguated with the _regular / _post suffixes.
hbp_comparison = pd.merge(
    pitching_regular_hbp[['yearID', 'RegularSeason_HBP', 'HBP_per_game']],
    pitching_post_hbp[['yearID', 'PostSeason_HBP', 'HBP_per_game']],
    on='yearID', how='outer', suffixes=('_regular', '_post'))

fig, ax = plt.subplots()
ax.plot(hbp_comparison['yearID'], hbp_comparison['HBP_per_game_regular'],
        label='Regular Season HBP per Game', color='blue')
ax.plot(hbp_comparison['yearID'], hbp_comparison['HBP_per_game_post'],
        label='Postseason HBP per Game', color='red')
ax.set_xlabel('Year')
ax.set_ylabel('HBP per Game')
ax.set_title('Regular Season vs Postseason HBP per Game by Year')
ax.legend()
plt.show()