In [1]:
# Import dependencies
import pandas as pd
import plotly.graph_objects as go

## Data Processing

In [2]:
# Load the four Baseball Reference CSV exports, in this order:
# standard batting, value batting, advanced batting, standard fielding.
std_bat_df, val_bat_df, adv_bat_df, std_fld_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Resources/Data/standard_batting.csv',
        'Resources/Data/value_batting.csv',
        'Resources/Data/advanced_batting.csv',
        'Resources/Data/standard_fielding.csv',
    )
)

### Standard Batting

In [3]:
# Keep only the two rows whose 'Year' field carries a per-team label
# ('TEX (4 yrs)' / 'NYY (1 yr)'), then renumber the index from 0.
std_bat_df = (
    std_bat_df
    .loc[lambda frame: frame['Year'].isin(['TEX (4 yrs)', 'NYY (1 yr)'])]
    .reset_index(drop=True)
)
std_bat_df

Unnamed: 0,Year,Age,Tm,Lg,G,PA,AB,R,H,2B,...,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos,Awards
0,TEX (4 yrs),,,,156.0,519.0,489.0,51.0,120.0,31.0,...,0.634,69.0,178.0,21.0,1.0,4.0,7.0,1.0,,
1,NYY (1 yr),,,,74.0,213.0,200.0,28.0,52.0,7.0,...,0.74,110.0,88.0,3.0,1.0,0.0,1.0,0.0,,


In [4]:
# Narrow the frame to the team label plus the selected batting stats;
# every other column is discarded.
std_bat_df = std_bat_df.loc[
    :,
    ['Year', 'HR', 'RBI', 'SB', 'BB', 'SO', 'BA', 'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP'],
]
std_bat_df

Unnamed: 0,Year,HR,RBI,SB,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP
0,TEX (4 yrs),9.0,55.0,1.0,18.0,100.0,0.245,0.27,0.364,0.634,69.0,178.0,21.0
1,NYY (1 yr),9.0,32.0,2.0,11.0,36.0,0.26,0.3,0.44,0.74,110.0,88.0,3.0


### Value Batting

In [5]:
# Keep only the two rows whose 'Year' field carries a per-team label
# ('TEX (4 yrs)' / 'NYY (1 yr)'), then renumber the index from 0.
val_bat_df = (
    val_bat_df
    .loc[lambda frame: frame['Year'].isin(['TEX (4 yrs)', 'NYY (1 yr)'])]
    .reset_index(drop=True)
)
val_bat_df

Unnamed: 0,Year,Age,Tm,Lg,G,PA,Rbat,Rbaser,Rdp,Rfield,...,RAR,WAR,waaWL%,162WL%,oWAR,dWAR,oRAR,Salary,Pos,Awards
0,TEX (4 yrs),,,,156.0,519.0,-20.0,-3.0,-2.0,3.0,...,4.0,0.5,0.491,0.496,0.1,1.3,1.0,,,
1,NYY (1 yr),,,,74.0,213.0,3.0,1.0,0.0,7.0,...,23.0,2.3,0.522,0.51,1.6,1.2,15.0,,,


In [6]:
# Narrow the frame to the team label plus the selected value metrics;
# every other column is discarded.
val_bat_df = val_bat_df.loc[
    :,
    ['Year', 'RAA', 'WAA', 'RAR', 'WAR', 'oWAR', 'dWAR', 'oRAR'],
]
val_bat_df

Unnamed: 0,Year,RAA,WAA,RAR,WAR,oWAR,dWAR,oRAR
0,TEX (4 yrs),-14.0,-1.3,4.0,0.5,0.1,1.3,1.0
1,NYY (1 yr),15.0,1.6,23.0,2.3,1.6,1.2,15.0


### Advanced Batting
Note: I don't think this DataFrame is useful.

In [7]:
# Keep the team code and the three rate columns, then drop any row
# that is missing a value in one of them.
adv_bat_df = (
    adv_bat_df
    .loc[:, ['Tm', 'HR%', 'SO%', 'BB%']]
    .dropna()
)
adv_bat_df

Unnamed: 0,Tm,HR%,SO%,BB%
0,TEX,0.00%,12.50%,0.00%
1,TEX,1.60%,21.40%,2.40%
2,TEX,2.40%,18.10%,3.60%
3,TEX,1.70%,18.90%,4.00%
4,NYY,4.20%,16.90%,5.20%


In [8]:
# Parse the percent strings (e.g. '12.50%') into floats. The values stay in
# percent units (12.5, not 0.125) -- only the trailing '%' sign is removed.
#
# Why this form:
# - rstrip('%') removes only a trailing percent sign, so a value that lacks
#   one is parsed as-is instead of losing its last character.
# - astype(str) first makes the cell safe to re-run: already-converted floats
#   round-trip through their string form instead of raising.
# - assign() builds a new frame rather than writing columns into a frame that
#   was derived from a slice.
adv_bat_df = adv_bat_df.assign(**{
    pct_col: adv_bat_df[pct_col].astype(str).str.rstrip('%').astype(float)
    for pct_col in ('HR%', 'SO%', 'BB%')
})

adv_bat_df

Unnamed: 0,Tm,HR%,SO%,BB%
0,TEX,0.0,12.5,0.0
1,TEX,1.6,21.4,2.4
2,TEX,2.4,18.1,3.6
3,TEX,1.7,18.9,4.0
4,NYY,4.2,16.9,5.2


In [9]:
# Collapse to one row per team code ('Tm' becomes the index). Each rate is
# the simple, unweighted mean of that team's per-row percentages.
adv_bat_df = adv_bat_df.groupby(by='Tm').mean()
adv_bat_df