In [25]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from broom import Broom
# Click Dots below for CSV Imports

In [57]:
# Build the project-local Broom helper pointed at the 'resources' directory and
# ask it for the merged CSV data (presumably one combined DataFrame, since the
# result exposes `.columns` — confirm against broom.py).
# NOTE(review): the manual loading cell further down reads from "Resources"
# (capital R); on a case-sensitive filesystem only one spelling can be right.
csv_broom = Broom(csv_directory='resources')
raw_data = csv_broom.merge_csvs()

# Show which columns the merged data contains.
raw_data.columns

Index(['Tm', '#Bat', 'BatAge', 'R/G', 'G', 'PA', 'AB', 'R', 'H', '2B', '3B',
       'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'BA', 'OBP', 'SLG', 'OPS', 'OPS+',
       'TB', 'GDP', 'HBP', 'SH', 'SF', 'IBB', 'LOB', '#P', 'PAge', 'RA/G', 'W',
       'L', 'W-L%', 'ERA', 'G.1', 'GS', 'GF', 'CG', 'tSho', 'cSho', 'SV', 'IP',
       'H.1', 'R.1', 'ER', 'HR.1', 'BB.1', 'IBB.1', 'SO.1', 'HBP.1', 'BK',
       'WP', 'BF', 'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W',
       'LOB.1', 'Adj. Runs', 'Adj. Runs.1', 'Unnamed: 65'],
      dtype='object')

In [93]:
# Load each season's CSV into its own DataFrame.
# The Broom merge above only ended up with data from 2018, so the per-season
# files are read individually here and combined with a pandas merge below.
# Ultimately only Runs Scored and Runs Allowed are needed from each file.

# Paths to the per-season files (2016 through 2020), in chronological order.
a_16, a_17, a_18, a_19, a_20 = (
    os.path.join("Resources", csv_name)
    for csv_name in (
        "2016_team_data.csv",
        "2017_team_data.csv",
        "2018_team_data.csv",
        "2019_team_data.csv",
        "2020_team_data.csv",
    )
)

# Read every CSV with pandas' default settings, one DataFrame per season.
df_16, df_17, df_18, df_19, df_20 = map(
    pd.read_csv,
    (a_16, a_17, a_18, a_19, a_20),
)

# Pull Necessary Datapoints
# Note that Runs Scored = "R" and Runs Allowed = "R.1".
# 2020 uses the "Adj. Runs" / "Adj. Runs.1" columns instead: the adjusted
# numbers we created to account for the shortened season.
# dropna() discards any row that is missing the team or either run total.

runs_16 = df_16[['Tm','R','R.1']].dropna()
runs_17 = df_17[['Tm','R','R.1']].dropna()
runs_18 = df_18[['Tm','R','R.1']].dropna()
runs_19 = df_19[['Tm','R','R.1']].dropna()
runs_20 = df_20[['Tm','Adj. Runs','Adj. Runs.1']].dropna()

# Give every season its final column names BEFORE merging.
# Four of the five frames share the names 'R' and 'R.1', so merging them as-is
# makes pandas append its default '_x'/'_y' suffixes. By the third merge the
# left frame already holds 'R_x'/'R.1_x', the suffixing produces duplicate
# column names, and pandas >= 2.0 raises MergeError (1.x only warned).
# Unique names up front avoid the suffixing entirely and remove the need for a
# fragile positional `df.columns = [...]` rename afterwards.

runs_by_season = {
    2016: runs_16,
    2017: runs_17,
    2018: runs_18,
    2019: runs_19,
    2020: runs_20,
}

labelled_runs = {
    season: runs.rename(
        columns=dict(
            zip(
                runs.columns,
                ['Tm', 'RS {}'.format(season), 'RA {}'.format(season)],
            )
        )
    )
    for season, runs in runs_by_season.items()
}

# Now We Can Merge This into one big Dataframe
# Every merge is a left join on the team abbreviation, so the result keeps
# exactly the teams (and row order) of the 2016 data; a team that is absent
# from a later season gets NaN in that season's columns.

first_merge = pd.merge(labelled_runs[2016], labelled_runs[2017], how='left', on=['Tm'])

second_merge = pd.merge(first_merge, labelled_runs[2018], how='left', on=['Tm'])

third_merge = pd.merge(second_merge, labelled_runs[2019], how='left', on=['Tm'])

# Final columns: Tm, RS 2016, RA 2016, RS 2017, RA 2017, ..., RS 2020, RA 2020
df = pd.merge(third_merge, labelled_runs[2020], how='left', on=['Tm'])

# Export into New CSV
# NOTE(review): the file name has no ".csv" extension and the row index is
# written as an unnamed first column (pandas' default). Both are left as-is in
# case other code already reads "run_data" in this form.

df.to_csv("run_data")

df

Unnamed: 0,Tm,RS 2016,RA 2016,RS 2017,RA 2017,RS 2018,RA 2018,RS 2019,RA 2019,RS 2020,RA 2020
0,ARI,752.0,890.0,812.0,659.0,693.0,644.0,813,743,725.76,797.04
1,ATL,649.0,779.0,732.0,821.0,759.0,657.0,855,743,939.6,777.6
2,BAL,744.0,715.0,743.0,841.0,622.0,892.0,729,981,740.34,793.8
3,BOS,878.0,694.0,785.0,668.0,876.0,647.0,901,828,788.94,947.7
4,CHC,808.0,556.0,822.0,695.0,761.0,645.0,814,717,716.04,648.0
5,CHW,686.0,715.0,706.0,820.0,656.0,848.0,708,832,826.2,664.2
6,CIN,716.0,854.0,753.0,869.0,696.0,819.0,701,711,656.1,656.1
7,CLE,777.0,676.0,818.0,564.0,818.0,648.0,769,657,669.06,563.76
8,COL,845.0,860.0,824.0,757.0,780.0,745.0,835,958,741.96,952.56
9,DET,750.0,721.0,735.0,894.0,630.0,796.0,582,915,694.98,887.76
