In [25]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from broom import Broom
# Click Dots below for CSV Imports

In [57]:
raw_data = Broom(csv_directory = 'resources').merge_csvs()

raw_data.columns

Index(['Tm', '#Bat', 'BatAge', 'R/G', 'G', 'PA', 'AB', 'R', 'H', '2B', '3B',
       'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'BA', 'OBP', 'SLG', 'OPS', 'OPS+',
       'TB', 'GDP', 'HBP', 'SH', 'SF', 'IBB', 'LOB', '#P', 'PAge', 'RA/G', 'W',
       'L', 'W-L%', 'ERA', 'G.1', 'GS', 'GF', 'CG', 'tSho', 'cSho', 'SV', 'IP',
       'H.1', 'R.1', 'ER', 'HR.1', 'BB.1', 'IBB.1', 'SO.1', 'HBP.1', 'BK',
       'WP', 'BF', 'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W',
       'LOB.1', 'Adj. Runs', 'Adj. Runs.1', 'Unnamed: 65'],
      dtype='object')

In [97]:
# Connect Each CSV to its Own DF
# The Broom function above only includes data from 2018. I'm going to try a pandas merge below.
# Ultimately, we only need Runs Scored and Runs allowed. 

a_16 = os.path.join("Resources", "2016_team_data.csv")
a_17 = os.path.join("Resources", "2017_team_data.csv")
a_18 = os.path.join("Resources", "2018_team_data.csv")
a_19 = os.path.join("Resources", "2019_team_data.csv")
a_20 = os.path.join("Resources", "2020_team_data.csv")

# Convert CSVs into DFs

df_16 = pd.read_csv(a_16)
df_17 = pd.read_csv(a_17)
df_18 = pd.read_csv(a_18)
df_19 = pd.read_csv(a_19)
df_20 = pd.read_csv(a_20)

# Pull Necessary Datapoints
# Note that Runs Scored, = "R" Runs Allowed = "R.1"
# Also note that we had to create an adjusted number for 2020 based upon the shortened season

runs_16 = df_16[['Tm','R','R.1']].dropna()
runs_17 = df_17[['Tm','R','R.1']].dropna()
runs_18 = df_18[['Tm','R','R.1']].dropna()
runs_19 = df_19[['Tm','R','R.1']].dropna()
runs_20 = df_20[['Tm','Adj. Runs','Adj. Runs.1']].dropna()

# Now We Can Merge This into one big Dataframe
# The names of the new columns will be a mess, so we will have to fix this.

first_merge = pd.merge(runs_16, runs_17, how='left', on=['Tm'])

second_merge = pd.merge(first_merge, runs_18, how='left', on = ['Tm'])

third_merge = pd.merge(second_merge, runs_19, how='left', on = ['Tm'])

df = pd.merge(third_merge, runs_20, how='left', on = ['Tm'])

# Change the names of the Columns

df.columns= ['Tm','RS 2016', 'RA 2016','RS 2017', 'RA 2017','RS 2018', 'RA 2018','RS 2019', 'RA 2019','RS 2020', 'RA 2020']

# Export into New CSV

# We don't need to run the function below again (df.to_csv), as the CSV is already in the resources folder.
# We will keep it here for the sake of understanding the project
#df.to_csv("run_data")

df.head()

Unnamed: 0,Tm,RS 2016,RA 2016,RS 2017,RA 2017,RS 2018,RA 2018,RS 2019,RA 2019,RS 2020,RA 2020
0,ARI,752.0,890.0,812.0,659.0,693.0,644.0,813,743,725.76,797.04
1,ATL,649.0,779.0,732.0,821.0,759.0,657.0,855,743,939.6,777.6
2,BAL,744.0,715.0,743.0,841.0,622.0,892.0,729,981,740.34,793.8
3,BOS,878.0,694.0,785.0,668.0,876.0,647.0,901,828,788.94,947.7
4,CHC,808.0,556.0,822.0,695.0,761.0,645.0,814,717,716.04,648.0


In [116]:
# Create New Table with Annual Pythagorean Win %
# Recall that this function is equal to:
# (Runs Scored ^ 1.83) / ((Runs Score ^ 1.83) + (Runs Allowed ^ 1.83))

pyth_16 = (df['RS 2016']**1.83) / ((df['RS 2016']**1.83)+(df['RA 2016']**1.83))

pyth_17 = (df['RS 2017']**1.83) / ((df['RS 2017']**1.83)+(df['RA 2017']**1.83))

pyth_18 = (df['RS 2018']**1.83) / ((df['RS 2018']**1.83)+(df['RA 2018']**1.83))

pyth_19 = (df['RS 2019']**1.83) / ((df['RS 2019']**1.83)+(df['RA 2019']**1.83))

pyth_20 = (df['RS 2020']**1.83) / ((df['RS 2020']**1.83)+(df['RA 2020']**1.83))

pyth_df = pd.DataFrame({
    
        "Teams": df['Tm'],
        "Implied Win % (2016)": pyth_16,
        "Implied Win % (2017)": pyth_17,
        "Implied Win % (2018)": pyth_18,
        "Implied Win % (2019)": pyth_19,
        "Implied Win % (2020)": pyth_20})

pyth_df.set_index("Teams", inplace = True)
pyth_df.head()


#pyth_df.to_csv("pythag_win_percentage") --> Read Above on why this is commented out! 
#                                             CC: Export into new CSV

Unnamed: 0_level_0,Implied Win % (2016),Implied Win % (2017),Implied Win % (2018),Implied Win % (2019),Implied Win % (2020)
Teams,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ARI,0.423523,0.59437,0.533499,0.541098,0.457244
ATL,0.417239,0.447697,0.565644,0.563884,0.585723
BAL,0.518181,0.443559,0.340793,0.367415,0.468145
BOS,0.605962,0.573306,0.635185,0.538578,0.416897
CHC,0.66464,0.576184,0.57509,0.55779,0.545553


In [13]:
# Import Requests for API Call
import requests
import json

In [14]:
# In this API Call, we are looking to get a few things, if possible.
# First and foremost, we are looking for a real-time number (or as recent as possible)
# --- of Runs Scored and Runs Allowed per team. We will use these datapoints to
# ------ calculate the current season (2021) projected wins from an implied pythagorean win %
# ----------------------------------
# Besides that, we should call an API on historical win totals. We should calculate a figure
# --- from the implied win % database that outputs an implied win total (Pythag Wins) and
# ------ compare it versus the actual number. 
# ----------------------------------
# Finally, we will need to call an API for Historical Vegas win Totals
# We will compare all of the above to the vegas win totals.


url = "https://fly.sportsdata.io/v3/mlb/scores/json/TeamGameStatsByDate/2021-APR-19"

response = requests.get(url)
response_json = response.json()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)