In [25]:
import pybaseball
from pybaseball import statcast
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score 
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from datetime import timezone
from bs4 import BeautifulSoup
import io
import requests
pybaseball.cache.enable()

In [2]:
url = 'https://docs.google.com/spreadsheets/d/1JgczhD5VDQ1EiXqVG-blttZcVwbZd5_Ne_mefUGwJnk/pub?output=csv'
res = requests.get(url)
ID = pd.read_csv(io.BytesIO(res.content), sep=',')
ID.dropna(subset=['MLBID'], inplace=True)
ID['MLBID'] = ID['MLBID'].astype(int)
BID = pd.read_csv("mlb-player-stats-Batters.csv")
PID = pd.read_csv("mlb-player-stats-P.csv")

In [3]:
def convert_name(name):
    if name == 'Rockies':
        return 'COL'
    elif name == 'Reds':
        return 'CIN'
    elif name == 'Mariners':
        return 'SEA'
    elif name == 'Nationals':
        return 'WAS'
    elif name == 'Yankees':
        return 'NYY'
    elif name == 'Astros':
        return 'HOU'
    elif name == 'Red Sox':
        return 'BOS'
    elif name == 'Athletics':
        return 'OAK'
    elif name == 'Mets':
        return 'NYM'
    elif name == 'Braves':
        return 'ATL'
    elif name == 'Giants':
        return 'SF'
    elif name == 'Brewers':
        return 'MIL'
    elif name == 'Rays':
        return 'TB'
    elif name == 'Royals':
        return 'KC'
    elif name == 'White Sox':
        return 'CWS'
    elif name == 'Cubs':
        return 'CHC'
    elif name == 'Angels':
        return 'LAA'
    elif name == 'Tigers':
        return 'DET'
    elif name == 'Diamondbacks':
        return 'ARI'
    elif name == 'Guardians':
        return 'CLE'
    elif name == 'Orioles':
        return 'BAL'
    elif name == 'Twins':
        return 'MIN'
    elif name == 'Marlins':
        return 'MIA'
    elif name == 'Phillies':
        return 'PHI'
    elif name == 'Rangers':
        return 'TEX'
    elif name == 'Dodgers':
        return 'LAD'
    elif name == 'Padres':
        return 'SD'
    elif name == 'Pirates':
        return 'PIT'
    elif name == 'Blue Jays':
        return 'TOR'
    elif name == 'Cardinals':
        return 'STL'
    else:
        return np.nan
    
def flip_names(name):
    first_name, last_name = name.split(", ")
    return f"{last_name} {first_name}"

In [4]:
def getDKData2024():
    eastern_time = datetime.datetime.now(timezone.utc).astimezone(timezone(datetime.timedelta(hours=-5)))
    todaysdate = eastern_time.strftime("%m-%d-%Y")
    url = 'https://rotogrinders.com/lineups/mlb?site=draftkings'
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'lxml')

    gamelist = []
    gamecards = soup.findAll("div", {"class": "game-card-teams"})
    for x in gamecards:
        twoteams = x.findAll("span", {"class": "team-nameplate-mascot"})
        roadteam = convert_name(twoteams[0].text)
        hometeam = convert_name(twoteams[1].text)
        gamekey = "{}@{}".format(roadteam,hometeam)
        gamelist.append(gamekey)

    matchupsdf = pd.DataFrame()
    for game in gamelist:
        roadteam = game.split("@")[0]
        hometeam = game.split("@")[1]
        thisdf1 = pd.DataFrame({"Team": roadteam, "Opp": hometeam, "RoadTeam": roadteam, "HomeTeam": hometeam},index=[0])
        thisdf2 = pd.DataFrame({"Team": hometeam, "Opp": roadteam, "RoadTeam": roadteam, "HomeTeam": hometeam},index=[0])
        matchupsdf = pd.concat([matchupsdf,thisdf1,thisdf2])
        
    oppdict = dict(zip(matchupsdf.Team,matchupsdf.Opp))
    hometeamdict = dict(zip(matchupsdf.Team,matchupsdf.HomeTeam))
    roadteamdict = dict(zip(matchupsdf.Team,matchupsdf.RoadTeam))

    disabled_span_list = []
    for span in soup.findAll("span", {"class": "player-nameplate disabled"}):
        for a in span.findAll("a"):
            disabled_span_list.append(a.text)

    spdata = pd.DataFrame()
    for div in soup.findAll("span", {"class": "player-nameplate", "data-position": "SP"}):
        if "TBD" in str(div):
            playername = "TBD"
            pos = "SP"
            sal = 0
        else:
            for a in div.findAll('a', {'class': 'player-nameplate-name'}):
                playername = a.text.strip()

            strdiv = str(div)
            pos = strdiv[strdiv.find("data-position")+15:strdiv.find("data-salary")-2]
            sal = strdiv[strdiv.find("data-salary")+13:strdiv.find("<div class = 'player-nameplate-info'>")-3]
        try:
            ownership = strdiv[strdiv.find('<span class="small muted" data-auth="502">') + 42:strdiv.find('%')]
            ownership = ownership.replace("</span>", "")
            ownership = ownership.replace("</span", "")
            ownership = ownership.replace("</div>", "")
            ownership = ownership.replace(" ", "")
        except:
            ownership = np.nan

        thisspdata = pd.DataFrame([[playername, sal, ownership]], columns = ["Player", "Salary", "Ownership"])
        spdata = pd.concat([spdata, thisspdata])

    spdata2 = pd.merge(spdata, PID[["Player", "Team"]], left_on = ["Player"], right_on = ["Player"], how = "left").rename(columns = {"Team": "PitcherTeam"})
    spdata3 = pd.merge(spdata2, matchupsdf[["Team", "Opp"]], left_on = ["PitcherTeam"], right_on = ["Team"], how = "left").drop(columns = ["Team"])

    opp_spname_dict = dict(zip(spdata3.Opp, spdata3.Player))
    opp_spsal_dict = dict(zip(spdata3.Opp, spdata.Salary))
    opp_spown_dict = dict(zip(spdata3.Opp, spdata3.Ownership))

    ludf = pd.DataFrame()
    
    for li in soup.findAll("li", {"class": "lineup-card-player"}):
        for a in li.findAll("a", {"class": ["player-nameplate-name", "player-nameplate disabled"]}):
            playername = a.text

        listring = str(li)
        for span in li.find("span", {"class": "small"}):
            luspot = span.text
            luspot = luspot.replace("\n", "")
            luspot = luspot.strip()
            luspot = int(luspot)
        pos = listring[listring.find("data-position")+15:listring.find("data-salary")-2]
        sal = listring[listring.find("data-salary")+13:listring.find("<span class='small'>")-3]
        ownership = ownership.replace("</span>", "")
        ownership = ownership.replace("</span", "")
        ownership = ownership.replace("</li", "")
        ownership = ownership.replace("</div>", "")
        ownership = ownership.replace(" ", "")

        try:
            sal = int(sal)
        except:
            sal = 0
        thisludf = pd.DataFrame([[playername, luspot, sal, ownership]], columns = ["Player", "Spot", "Sal", "Ownership"])
        ludf = pd.concat([ludf, thisludf])

    ludf2 = pd.merge(ludf, BID[["Player", "Team"]], left_on = ["Player"], right_on = ["Player"], how = "left").rename(columns = {"Team": "BatterTeam"})
    ludf2['BatterTeam'] = ludf2['BatterTeam'].fillna(method='ffill')

    ludf2_teamlist = list(ludf2["BatterTeam"])

    dhteams = []
    for x in ludf2_teamlist:
        if ludf2_teamlist.count(x) > 11:
            if x in dhteams:
                pass
            else:
                dhteams.append(x)

    extract_dh = ludf2[ludf2["BatterTeam"].isin(dhteams)]
    new_ludf2 = ludf2[~ludf2["BatterTeam"].isin(dhteams)]

    new_team_list = []
    runcounter = 0

    for x in list(extract_dh["BatterTeam"].astype(str)):
        if runcounter < 18:
            new_team_list.append(x)
            runcounter += 1
        else:
            new_team_list.append(x+"2")
            runcounter += 1

    extract_dh["BatterTeam"] = new_team_list

    ludf2 = pd.concat([extract_dh, new_ludf2])
    ludf2["Opp"] = ludf2["BatterTeam"].map(oppdict)
    ludf2["HomeTeam"] = ludf2["BatterTeam"].map(hometeamdict)
    ludf2["RoadTeam"] = ludf2["BatterTeam"].map(roadteamdict)
    ludf2['SP'] = ludf2['BatterTeam'].map(opp_spname_dict)
    ludf2['SPSal'] = ludf2['BatterTeam'].map(opp_spsal_dict)
    ludf2['SPOwnership'] = ludf2['BatterTeam'].map(opp_spown_dict)
    ludf2['Date'] = todaysdate
    ludf2['Time'] = np.nan

    ludf3 = ludf2[['BatterTeam','RoadTeam','HomeTeam','Time','Spot','Player','Sal','Ownership','Date', "SP"]]

    dkdata = ludf3.copy()

    try:
        checknan = dkdata[["BatterTeam", "SP"]]
        getnans = checknan[["SP"].isna()]
        if len(getnans) == 0:
            nonans = 1
            nanmapdict = {}
        else:
            nonans = 0
            getnans["SP"] = disabled_span_list
            nanmapdict = dict(zip(getnans.Team, getnans.SP))
    except:
        pass

    try:
        dkdata["SP"] = np.where(dkdata["SP"].isna(), dkdata["BatterTeam"].map(nanmapdict), dkdata["SP"])
    except:
        pass
    
    for i in range(1, len(dkdata) - 1):
        if dkdata.loc[i, 'BatterTeam'] != dkdata.loc[i-1, 'BatterTeam']:
            if dkdata.loc[i, 'BatterTeam'] != dkdata.loc[i+1, 'BatterTeam']:
                dkdata.loc[i, 'BatterTeam'] = np.nan
                dkdata.loc[i, 'HomeTeam'] = np.nan
                dkdata.loc[i, 'RoadTeam'] = np.nan

    
    dkdata[["BatterTeam", "RoadTeam", "HomeTeam"]] = dkdata[["BatterTeam", "RoadTeam", "HomeTeam"]].fillna(method='ffill')
    dkdata = dkdata.drop_duplicates(subset = ["Spot", "Player", "SP"], keep = "first")
    dkdata = dkdata.drop(columns = ["Time", "Sal", "Ownership"])

    dkdata['Date'] = pd.to_datetime(dkdata['Date'])
    dkdata['Date'] = dkdata['Date'].dt.strftime('%Y-%m-%d')
    dkdata = dkdata.set_index("Date")
    dkdata = dkdata[dkdata['Spot'] <= 6]

    return(dkdata)

In [5]:
TodaysData = getDKData2024()

In [6]:
savant2023 = pd.read_csv("~/Desktop/Random-Projects/MLB/savant2023.csv")

In [7]:
eastern_time = datetime.datetime.now(timezone.utc).astimezone(timezone(datetime.timedelta(hours=-5)))
savant2024 = statcast(start_dt = "2024-03-28", end_dt = eastern_time.strftime("%Y-%m-%d"))

This is a large query, it may take a moment to complete


100%|██████████| 41/41 [00:18<00:00,  2.20it/s]


In [8]:
savant2024 = pd.merge(savant2024, ID[["MLBID", "MLBNAME"]], left_on = 'batter', right_on = 'MLBID', how = 'left')
savant2024.dropna(subset=['MLBNAME'], inplace=True)
savant2024 = savant2024.drop_duplicates(subset = ["pitch_type", "game_date", "release_speed", "release_pos_x", "release_pos_z", "player_name"], keep='first')

In [9]:
combined1 = pd.concat([savant2023, savant2024])
combined1 = combined1[combined1['inning'] == 1]
combined1['game_date'] = pd.to_datetime(combined1['game_date'])
combined1['game_date'] = combined1['game_date'].dt.strftime('%Y-%m-%d')
combined1["BatterTeam"] = np.nan
combined1['BatterTeam'] = np.where(combined1['inning_topbot'] == 'Top', combined1['away_team'], combined1['home_team'])
combined1["player_name"] = combined1["player_name"].apply(flip_names)

In [10]:
combined2 = combined1[["game_date", "home_team", "away_team", "inning_topbot", "stand", "at_bat_number", "pitch_number", "BatterTeam", "MLBNAME", "balls", "strikes", "outs_when_up", "on_1b", "on_2b", "on_3b", "events", "description", "hit_distance_sc", "launch_speed", "launch_angle", "estimated_ba_using_speedangle", "estimated_woba_using_speedangle", "woba_value", "p_throws", "player_name", "delta_home_win_exp", "delta_run_exp", "away_score", "home_score"]].sort_values(by = ["game_date", "home_team", "away_team", "inning_topbot", "at_bat_number", "pitch_number"], ascending=[True, True, True, False, True, True])
combined2 = combined2.set_index("game_date").sort_index(ascending = True)

In [11]:
def fill_na_with_0_and_1(column):
    column_filled = column.fillna(0)
    column_filled[column_filled != 0] = 1
    return column_filled

combined2['on_1b'] = fill_na_with_0_and_1(combined2['on_1b'])
combined2['on_2b'] = fill_na_with_0_and_1(combined2['on_2b'])
combined2['on_3b'] = fill_na_with_0_and_1(combined2['on_3b'])

In [12]:
combined2['NRFI'] = ((combined2['away_score'] == 0) | (combined2['home_score'] == 0)).astype(int)

In [13]:
# At the play by play level
pbp = combined2[["home_team", "away_team", "stand", "BatterTeam", "MLBNAME", "balls", "strikes", "outs_when_up", "on_1b", "on_2b", "on_3b", "description", "hit_distance_sc", "launch_speed", "launch_angle", "estimated_ba_using_speedangle", "estimated_woba_using_speedangle", "woba_value", "p_throws", "player_name", "delta_home_win_exp", "delta_run_exp", "NRFI"]].fillna(0)

Unnamed: 0_level_0,home_team,away_team,stand,BatterTeam,MLBNAME,balls,strikes,outs_when_up,on_1b,on_2b,...,launch_speed,launch_angle,estimated_ba_using_speedangle,estimated_woba_using_speedangle,woba_value,p_throws,player_name,delta_home_win_exp,delta_run_exp,NRFI
game_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-03-30,BOS,BAL,L,BAL,Cedric Mullins,0,0,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,R,Corey Kluber,0.0,0.036,1
2023-03-30,BOS,BAL,L,BAL,Cedric Mullins,1,0,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,R,Corey Kluber,0.0,-0.046,1
2023-03-30,BOS,BAL,L,BAL,Cedric Mullins,1,1,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,R,Corey Kluber,0.0,-0.055,1
2023-03-30,BOS,BAL,L,BAL,Cedric Mullins,1,2,0,0.0,0.0,...,102.7,16.0,0.0,0.0,0.0,R,Corey Kluber,0.0,0.0,1
2023-03-30,BOS,BAL,L,BAL,Cedric Mullins,1,2,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,R,Corey Kluber,0.022,-0.173,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-05,WSH,TOR,L,WSH,Keibert Ruiz,0,0,2,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,R,Alek Manoah,0.0,0.128,1
2024-05-05,WSH,TOR,L,WSH,Keibert Ruiz,1,0,2,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,R,Alek Manoah,0.0,-0.141,1
2024-05-05,WSH,TOR,L,WSH,Keibert Ruiz,1,1,2,1.0,1.0,...,55.7,46.0,0.0,0.0,0.0,R,Alek Manoah,0.0,-0.168,1
2024-05-05,WSH,TOR,L,WSH,Keibert Ruiz,1,2,2,1.0,1.0,...,79.8,-8.0,0.0,0.0,0.0,R,Alek Manoah,0.0,0.0,1


In [14]:
# At an AB level
ab = combined2[["home_team", "away_team", "stand", "BatterTeam", "MLBNAME", "balls", "strikes", "outs_when_up", "on_1b", "on_2b", "on_3b", "events", "description", "hit_distance_sc", "launch_speed", "launch_angle", "estimated_ba_using_speedangle", "estimated_woba_using_speedangle", "woba_value", "p_throws", "player_name", "delta_home_win_exp", "delta_run_exp", "NRFI"]]
ab = ab.dropna(subset=["events"]).fillna(0)

Unnamed: 0_level_0,home_team,away_team,stand,BatterTeam,MLBNAME,balls,strikes,outs_when_up,on_1b,on_2b,...,launch_speed,launch_angle,estimated_ba_using_speedangle,estimated_woba_using_speedangle,woba_value,p_throws,player_name,delta_home_win_exp,delta_run_exp,NRFI
game_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-03-30,BOS,BAL,L,BAL,Cedric Mullins,1,2,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,R,Corey Kluber,0.022,-0.173,1
2023-03-30,BOS,BAL,L,BAL,Adley Rutschman,2,0,1,0.0,0.0,...,104.5,26.0,0.943,1.848,2.0,R,Corey Kluber,-0.103,0.933,1
2023-03-30,BOS,BAL,L,BAL,Anthony Santander,3,2,1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,R,Corey Kluber,0.014,-0.207,1
2023-03-30,BOS,BAL,R,BAL,Ryan Mountcastle,3,2,2,0.0,0.0,...,0.0,0.0,0.0,0.0,0.7,R,Corey Kluber,-0.011,0.11,1
2023-03-30,BOS,BAL,R,BAL,Ramon Urias,0,1,2,1.0,1.0,...,60.5,-42.0,0.137,0.123,0.0,R,Corey Kluber,0.038,-0.408,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-05,WSH,TOR,L,WSH,Luis Garcia,0,2,1,1.0,0.0,...,101.5,-8.0,0.323,0.29,0.9,R,Alek Manoah,0.061,0.62,1
2024-05-05,WSH,TOR,R,WSH,Nick Senzel,3,2,1,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,R,Alek Manoah,-0.054,-0.593,1
2024-05-05,WSH,TOR,L,WSH,Jesse Winker,3,0,2,0.0,1.0,...,0.0,0.0,0.0,0.697851,0.7,R,Alek Manoah,0.014,0.1,1
2024-05-05,WSH,TOR,R,WSH,Joey Meneses,3,1,2,1.0,1.0,...,0.0,0.0,0.0,0.697851,0.7,R,Alek Manoah,0.098,0.506,1


In [39]:
pbp = pbp[["home_team", "away_team", "BatterTeam", "MLBNAME", "player_name", "delta_run_exp"]]
ab = ab[["home_team", "away_team", "BatterTeam", "MLBNAME", "player_name", "delta_run_exp"]]
TodaysData2 = TodaysData.rename(columns={'Player': 'MLBNAME', 'RoadTeam': 'away_team', "HomeTeam": "home_team", "SP": "player_name"}).drop(columns = ["Spot"])

pbp

Unnamed: 0_level_0,home_team,away_team,BatterTeam,MLBNAME,player_name,delta_run_exp
game_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-03-30,3,2,2,82,94,0.036
2023-03-30,3,2,2,82,94,-0.046
2023-03-30,3,2,2,82,94,-0.055
2023-03-30,3,2,2,82,94,0.0
2023-03-30,3,2,2,82,94,-0.173
...,...,...,...,...,...,...
2024-05-05,29,28,29,273,12,0.128
2024-05-05,29,28,29,273,12,-0.141
2024-05-05,29,28,29,273,12,-0.168
2024-05-05,29,28,29,273,12,0.0


In [40]:
# Encodes the values so the ML algos can read in everything
non_numeric_columns = pbp.select_dtypes(exclude=['float64', 'int64']).columns
for col in non_numeric_columns:
    label_encoder = LabelEncoder()
    pbp[col] = label_encoder.fit_transform(pbp[col])

non_numeric_columns = ab.select_dtypes(exclude=['float64', 'int64']).columns
label_encoder = LabelEncoder()
for col in non_numeric_columns:
    ab[col] = label_encoder.fit_transform(ab[col])

label_encoders = {}
non_numeric_columns = TodaysData2.select_dtypes(exclude=['float64', 'int64']).columns
for col in non_numeric_columns:
    label_encoder = LabelEncoder()
    TodaysData2[col] = label_encoder.fit_transform(TodaysData2[col])
    label_encoders[col] = label_encoder

In [41]:
pbpFeatures = pbp.drop(columns = ["delta_run_exp"]).values.reshape(-1, 5)
pbpLabel = pbp["delta_run_exp"].values.reshape(-1, 1)
TodayFeatures = TodaysData2.values.reshape(-1, 5)

rf_regressor = RandomForestRegressor(n_estimators=100)
rf_regressor.fit(pbpFeatures, pbpLabel)

# Make predictions on the testing set
RFpred = rf_regressor.predict(TodayFeatures)

#rmse = np.sqrt(mean_squared_error(pbpLabel, RFpred))
#print("Root Mean Squared Error:", round(rmse, 2))

# Calculate accuracy
#accuracy = accuracy_score(pbpLabel, RFpred)
#precision = precision_score(pbpLabel, RFpred)
#print("Accuracy:", round(accuracy, 4))
#print("Precision:", round(precision, 4))

TodaysData2["RFPred"] = RFpred

  return fit_method(estimator, *args, **kwargs)


In [42]:
gb_regressor = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1)
gb_regressor.fit(pbpFeatures, pbpLabel)

GBpred = gb_regressor.predict(TodayFeatures)

TodaysData2["GBPred"] = GBpred

  y = column_or_1d(y, warn=True)


In [44]:
for col in non_numeric_columns:
    label_encoder = label_encoders[col]
    unknown_label = len(label_encoder.classes_)
    TodaysData2[col] = TodaysData2[col].apply(lambda x: x if x < unknown_label else -1)
    
    # Inverse transform the encoded values
    TodaysData2[col] = label_encoder.inverse_transform(TodaysData2[col])

TodaysData2

Unnamed: 0_level_0,BatterTeam,away_team,home_team,MLBNAME,player_name,RFPred,GBPred,KNNPred
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-05-07,SF,SF,PHI,Jung Hoo Lee,Zack Wheeler,-0.017064,-0.007739,-0.03873
2024-05-07,SF,SF,PHI,Thairo Estrada,Zack Wheeler,0.103388,0.001175,0.02732
2024-05-07,SF,SF,PHI,LaMonte Wade,Zack Wheeler,0.086221,-0.009559,-0.04273
2024-05-07,SF,SF,PHI,Wilmer Flores,Zack Wheeler,0.081562,-0.001802,0.06375
2024-05-07,SF,SF,PHI,Michael Conforto,Zack Wheeler,0.052278,-0.009559,-0.03166
...,...,...,...,...,...,...,...,...
2024-05-07,LAD,MIA,LAD,Shohei Ohtani,Roddery Munoz,-0.019388,0.001279,0.02858
2024-05-07,LAD,MIA,LAD,Freddie Freeman,Roddery Munoz,0.086305,-0.005451,-0.00309
2024-05-07,LAD,MIA,LAD,Will Smith,Roddery Munoz,-0.030410,0.001279,0.04298
2024-05-07,LAD,MIA,LAD,Max Muncy,Roddery Munoz,-0.004511,-0.009455,-0.02841


In [47]:
TodaysData3 = TodaysData2.groupby(["BatterTeam", "away_team", "home_team"]).agg(
    RFPred = ("RFPred", "sum"),
    GBPred = ("GBPred", "sum")).reset_index()

TodaysData3['Games'] = TodaysData3.apply(lambda x: tuple(sorted([x['away_team'], x['home_team']])), axis=1)
TodaysData3 = TodaysData3.groupby("Games").agg(
    RFPred=("RFPred", "sum"),
    GBPred = ("GBPred", "sum")).sort_values("RFPred")

TodaysData3

Unnamed: 0_level_0,RFPred,GBPred
Games,Unnamed: 1_level_1,Unnamed: 2_level_1
"(OAK, TEX)",-0.2993,-0.506868
"(CLE, DET)",-0.172814,-0.210801
"(LAA, PIT)",-0.098743,-0.064349
"(CHC, SD)",-0.071091,0.085697
"(MIN, SEA)",0.104574,-0.077654
"(KC, MIL)",0.170196,-0.140246
"(LAD, MIA)",0.329131,-0.02837
"(NYM, STL)",0.406959,0.143374
"(PHI, SF)",0.422865,-0.052714
"(CWS, TB)",0.454265,0.016456


In [46]:
"""combined4 = combined3
combined4["RFpred"] = RFpred
combined4["GBpred"] = GBpred
combined4["Kpred"] = Kpred
combined4"""

'combined4 = combined3\ncombined4["RFpred"] = RFpred\ncombined4["GBpred"] = GBpred\ncombined4["Kpred"] = Kpred\ncombined4'