In [1]:
import pandas as pd
import numpy as np
import duckdb
import warnings
import os

import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

# email
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

import warnings
from datetime import datetime, timedelta

pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")

categories = ['PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK']
con = duckdb.connect(database=":memory:")

cwd = os.path.abspath(os.getcwd()).replace("\\", "/")
if cwd.startswith("C:/Users/Rodolfo/"):
    RUN_LOCATION = "local"
else:
    RUN_LOCATION = "cloud"
time_offset = {"local": 3, "cloud": -5}
now = str((datetime.now() + timedelta(hours=time_offset[RUN_LOCATION]) + timedelta(hours=-3)).date())
print(f"Today's date:", now)

Today's date: 2026-01-11


In [2]:
%run ./common_utils.ipynb

# Initial Functions

In [3]:
def email(model, error):
    
    # Email details
    sender_email = "rodolfoe7157@gmail.com"
    receiver_email = "rodolfoe7157@gmail.com"
    password = "cqgu bfey cnyx sfue"  # See note below

    subject = "NBA create_Predictions error"
    body = f"Model: {model}_model\nERROR: {error}"

    # Create message
    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = receiver_email
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))

    # Connect to Gmail SMTP server and send
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.login(sender_email, password)
        server.send_message(msg)

    print("Email sent successfully!")

In [4]:
def load_df(file_name):
    df = pd.DataFrame()
    for i in [2021, 2022, 2023, 2024, 2025]:
        df_temp = pd.read_csv(f"../tables/{i}/{file_name}.csv")
        df_temp['Season'] = i
        df = pd.concat([df, df_temp])
        
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df.Date)
    if file_name == "season_gamelogs":
        df = df[~df[['Date', 'Team', 'Player']].duplicated(keep='last')]
    
    return df

In [5]:
def create_base_df():
    
    # Load dfs
    df = load_df('parlay_stats')
    df2 = load_df('nba_schedule')
    df3 = load_df('season_gamelogs')
    df4 = load_df('injuries')
    df5 = load_df('plyr_pos_xref')
    df6 = load_df('daily_lineups')
    gmlog_cols = ['game_id', 'Player', 'MP', 'PF']
    df7 = load_df('h1_season_gamelogs')[gmlog_cols].rename(columns={"MP": "MP_h1", "PF": "PF_h1"})
    df8 = load_df('h2_season_gamelogs')[gmlog_cols].rename(columns={"MP": "MP_h2", "PF": "PF_h2"})
    df9 = load_df('q1_season_gamelogs')[gmlog_cols].rename(columns={"MP": "MP_q1", "PF": "PF_q1"})
    df10 = load_df('q2_season_gamelogs')[gmlog_cols].rename(columns={"MP": "MP_q2", "PF": "PF_q2"})
    df11 = load_df('q3_season_gamelogs')[gmlog_cols].rename(columns={"MP": "MP_q3", "PF": "PF_q3"})
    df12 = load_df('q4_season_gamelogs')[gmlog_cols].rename(columns={"MP": "MP_q4", "PF": "PF_q4"})

    df3 = df3.rename(columns={"3PM": "TPM", "3PA": "TPA", "3P%": "TP%", "TRB": "REB"}).drop(['Pos', 'Opp'], axis=1)
    df3['PR'] = df3.PTS + df3.REB 
    df3['PA'] = df3.PTS + df3.AST
    df3['RA'] = df3.REB + df3.AST
    df3['PRA'] = df3.PTS + df3.REB + df3.AST
    df3['STL_BLK'] = df3.STL + df3.BLK
    df = df.merge(df3, on=['Season', 'Date', 'Team', 'Player'], how='left')

    df_mtch = df2[['Season', 'Date', 'AwayABV', 'HomeABV', 'AwayPTS', 'HomePTS', 'AwayB2B', 'HomeB2B', 'is_OT', 'cup_gm', 'pstszn_gm']]
    df_mtch['Team_type'] = 'Away'
    df_mtch = df_mtch.rename(columns={"AwayABV": "Team", "HomeABV": "Opp", "AwayB2B": "B2B"})[['Season', 'Date', 'Team', 'AwayPTS', 'HomePTS', 'Opp', 'B2B', 'is_OT', 'cup_gm', 'pstszn_gm', 'Team_type']]
    df_mtch2 = df_mtch.copy().rename(columns={"Team": "Opp", "Opp": "Team", "HomeB2B": "B2B"})[['Season', 'Date', 'Team', 'AwayPTS', 'HomePTS', 'Opp', 'B2B', 'is_OT', 'cup_gm', 'pstszn_gm']]
    df_mtch2['Team_type'] = 'Home'
    df_mtch = pd.concat([df_mtch, df_mtch2])
    df_mtch = df_mtch[['Season', 'Date', 'Team', 'Team_type', 'AwayPTS', 'HomePTS', 'is_OT', 'cup_gm', 'pstszn_gm']]
    df_mtch = df_mtch.sort_values(["Team", "Date"])
    df_mtch['team_game_num'] = df_mtch.groupby(["Team", "Season"]).cumcount() + 1
    df_mtch['Spread'] = np.where(df_mtch.Team_type == 'Home', df_mtch.AwayPTS - df_mtch.HomePTS, df_mtch.HomePTS - df_mtch.AwayPTS)
    df_mtch['Total'] = df_mtch.AwayPTS + df_mtch.HomePTS
    df_mtch['is_Win'] = np.where(df_mtch.Spread > 0, 1, 0)
    df_mtch['Szn_Wins'] = df_mtch.groupby(['Season', 'Team'])['is_Win'].cumsum()
    df = df.drop(['Season', 'Team_type'], axis=1).merge(df_mtch, on=['Date', 'Team'])

    df = df.merge(df4[['Date', 'Team', 'Player', 'Status']], on=['Date', 'Team', 'Player'], how='left')
    df['Status'] = np.where((df.Active == 1) & (df.Status.isnull()), 'Available', df.Status)
    df['Status'] = np.where((df.Active == 0), 'Out', df.Status)
    df['Status'] = np.where((df.Status == 'Out') & (df.Active != 0), 'Available', df.Status)

    df6['role'] = 1
    df = df.merge(df6.drop('Pos', axis=1), on=['Season', 'Date', 'Team', 'Player'], how='left')
    df['role'] = df.role.fillna(2).astype(int)
    df['role'] = np.where(((df.MP < 8) & (df.role == 2)), 3, df.role)

    # Add gmlog splits
    df_gmlog_comb = df7.merge(df8, on=['game_id', 'Player'])
    for df_loop in (df9, df10, df11, df12):
        df_gmlog_comb = df_gmlog_comb.merge(df_loop, on=['game_id', 'Player'])
    df = df.merge(df_gmlog_comb, on=['game_id', 'Player'], how='left')
    
    global team_encoder, player_encoder, team_type_encoder, position_encoder, status_encoder
    team_encoder = LabelEncoder()
    player_encoder = LabelEncoder()
    team_type_encoder = LabelEncoder()
    position_encoder = LabelEncoder()
    status_encoder = LabelEncoder()

    # Encode string cols
    team_encoder.fit(pd.concat([df["Team"], df["Opp"]], axis=0))
    players_fits = pd.concat([df["Player"], df3["Player"]], axis=0)
    players_fits = pd.concat([players_fits, df4["Player"]], axis=0).drop_duplicates()
    player_encoder.fit(players_fits)
    df["Team"] = team_encoder.transform(df["Team"])
    df["Opp"] = team_encoder.transform(df["Opp"])
    df["Player_name"] = df.Player
    df["Player"] = player_encoder.transform(df["Player"])
    df["Pos"] = position_encoder.fit_transform(df["Pos"])
    df['Team_type'] = team_type_encoder.fit_transform(df['Team_type'])
    df["Status"] = status_encoder.fit_transform(df["Status"])

    return df

### Create missing_df

In [6]:
def create_df_missing(df, pred_col):
    
    df3 = load_df('season_gamelogs')
    df3 = df3.rename(columns={"3PM": "TPM", "3PA": "TPA", "3P%": "TP%", "TRB": "REB"}).drop(['Pos', 'Opp'], axis=1)
    df3['PR'] = df3.PTS + df3.REB 
    df3['PA'] = df3.PTS + df3.AST
    df3['RA'] = df3.REB + df3.AST
    df3['PRA'] = df3.PTS + df3.REB + df3.AST
    df3['STL_BLK'] = df3.STL + df3.BLK
    df4 = load_df('injuries')
    # Fill missing games from injuries.csv
    df_pred = create_base_df()
    team_games = df_pred[['Season', 'Team', 'Date']].drop_duplicates()
    players = df_pred[['Season','Player','Team']].drop_duplicates()
    fabricated = (players.sort_values('Season').groupby('Player', as_index=False).last())
    fabricated['Season'] = fabricated['Season'] + 1
    players = pd.concat([players, fabricated], ignore_index=True).drop_duplicates(['Season','Player','Team'])
    expanded = team_games.merge(players, on=['Season', 'Team'], how='left')

    df3["Team"] = team_encoder.transform(df3["Team"])
    df3["Player"] = player_encoder.transform(df3["Player"])
    df4["Team"] = team_encoder.transform(df4["Team"])
    df4["Player"] = player_encoder.transform(df4["Player"])
    df5 = load_df('plyr_pos_xref')
    df5['Team'] = team_encoder.transform(df5["Team"])
    df5['Player'] = player_encoder.transform(df5["Player"])

    expanded = expanded.merge(df3[['Season', 'Player', 'Date', 'MP']], on=['Season', 'Player', 'Date'], how='left').drop_duplicates(['Season', 'Date', 'Player', 'Team'])
    expanded = expanded[(expanded.MP.isnull()) & (expanded.Date != now)].drop('MP', axis=1)
    expanded = pd.concat([expanded, df4[df4.Status == 'Out'][['Season', 'Team', 'Date', 'Player']]])
    df4 = df4.merge(expanded, on=['Season', 'Date', 'Team', 'Player'], how='right')

    # Grab outs from players season gamelogs
    df4 = df4.merge(df3, on=['Season', 'Date', 'Team', 'Player'], how='outer')
    df4['Status'] = np.where(((df4.Active == 1) | (df4.MP > 0)), 'Available', df4.Status)
    df4['Status'] = np.where(((df4.Active == 0) | (df4.MP == 0) | (df4.MP.isnull())), 'Out', df4.Status)
    df4['Status'] = np.where((df4.Status == 'Out') & (df4.MP > 0), 'Available', df4.Status)
    df4['Status'] = np.where((df4.Status != 'Out') & (df4.MP == 0), 'Out', df4.Status)
    df4 = df4[df4.Status == 'Out'][['Season', 'Date', 'Team', 'Player']].drop_duplicates()
    
    df_missing = df[['Season', 'Date', 'Team', 'Player', 'role', pred_col]].copy()
    df_missing[f'{pred_col}_L10'] = (
        df_missing.groupby(['Player','Season'])[pred_col]
                  .transform(lambda x: x.rolling(10, min_periods=1).mean())
    )
    df_missing['role_L10_mode'] = (
        df_missing
            .groupby(['Player', 'Season'])['role']
            .transform(lambda x: x.rolling(10, min_periods=1)
                            .apply(lambda y: np.bincount(y.astype(np.int8), minlength=4).argmax(), raw=True))
    )
    df_missing = pd.merge_asof(df4, df_missing[["Season", "Player", "Date", "role", "role_L10_mode", f"{pred_col}_L10"]], 
                      on="Date", by=["Player", "Season"], direction="backward", allow_exact_matches=True).dropna()   
    df_missing = df_missing.merge(df5, on=['Season', 'Team', 'Player'])
    
    # Filter out old injuries
    df_missing = df_missing.sort_values(["Season", "Team", "Player", "Date"])
    df_missing["team_game_num"] = (df_missing.groupby(["Season", "Team"])["Date"].rank(method="dense").astype(int))
    df_missing["game_break"] = (df_missing.groupby(["Season", "Team", "Player"])["team_game_num"].diff().ne(1))
    df_missing["streak_id"] = (df_missing.groupby(["Season", "Team", "Player"])["game_break"].cumsum())
    df_missing["consecutive_games"] = (df_missing.groupby(["Season", "Team", "Player", "streak_id"]).cumcount().add(1))
    df_missing["eligible_today"] = (df_missing["consecutive_games"] < 10).astype(int)
    df_missing["role_for_count"] = np.where(df_missing["eligible_today"] == 1, df_missing["role_L10_mode"], np.nan)    

    df_missing["Player"] = player_encoder.inverse_transform(df_missing["Player"])
#     display(df_missing[df_missing.Team == 7].tail(50))

    out_minutes = (
    df_missing
      .groupby(["Season", "Date", "Team"])
      .agg(
#           team_mins_available=("MP_L10", lambda x: x.sum()),
          starters_out=("role_for_count", lambda x: (x == 1).sum())
      )
      .reset_index()
    )

    return out_minutes

# Minutes Projection Model

In [7]:
def setup_df_mins(con, df):

    df = df[['Season', 'Date', 'Team', 'Team_type', 'Opp', 'Player', 'Pos', 'role', 'B2B', 
             'MP', 'MP_h1', 'MP_h2', 'MP_q1', 'MP_q2', 'MP_q3', 'MP_q4', 
             'Spread', 'team_game_num', 'pstszn_gm', 'is_OT']]    
    df['dataset_gm'] = (df.groupby('Player')['MP'].cumcount().add(1).reset_index(drop=True))

    for col in ['MP']:
        for N in [3, 5, 10]:
            df[f'{col}_L{N}_avg'] = (
                df.groupby(['Player', 'Season'])[col]
                  .rolling(window=N, min_periods=1)
                  .mean()
                  .shift(1)
                  .reset_index(level=[0, 1], drop=True)
            )
            df[f'{col}_L{N}_avg'] = np.where(df['dataset_gm'] <= N, np.nan, df[f'{col}_L{N}_avg'])
            df[f'prev_team_mins_pct_L{N}'] = df[f'{col}_L{N}_avg'] / 240

    games_last_7_days = df.sort_values(['Player', 'Season', 'Date']).groupby(['Player', 'Season']).rolling('7D', on='Date', closed='left')['MP'].count().reset_index().rename(columns={"MP": "gms_L7_days"})
    games_last_7_days = games_last_7_days.drop_duplicates(
        subset=['Player', 'Season', 'Date']
    )
    df = df.merge(games_last_7_days, on=['Player', 'Season', 'Date'])
    df['gms_L7_days'] = df.gms_L7_days.fillna(0).astype(int)
        
    df['OT_adj_MP'] = np.where(df.is_OT != 0, df.MP - (5 * df.is_OT), df.MP)
    df['role'] = np.where(((df.OT_adj_MP >= 24) & (df.role != 1)), 1, df.role)
    df['role'] = np.where(((df.OT_adj_MP < 24) & (df.role == 1)), 2, df.role)
    df['role'] = np.where(((df.OT_adj_MP < 14) & (df.role == 2)), 3, df.role)
    for N in [1, 3, 5]:
        df[f"recent_role_L{N}"] = (
            df
            .groupby(["Player", "Season"])["role"]
            .rolling(5, min_periods=1)
            .apply(lambda arr: np.bincount(arr.astype(int), minlength=4).argmax(), raw=True)
            .reset_index(level=[0, 1], drop=True)
        )
        df[f"recent_role_L{N}"] = np.where(df['dataset_gm'] <= N, np.nan, df[f"recent_role_L{N}"])     
    
    df['game_spread_type'] = 0
    df['game_spread_type'] = np.where(abs(df.Spread) < 7, 1, df.game_spread_type) 
    df['game_spread_type'] = np.where((abs(df.Spread) >= 7) & (abs(df.Spread) <= 12), 2, df.game_spread_type) 
    df['game_spread_type'] = np.where(abs(df.Spread) > 12, 3, df.game_spread_type) 
    df['game_spread_type'] = np.where(df.is_OT > 0, 1, df.game_spread_type) 

    # Tell model games exist after players injuries/susp
    team_games = df[['Season', 'Team', 'Date', 'team_game_num']].drop_duplicates()
    players = df[['Season','Player','Team']].drop_duplicates()
    fabricated = (players.sort_values('Season').groupby('Player', as_index=False).last())
    fabricated['Season'] = fabricated['Season'] + 1
    players = pd.concat([players, fabricated], ignore_index=True).drop_duplicates(['Season','Player','Team'])
    expanded = team_games.merge(players, on=['Season', 'Team'], how='left')
    expanded = expanded.merge(df[['Season', 'Player', 'Date', 'MP']], on=['Season', 'Player', 'Date'], how='left').drop_duplicates(['Season', 'Date', 'Player', 'Team'])
    expanded['player_played'] = expanded['MP'].notna().astype(int)
    expanded['team_played_no_player'] = ((expanded['player_played'] == 0)).astype(int)
    expanded['tm_plays_after'] = (expanded.groupby(['Player'])['team_played_no_player'].shift(-1))
    expanded['missed_gms_aftr'] = 0
    expanded['missed_gms_aftr'] = np.where((expanded.player_played == 1) & (expanded.tm_plays_after == 1), 1, expanded.missed_gms_aftr)
    df = df.merge(expanded[['Date', 'Team', 'Player', 'missed_gms_aftr']], on=['Date', 'Team', 'Player'])
    
    df2 = create_df_missing(df, 'MP')
    df = df.merge(df2, on=["Season", "Date", "Team"], how='left')
    df['starters_out'] = df.starters_out.fillna(0)
    df['starters_out_L1'] = (
        df.groupby(['Player', 'Season'])['starters_out']
          .rolling(window=1, min_periods=1)
          .mean()
          .shift(1)
          .reset_index(level=[0, 1], drop=True)
    )
    df['starters_returning'] = np.where(df['starters_out_L1'] > df['starters_out'], df['starters_out_L1'] - df['starters_out'], 0)
    df['missed_games'] = (df.groupby(['Player', 'Team', 'Season'])['team_game_num'].diff().sub(1).fillna(0).astype(int))

    df['MP_Change'] = 0
    MP_Inc_conds = (
                    ((df.role != 3) & (df.starters_out > 2)) | 
                    ((df.role == 1) & (df.recent_role_L3 > 1)) | 
                    ((df.role == 1) & (df.recent_role_L5 > 1)) 
                   )
    df['MP_Change'] = np.where(MP_Inc_conds, 1, df['MP_Change'])
    MP_Dec_conds = (
                    ((df.role != 1) & (df.starters_returning > 2)) 
                   )
    df['MP_Change'] = np.where(MP_Dec_conds, -1, df['MP_Change'])
    
    df['MP_change_pct_L10'] = (df['MP'] - df['MP_L10_avg']) / df['MP_L10_avg']
    Injury_conds = (
        (
            ((df.role == 1) & (df['MP_change_pct_L10'] <= -0.25)) | 
            ((df.role == 2) & (df['MP_change_pct_L10'] <= -0.35)) | 
            ((df.role == 3) & (df['MP_change_pct_L10'] <= -0.45)) | 
            ((df.role == 1) & (df.MP_q4 == 0)
        ) & (df.missed_gms_aftr > 0) | (df.missed_games > 1))
    )
    df['Injured'] = (Injury_conds).astype(int)
    df['return_game'] = ((df.groupby('Player')['Injured'].shift(1) == 1) & (df.missed_games > 0)).astype(int)
    df['games_since_return'] = (df.groupby('Player')['return_game'].cumsum())
    df['games_since_return'] = (df.groupby(['Player', 'games_since_return']).cumcount())
    df['ramp_phase'] = 0
    df.loc[df.return_game == 1, 'ramp_phase'] = 1
    df.loc[(df.games_since_return.isin([1, 2, 3]) & (df.dataset_gm > 4)), 'ramp_phase'] = 2
    df.loc[df.games_since_return >= 4, 'ramp_phase'] = 0

    df = df.drop(['Season', 'Team_type', 'team_game_num', 'Spread', 'is_OT', 'starters_out_L1', 
                  'MP_h1', 'MP_h2', 'MP_q1', 'MP_q2', 'MP_q3', 'MP_q4', 'OT_adj_MP', 'MP_change_pct_L10',   
                  'missed_gms_aftr', 'Injured', 'return_game', 'games_since_return', 'dataset_gm'], axis=1)      
    return df

# Main Model

In [8]:
def setup_df_main(df, tgt_stat):
    
    df = df[['Season', 'Date', 'Team', 'Opp', 'Player', 'Pos', 'role', 'MP', 'team_game_num', 
             'PTS', 'FG', 'FGA', 'FG%', 'TPA', 'TPM', 'TP%', 'FT', 'FTA', 'FT%', 
             'MP_h1', 'MP_h2', 'MP_q1', 'MP_q2', 'MP_q3', 'MP_q4', 
             f'Off_{tgt_stat}', f'Off_L3_{tgt_stat}', f'Off_L5_{tgt_stat}', f'Off_L10_{tgt_stat}', f'Off_{tgt_stat}_Rk',
             f'Def_{tgt_stat}', f'Def_L3_{tgt_stat}', f'Def_L5_{tgt_stat}', f'Def_L10_{tgt_stat}', f'Def_{tgt_stat}_Rk',
             'Spread', 'Total', 'is_OT']]
    df['dataset_gm'] = (df.groupby('Player')['MP'].cumcount().add(1).reset_index(drop=True))
    
    # Create rolling + lag features    
    for col in ['MP', 'FG', 'FGA', 'FT', 'FTA', 'TPM', 'TPA']:
        for N in [3, 5, 10]:
            df[f'{col}_L{N}_avg'] = (
                df.groupby(['Player', 'Season'])[col]
                  .rolling(window=N, min_periods=1)
                  .mean()
                  .shift(1)
                  .reset_index(level=[0, 1], drop=True)
            )
            
    for N in [1, 3, 5]:
        df[f"recent_role_L{N}"] = (
            df
            .groupby(["Player", "Season"])["role"]
            .rolling(5, min_periods=1)
            .apply(lambda arr: np.bincount(arr.astype(int), minlength=4).argmax(), raw=True)
            .reset_index(level=[0, 1], drop=True)
        )

    df['OT_adj_MP'] = np.where(df.is_OT != 0, df.MP - (5 * df.is_OT), df.MP)
    df['role'] = np.where(((df.OT_adj_MP >= 24) & (df.role != 1)), 1, df.role)
    df['role'] = np.where(((df.OT_adj_MP < 24) & (df.role == 1)), 2, df.role)
    df['role'] = np.where(((df.OT_adj_MP < 14) & (df.role == 2)), 3, df.role)
    for N in [1, 3, 5]:
        df[f"recent_role_L{N}"] = (
            df
            .groupby(["Player", "Season"])["role"]
            .rolling(5, min_periods=1)
            .apply(lambda arr: np.bincount(arr.astype(int), minlength=4).argmax(), raw=True)
            .reset_index(level=[0, 1], drop=True)
        )
    
    df['game_spread_type'] = 0
    df['game_spread_type'] = np.where(abs(df.Spread) < 7, 1, df.game_spread_type) 
    df['game_spread_type'] = np.where((abs(df.Spread) >= 7) & (abs(df.Spread) <= 12), 2, df.game_spread_type) 
    df['game_spread_type'] = np.where(abs(df.Spread) > 12, 3, df.game_spread_type) 
    df['game_spread_type'] = np.where(df.is_OT > 0, 1, df.game_spread_type) 
    
    df['TeamPTS'] = (df.Total + (df.Spread * -1)) / 2
    df['TeamPTS_type'] = 0
    df['TeamPTS_type'] = np.where((df.TeamPTS > 104) & (df.TeamPTS <= 116), 1, df.TeamPTS_type)
    df['TeamPTS_type'] = np.where((df.TeamPTS > 116) & (df.TeamPTS <= 126), 2, df.TeamPTS_type)
    df['TeamPTS_type'] = np.where((df.TeamPTS > 126), 3, df.TeamPTS_type)
    
    # Tell model games exist after players injuries/susp
    team_games = df[['Season', 'Team', 'Date', 'team_game_num']].drop_duplicates()
    players = df[['Season','Player','Team']].drop_duplicates()
    fabricated = (players.sort_values('Season').groupby('Player', as_index=False).last())
    fabricated['Season'] = fabricated['Season'] + 1
    players = pd.concat([players, fabricated], ignore_index=True).drop_duplicates(['Season','Player','Team'])
    expanded = team_games.merge(players, on=['Season', 'Team'], how='left')
    expanded = expanded.merge(df[['Season', 'Player', 'Date', 'MP']], on=['Season', 'Player', 'Date'], how='left').drop_duplicates(['Season', 'Date', 'Player', 'Team'])
    expanded['player_played'] = expanded['MP'].notna().astype(int)
    expanded['team_played_no_player'] = ((expanded['player_played'] == 0)).astype(int)
    expanded['tm_plays_after'] = (expanded.groupby(['Player'])['team_played_no_player'].shift(-1))
    expanded['missed_gms_aftr'] = 0
    expanded['missed_gms_aftr'] = np.where((expanded.player_played == 1) & (expanded.tm_plays_after == 1), 1, expanded.missed_gms_aftr)
    df = df.merge(expanded[['Date', 'Team', 'Player', 'missed_gms_aftr']], on=['Date', 'Team', 'Player'])
    
    df2 = create_df_missing(df, 'MP')
    df = df.merge(df2, on=["Season", "Date", "Team"], how='left')
    df['starters_out'] = df.starters_out.fillna(0)
    df['starters_out_L1'] = (
        df.groupby(['Player', 'Season'])['starters_out']
          .rolling(window=1, min_periods=1)
          .mean()
          .shift(1)
          .reset_index(level=[0, 1], drop=True)
    )
    df['starters_returning'] = np.where(df['starters_out_L1'] > df['starters_out'], df['starters_out_L1'] - df['starters_out'], 0)
    df['missed_games'] = (df.groupby(['Player', 'Team', 'Season'])['team_game_num'].diff().sub(1).fillna(0).astype(int))

    df['MP_Change'] = 0
    MP_Inc_conds = (
                    ((df.role != 3) & (df.starters_out > 2)) | 
                    ((df.role == 1) & (df.recent_role_L3 > 1)) | 
                    ((df.role == 1) & (df.recent_role_L5 > 1)) 
                   )
    df['MP_Change'] = np.where(MP_Inc_conds, 1, df['MP_Change'])
    MP_Dec_conds = (
                    ((df.role != 1) & (df.starters_returning > 2)) 
                   )
    df['MP_Change'] = np.where(MP_Dec_conds, -1, df['MP_Change'])
    
    df['MP_change_pct_L10'] = (df['MP'] - df['MP_L10_avg']) / df['MP_L10_avg']
    Injury_conds = (
        (
            ((df.role == 1) & (df['MP_change_pct_L10'] <= -0.25)) | 
            ((df.role == 2) & (df['MP_change_pct_L10'] <= -0.35)) | 
            ((df.role == 3) & (df['MP_change_pct_L10'] <= -0.45)) | 
            ((df.role == 1) & (df.MP_q4 == 0)
        ) & (df.missed_gms_aftr > 0) | (df.missed_games > 1))
    )
    df['Injured'] = (Injury_conds).astype(int)
    df['return_game'] = ((df.groupby('Player')['Injured'].shift(1) == 1) & (df.missed_games > 0)).astype(int)
    df['games_since_return'] = (df.groupby('Player')['return_game'].cumsum())
    df['games_since_return'] = (df.groupby(['Player', 'games_since_return']).cumcount())
    df['ramp_phase'] = 0
    df.loc[df.return_game == 1, 'ramp_phase'] = 1
    df.loc[(df.games_since_return.isin([1, 2, 3]) & (df.dataset_gm > 4)), 'ramp_phase'] = 2
    df.loc[df.games_since_return >= 4, 'ramp_phase'] = 0
    
    df = df.drop(['Season', 'team_game_num', 'is_OT', 'Spread', 'Total', 'TeamPTS', 
                 'FG', 'FGA', 'FG%', 'TPA', 'TPM', 'TP%', 'FT', 'FTA', 'FT%', 
                 'MP_h1', 'MP_h2', 'MP_q1', 'MP_q2', 'MP_q3', 'MP_q4', 
                 'OT_adj_MP', 'MP_change_pct_L10', 'starters_out_L1', 
                 'missed_gms_aftr', 'Injured', 'return_game', 'games_since_return', 'dataset_gm'], axis=1)
        
    return df

### Today's predictions

In [12]:
def generate_predictions(tgt_stat):
    
    df_pred = create_base_df()
        
    mins_model = xgb.XGBRegressor()
    mins_model.load_model("../ML_models/mins_model.json")
    stat_model = xgb.XGBRegressor()
    stat_model.load_model(f"../ML_models/{tgt_stat}_model.json")
    
    df_lines = pd.read_csv(f"../tables/2025/parlay_lines.csv")
    df_lines['Date'] = pd.to_datetime(df_lines.Date)
    df_lines = df_lines[~(df_lines.Team.isnull())]

    # Predict Mins
    df_lines["Team"] = team_encoder.transform(df_lines["Team"])
    df_pred = df_pred.merge(df_lines[['Date', 'Team', 'Spread', 'Total']], on=['Date', 'Team'], how='left')
    df_pred = df_pred[~df_pred[['Date', 'Team', 'Player']].duplicated(keep='last')]
    df_pred['Spread_x'] = np.where(df_pred.Spread_x.isnull(), df_pred.Spread_y, df_pred.Spread_x)
    df_pred['Total_x'] = np.where(df_pred.Total_x.isnull(), df_pred.Total_y, df_pred.Total_x)
    df_pred = df_pred.rename(columns={"Spread_x": "Spread", "Total_x": "Total"}).drop(['Spread_y', 'Total_y'], axis=1)
    df_pred_mins = setup_df_mins(con, df_pred)
    
#     # debug mins preds
#     mins_chk = df_pred_mins[df_pred_mins.Date == now]
#     mins_chk['Team'] = team_encoder.inverse_transform(mins_chk["Team"])
#     mins_chk['Player'] = player_encoder.inverse_transform(mins_chk["Player"])
#     if mins_chk.shape[0] >= 50:
#         for tm in mins_chk.Team.unique():
#             display(mins_chk[mins_chk.Team == tm])
#     else:
#         display(mins_chk)
    
    df_pred_mins = df_pred_mins.drop(['Date', 'MP'], axis=1)
    df_pred['MP'] = mins_model.predict(df_pred_mins)

    # Predict Stat
    df_pred = setup_df_main(df_pred, tgt_stat)
    feature_cols = [col for col in df_pred.columns if col not in ['Date', tgt_stat]]
    df_pred = df_pred[df_pred.Date == now][feature_cols]
    df_pred[f"{tgt_stat}_proj"] = stat_model.predict(df_pred)

    # Setup results
    df_pred['Team'] = team_encoder.inverse_transform(df_pred["Team"])
    df_lines['Team'] = team_encoder.inverse_transform(df_lines["Team"])
    df_pred['Opp'] = team_encoder.inverse_transform(df_pred["Opp"])
    df_pred['Player'] = player_encoder.inverse_transform(df_pred["Player"])
    df_pred['Pos'] = position_encoder.inverse_transform(df_pred["Pos"])

    df_lines = df_lines[df_lines.Date == now][['Team', 'Player', f'{tgt_stat}_line']]
    df_pred = df_pred.merge(df_lines, on=['Team', 'Player'])
    df_pred = df_pred[~(df_pred[f'{tgt_stat}_line'].isnull())]
    df_pred['Diff'] = abs((df_pred[f'{tgt_stat}_line'] - df_pred[f'{tgt_stat}_proj']))
    df_pred['Diff2'] = abs((df_pred['MP'] - df_pred['MP_L5_avg']))
    df_pred = df_pred.sort_values('Diff', ascending=False).drop(['Diff', 'Diff2'], axis=1)

#     # debug stat preds
#     if df_pred.shape[0] >= 50:
#         print(df_pred.shape[0], 'rows')
#         for tm in df_pred.Team.unique():
#             display(df_pred[df_pred.Team == tm])
#     else:
#         display(df_pred)

    tds_picks = df_pred[['Team', 'Player', 'Pos', 'Opp', 'MP', 'MP_L5_avg', 'game_spread_type', f'{tgt_stat}_line', f'{tgt_stat}_proj']]
    if tds_picks.shape[0] >= 50:
        print(tds_picks.shape[0], 'rows')
        for tm in tds_picks.Team.unique():
            display(tds_picks[tds_picks.Team == tm])
    else:
        display(tds_picks)
    tds_picks.insert(0, 'Date', pd.to_datetime(now))
#     partition_save_df(tds_picks, f"../tables/2025/gmday_preds_{tgt_stat}.csv")

In [11]:
try: 
    generate_predictions('PTS')
except Exception as e:
    email('PTS', e)
    raise Exception(e)

131 rows


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
64,TOR,Brandon Ingram,SF,PHI,22.1992,30.191581,1,23.5,12.688999
21,TOR,Jamal Shead,PG,PHI,19.98724,23.700059,1,9.5,5.82763
52,TOR,Ja'Kobe Walter,SG,PHI,19.859539,19.567445,1,9.5,5.924194
53,TOR,Gradey Dick,SG,PHI,18.782957,15.08446,1,8.5,5.146115
91,TOR,Scottie Barnes,PF,PHI,36.68462,35.34024,1,20.5,17.31905
103,TOR,Collin Murray-Boyles,PF,PHI,32.034,27.104463,1,10.5,9.3845
115,TOR,Sandro Mamukelashvili,C,PHI,30.466541,23.982037,1,12.5,11.92347
10,TOR,Immanuel Quickley,PG,PHI,35.401215,32.830388,1,19.5,19.341419


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
6,DEN,Jamal Murray,PG,MIL,34.391682,32.647087,1,26.5,35.814831
49,DEN,Christian Braun,SG,MIL,30.0935,26.228261,1,11.5,6.793854
85,DEN,Aaron Gordon,PF,MIL,29.480959,21.293884,1,17.5,15.963993
38,DEN,Tim Hardaway Jr.,SG,MIL,21.456799,33.334396,1,13.5,12.173733
73,DEN,Peyton Watson,SF,MIL,37.118767,35.07777,1,18.5,19.586481


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
14,POR,Jrue Holiday,PG,NYK,30.040285,21.731239,1,9.5,18.327023
61,POR,Deni Avdija,SF,NYK,38.431068,36.601768,1,26.5,33.601574
121,POR,Donovan Clingan,C,NYK,33.961842,31.497495,1,10.5,13.590685
94,POR,Toumani Camara,PF,NYK,37.755119,34.627591,1,13.5,15.981298
23,POR,Shaedon Sharpe,SG,NYK,33.839455,32.44851,1,21.5,20.662472
129,POR,Robert Williams,C,NYK,17.715406,13.176799,1,4.5,4.723005


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
22,MIN,Anthony Edwards,SG,SAS,37.087063,34.955148,1,27.5,35.580345
111,MIN,Naz Reid,C,SAS,21.070822,25.191517,1,13.5,8.108838
116,MIN,Rudy Gobert,C,SAS,34.449745,32.344026,1,10.5,15.84306
32,MIN,Donte DiVincenzo,SG,SAS,33.842453,29.255724,1,13.5,9.406315
95,MIN,Jaden McDaniels,PF,SAS,32.903664,28.559544,1,13.5,16.250536
88,MIN,Julius Randle,PF,SAS,34.257137,32.51266,1,19.5,21.711851


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
26,MIA,Norman Powell,SG,OKC,29.221563,29.834697,3,19.5,27.387421
20,MIA,Davion Mitchell,PG,OKC,28.532516,28.380412,3,8.5,14.97807
117,MIA,Kel'el Ware,C,OKC,20.663593,27.783943,3,9.5,13.972959
69,MIA,Jaime Jaquez Jr.,SF,OKC,20.716204,26.563256,3,12.5,14.637495
109,MIA,Bam Adebayo,C,OKC,29.525358,31.512854,3,14.5,12.488502
70,MIA,Andrew Wiggins,SF,OKC,28.590954,29.563124,3,13.5,13.885085


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
77,SAS,Julian Champagnie,SF,MIN,33.932552,31.076299,1,11.5,19.301716
120,SAS,Luke Kornet,C,MIN,21.281755,27.773896,1,6.5,13.011765
106,SAS,Victor Wembanyama,C,MIN,29.859135,23.541755,1,20.5,25.876074
100,SAS,Harrison Barnes,PF,MIN,30.645403,26.131498,1,8.5,11.721201
74,SAS,Keldon Johnson,SF,MIN,21.401016,27.604317,1,11.5,8.382056
47,SAS,Dylan Harper,SG,MIN,19.979246,21.493315,1,9.5,7.129547
2,SAS,De'Aaron Fox,PG,MIN,36.541512,33.995211,1,18.5,17.959656
4,SAS,Stephon Castle,PG,MIN,33.385185,32.986485,1,16.5,16.069386


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
15,HOU,Reed Sheppard,SG,SAC,20.501705,24.452784,3,13.5,19.658159
105,HOU,Alperen Sengun,C,SAC,29.706995,24.756642,3,20.5,16.406336
5,HOU,Amen Thompson,SF,SAC,32.69289,36.141577,3,19.5,17.657049
81,HOU,Jabari Smith Jr.,PF,SAC,32.586296,35.881503,3,15.5,13.804819
123,HOU,Steven Adams,C,SAC,20.374338,25.250513,3,7.5,6.435905
60,HOU,Kevin Durant,SF,SAC,33.628376,36.624766,3,27.5,28.520247
104,HOU,Dorian Finney-Smith,PF,SAC,18.170443,16.675285,3,4.5,3.795691


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
56,BRK,Terance Mann,SG,MEM,28.268141,21.589084,2,8.5,14.438229
98,BRK,Danny Wolf,PF,MEM,28.003359,21.60125,2,10.5,8.331314
87,BRK,Noah Clowney,PF,MEM,31.56629,30.071037,2,15.5,13.731909
125,BRK,Day'Ron Sharpe,C,MEM,19.873707,24.705927,2,7.5,7.979961


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
63,GSW,Jimmy Butler,SF,ATL,22.085606,28.97444,1,20.5,15.3951
101,GSW,Quinten Post,PF,ATL,28.426371,15.445221,1,7.5,12.364469
126,GSW,Al Horford,C,ATL,18.562191,16.041546,1,5.5,9.025832
17,GSW,De'Anthony Melton,PG,ATL,20.436646,24.721752,1,10.5,7.907628
40,GSW,Brandin Podziemski,SG,ATL,20.797379,25.705088,1,9.5,7.193746
99,GSW,Draymond Green,PF,ATL,31.178934,22.940526,1,9.5,11.574181
3,GSW,Stephen Curry,PG,ATL,35.962883,31.36476,1,28.5,27.703243
44,GSW,Moses Moody,SG,ATL,29.268749,25.115842,1,9.5,9.300642


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
28,ATL,Nickeil Alexander-Walker,SG,GSW,22.065485,32.081922,1,18.5,13.636277
113,ATL,Kristaps Porzingis,C,GSW,17.891703,17.631705,1,12.5,10.742764
45,ATL,Dyson Daniels,SG,GSW,37.434597,32.71776,1,12.5,14.207202
65,ATL,Jalen Johnson,SF,GSW,38.148182,34.069304,1,22.5,20.978157
114,ATL,Onyeka Okongwu,C,GSW,35.635239,31.69371,1,14.5,15.276972


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
48,MEM,Cam Spencer,SG,BRK,30.890297,28.72378,2,13.5,9.10265
92,MEM,Santi Aldama,PF,BRK,20.389584,28.933611,2,14.5,10.778086
19,MEM,Vince Williams Jr.,SG,BRK,19.004324,18.597509,2,7.5,4.254595
118,MEM,Jock Landale,C,BRK,28.3491,22.978926,2,11.5,13.328555
46,MEM,Jaylen Wells,SG,BRK,30.903803,31.924485,2,11.5,9.988389
55,MEM,Kentavious Caldwell-Pope,SG,BRK,18.130449,19.477577,2,7.5,8.551744
108,MEM,Jaren Jackson Jr.,C,BRK,33.564014,33.71525,2,19.5,19.239082
41,MEM,Cedric Coward,SG,BRK,28.181547,22.063161,2,13.5,13.515941


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
71,PHI,Kelly Oubre Jr.,SF,TOR,20.043953,24.208294,1,11.5,7.40989
84,PHI,Paul George,PF,TOR,36.508858,33.482439,1,17.5,14.907802
35,PHI,Quentin Grimes,SG,TOR,22.009047,32.069554,1,12.5,10.523707
31,PHI,VJ Edgecombe,SG,TOR,39.384682,35.465653,1,16.5,18.163483
7,PHI,Tyrese Maxey,PG,TOR,39.74902,37.273232,1,29.5,27.854719
127,PHI,Andre Drummond,C,TOR,29.586933,9.27633,1,8.5,10.118086


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
82,MIL,Giannis Antetokounmpo,PF,DEN,31.45014,29.807447,1,28.5,24.555
96,MIL,Bobby Portis,PF,DEN,20.715845,22.479314,1,11.5,8.994323
97,MIL,Kyle Kuzma,PF,DEN,20.21488,22.772549,1,9.5,8.037911
112,MIL,Myles Turner,C,DEN,30.984467,26.815885,1,10.5,11.845847
9,MIL,Kevin Porter Jr.,PG,DEN,38.73679,36.939663,1,17.5,18.122568
13,MIL,Ryan Rollins,PG,DEN,34.306145,32.973775,1,14.5,14.435074


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
18,SAC,Russell Westbrook,SF,HOU,30.616783,30.224123,3,14.5,10.693207
36,SAC,Zach LaVine,SG,HOU,32.122326,27.900856,3,19.5,16.023083
58,SAC,Nique Clifford,SG,HOU,19.818607,21.656855,3,6.5,3.990608
50,SAC,Malik Monk,SG,HOU,18.136801,9.50055,3,10.5,7.995875
83,SAC,DeMar DeRozan,PF,HOU,31.962492,32.647873,3,18.5,16.803965
57,SAC,Keon Ellis,SG,HOU,19.453407,22.637222,3,6.5,5.297385
122,SAC,Maxime Raynaud,C,HOU,29.852377,30.991081,3,10.5,9.455486


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
16,ORL,Anthony Black,PG,NOP,34.644871,32.042197,2,19.5,15.914518
93,ORL,Paolo Banchero,PF,NOP,36.123909,36.142085,2,24.5,21.026386
24,ORL,Desmond Bane,SG,NOP,35.151028,32.807819,2,21.5,19.488739
54,ORL,Jase Richardson,SG,NOP,18.658167,18.944253,2,8.5,10.430037
124,ORL,Goga Bitadze,C,NOP,28.113998,15.335355,2,9.5,7.897514
80,ORL,Noah Penda,SF,NOP,30.751072,19.642443,2,9.5,9.877618


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
72,NYK,Josh Hart,SF,POR,33.954708,32.126924,1,10.5,13.53994
34,NYK,Miles McBride,SG,POR,21.397329,30.73511,1,10.5,12.148248
0,NYK,Jalen Brunson,PG,POR,37.206287,36.59678,1,28.5,27.385393
128,NYK,Mitchell Robinson,C,POR,20.509573,24.408742,1,4.5,3.685634
107,NYK,Karl-Anthony Towns,C,POR,31.492443,24.899896,1,19.5,18.839008
89,NYK,OG Anunoby,PF,POR,36.723598,35.876955,1,15.5,15.847692
42,NYK,Jordan Clarkson,SG,POR,19.449072,17.513796,1,7.5,7.16561
67,NYK,Mikal Bridges,SF,POR,35.829391,36.17344,1,14.5,14.217122


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
39,WAS,Bilal Coulibaly,SG,PHO,28.845047,29.382907,3,10.5,13.344759
27,WAS,CJ McCollum,SG,PHO,20.420061,28.253554,3,14.5,12.699813
68,WAS,Kyshawn George,SF,PHO,29.656078,27.93689,3,12.5,14.273004
75,WAS,Khris Middleton,SF,PHO,26.551008,20.195627,3,9.5,7.863298
79,WAS,Justin Champagnie,SF,PHO,19.955051,23.626708,3,8.5,10.06894
37,WAS,Tre Johnson,SG,PHO,27.622934,26.750179,3,13.5,13.79281


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
51,OKC,Cason Wallace,SG,MIA,20.34494,25.385196,3,6.5,3.802283
43,OKC,Aaron Wiggins,SG,MIA,27.557508,24.440887,3,9.5,10.448969
76,OKC,Luguentz Dort,SF,MIA,28.11953,26.067982,3,8.5,9.383653
86,OKC,Chet Holmgren,PF,MIA,29.838762,30.051599,3,17.5,16.780487
1,OKC,Shai Gilgeous-Alexander,PG,MIA,31.358166,32.849619,3,30.5,30.994329
29,OKC,Jalen Williams,SG,MIA,31.243525,30.079089,3,18.5,18.942802
33,OKC,Ajay Mitchell,SG,MIA,20.456688,29.229273,3,12.5,12.33849


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
25,PHO,Devin Booker,SG,WAS,31.210972,33.244641,3,27.5,24.82036
11,PHO,Collin Gillespie,PG,WAS,28.582413,31.107452,3,12.5,10.302176
30,PHO,Grayson Allen,SG,WAS,20.230078,22.164806,3,13.5,11.442109
110,PHO,Mark Williams,C,WAS,27.490305,22.864356,3,12.5,11.440335
78,PHO,Royce O'Neale,SF,WAS,29.230446,31.030149,3,9.5,10.541253
102,PHO,Oso Ighodaro,PF,WAS,20.156256,22.032922,3,5.5,6.184784
66,PHO,Dillon Brooks,SF,WAS,29.621733,32.302125,3,19.5,18.837486


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,game_spread_type,PTS_line,PTS_proj
8,NOP,Jordan Poole,PG,ORL,20.375111,23.742268,2,12.5,9.934707
59,NOP,Micah Peavy,SG,ORL,19.40373,19.958307,2,6.5,4.35697
62,NOP,Trey Murphy III,SF,ORL,36.689171,28.85042,2,20.5,22.319168
12,NOP,Jeremiah Fears,PG,ORL,29.383606,27.140632,2,13.5,14.883833
90,NOP,Zion Williamson,PF,ORL,31.388309,29.827905,2,23.5,22.206984
130,NOP,Yves Missi,C,ORL,18.410673,18.734984,2,4.5,5.498514
119,NOP,Derik Queen,C,ORL,30.569775,25.884009,2,13.5,13.207525
