In [1]:
import pandas as pd
import numpy as np
import duckdb
import warnings
import os

import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

# email
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

import warnings
from datetime import datetime, timedelta

# Notebook-wide settings: silence library warnings and show every column when
# a DataFrame is displayed.
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)

# Stat categories handled by the prediction pipeline, and a throw-away
# in-memory DuckDB connection that is handed to the feature builders.
categories = ['PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK']
con = duckdb.connect(database=":memory:")

# The working directory tells us whether this is the developer machine or the
# cloud runner; each one gets its own clock offset (in hours).
cwd = os.path.abspath(os.getcwd()).replace("\\", "/")
RUN_LOCATION = "local" if cwd.startswith("C:/Users/Rodolfo/") else "cloud"
time_offset = {"local": 3, "cloud": -5}

# "Today" as a YYYY-MM-DD string: wall clock + location offset - 3 hours.
now = (datetime.now() + timedelta(hours=time_offset[RUN_LOCATION] - 3)).date().isoformat()
print("Today's date:", now)

Today's date: 2026-01-06


In [2]:
# Execute the shared utilities notebook inside this kernel so that the names it
# defines become available to the cells below.
# NOTE(review): partition_save_df (called in generate_predictions) is not
# defined in this notebook - presumably it comes from common_utils; confirm.
%run ./common_utils.ipynb

# Initial Functions

In [3]:
def email(model, error):
    """Send an error-notification email for a failed prediction run.

    Parameters
    ----------
    model : str
        Short model name (e.g. ``'PTS'``); reported in the body as
        ``{model}_model``.
    error : Exception or str
        The error to report; interpolated into the message body.

    Returns
    -------
    None

    Notes
    -----
    The Gmail app password is read from the ``NBA_EMAIL_PASSWORD`` environment
    variable rather than being stored in the notebook. When the variable is
    missing or empty the function prints a notice and returns without
    contacting the SMTP server, so a missing secret never masks the error the
    caller is about to re-raise.

    SECURITY: the app password that used to be hard-coded here has been
    exposed in source control and must be revoked and re-issued.
    """
    # Email details
    sender_email = "rodolfoe7157@gmail.com"
    receiver_email = "rodolfoe7157@gmail.com"
    password = os.environ.get("NBA_EMAIL_PASSWORD")
    if not password:
        print("Email not sent: NBA_EMAIL_PASSWORD is not set.")
        return

    subject = "NBA create_Predictions error"
    body = f"Model: {model}_model\nERROR: {error}"

    # Create message
    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = receiver_email
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))

    # Connect to Gmail SMTP server (implicit TLS on port 465) and send
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.login(sender_email, password)
        server.send_message(msg)

    print("Email sent successfully!")

In [4]:
def create_base_df():
    """Assemble the per-player, per-game base table for seasons 2022-2025.

    Reads four CSVs per season from ``../tables/{season}/``
    (``parlay_stats``, ``nba_schedule``, ``season_gamelogs``, ``injuries``),
    tags every row with its ``Season`` and combines them:

    1. game-log box scores (with derived PR / PA / RA / PRA / STL_BLK) are
       left-joined onto ``parlay_stats`` by Season, Date, Team and Player;
    2. the schedule is expanded to one row per team per game and joined by
       Date and Team, adding ``Team_type``, ``team_game_num``, ``Spread``,
       ``Total``, ``is_Win`` and ``Szn_Wins``;
    3. the injury ``Status`` is left-joined and reconciled with ``Active``.

    Returns
    -------
    pandas.DataFrame
        One row per (Date, Team, Player) match of the joins above. Duplicate
        rows in ``injuries`` are not removed here and would duplicate
        output rows.

    Raises
    ------
    FileNotFoundError
        If any of the expected CSV files is missing.
    """
    seasons = [2022, 2023, 2024, 2025]

    def _load_all_seasons(table):
        # Read one CSV per season and concatenate once (instead of growing a
        # frame with a concat per iteration).
        frames = []
        for season in seasons:
            frame = pd.read_csv(f"../tables/{season}/{table}.csv")
            frame['Season'] = season
            frames.append(frame)
        return pd.concat(frames)

    df = _load_all_seasons("parlay_stats")
    df_schd = _load_all_seasons("nba_schedule")
    df_gms = _load_all_seasons("season_gamelogs")
    df_inj = _load_all_seasons("injuries")

    df['Date'] = pd.to_datetime(df.Date)
    df_schd['Date'] = pd.to_datetime(df_schd.Date)
    df_gms['Date'] = pd.to_datetime(df_gms.Date)
    df_inj['Date'] = pd.to_datetime(df_inj.Date)

    # A player/team/date may appear more than once in the game logs; keep the
    # last occurrence.
    df_gms = df_gms[~df_gms[['Date', 'Team', 'Player']].duplicated(keep='last')]

    # Box-score columns: normalise names, drop columns that parlay_stats
    # already provides, and add the combo stats.
    stats = (
        df_gms
        .rename(columns={"3PM": "TPM", "3PA": "TPA", "3P%": "TP%", "TRB": "REB"})
        .drop(['Pos', 'Opp'], axis=1)
    )
    stats['PR'] = stats.PTS + stats.REB
    stats['PA'] = stats.PTS + stats.AST
    stats['RA'] = stats.REB + stats.AST
    stats['PRA'] = stats.PTS + stats.REB + stats.AST
    stats['STL_BLK'] = stats.STL + stats.BLK
    df = df.merge(stats, on=['Season', 'Date', 'Team', 'Player'], how='left')

    # Expand the schedule to one row per team per game (away rows first, then
    # home rows). Only the columns that survive into the result are carried.
    game_cols = ['Season', 'Date', 'AwayPTS', 'HomePTS', 'is_OT', 'cup_gm', 'pstszn_gm']
    sides = []
    for team_type, abv_col in [('Away', 'AwayABV'), ('Home', 'HomeABV')]:
        side = df_schd[game_cols + [abv_col]].rename(columns={abv_col: 'Team'})
        side['Team_type'] = team_type
        sides.append(side)
    df_mtch = pd.concat(sides)
    df_mtch = df_mtch[['Season', 'Date', 'Team', 'Team_type', 'AwayPTS', 'HomePTS', 'is_OT', 'cup_gm', 'pstszn_gm']]
    df_mtch = df_mtch.sort_values(["Team", "Date"])

    # Running game number and results from each team's own point of view.
    df_mtch['team_game_num'] = df_mtch.groupby(["Team", "Season"]).cumcount() + 1
    df_mtch['Spread'] = np.where(df_mtch.Team_type == 'Home', df_mtch.HomePTS - df_mtch.AwayPTS, df_mtch.AwayPTS - df_mtch.HomePTS)
    df_mtch['Total'] = df_mtch.AwayPTS + df_mtch.HomePTS
    df_mtch['is_Win'] = np.where(df_mtch.Spread > 0, 1, 0)
    df_mtch['Szn_Wins'] = df_mtch.groupby(['Season', 'Team'])['is_Win'].cumsum()

    # Season and Team_type are taken from the schedule side of the join.
    df = df.drop(['Season', 'Team_type'], axis=1).merge(df_mtch, on=['Date', 'Team'])

    # Injury status, reconciled with the Active flag:
    #   active with no report        -> 'Available'
    #   Active == 0                  -> 'Out'
    #   reported 'Out' but not Active == 0 -> 'Available'
    df = df.merge(df_inj[['Date', 'Team', 'Player', 'Status']], on=['Date', 'Team', 'Player'], how='left')
    df['Status'] = np.where((df.Active == 1) & (df.Status.isnull()), 'Available', df.Status)
    df['Status'] = np.where((df.Active == 0), 'Out', df.Status)
    df['Status'] = np.where((df.Status == 'Out') & (df.Active != 0), 'Available', df.Status)

    return df

# Minutes Projection Model

In [5]:
def setup_df_mins(con, df):
    """Engineer the feature columns used by the minutes-played (MP) model.

    Parameters
    ----------
    con
        Not referenced anywhere in this function.
    df : pandas.DataFrame
        Must contain the columns selected in the first statement below.

    Returns
    -------
    pandas.DataFrame
        The selected columns plus the engineered features, minus the helper
        columns listed in the final ``drop``. ``Date`` and ``MP`` are kept.

    Notes
    -----
    Several statements depend on the current row order and on index alignment
    when results are assigned back. The NOTE(review) comments mark spots whose
    behaviour looks unintended; any change there alters the feature values, so
    it should be coordinated with whatever code trained the saved models.
    """
    
    df = df[['Season', 'Date', 'Team', 'Team_type', 'Opp', 'Player', 'Pos', 'B2B', 'MP',
             'Spread', 'Total', 'team_game_num', 'Szn_Wins', 'pstszn_gm', 'is_OT']]
    
    # Rolling mean of MP over the last N rows of each (Player, Season) group,
    # moved down one row so a row does not see its own MP.
    # NOTE(review): .shift(1) runs on the concatenated groupby-rolling result,
    # not per group, so the first row of each group appears to receive the
    # last value of the preceding group - confirm this is intended.
    for col in ['MP']:
        for N in [1, 3, 5, 10]:
            df[f'{col}_L{N}_avg'] = (
                df.groupby(['Player', 'Season'])[col]
                  .rolling(window=N, min_periods=1)
                  .mean()
                  .shift(1)
                  .reset_index(level=[0, 1], drop=True)
            )

    # Number of non-null MP rows in the 7-day window before each game
    # (closed='left' leaves the current date out of the window).
    games_last_7_days = df.sort_values(['Player', 'Season', 'Date']).groupby(['Player', 'Season']).rolling('7D', on='Date', closed='left')['MP'].count().reset_index().rename(columns={"MP": "gms_L7_days"})
    games_last_7_days = games_last_7_days.drop_duplicates(
        subset=['Player', 'Season', 'Date']
    )
    # Inner merge (default `how`); this also resets the index to a RangeIndex.
    df = df.merge(games_last_7_days, on=['Player', 'Season', 'Date'])
    df['gms_L7_days'] = df.gms_L7_days.fillna(0).astype(int)
    
    # Previous row's MP (within Player/Season) as a share of 240.
    df['prev_team_mins_pct'] = (df.groupby(['Player', 'Season'])['MP'].shift(1)) / 240
           
    # Per-game role flags from that game's MP: <8 reserve, 8-25 bench, >25 starter.
    df['reserve_td'] = (df.MP < 8).astype(int)
    df['bench_td']   = ((df.MP >= 8) & (df.MP <= 25)).astype(int)
    df['starter_td'] = (df.MP > 25).astype(int)
    # Season-level role = the flag with the highest count for the
    # (Season, Player) pair. The counts cover every row of that season,
    # including rows dated after the one being featurised.
    role_counts = df.groupby(['Season', 'Player'])[['reserve_td', 'bench_td', 'starter_td']].sum()
    role_counts['most_common_role'] = role_counts[['reserve_td', 'bench_td', 'starter_td']].idxmax(axis=1)
    role_counts['reserve'] = (role_counts['most_common_role'] == 'reserve_td').astype(int)
    role_counts['bench']   = (role_counts['most_common_role'] == 'bench_td').astype(int)
    role_counts['starter'] = (role_counts['most_common_role'] == 'starter_td').astype(int)
    df = df.merge(role_counts[['reserve', 'bench', 'starter']], on=['Season', 'Player'], how='left')
    
    # Numeric role code: 1 = starter, 2 = bench, 3 = reserve (0 if none matched).
    df['role'] = 0
    df['role'] = np.where(df.starter == 1, 1, df.role)
    df['role'] = np.where(df.bench == 1, 2, df.role)
    df['role'] = np.where(df.reserve == 1, 3, df.role)
    
    # Recent role: the flag with the highest rolling sum over the previous N
    # rows, encoded with the same 1/2/3 codes; helper columns are dropped.
    # NOTE(review): grouping is by Player only (not Season) and, as above,
    # .shift(1) is applied after the groups are concatenated - confirm.
    for N in [1, 3, 5]:
        for role in ['reserve_td', 'bench_td', 'starter_td']:
            df[f'{role}_last{N}'] = (
                df.sort_values(['Player', 'Date']).groupby('Player')[role]
                  .rolling(N, min_periods=1)
                  .sum()
                  .shift(1)
                  .reset_index(0, drop=True)
            )
        rec_role_cols = [f'{role}_last{N}' for role in ['reserve_td', 'bench_td', 'starter_td']]
        df[f'recent_most_common_role_L{N}'] = df[rec_role_cols].idxmax(axis=1)
        df[f'recent_role_L{N}'] = 0
        df[f'recent_role_L{N}'] = np.where(df[f'recent_most_common_role_L{N}'] == f'starter_td_last{N}', 1, df[f'recent_role_L{N}'])
        df[f'recent_role_L{N}'] = np.where(df[f'recent_most_common_role_L{N}'] == f'bench_td_last{N}', 2, df[f'recent_role_L{N}'])
        df[f'recent_role_L{N}'] = np.where(df[f'recent_most_common_role_L{N}'] == f'reserve_td_last{N}', 3, df[f'recent_role_L{N}'])      
        df = df.drop(f'recent_most_common_role_L{N}', axis=1)
        for role in ['reserve_td', 'bench_td', 'starter_td']:
            df = df.drop(f'{role}_last{N}', axis=1)
      
    # Gap in team_game_num since the player's previous row for the same team
    # and season, minus one; 0 for the first row of a group.
    df['missed_games'] = (
        df.groupby(['Player', 'Team', 'Season'])['team_game_num']      
          .diff()
          .sub(1)
          .fillna(0)
          .astype(int)
    )
    
    # Bucket the game by Spread: 1, 2 or 3.
    # NOTE(review): abs() wraps the boolean comparison rather than Spread, so
    # these conditions evaluate as Spread < 6, 6 <= Spread <= 14 and
    # Spread > 14 on the signed value. abs(df.Spread) < 6 etc. was presumably
    # intended - confirm, and keep consistent with model training.
    df['game_spread_type'] = 0
    df['game_spread_type'] = np.where(abs(df.Spread < 6), 1, df.game_spread_type) 
    df['game_spread_type'] = np.where((abs(df.Spread >= 6) & abs(df.Spread <= 14)), 2, df.game_spread_type) 
    df['game_spread_type'] = np.where(abs(df.Spread > 14), 3, df.game_spread_type) 
    
    # Forward-looking helpers (they read the NEXT row) used only to derive the
    # Injured flag; both are dropped before returning.
    df['missed_games_after'] = (
        df.groupby(['Player', 'Team', 'Season'])['team_game_num']
          .diff(-1)
          .abs()
          .sub(1)
          .fillna(0)
          .astype(int)
    )
    df['MP_diff_next_avg5'] = df.groupby('Player')['MP'].shift(-1) - df['MP_L5_avg']
    # Injured: games were missed after this row and the next appearance is
    # well below the L5 average (more than 10 min for role 1, 5 min for role 2).
    df['Injured'] = (
                        ((df.missed_games_after > 0) & (df.MP_diff_next_avg5 < -10) & (df.role == 1)) | 
                        ((df.missed_games_after > 0) & (df.MP_diff_next_avg5 < -5) & (df.role == 2))
                    ).astype(int)
    
    # return_game: the player's previous row was flagged Injured.
    df['return_game'] = (df.groupby('Player')['Injured'].shift(1) == 1).astype(int)
    # First a running count of return games per player, then overwritten with
    # the row position inside each (Player, count) run, i.e. rows elapsed
    # since the latest return game (0 on the return game itself; rows before
    # any return are counted from the player's first row).
    df['games_since_return'] = (df.groupby('Player')['return_game'].cumsum())
    df['games_since_return'] = (df.groupby(['Player', 'games_since_return']).cumcount())
    # ramp_phase: 1 on the return game, 2 for the next two rows, 3 afterwards.
    df['ramp_phase'] = 0
    df.loc[df.return_game == 1, 'ramp_phase'] = 1
    df.loc[df.games_since_return.isin([1, 2]), 'ramp_phase'] = 2
    df.loc[df.games_since_return >= 3, 'ramp_phase'] = 3
    df['starter_return'] = ((df.return_game == 1) & (df.role == 1)).astype(int)
    df['bench_return']   = ((df.return_game == 1) & (df.role == 2)).astype(int)
    
    # Relative change of the L1 average versus the L10 average (the column
    # name says L5, but L1 and L10 are the inputs).
    df['MP_change_pct_L5'] = (df['MP_L1_avg'] - df['MP_L10_avg']) / df['MP_L10_avg']
    df['Early_stop'] = (
        (df['MP'] < 5) |
        ((df.role == 1) & (df.MP_change_pct_L5 <= -0.35)) |   # starters
        ((df.role == 2) & (df.MP_change_pct_L5 <= -0.45)) |   # bench
        ((df.role == 3) & (df.MP_change_pct_L5 <= -0.55))     # reserve
    ).astype(int)
    # Replace each row's flag with the flag of the player's NEXT row
    # (0 when there is no next row).
    df['Early_stop'] = df.groupby('Player')['Early_stop'].shift(-1).fillna(0).astype(int)
    df['MP_increase'] = (
        ((df.role == 1) & (df.MP_change_pct_L5 >= 0.15)) |
        ((df.role == 2) & (df.MP_change_pct_L5 >= 0.10)) |
        ((df.role == 3) & (df.MP_change_pct_L5 >= 0.05))
    ).astype(int)
    # Same next-row replacement as for Early_stop.
    df['MP_increase'] = df.groupby('Player')['MP_increase'].shift(-1).fillna(0).astype(int)
    
    # Rolling sums of the previous rows' Early_stop / Injured flags.
    # NOTE(review): the frame is sorted by Player/Date but not grouped, so a
    # window can span two different players - confirm.
    for N in [1, 3, 5]:
        df[f'Early_stop_L{N}'] = df.sort_values(['Player', 'Date'])['Early_stop'].shift(1).rolling(N).sum()
        df[f'Injured_L{N}'] = df.sort_values(['Player', 'Date'])['Injured'].shift(1).rolling(N).sum()
    
    # Location based features
    # Days since the player's previous row (0 for the first row).
    df["DaysLstGm"] = (df.groupby("Player")["Date"].diff().dt.days).fillna(0).astype(int)
    # Location = Team when Team_type == 'Home', otherwise Opp.
    # NOTE(review): generate_predictions label-encodes Team_type before
    # calling this function, so the comparison with the string 'Home' looks
    # like it is always False on that path (Location would always be Opp) -
    # confirm.
    df['Location'] = df.apply(lambda r: r['Team'] if r['Team_type'] == 'Home' else r['Opp'], axis=1)
    df['PrevLocation'] = df.groupby('Player')['Location'].shift(1)
    df['same_arena'] = (df['PrevLocation'] == df['Location']).astype(int)

    # Remove helper columns that are not exposed as features.
    df = df.drop(['Season', 'Team_type', 'reserve_td', 'reserve', 'bench_td', 'bench', 'starter_td', 'starter', 
                  'PrevLocation', 'Location', 'Injured', 'missed_games_after', 'MP_diff_next_avg5', 'return_game'], axis=1)    
    
    return df

# Main Model

In [6]:
def setup_df_main(df, tgt_stat):
    """Engineer the feature columns used by the per-stat projection model.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns selected in the branch taken for
        ``tgt_stat``, including the ``Off_*`` / ``Def_*`` columns named after
        the target stat.
    tgt_stat : str
        Target stat. ``'PTS'`` and ``'PRA'`` get extra stat-specific features;
        any other value gets only the shared features.

    Returns
    -------
    pandas.DataFrame
        Feature frame. Every name in the module-level ``categories`` list
        other than ``tgt_stat`` is removed, together with ``Season``, the
        shooting columns and the role helper columns. ``Date`` is kept.

    Notes
    -----
    Several statements depend on the current row order and on index alignment
    when results are assigned back. The NOTE(review) comments mark spots whose
    behaviour looks unintended; any change there alters the feature values, so
    it should be coordinated with whatever code trained the saved models.
    """
    
    # Stat dependent features 
    if tgt_stat == 'PTS':
        tgt_stat_cols = ['TPM', 'FG', 'FT', 'TPA', 'FGA', 'FTA']
        df = df[['Season', 'Date', 'Team', 'Opp', 'Player', 'Pos', 'MP', 'team_game_num', 
         'PTS', 'TPM', 'FG', 'FGA', 'TPA', 'FT', 'FTA', 
         f'Off_{tgt_stat}', f'Off_L3_{tgt_stat}', f'Off_L5_{tgt_stat}', f'Off_L10_{tgt_stat}', f'Off_{tgt_stat}_Rk',
         f'Def_{tgt_stat}', f'Def_L3_{tgt_stat}', f'Def_L5_{tgt_stat}', f'Def_L10_{tgt_stat}', f'Def_{tgt_stat}_Rk',
         'Spread', 'Total', 'is_OT']]
        
        # Efficiency metrics
        # three_rate / ft_rate / usage_proxy fall back to 0 when the
        # denominator is 0. eFG_raw and TS_raw have no such guard, so a zero
        # denominator yields NaN or inf there.
        df['three_rate_raw'] =  np.where(df.FGA > 0, df['TPA'] / df['FGA'], 0)
        df['ft_rate_raw']    =  np.where(df.FGA > 0, df['FTA'] / df['FGA'], 0)
        df['eFG_raw'] = (df['FG'] + 0.5 * df['TPM']) / df['FGA']
        df['TS_raw'] = df['PTS'] / (2 * (df['FGA'] + 0.44 * df['FTA']))    
        df['usage_proxy_raw'] =  np.where(df.MP > 0, (df['FGA'] + 0.44 * df['FTA']) / df['MP'], 0)
        
        # Rolling means of each raw metric over the last 3/5/10 rows of the
        # (Player, Season) group, moved down one row.
        # NOTE(review): .shift(1) runs on the concatenated groupby-rolling
        # result, not per group, so the first row of each group appears to
        # receive the last value of the preceding group - confirm.
        for w in [3, 5, 10]:
            for metric in ['three_rate', 'ft_rate', 'eFG', 'TS', 'usage_proxy']:
                col = f"{metric}_L{w}"
                df[col] = (
                    df.groupby(['Player','Season'])[f'{metric}_raw']
                      .rolling(w, min_periods=1)
                      .mean()
                      .shift(1)
                      .reset_index(level=[0,1], drop=True)
                )
        # Blend the three windows with weights 0.6 / 0.3 / 0.1, then drop the
        # raw per-game metric.
        for metric in ['three_rate', 'ft_rate', 'eFG', 'TS', 'usage_proxy']:
            col = f'{metric}_weighted'
            df[col] = (
                0.6 * df[f'{metric}_L3'] +
                0.3 * df[f'{metric}_L5'] +
                0.1 * df[f'{metric}_L10']
            )
            df = df.drop(f'{metric}_raw', axis=1)
        
    elif tgt_stat == 'PRA':
        tgt_stat_cols = ['PTS', 'REB', 'AST', 'TPM', 'FG']
        df = df[['Season', 'Date', 'Team', 'Opp', 'Player', 'Pos', 'MP', 'team_game_num', 
         'PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK', 
         'FG', 'FGA', 'TPA', 'FT', 'FTA', 
         f'Off_{tgt_stat}', f'Off_L3_{tgt_stat}', f'Off_L5_{tgt_stat}', f'Off_L10_{tgt_stat}', f'Off_{tgt_stat}_Rk',
         f'Def_{tgt_stat}', f'Def_L3_{tgt_stat}', f'Def_L5_{tgt_stat}', f'Def_L10_{tgt_stat}', f'Def_{tgt_stat}_Rk',
         'Spread', 'Total', 'is_OT']]
        
        # Only the usage proxy is built for PRA, with the same rolling /
        # weighting scheme as in the PTS branch (same shift caveat applies).
        df['usage_proxy_raw'] =  np.where(df.MP > 0, (df['FGA'] + 0.44 * df['FTA']) / df['MP'], 0)
        for w in [3, 5, 10]:
            df[f"usage_proxy_L{w}"] = (
                df.groupby(['Player','Season'])[f'usage_proxy_raw']
                  .rolling(w, min_periods=1)
                  .mean()
                  .shift(1)
                  .reset_index(level=[0,1], drop=True)
            )
        df['usage_proxy_weighted'] = (
            0.6 * df[f'usage_proxy_L3'] +
            0.3 * df[f'usage_proxy_L5'] +
            0.1 * df[f'usage_proxy_L10']
        )
        df = df.drop('usage_proxy_raw', axis=1)
        
        
    else:
        # Any other target: no stat-specific lag columns, shared features only.
        tgt_stat_cols = []
        df = df[['Season', 'Date', 'Team', 'Opp', 'Player', 'Pos', 'MP', 'team_game_num', 
         'PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK',
         'FG', 'FGA', 'TPA', 'FT', 'FTA', 
          f'Off_{tgt_stat}', f'Off_L3_{tgt_stat}', f'Off_L5_{tgt_stat}', f'Off_L10_{tgt_stat}', f'Off_{tgt_stat}_Rk',
          f'Def_{tgt_stat}', f'Def_L3_{tgt_stat}', f'Def_L5_{tgt_stat}', f'Def_L10_{tgt_stat}', f'Def_{tgt_stat}_Rk',
         'Spread', 'Total', 'is_OT']]

    
    # Create rolling + lag features    
    # For MP and every stat-specific column: the previous row's value within
    # (Player, Season) and rolling means over the last 1/3/5/10 rows (same
    # cross-group shift caveat as above).
    for col in ['MP'] + tgt_stat_cols:
        df[f'{col}_lst_gm'] = (
            df
            .groupby(['Player', 'Season'])[col]
            .shift(1)
        )
        for N in [1, 3, 5, 10]:
            df[f'{col}_L{N}_avg'] = (
                df.groupby(['Player', 'Season'])[col]
                  .rolling(window=N, min_periods=1)
                  .mean()
                  .shift(1)
                  .reset_index(level=[0, 1], drop=True)
            )

    # Role identifiers features
    # Per-game flags from MP (<8 reserve, 8-25 bench, >25 starter); the season
    # role is the flag with the highest count for the (Season, Player) pair,
    # counted over every row of that season.
    df['reserve_td'] = (df.MP < 8).astype(int)
    df['bench_td']   = ((df.MP >= 8) & (df.MP <= 25)).astype(int)
    df['starter_td'] = (df.MP > 25).astype(int)
    role_counts = df.groupby(['Season', 'Player'])[['reserve_td', 'bench_td', 'starter_td']].sum()
    role_counts['most_common_role'] = role_counts[['reserve_td', 'bench_td', 'starter_td']].idxmax(axis=1)
    role_counts['reserve'] = (role_counts['most_common_role'] == 'reserve_td').astype(int)
    role_counts['bench']   = (role_counts['most_common_role'] == 'bench_td').astype(int)
    role_counts['starter'] = (role_counts['most_common_role'] == 'starter_td').astype(int)
    df = df.merge(role_counts[['reserve', 'bench', 'starter']], on=['Season', 'Player'], how='left')
    # Numeric role code: 1 = starter, 2 = bench, 3 = reserve (0 if none matched).
    df['role'] = 0
    df['role'] = np.where(df.starter == 1, 1, df.role)
    df['role'] = np.where(df.bench == 1, 2, df.role)
    df['role'] = np.where(df.reserve == 1, 3, df.role)
    
    # Recent role: the flag with the highest rolling sum over the previous N
    # rows, encoded with the same 1/2/3 codes; helper columns are dropped.
    # NOTE(review): grouping is by Player only (not Season) and .shift(1) is
    # applied after the groups are concatenated - confirm.
    for N in [1, 3, 5]:
        for role in ['reserve_td', 'bench_td', 'starter_td']:
            df[f'{role}_last{N}'] = (
                df.sort_values(['Player', 'Date']).groupby('Player')[role]
                  .rolling(N, min_periods=1)
                  .sum()
                  .shift(1)
                  .reset_index(0, drop=True)
            )
        rec_role_cols = [f'{role}_last{N}' for role in ['reserve_td', 'bench_td', 'starter_td']]
        df[f'recent_most_common_role_L{N}'] = df[rec_role_cols].idxmax(axis=1)
        df[f'recent_role_L{N}'] = 0
        df[f'recent_role_L{N}'] = np.where(df[f'recent_most_common_role_L{N}'] == f'starter_td_last{N}', 1, df[f'recent_role_L{N}'])
        df[f'recent_role_L{N}'] = np.where(df[f'recent_most_common_role_L{N}'] == f'bench_td_last{N}', 2, df[f'recent_role_L{N}'])
        df[f'recent_role_L{N}'] = np.where(df[f'recent_most_common_role_L{N}'] == f'reserve_td_last{N}', 3, df[f'recent_role_L{N}'])      
        df = df.drop(f'recent_most_common_role_L{N}', axis=1)
        for role in ['reserve_td', 'bench_td', 'starter_td']:
            df = df.drop(f'{role}_last{N}', axis=1)
    
    # Bucket the game by Spread: 1, 2 or 3.
    # NOTE(review): abs() wraps the boolean comparison rather than Spread, so
    # these conditions evaluate as Spread < 6, 6 <= Spread <= 14 and
    # Spread > 14 on the signed value. abs(df.Spread) < 6 etc. was presumably
    # intended - confirm, and keep consistent with model training.
    df['game_spread_type'] = 0
    df['game_spread_type'] = np.where(abs(df.Spread < 6), 1, df.game_spread_type) 
    df['game_spread_type'] = np.where((abs(df.Spread >= 6) & abs(df.Spread <= 14)), 2, df.game_spread_type) 
    df['game_spread_type'] = np.where(abs(df.Spread > 14), 3, df.game_spread_type) 
    
    # Drop same-game box-score columns and helpers so that only the target
    # stat (when present) and the engineered features remain.
    for col in categories + ['Season', 'FG', 'FGA', 'FT', 'FTA', 'TPM', 'TPA', 
                             'reserve_td', 'reserve', 'bench_td', 'bench', 'starter_td', 'starter'] + tgt_stat_cols:
        if col == tgt_stat:
            continue
        if col in df.columns:
            df = df.drop(col, axis=1)
        
    return df

# Today's Predictions

In [7]:
def generate_predictions(tgt_stat):
    """Project today's ``tgt_stat`` for every player with a line, then display and save.

    Steps
    -----
    1. Build the base table with ``create_base_df`` and label-encode its
       string columns.
    2. Load the saved minutes model and the ``{tgt_stat}`` model from
       ``../ML_models/``.
    3. Fill missing ``Spread`` / ``Total`` from ``parlay_lines.csv`` and
       overwrite ``MP`` on every row with the minutes model's prediction.
    4. Build the stat features with ``setup_df_main``, keep the rows dated
       ``now`` (module-level date string) and predict ``{tgt_stat}_proj``.
    5. Join today's ``{tgt_stat}_line`` values, order the picks by the
       absolute gap between line and projection (largest first), display
       them, and save them with ``partition_save_df``.

    Parameters
    ----------
    tgt_stat : str
        Stat to project (e.g. ``'PTS'`` or ``'PRA'``). A model file
        ``../ML_models/{tgt_stat}_model.json`` and a ``{tgt_stat}_line``
        column in ``parlay_lines.csv`` must exist.

    Returns
    -------
    None
        Results are displayed and written to
        ``../tables/2025/gmday_preds_{tgt_stat}.csv``.

    Raises
    ------
    ValueError
        From ``LabelEncoder.transform`` when the lines file contains a team
        that is absent from the base table.
    """
    line_col = f'{tgt_stat}_line'
    proj_col = f'{tgt_stat}_proj'

    team_encoder = LabelEncoder()
    player_encoder = LabelEncoder()
    team_type_encoder = LabelEncoder()
    position_encoder = LabelEncoder()
    status_encoder = LabelEncoder()

    df_pred = create_base_df()

    # Encode string cols (Team and Opp share one encoder so codes agree)
    team_encoder.fit(pd.concat([df_pred["Team"], df_pred["Opp"]], axis=0))
    df_pred["Team"] = team_encoder.transform(df_pred["Team"])
    df_pred["Opp"] = team_encoder.transform(df_pred["Opp"])
    df_pred["Player"] = player_encoder.fit_transform(df_pred["Player"])
    df_pred["Pos"] = position_encoder.fit_transform(df_pred["Pos"])
    df_pred['Team_type'] = team_type_encoder.fit_transform(df_pred['Team_type'])
    df_pred["Status"] = status_encoder.fit_transform(df_pred["Status"])

    mins_model = xgb.XGBRegressor()
    mins_model.load_model("../ML_models/mins_model.json")
    stat_model = xgb.XGBRegressor()
    stat_model.load_model(f"../ML_models/{tgt_stat}_model.json")

    df_lines = pd.read_csv("../tables/2025/parlay_lines.csv")
    df_lines['Date'] = pd.to_datetime(df_lines.Date)
    # .copy() so the Team column can be re-encoded without writing to a slice.
    df_lines = df_lines[~(df_lines.Team.isnull())].copy()

    # Predict Mins
    df_lines["Team"] = team_encoder.transform(df_lines["Team"])
    # The lines file has one row per player; reduce it to one row per
    # (Date, Team) *before* merging so the historical table is not multiplied
    # by the number of players with a line. keep='last' selects the same
    # values that survived the post-merge de-duplication previously.
    game_lines = df_lines[['Date', 'Team', 'Spread', 'Total']].drop_duplicates(
        subset=['Date', 'Team'], keep='last'
    )
    df_pred = df_pred.merge(game_lines, on=['Date', 'Team'], how='left')
    df_pred = df_pred[~df_pred[['Date', 'Team', 'Player']].duplicated(keep='last')]
    # Prefer the base table's Spread/Total; fall back to the lines file
    # where the base table has no value.
    df_pred['Spread_x'] = np.where(df_pred.Spread_x.isnull(), df_pred.Spread_y, df_pred.Spread_x)
    df_pred['Total_x'] = np.where(df_pred.Total_x.isnull(), df_pred.Total_y, df_pred.Total_x)
    df_pred = df_pred.rename(columns={"Spread_x": "Spread", "Total_x": "Total"}).drop(['Spread_y', 'Total_y'], axis=1)
    df_pred_mins = setup_df_mins(con, df_pred)
    df_pred_mins = df_pred_mins.drop(['Date', 'MP'], axis=1)
    # Positional assignment: relies on setup_df_mins returning the rows in the
    # same order and number as df_pred. Every row's MP is replaced.
    df_pred['MP'] = mins_model.predict(df_pred_mins)

    # Predict Stat
    df_pred = setup_df_main(df_pred, tgt_stat)
    feature_cols = [col for col in df_pred.columns if col not in ['Date', tgt_stat]]
    df_pred = df_pred[df_pred.Date == now][feature_cols].copy()
    df_pred[proj_col] = stat_model.predict(df_pred)

    # Decode back to readable labels
    df_pred['Team'] = team_encoder.inverse_transform(df_pred["Team"])
    df_lines['Team'] = team_encoder.inverse_transform(df_lines["Team"])
    df_pred['Opp'] = team_encoder.inverse_transform(df_pred["Opp"])
    df_pred['Player'] = player_encoder.inverse_transform(df_pred["Player"])
    df_pred['Pos'] = position_encoder.inverse_transform(df_pred["Pos"])

    df_lines = df_lines[df_lines.Date == now][['Team', 'Player', line_col]]
    df_pred = df_pred.merge(df_lines, on=['Team', 'Player'])

    # Keep players that have a line and order by |line - projection|, largest first.
    has_line = ~(df_pred[line_col].isnull())
    tds_picks = df_pred.loc[
        has_line, ['Team', 'Player', 'Pos', 'Opp', 'MP', 'MP_L5_avg', line_col, proj_col]
    ].copy()
    tds_picks['Diff'] = abs(tds_picks[line_col] - tds_picks[proj_col])
    tds_picks = tds_picks.sort_values('Diff', ascending=False).drop('Diff', axis=1)

    # Large slates are shown one team at a time to keep the output readable.
    if tds_picks.shape[0] >= 50:
        print(tds_picks.shape[0], 'rows')
        for tm in tds_picks.Team.unique():
            display(tds_picks[tds_picks.Team == tm])
    else:
        display(tds_picks)
    tds_picks.insert(0, 'Date', pd.to_datetime(now))
    partition_save_df(tds_picks, f"../tables/2025/gmday_preds_{tgt_stat}.csv")

In [8]:
# Run the PTS pipeline; on failure send a notification and surface the error.
try:
    generate_predictions('PTS')
except Exception as e:
    # Notification is best-effort: a mail failure must not hide the real error.
    try:
        email('PTS', e)
    except Exception as mail_err:
        print(f"Could not send error email: {mail_err}")
    # Bare raise keeps the original exception type and traceback
    # (raise Exception(e) replaced both with a generic Exception).
    raise

82 rows


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PTS_line,PTS_proj
3,CLE,Darius Garland,PG,IND,30.502794,30.972542,24.5,17.034899
53,CLE,Evan Mobley,PF,IND,30.405874,27.061473,20.5,13.772969
13,CLE,Craig Porter Jr.,PG,IND,14.378641,21.759105,6.5,3.621899
44,CLE,De'Andre Hunter,SF,IND,23.997719,25.047504,14.5,12.35858
66,CLE,Jarrett Allen,C,IND,25.163927,25.544936,14.5,13.718783
25,CLE,Jaylon Tyson,SG,IND,24.867628,25.964171,13.5,12.806716
21,CLE,Sam Merrill,SG,IND,22.391996,18.533059,12.5,12.022406
61,CLE,Dean Wade,PF,IND,16.370275,23.611877,5.5,5.373235


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PTS_line,PTS_proj
26,MEM,Cam Spencer,SG,SAS,24.446419,26.823523,15.5,8.214993
31,MEM,Vince Williams Jr.,SG,SAS,15.03282,15.115341,7.5,5.2692
75,MEM,Jock Landale,C,SAS,19.070923,14.869866,11.5,10.30377
57,MEM,Santi Aldama,PF,SAS,32.999393,31.991502,15.5,14.64255
68,MEM,Jaren Jackson Jr.,C,SAS,33.338287,32.865419,20.5,21.319283
30,MEM,Kentavious Caldwell-Pope,SG,SAS,18.98624,18.580672,9.5,9.100471
24,MEM,Jaylen Wells,SG,SAS,29.14994,29.768909,14.5,14.38828


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PTS_line,PTS_proj
56,MIN,Jaden McDaniels,PF,MIA,28.060642,29.394437,15.5,9.151445
23,MIN,Donte DiVincenzo,SG,MIA,29.275482,29.644635,14.5,8.955129
72,MIN,Rudy Gobert,C,MIA,32.553703,29.445419,12.5,13.569468
70,MIN,Naz Reid,C,MIA,25.67791,26.882315,15.5,16.364918
15,MIN,Anthony Edwards,SG,MIA,36.746254,33.944897,28.5,27.870865
51,MIN,Julius Randle,PF,MIA,33.388317,31.946173,22.5,22.015427


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PTS_line,PTS_proj
48,DAL,Anthony Davis,PF,SAC,33.596607,31.264408,24.5,19.030134
20,DAL,Max Christie,SG,SAC,28.018776,28.80005,12.5,16.107918
41,DAL,Klay Thompson,SF,SAC,18.482218,23.500222,11.5,8.700981
73,DAL,Daniel Gafford,C,SAC,15.004892,19.946695,8.5,6.498448
10,DAL,Cooper Flagg,PG,SAC,36.19669,36.203255,21.5,19.553329
37,DAL,Naji Marshall,SF,SAC,29.751879,30.130963,13.5,12.583587


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PTS_line,PTS_proj
18,MIA,Norman Powell,SG,MIN,30.847979,30.586245,22.5,27.960508
67,MIA,Kel'el Ware,C,MIN,27.49995,25.274283,10.5,13.175786
45,MIA,Andrew Wiggins,SF,MIN,29.836916,29.74595,15.5,14.319984
63,MIA,Bam Adebayo,C,MIN,32.423607,29.688206,16.5,15.514073
9,MIA,Davion Mitchell,PG,MIN,29.63023,29.818827,8.5,9.440441
33,MIA,Pelle Larsson,SG,MIN,18.920473,21.87036,8.5,7.86705
58,MIA,Nikola Jovic,PF,MIN,17.193504,22.899239,9.5,9.169388
19,MIA,Tyler Herro,SG,MIN,20.78289,17.575338,16.5,16.209776


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PTS_line,PTS_proj
1,LAL,Luka Doncic,PG,NOP,36.947445,33.248461,34.5,39.495937
64,LAL,Deandre Ayton,C,NOP,29.0888,29.553574,14.5,11.803295
40,LAL,LeBron James,SF,NOP,34.881908,31.646421,23.5,25.577763
62,LAL,Jarred Vanderbilt,PF,NOP,18.842424,19.490941,5.5,6.825625
27,LAL,Marcus Smart,SG,NOP,28.629282,27.957475,10.5,9.305077
77,LAL,Jaxson Hayes,C,NOP,15.205215,14.980919,5.5,6.633217
59,LAL,Jake LaRavia,PF,NOP,29.02663,29.58955,13.5,12.457846


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PTS_line,PTS_proj
39,IND,Aaron Nesmith,SF,CLE,24.976173,28.427271,16.5,11.930972
12,IND,T.J. McConnell,PG,CLE,15.22448,15.982102,10.5,6.541148
80,IND,Jay Huff,C,CLE,16.598114,16.931924,7.5,5.214184
34,IND,Ben Sheppard,SG,CLE,19.727808,19.684793,6.5,8.017266
60,IND,Jarace Walker,PF,CLE,14.79601,19.315166,7.5,6.340872
54,IND,Pascal Siakam,PF,CLE,34.92363,32.259611,24.5,25.326859
5,IND,Andrew Nembhard,PG,CLE,33.037189,31.728244,18.5,17.71347
35,IND,Johnny Furphy,SG,CLE,13.716458,16.499603,4.5,3.746264


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PTS_line,PTS_proj
17,SAC,Zach LaVine,SG,DAL,32.067833,28.105437,20.5,16.20639
11,SAC,Dennis Schroder,PG,DAL,24.437246,22.913,11.5,14.152641
8,SAC,Russell Westbrook,PG,DAL,30.596951,26.709042,14.5,15.999075
71,SAC,Maxime Raynaud,C,DAL,27.458019,29.580904,11.5,10.317214
52,SAC,DeMar DeRozan,PF,DAL,33.666763,30.696329,18.5,18.891594
76,SAC,Precious Achiuwa,C,DAL,16.167427,18.503255,6.5,6.659703


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PTS_line,PTS_proj
28,SAS,Dylan Harper,SG,MEM,18.915489,22.634641,11.5,7.729871
55,SAS,Harrison Barnes,PF,MEM,24.969099,26.791117,10.5,7.697577
78,SAS,Luke Kornet,C,MEM,21.606379,24.30675,7.5,10.044014
0,SAS,De'Aaron Fox,PG,MEM,32.838615,32.896072,21.5,23.47373
38,SAS,Keldon Johnson,SF,MEM,20.711487,24.217763,12.5,11.387563
2,SAS,Stephon Castle,PG,MEM,31.565693,31.025393,17.5,16.410685
43,SAS,Julian Champagnie,SF,MEM,25.673994,26.83143,11.5,12.135291
65,SAS,Victor Wembanyama,C,MEM,22.6427,24.371285,20.5,21.036633


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PTS_line,PTS_proj
46,WAS,Khris Middleton,SF,ORL,19.35446,21.296792,9.5,6.4282
22,WAS,CJ McCollum,SG,ORL,30.772987,32.288115,18.5,16.736149
29,WAS,Tre Johnson,SG,ORL,24.653568,28.551482,14.5,12.832604
47,WAS,Justin Champagnie,SF,ORL,20.855782,26.671095,8.5,9.284561
32,WAS,Bilal Coulibaly,SG,ORL,26.597803,26.020688,10.5,11.265107


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PTS_line,PTS_proj
50,ORL,Paolo Banchero,PF,WAS,36.49786,34.886351,27.5,29.286156
14,ORL,Tyus Jones,PG,WAS,18.866241,23.034855,4.5,2.739788
16,ORL,Desmond Bane,SG,WAS,36.183983,36.205182,23.5,21.977737
4,ORL,Anthony Black,PG,WAS,34.459328,34.119724,19.5,20.066366
69,ORL,Wendell Carter Jr.,C,WAS,31.750124,33.419242,13.5,13.932924
79,ORL,Goga Bitadze,C,WAS,14.429176,15.595267,5.5,5.661938


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PTS_line,PTS_proj
6,NOP,Jordan Poole,PG,LAL,25.406183,22.917624,15.5,17.184423
36,NOP,Trey Murphy III,SF,LAL,36.065701,33.534183,20.5,21.667994
49,NOP,Zion Williamson,PF,LAL,26.232391,27.269738,24.5,23.365585
81,NOP,Yves Missi,C,LAL,12.936969,19.856323,4.5,3.63729
42,NOP,Herbert Jones,SF,LAL,18.841911,22.091673,8.5,9.268623
7,NOP,Jeremiah Fears,PG,LAL,24.391014,26.860384,12.5,12.957457
74,NOP,Derik Queen,C,LAL,27.688173,26.650655,12.5,12.38092


../tables/2025/gmday_preds_PTS.csv saved!


In [9]:
# Run the PRA pipeline; on failure send a notification and surface the error.
try:
    generate_predictions('PRA')
except Exception as e:
    # Notification is best-effort: a mail failure must not hide the real error.
    try:
        email('PRA', e)
    except Exception as mail_err:
        print(f"Could not send error email: {mail_err}")
    # Bare raise keeps the original exception type and traceback
    # (raise Exception(e) replaced both with a generic Exception).
    raise

63 rows


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
26,MEM,Cam Spencer,SG,SAS,24.446419,26.823523,26.5,13.948709
31,MEM,Vince Williams Jr.,SG,SAS,15.03282,15.115341,15.5,10.940233
75,MEM,Jock Landale,C,SAS,19.070923,14.869866,20.5,16.982487
30,MEM,Kentavious Caldwell-Pope,SG,SAS,18.98624,18.580672,15.5,12.706201
24,MEM,Jaylen Wells,SG,SAS,29.14994,29.768909,20.5,18.813669
57,MEM,Santi Aldama,PF,SAS,32.999393,31.991502,26.5,25.126722
68,MEM,Jaren Jackson Jr.,C,SAS,33.338287,32.865419,29.5,30.837299


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
48,DAL,Anthony Davis,PF,SAC,33.596607,31.264408,40.5,27.951023
10,DAL,Cooper Flagg,PG,SAC,36.19669,36.203255,33.5,30.125397
20,DAL,Max Christie,SG,SAC,28.018776,28.80005,18.5,21.739285
37,DAL,Naji Marshall,SF,SAC,29.751879,30.130963,22.5,19.734087
73,DAL,Daniel Gafford,C,SAC,15.004892,19.946695,15.5,12.923293


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
3,CLE,Darius Garland,PG,IND,30.502794,30.972542,35.5,24.684643
53,CLE,Evan Mobley,PF,IND,30.405874,27.061473,34.5,25.076855
13,CLE,Craig Porter Jr.,PG,IND,14.378641,21.759105,14.5,8.372718
61,CLE,Dean Wade,PF,IND,16.370275,23.611877,12.5,9.132861
25,CLE,Jaylon Tyson,SG,IND,24.867628,25.964171,21.5,18.268873
44,CLE,De'Andre Hunter,SF,IND,23.997719,25.047504,21.5,18.71356
66,CLE,Jarrett Allen,C,IND,25.163927,25.544936,26.5,24.467392
21,CLE,Sam Merrill,SG,IND,22.391996,18.533059,18.5,16.695692


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
12,IND,T.J. McConnell,PG,CLE,15.22448,15.982102,18.5,10.872622
39,IND,Aaron Nesmith,SF,CLE,24.976173,28.427271,23.5,16.539009
54,IND,Pascal Siakam,PF,CLE,34.92363,32.259611,36.5,34.112236
5,IND,Andrew Nembhard,PG,CLE,33.037189,31.728244,29.5,30.64818


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
56,MIN,Jaden McDaniels,PF,MIA,28.060642,29.394437,23.5,17.096104
23,MIN,Donte DiVincenzo,SG,MIA,29.275482,29.644635,24.5,19.489904
70,MIN,Naz Reid,C,MIA,25.67791,26.882315,25.5,22.305704
51,MIN,Julius Randle,PF,MIA,33.388317,31.946173,36.5,33.48904
15,MIN,Anthony Edwards,SG,MIA,36.746254,33.944897,38.5,36.21283
72,MIN,Rudy Gobert,C,MIA,32.553703,29.445419,25.5,26.201231


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
49,NOP,Zion Williamson,PF,LAL,26.232391,27.269738,34.5,28.107313
36,NOP,Trey Murphy III,SF,LAL,36.065701,33.534183,28.5,32.160606
42,NOP,Herbert Jones,SF,LAL,18.841911,22.091673,13.5,15.010481
7,NOP,Jeremiah Fears,PG,LAL,24.391014,26.860384,19.5,18.236658
6,NOP,Jordan Poole,PG,LAL,25.406183,22.917624,21.5,21.307858
74,NOP,Derik Queen,C,LAL,27.688173,26.650655,24.5,24.32036


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
19,MIA,Tyler Herro,SG,MIN,20.78289,17.575338,22.5,16.507553
45,MIA,Andrew Wiggins,SF,MIN,29.836916,29.74595,23.5,21.256983
18,MIA,Norman Powell,SG,MIN,30.847979,30.586245,28.5,29.642925
63,MIA,Bam Adebayo,C,MIN,32.423607,29.688206,28.5,27.705456
9,MIA,Davion Mitchell,PG,MIN,29.63023,29.818827,17.5,18.188997
58,MIA,Nikola Jovic,PF,MIN,17.193504,22.899239,16.5,16.010744


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
1,LAL,Luka Doncic,PG,NOP,36.947445,33.248461,51.5,57.366856
64,LAL,Deandre Ayton,C,NOP,29.0888,29.553574,24.5,19.403126
59,LAL,Jake LaRavia,PF,NOP,29.02663,29.58955,21.5,18.852348
40,LAL,LeBron James,SF,NOP,34.881908,31.646421,36.5,35.372723
27,LAL,Marcus Smart,SG,NOP,28.629282,27.957475,17.5,18.626236


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
28,SAS,Dylan Harper,SG,MEM,18.915489,22.634641,18.5,13.254518
2,SAS,Stephon Castle,PG,MEM,31.565693,31.025393,30.5,27.051109
65,SAS,Victor Wembanyama,C,MEM,22.6427,24.371285,34.5,33.354851
0,SAS,De'Aaron Fox,PG,MEM,32.838615,32.896072,31.5,31.128609


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
29,WAS,Tre Johnson,SG,ORL,24.653568,28.551482,20.5,16.304619
22,WAS,CJ McCollum,SG,ORL,30.772987,32.288115,26.5,23.636696
46,WAS,Khris Middleton,SF,ORL,19.35446,21.296792,15.5,13.468051
32,WAS,Bilal Coulibaly,SG,ORL,26.597803,26.020688,18.5,19.197081


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
17,SAC,Zach LaVine,SG,DAL,32.067833,28.105437,26.5,22.370659
71,SAC,Maxime Raynaud,C,DAL,27.458019,29.580904,21.5,22.608603
8,SAC,Russell Westbrook,PG,DAL,30.596951,26.709042,27.5,26.708439
52,SAC,DeMar DeRozan,PF,DAL,33.666763,30.696329,26.5,26.612135


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
4,ORL,Anthony Black,PG,WAS,34.459328,34.119724,29.5,30.741405
16,ORL,Desmond Bane,SG,WAS,36.183983,36.205182,33.5,32.408367
50,ORL,Paolo Banchero,PF,WAS,36.49786,34.886351,42.5,42.656094
69,ORL,Wendell Carter Jr.,C,WAS,31.750124,33.419242,24.5,24.364519


../tables/2025/gmday_preds_PRA.csv saved!
