# To do:

 - Both
     - Figure out how to signal injuries
 - Mins
     - Create foul trouble signal feature
     - Tweak threshold for Early_Stop and MP_Increase
     - All L_avg functions are giving data when it should be null for first N games of the season

In [15]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import duckdb
import warnings
import os

import xgboost as xgb
from xgboost import XGBRegressor
from scipy.stats import randint, uniform

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import joblib
import warnings
from datetime import datetime, timedelta

pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")

categories = ['PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK']
con = duckdb.connect(database=":memory:")

cwd = os.path.abspath(os.getcwd()).replace("\\", "/")
if cwd.startswith("C:/Users/Rodolfo/"):
    RUN_LOCATION = "local"
else:
    RUN_LOCATION = "cloud"
time_offset = {"local": 3, "cloud": -5}
now = str((datetime.now() + timedelta(hours=time_offset[RUN_LOCATION]) + timedelta(hours=-3)).date())
print(f"Today's date:", now)

tgt_stat = "PRA"
print('Target Stat:', tgt_stat)

Today's date: 2026-01-07
Target Stat: PRA


In [16]:
%run ./common_utils.ipynb

# ML Functions

In [17]:
def feature_importance(model, all_features):
    # Get gain importance
    importance = model.get_score(importance_type='gain')
    
    # Fill in 0 for missing features
    df_importance = pd.DataFrame({
        'feature': all_features,
        'importance': [importance.get(f, 0) for f in all_features]
    }).sort_values(by='importance', ascending=False).reset_index(drop=True)
    
    df_importance['pct'] = df_importance.importance.cumsum() / df_importance.importance.sum()
    if df_importance.shape[0] >= 50:
        with pd.option_context('display.max_rows', None):
            display(df_importance)
    else:
        display(df_importance)
    
    xgb.plot_importance(model)
    plt.show()

In [18]:
def create_baseline_model(df, pred_col, DFS):
    
    train_df, val_df, test_df = DFS

    if pred_col == 'MP':
        print('Minutes Model')
        feature_cols = [
            'MP_lst_gm',
            'MP_last_5_avg',
            'MP_last_10_avg',
            'starter', 'bench', 'reserve'
        ]
    else:
        print(f'{pred_col} Stats Model')
        feature_cols = [
            'MP_lst_gm',
            'MP_last_5_avg',
            'MP_last_10_avg',
            f'{pred_col}_last_3_avg', f'{pred_col}_last_5_avg', f'{pred_col}_last_10_avg',
            f'Def_{pred_col}', f'Def_L5_{pred_col}'
        ]
    
    print('Train:', len(train_df), '/ Validation:', len(val_df), '/ Test:', len(test_df))
    
    X_train, y_train = train_df[feature_cols], train_df[pred_col]
    X_val,   y_val   = val_df[feature_cols],   val_df[pred_col]
    X_test,  y_test  = test_df[feature_cols],  test_df[pred_col]

    # Convert to DMatrix (XGBoost internal format)
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dval   = xgb.DMatrix(X_val, label=y_val)
    dtest  = xgb.DMatrix(X_test, label=y_test)

    params = {
        "objective": "reg:squarederror",
        "max_depth": 5,
        "learning_rate": 0.05,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "seed": 42
    }

    # Train using native XGBoost API with early stopping
    evals = [(dtrain, "train"), (dval, "val")]
    bst = xgb.train(
        params,
        dtrain,
        num_boost_round=500,
        evals=evals,
        early_stopping_rounds=50,
        verbose_eval=False
    )

    # Predict on test set
    preds = bst.predict(dtest)

    rmse = np.sqrt(mean_squared_error(y_test, preds))
    mae = mean_absolute_error(y_test, preds)
    r2 = r2_score(y_test, preds)

    print("RMSE:", rmse)
    print("MAE:", mae)
    print("R²:", r2)
    
    return bst

In [19]:
def hyperparam_tuning(DFS, pred_col, n_iter=20, early_stopping_rounds=50):
    """
    Hyperparameter tuning using native XGBoost API and DMatrix,
    with early stopping support (compatible with XGBoost 3.1.2)
    """

    train_df, val_df, test_df = DFS
    feature_cols = [col for col in train_df.columns if col not in ['Date', pred_col]]
    X_train, y_train = train_df[feature_cols], train_df[pred_col]
    X_val,   y_val   = val_df[feature_cols],   val_df[pred_col]
    X_test,  y_test  = test_df[feature_cols],  test_df[pred_col]

    # Convert datasets to DMatrix
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dval   = xgb.DMatrix(X_val, label=y_val)
    dtest  = xgb.DMatrix(X_test, label=y_test)

    # Hyperparameter search space
    param_dist = {
        "n_estimators": randint(300, 1500),
        "learning_rate": uniform(0.01, 0.05),
        "max_depth": randint(3, 6),
        "min_child_weight": randint(1, 8),
        "subsample": uniform(0.7, 0.3),
        "colsample_bytree": uniform(0.7, 0.3),
        "gamma": uniform(0, 2),
        "reg_lambda": uniform(0, 5),
        "reg_alpha": uniform(0, 2)
    }

    # Sample n_iter random parameter combinations
    param_list = []
    for _ in range(n_iter):
        sample = {k: (v.rvs() if hasattr(v, "rvs") else v) for k, v in param_dist.items()}
        sample['n_estimators'] = int(sample['n_estimators'])
        sample['max_depth'] = int(sample['max_depth'])
        sample['min_child_weight'] = int(sample['min_child_weight'])
        param_list.append(sample)

    best_mae = float('inf')
    best_params = None
    best_bst = None

    # Manual hyperparameter search
    for i, params in enumerate(param_list):
        print(f"\nTrial {i+1}/{n_iter}: {params}")
        num_boost_round = params.pop('n_estimators')
        params.update({
            "objective": "reg:squarederror",
            "tree_method": "hist",
            "device": "cuda",
            "seed": 42
        })
        evals = [(dtrain, 'train'), (dval, 'val')]
        bst = xgb.train(
            params,
            dtrain,
            num_boost_round=num_boost_round,
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=False
        )
        # Predict on validation set to compute MAE
        val_preds = bst.predict(dval, iteration_range=(0, bst.best_iteration))
        mae = mean_absolute_error(y_val, val_preds)
        print(f"Validation MAE: {mae:.4f}")
        if mae < best_mae:
            best_mae = mae
            best_params = params.copy()
            best_bst = bst

    print("\nBest validation MAE:", best_mae)
    print("Best parameters:", best_params)

    # Predict on test set using best model
    preds = best_bst.predict(dtest, iteration_range=(0, best_bst.best_iteration))
    test_df[pred_col] = y_test
    test_df[f'{pred_col}_preds'] = preds
    test_df['Team'] = team_encoder.inverse_transform(test_df["Team"])
    test_df['Opp'] = team_encoder.inverse_transform(test_df["Opp"])
    test_df['Player'] = player_encoder.inverse_transform(test_df["Player"])
    test_df['Pos'] = position_encoder.inverse_transform(test_df["Pos"])
    analyze_df = test_df[['Date', 'Team', 'Player', 'Pos', 'Opp', pred_col, f'{pred_col}_preds']]
    print("\nTest Metrics:")
    print("RMSE:", np.sqrt(mean_squared_error(y_test, preds)))
    print("MAE:", mean_absolute_error(y_test, preds))
    print("R²:", r2_score(y_test, preds))

    return best_bst, preds, y_test, analyze_df

### Create Base df

In [89]:
df = pd.DataFrame()
df2 = pd.DataFrame()
df3 = pd.DataFrame()
df4 = pd.DataFrame()
for i in [2021, 2022, 2023, 2024, 2025]:
    df_actuals = pd.read_csv(f"../tables/{i}/parlay_stats.csv")
    df_actuals['Season'] = i
    df = pd.concat([df, df_actuals])

    df_schd = pd.read_csv(f"../tables/{i}/nba_schedule.csv")
    df_schd['Season'] = i
    df2 = pd.concat([df2, df_schd])
    
    df_gms = pd.read_csv(f"../tables/{i}/season_gamelogs.csv")
    df_gms['Season'] = i
    df3 = pd.concat([df3, df_gms])
    
    df_inj = pd.read_csv(f"../tables/{i}/injuries.csv")
    df_inj['Season'] = i
    df4 = pd.concat([df4, df_inj])

df['Date'] = pd.to_datetime(df.Date)
df2['Date'] = pd.to_datetime(df2.Date)
df3['Date'] = pd.to_datetime(df3.Date)
df3 = df3[~df3[['Date', 'Team', 'Player']].duplicated(keep='last')]
df4['Date'] = pd.to_datetime(df4.Date)

df3 = df3.rename(columns={"3PM": "TPM", "3PA": "TPA", "3P%": "TP%", "TRB": "REB"}).drop(['Pos', 'Opp'], axis=1)
df3['PR'] = df3.PTS + df3.REB 
df3['PA'] = df3.PTS + df3.AST
df3['RA'] = df3.REB + df3.AST
df3['PRA'] = df3.PTS + df3.REB + df3.AST
df3['STL_BLK'] = df3.STL + df3.BLK
df = df.merge(df3, on=['Season', 'Date', 'Team', 'Player'], how='left')

df_mtch = df2[['Season', 'Date', 'AwayABV', 'HomeABV', 'AwayPTS', 'HomePTS', 'AwayB2B', 'HomeB2B', 'is_OT', 'cup_gm', 'pstszn_gm']]
df_mtch['Team_type'] = 'Away'
df_mtch = df_mtch.rename(columns={"AwayABV": "Team", "HomeABV": "Opp", "AwayB2B": "B2B"})[['Season', 'Date', 'Team', 'AwayPTS', 'HomePTS', 'Opp', 'B2B', 'is_OT', 'cup_gm', 'pstszn_gm', 'Team_type']]
df_mtch2 = df_mtch.copy().rename(columns={"Team": "Opp", "Opp": "Team", "HomeB2B": "B2B"})[['Season', 'Date', 'Team', 'AwayPTS', 'HomePTS', 'Opp', 'B2B', 'is_OT', 'cup_gm', 'pstszn_gm']]
df_mtch2['Team_type'] = 'Home'
df_mtch = pd.concat([df_mtch, df_mtch2])
df_mtch = df_mtch[['Season', 'Date', 'Team', 'Team_type', 'AwayPTS', 'HomePTS', 'is_OT', 'cup_gm', 'pstszn_gm']]
df_mtch = df_mtch.sort_values(["Team", "Date"])
df_mtch['team_game_num'] = df_mtch.groupby(["Team", "Season"]).cumcount() + 1
df_mtch['Spread'] = np.where(df_mtch.Team_type == 'Home', df_mtch.HomePTS - df_mtch.AwayPTS, df_mtch.AwayPTS - df_mtch.HomePTS)
df_mtch['Total'] = df_mtch.AwayPTS + df_mtch.HomePTS
df_mtch['is_Win'] = np.where(df_mtch.Spread > 0, 1, 0)
df_mtch['Szn_Wins'] = df_mtch.groupby(['Season', 'Team'])['is_Win'].cumsum()
df = df.drop(['Season', 'Team_type'], axis=1).merge(df_mtch, on=['Date', 'Team'])

df = df.merge(df4[['Date', 'Team', 'Player', 'Status']], on=['Date', 'Team', 'Player'], how='left')
df['Status'] = np.where((df.Active == 1) & (df.Status.isnull()), 'Available', df.Status)
df['Status'] = np.where((df.Active == 0), 'Out', df.Status)
df['Status'] = np.where((df.Status == 'Out') & (df.Active != 0), 'Available', df.Status)

team_encoder = LabelEncoder()
player_encoder = LabelEncoder()
team_type_encoder = LabelEncoder()
position_encoder = LabelEncoder()
status_encoder = LabelEncoder()

# Encode string cols
team_encoder.fit(pd.concat([df["Team"], df["Opp"]], axis=0))
df["Team"] = team_encoder.transform(df["Team"])
df["Opp"] = team_encoder.transform(df["Opp"])
df["Player_name"] = df.Player
df["Player"] = player_encoder.fit_transform(df["Player"])
df["Pos"] = position_encoder.fit_transform(df["Pos"])
df['Team_type'] = team_type_encoder.fit_transform(df['Team_type'])
df["Status"] = status_encoder.fit_transform(df["Status"])
df_pred = df.copy()
df = df[(df.Active == 1) & (df.MP > 0)].sort_values(['Season', 'Date', 'Team', 'Player']).reset_index(drop=True)
print('base df created', datetime.now())

base df created 2026-01-07 12:06:48.061641


In [21]:
# df3_temp = df3.copy().drop('Season', axis=1)
# df4_temp = df4.copy().drop('Season', axis=1)
# # display(df3_temp[(df3_temp.Date == '2025-12-25') & (df3_temp.game_id == '20251225_CLE_NYK')])
# df_temp = df4_temp.merge(df3_temp, on=['Date', 'Team', 'Player'], how='outer')
# df_temp['game_id'] = np.where(df_temp.game_id.isnull(), )

# df_temp = df_temp[(df_temp.Date == '2025-12-25') & (df_temp.Team.isin(['CLE', 'NYK']))]
# display(df_temp)

# Minutes Projection Model

In [93]:
def setup_df_mins(con, df):
    
    df = df[['Season', 'Date', 'Team', 'Team_type', 'Opp', 'Player', 'Pos', 'B2B', 'MP',
             'Spread', 'team_game_num', 'pstszn_gm', 'is_OT']]
    
    for col in ['MP']:
        for N in [1, 3, 5, 10]:
            df[f'{col}_L{N}_avg'] = (
                df.groupby(['Player', 'Season'])[col]
                  .rolling(window=N, min_periods=1)
                  .mean()
                  .shift(1)
                  .reset_index(level=[0, 1], drop=True)
            )
            df[f'prev_team_mins_pct_L{N}'] = df[f'{col}_L{N}_avg'] / 240

    games_last_7_days = df.sort_values(['Player', 'Season', 'Date']).groupby(['Player', 'Season']).rolling('7D', on='Date', closed='left')['MP'].count().reset_index().rename(columns={"MP": "gms_L7_days"})
    games_last_7_days = games_last_7_days.drop_duplicates(
        subset=['Player', 'Season', 'Date']
    )
    df = df.merge(games_last_7_days, on=['Player', 'Season', 'Date'])
    df['gms_L7_days'] = df.gms_L7_days.fillna(0).astype(int)
        
    df['reserve_td'] = (df.MP < 8).astype(int)
    df['bench_td']   = ((df.MP >= 8) & (df.MP <= 25)).astype(int)
    df['starter_td'] = (df.MP > 25).astype(int)
    role_counts = df.groupby(['Season', 'Player'])[['reserve_td', 'bench_td', 'starter_td']].sum()
    role_counts['most_common_role'] = role_counts[['reserve_td', 'bench_td', 'starter_td']].idxmax(axis=1)
    role_counts['reserve'] = (role_counts['most_common_role'] == 'reserve_td').astype(int)
    role_counts['bench']   = (role_counts['most_common_role'] == 'bench_td').astype(int)
    role_counts['starter'] = (role_counts['most_common_role'] == 'starter_td').astype(int)
    df = df.merge(role_counts[['reserve', 'bench', 'starter']], on=['Season', 'Player'], how='left')
    
    df['role'] = 0
    df['role'] = np.where(df.starter == 1, 1, df.role)
    df['role'] = np.where(df.bench == 1, 2, df.role)
    df['role'] = np.where(df.reserve == 1, 3, df.role)
    
    for N in [1, 3, 5]:
        for role in ['reserve_td', 'bench_td', 'starter_td']:
            df[f'{role}_last{N}'] = (
                df.groupby('Player')[role]
                  .rolling(N, min_periods=1)
                  .sum()
                  .shift(1)
                  .reset_index(0, drop=True)
            )
        rec_role_cols = [f'{role}_last{N}' for role in ['reserve_td', 'bench_td', 'starter_td']]
        df[f'recent_most_common_role_L{N}'] = df[rec_role_cols].idxmax(axis=1)
        df[f'recent_role_L{N}'] = 0
        df[f'recent_role_L{N}'] = np.where(df[f'recent_most_common_role_L{N}'] == f'starter_td_last{N}', 1, df[f'recent_role_L{N}'])
        df[f'recent_role_L{N}'] = np.where(df[f'recent_most_common_role_L{N}'] == f'bench_td_last{N}', 2, df[f'recent_role_L{N}'])
        df[f'recent_role_L{N}'] = np.where(df[f'recent_most_common_role_L{N}'] == f'reserve_td_last{N}', 3, df[f'recent_role_L{N}'])      
        df = df.drop(f'recent_most_common_role_L{N}', axis=1)
        for role in ['reserve_td', 'bench_td', 'starter_td']:
            df = df.drop(f'{role}_last{N}', axis=1)
      
    df['missed_games'] = (
        df.groupby(['Player', 'Team', 'Season'])['team_game_num']      
          .diff()
          .sub(1)
          .fillna(0)
          .astype(int)
    )
    
    df['game_spread_type'] = 0
    df['game_spread_type'] = np.where(abs(df.Spread < 6), 1, df.game_spread_type) 
    df['game_spread_type'] = np.where((abs(df.Spread >= 6) & abs(df.Spread <= 14)), 2, df.game_spread_type) 
    df['game_spread_type'] = np.where(abs(df.Spread > 14), 3, df.game_spread_type) 

    # Tell model games exist after players injuries/susp
    team_games = df[['Season', 'Team', 'Date', 'team_game_num']].drop_duplicates()
    players = df[['Season','Player','Team']].drop_duplicates()
    fabricated = (players.sort_values('Season').groupby('Player', as_index=False).last())
    fabricated['Season'] = fabricated['Season'] + 1
    players = pd.concat([players, fabricated], ignore_index=True).drop_duplicates(['Season','Player','Team'])
    expanded = team_games.merge(players, on=['Season', 'Team'], how='left')
    expanded = expanded.merge(df[['Season', 'Player', 'Date', 'MP']], on=['Season', 'Player', 'Date'], how='left').drop_duplicates(['Season', 'Date', 'Player','Team'])
    expanded['player_played'] = expanded['MP'].notna().astype(int)
    expanded['team_played_no_player'] = ((expanded['player_played'] == 0)).astype(int)
    expanded['tm_plays_after'] = (expanded.groupby(['Player'])['team_played_no_player'].shift(-1))
    expanded['gms_after'] = 0
    expanded['gms_after'] = np.where((expanded.player_played == 1) & (expanded.tm_plays_after == 1), 1, expanded.gms_after)
    df = df.merge(expanded[['Date', 'Team', 'Player', 'gms_after']], on=['Date', 'Team', 'Player'])
    
    df['MP_change_pct_L10'] = (df['MP_L1_avg'] - df['MP_L10_avg']) / df['MP_L10_avg']
    df['Early_stop'] = (
        (df['MP_L1_avg'] < 5) |  
        ((df.role == 1) & (df.MP_change_pct_L10 <= -0.35)) | 
        ((df.role == 2) & (df.MP_change_pct_L10 <= -0.45)) | 
        ((df.role == 3) & (df.MP_change_pct_L10 <= -0.55))
    ).astype(int)
    df['Early_stop'] = df.groupby('Player')['Early_stop'].shift(-1).fillna(0).astype(int)
    Early_stop_conds = (
                        ((((df['MP'] - df['MP_L10_avg']) / df['MP_L10_avg']) <= -0.25) & (df.gms_after == 1)) | 
                        ((df.MP < 8) & (df.role != 3))
                       )
    df['Early_stop'] = np.where(Early_stop_conds, 1, df.Early_stop)
    
    df['MP_increase'] = (
        ((df.role == 1) & (df.MP_change_pct_L10 >= 0.15)) |
        ((df.role == 2) & (df.MP_change_pct_L10 >= 0.10)) |
        ((df.role == 3) & (df.MP_change_pct_L10 >= 0.05))
    ).astype(int)
    df['MP_increase'] = df.groupby('Player')['MP_increase'].shift(-1).fillna(0).astype(int)
    MP_Inc_conds = (
                    ((((df['MP'] - df['MP_L10_avg']) / df['MP_L10_avg']) >= 0.15))
                   )
    df['MP_increase'] = np.where(MP_Inc_conds, 1, df.MP_increase)
    df['MP_increase_extreme'] = (
            ((df.MP > df.MP_L5_avg * 3))
    ).astype(int)
    
    df['Injured'] = (
            ((df.MP < df.MP_L5_avg * 0.3) & (df.role != 3) & (df.recent_role_L1 != 3) & (df.recent_role_L3 != 3) & (df.recent_role_L5 != 3) & (df.gms_after > 0))
    ).astype(int)
    df['return_game'] = ((df.groupby('Player')['Injured'].shift(1) == 1) & (df.missed_games > 0)).astype(int)
    df['games_since_return'] = (df.groupby('Player')['return_game'].cumsum())
    df['games_since_return'] = (df.groupby(['Player', 'games_since_return']).cumcount())
    df['ramp_phase'] = 0
    df.loc[df.return_game == 1, 'ramp_phase'] = 1
    df.loc[df.games_since_return.isin([1, 2, 3]), 'ramp_phase'] = 2
    df.loc[df.games_since_return >= 4, 'ramp_phase'] = 3
    df['starter_return'] = ((df.return_game == 1) & (df.role == 1)).astype(int)
    df['bench_return']   = ((df.return_game == 1) & (df.role == 2)).astype(int)
    
    # Location based features
    df["DaysLstGm"] = (df.groupby("Player")["Date"].diff().dt.days).fillna(0).astype(int)
    df['Location'] = df.apply(lambda r: r['Team'] if r['Team_type'] == 'Home' else r['Opp'], axis=1)
    df['PrevLocation'] = df.groupby('Player')['Location'].shift(1)
    df['same_arena'] = (df['PrevLocation'] == df['Location']).astype(int)

    df = df.drop(['Season', 'Team_type', 'reserve_td', 'reserve', 'bench_td', 'bench', 'starter_td', 'starter', 
                  'PrevLocation', 'Location', 'gms_after', 'return_game', 'MP_change_pct_L10'], axis=1)    
    
    return df

In [96]:
df_mins = df.copy()
df_mins = setup_df_mins(con, df_mins)
display(df_mins)

n = len(df_mins)
train_end = int(0.8 * n)
val_end   = int(0.9 * n)
mins_train_df = df_mins.iloc[:train_end]
mins_val_df   = df_mins.iloc[train_end:val_end]
mins_test_df  = df_mins.iloc[val_end:]
mins_DFS = (mins_train_df, mins_val_df, mins_test_df)

# mins_model = create_baseline_model(df_mins, "MP", mins_DFS)
mins_model, mins_preds, y_test_mins, analyze_df_mins = hyperparam_tuning(mins_DFS, "MP", n_iter=1)
# feature_importance(mins_model, df_mins.columns.tolist())

# mins_model.save_model("../ML_models/mins_model.json")
# print('Saved minutes model!')

Unnamed: 0,Date,Team,Opp,Player,Pos,B2B,MP,Spread,team_game_num,pstszn_gm,is_OT,MP_L1_avg,prev_team_mins_pct_L1,MP_L3_avg,prev_team_mins_pct_L3,MP_L5_avg,prev_team_mins_pct_L5,MP_L10_avg,prev_team_mins_pct_L10,gms_L7_days,role,recent_role_L1,recent_role_L3,recent_role_L5,missed_games,game_spread_type,Early_stop,MP_increase,MP_increase_extreme,Injured,games_since_return,ramp_phase,starter_return,bench_return,DaysLstGm,same_arena
0,2021-10-19,2,16,71,0,0,22.98,-23.0,1,0,0,2.33,0.009708,5.410000,0.022542,3.964,0.016517,6.374000,0.026558,0,2,3,3,3,0,1,0,1,1,0,0,0,0,0,0,0
1,2021-10-19,2,16,96,3,0,3.75,-23.0,1,0,0,14.85,0.061875,15.760000,0.065667,21.184,0.088267,22.006000,0.091692,0,1,2,2,2,0,1,1,0,0,1,0,0,0,0,0,0
2,2021-10-19,2,16,112,4,0,3.75,-23.0,1,0,0,29.93,0.124708,26.863333,0.111931,25.166,0.104858,28.555000,0.118979,0,2,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0
3,2021-10-19,2,16,211,3,0,3.75,-23.0,1,0,0,12.33,0.051375,11.650000,0.048542,11.650,0.048542,11.650000,0.048542,0,2,2,2,3,0,1,1,0,0,0,0,0,0,0,0,0
4,2021-10-19,2,16,406,2,0,30.63,-23.0,1,0,0,2.78,0.011583,4.620000,0.019250,8.382,0.034925,7.942857,0.033095,0,1,3,3,3,0,1,0,1,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122566,2026-01-06,29,21,568,3,0,22.55,8.0,35,0,0,17.85,0.074375,21.103333,0.087931,21.048,0.087700,23.168000,0.096533,3,2,2,2,2,0,2,0,0,0,0,78,3,0,0,2,0
122567,2026-01-06,29,21,620,4,0,2.93,8.0,35,0,0,7.03,0.029292,12.566667,0.052361,9.214,0.038392,9.600000,0.040000,1,3,3,2,3,0,2,0,0,0,0,154,3,0,0,2,0
122568,2026-01-06,29,21,646,1,0,17.08,8.0,35,0,0,13.75,0.057292,16.746667,0.069778,18.814,0.078392,20.930000,0.087208,3,2,2,2,2,0,2,0,0,0,0,115,3,0,0,2,0
122569,2026-01-06,29,21,881,4,0,22.47,8.0,35,0,0,26.97,0.112375,27.780000,0.115750,26.798,0.111658,24.603000,0.102513,3,2,1,1,1,0,2,0,0,0,0,27,3,0,0,2,0



Trial 1/1: {'n_estimators': 871, 'learning_rate': np.float64(0.03952118305317796), 'max_depth': 3, 'min_child_weight': 2, 'subsample': np.float64(0.9289218458991529), 'colsample_bytree': np.float64(0.8852173328606004), 'gamma': np.float64(0.7077972197555418), 'reg_lambda': np.float64(0.28068611849876135), 'reg_alpha': np.float64(1.9340300260483556)}
Validation MAE: 2.6809

Best validation MAE: 2.6809280664469335
Best parameters: {'learning_rate': np.float64(0.03952118305317796), 'max_depth': 3, 'min_child_weight': 2, 'subsample': np.float64(0.9289218458991529), 'colsample_bytree': np.float64(0.8852173328606004), 'gamma': np.float64(0.7077972197555418), 'reg_lambda': np.float64(0.28068611849876135), 'reg_alpha': np.float64(1.9340300260483556), 'objective': 'reg:squarederror', 'tree_method': 'hist', 'device': 'cuda', 'seed': 42}

Test Metrics:
RMSE: 3.275675505806158
MAE: 2.580317441066145
R²: 0.903408310225952


In [24]:
df[df.Player_name == 'Tyrese Haliburton'].Player.unique().tolist()[0]

913

In [25]:
# df_mins[(df_mins.Player == 913)].tail(5)
# display(analyze_df_mins[analyze_df_mins.Player == 'Tyrese Haliburton'])

In [97]:
analyze_df_mins = mins_test_df.drop(['MP', 'MP_preds'], axis=1)\
                .merge(analyze_df_mins[['Date', 'Team', 'Player', 'MP', 'MP_preds']], on=['Date', 'Team', 'Player'])
analyze_df_mins['Diff'] = analyze_df_mins['MP'] - analyze_df_mins[f'MP_preds']
analyze_df_mins['Diff2'] = abs(analyze_df_mins['Diff'])
analyze_df_mins.sort_values('Diff', ascending=True).drop('Diff2', axis=1).head(15)
# display(analyze_df_mins)

# plt.figure(figsize=(10,6))
# hist_col = 'Diff'
# plt.hist(analyze_df_mins[hist_col], bins=30, color='skyblue', edgecolor='black')
# plt.title(f'Histogram of {hist_col}')
# plt.xlabel(hist_col)
# plt.ylabel('Frequency')
# plt.grid(axis='y', alpha=0.75)
# plt.show()

Unnamed: 0,Date,Team,Opp,Player,Pos,B2B,Spread,team_game_num,pstszn_gm,is_OT,MP_L1_avg,prev_team_mins_pct_L1,MP_L3_avg,prev_team_mins_pct_L3,MP_L5_avg,prev_team_mins_pct_L5,MP_L10_avg,prev_team_mins_pct_L10,gms_L7_days,role,recent_role_L1,recent_role_L3,recent_role_L5,missed_games,game_spread_type,Early_stop,MP_increase,MP_increase_extreme,Injured,games_since_return,ramp_phase,starter_return,bench_return,DaysLstGm,same_arena,MP,MP_preds,Diff
12215,2026-01-06,ORL,WAS,Anthony Black,PG,0,-8.0,37,0,0,35.55,0.148125,33.31,0.138792,32.44,0.135167,33.857,0.141071,3,1,1,1,1,0,1,0,0,0,0,131,3,0,0,2,0,20.5,34.319984,-13.819984
12143,2026-01-06,CLE,IND,Dean Wade,PF,0,4.0,38,0,0,23.4,0.0975,21.85,0.091042,23.98,0.099917,25.175,0.104896,1,2,2,2,2,2,1,0,0,0,0,25,3,0,0,6,0,8.42,21.951012,-13.531012
438,2025-04-30,GSW,HOU,Stephen Curry,PG,0,-15.0,88,1,0,39.12,0.163,39.143333,0.163097,39.142,0.163092,35.621,0.148421,3,1,1,1,1,0,1,0,0,0,0,305,3,0,0,2,1,23.45,36.59724,-13.14724
8414,2025-12-06,MIL,DET,Ryan Rollins,PG,1,-12.0,25,0,0,34.73,0.144708,34.37,0.143208,35.026,0.145942,34.98,0.14575,4,1,1,1,1,0,1,0,0,0,0,62,3,0,0,1,0,22.87,35.145302,-12.275302
1681,2025-10-22,PHI,BOS,Justin Edwards,SF,0,1.0,1,0,0,11.27,0.046958,24.953333,0.103972,27.572,0.114883,31.695,0.132062,0,2,2,1,1,0,1,1,0,0,0,43,3,0,0,200,0,0.1,12.355588,-12.255588
12216,2026-01-06,ORL,WAS,Desmond Bane,SG,0,-8.0,37,0,0,36.55,0.152292,35.89,0.149542,35.126,0.146358,35.278,0.146992,3,1,1,1,1,0,1,0,0,0,0,305,3,0,0,2,0,23.62,35.858395,-12.238395
12222,2026-01-06,ORL,WAS,Paolo Banchero,PF,0,-8.0,37,0,0,36.15,0.150625,35.26,0.146917,35.95,0.149792,36.3,0.15125,3,1,1,1,1,0,1,0,0,0,0,237,3,0,0,2,0,24.53,36.703953,-12.173953
4574,2025-11-10,DAL,MIL,P.J. Washington,PF,1,-2.0,11,0,0,36.58,0.152417,33.256667,0.138569,34.3,0.142917,33.865,0.141104,4,1,1,1,1,0,1,0,0,0,0,143,3,0,0,2,0,22.47,34.420891,-11.950891
1432,2025-06-19,IND,OKC,Tyrese Haliburton,PG,0,17.0,104,1,0,34.15,0.142292,35.643333,0.148514,36.02,0.150083,35.804,0.149183,2,1,1,1,1,0,3,0,0,0,0,312,3,0,0,3,1,22.87,34.818436,-11.948436
12225,2026-01-06,ORL,WAS,Wendell Carter Jr.,C,0,-8.0,37,0,0,31.52,0.131333,30.74,0.128083,32.344,0.134767,31.558,0.131492,3,1,1,1,1,0,1,0,0,0,0,103,3,0,0,2,0,19.57,31.217251,-11.647251


In [35]:
rmse = np.sqrt(mean_squared_error(y_test_mins, mins_preds)) # splits[5] = y_test
mae = mean_absolute_error(y_test_mins, mins_preds)
print('RMSE:', rmse)

df_yesterday = pd.read_csv(f'../tables/2025/gmday_preds_{tgt_stat}.csv')
df_yesterday['Date'] = pd.to_datetime(df_yesterday.Date)
df_yesterday = df_yesterday[(df_yesterday.Date == (datetime.strptime(now, "%Y-%m-%d") - timedelta(days=1)).strftime("%Y-%m-%d"))]\
                .rename(columns={"MP": "MP_proj"})

df_gms = pd.read_csv(f"../tables/2025/season_gamelogs.csv")
df_gms['Date'] = pd.to_datetime(df_gms.Date)

df_yesterday = df_yesterday.merge(df_gms[['Date', 'Team', 'Player', 'MP']], on=['Date', 'Team', 'Player'])
df_yesterday = df_yesterday[['Date', 'Team', 'Player', 'Pos', 'Opp', 'MP_proj', 'MP', 'MP_L5_avg']][df_yesterday.MP > 0]

df_yesterday['Diff'] = abs(df_yesterday['MP_proj'] - df_yesterday['MP'])
df_yesterday['InRMSE_Range'] = np.where(df_yesterday['Diff'] <= rmse, 1, 0)

print("\nYesterday's Results:")
print("Total Accuracy (InRMSE_Range):", ((df_yesterday.InRMSE_Range == 1).sum() / df_yesterday.shape[0]))
print((df_yesterday.InRMSE_Range == 1).sum(), '/', df_yesterday.shape[0])

df_yesterday = df_yesterday.drop(['Diff'], axis=1)

if df_yesterday.shape[0] >= 50:
    for tm in df_yesterday.Team.unique():
        display(df_yesterday[df_yesterday.Team == tm])
else:
    display(df_yesterday)

RMSE: 3.241698038762518

Yesterday's Results:
Total Accuracy (InRMSE_Range): 0.4603174603174603
29 / 63


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,MP_L5_avg,InRMSE_Range
0,2026-01-06,MEM,Cam Spencer,SG,SAS,24.446419,29.93,26.823523,0
14,2026-01-06,MEM,Vince Williams Jr.,SG,SAS,15.03282,22.12,15.115341,0
18,2026-01-06,MEM,Jock Landale,C,SAS,19.070923,23.67,14.869866,0
27,2026-01-06,MEM,Kentavious Caldwell-Pope,SG,SAS,18.98624,19.58,18.580672,1
38,2026-01-06,MEM,Jaylen Wells,SG,SAS,29.14994,28.3,29.768909,1
40,2026-01-06,MEM,Santi Aldama,PF,SAS,32.999393,23.9,31.991502,0
41,2026-01-06,MEM,Jaren Jackson Jr.,C,SAS,33.338287,35.85,32.865419,1


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,MP_L5_avg,InRMSE_Range
1,2026-01-06,DAL,Anthony Davis,PF,SAC,33.596607,35.62,31.264408,1
20,2026-01-06,DAL,Cooper Flagg,PG,SAC,36.19669,35.0,36.203255,1
22,2026-01-06,DAL,Max Christie,SG,SAC,28.018776,35.88,28.80005,0
29,2026-01-06,DAL,Naji Marshall,SF,SAC,29.751879,31.53,30.130963,1
31,2026-01-06,DAL,Daniel Gafford,C,SAC,15.004892,22.48,19.946695,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,MP_L5_avg,InRMSE_Range
2,2026-01-06,CLE,Darius Garland,PG,IND,30.502794,32.97,30.972542,1
3,2026-01-06,CLE,Evan Mobley,PF,IND,30.405874,35.6,27.061473,0
8,2026-01-06,CLE,Craig Porter Jr.,PG,IND,14.378641,27.97,21.759105,0
21,2026-01-06,CLE,Dean Wade,PF,IND,16.370275,8.42,23.611877,0
23,2026-01-06,CLE,Jaylon Tyson,SG,IND,24.867628,27.48,25.964171,1
28,2026-01-06,CLE,De'Andre Hunter,SF,IND,23.997719,21.75,25.047504,1
35,2026-01-06,CLE,Jarrett Allen,C,IND,25.163927,33.65,25.544936,0
37,2026-01-06,CLE,Sam Merrill,SG,IND,22.391996,31.43,18.533059,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,MP_L5_avg,InRMSE_Range
4,2026-01-06,IND,T.J. McConnell,PG,CLE,15.22448,17.52,15.982102,1
5,2026-01-06,IND,Aaron Nesmith,SF,CLE,24.976173,33.1,28.427271,0
32,2026-01-06,IND,Pascal Siakam,PF,CLE,34.92363,34.58,32.259611,1
44,2026-01-06,IND,Andrew Nembhard,PG,CLE,33.037189,30.48,31.728244,1


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,MP_L5_avg,InRMSE_Range
6,2026-01-06,MIN,Jaden McDaniels,PF,MIA,28.060642,33.65,29.394437,0
13,2026-01-06,MIN,Donte DiVincenzo,SG,MIA,29.275482,30.52,29.644635,1
24,2026-01-06,MIN,Naz Reid,C,MIA,25.67791,24.52,26.882315,1
25,2026-01-06,MIN,Julius Randle,PF,MIA,33.388317,26.73,31.946173,0
33,2026-01-06,MIN,Anthony Edwards,SG,MIA,36.746254,29.17,33.944897,0
53,2026-01-06,MIN,Rudy Gobert,C,MIA,32.553703,32.63,29.445419,1


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,MP_L5_avg,InRMSE_Range
7,2026-01-06,NOP,Zion Williamson,PF,LAL,26.232391,33.33,27.269738,0
17,2026-01-06,NOP,Trey Murphy III,SF,LAL,36.065701,39.07,33.534183,1
39,2026-01-06,NOP,Herbert Jones,SF,LAL,18.841911,34.92,22.091673,0
42,2026-01-06,NOP,Jeremiah Fears,PG,LAL,24.391014,19.92,26.860384,0
58,2026-01-06,NOP,Jordan Poole,PG,LAL,25.406183,18.32,22.917624,0
59,2026-01-06,NOP,Derik Queen,C,LAL,27.688173,31.7,26.650655,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,MP_L5_avg,InRMSE_Range
9,2026-01-06,MIA,Tyler Herro,SG,MIN,20.78289,28.62,17.575338,0
34,2026-01-06,MIA,Andrew Wiggins,SF,MIN,29.836916,25.52,29.74595,0
46,2026-01-06,MIA,Norman Powell,SG,MIN,30.847979,30.93,30.586245,1
51,2026-01-06,MIA,Bam Adebayo,C,MIN,32.423607,28.47,29.688206,0
55,2026-01-06,MIA,Davion Mitchell,PG,MIN,29.63023,27.88,29.818827,1
56,2026-01-06,MIA,Nikola Jovic,PF,MIN,17.193504,26.18,22.899239,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,MP_L5_avg,InRMSE_Range
10,2026-01-06,LAL,Luka Doncic,PG,NOP,36.947445,37.48,33.248461,1
12,2026-01-06,LAL,Deandre Ayton,C,NOP,29.0888,31.9,29.553574,1
30,2026-01-06,LAL,Jake LaRavia,PF,NOP,29.02663,34.77,29.58955,0
47,2026-01-06,LAL,LeBron James,SF,NOP,34.881908,33.15,31.646421,1
48,2026-01-06,LAL,Marcus Smart,SG,NOP,28.629282,36.65,27.957475,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,MP_L5_avg,InRMSE_Range
11,2026-01-06,SAS,Dylan Harper,SG,MEM,18.915489,18.03,22.634641,1
19,2026-01-06,SAS,Stephon Castle,PG,MEM,31.565693,28.38,31.025393,1
45,2026-01-06,SAS,Victor Wembanyama,C,MEM,22.6427,21.02,24.371285,1
57,2026-01-06,SAS,De'Aaron Fox,PG,MEM,32.838615,29.92,32.896072,1


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,MP_L5_avg,InRMSE_Range
15,2026-01-06,WAS,Tre Johnson,SG,ORL,24.653568,22.47,28.551482,1
26,2026-01-06,WAS,CJ McCollum,SG,ORL,30.772987,38.17,32.288115,0
36,2026-01-06,WAS,Khris Middleton,SF,ORL,19.35446,22.55,21.296792,1
54,2026-01-06,WAS,Bilal Coulibaly,SG,ORL,26.597803,34.05,26.020688,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,MP_L5_avg,InRMSE_Range
16,2026-01-06,SAC,Zach LaVine,SG,DAL,32.067833,37.25,28.105437,0
49,2026-01-06,SAC,Maxime Raynaud,C,DAL,27.458019,34.7,29.580904,0
52,2026-01-06,SAC,Russell Westbrook,PG,DAL,30.596951,31.13,26.709042,1
62,2026-01-06,SAC,DeMar DeRozan,PF,DAL,33.666763,36.52,30.696329,1


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,MP_L5_avg,InRMSE_Range
43,2026-01-06,ORL,Anthony Black,PG,WAS,34.459328,20.5,34.119724,0
50,2026-01-06,ORL,Desmond Bane,SG,WAS,36.183983,23.62,36.205182,0
60,2026-01-06,ORL,Paolo Banchero,PF,WAS,36.49786,24.53,34.886351,0
61,2026-01-06,ORL,Wendell Carter Jr.,C,WAS,31.750124,19.57,33.419242,0


# Main Model

In [36]:
def setup_df_main(df, tgt_stat):
    
    # Stat dependent features 
    if tgt_stat == 'PTS':
        tgt_stat_cols = ['TPM', 'FG', 'FT', 'TPA', 'FGA', 'FTA']
        df = df[['Season', 'Date', 'Team', 'Opp', 'Player', 'Pos', 'MP', 'team_game_num', 
         'PTS', 'TPM', 'FG', 'FGA', 'TPA', 'FT', 'FTA', 
         f'Off_{tgt_stat}', f'Off_L3_{tgt_stat}', f'Off_L5_{tgt_stat}', f'Off_L10_{tgt_stat}', f'Off_{tgt_stat}_Rk',
         f'Def_{tgt_stat}', f'Def_L3_{tgt_stat}', f'Def_L5_{tgt_stat}', f'Def_L10_{tgt_stat}', f'Def_{tgt_stat}_Rk',
         'Spread', 'Total', 'is_OT']]
        
        # Efficiency metrics
        df['three_rate_raw'] =  np.where(df.FGA > 0, df['TPA'] / df['FGA'], 0)
        df['ft_rate_raw']    =  np.where(df.FGA > 0, df['FTA'] / df['FGA'], 0)
        df['eFG_raw'] = (df['FG'] + 0.5 * df['TPM']) / df['FGA']
        df['TS_raw'] = df['PTS'] / (2 * (df['FGA'] + 0.44 * df['FTA']))    
        df['usage_proxy_raw'] =  np.where(df.MP > 0, (df['FGA'] + 0.44 * df['FTA']) / df['MP'], 0)
        
        for w in [3, 5, 10]:
            for metric in ['three_rate', 'ft_rate', 'eFG', 'TS', 'usage_proxy']:
                col = f"{metric}_L{w}"
                df[col] = (
                    df.groupby(['Player','Season'])[f'{metric}_raw']
                      .rolling(w, min_periods=1)
                      .mean()
                      .shift(1)
                      .reset_index(level=[0,1], drop=True)
                )
        for metric in ['three_rate', 'ft_rate', 'eFG', 'TS', 'usage_proxy']:
            col = f'{metric}_weighted'
            df[col] = (
                0.6 * df[f'{metric}_L3'] +
                0.3 * df[f'{metric}_L5'] +
                0.1 * df[f'{metric}_L10']
            )
            df = df.drop(f'{metric}_raw', axis=1)
        
    elif tgt_stat == 'PRA':
        tgt_stat_cols = ['PTS', 'REB', 'AST', 'TPM', 'FG']
        df = df[['Season', 'Date', 'Team', 'Opp', 'Player', 'Pos', 'MP', 'team_game_num', 
         'PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK', 
         'FG', 'FGA', 'TPA', 'FT', 'FTA', 
         f'Off_{tgt_stat}', f'Off_L3_{tgt_stat}', f'Off_L5_{tgt_stat}', f'Off_L10_{tgt_stat}', f'Off_{tgt_stat}_Rk',
         f'Def_{tgt_stat}', f'Def_L3_{tgt_stat}', f'Def_L5_{tgt_stat}', f'Def_L10_{tgt_stat}', f'Def_{tgt_stat}_Rk',
         'Spread', 'Total', 'is_OT']]
        
        df['usage_proxy_raw'] =  np.where(df.MP > 0, (df['FGA'] + 0.44 * df['FTA']) / df['MP'], 0)
        for w in [3, 5, 10]:
            df[f"usage_proxy_L{w}"] = (
                df.groupby(['Player','Season'])[f'usage_proxy_raw']
                  .rolling(w, min_periods=1)
                  .mean()
                  .shift(1)
                  .reset_index(level=[0,1], drop=True)
            )
        df['usage_proxy_weighted'] = (
            0.6 * df[f'usage_proxy_L3'] +
            0.3 * df[f'usage_proxy_L5'] +
            0.1 * df[f'usage_proxy_L10']
        )
        df = df.drop('usage_proxy_raw', axis=1)
        
        
    else:
        tgt_stat_cols = []
        df = df[['Season', 'Date', 'Team', 'Opp', 'Player', 'Pos', 'MP', 'team_game_num', 
         'PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK',
         'FG', 'FGA', 'TPA', 'FT', 'FTA', 
          f'Off_{tgt_stat}', f'Off_L3_{tgt_stat}', f'Off_L5_{tgt_stat}', f'Off_L10_{tgt_stat}', f'Off_{tgt_stat}_Rk',
          f'Def_{tgt_stat}', f'Def_L3_{tgt_stat}', f'Def_L5_{tgt_stat}', f'Def_L10_{tgt_stat}', f'Def_{tgt_stat}_Rk',
         'Spread', 'Total', 'is_OT']]

    
    # Create rolling + lag features    
    for col in ['MP'] + tgt_stat_cols:
        for N in [1, 3, 5, 10]:
            df[f'{col}_L{N}_avg'] = (
                df.groupby(['Player', 'Season'])[col]
                  .rolling(window=N, min_periods=1)
                  .mean()
                  .shift(1)
                  .reset_index(level=[0, 1], drop=True)
            )

    # Role identifiers features
    df['reserve_td'] = (df.MP < 8).astype(int)
    df['bench_td']   = ((df.MP >= 8) & (df.MP <= 25)).astype(int)
    df['starter_td'] = (df.MP > 25).astype(int)
    role_counts = df.groupby(['Season', 'Player'])[['reserve_td', 'bench_td', 'starter_td']].sum()
    role_counts['most_common_role'] = role_counts[['reserve_td', 'bench_td', 'starter_td']].idxmax(axis=1)
    role_counts['reserve'] = (role_counts['most_common_role'] == 'reserve_td').astype(int)
    role_counts['bench']   = (role_counts['most_common_role'] == 'bench_td').astype(int)
    role_counts['starter'] = (role_counts['most_common_role'] == 'starter_td').astype(int)
    df = df.merge(role_counts[['reserve', 'bench', 'starter']], on=['Season', 'Player'], how='left')
    df['role'] = 0
    df['role'] = np.where(df.starter == 1, 1, df.role)
    df['role'] = np.where(df.bench == 1, 2, df.role)
    df['role'] = np.where(df.reserve == 1, 3, df.role)
    
    for N in [1, 3, 5]:
        for role in ['reserve_td', 'bench_td', 'starter_td']:
            df[f'{role}_last{N}'] = (
                df.sort_values(['Player', 'Date']).groupby('Player')[role]
                  .rolling(N, min_periods=1)
                  .sum()
                  .shift(1)
                  .reset_index(0, drop=True)
            )
        rec_role_cols = [f'{role}_last{N}' for role in ['reserve_td', 'bench_td', 'starter_td']]
        df[f'recent_most_common_role_L{N}'] = df[rec_role_cols].idxmax(axis=1)
        df[f'recent_role_L{N}'] = 0
        df[f'recent_role_L{N}'] = np.where(df[f'recent_most_common_role_L{N}'] == f'starter_td_last{N}', 1, df[f'recent_role_L{N}'])
        df[f'recent_role_L{N}'] = np.where(df[f'recent_most_common_role_L{N}'] == f'bench_td_last{N}', 2, df[f'recent_role_L{N}'])
        df[f'recent_role_L{N}'] = np.where(df[f'recent_most_common_role_L{N}'] == f'reserve_td_last{N}', 3, df[f'recent_role_L{N}'])      
        df = df.drop(f'recent_most_common_role_L{N}', axis=1)
        for role in ['reserve_td', 'bench_td', 'starter_td']:
            df = df.drop(f'{role}_last{N}', axis=1)
    
    df['game_spread_type'] = 0
    df['game_spread_type'] = np.where(abs(df.Spread < 6), 1, df.game_spread_type) 
    df['game_spread_type'] = np.where((abs(df.Spread >= 6) & abs(df.Spread <= 14)), 2, df.game_spread_type) 
    df['game_spread_type'] = np.where(abs(df.Spread > 14), 3, df.game_spread_type) 
    
    for col in categories + ['Season', 'FG', 'FGA', 'FT', 'FTA', 'TPM', 'TPA', 
                             'reserve_td', 'reserve', 'bench_td', 'bench', 'starter_td', 'starter'] + tgt_stat_cols:
        if col == tgt_stat:
            continue
        if col in df.columns:
            df = df.drop(col, axis=1)
        
    return df

In [37]:
df_main = df.copy()
df_main = setup_df_main(df_main, tgt_stat)
display(df_main)

n = len(df_main)
train_end = int(0.65 * n)
val_end   = int(0.85 * n)
main_train_df = df_main.iloc[:train_end]
main_val_df   = df_main.iloc[train_end:val_end]
main_test_df  = df_main.iloc[val_end:]
main_DFS = (main_train_df, main_val_df, main_test_df)

# stat_model = create_baseline_model(df_main, tgt_stat, main_DFS)
stat_model, stat_preds, y_test_stat, analyze_df_stat = hyperparam_tuning(main_DFS, tgt_stat, n_iter=1)
# feature_importance(stat_model, df_main.columns.tolist())

# stat_model.save_model(f"../ML_models/{tgt_stat}_model.json")
# print(f'Saved {tgt_stat} model!')

Unnamed: 0,Date,Team,Opp,Player,Pos,MP,team_game_num,PRA,Off_PRA,Off_L3_PRA,Off_L5_PRA,Off_L10_PRA,Off_PRA_Rk,Def_PRA,Def_L3_PRA,Def_L5_PRA,Def_L10_PRA,Def_PRA_Rk,Spread,Total,is_OT,usage_proxy_L3,usage_proxy_L5,usage_proxy_L10,usage_proxy_weighted,MP_L1_avg,MP_L3_avg,MP_L5_avg,MP_L10_avg,PTS_L1_avg,PTS_L3_avg,PTS_L5_avg,PTS_L10_avg,REB_L1_avg,REB_L3_avg,REB_L5_avg,REB_L10_avg,AST_L1_avg,AST_L3_avg,AST_L5_avg,AST_L10_avg,TPM_L1_avg,TPM_L3_avg,TPM_L5_avg,TPM_L10_avg,FG_L1_avg,FG_L3_avg,FG_L5_avg,FG_L10_avg,role,recent_role_L1,recent_role_L3,recent_role_L5,game_spread_type
0,2021-10-19,2,16,71,0,22.98,1,11.0,11.000000,11.000000,11.0,11.0,7.0,30.000000,30.000000,30.0,30.0,2.0,-23.0,231.0,0,0.302402,0.286831,0.167386,0.284229,2.33,5.410000,3.964,6.374000,2.0,2.000000,1.4,0.900000,0.0,0.333333,0.4,0.700000,0.0,0.333333,0.2,0.300000,0.0,0.000000,0.0,0.000000,1.0,1.000000,0.6,0.400000,2,3,3,3,1
1,2021-10-19,2,16,96,3,3.75,1,1.0,1.000000,1.000000,1.0,1.0,10.0,13.000000,13.000000,13.0,13.0,2.0,-23.0,231.0,0,0.450357,0.395621,0.340359,0.422936,14.85,15.760000,21.184,22.006000,0.0,3.000000,4.8,7.700000,3.0,2.000000,3.8,4.000000,3.0,1.333333,1.8,1.600000,0.0,1.000000,1.2,2.000000,0.0,1.000000,1.6,2.400000,1,2,2,2,1
2,2021-10-19,2,16,112,4,3.75,1,2.0,2.000000,2.000000,2.0,2.0,7.0,25.000000,25.000000,25.0,25.0,3.0,-23.0,231.0,0,0.352772,0.335899,0.364068,0.348840,29.93,26.863333,25.166,28.555000,21.0,10.333333,9.2,13.600000,8.0,4.000000,3.2,3.600000,8.0,7.333333,6.8,7.700000,3.0,1.333333,1.0,2.400000,8.0,3.666667,3.4,4.500000,2,1,1,1,1
3,2021-10-19,2,16,211,3,3.75,1,0.0,0.000000,0.000000,0.0,0.0,11.0,13.000000,13.000000,13.0,13.0,2.0,-23.0,231.0,0,0.262976,0.262976,0.262976,0.262976,12.33,11.650000,11.650,11.650000,2.0,4.500000,4.5,4.500000,5.0,5.000000,5.0,5.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,1.0,2.000000,2.0,2.000000,2,2,2,3,1
4,2021-10-19,2,16,406,2,30.63,1,36.0,36.000000,36.000000,36.0,36.0,2.0,38.000000,38.000000,38.0,38.0,3.0,-23.0,231.0,0,0.490097,0.469884,0.517130,0.486736,2.78,4.620000,8.382,7.942857,2.0,1.333333,4.8,4.142857,0.0,0.666667,1.4,1.714286,0.0,1.000000,0.6,0.428571,0.0,0.000000,0.4,0.428571,1.0,0.666667,1.8,1.571429,1,3,3,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122566,2026-01-06,29,21,568,3,22.55,35,21.0,16.913043,12.333333,13.6,16.9,35.0,38.828571,37.333333,32.4,36.5,13.0,8.0,232.0,0,0.313724,0.345971,0.401551,0.332181,17.85,21.103333,21.048,23.168000,10.0,6.333333,7.0,10.000000,2.0,3.000000,4.0,3.700000,1.0,3.000000,2.6,3.200000,2.0,0.666667,0.4,0.700000,4.0,2.333333,2.4,3.700000,2,2,2,2,2
122567,2026-01-06,29,21,620,4,2.93,35,1.0,6.055556,6.333333,5.6,6.6,92.0,34.333333,22.000000,27.0,34.3,4.0,8.0,232.0,0,0.351529,0.357164,0.354919,0.353558,7.03,12.566667,9.214,9.600000,4.0,4.666667,3.8,4.000000,1.0,1.333333,1.4,1.900000,0.0,0.333333,0.4,0.700000,1.0,0.333333,0.4,0.300000,1.0,1.666667,1.4,1.400000,3,3,2,3,2
122568,2026-01-06,29,21,646,1,17.08,35,15.0,17.103448,15.333333,16.8,20.1,36.0,29.000000,43.666667,46.2,37.0,2.0,8.0,232.0,0,0.298322,0.321900,0.383645,0.313928,13.75,16.746667,18.814,20.930000,6.0,9.000000,9.6,12.100000,3.0,4.666667,5.6,6.000000,0.0,1.666667,1.6,2.000000,0.0,0.000000,0.0,0.100000,3.0,4.000000,4.0,5.000000,2,2,2,2,2
122569,2026-01-06,29,21,881,4,22.47,35,13.0,16.814815,18.333333,20.0,19.3,39.0,34.333333,22.000000,27.0,34.3,4.0,8.0,232.0,0,0.404659,0.434907,0.461707,0.419438,26.97,27.780000,26.798,24.603000,13.0,13.000000,15.4,14.000000,3.0,3.333333,2.2,3.100000,3.0,2.000000,2.4,2.200000,1.0,2.333333,2.8,2.300000,3.0,4.333333,5.4,4.900000,2,1,1,1,2



Trial 1/1: {'n_estimators': 826, 'learning_rate': np.float64(0.0440505717459272), 'max_depth': 5, 'min_child_weight': 1, 'subsample': np.float64(0.7253185871239016), 'colsample_bytree': np.float64(0.8555483330612412), 'gamma': np.float64(1.9082123653149152), 'reg_lambda': np.float64(1.4915065721291927), 'reg_alpha': np.float64(1.5466518660794175)}
Validation MAE: 2.7776

Best validation MAE: 2.777551960476345
Best parameters: {'learning_rate': np.float64(0.0440505717459272), 'max_depth': 5, 'min_child_weight': 1, 'subsample': np.float64(0.7253185871239016), 'colsample_bytree': np.float64(0.8555483330612412), 'gamma': np.float64(1.9082123653149152), 'reg_lambda': np.float64(1.4915065721291927), 'reg_alpha': np.float64(1.5466518660794175), 'objective': 'reg:squarederror', 'tree_method': 'hist', 'device': 'cuda', 'seed': 42}

Test Metrics:
RMSE: 3.869804354774119
MAE: 2.9026192279501553
R²: 0.9019395436242048


In [38]:
analyze_df_stat = main_test_df.drop([tgt_stat, f'{tgt_stat}_preds'], axis=1)\
                .merge(analyze_df_stat[['Date', 'Team', 'Player', tgt_stat, f'{tgt_stat}_preds']], on=['Date', 'Team', 'Player'])
analyze_df_stat['Diff'] = analyze_df_stat[tgt_stat] - analyze_df_stat[f'{tgt_stat}_preds']
analyze_df_stat['Diff2'] = abs(analyze_df_stat['Diff'])
analyze_df_stat.sort_values('Diff', ascending=True).drop('Diff2', axis=1).head(15)
# display(analyze_df_stat[(analyze_df_stat.Date == '2026-01-04') & (analyze_df_stat.Team == 'LAL')])

# plt.figure(figsize=(10,6))
# hist_col = 'Diff2'
# plt.hist(analyze_df_stat[hist_col], bins=30, color='skyblue', edgecolor='black')
# plt.title(f'Histogram of {hist_col}')
# plt.xlabel(hist_col)
# plt.ylabel('Frequency')
# plt.grid(axis='y', alpha=0.75)
# plt.show()

Unnamed: 0,Date,Team,Opp,Player,Pos,MP,team_game_num,Off_PRA,Off_L3_PRA,Off_L5_PRA,Off_L10_PRA,Off_PRA_Rk,Def_PRA,Def_L3_PRA,Def_L5_PRA,Def_L10_PRA,Def_PRA_Rk,Spread,Total,is_OT,usage_proxy_L3,usage_proxy_L5,usage_proxy_L10,usage_proxy_weighted,MP_L1_avg,MP_L3_avg,MP_L5_avg,MP_L10_avg,PTS_L1_avg,PTS_L3_avg,PTS_L5_avg,PTS_L10_avg,REB_L1_avg,REB_L3_avg,REB_L5_avg,REB_L10_avg,AST_L1_avg,AST_L3_avg,AST_L5_avg,AST_L10_avg,TPM_L1_avg,TPM_L3_avg,TPM_L5_avg,TPM_L10_avg,FG_L1_avg,FG_L3_avg,FG_L5_avg,FG_L10_avg,role,recent_role_L1,recent_role_L3,recent_role_L5,game_spread_type,PRA,PRA_preds,Diff
3336,2025-03-31,DAL,BRK,Anthony Davis,PF,28.45,76,40.97619,41.666667,42.0,40.2,2.0,29.835821,27.666667,34.4,29.6,4.0,-4.0,222.0,0,0.657488,0.588907,0.642321,0.635397,30.05,28.476667,25.242,29.921,18.0,15.0,15.0,22.2,7.0,6.666667,7.6,10.8,5.0,3.333333,3.4,3.3,0.0,0.333333,0.6,0.4,7.0,6.0,6.0,8.8,1,1,1,1,1,24.0,45.791084,-21.791084
8532,2025-10-27,DEN,MIN,Aaron Gordon,PF,32.82,3,31.666667,31.666667,31.666667,31.666667,11.0,45.0,41.0,45.0,45.0,23.0,13.0,241.0,0,0.576456,0.576456,0.576456,0.576456,25.2,31.925,31.925,31.925,17.0,33.5,33.5,33.5,2.0,5.0,5.0,5.0,1.0,1.5,1.5,1.5,1.0,5.5,5.5,5.5,5.0,11.0,11.0,11.0,1,1,1,1,2,15.0,36.014221,-21.014221
2666,2025-03-27,DAL,ORL,Anthony Davis,PF,28.85,74,40.97619,41.666667,42.0,40.2,2.0,36.838235,26.0,30.2,34.1,28.0,9.0,193.0,0,0.437103,0.594571,0.599756,0.500609,26.53,22.436667,28.066,30.932,12.0,14.0,24.0,24.1,6.0,8.0,12.0,11.8,3.0,3.333333,3.0,3.2,0.0,0.666667,0.4,0.6,6.0,6.0,9.6,9.9,1,1,1,1,2,24.0,43.826973,-19.826973
11607,2025-11-16,GSW,NOP,Stephen Curry,PG,28.12,15,34.75,42.666667,35.0,33.4,9.0,35.307692,31.0,33.8,36.1,14.0,18.0,230.0,0,0.830381,0.821618,0.743282,0.819042,36.05,29.853333,30.442,30.668,49.0,35.333333,31.6,29.7,4.0,3.333333,2.8,3.8,2.0,2.333333,2.4,3.7,9.0,5.0,4.8,4.7,16.0,11.0,10.0,9.6,1,1,1,1,3,17.0,36.778229,-19.778229
14630,2025-12-07,LAL,PHI,Austin Reaves,SG,39.13,23,40.55,41.666667,38.8,39.3,2.0,43.0,31.333333,40.6,37.0,24.0,4.0,220.0,0,0.604655,0.556912,0.539471,0.583814,33.32,35.83,37.688,37.192,36.0,32.0,33.4,30.2,3.0,4.0,5.0,5.9,8.0,7.0,6.4,5.5,3.0,3.666667,4.2,3.2,9.0,9.333333,9.8,9.2,1,1,1,1,1,19.0,38.666077,-19.666077
17812,2026-01-03,ATL,TOR,Onyeka Okongwu,C,30.3,37,27.657143,36.666667,36.0,31.2,13.0,31.83871,22.333333,28.2,33.5,11.0,-17.0,251.0,0,0.56179,0.539034,0.459688,0.544753,29.77,30.576667,32.578,34.189,23.0,22.0,21.8,18.1,9.0,10.333333,10.6,9.4,2.0,4.333333,3.6,3.7,3.0,2.666667,2.8,2.3,9.0,7.666667,7.6,6.6,1,1,1,1,1,11.0,30.383541,-19.383541
2987,2025-03-29,DAL,CHI,Anthony Davis,PF,30.05,75,40.97619,41.666667,42.0,40.2,2.0,31.298507,36.0,40.6,36.5,8.0,1.0,239.0,0,0.590486,0.592041,0.615265,0.59343,28.85,28.776667,26.566,30.579,15.0,17.666667,19.8,22.6,7.0,9.666667,10.8,11.2,2.0,4.0,2.8,3.2,1.0,1.0,0.6,0.5,5.0,7.0,8.0,9.1,1,1,1,1,1,30.0,48.560257,-18.560257
1666,2025-03-20,SAC,CHI,Zach LaVine,SG,32.85,69,33.285714,32.666667,34.4,36.1,6.0,34.418182,26.0,24.4,28.8,24.0,-12.0,244.0,0,0.32446,0.37231,0.426055,0.348974,37.33,35.156667,37.108,36.927,23.0,19.333333,21.0,22.5,2.0,2.0,3.4,4.2,6.0,3.666667,3.2,3.4,5.0,3.666667,2.8,3.4,7.0,6.333333,7.2,8.2,1,1,1,1,1,14.0,32.502449,-18.502449
18127,2026-01-05,BOS,CHI,Jaylen Brown,SF,34.57,35,41.34375,46.666667,45.0,44.1,2.0,45.0,38.666667,51.2,45.8,29.0,14.0,216.0,0,0.772333,0.741518,0.731249,0.75898,35.13,34.5,34.524,34.87,50.0,34.0,33.8,32.4,3.0,6.333333,5.8,6.6,5.0,6.333333,5.4,5.1,6.0,2.666667,2.4,2.2,18.0,12.666667,13.0,11.7,1,1,1,1,2,26.0,42.903477,-16.903477
18205,2026-01-05,NYK,DET,Karl-Anthony Towns,C,22.5,37,36.393939,32.666667,35.2,34.1,4.0,28.375,30.333333,34.4,26.7,2.0,-31.0,211.0,0,0.455521,0.528526,0.506714,0.482542,33.62,32.353333,30.848,31.974,23.0,18.333333,20.4,21.1,14.0,11.0,12.4,11.1,2.0,3.333333,2.4,1.9,0.0,1.333333,1.4,1.5,6.0,5.0,5.6,6.5,1,1,1,1,1,8.0,24.350565,-16.350565


In [39]:
rmse = np.sqrt(mean_squared_error(y_test_stat, stat_preds)) # splits[5] = y_test
mae = mean_absolute_error(y_test_stat, stat_preds)
print('RMSE:', rmse)

df_yesterday = pd.read_csv(f'../tables/2025/gmday_preds_{tgt_stat}.csv')
df_yesterday['Date'] = pd.to_datetime(df_yesterday.Date)
df_yesterday = df_yesterday[(df_yesterday.Date == (datetime.strptime(now, "%Y-%m-%d") - timedelta(days=1)).strftime("%Y-%m-%d"))]\
                .rename(columns={"MP": "MP_proj"})

df_gms = pd.read_csv(f"../tables/2025/season_gamelogs.csv")
df_gms['Date'] = pd.to_datetime(df_gms.Date)
df_gms = df_gms.rename(columns={"TRB": "REB", "3PM": "TPM", "3PA": "TPA"})
df_gms['STL_BLK'] = df_gms.STL + df_gms.BLK
df_gms['PR'] = df_gms.PTS + df_gms.REB 
df_gms['PA'] = df_gms.PTS + df_gms.AST
df_gms['RA'] = df_gms.REB + df_gms.AST
df_gms['PRA'] = df_gms.PTS + df_gms.REB + df_gms.AST

df_yesterday = df_yesterday.merge(df_gms[['Date', 'Team', 'Player', tgt_stat, 'MP']], on=['Date', 'Team', 'Player'])
df_yesterday = df_yesterday[['Date', 'Team', 'Player', 'Pos', 'Opp', 'MP_proj', 'MP', f'{tgt_stat}_line', f'{tgt_stat}_proj', tgt_stat]][df_yesterday.MP > 0]

df_yesterday['Diff'] = df_yesterday[f'{tgt_stat}_proj'] - df_yesterday[f'{tgt_stat}_line']
df_yesterday['Diff2'] = abs(df_yesterday[f'{tgt_stat}_proj'] - df_yesterday[tgt_stat])
df_yesterday['Act_Res'] = np.where(df_yesterday[tgt_stat] > df_yesterday[f'{tgt_stat}_line'], 'O', 'U')
df_yesterday['Pred_Res'] = np.where(df_yesterday[f'{tgt_stat}_proj'] > df_yesterday[f'{tgt_stat}_line'], 'O', 'U')
df_yesterday['ParlayHit'] = np.where(df_yesterday['Act_Res'] == df_yesterday['Pred_Res'], 1, 0)
df_yesterday['InRMSE_Range'] = np.where(df_yesterday['Diff2'] <= rmse, 1, 0)
# df_yesterday = df_yesterday[(abs(df_yesterday.Diff) > rmse)]
# df_yesterday = df_yesterday[df_yesterday.InRMSE_Range == 0]

print("Total Accuracy (ParlayHit):", ((df_yesterday.ParlayHit == 1).sum() / df_yesterday.shape[0]))
print((df_yesterday.ParlayHit == 1).sum(), "/", df_yesterday.shape[0])

print("\nTotal Accuracy (InRMSE_Range):", ((df_yesterday.InRMSE_Range == 1).sum() / df_yesterday.shape[0]))
print((df_yesterday.InRMSE_Range == 1).sum(), "/", df_yesterday.shape[0])

df_yesterday = df_yesterday.drop(['Diff', 'Diff2', 'Act_Res', 'Pred_Res'], axis=1).sort_values(f'{tgt_stat}_line', ascending=False)

if df_yesterday.shape[0] >= 50:
    for tm in df_yesterday.Team.unique():
        display(df_yesterday[(df_yesterday.Team == tm)]) #  & (df_yesterday.ParlayHit == 1)
else:
    display(df_yesterday)

RMSE: 3.869804354774119
Total Accuracy (ParlayHit): 0.5396825396825397
34 / 63

Total Accuracy (InRMSE_Range): 0.2698412698412698
17 / 63


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,PRA_line,PRA_proj,PRA,ParlayHit,InRMSE_Range
10,2026-01-06,LAL,Luka Doncic,PG,NOP,36.947445,37.48,51.5,57.366856,42,0,0
47,2026-01-06,LAL,LeBron James,SF,NOP,34.881908,33.15,36.5,35.372723,46,0,0
12,2026-01-06,LAL,Deandre Ayton,C,NOP,29.0888,31.9,24.5,19.403126,32,0,0
30,2026-01-06,LAL,Jake LaRavia,PF,NOP,29.02663,34.77,21.5,18.852348,10,1,0
48,2026-01-06,LAL,Marcus Smart,SG,NOP,28.629282,36.65,17.5,18.626236,23,1,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,PRA_line,PRA_proj,PRA,ParlayHit,InRMSE_Range
60,2026-01-06,ORL,Paolo Banchero,PF,WAS,36.49786,24.53,42.5,42.656094,22,0,0
50,2026-01-06,ORL,Desmond Bane,SG,WAS,36.183983,23.62,33.5,32.408367,20,1,0
43,2026-01-06,ORL,Anthony Black,PG,WAS,34.459328,20.5,29.5,30.741405,15,0,0
61,2026-01-06,ORL,Wendell Carter Jr.,C,WAS,31.750124,19.57,24.5,24.364519,12,1,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,PRA_line,PRA_proj,PRA,ParlayHit,InRMSE_Range
1,2026-01-06,DAL,Anthony Davis,PF,SAC,33.596607,35.62,40.5,27.951023,37,1,0
20,2026-01-06,DAL,Cooper Flagg,PG,SAC,36.19669,35.0,33.5,30.125397,34,0,0
29,2026-01-06,DAL,Naji Marshall,SF,SAC,29.751879,31.53,22.5,19.734087,23,0,1
22,2026-01-06,DAL,Max Christie,SG,SAC,28.018776,35.88,18.5,21.739285,16,0,0
31,2026-01-06,DAL,Daniel Gafford,C,SAC,15.004892,22.48,15.5,12.923293,23,0,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,PRA_line,PRA_proj,PRA,ParlayHit,InRMSE_Range
33,2026-01-06,MIN,Anthony Edwards,SG,MIA,36.746254,29.17,38.5,36.21283,34,1,1
25,2026-01-06,MIN,Julius Randle,PF,MIA,33.388317,26.73,36.5,33.48904,31,1,1
24,2026-01-06,MIN,Naz Reid,C,MIA,25.67791,24.52,25.5,22.305704,22,1,1
53,2026-01-06,MIN,Rudy Gobert,C,MIA,32.553703,32.63,25.5,26.201231,30,1,1
13,2026-01-06,MIN,Donte DiVincenzo,SG,MIA,29.275482,30.52,24.5,19.489904,19,1,1
6,2026-01-06,MIN,Jaden McDaniels,PF,MIA,28.060642,33.65,23.5,17.096104,24,0,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,PRA_line,PRA_proj,PRA,ParlayHit,InRMSE_Range
32,2026-01-06,IND,Pascal Siakam,PF,CLE,34.92363,34.58,36.5,34.112236,29,1,0
44,2026-01-06,IND,Andrew Nembhard,PG,CLE,33.037189,30.48,29.5,30.64818,27,0,1
5,2026-01-06,IND,Aaron Nesmith,SF,CLE,24.976173,33.1,23.5,16.539009,26,0,0
4,2026-01-06,IND,T.J. McConnell,PG,CLE,15.22448,17.52,18.5,10.872622,13,1,1


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,PRA_line,PRA_proj,PRA,ParlayHit,InRMSE_Range
2,2026-01-06,CLE,Darius Garland,PG,IND,30.502794,32.97,35.5,24.684643,37,0,0
3,2026-01-06,CLE,Evan Mobley,PF,IND,30.405874,35.6,34.5,25.076855,31,1,0
35,2026-01-06,CLE,Jarrett Allen,C,IND,25.163927,33.65,26.5,24.467392,35,0,0
28,2026-01-06,CLE,De'Andre Hunter,SF,IND,23.997719,21.75,21.5,18.71356,23,0,0
23,2026-01-06,CLE,Jaylon Tyson,SG,IND,24.867628,27.48,21.5,18.268873,9,1,0
37,2026-01-06,CLE,Sam Merrill,SG,IND,22.391996,31.43,18.5,16.695692,25,0,0
8,2026-01-06,CLE,Craig Porter Jr.,PG,IND,14.378641,27.97,14.5,8.372718,26,0,0
21,2026-01-06,CLE,Dean Wade,PF,IND,16.370275,8.42,12.5,9.132861,4,1,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,PRA_line,PRA_proj,PRA,ParlayHit,InRMSE_Range
7,2026-01-06,NOP,Zion Williamson,PF,LAL,26.232391,33.33,34.5,28.107313,24,1,0
17,2026-01-06,NOP,Trey Murphy III,SF,LAL,36.065701,39.07,28.5,32.160606,50,1,0
59,2026-01-06,NOP,Derik Queen,C,LAL,27.688173,31.7,24.5,24.32036,31,0,0
58,2026-01-06,NOP,Jordan Poole,PG,LAL,25.406183,18.32,21.5,21.307858,2,1,0
42,2026-01-06,NOP,Jeremiah Fears,PG,LAL,24.391014,19.92,19.5,18.236658,15,1,1
39,2026-01-06,NOP,Herbert Jones,SF,LAL,18.841911,34.92,13.5,15.010481,6,0,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,PRA_line,PRA_proj,PRA,ParlayHit,InRMSE_Range
45,2026-01-06,SAS,Victor Wembanyama,C,MEM,22.6427,21.02,34.5,33.354851,38,0,0
57,2026-01-06,SAS,De'Aaron Fox,PG,MEM,32.838615,29.92,31.5,31.128609,22,1,0
19,2026-01-06,SAS,Stephon Castle,PG,MEM,31.565693,28.38,30.5,27.051109,27,1,1
11,2026-01-06,SAS,Dylan Harper,SG,MEM,18.915489,18.03,18.5,13.254518,9,1,0


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,PRA_line,PRA_proj,PRA,ParlayHit,InRMSE_Range
41,2026-01-06,MEM,Jaren Jackson Jr.,C,SAS,33.338287,35.85,29.5,30.837299,32,1,1
40,2026-01-06,MEM,Santi Aldama,PF,SAS,32.999393,23.9,26.5,25.126722,21,1,0
0,2026-01-06,MEM,Cam Spencer,SG,SAS,24.446419,29.93,26.5,13.948709,37,0,0
18,2026-01-06,MEM,Jock Landale,C,SAS,19.070923,23.67,20.5,16.982487,30,0,0
38,2026-01-06,MEM,Jaylen Wells,SG,SAS,29.14994,28.3,20.5,18.813669,13,1,0
14,2026-01-06,MEM,Vince Williams Jr.,SG,SAS,15.03282,22.12,15.5,10.940233,26,0,0
27,2026-01-06,MEM,Kentavious Caldwell-Pope,SG,SAS,18.98624,19.58,15.5,12.706201,12,1,1


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,PRA_line,PRA_proj,PRA,ParlayHit,InRMSE_Range
51,2026-01-06,MIA,Bam Adebayo,C,MIN,32.423607,28.47,28.5,27.705456,21,1,0
46,2026-01-06,MIA,Norman Powell,SG,MIN,30.847979,30.93,28.5,29.642925,28,0,1
34,2026-01-06,MIA,Andrew Wiggins,SF,MIN,29.836916,25.52,23.5,21.256983,16,1,0
9,2026-01-06,MIA,Tyler Herro,SG,MIN,20.78289,28.62,22.5,16.507553,29,0,0
55,2026-01-06,MIA,Davion Mitchell,PG,MIN,29.63023,27.88,17.5,18.188997,14,0,0
56,2026-01-06,MIA,Nikola Jovic,PF,MIN,17.193504,26.18,16.5,16.010744,16,1,1


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,PRA_line,PRA_proj,PRA,ParlayHit,InRMSE_Range
52,2026-01-06,SAC,Russell Westbrook,PG,DAL,30.596951,31.13,27.5,26.708439,21,1,0
62,2026-01-06,SAC,DeMar DeRozan,PF,DAL,33.666763,36.52,26.5,26.612135,28,1,1
16,2026-01-06,SAC,Zach LaVine,SG,DAL,32.067833,37.25,26.5,22.370659,27,0,0
49,2026-01-06,SAC,Maxime Raynaud,C,DAL,27.458019,34.7,21.5,22.608603,24,1,1


Unnamed: 0,Date,Team,Player,Pos,Opp,MP_proj,MP,PRA_line,PRA_proj,PRA,ParlayHit,InRMSE_Range
26,2026-01-06,WAS,CJ McCollum,SG,ORL,30.772987,38.17,26.5,23.636696,38,0,0
15,2026-01-06,WAS,Tre Johnson,SG,ORL,24.653568,22.47,20.5,16.304619,13,1,1
54,2026-01-06,WAS,Bilal Coulibaly,SG,ORL,26.597803,34.05,18.5,19.197081,25,1,0
36,2026-01-06,WAS,Khris Middleton,SF,ORL,19.35446,22.55,15.5,13.468051,21,0,0


### Today's predictions

In [92]:
df_lines = pd.read_csv(f"../tables/2025/parlay_lines.csv")
df_lines['Date'] = pd.to_datetime(df_lines.Date)
df_lines = df_lines[~(df_lines.Team.isnull())]

# Predict Mins
df_lines["Team"] = team_encoder.transform(df_lines["Team"])
df_pred = df_pred.merge(df_lines[['Date', 'Team', 'Spread', 'Total']], on=['Date', 'Team'], how='left')
df_pred = df_pred[~df_pred[['Date', 'Team', 'Player']].duplicated(keep='last')]
df_pred['Spread_x'] = np.where(df_pred.Spread_x.isnull(), df_pred.Spread_y, df_pred.Spread_x)
df_pred['Total_x'] = np.where(df_pred.Total_x.isnull(), df_pred.Total_y, df_pred.Total_x)
df_pred = df_pred.rename(columns={"Spread_x": "Spread", "Total_x": "Total"}).drop(['Spread_y', 'Total_y'], axis=1)
df_pred_mins = setup_df_mins(con, df_pred)
df_pred_mins = df_pred_mins.drop(['Date', 'MP'], axis=1)
DM_mins = xgb.DMatrix(df_pred_mins)
df_pred['MP'] = mins_model.predict(DM_mins)

# Predict Stat
df_pred = setup_df_main(df_pred, tgt_stat)
feature_cols = [col for col in df_pred.columns if col not in ['Date', tgt_stat]]
df_pred = df_pred[df_pred.Date == now][feature_cols]
DM_stats = xgb.DMatrix(df_pred)
df_pred[f"{tgt_stat}_proj"] = stat_model.predict(DM_stats)

df_pred['Team'] = team_encoder.inverse_transform(df_pred["Team"])
df_lines['Team'] = team_encoder.inverse_transform(df_lines["Team"])
df_pred['Opp'] = team_encoder.inverse_transform(df_pred["Opp"])
df_pred['Player'] = player_encoder.inverse_transform(df_pred["Player"])
df_pred['Pos'] = position_encoder.inverse_transform(df_pred["Pos"])

df_lines = df_lines[df_lines.Date == now][['Team', 'Player', f'{tgt_stat}_line']]
df_pred = df_pred.merge(df_lines, on=['Team', 'Player'])

tds_picks = df_pred[~(df_pred[f'{tgt_stat}_line'].isnull())]\
            [['Team', 'Player', 'Pos', 'Opp', 'MP', 'MP_L5_avg', f'{tgt_stat}_line', f'{tgt_stat}_proj']]
tds_picks['Diff'] = abs((df_pred[f'{tgt_stat}_line'] - df_pred[f'{tgt_stat}_proj']))
tds_picks['Diff2'] = abs((df_pred['MP'] - df_pred['MP_L5_avg']))
tds_picks = tds_picks.sort_values('Diff', ascending=False).drop(['Diff', 'Diff2'], axis=1)
if tds_picks.shape[0] >= 50:
    print(tds_picks.shape[0], 'rows')
    for tm in tds_picks.Team.unique():
        display(tds_picks[tds_picks.Team == tm])
else:
    display(tds_picks)
tds_picks.insert(0, 'Date', pd.to_datetime(now))
# partition_save_df(tds_picks, f"../tables/2025/gmday_preds_{tgt_stat}.csv")

debug1 166751
debug2 284514
debug3 284514
debug4 166751
87 rows


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
91,MIL,Giannis Antetokounmpo,PF,GSW,24.441488,27.736293,46.5,37.758007
4,MIL,Kevin Porter Jr.,PG,GSW,37.428226,35.757752,29.5,33.998981
5,MIL,Ryan Rollins,PG,GSW,31.100552,33.154122,24.5,27.770237


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
128,LAC,Ivica Zubac,C,NYK,25.645128,26.054806,26.5,18.463831
59,LAC,Kawhi Leonard,SF,NYK,37.296501,33.639221,38.5,45.212135
8,LAC,James Harden,PG,NYK,34.718346,33.221351,36.5,32.785336
24,LAC,Kris Dunn,PG,NYK,28.333652,27.488377,15.5,15.727388


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
88,TOR,Scottie Barnes,PF,CHO,36.056728,35.975849,33.5,41.37178
70,TOR,RJ Barrett,SF,CHO,27.604395,26.395746,26.5,21.83112
65,TOR,Brandon Ingram,SF,CHO,35.60537,36.07592,32.5,34.687481
9,TOR,Immanuel Quickley,PG,CHO,32.419304,33.829497,26.5,25.754154


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
2,OKC,Shai Gilgeous-Alexander,PG,UTA,30.765444,30.701754,41.5,34.523483
27,OKC,Jalen Williams,SG,UTA,27.816597,29.375444,32.5,25.990849


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
62,POR,Deni Avdija,SF,HOU,36.206581,32.567034,41.5,48.3769
36,POR,Shaedon Sharpe,SG,HOU,29.637911,30.20766,28.5,24.606077
98,POR,Toumani Camara,PF,HOU,35.097572,31.986909,20.5,23.420013
132,POR,Donovan Clingan,C,HOU,29.674671,28.101994,24.5,26.017036


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
133,UTA,Jusuf Nurkic,C,OKC,26.821812,29.671378,24.5,31.353138
95,UTA,Lauri Markkanen,PF,OKC,33.247463,35.337796,31.5,37.226006
10,UTA,Keyonte George,PG,OKC,33.037933,34.997027,32.5,38.036324


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
64,LAL,LeBron James,SF,SAS,33.800983,32.515364,34.5,40.877991
1,LAL,Luka Doncic,PG,SAS,36.52475,34.266182,51.5,56.732296
112,LAL,Jake LaRavia,PF,SAS,31.045681,31.944142,20.5,23.591866
46,LAL,Marcus Smart,SG,SAS,29.586779,29.573124,17.5,18.290165


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
16,CHO,LaMelo Ball,PG,TOR,25.581257,25.22373,30.5,24.206741
72,CHO,Brandon Miller,SF,TOR,33.241718,33.814197,28.5,31.378448
89,CHO,Miles Bridges,PF,TOR,31.786106,27.615314,28.5,29.30978
75,CHO,Kon Knueppel,SF,TOR,30.830315,28.044119,27.5,27.571539


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
14,NYK,Jalen Brunson,PG,LAC,35.985596,36.101883,39.5,33.422733
119,NYK,Karl-Anthony Towns,C,LAC,25.345182,28.004575,34.5,29.1469
102,NYK,OG Anunoby,PF,LAC,34.597519,34.497978,22.5,26.546337
33,NYK,Miles McBride,SG,LAC,26.980553,25.900577,16.5,17.439936
66,NYK,Mikal Bridges,SF,LAC,35.457909,33.517464,22.5,22.495356


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
0,DET,Cade Cunningham,PG,CHI,34.912422,33.997157,44.5,38.472054


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
12,MEM,Ja Morant,PG,PHO,24.448685,29.148228,28.5,22.953997
101,MEM,Santi Aldama,PF,PHO,30.547995,32.310362,24.5,27.403812
123,MEM,Jaren Jackson Jr.,C,PHO,33.38261,34.72123,29.5,26.791161
35,MEM,Jaylen Wells,SG,PHO,28.780895,29.43968,18.5,18.178143


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
80,DEN,Peyton Watson,SF,BOS,35.373886,35.529967,22.5,28.004446
7,DEN,Jamal Murray,PG,BOS,37.6982,35.980485,40.5,43.13324
97,DEN,Aaron Gordon,PF,BOS,22.307323,20.158051,21.5,18.95199
51,DEN,Christian Braun,SG,BOS,25.643448,23.953828,15.5,16.194983


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
63,NOP,Trey Murphy III,SF,ATL,36.388931,34.559558,29.5,34.58004
94,NOP,Zion Williamson,PF,ATL,27.284555,29.162663,33.5,28.594633
19,NOP,Jeremiah Fears,PG,ATL,23.241879,25.401976,20.5,18.385933
122,NOP,Derik Queen,C,ATL,27.26177,26.703622,25.5,25.721859


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
49,SAS,Dylan Harper,SG,LAL,18.138357,20.662593,17.5,12.473082
6,SAS,De'Aaron Fox,PG,LAL,31.53861,33.081327,29.5,27.968544
13,SAS,Stephon Castle,PG,LAL,30.638077,32.990528,30.5,29.385962
120,SAS,Victor Wembanyama,C,LAL,22.551386,23.686398,31.5,31.306229


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
31,BOS,Derrick White,SG,DEN,36.551353,35.070485,30.5,34.854313
40,BOS,Anfernee Simons,SG,DEN,23.548656,25.708534,18.5,17.143492
20,BOS,Payton Pritchard,PG,DEN,34.664581,34.696771,26.5,25.278164
58,BOS,Jaylen Brown,SF,DEN,32.240906,32.193,42.5,41.613823


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
22,CHI,Tre Jones,PG,DET,23.705698,28.047641,20.5,16.806145
126,CHI,Nikola Vucevic,C,DET,32.279961,32.422316,32.5,35.909409
76,CHI,Kevin Huerter,SF,DET,21.139416,24.67427,18.5,20.270777


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
38,ATL,Dyson Daniels,SG,NOP,35.089333,33.447072,26.5,29.724884
118,ATL,Onyeka Okongwu,C,NOP,33.97496,32.839597,27.5,30.200909
26,ATL,Nickeil Alexander-Walker,SG,NOP,33.574509,33.137335,29.5,31.009701
69,ATL,Jalen Johnson,SF,NOP,36.500111,34.634848,44.5,44.397621


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
32,PHO,Devin Booker,SG,MEM,33.72422,32.479131,36.5,33.628204
60,PHO,Dillon Brooks,SF,MEM,31.980133,29.562387,25.5,24.475571
17,PHO,Collin Gillespie,PG,MEM,28.236429,29.871948,21.5,21.140974
79,PHO,Royce O'Neale,SF,MEM,28.891178,28.900802,16.5,16.291651


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
15,HOU,Reed Sheppard,PG,POR,25.423048,29.149751,18.5,15.770065
68,HOU,Kevin Durant,SF,POR,36.919521,35.16671,37.5,38.520279
78,HOU,Amen Thompson,SF,POR,36.458981,35.125385,31.5,31.811413
96,HOU,Jabari Smith Jr.,PF,POR,35.757938,34.092481,24.5,24.675905


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
54,BRK,Terance Mann,SG,ORL,25.2925,25.994504,12.5,15.215766
114,BRK,Danny Wolf,PF,ORL,20.407967,21.322405,12.5,13.955594
61,BRK,Michael Porter Jr.,SF,ORL,33.537956,32.298068,35.5,36.697048


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
11,GSW,Stephen Curry,PG,MIL,34.521271,32.88648,36.5,34.340874
25,GSW,De'Anthony Melton,PG,MIL,19.460524,25.010564,14.5,13.292466
71,GSW,Jimmy Butler,SF,MIL,33.121384,32.136222,30.5,31.383078
109,GSW,Draymond Green,PF,MIL,23.581291,23.998622,18.5,19.376461
43,GSW,Brandin Podziemski,SG,MIL,25.292475,24.974501,17.5,18.31629


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
93,PHI,Paul George,PF,WAS,33.154968,33.496389,25.5,23.706253
3,PHI,Tyrese Maxey,PG,WAS,38.078541,39.554726,40.5,42.258827
116,PHI,Joel Embiid,C,WAS,33.416473,35.362301,37.5,38.561073
30,PHI,Quentin Grimes,SG,WAS,31.479519,35.247188,20.5,19.83853


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
39,WAS,Tre Johnson,SG,PHI,21.502598,26.133717,19.5,17.925444
28,WAS,CJ McCollum,SG,PHI,31.895512,32.621534,26.5,27.86948
44,WAS,Bilal Coulibaly,SG,PHI,26.447138,27.44652,19.5,20.029491


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_L5_avg,PRA_line,PRA_proj
34,ORL,Desmond Bane,SG,BRK,33.578907,33.265458,29.5,30.305302
92,ORL,Paolo Banchero,PF,BRK,34.661865,32.862243,37.5,37.432449
23,ORL,Anthony Black,PG,BRK,31.920979,31.554041,26.5,26.450691
