# To do:

 - Figure out how to signal injuries
 - Add shooting percentages features for PTS stats
 - Add team shooting percentages for AST stats
 - Add def team shooting percentages for REB stats
 - Add TOV stats for STL
 - Add field goal attempts and 3 point attempts features FOR pts stats

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import duckdb
import warnings
import math         # haversine_km()
import os

import xgboost as xgb
from xgboost import XGBRegressor
from scipy.stats import randint, uniform

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import joblib
import warnings
from datetime import datetime, timedelta
from haversine import haversine

pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")

categories = ['PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK']
con = duckdb.connect(database=":memory:")
now = str(datetime.now().date())
print(f"Today's date:", now)

Today's date: 2025-12-19


In [2]:
%run ./common_utils.ipynb

# ML Functions

In [3]:
def feature_importance(model):
    importance = model.get_score(importance_type='gain')

    # Convert to table
    df_importance = (
        pd.DataFrame({
            'feature': list(importance.keys()),
            'importance': list(importance.values())
        })
        .sort_values(by='importance', ascending=False)
        .reset_index(drop=True)
    )

    df_importance['pct'] = df_importance.importance.cumsum() / df_importance.importance.sum()
    display(df_importance)

    xgb.plot_importance(model)
    plt.show()

In [18]:
def create_baseline_model(df, pred_col, train_df, val_df, test_df):

    if pred_col == 'MP':
        print('Minutes Model')
        feature_cols = [
            'MP',
            'MP_lst_gm',
            'MP_last_5_avg',
            'MP_last_10_avg',
            'starter', 'bench', 'reserve'
        ]
    else:
        print('Stats Model')
        feature_cols = [
            tgt_stat,
            'MP_lst_gm',
            'MP_last_5_avg',
            'MP_last_10_avg',
            f'Off_{tgt_stat}', f'Off_L5_{tgt_stat}',
            f'Def_{tgt_stat}', f'Def_L5_{tgt_stat}',
            'DaysLstGm'
        ]
    
    print('Train:', len(train_df), '/ Validation:', len(val_df), '/ Test:', len(test_df))
    
    feature_cols = [col for col in df.columns if col not in ['Date', pred_col]]

    X_train, y_train = train_df[feature_cols], train_df[pred_col]
    X_val,   y_val   = val_df[feature_cols],   val_df[pred_col]
    X_test,  y_test  = test_df[feature_cols],  test_df[pred_col]

    # Convert to DMatrix (XGBoost internal format)
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dval   = xgb.DMatrix(X_val, label=y_val)
    dtest  = xgb.DMatrix(X_test, label=y_test)

    params = {
        "objective": "reg:squarederror",
        "max_depth": 5,
        "learning_rate": 0.05,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "seed": 42
    }

    # Train using native XGBoost API with early stopping
    evals = [(dtrain, "train"), (dval, "val")]
    bst = xgb.train(
        params,
        dtrain,
        num_boost_round=500,
        evals=evals,
        early_stopping_rounds=50,
        verbose_eval=False
    )

    # Predict on test set
    preds = bst.predict(dtest)

    rmse = np.sqrt(mean_squared_error(y_test, preds))
    mae = mean_absolute_error(y_test, preds)
    r2 = r2_score(y_test, preds)

    print("RMSE:", rmse)
    print("MAE:", mae)
    print("R²:", r2)
    
    return bst, (X_train, y_train, X_val, y_val, X_test, y_test)

In [5]:
def hyperparam_tuning(splits, n_iter=20, early_stopping_rounds=50):
    """
    Hyperparameter tuning using native XGBoost API and DMatrix,
    with early stopping support (compatible with XGBoost 3.1.2)
    """

    X_train, y_train, X_val, y_val, X_test, y_test = splits

    # Convert datasets to DMatrix
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dval   = xgb.DMatrix(X_val, label=y_val)
    dtest  = xgb.DMatrix(X_test, label=y_test)

    # Hyperparameter search space
    param_dist = {
        "n_estimators": randint(300, 1500),
        "learning_rate": uniform(0.01, 0.05),
        "max_depth": randint(3, 6),
        "min_child_weight": randint(1, 8),
        "subsample": uniform(0.7, 0.3),
        "colsample_bytree": uniform(0.7, 0.3),
        "gamma": uniform(0, 2),
        "reg_lambda": uniform(0, 5),
        "reg_alpha": uniform(0, 2)
    }

    # Sample n_iter random parameter combinations
    param_list = []
    for _ in range(n_iter):
        sample = {k: (v.rvs() if hasattr(v, "rvs") else v) for k, v in param_dist.items()}
        sample['n_estimators'] = int(sample['n_estimators'])
        sample['max_depth'] = int(sample['max_depth'])
        sample['min_child_weight'] = int(sample['min_child_weight'])
        param_list.append(sample)

    best_mae = float('inf')
    best_params = None
    best_bst = None

    # Manual hyperparameter search
    for i, params in enumerate(param_list):
        print(f"\nTrial {i+1}/{n_iter}: {params}")
        num_boost_round = params.pop('n_estimators')
        params.update({
            "objective": "reg:squarederror",
            "tree_method": "hist",
            "device": "cuda",
            "seed": 42
        })
        evals = [(dtrain, 'train'), (dval, 'val')]
        bst = xgb.train(
            params,
            dtrain,
            num_boost_round=num_boost_round,
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=False
        )
        # Predict on validation set to compute MAE
        val_preds = bst.predict(dval, iteration_range=(0, bst.best_iteration))
        mae = mean_absolute_error(y_val, val_preds)
        print(f"Validation MAE: {mae:.4f}")
        if mae < best_mae:
            best_mae = mae
            best_params = params.copy()
            best_bst = bst

    print("\nBest validation MAE:", best_mae)
    print("Best parameters:", best_params)

    # Predict on test set using best model
    preds = best_bst.predict(dtest, iteration_range=(0, best_bst.best_iteration))
    print("\nTest Metrics:")
    print("RMSE:", np.sqrt(mean_squared_error(y_test, preds)))
    print("MAE:", mean_absolute_error(y_test, preds))
    print("R²:", r2_score(y_test, preds))

    return best_bst, preds

### Create Base df

In [6]:
df = pd.DataFrame()
df2 = pd.DataFrame()
df3 = pd.DataFrame()
for i in [2022, 2023, 2024, 2025]:
    df_actuals = pd.read_csv(f"../tables/{i}/parlay_actuals.csv")
    df_actuals['Season'] = i
    df = pd.concat([df, df_actuals])

    df_schd = pd.read_csv(f"../tables/{i}/nba_schedule.csv")
    df_schd['Season'] = i
    df2 = pd.concat([df2, df_schd])
    
    df_gms = pd.read_csv(f"../tables/{i}/season_gamelogs.csv")
    df_gms['Season'] = i
    df3 = pd.concat([df3, df_gms])

df['Date'] = pd.to_datetime(df.Date)
df2['Date'] = pd.to_datetime(df2.Date)
df3['Date'] = pd.to_datetime(df3.Date)

df['Tms'] = df['game_id'].apply(lambda x: x.split("_")[1:3])
df['WrngTm'] = df.apply(lambda row: 0 if row['Team'] in row['Tms'] else 1, axis=1)
df['WrngOpp'] = df.apply(lambda row: 0 if row['Opp'] in row['Tms'] else 1, axis=1)
df = df[(df.WrngTm == 0) & (df.WrngOpp == 0)].drop(['WrngTm', 'WrngOpp', 'Tms'], axis=1)

df3 = df3[['game_id', 'Date', 'Team', 'Player', 'FG', 'FGA', 'FG%', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'TOV', 'PF', '+/-']]\
        .rename(columns={"3PA": "TPA", "3P%": "TP%"})
df3 = df3[~df3[['Date', 'Team', 'Player']].duplicated(keep='last')]
df = df.merge(df3, on=['game_id', 'Date', 'Team', 'Player'])

df_mtch = df2[['Season', 'Date', 'AwayABV', 'HomeABV', 'AwayPTS', 'HomePTS', 'AwayB2B', 'HomeB2B', 'cup_gm', 'pstszn_gm']]
df_mtch['Team_type'] = 'Away'
df_mtch = df_mtch.rename(columns={"AwayABV": "Team", "HomeABV": "Opp", "AwayB2B": "B2B"})[['Season', 'Date', 'Team', 'AwayPTS', 'HomePTS', 'Opp', 'B2B', 'cup_gm', 'pstszn_gm', 'Team_type']]
df_mtch2 = df_mtch.copy().rename(columns={"Team": "Opp", "Opp": "Team", "HomeB2B": "B2B"})[['Season', 'Date', 'Team', 'AwayPTS', 'HomePTS', 'Opp', 'B2B', 'cup_gm', 'pstszn_gm']]
df_mtch2['Team_type'] = 'Home'
df_mtch = pd.concat([df_mtch, df_mtch2])
df_mtch = df_mtch[['Season', 'Date', 'Team', 'Team_type', 'AwayPTS', 'HomePTS', 'cup_gm', 'pstszn_gm']]
df_mtch = df_mtch.sort_values(["Team", "Date"])
df_mtch['team_game_num'] = df_mtch.groupby(["Team", "Season"]).cumcount() + 1
df_mtch['Spread'] = np.where(df_mtch.Team_type == 'Home', df_mtch.HomePTS - df_mtch.AwayPTS, df_mtch.AwayPTS - df_mtch.HomePTS)
df_mtch['Total'] = df_mtch.AwayPTS + df_mtch.HomePTS
df_mtch['is_Win'] = np.where(df_mtch.Spread > 0, 1, 0)
df_mtch['Szn_Wins'] = df_mtch.groupby(['Season', 'Team'])['is_Win'].cumsum()
df = df.drop(['Spread', 'Total'], axis=1).merge(df_mtch, on=['Season', 'Date', 'Team'])

team_encoder = LabelEncoder()
player_encoder = LabelEncoder()
team_type_encoder = LabelEncoder()
position_encoder = LabelEncoder()

# Encode string cols
team_encoder.fit(pd.concat([df["Team"], df["Opp"]], axis=0))
df["Team"] = team_encoder.transform(df["Team"])
df["Opp"] = team_encoder.transform(df["Opp"])
df["Player_name"] = df.Player
df["Player"] = player_encoder.fit_transform(df["Player"])
df["Pos"] = position_encoder.fit_transform(df["Pos"])
df['Team_type'] = team_type_encoder.fit_transform(df['Team_type'])
df = df.sort_values(['Season', 'Date', 'Team', 'Player']).reset_index(drop=True)
print('base df created', datetime.now())

base df created 2025-12-19 20:23:55.015748


# Minutes Projection Model

In [7]:
def setup_df_mins(con, df):
    
    df = df[['Season', 'Date', 'Team', 'Team_type', 'Opp', 'Player', 'Pos', 'B2B', 'MP', 'TOV', 'PF', '+/-',
             'Spread', 'Total', 'team_game_num', 'Szn_Wins', 'cup_gm', 'pstszn_gm']]
    
    for col in ['MP', 'TOV', 'PF', '+/-']:
        df[f'{col}_lst_gm'] = (
            df
            .groupby(['Player', 'Season'])[col]
            .shift(1)
        )

        df[f'{col}_last_5_avg'] = (
            df.groupby(['Player', 'Season'])[col]
              .rolling(window=5, min_periods=1)
              .mean()
              .shift(1)
              .reset_index(level=[0, 1], drop=True)
        )

        df[f'{col}_last_10_avg'] = (
            df.groupby(['Player', 'Season'])[col]
              .rolling(window=10, min_periods=1)
              .mean()
              .shift(1)
              .reset_index(level=[0, 1], drop=True)
        )

    games_last_7_days = df.groupby(['Player', 'Season']).rolling('7D', on='Date')['MP'].count().shift(1).to_frame(name='games_last_7_days').reset_index()
    df = df.merge(games_last_7_days, on=['Player', 'Season', 'Date'])
    df['games_last_7_days'] = df.games_last_7_days.fillna(0).astype(int)
    
    df['prev_team_mins_pct'] = (df.groupby(['Player', 'Season'])['MP'].shift(1)) / 240
        
    
    df['reserve_td'] = (df.MP < 8).astype(int)
    df['bench_td']   = ((df.MP >= 8) & (df.MP <= 25)).astype(int)
    df['starter_td'] = (df.MP > 25).astype(int)
    role_counts = df.groupby(['Season', 'Player'])[['reserve_td', 'bench_td', 'starter_td']].sum()
    role_counts['most_common_role'] = role_counts[['reserve_td', 'bench_td', 'starter_td']].idxmax(axis=1)
    role_counts['reserve'] = (role_counts['most_common_role'] == 'reserve_td').astype(int)
    role_counts['bench']   = (role_counts['most_common_role'] == 'bench_td').astype(int)
    role_counts['starter'] = (role_counts['most_common_role'] == 'starter_td').astype(int)
    df = df.merge(role_counts[['reserve', 'bench', 'starter']], on=['Season', 'Player'], how='left')
      
    df['missed_games'] = (
        df.groupby(['Player', 'Team', 'Season'])['team_game_num']      
          .diff()
          .sub(1)
          .fillna(0)
          .astype(int)
    )

#     df["career_min_mins"] = (
#         df.assign(MP_nonzero=df["MP"].replace(0, np.nan))
#           .groupby("Player")["MP_nonzero"]
#           .cummin()
#           .shift(1)
#     )
#     df["career_max_mins"] = df.groupby("Player")["MP"].cummax().shift(1)
    
    df['blowout'] = np.where(abs(df.Spread >= 15), 1, 0)
    
#     df['Szn_Wins'] = df.groupby(['Player', 'Season', 'Team'])['Szn_Wins'].shift(1).fillna(0)
#     df['Win_Pct'] = df.Szn_Wins / df.team_game_num
    
    # Have to derive OppSzn_Wins and then add it to the df
#     df['OppSzn_Wins'] = df.groupby(['Player', 'Season', 'Opp'])['Szn_Wins'].shift(1).fillna(0)
    
    df = df.drop(['reserve_td', 'bench_td', 'starter_td', 'Szn_Wins', 'TOV', 'PF', '+/-'], axis=1)    
    
    return df

In [19]:
df_mins = df.copy()
df_mins = setup_df_mins(con, df_mins)
display(df_mins)

n = len(df_mins)
train_end = int(0.8 * n)
val_end   = int(0.9 * n)
mins_train_df = df_mins.iloc[:train_end]
mins_val_df   = df_mins.iloc[train_end:val_end]
mins_test_df  = df_mins.iloc[val_end:]

mins_model, mins_splits = create_baseline_model(df_mins, "MP", mins_train_df, mins_val_df, mins_test_df)
mins_model, mins_preds = hyperparam_tuning(mins_splits, n_iter=1)
# feature_importance(mins_model)

Unnamed: 0,Season,Date,Team,Team_type,Opp,Player,Pos,B2B,MP,Spread,Total,team_game_num,cup_gm,pstszn_gm,MP_lst_gm,MP_last_5_avg,MP_last_10_avg,TOV_lst_gm,TOV_last_5_avg,TOV_last_10_avg,PF_lst_gm,PF_last_5_avg,PF_last_10_avg,+/-_lst_gm,+/-_last_5_avg,+/-_last_10_avg,games_last_7_days,prev_team_mins_pct,reserve,bench,starter,missed_games,blowout
0,2022,2022-10-21,0,1,21,2,3,0,0.00,10.0,206.0,2,0,0,,25.226,18.698,,0.8,0.7,,1.8,1.6,,-1.0,-1.1,3,,0,1,0,0,0
1,2022,2022-10-21,0,1,21,5,2,0,14.37,10.0,206.0,2,0,0,,24.286,26.067,,1.0,1.4,,2.2,1.9,,17.0,12.5,3,,1,0,0,0,0
2,2022,2022-10-21,0,1,21,120,0,0,31.62,10.0,206.0,2,0,0,,0.934,3.465,,0.0,0.0,,0.0,0.0,,1.8,1.2,3,,0,0,1,0,0
3,2022,2022-10-21,0,1,21,171,3,0,32.53,10.0,206.0,2,0,0,,31.028,31.442,,2.6,3.3,,1.8,1.8,,7.0,1.9,3,,0,0,1,0,0
4,2022,2022-10-21,0,1,21,178,4,0,39.62,10.0,206.0,2,0,0,,29.280,28.101,,1.4,1.0,,2.8,2.0,,-4.0,0.9,2,,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95085,2025,2025-12-18,28,1,13,704,3,0,16.98,-8.0,278.0,26,0,0,0.00,20.166,22.319,0.0,1.2,0.9,0.0,0.8,1.2,0.0,-1.4,-2.7,2,0.000000,0,1,0,0,0
95086,2025,2025-12-18,29,0,26,59,4,0,22.57,-25.0,213.0,25,0,0,24.82,29.488,27.462,1.0,2.0,1.8,0.0,3.2,2.9,-20.0,-1.6,-5.1,3,0.103417,0,0,1,4,0
95087,2025,2025-12-18,29,0,26,89,4,0,29.10,-25.0,213.0,25,0,0,34.13,32.222,31.599,1.0,2.6,1.8,2.0,2.8,2.4,10.0,-8.6,-5.9,2,0.142208,0,0,1,0,0
95088,2025,2025-12-18,29,0,26,491,3,0,36.08,-25.0,213.0,25,0,0,35.88,32.040,31.974,3.0,3.0,3.3,4.0,4.0,3.5,13.0,-5.2,-6.7,2,0.149500,0,0,1,0,0


Minutes Model
Train: 76072 / Validation: 9509 / Test: 9509
RMSE: 5.992516767555047
MAE: 4.353414731596578
R²: 0.7647876554147662

Trial 1/1: {'n_estimators': 1259, 'learning_rate': 0.032616804562126074, 'max_depth': 3, 'min_child_weight': 5, 'subsample': 0.7920513742695932, 'colsample_bytree': 0.8549459786154316, 'gamma': 1.491264576860441, 'reg_lambda': 2.035506243942166, 'reg_alpha': 1.1509770763555862}
Validation MAE: 4.9779

Best validation MAE: 4.977856854298272
Best parameters: {'learning_rate': 0.032616804562126074, 'max_depth': 3, 'min_child_weight': 5, 'subsample': 0.7920513742695932, 'colsample_bytree': 0.8549459786154316, 'gamma': 1.491264576860441, 'reg_lambda': 2.035506243942166, 'reg_alpha': 1.1509770763555862, 'objective': 'reg:squarederror', 'tree_method': 'hist', 'device': 'cuda', 'seed': 42}

Test Metrics:
RMSE: 6.003796323450515
MAE: 4.365876409919973
R²: 0.7639013541118618


In [10]:
rmse = np.sqrt(mean_squared_error(mins_splits[5], mins_preds)) # splits[5] = y_test
mins_test_df['MP_pred'] = mins_preds
df_test = mins_test_df.copy()

df_test['Team'] = team_encoder.inverse_transform(df_test['Team'])
df_test['Opp'] = team_encoder.inverse_transform(df_test['Opp'])
df_test['Player'] = player_encoder.inverse_transform(df_test['Player'])
df_test['Pos'] = position_encoder.inverse_transform(df_test['Pos'])

df_test['Diff'] = abs(df_test['MP_pred'] - df_test['MP'])
df_test['InRMSE_Range'] = np.where(df_test['Diff'] <= rmse, 1, 0)

print("Total Accuracy (InRMSE_Range):", ((df_test.InRMSE_Range == 1).sum() / df_test.shape[0]))
print((df_test.InRMSE_Range == 1).sum(), "/", df_test.shape[0])

df_ystrday = df_test[(df_test.Date == (datetime.strptime(now, "%Y-%m-%d") - timedelta(days=1)).strftime("%Y-%m-%d"))]\
            [['Team', 'Player', 'Pos', 'Opp', 'MP', 'MP_pred', 'InRMSE_Range', 'Diff', 'Spread']]
print("\nYesterday's Results:")
print("Total Accuracy (InRMSE_Range):", ((df_ystrday.InRMSE_Range == 1).sum() / df_ystrday.shape[0]))
if df_ystrday.shape[0] >= 50:
    for tm in df_ystrday.Team.unique():
        display(df_ystrday[df_ystrday.Team == tm])
else:
    display(df_ystrday)

Total Accuracy (InRMSE_Range): 0.7505521085287622
7137 / 9509

Yesterday's Results:
Total Accuracy (InRMSE_Range): 0.7160493827160493


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
94928,ATL,Dyson Daniels,SG,CHO,31.57,37.24873,1,5.67873,-7.0
94929,ATL,Jalen Johnson,SF,CHO,39.48,35.143307,1,4.336693,-7.0
94930,ATL,Nickeil Alexander-Walker,SG,CHO,34.1,35.658966,1,1.558966,-7.0
94931,ATL,Onyeka Okongwu,C,CHO,36.15,35.485264,1,0.664736,-7.0
94932,ATL,Trae Young,PG,CHO,20.15,22.861771,1,2.711771,-7.0
94933,ATL,Zaccharie Risacher,SF,CHO,20.47,23.478081,1,3.008081,-7.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
94934,BRK,Danny Wolf,PF,MIA,19.58,21.345135,1,1.765135,-11.0
94935,BRK,Michael Porter Jr.,SF,MIA,33.73,33.864834,1,0.134834,-11.0
94936,BRK,Noah Clowney,PF,MIA,26.2,31.579412,1,5.379412,-11.0
94937,BRK,Terance Mann,SG,MIA,30.53,25.119055,1,5.410945,-11.0
94938,BRK,Tyrese Martin,SG,MIA,18.93,18.164412,1,0.765588,-11.0
94939,BRK,Ziaire Williams,SF,MIA,18.7,22.695459,1,3.995459,-11.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
94940,CHO,Brandon Miller,SF,ATL,35.28,32.377811,1,2.902189,7.0
94941,CHO,Kon Knueppel,SF,ATL,34.98,37.013588,1,2.033588,7.0
94942,CHO,LaMelo Ball,PG,ATL,29.2,26.167982,1,3.032018,7.0
94943,CHO,Miles Bridges,PF,ATL,32.1,36.690392,1,4.590392,7.0
94944,CHO,Moussa Diabate,C,ATL,17.57,20.710745,1,3.140745,7.0
94945,CHO,Ryan Kalkbrenner,C,ATL,27.33,26.580406,1,0.749594,7.0
94946,CHO,Sion James,SG,ATL,20.42,31.263847,0,10.843847,7.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
94947,DAL,Anthony Davis,PF,DET,37.37,30.502388,0,6.867612,2.0
94948,DAL,Cooper Flagg,PG,DET,39.92,35.747604,1,4.172396,2.0
94949,DAL,Daniel Gafford,C,DET,15.63,19.442776,1,3.812776,2.0
94950,DAL,Klay Thompson,SF,DET,19.32,20.401781,1,1.081781,2.0
94951,DAL,Max Christie,SG,DET,19.2,28.630215,0,9.430215,2.0
94952,DAL,Naji Marshall,SF,DET,33.28,31.896248,1,1.383752,2.0
94953,DAL,P.J. Washington,PF,DET,36.67,32.760468,1,3.909532,2.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
94954,DEN,Bruce Brown,SG,ORL,34.42,27.185076,0,7.234924,11.0
94955,DEN,Cameron Johnson,SF,ORL,35.68,35.963577,1,0.283577,11.0
94956,DEN,Jamal Murray,PG,ORL,35.67,36.178726,1,0.508726,11.0
94957,DEN,Jonas Valanciunas,C,ORL,13.18,15.700444,1,2.520444,11.0
94958,DEN,Nikola Jokic,C,ORL,37.78,36.610672,1,1.169328,11.0
94959,DEN,Tim Hardaway Jr.,SG,ORL,26.97,30.977585,1,4.007585,11.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
94960,DET,Ausar Thompson,SF,DAL,8.9,26.949787,0,18.049787,-2.0
94961,DET,Cade Cunningham,PG,DAL,44.53,36.374043,0,8.155957,-2.0
94962,DET,Caris LeVert,SG,DAL,27.12,19.322769,0,7.797231,-2.0
94963,DET,Duncan Robinson,SF,DAL,13.03,29.142593,0,16.112593,-2.0
94964,DET,Isaiah Stewart,C,DAL,31.4,20.699133,0,10.700867,-2.0
94965,DET,Jaden Ivey,SG,DAL,14.48,15.23967,1,0.75967,-2.0
94966,DET,Jalen Duren,C,DAL,34.4,30.990278,1,3.409722,-2.0
94967,DET,Tobias Harris,PF,DAL,18.18,29.187511,0,11.007511,-2.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
94968,GSW,Brandin Podziemski,SG,PHO,27.77,28.725267,1,0.955267,-1.0
94969,GSW,Buddy Hield,SG,PHO,9.97,18.363159,0,8.393159,-1.0
94970,GSW,De'Anthony Melton,PG,PHO,14.92,19.258841,1,4.338841,-1.0
94971,GSW,Draymond Green,PF,PHO,30.05,29.434849,1,0.615151,-1.0
94972,GSW,Jimmy Butler,SF,PHO,33.93,33.466866,1,0.463134,-1.0
94973,GSW,Moses Moody,SG,PHO,30.45,21.642513,0,8.807487,-1.0
94974,GSW,Quinten Post,PF,PHO,26.63,23.494457,1,3.135543,-1.0
94975,GSW,Stephen Curry,PG,PHO,33.25,33.093338,1,0.156662,-1.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
94976,HOU,Alperen Sengun,C,NOP,35.7,36.373772,1,0.673772,-5.0
94977,HOU,Amen Thompson,SF,NOP,40.6,37.639938,1,2.960062,-5.0
94978,HOU,Jabari Smith Jr.,PF,NOP,44.13,36.66629,0,7.46371,-5.0
94979,HOU,Josh Okogie,SG,NOP,30.17,23.082279,0,7.087721,-5.0
94980,HOU,Kevin Durant,SF,NOP,42.68,37.799664,1,4.880336,-5.0
94981,HOU,Reed Sheppard,PG,NOP,26.95,29.575979,1,2.625979,-5.0
94982,HOU,Steven Adams,C,NOP,27.82,19.060303,0,8.759697,-5.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
94983,IND,Andrew Nembhard,PG,NYK,37.42,32.912361,1,4.507639,-1.0
94984,IND,Bennedict Mathurin,SF,NYK,38.47,33.813457,1,4.656543,-1.0
94985,IND,Isaiah Jackson,C,NYK,26.3,18.551481,0,7.748519,-1.0
94986,IND,Jarace Walker,PF,NYK,16.28,22.611088,0,6.331088,-1.0
94987,IND,Jay Huff,C,NYK,18.1,23.790844,1,5.690844,-1.0
94988,IND,Pascal Siakam,PF,NYK,37.55,33.181084,1,4.368916,-1.0
94989,IND,T.J. McConnell,PG,NYK,17.9,15.799555,1,2.100445,-1.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
94990,LAC,Bogdan Bogdanovic,SG,OKC,24.43,19.888721,1,4.541279,-21.0
94991,LAC,Ivica Zubac,C,OKC,31.68,30.608543,1,1.071457,-21.0
94992,LAC,John Collins,PF,OKC,28.43,26.060587,1,2.369413,-21.0
94993,LAC,Kawhi Leonard,SF,OKC,32.15,30.373501,1,1.776499,-21.0
94994,LAC,Kris Dunn,PG,OKC,31.37,25.104839,0,6.265161,-21.0
94995,LAC,Nicolas Batum,PF,OKC,19.82,22.219479,1,2.399479,-21.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
94996,LAL,Jake LaRavia,PF,UTA,28.22,19.664722,0,8.555278,8.0
94997,LAL,Jaxson Hayes,C,UTA,22.18,16.407967,1,5.772033,8.0
94998,LAL,LeBron James,SF,UTA,32.62,36.853813,1,4.233813,8.0
94999,LAL,Luka Doncic,PG,UTA,39.55,37.614571,1,1.935429,8.0
95000,LAL,Marcus Smart,SG,UTA,27.7,28.11755,1,0.41755,8.0
95001,LAL,Rui Hachimura,PF,UTA,32.78,32.656109,1,0.123891,8.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95002,MIA,Andrew Wiggins,SF,BRK,32.9,33.20105,1,0.30105,11.0
95003,MIA,Bam Adebayo,C,BRK,31.07,32.898521,1,1.828521,11.0
95004,MIA,Davion Mitchell,PG,BRK,29.95,28.644768,1,1.305232,11.0
95005,MIA,Dru Smith,SG,BRK,24.35,13.03628,0,11.31372,11.0
95006,MIA,Jaime Jaquez Jr.,SF,BRK,29.0,28.478374,1,0.521626,11.0
95007,MIA,Kel'el Ware,C,BRK,30.28,24.919733,1,5.360267,11.0
95008,MIA,Norman Powell,SG,BRK,32.65,31.837353,1,0.812647,11.0
95009,MIA,Simone Fontecchio,SF,BRK,22.17,13.339989,0,8.830011,11.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95010,MIL,Bobby Portis,PF,TOR,39.48,19.738354,0,19.741646,-6.0
95011,MIL,Gary Trent Jr.,SG,TOR,30.43,29.599926,1,0.830074,-6.0
95012,MIL,Kevin Porter Jr.,PG,TOR,40.15,34.638474,1,5.511526,-6.0
95013,MIL,Myles Turner,C,TOR,30.95,26.87328,1,4.07672,-6.0
95014,MIL,Ryan Rollins,PG,TOR,28.22,33.281891,1,5.061891,-6.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95015,NOP,Derik Queen,C,HOU,35.35,29.98078,1,5.36922,5.0
95016,NOP,Herbert Jones,SF,HOU,38.22,23.947186,0,14.272814,5.0
95017,NOP,Jeremiah Fears,PG,HOU,20.2,30.645061,0,10.445061,5.0
95018,NOP,Jordan Poole,PG,HOU,24.53,30.416517,1,5.886517,5.0
95019,NOP,Jose Alvarado,PG,HOU,30.37,22.437584,0,7.932416,5.0
95020,NOP,Saddiq Bey,SF,HOU,34.32,33.285271,1,1.034729,5.0
95021,NOP,Trey Murphy III,SF,HOU,38.48,34.743568,1,3.736432,5.0
95022,NOP,Zion Williamson,PF,HOU,21.43,31.438133,0,10.008133,5.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95023,NYK,Jalen Brunson,PG,IND,34.37,38.805725,1,4.435725,1.0
95024,NYK,Jordan Clarkson,SG,IND,29.37,22.317333,0,7.052667,1.0
95025,NYK,Mikal Bridges,SF,IND,37.27,35.224125,1,2.045875,1.0
95026,NYK,OG Anunoby,PF,IND,33.7,34.987816,1,1.287816,1.0
95027,NYK,Tyler Kolek,PG,IND,26.2,13.796141,0,12.403859,1.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95028,OKC,Ajay Mitchell,SG,LAC,28.58,25.840012,1,2.739988,21.0
95029,OKC,Alex Caruso,SG,LAC,15.42,21.113873,1,5.693873,21.0
95030,OKC,Cason Wallace,SG,LAC,29.18,28.203079,1,0.976921,21.0
95031,OKC,Chet Holmgren,PF,LAC,30.52,29.12044,1,1.39956,21.0
95032,OKC,Isaiah Joe,SG,LAC,22.23,18.449619,1,3.780381,21.0
95033,OKC,Jalen Williams,SG,LAC,28.72,29.417391,1,0.697391,21.0
95034,OKC,Luguentz Dort,SF,LAC,18.07,25.661144,0,7.591144,21.0
95035,OKC,Shai Gilgeous-Alexander,PG,LAC,28.68,31.35528,1,2.67528,21.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95036,ORL,Anthony Black,PG,DEN,37.42,33.178894,1,4.241106,-11.0
95037,ORL,Desmond Bane,SG,DEN,36.4,36.024544,1,0.375456,-11.0
95038,ORL,Goga Bitadze,C,DEN,19.43,14.662088,1,4.767912,-11.0
95039,ORL,Jett Howard,SF,DEN,28.85,8.170355,0,20.679645,-11.0
95040,ORL,Paolo Banchero,PF,DEN,39.28,31.017298,0,8.262702,-11.0
95041,ORL,Tyus Jones,PG,DEN,19.15,15.197382,1,3.952618,-11.0
95042,ORL,Wendell Carter Jr.,C,DEN,28.57,29.898867,1,1.328867,-11.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95043,PHO,Collin Gillespie,PG,GSW,33.73,33.60302,1,0.12698,1.0
95044,PHO,Devin Booker,SG,GSW,32.95,33.330933,1,0.380933,1.0
95045,PHO,Dillon Brooks,SF,GSW,33.03,31.686214,1,1.343786,1.0
95046,PHO,Mark Williams,C,GSW,19.02,27.540649,0,8.520649,1.0
95047,PHO,Royce O'Neale,SF,GSW,31.32,29.380947,1,1.939053,1.0
95048,PHO,Ryan Dunn,SF,GSW,17.17,18.740759,1,1.570759,1.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95049,POR,Deni Avdija,SF,SAC,42.22,36.773315,1,5.446685,1.0
95050,POR,Donovan Clingan,C,SAC,34.97,24.662264,0,10.307736,1.0
95051,POR,Jerami Grant,PF,SAC,40.43,35.963703,1,4.466297,1.0
95052,POR,Kris Murray,SF,SAC,17.32,28.706196,0,11.386196,1.0
95053,POR,Robert Williams,C,SAC,17.22,14.966729,1,2.253271,1.0
95054,POR,Shaedon Sharpe,SG,SAC,35.1,33.724171,1,1.375829,1.0
95055,POR,Toumani Camara,PF,SAC,34.62,32.336426,1,2.283574,1.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95056,SAC,DeMar DeRozan,PF,POR,34.85,35.928474,1,1.078474,-1.0
95057,SAC,Dennis Schroder,PG,POR,22.33,28.369888,0,6.039888,-1.0
95058,SAC,Keegan Murray,PF,POR,42.8,38.731388,1,4.068612,-1.0
95059,SAC,Malik Monk,SG,POR,0.0,28.460934,0,28.460934,-1.0
95060,SAC,Maxime Raynaud,C,POR,36.87,25.890116,0,10.979884,-1.0
95061,SAC,Nique Clifford,SG,POR,19.95,18.885401,1,1.064599,-1.0
95062,SAC,Precious Achiuwa,C,POR,14.0,21.917187,0,7.917187,-1.0
95063,SAC,Russell Westbrook,PG,POR,35.42,34.049248,1,1.370752,-1.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95064,SAS,De'Aaron Fox,PG,WAS,24.98,28.521229,1,3.541229,25.0
95065,SAS,Devin Vassell,SG,WAS,26.35,27.438797,1,1.088797,25.0
95066,SAS,Dylan Harper,SG,WAS,22.6,22.258949,1,0.341051,25.0
95067,SAS,Harrison Barnes,PF,WAS,18.23,27.110249,0,8.880249,25.0
95068,SAS,Julian Champagnie,SF,WAS,25.85,26.34852,1,0.49852,25.0
95069,SAS,Keldon Johnson,SF,WAS,21.03,18.242628,1,2.787372,25.0
95070,SAS,Luke Kornet,C,WAS,23.18,24.844181,1,1.664181,25.0
95071,SAS,Stephon Castle,PG,WAS,25.13,26.689598,1,1.559598,25.0
95072,SAS,Victor Wembanyama,C,WAS,17.3,28.766712,0,11.466712,25.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95073,TOR,Brandon Ingram,SF,MIL,35.27,35.731308,1,0.461308,6.0
95074,TOR,Immanuel Quickley,PG,MIL,28.77,33.783787,1,5.013787,6.0
95075,TOR,Jakob Poeltl,C,MIL,0.0,27.70652,0,27.70652,6.0
95076,TOR,Jamal Shead,PG,MIL,23.45,22.478491,1,0.971509,6.0
95077,TOR,Ochai Agbaji,SG,MIL,20.47,18.45825,1,2.01175,6.0
95078,TOR,Sandro Mamukelashvili,C,MIL,36.42,18.956289,0,17.463711,6.0
95079,TOR,Scottie Barnes,PF,MIL,35.97,35.252609,1,0.717391,6.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95080,UTA,Ace Bailey,SF,LAL,26.17,25.074553,1,1.095447,-8.0
95081,UTA,Isaiah Collier,PG,LAL,28.13,15.843615,0,12.286385,-8.0
95082,UTA,Jusuf Nurkic,C,LAL,31.57,22.629253,0,8.940747,-8.0
95083,UTA,Keyonte George,PG,LAL,39.08,36.559608,1,2.520392,-8.0
95084,UTA,Kyle Filipowski,C,LAL,27.22,28.602905,1,1.382905,-8.0
95085,UTA,Svi Mykhailiuk,SF,LAL,16.98,15.946594,1,1.033406,-8.0


Unnamed: 0,Team,Player,Pos,Opp,MP,MP_pred,InRMSE_Range,Diff,Spread
95086,WAS,Bilal Coulibaly,SG,SAS,22.57,23.580845,1,1.010845,-25.0
95087,WAS,CJ McCollum,SG,SAS,29.1,28.583271,1,0.516729,-25.0
95088,WAS,Kyshawn George,SF,SAS,36.08,28.236397,0,7.843603,-25.0
95089,WAS,Tre Johnson,SG,SAS,24.1,18.044113,0,6.055887,-25.0


# Main Model

In [8]:
def haversine_km(lat1, lon1, lat2, lon2):
    R = 6371.0  # Earth radius in km
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon/2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

def travel_km_from_row(row):
    prev = row['PrevLocation']
    cur  = row['Location']
    # missing prev => first game => no travel
    if pd.isna(prev) or pd.isna(cur):
        return 0.0
    # same arena => 0
    if prev == cur:
        return 0.0
    # lookup coords
    prev_coords = arenas.get(prev)
    cur_coords  = arenas.get(cur)
    if not prev_coords or not cur_coords:
        # fallback if code not found
        return 0.0
    return haversine_km(prev_coords[0], prev_coords[1], cur_coords[0], cur_coords[1])

In [14]:
def setup_df_main(df):
    
    # Minutes based Features
    df['MP_lst_gm'] = (
        df
        .groupby(['Player', 'Season'])['MP']
        .shift(1)
    )

    df['MP_last_5_avg'] = (
        df.groupby(['Player', 'Season'])['MP']
          .rolling(window=5, min_periods=1)
          .mean()
          .shift(1)
          .reset_index(level=[0, 1], drop=True)
    )
    
    df['MP_last_10_avg'] = (
        df.groupby(['Player', 'Season'])['MP']
          .rolling(window=10, min_periods=1)
          .mean()
          .shift(1)
          .reset_index(level=[0, 1], drop=True)
    )
    
    # Location based features
    df["PrevOpp"] = df.groupby("Player")["Opp"].shift(1)
    df["DaysLstGm"] = (df.groupby("Player")["Date"].diff().dt.days).fillna(0).astype(int)
    df['Location'] = df.apply(lambda r: r['Team'] if r['Team_type'] == 'Home' else r['Opp'], axis=1)
    df['PrevLocation'] = df.groupby('Player')['Location'].shift(1)
    df['travel_km'] = df.apply(travel_km_from_row, axis=1).fillna(0)
    df['travel_hours'] = df['travel_km'] / 800.0      # approximate flight hours
    df['is_long_trip'] = (df['travel_km'] > 1500).astype(int)
    df['same_arena'] = (df['PrevLocation'] == df['Location']).astype(int)
    
    # Efficiency metrics
    df['three_rate_raw'] =  np.where(df.FGA > 0, df['TPA'] / df['FGA'], 0)
    df['ft_rate_raw']    =  np.where(df.FGA > 0, df['FTA'] / df['FGA'], 0)
    df['eFG_raw'] = (df['FG'] + 0.5 * df['TPM']) / df['FGA']
    df['TS_raw'] = df['PTS'] / (2 * (df['FGA'] + 0.44 * df['FTA']))    
    df['usage_proxy_raw'] =  np.where(df.MP > 0, (df['FGA'] + 0.44 * df['FTA']) / df['MP'], 0)
    eff_cols = []
    for w in [3, 5, 10]:
        for metric in ['three_rate', 'ft_rate', 'eFG', 'TS', 'usage_proxy']:
            col = f"{metric}_L{w}"
            df[col] = (
                df.groupby(['Player','Season'])[f'{metric}_raw']
                  .rolling(w, min_periods=1)
                  .mean()
                  .shift(1)
                  .reset_index(level=[0,1], drop=True)
            )
            eff_cols.append(col)
    for metric in ['three_rate', 'ft_rate', 'eFG', 'TS', 'usage_proxy']:
        col = f'{metric}_weighted'
        df[col] = (
            0.6 * df[f'{metric}_L3'] +
            0.3 * df[f'{metric}_L5'] +
            0.1 * df[f'{metric}_L10']
        )
        eff_cols.append(col)
    
    df['FGA_L5_avg'] = df.groupby(['Player', 'Season'])['FGA'].rolling(5, min_periods=1).mean().shift(1).reset_index(level=[0, 1], drop=True)
    df['TPA_L5_avg'] = df.groupby(['Player', 'Season'])['TPA'].rolling(5, min_periods=1).mean().shift(1).reset_index(level=[0, 1], drop=True)
    df['FTA_L5_avg'] = df.groupby(['Player', 'Season'])['FTA'].rolling(5, min_periods=1).mean().shift(1).reset_index(level=[0, 1], drop=True)
    
    stat_cols = []
    for col in [f'Off_{tgt_stat}', f'Off_L5_{tgt_stat}', f'Def_{tgt_stat}', f'Def_L5_{tgt_stat}']:
        stat_cols.append(col)

    final_cols = ['Date', 'Team', 'Team_type', 'B2B', 'cup_gm', 'pstszn_gm', 'Player', 'Pos', 'Opp', 
                  'MP', 'MP_lst_gm', 'MP_last_5_avg', 'MP_last_10_avg', 'Spread', 'Total',
                  'DaysLstGm', 'travel_km', 'travel_hours', 'PrevLocation', 'is_long_trip', 'same_arena', 
                  'FGA_L5_avg', 'TPA_L5_avg', 'FTA_L5_avg',
                  tgt_stat] + stat_cols + eff_cols
    df = df[final_cols]
    
    for col in df.select_dtypes(include='number').columns:
        df[col] = df[col].fillna(0)
        
    # PRA features
    if tgt_stat == 'PTS':
        pass
    elif tgt_stat == 'PRA':
        df['PRA_per_min'] = np.where(df.MP > 0, df.PRA / df['MP'], 0)
        df['PRA_last_5_per_min_avg'] = df.groupby('Player')['PRA_per_min'].rolling(5, min_periods=1).mean().shift(1).reset_index(level=[0,1], drop=True)
        df = df.drop(['PRA_per_min'], axis=1)
        
    return df

In [20]:
tgt_stat = "PRA"
df_main = df.copy()
df_main = setup_df_main(df_main)
display(df_main)

n = len(df)
train_end = int(0.65 * n)
val_end   = int(0.85 * n)
main_train_df = df_main.iloc[:train_end]
main_val_df   = df_main.iloc[train_end:val_end]
main_test_df  = df_main.iloc[val_end:]

stat_model, main_splits = create_baseline_model(df_main, tgt_stat, main_train_df, main_val_df, main_test_df)
stat_model, stat_preds = hyperparam_tuning(main_splits, n_iter=1)
# feature_importance(stat_model)

Unnamed: 0,Date,Team,Team_type,B2B,cup_gm,pstszn_gm,Player,Pos,Opp,MP,MP_lst_gm,MP_last_5_avg,MP_last_10_avg,Spread,Total,DaysLstGm,travel_km,travel_hours,PrevLocation,is_long_trip,same_arena,FGA_L5_avg,TPA_L5_avg,FTA_L5_avg,PRA,Off_PRA,Off_L5_PRA,Def_PRA,Def_L5_PRA,three_rate_L3,ft_rate_L3,eFG_L3,TS_L3,usage_proxy_L3,three_rate_L5,ft_rate_L5,eFG_L5,TS_L5,usage_proxy_L5,three_rate_L10,ft_rate_L10,eFG_L10,TS_L10,usage_proxy_L10,three_rate_weighted,ft_rate_weighted,eFG_weighted,TS_weighted,usage_proxy_weighted,PRA_last_5_per_min_avg
0,2022-10-21,0,1,0,0,0,2,3,21,0.00,0.00,25.226,18.698,10.0,206.0,0,0.0,0.0,0.0,0,0,8.4,4.0,5.2,0,0.000000,0.0,17.00000,17.0,0.508995,0.681481,0.505556,0.606562,0.419508,0.486349,0.646984,0.496984,0.583267,0.428189,0.551508,0.369325,0.479742,0.513125,0.425240,0.506452,0.639917,0.500403,0.590230,0.422685,
1,2022-10-21,0,1,0,0,0,5,2,21,14.37,0.00,24.286,26.067,10.0,206.0,0,0.0,0.0,0.0,0,0,11.4,4.4,4.2,7,6.000000,6.0,29.00000,29.0,0.559259,0.829630,0.416667,0.519801,0.616138,0.508283,0.691717,0.475758,0.563433,0.552491,0.479785,0.587601,0.523390,0.606242,0.509510,0.536019,0.764053,0.445066,0.541535,0.586381,0.448430
2,2022-10-21,0,1,0,0,0,120,0,21,31.62,0.00,0.934,3.465,10.0,206.0,0,0.0,0.0,0.0,0,0,0.2,0.2,0.0,15,10.000000,10.0,23.50000,23.5,0.333333,0.000000,1.500000,1.500000,0.071378,0.200000,0.000000,1.500000,1.500000,0.042827,0.300000,0.000000,0.600000,0.600000,0.088741,0.290000,0.000000,1.410000,1.410000,0.064549,0.420937
3,2022-10-21,0,1,0,0,0,171,3,21,32.53,0.00,31.028,31.442,10.0,206.0,0,0.0,0.0,0.0,0,0,12.8,6.0,3.6,19,26.000000,26.0,17.00000,17.0,0.476068,0.409402,0.427457,0.491737,0.447576,0.470668,0.318368,0.465031,0.522407,0.460416,0.407527,0.324184,0.507822,0.559126,0.563756,0.467594,0.373570,0.446766,0.507677,0.463046,0.280624
4,2022-10-21,0,1,0,0,0,178,4,21,39.62,0.00,29.280,28.101,10.0,206.0,0,0.0,0.0,0.0,0,0,9.4,0.0,1.0,38,36.000000,36.0,26.00000,26.0,0.000000,0.134680,0.821549,0.799477,0.334001,0.000000,0.098990,0.705051,0.694954,0.338075,0.000000,0.175991,0.721115,0.718415,0.332501,0.000000,0.128104,0.776556,0.760014,0.335073,0.210468
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95085,2025-12-18,28,1,0,0,0,704,3,13,16.98,0.00,20.166,22.319,-8.0,278.0,3,0.0,0.0,6.0,0,0,4.4,3.2,0.0,12,13.280000,12.8,30.95000,33.2,0.666667,0.000000,0.800000,0.800000,0.100368,0.616667,0.000000,0.655208,0.655208,0.193944,0.612143,0.033333,0.600904,0.607579,0.246628,0.646214,0.003333,0.736653,0.737320,0.143067,0.988909
95086,2025-12-18,29,0,0,0,0,59,4,26,22.57,24.82,29.488,27.462,-25.0,213.0,16,0.0,0.0,22.0,0,0,7.0,3.2,2.2,9,15.833333,13.6,26.96000,31.6,0.533333,0.111111,0.381481,0.375814,0.270905,0.453333,0.333333,0.378889,0.431399,0.267869,0.465455,0.417273,0.398535,0.476885,0.297470,0.502545,0.208394,0.382409,0.402597,0.272651,0.965009
95087,2025-12-18,29,0,0,0,0,89,4,26,29.10,34.13,32.222,31.599,-25.0,213.0,4,0.0,0.0,11.0,0,0,16.8,7.8,3.6,18,25.541667,27.6,26.96000,31.6,0.440850,0.228431,0.586765,0.603236,0.568000,0.471176,0.203725,0.536821,0.561277,0.570557,0.458431,0.158815,0.577289,0.591184,0.584800,0.451706,0.214058,0.570834,0.589443,0.570447,0.000000
95088,2025-12-18,29,0,0,0,0,491,3,26,36.08,35.88,32.040,31.974,-25.0,213.0,4,0.0,0.0,11.0,0,0,10.0,6.0,2.2,24,25.545455,23.4,28.28125,29.8,0.660606,0.157576,0.565152,0.578805,0.353568,0.601558,0.224416,0.597532,0.623845,0.343995,0.530955,0.184477,0.515166,0.541190,0.352268,0.629927,0.180318,0.569867,0.588556,0.350566,0.000000


Stats Model
Train: 61808 / Validation: 19018 / Test: 14264
RMSE: 5.393322432844529
MAE: 3.7765390872955322
R²: 0.8311665058135986

Trial 1/1: {'n_estimators': 1381, 'learning_rate': 0.05371259957130203, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9719985486951008, 'colsample_bytree': 0.9470014872779348, 'gamma': 0.3713205303200495, 'reg_lambda': 2.548038596595007, 'reg_alpha': 0.9341494351378603}
Validation MAE: 3.6048

Best validation MAE: 3.6047847270965576
Best parameters: {'learning_rate': 0.05371259957130203, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9719985486951008, 'colsample_bytree': 0.9470014872779348, 'gamma': 0.3713205303200495, 'reg_lambda': 2.548038596595007, 'reg_alpha': 0.9341494351378603, 'objective': 'reg:squarederror', 'tree_method': 'hist', 'device': 'cuda', 'seed': 42}

Test Metrics:
RMSE: 5.386928894502758
MAE: 3.7652580738067627
R²: 0.8315665125846863


In [14]:
rmse = np.sqrt(mean_squared_error(main_splits[5], stat_preds)) # splits[5] = y_test
mae = mean_absolute_error(main_splits[5], stat_preds)
pred_col = f'{tgt_stat}_Pred'
df_lines = pd.read_csv(f"../tables/2025/parlay_lines.csv")
df_lines['Date'] = pd.to_datetime(df_lines.Date)
df_lines = df_lines[['Date', 'Team', 'Player', f'{tgt_stat}_line']]
main_test_df[pred_col] = stat_preds
main_test_df['Team'] = team_encoder.inverse_transform(main_test_df["Team"])
main_test_df['Player'] = player_encoder.inverse_transform(main_test_df["Player"])

df_test = main_test_df.merge(df_lines, on=['Date', 'Team', 'Player'])
df_test = df_test[[c for c in df_test.columns if c != pred_col] + [pred_col]]

df_test['Diff'] = df_test[f'{tgt_stat}_Pred'] - df_test[f'{tgt_stat}_line']
df_test['Act_Res'] = np.where(df_test[tgt_stat] > df_test[f'{tgt_stat}_line'], 'O', 'U')
df_test['Pred_Res'] = np.where(df_test[pred_col] > df_test[f'{tgt_stat}_line'], 'O', 'U')
df_test['ParlayHit'] = np.where(df_test['Act_Res'] == df_test['Pred_Res'], 1, 0)

df_test['Diff2'] = abs(df_test[f'{tgt_stat}_Pred'] - df_test[tgt_stat])
df_test['InRMSE_Range'] = np.where(df_test['Diff2'] <= rmse, 1, 0)

print("Total Accuracy (ParlayHit):", ((df_test.ParlayHit == 1).sum() / df_test.shape[0]))
print((df_test.ParlayHit == 1).sum(), "/", df_test.shape[0])

print("\nTotal Accuracy (InRMSE_Range):", ((df_test.InRMSE_Range == 1).sum() / df_test.shape[0]))
print((df_test.InRMSE_Range == 1).sum(), "/", df_test.shape[0])

df_ystrday = df_test[(df_test.Date == (datetime.strptime(now, "%Y-%m-%d") - timedelta(days=1)).strftime("%Y-%m-%d")) & ~(df_test[f'{tgt_stat}_line'].isnull())]\
            [['Team', 'Player', tgt_stat, f'{tgt_stat}_line', f'{tgt_stat}_Pred', 'ParlayHit', 'Diff', 'InRMSE_Range', 'Diff2']]\
            .sort_values(f'{tgt_stat}_line', ascending=False)

print("\nYesterday's Results:")
print("Total Accuracy (ParlayHit):", ((df_ystrday.ParlayHit == 1).sum() / df_ystrday.shape[0]))
print("Total Accuracy (InRMSE_Range):", ((df_ystrday.InRMSE_Range == 1).sum() / df_ystrday.shape[0]))
if df_ystrday.shape[0] >= 50:
    for tm in df_ystrday.Team.unique():
        display(df_ystrday[df_ystrday.Team == tm])
else:
    display(df_ystrday)

Total Accuracy (ParlayHit): 0.7829806393516434
1739 / 2221

Total Accuracy (InRMSE_Range): 0.6231427285006754
1384 / 2221

Yesterday's Results:
Total Accuracy (ParlayHit): 0.7454545454545455
Total Accuracy (InRMSE_Range): 0.6


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2130,LAL,Luka Doncic,70,55.5,60.524353,1,5.024353,0,9.475647
2129,LAL,LeBron James,45,36.5,35.717392,0,-0.782608,0,9.282608
2131,LAL,Marcus Smart,22,17.5,20.134361,1,2.634361,1,1.865639


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2089,DEN,Nikola Jokic,47,53.5,55.61956,0,2.11956,0,8.61956
2087,DEN,Jamal Murray,37,37.5,42.412128,0,4.912128,0,5.412128
2086,DEN,Cameron Johnson,33,20.5,23.814518,1,3.314518,0,9.185482
2085,DEN,Bruce Brown,15,13.5,25.766384,1,12.266384,0,10.766384


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2092,DET,Cade Cunningham,48,42.5,46.766323,1,4.266323,1,1.233677
2097,DET,Jalen Duren,31,31.5,34.104385,0,2.604385,1,3.104385
2098,DET,Tobias Harris,7,20.5,12.766238,1,-7.733762,0,5.766238
2091,DET,Ausar Thompson,12,18.5,6.710421,1,-11.789579,1,5.289579
2093,DET,Caris LeVert,12,13.5,18.186069,0,4.686069,0,6.186069
2094,DET,Duncan Robinson,4,13.5,5.790594,1,-7.709406,1,1.790594


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2180,POR,Deni Avdija,45,41.5,44.967888,1,3.467888,1,0.032112
2185,POR,Shaedon Sharpe,34,31.5,36.791672,1,5.291672,1,2.791672
2182,POR,Jerami Grant,34,27.5,41.065243,1,13.565243,0,7.065243
2186,POR,Toumani Camara,30,21.5,23.986217,1,2.486217,0,6.013783
2181,POR,Donovan Clingan,31,21.5,31.51083,1,10.01083,1,0.51083
2184,POR,Robert Williams,10,14.5,14.003721,1,-0.496279,1,4.003721


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2166,OKC,Shai Gilgeous-Alexander,45,41.5,35.646629,0,-5.853371,0,9.353371
2164,OKC,Jalen Williams,26,29.5,31.054466,0,1.554466,1,5.054466
2162,OKC,Chet Holmgren,31,26.5,32.045734,1,5.545734,1,1.045734
2159,OKC,Ajay Mitchell,28,16.5,23.662659,1,7.162659,1,4.337341
2161,OKC,Cason Wallace,10,13.5,19.206848,0,5.706848,0,9.206848
2165,OKC,Luguentz Dort,2,12.5,9.513138,1,-2.986862,0,7.513138


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2154,NYK,Jalen Brunson,39,40.5,37.751507,1,-2.748493,1,1.248493
2156,NYK,Mikal Bridges,35,26.5,27.816708,1,1.316708,0,7.183292
2157,NYK,OG Anunoby,23,25.5,25.497862,1,-0.002138,1,2.497862


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2107,HOU,Alperen Sengun,47,39.5,44.870193,1,5.370193,1,2.129807
2111,HOU,Kevin Durant,41,33.5,43.075249,1,9.575249,1,2.075249
2108,HOU,Amen Thompson,33,30.5,38.852371,1,8.352371,0,5.852371
2109,HOU,Jabari Smith Jr.,26,23.5,33.808998,1,10.308998,0,7.808998
2112,HOU,Reed Sheppard,22,17.5,19.726498,1,2.226498,1,2.273502


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2060,ATL,Jalen Johnson,63,39.5,48.369572,1,8.869572,0,14.630428
2063,ATL,Trae Young,19,27.5,19.269846,1,-8.230154,1,0.269846
2062,ATL,Onyeka Okongwu,17,27.5,30.972847,0,3.472847,0,13.972847
2061,ATL,Nickeil Alexander-Walker,38,24.5,31.038456,1,6.538456,0,6.961544
2059,ATL,Dyson Daniels,23,23.5,25.464211,0,1.964211,1,2.464211


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2214,UTA,Keyonte George,46,36.5,42.343529,1,5.843529,1,3.656471
2213,UTA,Jusuf Nurkic,30,22.5,31.496109,1,8.996109,1,1.496109
2212,UTA,Isaiah Collier,31,16.5,21.89447,1,5.39447,0,9.10553
2216,UTA,Svi Mykhailiuk,12,10.5,10.124356,0,-0.375644,1,1.875644


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2171,ORL,Paolo Banchero,52,36.5,39.681824,1,3.181824,0,12.318176
2168,ORL,Desmond Bane,16,32.5,31.891392,1,-0.608608,0,15.891392
2167,ORL,Anthony Black,27,25.5,31.782017,1,6.282017,1,4.782017
2173,ORL,Wendell Carter Jr.,34,22.5,21.284582,0,-1.215418,0,12.715418


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2106,GSW,Stephen Curry,31,36.5,34.807945,1,-1.692055,1,3.807945
2103,GSW,Jimmy Butler,37,30.5,28.782558,0,-1.717442,0,8.217442
2102,GSW,Draymond Green,16,20.5,18.236233,1,-2.263767,1,2.236233
2099,GSW,Brandin Podziemski,22,18.5,17.790287,0,-0.709713,1,4.209713
2105,GSW,Quinten Post,21,15.5,16.554035,1,1.054035,1,4.445965


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2066,BRK,Michael Porter Jr.,37,36.5,27.480236,0,-9.019764,0,9.519764
2067,BRK,Noah Clowney,17,21.5,15.575342,1,-5.924658,1,1.424658
2065,BRK,Danny Wolf,16,17.5,14.263113,1,-3.236887,1,1.736887
2068,BRK,Terance Mann,9,15.5,18.575163,0,3.075163,0,9.575163


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2175,PHO,Devin Booker,29,35.5,29.869228,1,-5.630772,1,0.869228
2176,PHO,Dillon Brooks,33,23.5,27.54796,1,4.04796,0,5.45204
2174,PHO,Collin Gillespie,27,22.5,22.648022,1,0.148022,1,4.351978
2178,PHO,Royce O'Neale,17,15.5,17.201391,1,1.701391,1,0.201391


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2203,SAS,Victor Wembanyama,27,35.5,21.751442,1,-13.748558,1,5.248558
2202,SAS,Stephon Castle,26,30.5,27.347797,1,-3.152203,1,1.347797
2195,SAS,De'Aaron Fox,12,29.5,26.11153,1,-3.38847,0,14.11153
2197,SAS,Dylan Harper,32,22.5,23.778496,1,1.278496,0,8.221504
2196,SAS,Devin Vassell,27,20.5,21.009121,1,0.509121,0,5.990879
2200,SAS,Keldon Johnson,18,15.5,19.926435,1,4.426435,1,1.926435
2201,SAS,Luke Kornet,17,14.5,18.833939,1,4.333939,1,1.833939


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2078,DAL,Anthony Davis,31,35.5,42.016869,0,6.516869,0,11.016869
2079,DAL,Cooper Flagg,37,29.5,40.039711,1,10.539711,1,3.039711
2084,DAL,P.J. Washington,23,24.5,32.647495,0,8.147495,0,9.647495
2083,DAL,Naji Marshall,20,18.5,22.97662,1,4.47662,1,2.97662


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2204,TOR,Brandon Ingram,40,34.5,33.756218,0,-0.743782,0,6.243782
2210,TOR,Scottie Barnes,37,33.5,34.769386,1,1.269386,1,2.230614
2205,TOR,Immanuel Quickley,25,26.5,21.763086,1,-4.736914,1,3.236914


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2119,IND,Pascal Siakam,37,34.5,41.160938,1,6.660938,1,4.160938
2115,IND,Bennedict Mathurin,26,27.5,31.981619,0,4.481619,0,5.981619
2114,IND,Andrew Nembhard,36,26.5,31.974569,1,5.474569,1,4.025431
2118,IND,Jay Huff,17,17.5,10.938584,1,-6.561416,0,6.061416
2120,IND,T.J. McConnell,18,15.5,14.824691,0,-0.675309,1,3.175309


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2143,MIL,Kevin Porter Jr.,39,33.5,35.304298,1,1.804298,1,3.695702
2145,MIL,Ryan Rollins,26,26.5,22.403484,1,-4.096516,1,3.596516
2141,MIL,Bobby Portis,36,22.5,33.023582,1,10.523582,1,2.976418


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2134,MIA,Bam Adebayo,26,32.5,29.695129,1,-2.804871,1,3.695129
2139,MIA,Norman Powell,32,30.5,29.157303,0,-1.342697,1,2.842697
2133,MIA,Andrew Wiggins,16,24.5,24.313702,1,-0.186298,0,8.313702
2135,MIA,Davion Mitchell,17,21.5,21.189119,1,-0.310881,1,4.189119


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2124,LAC,Kawhi Leonard,36,32.5,27.803864,0,-4.696136,0,8.196136
2122,LAC,Ivica Zubac,25,26.5,26.687094,0,0.187094,1,1.687094
2121,LAC,Bogdan Bogdanovic,17,16.5,14.717379,0,-1.782621,1,2.282621
2125,LAC,Kris Dunn,23,13.5,18.667562,1,5.167562,1,4.332438


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2074,CHO,Miles Bridges,21,31.5,34.124756,0,2.624756,0,13.124756
2071,CHO,Brandon Miller,39,29.5,36.123787,1,6.623787,1,2.876213
2073,CHO,LaMelo Ball,45,28.5,37.910736,1,9.410736,0,7.089264
2072,CHO,Kon Knueppel,40,27.5,31.529745,1,4.029745,0,8.470255


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2194,SAC,Russell Westbrook,33,30.5,33.570587,1,3.070587,1,0.570587
2187,SAC,DeMar DeRozan,40,28.5,30.592588,1,2.092588,0,9.407412
2189,SAC,Keegan Murray,22,25.5,33.860725,0,8.360725,0,11.860725
2191,SAC,Maxime Raynaud,42,21.5,33.888809,1,12.388809,0,8.111191
2190,SAC,Malik Monk,0,21.5,1.481148,1,-20.018852,1,1.481148
2188,SAC,Dennis Schroder,14,19.5,19.051968,1,-0.448032,1,5.051968
2193,SAC,Precious Achiuwa,9,15.5,11.337854,1,-4.162146,1,2.337854
2192,SAC,Nique Clifford,10,12.5,12.076347,1,-0.423653,1,2.076347


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2153,NOP,Zion Williamson,17,27.5,21.49297,1,-6.00703,1,4.49297
2152,NOP,Trey Murphy III,32,25.5,38.695427,1,13.195427,0,6.695427
2146,NOP,Derik Queen,31,22.5,32.195835,1,9.695835,1,1.195835
2149,NOP,Jordan Poole,18,19.5,20.389423,0,0.889423,1,2.389423
2148,NOP,Jeremiah Fears,16,18.5,18.029392,1,-0.470608,1,2.029392
2150,NOP,Jose Alvarado,16,11.5,21.170832,1,9.670832,1,5.170832


Unnamed: 0,Team,Player,PRA,PRA_line,PRA_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
2218,WAS,CJ McCollum,18,26.5,21.554485,1,-4.945515,1,3.554485
2219,WAS,Kyshawn George,24,25.5,24.294819,1,-1.205181,1,0.294819
2217,WAS,Bilal Coulibaly,9,15.5,11.100009,1,-4.399991,1,2.100009


### Today's predictions

In [20]:
df_lines = pd.read_csv(f"../tables/2025/parlay_lines.csv")
df_lines['Date'] = pd.to_datetime(df_lines.Date)

df_pred = pd.read_csv("../tables/2025/parlay_stats.csv")
df_pred['Date'] = pd.to_datetime(df_pred.Date)
df_pred['Season'] = 2025
for col in df_pred.select_dtypes(include='object').columns:
    df_pred[col] = df_pred[col].astype('category')
df_pred = df_pred.drop(['Spread', 'Total'], axis=1).merge(df_mtch, on=['Season', 'Date', 'Team'])
df_pred[tgt_stat] = 0

# Predict minutes
df_act_mins = pd.read_csv("../tables/2025/parlay_actuals.csv")
df_act_mins['Date'] = pd.to_datetime(df_act_mins.Date)
df_pred = df_pred[df_pred.Player.isin(df.Player_name.unique())].merge(df_act_mins[['Date', 'Team', 'Player', 'MP', 'TPM']], on=['Date', 'Team', 'Player'], how='left')
df_pred = df_pred.merge(df3[['Date', 'Team', 'Player', 'TOV', 'PF', '+/-', 'FGA', 'FG', 'TPA', 'FT', 'FTA']], on=['Date', 'Team', 'Player'], how='left')

df_pred = df_pred.merge(df_lines, on=['Date', 'Team', 'Player'], how='left')
df_pred['Spread_x'] = np.where(df_pred.Spread_x.isnull(), df_pred.Spread_y, df_pred.Spread_x)
df_pred['Total_x'] = np.where(df_pred.Total_x.isnull(), df_pred.Total_y, df_pred.Total_x)
df_pred = df_pred.rename(columns={"Spread_x": "Spread", "Total_x": "Total"}).drop(['Spread_y', 'Total_y'], axis=1)
df_pred_mins = setup_df_mins(con, df_pred)

df_pred_mins = df_pred_mins.drop(['Date', 'MP'], axis=1)
df_pred_mins["Team"] = team_encoder.transform(df_pred_mins["Team"])
df_pred_mins["Opp"] = team_encoder.transform(df_pred_mins["Opp"])
df_pred_mins['Team_type'] = team_type_encoder.transform(df_pred_mins['Team_type'])
df_pred_mins["Player"] = player_encoder.transform(df_pred_mins["Player"])
df_pred_mins["Pos"] = position_encoder.transform(df_pred_mins["Pos"])
DM_mins = xgb.DMatrix(df_pred_mins)
df_pred['MP'] = mins_model.predict(DM_mins)
df_pred['N_TPM'] = df_pred.FG - df_pred.TPM
df_pred['PTS'] = (df_pred.FT * 1) + (df_pred.N_TPM * 2) + (df_pred.TPM * 3)
df_pred = setup_df_main(df_pred)
feature_cols = [col for col in df_pred.columns if col not in ['Date', tgt_stat]]
df_pred = df_pred[df_pred.Date == now][feature_cols]

# Predict stat
df_pred["Team"] = team_encoder.transform(df_pred["Team"])
df_pred["Opp"] = team_encoder.transform(df_pred["Opp"])
df_pred = df_pred[~(df_pred.PrevLocation.isnull())] # Filters out players who are debuting on the year
df_pred["PrevLocation"] = team_encoder.transform(df_pred["PrevLocation"])
df_pred["Player"] = player_encoder.transform(df_pred["Player"])
df_pred["Pos"] = position_encoder.transform(df_pred["Pos"])
df_pred['Team_type'] = team_type_encoder.transform(df_pred['Team_type'])
DM_stats = xgb.DMatrix(df_pred)
df_pred[f"{tgt_stat}_proj"] = stat_model.predict(DM_stats)

df_pred['Team'] = team_encoder.inverse_transform(df_pred["Team"])
df_pred['Opp'] = team_encoder.inverse_transform(df_pred["Opp"])
df_pred['Player'] = player_encoder.inverse_transform(df_pred["Player"])
df_pred['Pos'] = position_encoder.inverse_transform(df_pred["Pos"])

df_lines = df_lines[df_lines.Date == now][['Team', 'Player', f'{tgt_stat}_line']]
df_pred = df_pred.merge(df_lines, on=['Team', 'Player'])

tds_picks = df_pred[~(df_pred[f'{tgt_stat}_line'].isnull())]\
            [['Team', 'Player', 'Pos', 'Opp', 'MP', 'MP_last_5_avg', f'{tgt_stat}_line', f'{tgt_stat}_proj']]
tds_picks['Diff'] = abs((df_pred[f'{tgt_stat}_line'] - df_pred[f'{tgt_stat}_proj']))
tds_picks['Diff2'] = abs((df_pred['MP'] - df_pred['MP_last_5_avg']))
tds_picks = tds_picks[(tds_picks.Diff >= mae) & (tds_picks.Diff2 <= 5)].sort_values('Diff', ascending=False).drop(['Diff', 'Diff2'], axis=1)
display(tds_picks)
tds_picks.insert(0, 'Date', pd.to_datetime(now))
partition_save_df(tds_picks, f"../tables/2025/gmday_preds_{tgt_stat}.csv")

Unnamed: 0,Team,Player,Pos,Opp,MP,MP_last_5_avg,PRA_line,PRA_proj
55,SAS,Victor Wembanyama,C,ATL,32.148602,33.70049,29.5,39.46862
48,MIN,Julius Randle,PF,OKC,36.000908,35.597958,30.5,38.073723
49,MIN,Jaden McDaniels,PF,OKC,34.776131,33.480041,20.5,27.88372
9,NYK,Jalen Brunson,PG,PHI,38.037884,37.398185,36.5,42.592529
26,OKC,Ajay Mitchell,SG,MIN,28.070049,25.22036,14.5,20.225014
59,MIN,Naz Reid,C,OKC,31.157631,29.525701,22.5,27.424723
15,PHI,VJ Edgecombe,SG,NYK,33.963295,31.698192,23.5,27.861828
46,NYK,OG Anunoby,PF,PHI,34.187729,31.097015,23.5,27.804369
18,ATL,Dyson Daniels,SG,SAS,35.935863,35.693183,24.5,28.802477
5,CLE,Darius Garland,PG,CHI,33.266472,31.701362,34.5,30.281181


../tables/2025/gmday_preds_PRA.csv saved!
