# To do:

 - Figure out how to signal injuries
 - Create model that generates minutes projections
 - To df_lines add the real spread result and total result

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import duckdb
import warnings

import xgboost as xgb
from xgboost import XGBRegressor
from scipy.stats import randint, uniform

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import joblib
import warnings
from datetime import datetime, timedelta
from haversine import haversine

pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")

categories = ['PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK']
con = duckdb.connect(database=":memory:")
now = str(datetime.now().date())
print(f"Today's date:", now)

Today's date: 2025-12-15


In [2]:
%run ./common_utils.ipynb

# Feature Engineering

In [3]:
def feature_importance(model):
    importance = model.get_booster().get_score(importance_type='gain')

    # Convert to table
    df_importance = (
        pd.DataFrame({
            'feature': list(importance.keys()),
            'importance': list(importance.values())
        })
        .sort_values(by='importance', ascending=False)
        .reset_index(drop=True)
    )

    df_importance['pct'] = df_importance.importance.cumsum() / df_importance.importance.sum()
    display(df_importance[(df_importance.pct <= .85)])

    xgb.plot_importance(model)
    plt.show()

In [4]:
import math

def haversine_km(lat1, lon1, lat2, lon2):
    R = 6371.0  # Earth radius in km
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon/2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

def travel_km_from_row(row):
    prev = row['PrevLocation']
    cur  = row['Location']
    # missing prev => first game => no travel
    if pd.isna(prev) or pd.isna(cur):
        return 0.0
    # same arena => 0
    if prev == cur:
        return 0.0
    # lookup coords
    prev_coords = arenas.get(prev)
    cur_coords  = arenas.get(cur)
    if not prev_coords or not cur_coords:
        # fallback if code not found
        return 0.0
    return haversine_km(prev_coords[0], prev_coords[1], cur_coords[0], cur_coords[1])

In [5]:
def create_baseline_model(df, pred_col, train_df, val_df, test_df):

    print('Train:', len(train_df), '/ Validation:', len(val_df), '/ Test:', len(test_df))

    feature_cols = [col for col in df.columns 
                    if col not in ['Date', pred_col]]

    X_train, y_train = train_df[feature_cols], train_df[pred_col]
    X_val,   y_val   = val_df[feature_cols],   val_df[pred_col]
    X_test,  y_test  = test_df[feature_cols],  test_df[pred_col]


    model = XGBRegressor(
        enable_categorical=True,
        n_estimators=300,    # number of trees
        learning_rate=0.05,  # step size
        max_depth=6,         # complexity
        subsample=0.8,       # row sampling
        colsample_bytree=0.8,
        objective='reg:squarederror',
        random_state=42,
    )

    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        verbose=False
    )

    preds = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, preds))
    mae = mean_absolute_error(y_test, preds)
    r2 = r2_score(y_test, preds)

    print("RMSE:", rmse)
    print("MAE:", mae)
    print("R²:", r2)
    
    return model, (X_train, y_train, X_val, y_val, X_test,  y_test)

In [6]:
def hyperparam_tuning(model, splits):

    X_train = splits[0]
    y_train = splits[1]
    X_val = splits[2]
    y_val = splits[3]
    X_test = splits[4]
    y_test = splits[5]
    
    param_dist = {
        "n_estimators": randint(300, 2000),
        "learning_rate": uniform(0.005, 0.05),
        "max_depth": randint(3, 8),
        "min_child_weight": randint(1, 15),
        "subsample": uniform(0.7, 0.3),
        "colsample_bytree": uniform(0.7, 0.3),
        "gamma": uniform(0, 10),
        "reg_lambda": uniform(0, 10),
        "reg_alpha": uniform(0, 5),
        "max_leaves": randint(10, 80)
    }

    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_dist,
        n_iter=20,
        scoring='neg_mean_squared_error',
        cv=3,
        verbose=1,
        n_jobs=-1,
        random_state=42
    )

    random_search.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        verbose=False
    )

    best_model = random_search.best_estimator_
    print("Best params:", random_search.best_params_, "\n")

    preds = best_model.predict(X_test)
    print("RMSE:", np.sqrt(mean_squared_error(y_test, preds)))
    print("MAE:", mean_absolute_error(y_test, preds))
    print("R²:", r2_score(y_test, preds))
    
    return best_model, preds

In [7]:
df = pd.DataFrame()
df2 = pd.DataFrame()
for i in [2023, 2024, 2025]:
    df_actuals = pd.read_csv(f"../tables/{i}/parlay_actuals.csv")
    df_actuals['Season'] = i
    df = pd.concat([df, df_actuals])

    df_gms = pd.read_csv(f"../tables/{i}/nba_schedule.csv")
    df_gms['Date'] = pd.to_datetime(df_gms.Date)
    df2 = pd.concat([df2, df_gms])

df['Date'] = pd.to_datetime(df.Date)

df['Tms'] = df['game_id'].apply(lambda x: x.split("_")[1:3])
df['WrngTm'] = df.apply(lambda row: 0 if row['Team'] in row['Tms'] else 1, axis=1)
df['WrngOpp'] = df.apply(lambda row: 0 if row['Opp'] in row['Tms'] else 1, axis=1)
df = df[(df.WrngTm == 0) & (df.WrngOpp == 0)]

team_encoder = LabelEncoder()
player_encoder = LabelEncoder()
team_type_encoder = LabelEncoder()

df2['Date'] = pd.to_datetime(df2.Date)
df_mtch = df2[['Date', 'AwayABV', 'HomeABV', 'AwayB2B', 'HomeB2B', 'cup_gm', 'pstszn_gm']]
df_mtch['Team_type'] = 'Home'
df_mtch = df_mtch.rename(columns={"AwayABV": "Team", "HomeABV": "Opp", "AwayB2B": "B2B"})[['Date','Team', 'Opp', 'B2B', 'cup_gm', 'pstszn_gm', 'Team_type']]
df_mtch2 = df_mtch.copy().rename(columns={"Team": "Opp", "Opp": "Team", "HomeB2B": "B2B"})[['Date','Team', 'Opp', 'B2B', 'cup_gm', 'pstszn_gm']]
df_mtch2['Team_type'] = 'Away'
df_mtch = pd.concat([df_mtch, df_mtch2])
df_mtch = df_mtch[['Date', 'Team', 'Team_type', 'cup_gm', 'pstszn_gm']]
df = df.merge(df_mtch, on=['Date', 'Team'])

# Encode string cols
team_encoder.fit(pd.concat([df["Team"], df["Opp"]], axis=0))
df["Team"] = team_encoder.transform(df["Team"])
df["Opp"] = team_encoder.transform(df["Opp"])
df["Player"] = player_encoder.fit_transform(df["Player"])
df['Team_type'] = team_type_encoder.fit_transform(df['Team_type'])
print('base df created', datetime.now())

base df created 2025-12-15 23:21:28.762938


# Minutes Projection Model

In [79]:
def setup_df_mins(con, df):
    
    df = df[['Season', 'Date', 'Team', 'Team_type', 'Opp', 'Player', 'B2B', 'MP', 'cup_gm', 'pstszn_gm']]
    df = df.sort_values(['Player', 'Season', 'Date']).reset_index(drop=True)
    
    df['MP_lst_gm'] = (
        df
        .groupby(['Player', 'Season'])['MP']
        .shift(1)
    )

    df['MP_last_5_avg'] = (
        df.groupby(['Player', 'Season'])['MP']
          .rolling(window=5, min_periods=1)
          .mean()
          .shift(1)
          .reset_index(level=[0, 1], drop=True)
    )

    games_last_7_days = df.groupby(['Player', 'Season']).rolling('7D', on='Date')['MP'].count().shift(1).to_frame(name='games_last_7_days').reset_index()
    df = df.merge(games_last_7_days, on=['Player', 'Season', 'Date'])
    df['games_last_7_days'] = df.games_last_7_days.fillna(0).astype(int)
    
    df['mp_rank_team_game'] = (
        df
        .groupby(['Season', 'Date', 'Team'])['MP']
        .rank(method='first', ascending=False)
    )
    df['top5_minutes'] = (df['mp_rank_team_game'] <= 5).astype(int)
    df['was_top5_last_game'] = (
        df
        .groupby(['Player', 'Season', 'Team'])['top5_minutes']
        .shift(1)
    )
    df['was_top5_last_game'] = df['was_top5_last_game'].fillna(0)
        
    df['reserve_plyr'] = np.where(df.MP < 8, 1, 0)
    df['bench_plyr'] = np.where((df.MP <= 25) & (df.reserve_plyr == 0), 1, 0)
    df['starter_plyr'] = np.where((df.bench_plyr == 0) & (df.reserve_plyr == 0), 1, 0)
    
    df['Team_mins'] = (
        df.groupby(['Team', 'Date'])['MP']
          .transform('sum')
    )
    df = con.execute("""SELECT *, LAG(Team_mins) OVER (PARTITION BY Date, Team) as prev_team_mins, 
                        MP_lst_gm / prev_team_mins as prev_team_mins_pct FROM df""").fetchdf()
    
    df['MP_Pct'] = df.MP / df.Team_mins
    
    # EXPERIMENTAL
    df['Lst5_TmMins'] = (
        df.groupby(['Team', 'Season'])['Team_mins']
          .rolling(window=5, min_periods=1)
          .sum()
          .shift(1)
          .reset_index(level=[0, 1], drop=True)
    )
    df['Lst5_PlyrMins'] = (
        df.groupby(['Player', 'Season'])['MP']
          .rolling(window=5, min_periods=1)
          .sum()
          .shift(1)
          .reset_index(level=[0, 1], drop=True)
    )
    df['TmMinsPct_AvgLst5'] = df.Lst5_PlyrMins / df.Lst5_TmMins
    
    # 2.) EXPERIMENT: take full season team mins and take full season players MP and get pct that way, then rank player 
    # usage rate per team    
    # 3.) Add games missed column
    # 4.) Add coming back from injury column
    
    # In attempt to keep minutes realistic
    # 5.) Add column that signals max career reg games mins
    # 6.) Add column that signals max career OT mins too?
   
    df['OT'] = np.where(df.Team_mins >= 260, 1, 0)
    
    df = df.drop(['mp_rank_team_game', 'top5_minutes', 'Team_mins', 'Lst5_TmMins', 'Lst5_PlyrMins', 'MP'], axis=1)    

    
    return df

In [81]:
df_mins = df.copy()
df_mins = setup_df_mins(con, df_mins)
display(df_mins)

n = len(df_mins)
train_end = int(0.7 * n)
val_end   = int(0.85 * n)
mins_train_df = df_mins.iloc[:train_end]
mins_val_df   = df_mins.iloc[train_end:val_end]
mins_test_df  = df_mins.iloc[val_end:]

mins_model, mins_splits = create_baseline_model(df_mins, "MP_Pct", mins_train_df, mins_val_df, mins_test_df)
mins_model, mins_preds = hyperparam_tuning(mins_model, mins_splits)

Unnamed: 0,Season,Date,Team,Team_type,Opp,Player,B2B,cup_gm,pstszn_gm,MP_lst_gm,MP_last_5_avg,games_last_7_days,was_top5_last_game,reserve_plyr,bench_plyr,starter_plyr,prev_team_mins,prev_team_mins_pct,MP_Pct,TmMinsPct_AvgLst5,OT
0,2024,2025-05-03,7,0,12,146,0,0,1,0.00,0.000000,3,0.0,1,0,0,240.01,0.000000,0.000000,0.020858,0
1,2024,2025-05-03,7,0,12,499,0,0,1,24.40,34.452000,3,0.0,0,0,1,240.01,0.101662,0.124161,0.670430,0
2,2024,2025-05-03,7,0,12,301,0,0,1,44.47,41.090000,3,1.0,0,0,1,240.01,0.185284,0.172993,0.354673,0
3,2024,2025-05-03,7,0,12,383,0,0,1,0.00,2.560000,3,0.0,1,0,0,240.01,0.000000,0.013958,0.064358,0
4,2024,2025-05-03,7,0,12,158,0,0,1,2.97,5.312000,3,0.0,1,0,0,240.01,0.012374,0.028749,0.016301,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68118,2025,2025-12-14,13,1,23,284,0,0,0,8.75,18.154000,4,0.0,0,1,0,188.35,0.046456,0.112556,0.099021,0
68119,2025,2025-12-14,13,1,23,594,0,0,0,30.83,30.830000,4,1.0,0,0,1,188.35,0.163685,0.164428,0.169244,0
68120,2025,2025-12-14,13,1,23,457,0,0,0,40.93,37.360000,2,1.0,0,0,1,188.35,0.217308,0.202814,0.192372,0
68121,2025,2025-12-14,13,1,23,447,0,0,0,36.43,34.356000,3,1.0,0,0,1,188.35,0.193417,0.192355,0.176130,0


Train: 47686 / Validation: 10218 / Test: 10219
RMSE: 0.01535482368959716
MAE: 0.012103963310969974
R²: 0.926389939031559
Fitting 3 folds for each of 20 candidates, totalling 60 fits
Best params: {'colsample_bytree': np.float64(0.9582191749769029), 'gamma': np.float64(0.06952130531190703), 'learning_rate': np.float64(0.03053736512887829), 'max_depth': 3, 'max_leaves': 71, 'min_child_weight': 5, 'n_estimators': 1548, 'reg_alpha': np.float64(0.599326836668414), 'reg_lambda': np.float64(3.3761517140362796), 'subsample': np.float64(0.9828729111737557)} 

RMSE: 0.016002382772002038
MAE: 0.012756847301354342
R²: 0.9200503037484505


# Main Model

In [24]:
def setup_df_main(df):
    df = df.sort_values(['Player', 'Date'])
    
    # Location based features
    df["PrevOpp"] = df.groupby("Player")["Opp"].shift(1)
    df["DaysLstGm"] = (df.groupby("Player")["Date"].diff().dt.days).fillna(0).astype(int)
    df['Location'] = df.apply(lambda r: r['Team'] if r['Team_type'] == 'Home' else r['Opp'], axis=1)
    df['PrevLocation'] = df.groupby('Player')['Location'].shift(1)
    df['travel_km'] = df.apply(travel_km_from_row, axis=1).fillna(0)
    df['travel_hours'] = df['travel_km'] / 800.0      # approximate flight hours
    df['is_long_trip'] = (df['travel_km'] > 1500).astype(int)
    df['same_arena'] = (df['PrevLocation'] == df['Location']).astype(int)

    stat_cols = []
    for stat in categories:
        for col in [f'Off_{stat}', f'Off_L5_{stat}', f'Def_{stat}', f'Def_L5_{stat}']:
            stat_cols.append(col)

    final_cols = ['Date', 'Team', 'Team_type', 'B2B', 'cup_gm', 'pstszn_gm', 'Player', 'Opp', 'MP',
                  'DaysLstGm', 'travel_km', 'travel_hours', 'PrevLocation', 'is_long_trip', 'same_arena', 
                  tgt_stat] + stat_cols
    df = df[final_cols]
    
    for col in df.select_dtypes(include='number').columns:
        df[col] = df[col].fillna(0)

    return df

In [25]:
tgt_stat = "PTS"
df_main = df.copy()
df_main = setup_df_main(df_main)
display(df_main)

n = len(df)
train_end = int(0.7 * n)
val_end   = int(0.85 * n)
main_train_df = df_main.iloc[:train_end]
main_val_df   = df_main.iloc[train_end:val_end]
main_test_df  = df_main.iloc[val_end:]

stat_model, main_splits = create_baseline_model(df_main, tgt_stat, main_train_df, main_val_df, main_test_df)
stat_model, stat_preds = hyperparam_tuning(stat_model, main_splits)

Unnamed: 0,Date,Team,Team_type,B2B,cup_gm,pstszn_gm,Player,Opp,MP,DaysLstGm,travel_km,travel_hours,PrevLocation,is_long_trip,same_arena,PTS,Off_PTS,Off_L5_PTS,Def_PTS,Def_L5_PTS,Off_AST,Off_L5_AST,Def_AST,Def_L5_AST,Off_REB,Off_L5_REB,Def_REB,Def_L5_REB,Off_PR,Off_L5_PR,Def_PR,Def_L5_PR,Off_PA,Off_L5_PA,Def_PA,Def_L5_PA,Off_RA,Off_L5_RA,Def_RA,Def_L5_RA,Off_PRA,Off_L5_PRA,Def_PRA,Def_L5_PRA,Off_TPM,Off_L5_TPM,Def_TPM,Def_L5_TPM,Off_STL,Off_L5_STL,Def_STL,Def_L5_STL,Off_BLK,Off_L5_BLK,Def_BLK,Def_L5_BLK,Off_STL_BLK,Off_L5_STL_BLK,Def_STL_BLK,Def_L5_STL_BLK
393,2023-10-26,16,0,0,0,0,0,22,0.00,0,0.0,0.0,0.0,0,0,0,0.000000,0.000000,5.000000,5.0,0.000000,0.000000,3.000000,3.0,0.000000,0.000000,4.000000,4.0,0.000000,0.000000,9.000000,9.0,0.0,0.0,8.000000,8.0,0.00,0.00,7.000000,7.0,0.000000,0.000000,12.000000,12.0,0.000000,0.000000,1.000000,1.0,0.000000,0.0,2.000000,2.0,0.00,0.00,0.000000,0.0,0.000,0.0,2.000000,2.0
981,2023-10-29,16,0,0,0,0,0,0,5.62,3,0.0,0.0,22.0,0,0,7,3.500000,3.500000,16.833333,15.4,0.000000,0.000000,3.500000,3.0,0.500000,0.500000,3.500000,3.2,4.000000,4.000000,20.333333,18.6,3.5,3.5,20.333333,18.4,0.50,0.50,7.000000,6.2,4.000000,4.000000,23.833333,21.6,1.000000,1.000000,2.500000,2.6,0.000000,0.0,0.833333,1.0,0.00,0.00,0.166667,0.2,0.000,0.0,1.000000,1.2
1216,2023-10-30,16,0,0,0,0,0,15,15.35,1,0.0,0.0,0.0,0,0,0,2.333333,2.333333,12.800000,12.8,0.666667,0.666667,3.000000,3.0,0.333333,0.333333,3.800000,3.8,2.666667,2.666667,16.600000,16.6,3.0,3.0,15.800000,15.8,1.00,1.00,6.800000,6.8,3.333333,3.333333,19.600000,19.6,0.666667,0.666667,2.400000,2.4,0.000000,0.0,0.800000,0.8,0.00,0.00,1.200000,1.2,0.000,0.0,2.000000,2.0
1613,2023-11-01,16,1,0,0,0,0,27,5.52,2,0.0,0.0,15.0,0,0,2,2.250000,2.250000,11.125000,11.4,0.750000,0.750000,2.125000,2.0,0.500000,0.500000,6.375000,6.0,2.750000,2.750000,17.500000,17.4,3.0,3.0,13.250000,13.4,1.25,1.25,8.500000,8.0,3.500000,3.500000,19.625000,19.4,0.500000,0.500000,1.875000,2.0,0.000000,0.0,1.125000,1.2,0.00,0.00,0.625000,1.0,0.000,0.0,1.750000,2.2
2132,2023-11-03,16,0,0,1,0,0,19,0.00,2,0.0,0.0,27.0,0,0,0,1.800000,1.800000,17.285714,18.2,0.600000,0.600000,3.714286,3.6,0.400000,0.400000,4.571429,3.8,2.200000,2.200000,21.857143,22.0,2.4,2.4,21.000000,21.8,1.00,1.00,8.285714,7.4,2.800000,2.800000,25.571429,25.6,0.400000,0.400000,2.000000,2.6,0.000000,0.0,1.285714,1.2,0.00,0.00,0.428571,0.2,0.000,0.0,1.714286,1.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68090,2025-12-14,18,1,0,0,0,714,3,26.62,15,0.0,0.0,9.0,0,0,18,22.100000,21.400000,24.100000,26.8,4.000000,3.400000,3.350000,4.2,6.222222,4.400000,6.800000,7.2,27.700000,25.800000,30.900000,34.0,26.1,24.8,27.450000,31.0,9.60,7.80,10.150000,11.4,31.700000,29.200000,34.250000,38.2,0.000000,0.000000,0.000000,0.0,2.285714,1.6,1.000000,0.8,1.25,1.25,0.950000,0.8,2.625,2.2,1.950000,1.6
51134,2025-01-30,14,0,0,0,0,715,10,0.00,0,0.0,0.0,0.0,0,0,0,0.000000,0.000000,13.171717,20.4,0.000000,0.000000,2.979798,4.8,0.000000,0.000000,3.818182,6.0,0.000000,0.000000,16.989899,26.4,0.0,0.0,16.151515,25.2,0.00,0.00,6.797980,10.8,0.000000,0.000000,19.969697,31.2,0.000000,0.000000,1.818182,1.8,0.000000,0.0,1.010101,1.8,0.00,0.00,0.373737,0.6,0.000,0.0,1.383838,2.4
51852,2025-02-03,14,0,0,0,0,715,26,0.82,4,0.0,0.0,10.0,0,0,0,0.000000,0.000000,13.696078,8.8,0.000000,0.000000,3.225490,2.6,0.000000,0.000000,3.833333,3.2,0.000000,0.000000,17.529412,12.0,0.0,0.0,16.921569,11.4,0.00,0.00,7.058824,5.8,0.000000,0.000000,20.754902,14.6,0.000000,0.000000,1.970588,0.6,0.000000,0.0,1.147059,0.4,0.00,0.00,0.254902,0.0,0.000,0.0,1.401961,0.4
52878,2025-02-08,14,0,1,0,0,715,20,0.82,5,0.0,0.0,26.0,0,0,0,0.000000,0.000000,12.155963,15.2,0.000000,0.000000,2.697248,2.2,0.000000,0.000000,3.954128,5.6,0.000000,0.000000,16.110092,20.8,0.0,0.0,14.853211,17.4,0.00,0.00,6.651376,7.8,0.000000,0.000000,18.807339,23.0,0.000000,0.000000,1.669725,1.6,0.000000,0.0,0.798165,1.0,0.00,0.00,0.366972,0.4,0.000,0.0,1.165138,1.4


Train: 47686 / Validation: 10218 / Test: 10219
RMSE: 4.13012377093971
MAE: 2.7630364894866943
R²: 0.8081825971603394
Fitting 3 folds for each of 20 candidates, totalling 60 fits
Best params: {'colsample_bytree': np.float64(0.7880464524154114), 'gamma': np.float64(0.14079822715084456), 'learning_rate': np.float64(0.01494212020444026), 'max_depth': 5, 'max_leaves': 44, 'min_child_weight': 1, 'n_estimators': 678, 'reg_alpha': np.float64(0.3702232586704518), 'reg_lambda': np.float64(3.5846572854427263), 'subsample': np.float64(0.7347607178575388)} 

RMSE: 4.101093955752676
MAE: 2.7552807331085205
R²: 0.8108696341514587


In [12]:
rmse = np.sqrt(mean_squared_error(main_splits[5], stat_preds)) # splits[5] = y_test
pred_col = f'{tgt_stat}_Pred'
df_lines = pd.read_csv(f"../tables/2025/parlay_lines.csv")
df_lines['Date'] = pd.to_datetime(df_lines.Date)
df_lines = df_lines[['Date', 'Team', 'Player', f'{tgt_stat}_line']]
main_test_df[pred_col] = stat_preds
main_test_df['Team'] = team_encoder.inverse_transform(main_test_df["Team"])
main_test_df['Player'] = player_encoder.inverse_transform(main_test_df["Player"])

df_test = main_test_df.merge(df_lines, on=['Date', 'Team', 'Player'])
df_test = df_test[[c for c in df_test.columns if c != pred_col] + [pred_col]]

df_test['Diff'] = df_test[f'{tgt_stat}_Pred'] - df_test[f'{tgt_stat}_line']
df_test['Act_Res'] = np.where(df_test[tgt_stat] > df_test[f'{tgt_stat}_line'], 'O', 'U')
df_test['Pred_Res'] = np.where(df_test[pred_col] > df_test[f'{tgt_stat}_line'], 'O', 'U')
df_test['ParlayHit'] = np.where(df_test['Act_Res'] == df_test['Pred_Res'], 1, 0)

df_test['Diff2'] = abs(df_test[f'{tgt_stat}_Pred'] - df_test[tgt_stat])
df_test['InRMSE_Range'] = np.where(df_test['Diff2'] <= rmse, 1, 0)

print("Total Accuracy (ParlayHit):", ((df_test.ParlayHit == 1).sum() / df_test.shape[0]))
print((df_test.ParlayHit == 1).sum(), "/", df_test.shape[0])

print("\nTotal Accuracy (InRMSE_Range):", ((df_test.InRMSE_Range == 1).sum() / df_test.shape[0]))
print((df_test.InRMSE_Range == 1).sum(), "/", df_test.shape[0])

Total Accuracy (ParlayHit): 0.635036496350365
174 / 274

Total Accuracy (InRMSE_Range): 0.5401459854014599
148 / 274


In [13]:
df_ystrday = df_test[(df_test.Date == (datetime.strptime(now, "%Y-%m-%d") - timedelta(days=1)).strftime("%Y-%m-%d"))]\
            [['Team', 'Player', tgt_stat, f'{tgt_stat}_line', f'{tgt_stat}_Pred', 'ParlayHit', 'Diff', 'InRMSE_Range', 'Diff2']]\
            .sort_values(f'{tgt_stat}_line', ascending=False)

print("Yesterday's Results:")
print("Total Accuracy (ParlayHit):", ((df_ystrday.ParlayHit == 1).sum() / df_ystrday.shape[0]))
print("Total Accuracy (InRMSE_Range):", ((df_ystrday.InRMSE_Range == 1).sum() / df_ystrday.shape[0]))
if df_ystrday.shape[0] >= 50:
    for tm in df_ystrday.Team.unique():
        display(df_ystrday[df_ystrday.Team == tm])
else:
    display(df_ystrday)

Yesterday's Results:
Total Accuracy (ParlayHit): 0.5714285714285714
Total Accuracy (InRMSE_Range): 0.35714285714285715


Unnamed: 0,Team,Player,PTS,PTS_line,PTS_Pred,ParlayHit,Diff,InRMSE_Range,Diff2
77,GSW,Stephen Curry,48,28.5,28.690525,1,0.190525,0,19.309475
58,POR,Shaedon Sharpe,35,22.5,20.387621,0,-2.112379,0,14.612379
259,SAC,Zach LaVine,10,20.5,9.865946,1,-10.634054,1,0.134054
273,NOP,Zion Williamson,18,20.5,20.465416,1,-0.034584,1,2.465416
160,NOP,Trey Murphy III,20,19.5,26.163469,1,6.663469,0,6.163469
9,MIL,Ryan Rollins,11,17.5,9.816794,1,-7.683206,1,1.183206
191,PHI,VJ Edgecombe,26,14.5,19.069555,1,4.569555,0,6.930445
19,NOP,Saddiq Bey,10,12.5,15.862622,0,3.362622,0,5.862622
140,POR,Toumani Camara,7,12.5,10.35977,1,-2.14023,1,3.35977
149,CHI,Tre Jones,6,10.5,13.07725,0,2.57725,0,7.07725


### Today's predictions

In [82]:
df_pred = pd.read_csv("../tables/2025/parlay_stats.csv")
df_pred['Date'] = pd.to_datetime(df_pred.Date)
df_pred['Season'] = 2025
for col in df_pred.select_dtypes(include='object').columns:
    df_pred[col] = df_pred[col].astype('category')
df_pred = df_pred.merge(df_mtch, on=['Date', 'Team'])
df_pred[tgt_stat] = 0

# Predict minutes
df_act_mins = pd.read_csv("../tables/2025/parlay_actuals.csv")
df_act_mins['Date'] = pd.to_datetime(df_act_mins.Date)
df_pred = df_pred.merge(df_act_mins[['Date', 'Team', 'Player', 'MP']], on=['Date', 'Team', 'Player'], how='left')
display(df_pred[(df_pred.Date == now)])
df_pred_mins = setup_df_mins(con, df_pred)
display(df_pred_mins[(df_pred_mins.Date == now)])
df_pred_mins = df_pred_mins.drop(['Date', 'MP_Pct'], axis=1)
df_pred_mins["Team"] = team_encoder.transform(df_pred_mins["Team"])
df_pred_mins['Team_type'] = team_type_encoder.fit_transform(df_pred_mins['Team_type'])
df_pred_mins["Player"] = player_encoder.fit_transform(df_pred_mins["Player"])
df_pred['MP'] = mins_model.predict(df_pred_mins) * 240

df_pred = setup_df_main(df_pred)
feature_cols = [col for col in df_pred.columns if col not in ['Date', tgt_stat]]
df_pred = df_pred[df_pred.Date == now][feature_cols]

# Predict stat
df_pred["Team"] = team_encoder.transform(df_pred["Team"])
df_pred["Opp"] = team_encoder.transform(df_pred["Opp"])
df_pred = df_pred[~(df_pred.PrevLocation.isnull())] # Filters out players who are debuting on the year
df_pred["PrevLocation"] = team_encoder.transform(df_pred["PrevLocation"])
df_pred["Player"] = player_encoder.fit_transform(df_pred["Player"])
df_pred['Team_type'] = team_type_encoder.fit_transform(df_pred['Team_type'])
df_pred[f"{tgt_stat}_proj"] = stat_model.predict(df_pred)
df_lines = pd.read_csv(f"../tables/2025/parlay_lines.csv")
df_lines['Date'] = pd.to_datetime(df_lines.Date)

df_pred['Team'] = team_encoder.inverse_transform(df_pred["Team"])
df_pred['Player'] = player_encoder.inverse_transform(df_pred["Player"])

df_lines = df_lines[df_lines.Date == now][['Team', 'Player', f'{tgt_stat}_line']]
df_pred = df_pred.merge(df_lines, on=['Team', 'Player'])

for tm in df_pred.Team.unique():
    display(df_pred[(df_pred.Team == tm) & ~(df_pred[f'{tgt_stat}_line'].isnull())]\
            [['Team', 'Player', 'MP', f'{tgt_stat}_line', f'{tgt_stat}_proj']]\
            .sort_values(f'{tgt_stat}_proj', ascending=False))

Unnamed: 0,Date,Team,B2B,Player,Opp,Off_PTS,AVG_PTS_H2H,Def_PTS,PTS_line,Off_AST,AVG_AST_H2H,Def_AST,AST_line,Off_REB,AVG_REB_H2H,Def_REB,REB_line,Off_PR,AVG_PR_H2H,Def_PR,PR_line,Off_PA,AVG_PA_H2H,Def_PA,PA_line,Off_RA,AVG_RA_H2H,Def_RA,RA_line,Off_PRA,AVG_PRA_H2H,Def_PRA,PRA_line,Off_TPM,AVG_TPM_H2H,Def_TPM,TPM_line,Off_STL,AVG_STL_H2H,Def_STL,STL_line,Off_BLK,AVG_BLK_H2H,Def_BLK,BLK_line,Off_STL_BLK,AVG_STL_BLK_H2H,Def_STL_BLK,STL_BLK_line,Off_L5_PTS,Def_L5_PTS,Off_L5_AST,Def_L5_AST,Off_L5_REB,Def_L5_REB,Off_L5_PR,Def_L5_PR,Off_L5_PA,Def_L5_PA,Off_L5_RA,Def_L5_RA,Off_L5_PRA,Def_L5_PRA,Off_L5_TPM,Def_L5_TPM,Off_L5_STL,Def_L5_STL,Off_L5_BLK,Def_L5_BLK,Off_L5_STL_BLK,Def_L5_STL_BLK,Spread,Total,PTS_Hit_Pct,PTS_P_Diff,AST_Hit_Pct,AST_P_Diff,REB_Hit_Pct,REB_P_Diff,PR_Hit_Pct,PR_P_Diff,PA_Hit_Pct,PA_P_Diff,RA_Hit_Pct,RA_P_Diff,PRA_Hit_Pct,PRA_P_Diff,TPM_Hit_Pct,TPM_P_Diff,STL_Hit_Pct,STL_P_Diff,BLK_Hit_Pct,BLK_P_Diff,STL_BLK_Hit_Pct,STL_BLK_P_Diff,Season,Team_type,cup_gm,pstszn_gm,PTS,MP
2025,2025-12-15,LAC,0,Kris Dunn,MEM,8.391304,6.857143,23.210526,7.5,3.227273,2.142857,5.315789,3.5,3.238095,3.000000,5.000000,2.5,10.440000,9.857143,28.210526,10.5,10.560000,9.000000,28.526316,11.5,5.791667,5.142857,10.315789,6.5,13.280000,12.000000,33.526316,14.5,1.437500,1.571429,3.473684,0.5,1.909091,2.142857,1.105263,1.5,,,,,,,,,8.6,24.0,4.0,6.2,3.4,5.2,10.2,29.2,11.4,30.2,6.8,11.4,14.2,35.4,2.0,3.6,2.0,2.4,,,,,-4.5,228.5,0.600000,9.0,0.600000,6.0,0.700000,5.0,0.600000,11.0,0.600000,12.0,0.750000,7.0,0.600000,14.0,0.600000,1.5,0.400000,0.5,,,,,2025,Away,0,0,0,
2026,2025-12-15,LAC,0,James Harden,MEM,26.583333,17.444444,23.210526,25.5,8.208333,8.777778,5.315789,8.5,5.565217,5.111111,5.000000,5.5,31.916667,22.555556,28.210526,30.5,34.791667,26.222222,28.526316,33.5,13.541667,13.888889,10.315789,13.5,40.125000,31.333333,33.526316,39.5,3.695652,2.333333,3.473684,3.5,1.875000,0.888889,1.105263,1.5,,,,,,,,,22.4,24.0,6.4,6.2,3.2,5.2,25.6,29.2,28.8,30.2,9.6,11.4,32.0,35.4,2.8,3.6,2.8,2.4,,,,,-4.5,228.5,0.500000,-10.0,0.500000,-5.0,0.300000,-8.0,0.500000,-19.0,0.500000,-17.0,0.300000,-19.0,0.600000,-29.0,0.400000,-6.0,0.666667,3.5,,,,,2025,Away,0,0,0,
2027,2025-12-15,DAL,0,Cooper Flagg,UTA,17.480000,,22.421053,18.5,3.909091,,6.947368,3.5,6.280000,,4.578947,5.5,23.760000,,27.000000,25.5,20.920000,,29.368421,22.5,9.720000,,11.526316,10.5,27.200000,,33.947368,29.5,,,,,1.650000,,1.789474,1.5,1.307692,,0.473684,0.5,2.272727,,2.263158,1.5,20.6,19.8,3.4,5.2,5.2,5.2,25.8,25.0,24.0,25.0,8.6,10.4,29.2,30.2,,,1.8,1.8,1.2,0.8,2.8,2.6,-2.5,241.5,0.600000,36.0,0.400000,6.0,0.600000,-1.0,0.600000,33.0,0.700000,40.0,0.500000,2.0,0.700000,37.0,,,0.555556,0.5,0.428571,-0.5,0.875000,4.0,2025,Home,0,0,0,
2028,2025-12-15,DAL,0,Brandon Williams,UTA,11.727273,,22.421053,10.5,4.545455,,6.947368,4.5,,,,,14.409091,,27.000000,12.5,14.916667,,29.368421,14.5,,,,,17.375000,,33.947368,17.5,,,,,,,,,,,,,,,,,9.4,19.8,4.6,5.2,,,11.4,25.0,14.0,25.0,,,16.0,30.2,,,,,,,,,-2.5,241.5,0.500000,-5.0,0.666667,10.0,,,0.333333,-7.0,0.500000,3.0,,,0.500000,1.0,,,,,,,,,2025,Home,0,0,0,
2029,2025-12-15,MEM,0,Ja Morant,LAC,18.153846,,22.277778,18.5,7.769231,,6.111111,7.5,3.538462,,4.333333,3.5,21.692308,,26.611111,22.5,25.923077,,28.388889,26.5,11.307692,,10.444444,11.5,29.461538,,32.722222,30.5,2.000000,,2.944444,1.5,1.333333,,1.388889,0.5,,,,,,,,,15.2,22.8,8.6,7.8,3.2,5.0,18.4,27.8,23.8,30.6,11.8,12.8,27.0,35.6,2.0,2.4,1.4,1.4,,,,,4.5,228.5,,,,,,,,,,,,,,,,,,,,,,,2025,Home,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094,2025-12-15,LAC,0,Ivica Zubac,MEM,16.600000,11.666667,14.785714,16.5,2.666667,1.666667,4.642857,2.5,11.440000,11.888889,10.500000,11.5,28.040000,23.555556,25.285714,29.5,19.160000,13.333333,19.428571,19.5,14.000000,13.555556,15.142857,14.5,30.600000,25.222222,29.928571,32.5,,,,,1.200000,1.000000,1.071429,0.5,1.500000,1.000000,1.642857,0.5,1.800000,2.000000,2.714286,1.5,17.6,8.4,2.2,4.0,11.2,9.4,28.8,17.8,19.8,12.4,13.4,13.4,31.0,21.8,,,1.0,0.6,1.4,2.2,1.6,2.8,-4.5,228.5,0.400000,9.0,0.500000,3.0,0.600000,15.0,0.500000,18.0,0.300000,12.0,0.800000,15.0,0.600000,19.0,,,0.250000,-2.0,0.625000,4.0,0.428571,0.5,2025,Away,0,0,0,
2095,2025-12-15,BOS,0,Neemias Queta,DET,10.086957,2.250000,14.444444,9.5,2.312500,0.875000,2.333333,1.5,8.260870,1.625000,8.833333,8.5,18.347826,3.875000,23.277778,17.5,11.695652,3.125000,16.777778,10.5,9.869565,2.500000,11.166667,9.5,19.956522,4.750000,25.611111,19.5,,,,,,,,,2.066667,0.500000,1.388889,1.5,,,,,11.0,11.8,2.6,2.0,7.4,8.0,18.4,19.8,12.6,13.8,9.0,10.0,20.0,21.8,,,,,2.4,2.4,,,-1.5,229.5,0.500000,4.0,0.600000,-0.5,0.333333,-12.0,0.166667,-11.0,0.500000,3.0,0.250000,-14.0,0.200000,-12.5,,,,,0.166667,-3.0,,,2025,Away,0,0,0,
2096,2025-12-15,MIA,0,Kel'el Ware,TOR,12.260870,1.000000,14.375000,7.5,,,,,10.120000,1.500000,9.312500,7.5,21.400000,2.500000,23.687500,15.5,,,,,,,,,21.960000,2.750000,26.062500,15.5,,,,,,,,,1.571429,0.250000,1.562500,1.5,,,,,9.4,14.2,,,7.8,10.4,14.8,24.6,,,,,15.4,26.4,,,,,1.2,1.2,,,-5.5,235.5,0.571429,3.5,,,0.571429,3.5,0.500000,13.0,,,,,0.500000,1.0,,,,,0.500000,0.0,,,2025,Away,0,0,0,
2097,2025-12-15,MIA,0,Bam Adebayo,TOR,19.157895,17.000000,14.375000,18.5,2.666667,4.571429,2.375000,2.5,8.947368,10.714286,9.312500,9.5,28.105263,27.714286,23.687500,28.5,21.684211,21.571429,16.750000,21.5,11.473684,15.285714,11.687500,12.5,30.631579,32.285714,26.062500,31.5,2.133333,0.714286,0.375000,1.5,1.461538,1.142857,0.687500,1.5,1.800000,0.285714,1.562500,0.5,2.466667,1.428571,2.250000,2.5,20.0,14.2,2.4,1.8,10.2,10.4,30.2,24.6,22.4,16.0,12.6,12.2,32.6,26.4,2.0,0.4,1.6,0.6,1.8,1.2,3.2,1.8,-5.5,235.5,0.428571,-4.5,0.571429,-2.5,0.428571,2.5,0.285714,-2.5,0.428571,-7.5,0.285714,-1.5,0.285714,-7.5,0.428571,0.5,0.600000,1.5,0.666667,5.0,0.600000,2.5,2025,Away,0,0,0,


Unnamed: 0,Season,Date,Team,Team_type,Opp,Player,B2B,cup_gm,pstszn_gm,MP_lst_gm,MP_last_5_avg,games_last_7_days,was_top5_last_game,reserve_plyr,bench_plyr,starter_plyr,prev_team_mins,prev_team_mins_pct,MP_Pct,TmMinsPct_AvgLst5,OT
102,2025,2025-12-15,HOU,Home,DEN,Reed Sheppard,0,0,0,26.02,23.664000,3,1.0,0,0,1,,,,inf,0
103,2025,2025-12-15,HOU,Home,DEN,Kevin Durant,0,0,0,36.72,34.502000,3,1.0,0,0,1,0.0,inf,,inf,0
104,2025,2025-12-15,HOU,Home,DEN,Josh Okogie,0,0,0,21.03,22.570000,3,0.0,0,0,1,0.0,inf,,inf,0
105,2025,2025-12-15,HOU,Home,DEN,Steven Adams,0,0,0,17.92,21.873333,2,0.0,0,0,1,0.0,inf,,inf,0
106,2025,2025-12-15,HOU,Home,DEN,Tari Eason,0,0,0,,13.842000,2,0.0,0,0,1,0.0,,,inf,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
888,2025,2025-12-15,TOR,Home,MIA,Jamal Shead,0,0,0,33.55,23.220000,3,1.0,0,0,1,0.0,inf,,0.103687,0
889,2025,2025-12-15,TOR,Home,MIA,Immanuel Quickley,0,0,0,,33.130000,2,0.0,0,0,1,0.0,,,0.225631,0
890,2025,2025-12-15,TOR,Home,MIA,Sandro Mamukelashvili,0,0,0,15.00,22.636000,3,0.0,0,0,1,0.0,inf,,0.331434,0
891,2025,2025-12-15,TOR,Home,MIA,Jakob Poeltl,0,0,0,24.82,26.575000,2,1.0,0,0,1,0.0,inf,,inf,0


Unnamed: 0,Team,Player,MP,PTS_line,PTS_proj
67,UTA,Svi Mykhailiuk,47.483486,7.5,19.651943
0,UTA,Ace Bailey,52.446941,12.5,19.405302
46,UTA,Kevin Love,41.664825,7.5,16.48377
51,UTA,Lauri Markkanen,17.999481,25.5,11.947755
47,UTA,Keyonte George,18.372707,23.5,11.024519
50,UTA,Kyle Filipowski,17.789541,12.5,8.455231


Unnamed: 0,Team,Player,MP,PTS_line,PTS_proj
45,HOU,Kevin Durant,40.181953,23.5,26.751448
1,HOU,Alperen Sengun,34.046944,21.5,25.560062
2,HOU,Amen Thompson,37.852177,17.5,21.131037
26,HOU,Jabari Smith Jr.,39.083992,15.5,18.206123
61,HOU,Reed Sheppard,38.873814,9.5,15.348583
41,HOU,Josh Okogie,27.435326,6.5,10.181674
66,HOU,Steven Adams,21.440664,5.5,8.320226


Unnamed: 0,Team,Player,MP,PTS_line,PTS_proj
57,MIA,Norman Powell,36.91394,22.5,28.700533
70,MIA,Tyler Herro,33.954296,22.5,24.112438
27,MIA,Jaime Jaquez Jr.,37.562992,10.5,18.47369
43,MIA,Kel'el Ware,36.492573,7.5,15.824672
7,MIA,Bam Adebayo,22.872934,18.5,15.591222
3,MIA,Andrew Wiggins,19.342802,15.5,11.141611
19,MIA,Davion Mitchell,21.325153,8.5,8.055747


Unnamed: 0,Team,Player,MP,PTS_line,PTS_proj
20,BOS,Derrick White,43.216805,18.5,30.397903
4,BOS,Anfernee Simons,44.712559,12.5,22.935598
39,BOS,Jordan Walsh,33.675491,7.5,17.128403
59,BOS,Payton Pritchard,35.171795,16.5,17.009754
34,BOS,Jaylen Brown,20.840809,29.5,15.070051
54,BOS,Neemias Queta,36.871117,9.5,14.345879
40,BOS,Josh Minott,34.479012,6.5,13.623742
62,BOS,Sam Hauser,33.655621,7.5,13.193247


Unnamed: 0,Team,Player,MP,PTS_line,PTS_proj
17,DAL,Cooper Flagg,27.837597,18.5,20.320202
53,DAL,Naji Marshall,37.039127,13.5,19.616161
5,DAL,Anthony Davis,23.734867,24.5,18.496971
10,DAL,Brandon Williams,38.178722,10.5,17.981293
48,DAL,Klay Thompson,36.619408,11.5,16.391787
58,DAL,P.J. Washington,19.703514,16.5,15.015099
52,DAL,Max Christie,29.614902,10.5,13.50127
18,DAL,Daniel Gafford,21.75704,7.5,7.900558


Unnamed: 0,Team,Player,MP,PTS_line,PTS_proj
29,DET,Jalen Duren,37.341301,16.5,18.404533
69,DET,Tobias Harris,38.141888,13.5,17.878183
21,DET,Duncan Robinson,37.394341,10.5,17.137852
15,DET,Caris LeVert,35.792603,8.5,16.667295
12,DET,Cade Cunningham,23.988829,26.5,14.666997
6,DET,Ausar Thompson,23.734867,10.5,11.510365
23,DET,Isaiah Stewart,19.639595,7.5,8.789162


Unnamed: 0,Team,Player,MP,PTS_line,PTS_proj
32,LAC,James Harden,48.478878,25.5,30.30541
49,LAC,Kris Dunn,50.643616,7.5,17.39773
24,LAC,Ivica Zubac,34.496185,16.5,14.621339
8,LAC,Bogdan Bogdanovic,36.229305,10.5,14.421634
37,LAC,John Collins,23.734867,13.5,13.06475
55,LAC,Nicolas Batum,23.779482,5.5,9.61038
42,LAC,Kawhi Leonard,7.7795,26.5,6.080211


Unnamed: 0,Team,Player,MP,PTS_line,PTS_proj
22,TOR,Immanuel Quickley,39.224976,17.5,22.155912
31,TOR,Jamal Shead,38.614506,6.5,17.007059
65,TOR,Scottie Barnes,23.788517,21.5,14.862984
63,TOR,Sandro Mamukelashvili,34.99688,8.5,13.315618
28,TOR,Jakob Poeltl,34.327778,10.5,12.537161
9,TOR,Brandon Ingram,6.295698,24.5,5.47751


Unnamed: 0,Team,Player,MP,PTS_line,PTS_proj
56,DEN,Nikola Jokic,24.187305,28.5,21.67524
30,DEN,Jamal Murray,29.07725,23.5,21.573318
60,DEN,Peyton Watson,35.468521,12.5,17.792158
14,DEN,Cameron Johnson,34.98436,14.5,16.961967
11,DEN,Bruce Brown,37.1064,7.5,15.140272
68,DEN,Tim Hardaway Jr.,22.816824,12.5,12.684325
38,DEN,Jonas Valanciunas,21.088837,7.5,8.826536


Unnamed: 0,Team,Player,MP,PTS_line,PTS_proj
35,MEM,Jaylen Wells,52.556038,11.5,21.928728
13,MEM,Cam Spencer,44.533302,10.5,21.143713
16,MEM,Cedric Coward,37.299301,12.5,16.421356
36,MEM,Jock Landale,31.728748,10.5,12.271949
33,MEM,Jaren Jackson Jr.,33.384296,17.5,12.222929
44,MEM,Kentavious Caldwell-Pope,26.538486,7.5,10.180406
25,MEM,Ja Morant,21.171288,18.5,10.113546
64,MEM,Santi Aldama,22.407358,15.5,9.973991
71,MEM,Vince Williams Jr.,20.583494,5.5,6.956872


# sort out issues with minutes columns
 - Should never have 0 Team_mins
 - Resolve NaNs
 - Chatgpt says im leaking data, deal with that first

In [108]:
df_pred = pd.read_csv("../tables/2025/parlay_stats.csv")
df_pred['Date'] = pd.to_datetime(df_pred.Date)
df_pred['Season'] = 2025
for col in df_pred.select_dtypes(include='object').columns:
    df_pred[col] = df_pred[col].astype('category')
df_pred = df_pred.merge(df_mtch, on=['Date', 'Team'])
df_pred[tgt_stat] = 0

# Predict minutes
df_act_mins = pd.read_csv("../tables/2025/parlay_actuals.csv")
df_act_mins['Date'] = pd.to_datetime(df_act_mins.Date)
df = df_pred.merge(df_act_mins[['Date', 'Team', 'Player', 'MP']], on=['Date', 'Team', 'Player'], how='left')
# df_pred_mins = setup_df_mins(con, df_pred)


    
df = df.sort_values(['Player', 'Season', 'Date']).reset_index(drop=True)

df['MP_lst_gm'] = (
    df
    .groupby(['Player', 'Season'])['MP']
    .shift(1)
)

df['MP_last_5_avg'] = (
    df.groupby(['Player', 'Season'])['MP']
      .rolling(window=5, min_periods=1)
      .mean()
      .shift(1)
      .reset_index(level=[0, 1], drop=True)
)

games_last_7_days = df.groupby(['Player', 'Season']).rolling('7D', on='Date')['MP'].count().shift(1).to_frame(name='games_last_7_days').reset_index()
df = df.merge(games_last_7_days, on=['Player', 'Season', 'Date'])
df['games_last_7_days'] = df.games_last_7_days.fillna(0).astype(int)

df['mp_rank_team_game'] = (
    df
    .groupby(['Season', 'Date', 'Team'])['MP']
    .rank(method='first', ascending=False)
)
df['top5_minutes'] = (df['mp_rank_team_game'] <= 5).astype(int)
df['was_top5_last_game'] = (
    df
    .groupby(['Player', 'Season', 'Team'])['top5_minutes']
    .shift(1)
)
df['was_top5_last_game'] = df['was_top5_last_game'].fillna(0)

df['reserve_plyr'] = np.where(df.MP < 8, 1, 0)
df['bench_plyr'] = np.where((df.MP <= 25) & (df.reserve_plyr == 0), 1, 0)
df['starter_plyr'] = np.where((df.bench_plyr == 0) & (df.reserve_plyr == 0), 1, 0)

df['Team_mins'] = (
    df.groupby(['Team', 'Date'])['MP']
      .transform('sum')
)
df = con.execute("""SELECT *, LAG(Team_mins) OVER (PARTITION BY Player, Date) as prev_team_mins, 
                    MP_lst_gm / prev_team_mins as prev_team_mins_pct FROM df""").fetchdf()

df['MP_Pct'] = df.MP / df.Team_mins

df['Lst5_TmMins'] = (
    df.groupby(['Team', 'Season'])['Team_mins']
      .rolling(window=5, min_periods=1)
      .sum()
      .shift(1)
      .reset_index(level=[0, 1], drop=True)
)
df['Lst5_PlyrMins'] = (
    df.groupby(['Player', 'Season'])['MP']
      .rolling(window=5, min_periods=1)
      .sum()
      .shift(1)
      .reset_index(level=[0, 1], drop=True)
)
df['TmMinsPct_AvgLst5'] = df.Lst5_PlyrMins / df.Lst5_TmMins

# 2.) EXPERIMENT: take full season team mins and take full season players MP and get pct that way, then rank player 
# usage rate per team    
# 3.) Add games missed column
# 4.) Add coming back from injury column
# Predict pct of team mins instead??? only if projected numbers are still bad

df['OT'] = np.where(df.Team_mins >= 260, 1, 0)


    

display(df)

# df_pred_mins = df_pred_mins.drop(['Date', 'MP_Pct'], axis=1)
# df_pred_mins["Team"] = team_encoder.transform(df_pred_mins["Team"])
# df_pred_mins['Team_type'] = team_type_encoder.fit_transform(df_pred_mins['Team_type'])
# df_pred_mins["Player"] = player_encoder.fit_transform(df_pred_mins["Player"])
# df_pred['MP'] = mins_model.predict(df_pred_mins) * 240




Unnamed: 0,Date,Team,B2B,Player,Opp,Off_PTS,AVG_PTS_H2H,Def_PTS,PTS_line,Off_AST,AVG_AST_H2H,Def_AST,AST_line,Off_REB,AVG_REB_H2H,Def_REB,REB_line,Off_PR,AVG_PR_H2H,Def_PR,PR_line,Off_PA,AVG_PA_H2H,Def_PA,PA_line,Off_RA,AVG_RA_H2H,Def_RA,RA_line,Off_PRA,AVG_PRA_H2H,Def_PRA,PRA_line,Off_TPM,AVG_TPM_H2H,Def_TPM,TPM_line,Off_STL,AVG_STL_H2H,Def_STL,STL_line,Off_BLK,AVG_BLK_H2H,Def_BLK,BLK_line,Off_STL_BLK,AVG_STL_BLK_H2H,Def_STL_BLK,STL_BLK_line,Off_L5_PTS,Def_L5_PTS,Off_L5_AST,Def_L5_AST,Off_L5_REB,Def_L5_REB,Off_L5_PR,Def_L5_PR,Off_L5_PA,Def_L5_PA,Off_L5_RA,Def_L5_RA,Off_L5_PRA,Def_L5_PRA,Off_L5_TPM,Def_L5_TPM,Off_L5_STL,Def_L5_STL,Off_L5_BLK,Def_L5_BLK,Off_L5_STL_BLK,Def_L5_STL_BLK,Spread,Total,PTS_Hit_Pct,PTS_P_Diff,AST_Hit_Pct,AST_P_Diff,REB_Hit_Pct,REB_P_Diff,PR_Hit_Pct,PR_P_Diff,PA_Hit_Pct,PA_P_Diff,RA_Hit_Pct,RA_P_Diff,PRA_Hit_Pct,PRA_P_Diff,TPM_Hit_Pct,TPM_P_Diff,STL_Hit_Pct,STL_P_Diff,BLK_Hit_Pct,BLK_P_Diff,STL_BLK_Hit_Pct,STL_BLK_P_Diff,Season,Team_type,cup_gm,pstszn_gm,PTS,MP,MP_lst_gm,MP_last_5_avg,games_last_7_days,mp_rank_team_game,top5_minutes,was_top5_last_game,reserve_plyr,bench_plyr,starter_plyr,Team_mins,prev_team_mins,prev_team_mins_pct,MP_Pct,Lst5_TmMins,Lst5_PlyrMins,TmMinsPct_AvgLst5,OT
0,2025-12-01,HOU,1,Alperen Sengun,UTA,21.250000,27.800000,18.142857,21.5,6.937500,5.800000,2.142857,6.5,9.000000,10.800000,10.214286,9.5,30.250000,38.600000,28.357143,31.5,28.187500,33.600000,20.285714,27.5,15.937500,16.600000,12.357143,15.5,37.187500,44.400000,30.500000,37.5,,,,,1.416667,0.400000,0.785714,1.5,1.625,1.000000,1.714286,0.5,2.142857,1.4,2.5000,1.5,20.6,26.4,5.8,2.4,6.2,10.4,26.8,36.8,26.4,28.8,12.0,12.8,32.6,39.2,,,1.4,1.2,1.6,2.0,1.2,3.2,-11.5,233.5,,,,,,,,,,,,,,,,,,,,,,,2025,Home,0,0,0,38.15,,35.425000,2,2.0,1,0.0,0,0,1,209.08,,,0.182466,779.34,,,0
1,2025-12-11,HOU,0,Alperen Sengun,LAC,22.166667,17.200000,15.550000,21.5,7.111111,8.000000,2.550000,6.5,9.000000,11.600000,8.900000,8.5,31.166667,28.800000,24.450000,30.5,29.277778,25.200000,18.100000,27.5,16.111111,19.600000,11.450000,14.5,38.277778,36.800000,27.000000,36.5,1.666667,0.800000,1.250000,0.5,1.642857,1.200000,1.200000,1.5,2.000,1.200000,1.000000,0.5,2.562500,2.4,2.2000,1.5,24.0,10.6,6.6,2.2,6.6,7.2,30.6,17.8,30.6,12.8,13.2,9.4,37.2,20.0,2.0,1.2,1.8,0.4,2.4,0.6,2.8,1.0,-9.5,221.5,0.500000,-2.0,0.250000,1.0,0.25,-12.0,0.500000,-14.0,0.50,0.0,0.25,-10.0,0.500000,-13.0,,,0.50,1.0,0.50,4.0,0.333333,2.5,2025,Away,0,0,0,36.03,,34.210000,2,4.0,1,0.0,0,0,1,233.51,,,0.154297,209.08,38.15,0.182466,0
2,2025-11-20,PHI,1,Andre Drummond,MIL,9.333333,5.250000,18.142857,11.5,,,,,9.769231,7.000000,10.428571,12.5,18.384615,12.250000,28.571429,24.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,13.4,19.0,,,13.2,9.6,26.6,28.6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2025,Home,0,0,0,18.33,,35.730000,1,6.0,0,0.0,0,1,0,212.43,,,0.086287,826.55,153.95,0.186256,0
3,2025-12-02,PHI,1,Andre Drummond,WAS,8.294118,5.800000,19.666667,9.5,,,,,9.777778,6.200000,11.111111,10.5,17.611111,12.000000,30.777778,20.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.8,19.6,,,9.8,10.6,15.6,30.2,,,,,,,,,,,,,,,-13.5,235.5,,,,,,,,,,,,,,,,,,,,,,,2025,Away,0,0,0,17.18,10.52,17.056667,2,5.0,1,0.0,0,1,0,112.62,,,0.152548,212.43,18.33,0.086287,0
4,2025-12-04,MIN,0,Anthony Edwards,NOP,29.823529,29.428571,19.062500,29.5,4.125000,4.714286,3.843750,4.5,4.529412,4.714286,4.156250,4.5,34.352941,34.142857,23.218750,35.5,33.705882,34.142857,22.906250,34.5,8.411765,9.428571,8.000000,9.5,38.235294,38.857143,27.062500,40.5,3.875000,4.428571,2.906250,3.5,2.333333,1.285714,1.187500,1.5,1.000,0.714286,0.500000,0.5,2.214286,2.0,1.6875,1.5,37.8,23.6,4.8,4.6,5.0,4.4,42.8,28.0,42.6,28.2,9.8,9.0,47.6,32.6,5.0,4.0,2.8,1.2,1.0,1.0,2.8,2.2,-12.5,233.5,,,0.250000,0.0,0.50,0.0,,,,,0.25,-1.0,,,,,0.50,1.0,0.75,1.0,0.500000,4.0,2025,Home,0,0,0,31.37,47.45,40.982500,2,5.0,1,1.0,0,0,1,208.16,,,0.150701,878.13,115.14,0.131120,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094,2025-12-01,BRK,0,Tyrese Martin,CHO,9.882353,4.000000,16.842105,10.5,2.578947,2.250000,4.210526,2.5,3.368421,2.750000,4.315789,3.5,12.210526,6.750000,21.157895,13.5,11.421053,6.250000,21.052632,13.5,5.947368,5.000000,8.526316,6.5,14.789474,9.000000,25.368421,17.5,2.000000,0.500000,2.210526,1.5,,,,,,,,,,,,,13.0,18.8,3.0,3.4,4.4,5.2,17.4,24.0,16.0,22.2,7.4,8.6,20.4,27.4,2.0,2.2,,,,,,,4.5,229.5,,,,,,,,,,,,,,,,,,,,,,,2025,Away,0,0,0,18.50,23.82,22.416000,4,5.0,1,1.0,0,1,0,142.10,,,0.130190,664.69,117.08,0.176142,0
2095,2025-12-05,PHI,1,VJ Edgecombe,MIL,14.444444,,17.700000,10.5,4.222222,,3.600000,3.5,5.777778,,4.400000,4.5,20.222222,,22.100000,14.5,18.666667,,21.300000,13.5,10.000000,,8.000000,7.5,24.444444,,25.700000,18.5,2.125000,,2.800000,1.5,2.000000,,0.900000,0.5,,,,,,,,,11.8,15.0,4.0,3.4,6.0,3.0,17.8,18.0,15.8,18.4,10.0,6.4,21.8,21.4,1.4,2.4,2.2,1.0,,,,,-1.5,221.5,,,0.500000,4.0,0.75,5.0,0.500000,-5.0,0.25,-9.0,0.75,9.0,0.750000,-2.0,0.500000,0.0,,,,,,,2025,Home,0,0,0,24.43,23.83,26.797500,3,4.0,1,1.0,0,1,0,166.23,,,0.146965,867.59,161.43,0.186067,0
2096,2025-11-25,LAC,0,Yanic Konan Niederhauser,LAL,5.000000,,10.416667,,1.500000,,2.916667,,1.500000,,8.000000,,4.625000,,18.416667,,4.666667,,13.333333,,1.875000,,10.916667,,5.000000,,21.333333,,,,,,,,,,1.000,,0.916667,,1.000000,,2.2500,,5.0,8.2,1.5,3.0,1.8,8.2,4.8,16.4,4.8,11.2,2.0,11.2,5.0,19.4,,,,,1.0,1.0,1.0,2.2,,,,,,,,,,,,,,,,,,,,,,,,,2025,Home,1,0,0,,,19.220000,1,,0,0.0,0,0,1,187.28,,,,883.20,19.22,0.021762,0
2097,2025-12-03,ATL,0,Zaccharie Risacher,LAC,11.400000,,18.619048,11.5,,,,,2.789474,,5.714286,2.5,14.050000,,24.333333,13.5,,,,,,,,,15.550000,,27.619048,15.5,1.933333,,2.857143,1.5,,,,,,,,,,,,,10.4,21.6,,,2.6,5.6,13.0,27.2,,,,,14.0,30.6,2.2,3.2,,,,,,,-3.5,226.5,0.333333,-27.0,,,,,0.166667,-32.0,,,,,0.250000,-20.0,0.166667,-5.0,,,,,,,2025,Away,0,0,0,33.22,21.78,25.010000,3,3.0,1,0.0,0,0,1,196.94,,,0.168681,1145.35,126.08,0.110080,0


In [109]:
display(df[(df.Player == 'Norman Powell')].sort_values('Date'))

Unnamed: 0,Date,Team,B2B,Player,Opp,Off_PTS,AVG_PTS_H2H,Def_PTS,PTS_line,Off_AST,AVG_AST_H2H,Def_AST,AST_line,Off_REB,AVG_REB_H2H,Def_REB,REB_line,Off_PR,AVG_PR_H2H,Def_PR,PR_line,Off_PA,AVG_PA_H2H,Def_PA,PA_line,Off_RA,AVG_RA_H2H,Def_RA,RA_line,Off_PRA,AVG_PRA_H2H,Def_PRA,PRA_line,Off_TPM,AVG_TPM_H2H,Def_TPM,TPM_line,Off_STL,AVG_STL_H2H,Def_STL,STL_line,Off_BLK,AVG_BLK_H2H,Def_BLK,BLK_line,Off_STL_BLK,AVG_STL_BLK_H2H,Def_STL_BLK,STL_BLK_line,Off_L5_PTS,Def_L5_PTS,Off_L5_AST,Def_L5_AST,Off_L5_REB,Def_L5_REB,Off_L5_PR,Def_L5_PR,Off_L5_PA,Def_L5_PA,Off_L5_RA,Def_L5_RA,Off_L5_PRA,Def_L5_PRA,Off_L5_TPM,Def_L5_TPM,Off_L5_STL,Def_L5_STL,Off_L5_BLK,Def_L5_BLK,Off_L5_STL_BLK,Def_L5_STL_BLK,Spread,Total,PTS_Hit_Pct,PTS_P_Diff,AST_Hit_Pct,AST_P_Diff,REB_Hit_Pct,REB_P_Diff,PR_Hit_Pct,PR_P_Diff,PA_Hit_Pct,PA_P_Diff,RA_Hit_Pct,RA_P_Diff,PRA_Hit_Pct,PRA_P_Diff,TPM_Hit_Pct,TPM_P_Diff,STL_Hit_Pct,STL_P_Diff,BLK_Hit_Pct,BLK_P_Diff,STL_BLK_Hit_Pct,STL_BLK_P_Diff,Season,Team_type,cup_gm,pstszn_gm,PTS,MP,MP_lst_gm,MP_last_5_avg,games_last_7_days,mp_rank_team_game,top5_minutes,was_top5_last_game,reserve_plyr,bench_plyr,starter_plyr,Team_mins,prev_team_mins,prev_team_mins_pct,MP_Pct,Lst5_TmMins,Lst5_PlyrMins,TmMinsPct_AvgLst5,OT
903,2025-11-21,MIA,0,Norman Powell,CHI,25.416667,,13.615385,25.5,3.0,,3.384615,2.5,4.0,,4.461538,3.5,29.416667,,18.076923,29.5,27.916667,,17.0,28.5,6.5,,7.846154,5.5,31.916667,,21.461538,31.5,3.545455,,1.769231,3.5,1.4,,1.461538,1.5,,,,,,,,,28.4,13.8,3.2,4.4,3.2,4.6,31.6,18.4,31.0,18.2,5.8,9.0,34.2,22.8,4.4,2.4,1.2,0.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2025,Home,1,0,0,24.65,,,0,5.0,1,0.0,0,1,0,181.05,,,0.13615,541.62,31.9,0.058897,0
1422,2025-11-26,MIA,0,Norman Powell,MIL,25.428571,,17.857143,18.5,2.75,,3.428571,1.5,3.857143,,4.857143,2.5,29.285714,,22.714286,22.5,27.785714,,21.285714,20.5,6.214286,,8.285714,4.5,31.642857,,26.142857,24.5,3.461538,,2.928571,2.5,1.5,,0.857143,0.5,,,,,,,,,26.6,21.6,2.8,4.4,3.6,6.0,30.2,27.6,28.4,26.0,5.4,10.4,32.0,32.0,4.6,4.0,1.4,0.4,,,,,-8.5,238.5,,,,,,,,,,,,,,,,,,,,,,,2025,Away,1,0,0,,24.65,24.65,1,,0,1.0,0,0,1,0.0,,,,729.65,88.83,0.121743,0
1656,2025-11-29,MIA,1,Norman Powell,DET,24.466667,,16.52381,19.5,2.692308,,3.0,1.5,3.866667,,4.619048,2.5,28.333333,,21.142857,22.5,26.8,,19.52381,22.5,6.2,,7.619048,4.5,30.666667,,24.142857,25.5,3.461538,,2.0,2.5,1.461538,,0.904762,1.5,,,,,,,,,21.2,19.6,2.2,2.6,3.4,4.8,24.6,24.4,23.4,22.2,5.6,7.4,26.8,27.0,4.6,3.0,1.4,1.2,,,,,-3.5,239.5,,,,,,,,,,,,,,,,,,,,,,,2025,Away,0,0,0,33.03,,24.65,1,3.0,1,0.0,0,0,1,189.2,,,0.174577,454.07,83.5,0.183892,0
767,2025-12-01,MIA,0,Norman Powell,LAC,24.6875,,17.0,20.5,2.714286,,4.263158,2.5,3.75,,4.894737,3.5,28.4375,,21.894737,24.5,27.0625,,21.263158,22.5,6.125,,9.157895,5.5,30.8125,,26.157895,26.5,3.428571,,2.210526,2.5,1.428571,,1.210526,1.5,,,,,,,,,23.0,20.6,2.2,3.4,3.6,6.8,26.6,27.4,25.2,24.0,5.8,10.2,28.8,30.8,4.2,2.4,1.4,1.2,,,,,-6.5,236.5,,,,,,,,,,,,,,,,,,,,,,,2025,Away,0,0,0,31.9,33.03,28.84,1,2.0,1,1.0,0,0,1,192.89,,,0.165379,558.28,,,0
904,2025-12-05,MIA,0,Norman Powell,ORL,25.0,19.0,16.823529,22.5,,,,,3.705882,4.0,3.941176,3.5,28.705882,23.0,20.764706,25.5,27.411765,21.25,20.588235,24.5,6.117647,6.25,7.705882,5.5,,,,,3.6,2.25,2.058824,2.5,,,,,,,,,,,,,24.0,14.0,,,3.0,4.4,27.0,18.4,26.2,18.0,5.2,8.4,,,3.8,2.6,,,,,,,6.5,241.5,0.666667,11.5,,,,,0.666667,7.5,0.666667,11.5,0.666667,-0.5,,,0.666667,2.5,,,,,,,2025,Home,0,0,0,32.28,31.9,29.86,2,4.0,1,1.0,0,0,1,188.03,,,0.171675,722.67,56.55,0.078251,0
1542,2025-12-06,MIA,0,Norman Powell,SAC,25.166667,17.166667,19.821429,25.5,2.8125,1.333333,4.071429,2.5,3.666667,3.333333,4.785714,3.5,28.833333,20.5,24.607143,28.5,27.666667,18.5,23.892857,27.5,6.166667,4.666667,8.857143,5.5,31.333333,21.833333,28.678571,31.5,3.5,3.0,2.714286,2.5,1.4,0.666667,1.178571,1.5,,,,,,,,,25.8,20.8,2.6,2.8,3.2,5.6,29.0,26.4,28.4,23.6,5.8,8.4,31.6,29.2,3.4,3.0,1.4,1.0,,,,,-8.5,242.5,0.857143,33.5,0.666667,1.5,,,0.857143,29.5,0.857143,41.5,0.857143,5.5,0.666667,8.5,0.285714,0.5,0.333333,-0.5,,,,,2025,Away,0,0,0,26.57,32.28,30.465,3,3.0,1,1.0,0,0,1,142.39,,,0.1866,692.6,88.83,0.128256,0
1543,2025-12-15,MIA,0,Norman Powell,TOR,24.6,13.5,16.269231,22.5,2.666667,1.0,3.923077,1.5,3.75,2.5,3.884615,3.5,28.35,16.0,20.153846,25.5,27.0,14.5,20.192308,24.5,6.15,3.5,7.807692,5.5,30.75,17.0,24.076923,27.5,3.388889,2.0,2.230769,2.5,1.5,0.5,0.961538,1.5,,,,,,,,,25.0,25.2,2.6,5.0,3.4,3.6,28.4,28.8,27.6,30.2,6.0,8.6,31.0,33.8,3.2,3.4,1.6,1.4,,,,,-5.5,235.5,0.6,9.5,0.5,0.0,,,0.6,4.5,0.6,10.5,0.6,-1.5,0.5,-2.0,0.4,0.5,0.25,-2.0,,,,,2025,Away,0,0,0,,26.57,30.945,3,,0,1.0,0,0,1,0.0,,,,646.96,115.4,0.178373,0
