# To do:

 - Figure out how to signal injuries
 - Learn basic PyTorch

In [1]:
import pandas as pd
import numpy as np
import duckdb
import warnings
import math         # haversine_km()
import os

import ray
from ray import tune
from ray import train
import torch
import torch.nn as nn
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error

import joblib
import warnings
from datetime import datetime, timedelta
from haversine import haversine

pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")

categories = ['PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK']
con = duckdb.connect(database=":memory:")
now = str(datetime.now().date())
print(f"Today's date:", now)
tgt_stat = "PRA"
print('Target Stat:', tgt_stat)

Using device: cuda
Today's date: 2025-12-24
Target Stat: PRA


In [2]:
%run ./common_utils.ipynb

# ML Functions

In [3]:
def base_feature_importance(model, feature_cols):
    # Get weights from the model
    weights = model.network.weight.detach().cpu().numpy().flatten()  # shape (n_features,)

    # Create DataFrame
    feature_importance_df = pd.DataFrame({
        "feature": feature_cols,
        "weight": weights,
        "abs_weight": np.abs(weights)
    })

    # Sort by absolute weight
    feature_importance_df = feature_importance_df.sort_values(by="abs_weight", ascending=False).reset_index(drop=True).drop(columns="abs_weight")

    display(feature_importance_df)

In [4]:
def permutation_importance(model, X, y, feature_names):
    X_val = torch.tensor(X, dtype=torch.float32).to(device)
    y_val = torch.tensor(y, dtype=torch.float32).unsqueeze(1).to(device)
    model.eval()
    with torch.no_grad():
        baseline_preds = model(X_val)
        baseline_rmse = mean_squared_error(y_val.cpu(), baseline_preds.cpu())

    importances = []
    X_val_np = X_val.cpu().numpy()

    for i in range(X_val_np.shape[1]):
        X_val_shuffled = X_val_np.copy()
        np.random.shuffle(X_val_shuffled[:, i])
        X_val_shuffled_tensor = torch.tensor(X_val_shuffled, dtype=torch.float32).to(X_val.device)

        with torch.no_grad():
            preds = model(X_val_shuffled_tensor)
            rmse = mean_squared_error(y_val.cpu(), preds.cpu())
        
        importances.append(rmse - baseline_rmse)  # increase in RMSE = importance

    df = pd.DataFrame({"feature": feature_names, "importance": importances})
    df = df.sort_values(by="importance", ascending=False)
    display(df)

In [5]:
class BaselineRegression(nn.Module):
    def __init__(self, input_dim, hidden_units=None):
        super().__init__()
        
        if hidden_units:  # if hidden_units is provided, create a hidden layer
            self.network = nn.Sequential(
                nn.Linear(input_dim, hidden_units),
                nn.ReLU(),                 # activation for hidden layer
                nn.Linear(hidden_units, 1) # output layer
            )
        else:  # if no hidden_units, just a simple linear regression
            self.network = nn.Linear(input_dim, 1)

    def forward(self, x):
        return self.network(x)

In [6]:
def create_baseline_model(df, pred_col, feature_cols):

    if pred_col == 'MP':
        print('Minutes Model')
    else:
        print(f'{tgt_stat} Stats Model')

    X = df[feature_cols].values
    y = df[pred_col].values

    X = torch.tensor(X, dtype=torch.float32).to(device)
    y = torch.tensor(y, dtype=torch.float32).unsqueeze(1).to(device)

    print("tensor X", X.shape)
    print("tensor y", y.shape)

    N = len(X)
    split = int(0.8 * N)

    X_train, X_val = X[:split], X[split:]
    y_train, y_val = y[:split], y[split:]

    model = BaselineRegression(input_dim=X.shape[1]).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    num_epochs = 1000
    print_every = num_epochs // 10  # 10% intervals

    for epoch in range(num_epochs):
        model.train()

        optimizer.zero_grad()
        preds = model(X_train)
        loss = criterion(preds, y_train)
        loss.backward()
        optimizer.step()

        # Print every 10 percentile (10%, 20%, ..., 100%)
        if (epoch + 1) % print_every == 0 or epoch == 0:
            model.eval()
            with torch.no_grad():
                val_preds = model(X_val)
                val_loss = criterion(val_preds, y_val)

            train_rmse = torch.sqrt(loss).item()
            val_rmse = torch.sqrt(val_loss).item()

            percentile = int((epoch + 1) / num_epochs * 100)

            print(
                f"Epoch {epoch+1:04d} ({percentile}%) | "
                f"Train RMSE: {train_rmse:.4f} | "
                f"Val RMSE: {val_rmse:.4f}"
            )

    return model

In [7]:
def train_mdl(model, X_train, y_train, num_epochs=50, lr=0.001):
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()

    for epoch in range(num_epochs):
        optimizer.zero_grad()
        preds = model(X_train)
        loss = criterion(preds, y_train)
        loss.backward()
        optimizer.step()
        
    rmse = torch.sqrt(loss).item()  # Convert MSE to RMSE
    return rmse
    
def eval_mdl(model, X_val, y_val):
    model.eval()
    with torch.no_grad():
        preds = model(X_val)
        criterion = torch.nn.MSELoss()
        val_loss = criterion(preds, y_val)
    rmse = torch.sqrt(val_loss).item()  # Convert MSE to RMSE
    return rmse

def train_and_eval(config):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    model = BaselineRegression(input_dim=config["input_dim"], hidden_units=config["hidden_units"]).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])
    criterion = nn.MSELoss()
    
    X_train_device = config["X_train"].to(device)
    y_train_device = config["y_train"].to(device)
    X_val_device = config["X_val"].to(device)
    y_val_device = config["y_val"].to(device)
    
    train_rmse = train_mdl(model, X_train_device, y_train_device, num_epochs=1000, lr=config["lr"])
    val_rmse = eval_mdl(model, X_val_device, y_val_device)
    
    tune.report({"train_rmse": train_rmse, "val_rmse": val_rmse})

def hyperparam_tuning(X, y, num_samples):

    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)

    N = len(X)
    train_end = int(0.7 * N)
    val_end = int(0.85 * N)

    X_train, y_train = X[:train_end], y[:train_end]
    X_val, y_val = X[train_end:val_end], y[train_end:val_end]
    X_test, y_test = X[val_end:], y[val_end:]

    search_space = {
        "lr": tune.loguniform(1e-4, 1e-2),
        "hidden_units": tune.choice([8,16,32]),
        "input_dim": X.shape[1],
        "X_train": X_train,
        "y_train": y_train,
        "X_val": X_val,
        "y_val": y_val
    }

    abs_path = os.path.abspath("../ray_temp_outputs")  # convert to absolute path
    analysis = tune.run(train_and_eval, config=search_space, num_samples=num_samples, storage_path=abs_path)
    best_trial = analysis.get_best_trial(
        metric="val_rmse",  # the metric you want to optimize
        mode="min",         # 'min' because lower val_loss is better
        scope="all"         # look across all reported steps
    )
    # Select device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Move final model to device
    best_config = best_trial.config
    final_model = BaselineRegression(input_dim=X.shape[1], 
                                     hidden_units=best_config["hidden_units"]).to(device)

    # Combine training + validation data optionally (common practice for final training)
    X_trainval = torch.cat([X_train, X_val], dim=0).to(device)
    y_trainval = torch.cat([y_train, y_val], dim=0).to(device)

    # Train final model with best hyperparameters
    train_mdl(final_model, X_trainval, y_trainval, num_epochs=1000, lr=best_config["lr"])

    # Evaluate on test set
    X_test_device = X_test.to(device)
    y_test_device = y_test.to(device)
    test_rmse  = eval_mdl(final_model, X_test_device, y_test_device)
    print("Final test RMSE:", test_rmse)

    # Get predictions (e.g., on test or new data)
    final_model.eval()
    with torch.no_grad():
        preds = final_model(X_test_device)
        preds_np = preds.cpu().numpy()

    
    return final_model

### Create Base df

In [8]:
df = pd.DataFrame()
df2 = pd.DataFrame()
df3 = pd.DataFrame()
for i in [2022, 2023, 2024, 2025]:
    df_actuals = pd.read_csv(f"../tables/{i}/parlay_actuals.csv")
    df_actuals['Season'] = i
    df = pd.concat([df, df_actuals])

    df_schd = pd.read_csv(f"../tables/{i}/nba_schedule.csv")
    df_schd['Season'] = i
    df2 = pd.concat([df2, df_schd])
    
    df_gms = pd.read_csv(f"../tables/{i}/season_gamelogs.csv")
    df_gms['Season'] = i
    df3 = pd.concat([df3, df_gms])

df['Date'] = pd.to_datetime(df.Date)
df2['Date'] = pd.to_datetime(df2.Date)
df3['Date'] = pd.to_datetime(df3.Date)

df['Tms'] = df['game_id'].apply(lambda x: x.split("_")[1:3])
df['WrngTm'] = df.apply(lambda row: 0 if row['Team'] in row['Tms'] else 1, axis=1)
df['WrngOpp'] = df.apply(lambda row: 0 if row['Opp'] in row['Tms'] else 1, axis=1)
df = df[(df.WrngTm == 0) & (df.WrngOpp == 0)].drop(['WrngTm', 'WrngOpp', 'Tms'], axis=1)

df3 = df3[['game_id', 'Date', 'Team', 'Player', 'FG', 'FGA', 'FG%', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'TOV', 'PF', '+/-']]\
        .rename(columns={"3PA": "TPA", "3P%": "TP%"})
df3 = df3[~df3[['Date', 'Team', 'Player']].duplicated(keep='last')]
df = df.merge(df3, on=['game_id', 'Date', 'Team', 'Player'])

df_mtch = df2[['Season', 'Date', 'AwayABV', 'HomeABV', 'AwayPTS', 'HomePTS', 'AwayB2B', 'HomeB2B', 'cup_gm', 'pstszn_gm']]
df_mtch['Team_type'] = 'Away'
df_mtch = df_mtch.rename(columns={"AwayABV": "Team", "HomeABV": "Opp", "AwayB2B": "B2B"})[['Season', 'Date', 'Team', 'AwayPTS', 'HomePTS', 'Opp', 'B2B', 'cup_gm', 'pstszn_gm', 'Team_type']]
df_mtch2 = df_mtch.copy().rename(columns={"Team": "Opp", "Opp": "Team", "HomeB2B": "B2B"})[['Season', 'Date', 'Team', 'AwayPTS', 'HomePTS', 'Opp', 'B2B', 'cup_gm', 'pstszn_gm']]
df_mtch2['Team_type'] = 'Home'
df_mtch = pd.concat([df_mtch, df_mtch2])
df_mtch = df_mtch[['Season', 'Date', 'Team', 'Team_type', 'AwayPTS', 'HomePTS', 'cup_gm', 'pstszn_gm']]
df_mtch = df_mtch.sort_values(["Team", "Date"])
df_mtch['team_game_num'] = df_mtch.groupby(["Team", "Season"]).cumcount() + 1
df_mtch['Spread'] = np.where(df_mtch.Team_type == 'Home', df_mtch.HomePTS - df_mtch.AwayPTS, df_mtch.AwayPTS - df_mtch.HomePTS)
df_mtch['Total'] = df_mtch.AwayPTS + df_mtch.HomePTS
df_mtch['is_Win'] = np.where(df_mtch.Spread > 0, 1, 0)
df_mtch['Szn_Wins'] = df_mtch.groupby(['Season', 'Team'])['is_Win'].cumsum()
df = df.drop(['Spread', 'Total'], axis=1).merge(df_mtch, on=['Season', 'Date', 'Team'])

team_encoder = LabelEncoder()
player_encoder = LabelEncoder()
team_type_encoder = LabelEncoder()
position_encoder = LabelEncoder()

# Encode string cols
team_encoder.fit(pd.concat([df["Team"], df["Opp"]], axis=0))
df["Team"] = team_encoder.transform(df["Team"])
df["Opp"] = team_encoder.transform(df["Opp"])
df["Player_name"] = df.Player
df["Player"] = player_encoder.fit_transform(df["Player"])
df["Pos"] = position_encoder.fit_transform(df["Pos"])
df['Team_type'] = team_type_encoder.fit_transform(df['Team_type'])
df = df.sort_values(['Season', 'Date', 'Team', 'Player']).reset_index(drop=True)
print('base df created', datetime.now())

base df created 2025-12-24 20:05:13.579223


# Minutes Projection Model

In [9]:
def haversine_km(lat1, lon1, lat2, lon2):
    R = 6371.0  # Earth radius in km
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon/2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

def travel_km_from_row(row):
    prev = row['PrevLocation']
    cur  = row['Location']
    # missing prev => first game => no travel
    if pd.isna(prev) or pd.isna(cur):
        return 0.0
    # same arena => 0
    if prev == cur:
        return 0.0
    # lookup coords
    prev_coords = arenas.get(prev)
    cur_coords  = arenas.get(cur)
    if not prev_coords or not cur_coords:
        # fallback if code not found
        return 0.0
    return haversine_km(prev_coords[0], prev_coords[1], cur_coords[0], cur_coords[1])

In [10]:
def setup_df_mins(con, df):
    
    df = df[['Season', 'Date', 'Team', 'Team_type', 'Opp', 'Player', 'Pos', 'B2B', 'MP', 'TOV', 'PF', '+/-',
             'Spread', 'Total', 'team_game_num', 'Szn_Wins', 'cup_gm', 'pstszn_gm']]
    
    for col in ['MP', 'TOV', 'PF', '+/-']:
        df[f'{col}_lst_gm'] = (
            df
            .groupby(['Player', 'Season'])[col]
            .shift(1)
        )
        for N in [3, 5, 10]:
            df[f'{col}_last_{N}_avg'] = (
                df.groupby(['Player', 'Season'])[col]
                  .rolling(window=N, min_periods=1)
                  .mean()
                  .shift(1)
                  .reset_index(level=[0, 1], drop=True)
            )
            df[f"{col}_last_{N}_std"] = (
                df.groupby(['Player', 'Season'])[col]
                  .shift(1)
                  .rolling(window=N, min_periods=1)
                  .std()
            )
        df[f"{col}_change_L1"] = df[f"{col}_lst_gm"] - df[f"{col}_last_5_avg"]
        df[f"{col}_change_L3"] = df[f"{col}_last_3_avg"] - df[f"{col}_last_10_avg"]
        df[f"{col}_pct_change"] = (
            (df[f"{col}_lst_gm"] - df[f"{col}_last_10_avg"]) /
            (df[f"{col}_last_10_avg"] + 1e-6)
        )
    df["MP_spike"] = (df["MP_lst_gm"] > df["MP_last_10_avg"] + 8).astype(int)
    df["MP_drop"]  = (df["MP_lst_gm"] < df["MP_last_10_avg"] - 8).astype(int)
    df["MP_trend"] = df["MP_last_3_avg"] - df["MP_last_10_avg"]

    games_last_7_days = df.groupby(['Player', 'Season']).rolling('7D', on='Date')['MP'].count().shift(1).to_frame(name='games_last_7_days').reset_index()
    df = df.merge(games_last_7_days, on=['Player', 'Season', 'Date'])
    df['games_last_7_days'] = df.games_last_7_days.fillna(0).astype(int)
    
    df['prev_team_mins_pct'] = (df.groupby(['Player', 'Season'])['MP'].shift(1)) / 240
        
    
    df['reserve_td'] = (df.MP < 8).astype(int)
    df['bench_td']   = ((df.MP >= 8) & (df.MP <= 25)).astype(int)
    df['starter_td'] = (df.MP > 25).astype(int)
    role_counts = df.groupby(['Season', 'Player'])[['reserve_td', 'bench_td', 'starter_td']].sum()
    role_counts['most_common_role'] = role_counts[['reserve_td', 'bench_td', 'starter_td']].idxmax(axis=1)
    role_counts['reserve'] = (role_counts['most_common_role'] == 'reserve_td').astype(int)
    role_counts['bench']   = (role_counts['most_common_role'] == 'bench_td').astype(int)
    role_counts['starter'] = (role_counts['most_common_role'] == 'starter_td').astype(int)
    df = df.merge(role_counts[['reserve', 'bench', 'starter']], on=['Season', 'Player'], how='left')
      
    df['missed_games'] = (
        df.groupby(['Player', 'Team', 'Season'])['team_game_num']      
          .diff()
          .sub(1)
          .fillna(0)
          .astype(int)
    )
    
    df['blowout'] = np.where(abs(df.Spread >= 15), 1, 0)
    
    # Location based features
    df["PrevOpp"] = df.groupby("Player")["Opp"].shift(1)
    df["DaysLstGm"] = (df.groupby("Player")["Date"].diff().dt.days).fillna(0).astype(int)
    df['Location'] = df.apply(lambda r: r['Team'] if r['Team_type'] == 'Home' else r['Opp'], axis=1)
    df['PrevLocation'] = df.groupby('Player')['Location'].shift(1)
    df['travel_km'] = df.apply(travel_km_from_row, axis=1).fillna(0)
    df['travel_hours'] = df['travel_km'] / 800.0      # approximate flight hours
    df['is_long_trip'] = (df['travel_km'] > 1500).astype(int)
    df['same_arena'] = (df['PrevLocation'] == df['Location']).astype(int)
    
    df = df.drop(['reserve_td', 'bench_td', 'starter_td', 'Szn_Wins', 'TOV', 'PF', '+/-', 
                  'PrevOpp', 'PrevLocation', 'Location'], axis=1)    
    
    return df

In [11]:
df_mins = df.copy()
df_mins = setup_df_mins(con, df_mins)
df_mins = df_mins.dropna() # TEMP SOLUTION TO NULLS
display(df_mins)

mins_feature_cols = [
    'Team', 'Player', 
    'MP_lst_gm', 'MP_last_5_avg', 'MP_last_10_avg',
    'starter', 'bench', 'reserve'
]
mins_model = create_baseline_model(df_mins, 'MP', mins_feature_cols)
# base_feature_importance(mins_model, mins_feature_cols)

df_mins = df_mins.drop(['Season', 'Date'], axis=1)
for col in df_mins.columns:
    if not pd.api.types.is_float_dtype(df_mins[col]):
        df_mins[col] = df_mins[col].astype(float)
        
X = df_mins.drop('MP', axis=1).values
y = df_mins['MP'].values
# mins_model = hyperparam_tuning(X, y, 1)
# permutation_importance(mins_model, X, y, df_mins.drop('MP', axis=1).columns.tolist())

Unnamed: 0,Season,Date,Team,Team_type,Opp,Player,Pos,B2B,MP,Spread,Total,team_game_num,cup_gm,pstszn_gm,MP_lst_gm,MP_last_3_avg,MP_last_3_std,MP_last_5_avg,MP_last_5_std,MP_last_10_avg,MP_last_10_std,MP_change_L1,MP_change_L3,MP_pct_change,TOV_lst_gm,TOV_last_3_avg,TOV_last_3_std,TOV_last_5_avg,TOV_last_5_std,TOV_last_10_avg,TOV_last_10_std,TOV_change_L1,TOV_change_L3,TOV_pct_change,PF_lst_gm,PF_last_3_avg,PF_last_3_std,PF_last_5_avg,PF_last_5_std,PF_last_10_avg,PF_last_10_std,PF_change_L1,PF_change_L3,PF_pct_change,+/-_lst_gm,+/-_last_3_avg,+/-_last_3_std,+/-_last_5_avg,+/-_last_5_std,+/-_last_10_avg,+/-_last_10_std,+/-_change_L1,+/-_change_L3,+/-_pct_change,MP_spike,MP_drop,MP_trend,games_last_7_days,prev_team_mins_pct,reserve,bench,starter,missed_games,blowout,DaysLstGm,travel_km,travel_hours,is_long_trip,same_arena
282,2022,2022-10-22,1,0,21,188,4,1,36.95,6.0,246.0,3,0,0,24.65,24.650000,17.430182,24.650,17.430182,24.650,17.430182,0.000,0.000000,0.000000,1.0,1.000000,0.707107,1.0,0.707107,1.0,0.707107,0.0,0.000000,0.000000,4.0,4.000000,2.828427,4.0,2.828427,4.0,2.828427,0.0,0.000000,0.000000,1.0,1.000000,0.707107,1.0,0.707107,1.0,0.707107,0.0,0.000000,0.000000,0,0,0.000000,1,0.102708,0,0,1,0,0,1,0.0,0.0,0,0
283,2022,2022-10-22,1,0,21,256,1,1,31.05,6.0,246.0,3,0,0,25.40,25.400000,14.453056,25.400,14.453056,25.400,14.453056,0.000,0.000000,0.000000,1.0,1.000000,0.577350,1.0,0.577350,1.0,0.577350,0.0,0.000000,0.000000,1.0,1.000000,2.081666,1.0,2.081666,1.0,2.081666,0.0,0.000000,0.000000,15.0,15.000000,8.386497,15.0,8.386497,15.0,8.386497,0.0,0.000000,0.000000,0,0,0.000000,1,0.105833,0,0,1,0,0,1,0.0,0.0,0,0
284,2022,2022-10-22,1,0,21,291,4,1,0.00,6.0,246.0,3,0,0,0.00,0.000000,14.453056,0.000,14.451434,0.000,14.451434,0.000,0.000000,0.000000,0.0,0.000000,0.577350,0.0,0.577350,0.0,0.577350,0.0,0.000000,0.000000,0.0,0.000000,2.081666,0.0,1.892969,0.0,1.892969,0.0,0.000000,0.000000,0.0,0.000000,8.386497,0.0,7.348469,0.0,7.348469,0.0,0.000000,0.000000,0,0,0.000000,1,0.000000,1,0,0,0,0,1,0.0,0.0,0,0
285,2022,2022-10-22,1,0,21,356,3,1,37.50,6.0,246.0,3,0,0,34.02,34.020000,17.686270,34.020,15.784406,34.020,15.784406,0.000,0.000000,0.000000,3.0,3.000000,1.527525,3.0,1.224745,3.0,1.224745,0.0,0.000000,0.000000,5.0,5.000000,2.645751,5.0,2.345208,5.0,2.345208,0.0,0.000000,0.000000,-4.0,-4.000000,10.016653,-4.0,7.300685,-4.0,7.300685,0.0,0.000000,-0.000000,0,0,0.000000,1,0.141750,0,0,1,0,0,1,0.0,0.0,0,0
286,2022,2022-10-22,1,0,21,364,3,1,37.65,6.0,246.0,3,0,0,36.78,36.780000,20.484736,36.780,14.508332,36.780,16.302091,0.000,0.000000,0.000000,2.0,2.000000,1.527525,2.0,1.140175,2.0,1.169045,0.0,0.000000,0.000000,1.0,1.000000,2.645751,1.0,2.167948,1.0,2.136976,0.0,0.000000,0.000000,-1.0,-1.000000,2.081666,-1.0,7.395945,-1.0,6.675827,0.0,0.000000,-0.000000,0,0,0.000000,1,0.153250,0,0,1,0,0,1,0.0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95445,2025,2025-12-22,28,0,7,433,0,0,34.53,-23.0,247.0,28,0,0,21.10,26.723333,3.510005,23.894,6.474915,24.516,8.022162,-2.794,2.207333,-0.139338,1.0,2.333333,1.154701,2.2,1.643168,2.1,1.828782,-1.2,0.233333,-0.523809,5.0,3.333333,2.645751,3.0,2.345208,3.4,1.873796,2.0,-0.066667,0.470588,-5.0,-3.000000,5.686241,-8.0,5.412947,-5.9,11.067972,3.0,2.900000,-0.152542,0,0,2.207333,2,0.087917,0,1,0,0,0,2,0.0,0.0,0,0
95446,2025,2025-12-22,28,0,7,471,2,0,33.92,-23.0,247.0,28,0,0,41.55,40.493333,10.391084,36.620,8.127326,33.418,8.254620,4.930,7.075333,0.243342,5.0,3.000000,2.000000,3.6,2.000000,3.8,1.828782,1.4,-0.800000,0.315789,5.0,4.000000,0.577350,3.8,2.408319,3.2,1.873796,1.2,0.800000,0.562500,7.0,1.666667,6.658328,-6.2,4.868265,-7.8,10.276186,13.2,9.466667,-1.897436,1,0,7.075333,3,0.173125,0,0,1,0,0,2,0.0,0.0,0,0
95447,2025,2025-12-22,28,0,7,486,0,0,12.65,-23.0,247.0,28,0,0,21.13,28.633333,11.798162,28.150,8.478411,28.221,8.552769,-7.020,0.412333,-0.251267,1.0,1.333333,2.309401,1.0,1.788854,1.0,1.873796,0.0,0.333333,0.000000,2.0,3.000000,1.732051,2.8,2.167948,3.1,1.873796,-0.8,-0.100000,-0.354839,-7.0,7.666667,7.571878,2.2,6.418723,2.7,9.968840,-9.2,4.966667,-3.592591,0,0,0.412333,3,0.088042,0,1,0,0,0,2,0.0,0.0,0,0
95448,2025,2025-12-22,28,0,7,496,1,0,36.20,-23.0,247.0,28,0,0,39.78,33.943333,11.313206,34.736,9.873041,34.609,8.475977,5.044,-0.665667,0.149412,1.0,2.000000,2.309401,1.4,1.788854,1.3,1.873796,-0.4,0.700000,-0.230769,3.0,2.000000,1.527525,1.6,1.643168,1.4,1.763834,1.4,0.600000,1.142856,27.0,-1.333333,17.088007,2.6,13.520355,-2.5,10.764654,24.4,1.166667,-11.800005,0,0,-0.665667,2,0.165750,0,0,1,2,0,7,0.0,0.0,0,0


Minutes Model
tensor X torch.Size([93326, 8])
tensor y torch.Size([93326, 1])
Epoch 0001 (0%) | Train RMSE: 110.2155 | Val RMSE: 105.5947
Epoch 0100 (10%) | Train RMSE: 9.3275 | Val RMSE: 9.3121
Epoch 0200 (20%) | Train RMSE: 6.7042 | Val RMSE: 6.9701
Epoch 0300 (30%) | Train RMSE: 6.6622 | Val RMSE: 6.9451
Epoch 0400 (40%) | Train RMSE: 6.6567 | Val RMSE: 6.9436
Epoch 0500 (50%) | Train RMSE: 6.6528 | Val RMSE: 6.9425
Epoch 0600 (60%) | Train RMSE: 6.6496 | Val RMSE: 6.9411
Epoch 0700 (70%) | Train RMSE: 6.6465 | Val RMSE: 6.9393
Epoch 0800 (80%) | Train RMSE: 6.6435 | Val RMSE: 6.9372
Epoch 0900 (90%) | Train RMSE: 6.6405 | Val RMSE: 6.9350
Epoch 1000 (100%) | Train RMSE: 6.6376 | Val RMSE: 6.9328


In [62]:
# rmse = np.sqrt(mean_squared_error(mins_splits[5], mins_preds)) # splits[5] = y_test
# mins_test_df['MP_pred'] = mins_preds
# df_test = mins_test_df.copy()

# df_test['Team'] = team_encoder.inverse_transform(df_test['Team'])
# df_test['Opp'] = team_encoder.inverse_transform(df_test['Opp'])
# df_test['Player'] = player_encoder.inverse_transform(df_test['Player'])
# df_test['Pos'] = position_encoder.inverse_transform(df_test['Pos'])

# df_test['Diff'] = abs(df_test['MP_pred'] - df_test['MP'])
# df_test['InRMSE_Range'] = np.where(df_test['Diff'] <= rmse, 1, 0)

# print("Total Accuracy (InRMSE_Range):", ((df_test.InRMSE_Range == 1).sum() / df_test.shape[0]))
# print((df_test.InRMSE_Range == 1).sum(), "/", df_test.shape[0])

# df_ystrday = df_test[(df_test.Date == (datetime.strptime(now, "%Y-%m-%d") - timedelta(days=1)).strftime("%Y-%m-%d"))]\
#             [['Team', 'Player', 'Pos', 'Opp', 'MP', 'MP_pred', 'InRMSE_Range', 'Diff', 'Spread']]
# print("\nYesterday's Results:")
# print("Total Accuracy (InRMSE_Range):", ((df_ystrday.InRMSE_Range == 1).sum() / df_ystrday.shape[0]))
# if df_ystrday.shape[0] >= 50:
#     for tm in df_ystrday.Team.unique():
#         display(df_ystrday[df_ystrday.Team == tm])
# else:
#     display(df_ystrday)

# Main Model

In [92]:
def setup_df_main(df):
        
    df = df[['Season', 'Date', 'Team', 'Opp', 'Player', 'Pos', 'B2B', 'MP', 
             'PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK',
             'FG', 'FGA', 'TPA', 'FT', 'FTA', f'Def_{tgt_stat}', f'Def_L5_{tgt_stat}',
             'Spread', 'Total', 'cup_gm', 'pstszn_gm']]
    
    
    if tgt_stat == 'PTS':
        
        # Efficiency metrics
        df['three_rate_raw'] =  np.where(df.FGA > 0, df['TPA'] / df['FGA'], 0)
        df['ft_rate_raw']    =  np.where(df.FGA > 0, df['FTA'] / df['FGA'], 0)
        df['eFG_raw'] = (df['FG'] + 0.5 * df['TPM']) / df['FGA']
        df['TS_raw'] = df['PTS'] / (2 * (df['FGA'] + 0.44 * df['FTA']))    
        df['usage_proxy_raw'] =  np.where(df.MP > 0, (df['FGA'] + 0.44 * df['FTA']) / df['MP'], 0)
        
        for w in [3, 5, 10]:
            for metric in ['three_rate', 'ft_rate', 'eFG', 'TS', 'usage_proxy']:
                col = f"{metric}_L{w}"
                df[col] = (
                    df.groupby(['Player','Season'])[f'{metric}_raw']
                      .rolling(w, min_periods=1)
                      .mean()
                      .shift(1)
                      .reset_index(level=[0,1], drop=True)
                )
        for metric in ['three_rate', 'ft_rate', 'eFG', 'TS', 'usage_proxy']:
            col = f'{metric}_weighted'
            df[col] = (
                0.6 * df[f'{metric}_L3'] +
                0.3 * df[f'{metric}_L5'] +
                0.1 * df[f'{metric}_L10']
            )
            df = df.drop(f'{metric}_raw', axis=1)
        tgt_stat_cols = ['PTS']
        
    elif tgt_stat == 'PRA':
        tgt_stat_cols = ['PTS', 'REB', 'AST']
        
    else:
        tgt_stat_cols = []

        
    for col in ['MP', 'FGA', 'TPA', 'FTA', tgt_stat] + tgt_stat_cols:
        df[f'{col}_lst_gm'] = (
            df
            .groupby(['Player', 'Season'])[col]
            .shift(1)
        )
        for N in [3, 5, 10]:
            df[f'{col}_last_{N}_avg'] = (
                df.groupby(['Player', 'Season'])[col]
                  .rolling(window=N, min_periods=1)
                  .mean()
                  .shift(1)
                  .reset_index(level=[0, 1], drop=True)
            )
            df[f"{col}_last_{N}_std"] = (
                df.groupby(['Player', 'Season'])[col]
                  .shift(1)
                  .rolling(window=N, min_periods=1)
                  .std()
            )

    for col in categories + ['Season', 'FG', 'FGA', 'FT', 'FTA', 'TPM', 'TPA'] + tgt_stat_cols:
        if col == tgt_stat:
            continue
        if col in df.columns:
            df = df.drop(col, axis=1)
        
    return df

In [125]:
df_main = df.copy()
df_main = setup_df_main(df_main)
df_main = df_main.dropna() # TEMP SOLUTION TO NULLS
display(df_main)

stat_feature_cols = [
    'Team', 'Player', 'MP',
    'MP_lst_gm', 'MP_last_5_avg', 'MP_last_10_avg',
    f'{tgt_stat}_last_3_avg', f'{tgt_stat}_last_5_avg', f'{tgt_stat}_last_10_avg',
    f'Def_{tgt_stat}', f'Def_L5_{tgt_stat}'
]
stat_model = create_baseline_model(df_main, tgt_stat, stat_feature_cols)
# base_feature_importance(stat_model, stat_feature_cols)

df_main = df_main.drop(['Date'], axis=1)
for col in df_main.columns:
    if not pd.api.types.is_float_dtype(df_main[col]):
        df_main[col] = df_main[col].astype(float)

X = df_main.drop(tgt_stat, axis=1).values
y = df_main[tgt_stat].values
stat_model = hyperparam_tuning(X, y, 1)
# permutation_importance(stat_model, X, y, df_main.drop(tgt_stat, axis=1).columns.tolist())

Unnamed: 0,Date,Team,Opp,Player,Pos,B2B,MP,PRA,Def_PRA,Def_L5_PRA,Spread,Total,cup_gm,pstszn_gm,MP_lst_gm,MP_last_3_avg,MP_last_3_std,MP_last_5_avg,MP_last_5_std,MP_last_10_avg,MP_last_10_std,FGA_lst_gm,FGA_last_3_avg,FGA_last_3_std,FGA_last_5_avg,FGA_last_5_std,FGA_last_10_avg,FGA_last_10_std,TPA_lst_gm,TPA_last_3_avg,TPA_last_3_std,TPA_last_5_avg,TPA_last_5_std,TPA_last_10_avg,TPA_last_10_std,FTA_lst_gm,FTA_last_3_avg,FTA_last_3_std,FTA_last_5_avg,FTA_last_5_std,FTA_last_10_avg,FTA_last_10_std,PRA_lst_gm,PRA_last_3_avg,PRA_last_3_std,PRA_last_5_avg,PRA_last_5_std,PRA_last_10_avg,PRA_last_10_std,PTS_lst_gm,PTS_last_3_avg,PTS_last_3_std,PTS_last_5_avg,PTS_last_5_std,PTS_last_10_avg,PTS_last_10_std,REB_lst_gm,REB_last_3_avg,REB_last_3_std,REB_last_5_avg,REB_last_5_std,REB_last_10_avg,REB_last_10_std,AST_lst_gm,AST_last_3_avg,AST_last_3_std,AST_last_5_avg,AST_last_5_std,AST_last_10_avg,AST_last_10_std
282,2022-10-22,1,21,188,4,1,36.95,35,32.000000,32.0,6.0,246.0,0,0,24.65,24.650000,17.430182,24.650,17.430182,24.650,17.430182,5.0,5.000000,3.535534,5.0,3.535534,5.0,3.535534,4.0,4.000000,2.828427,4.0,2.828427,4.0,2.828427,4.0,4.000000,2.828427,4.0,2.828427,4.0,2.828427,18.0,18.000000,12.727922,18.0,12.727922,18.0,12.727922,10.0,10.000000,7.071068,10.0,7.071068,10.0,7.071068,7.0,7.000000,4.949747,7.0,4.949747,7.0,4.949747,1.0,1.000000,0.707107,1.0,0.707107,1.0,0.707107
283,2022-10-22,1,21,256,1,1,31.05,21,33.500000,33.5,6.0,246.0,0,0,25.40,25.400000,14.453056,25.400,14.453056,25.400,14.453056,6.0,6.000000,3.214550,6.0,3.214550,6.0,3.214550,4.0,4.000000,2.309401,4.0,2.309401,4.0,2.309401,0.0,0.000000,2.309401,0.0,2.309401,0.0,2.309401,19.0,19.000000,10.692677,19.0,10.692677,19.0,10.692677,10.0,10.000000,5.773503,10.0,5.773503,10.0,5.773503,7.0,7.000000,4.041452,7.0,4.041452,7.0,4.041452,2.0,2.000000,1.000000,2.0,1.000000,2.0,1.000000
284,2022-10-22,1,21,291,4,1,0.00,0,32.000000,32.0,6.0,246.0,0,0,0.00,0.000000,14.453056,0.000,14.451434,0.000,14.451434,0.0,0.000000,3.214550,0.0,3.201562,0.0,3.201562,0.0,0.000000,2.309401,0.0,2.309401,0.0,2.309401,0.0,0.000000,2.309401,0.0,2.000000,0.0,2.000000,0.0,0.000000,10.692677,0.0,10.688779,0.0,10.688779,0.0,0.000000,5.773503,0.0,5.773503,0.0,5.773503,0.0,0.000000,4.041452,0.0,4.041452,0.0,4.041452,0.0,0.000000,1.000000,0.0,0.957427,0.0,0.957427
285,2022-10-22,1,21,356,3,1,37.50,25,16.000000,16.0,6.0,246.0,0,0,34.02,34.020000,17.686270,34.020,15.784406,34.020,15.784406,18.0,18.000000,9.165151,18.0,7.362065,18.0,7.362065,3.0,3.000000,2.081666,3.0,2.049390,3.0,2.049390,2.0,2.000000,1.154701,2.0,1.788854,2.0,1.788854,35.0,35.000000,17.521415,35.0,14.774979,35.0,14.774979,28.0,28.000000,14.189198,28.0,11.436783,28.0,11.436783,4.0,4.000000,3.511885,4.0,3.507136,4.0,3.507136,3.0,3.000000,1.527525,3.0,1.303840,3.0,1.303840
286,2022-10-22,1,21,364,3,1,37.65,49,16.000000,16.0,6.0,246.0,0,0,36.78,36.780000,20.484736,36.780,14.508332,36.780,16.302091,22.0,22.000000,11.718931,22.0,9.338094,22.0,9.332738,7.0,7.000000,3.511885,7.0,2.509980,7.0,2.683282,7.0,7.000000,3.605551,7.0,2.966479,7.0,2.857738,38.0,38.000000,21.126603,38.0,15.280707,38.0,16.354408,29.0,29.000000,16.462078,29.0,12.641202,29.0,12.937027,5.0,5.000000,2.645751,5.0,2.880972,5.0,3.188521,4.0,4.000000,2.081666,4.0,1.581139,4.0,1.632993
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95445,2025-12-22,28,7,433,0,0,34.53,37,28.705882,30.2,-23.0,247.0,0,0,21.10,26.723333,3.510005,23.894,6.474915,24.516,8.022162,5.0,7.333333,3.511885,7.2,3.834058,7.8,5.258855,0.0,1.000000,2.081666,1.0,4.159327,0.9,3.665151,0.0,4.666667,3.464102,3.6,3.033150,2.9,3.190263,17.0,28.000000,8.386497,22.2,5.974948,24.0,8.941787,4.0,10.666667,7.023769,8.6,5.272571,10.3,6.384878,8.0,11.000000,2.081666,9.4,1.673320,9.2,3.306559,5.0,6.333333,3.511885,4.2,2.949576,4.5,3.622461
95446,2025-12-22,28,7,471,2,0,33.92,35,29.619048,25.2,-23.0,247.0,0,0,41.55,40.493333,10.391084,36.620,8.127326,33.418,8.254620,22.0,20.666667,8.544004,18.8,6.348228,16.5,5.963780,8.0,9.666667,4.358899,8.4,3.535534,6.8,3.521363,10.0,11.333333,5.033223,9.2,4.335897,8.3,3.190263,40.0,44.666667,11.676187,40.6,10.014989,35.4,8.511430,27.0,32.666667,11.590226,29.0,8.426150,25.1,6.947422,4.0,4.333333,2.081666,4.8,1.732051,3.9,3.381321,9.0,7.666667,2.309401,6.8,3.286335,6.4,3.205897
95447,2025-12-22,28,7,486,0,0,12.65,8,28.705882,30.2,-23.0,247.0,0,0,21.13,28.633333,11.798162,28.150,8.478411,28.221,8.552769,7.0,9.000000,9.291573,9.2,6.670832,8.6,5.963780,3.0,2.666667,4.041452,2.6,3.114482,3.1,3.399346,4.0,3.333333,5.033223,2.0,3.633180,2.5,3.198958,11.0,21.666667,15.307950,21.0,11.970798,21.7,9.286190,6.0,12.333333,12.741010,12.0,9.370165,11.7,7.441625,4.0,8.666667,2.309401,8.0,1.732051,7.8,1.969207,1.0,0.666667,4.000000,1.0,3.768289,2.2,3.098387
95448,2025-12-22,28,7,496,1,0,36.20,36,28.560000,35.8,-23.0,247.0,0,0,39.78,33.943333,11.313206,34.736,9.873041,34.609,8.475977,23.0,19.333333,8.962886,18.6,8.348653,17.4,6.666667,6.0,7.333333,2.516611,6.0,3.361547,6.4,3.433495,12.0,8.333333,4.163332,8.4,4.774935,7.3,4.087923,54.0,40.000000,21.931712,39.2,17.369514,34.4,12.773236,33.0,25.666667,14.177447,27.2,12.700394,23.8,9.211104,16.0,11.333333,6.928203,10.0,5.079370,8.4,3.835507,5.0,3.000000,4.000000,2.0,3.346640,2.2,2.981424


PRA Stats Model
tensor X torch.Size([93139, 11])
tensor y torch.Size([93139, 1])
Epoch 0001 (0%) | Train RMSE: 111.1316 | Val RMSE: 104.8702
Epoch 0100 (10%) | Train RMSE: 7.7133 | Val RMSE: 7.9648
Epoch 0200 (20%) | Train RMSE: 6.8290 | Val RMSE: 7.1269
Epoch 0300 (30%) | Train RMSE: 6.5657 | Val RMSE: 6.8536
Epoch 0400 (40%) | Train RMSE: 6.3276 | Val RMSE: 6.6057
Epoch 0500 (50%) | Train RMSE: 6.1268 | Val RMSE: 6.3955
Epoch 0600 (60%) | Train RMSE: 5.9664 | Val RMSE: 6.2267
Epoch 0700 (70%) | Train RMSE: 5.8438 | Val RMSE: 6.0971
Epoch 0800 (80%) | Train RMSE: 5.7537 | Val RMSE: 6.0012
Epoch 0900 (90%) | Train RMSE: 5.6894 | Val RMSE: 5.9324


2025-12-24 19:27:23,314	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


Epoch 1000 (100%) | Train RMSE: 5.6444 | Val RMSE: 5.8840


0,1
Current time:,2025-12-24 19:27:36
Running for:,00:00:13.34
Memory:,21.9/63.9 GiB

Trial name,status,loc,hidden_units,lr,iter,total time (s),train_rmse,val_rmse
train_and_eval_a0aca_00000,TERMINATED,127.0.0.1:134236,32,0.00721844,1,2.34459,5.17115,5.25149


Trial name,train_rmse,val_rmse
train_and_eval_a0aca_00000,5.17115,5.25149


2025-12-24 19:27:36,668	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/Rodolfo/Jupyter_files/FantasyBasketball/ray_temp_outputs/train_and_eval_2025-12-24_19-27-23' in 4.1302s.
2025-12-24 19:27:36,699	INFO tune.py:1041 -- Total run time: 13.39 seconds (9.21 seconds for the tuning loop).


Final test RMSE: 5.433882236480713


In [94]:
# rmse = np.sqrt(mean_squared_error(main_splits[5], stat_preds)) # splits[5] = y_test
# mae = mean_absolute_error(main_splits[5], stat_preds)
# pred_col = f'{tgt_stat}_Pred'
# df_lines = pd.read_csv(f"../tables/2025/parlay_lines.csv")
# df_lines['Date'] = pd.to_datetime(df_lines.Date)
# df_lines = df_lines[['Date', 'Team', 'Player', f'{tgt_stat}_line']]
# main_test_df[pred_col] = stat_preds
# main_test_df['Team'] = team_encoder.inverse_transform(main_test_df["Team"])
# main_test_df['Player'] = player_encoder.inverse_transform(main_test_df["Player"])

# df_test = main_test_df.merge(df_lines, on=['Date', 'Team', 'Player'])
# df_test = df_test[[c for c in df_test.columns if c != pred_col] + [pred_col]]

# df_test['Diff'] = df_test[f'{tgt_stat}_Pred'] - df_test[f'{tgt_stat}_line']
# df_test['Act_Res'] = np.where(df_test[tgt_stat] > df_test[f'{tgt_stat}_line'], 'O', 'U')
# df_test['Pred_Res'] = np.where(df_test[pred_col] > df_test[f'{tgt_stat}_line'], 'O', 'U')
# df_test['ParlayHit'] = np.where(df_test['Act_Res'] == df_test['Pred_Res'], 1, 0)

# df_test['Diff2'] = abs(df_test[f'{tgt_stat}_Pred'] - df_test[tgt_stat])
# df_test['InRMSE_Range'] = np.where(df_test['Diff2'] <= rmse, 1, 0)

# print("Total Accuracy (ParlayHit):", ((df_test.ParlayHit == 1).sum() / df_test.shape[0]))
# print((df_test.ParlayHit == 1).sum(), "/", df_test.shape[0])

# print("\nTotal Accuracy (InRMSE_Range):", ((df_test.InRMSE_Range == 1).sum() / df_test.shape[0]))
# print((df_test.InRMSE_Range == 1).sum(), "/", df_test.shape[0])

# df_ystrday = df_test[(df_test.Date == (datetime.strptime(now, "%Y-%m-%d") - timedelta(days=1)).strftime("%Y-%m-%d")) & ~(df_test[f'{tgt_stat}_line'].isnull())]\
#             [['Team', 'Player', tgt_stat, f'{tgt_stat}_line', f'{tgt_stat}_Pred', 'ParlayHit', 'Diff', 'InRMSE_Range', 'Diff2']]\
#             .sort_values(f'{tgt_stat}_line', ascending=False)

# print("\nYesterday's Results:")
# print("Total Accuracy (ParlayHit):", ((df_ystrday.ParlayHit == 1).sum() / df_ystrday.shape[0]))
# print("Total Accuracy (InRMSE_Range):", ((df_ystrday.InRMSE_Range == 1).sum() / df_ystrday.shape[0]))
# if df_ystrday.shape[0] >= 50:
#     for tm in df_ystrday.Team.unique():
#         display(df_ystrday[df_ystrday.Team == tm])
# else:
#     display(df_ystrday)

### Today's predictions

In [128]:
df_lines = pd.read_csv(f"../tables/2025/parlay_lines.csv")
df_lines['Date'] = pd.to_datetime(df_lines.Date)

df_pred = pd.read_csv("../tables/2025/parlay_stats.csv")
df_pred['Date'] = pd.to_datetime(df_pred.Date)
df_pred['Season'] = 2025
for col in df_pred.select_dtypes(include='object').columns:
    df_pred[col] = df_pred[col].astype('category')
df_pred = df_pred.drop(['Spread', 'Total'], axis=1).merge(df_mtch, on=['Season', 'Date', 'Team'])
df_pred[tgt_stat] = 0

# Predict minutes
df_act_mins = pd.read_csv("../tables/2025/parlay_actuals.csv")
df_act_mins['Date'] = pd.to_datetime(df_act_mins.Date)
df_pred = df_pred[df_pred.Player.isin(df.Player_name.unique())].merge(df_act_mins[['Date', 'Team', 'Player', 'MP', 'TPM']], on=['Date', 'Team', 'Player'], how='left')
df_temp = df.copy()
df_temp["Team"] = team_encoder.inverse_transform(df_temp["Team"])
df_temp['Player'] = player_encoder.inverse_transform(df_temp["Player"])
df_pred = df_pred.merge(df_temp[['Date', 'Team', 'Player', 'TOV', 'PF', '+/-', 'FGA', 'FG', 'TPA', 'FT', 'FTA',
                             'AST', 'REB', 'PR', 'PA', 'RA', 'STL', 'BLK', 'STL_BLK']], on=['Date', 'Team', 'Player'], how='left')

df_pred = df_pred.merge(df_lines, on=['Date', 'Team', 'Player'], how='left')
df_pred['Spread_x'] = np.where(df_pred.Spread_x.isnull(), df_pred.Spread_y, df_pred.Spread_x)
df_pred['Total_x'] = np.where(df_pred.Total_x.isnull(), df_pred.Total_y, df_pred.Total_x)
df_pred = df_pred.rename(columns={"Spread_x": "Spread", "Total_x": "Total"}).drop(['Spread_y', 'Total_y'], axis=1)
df_pred_mins = setup_df_mins(con, df_pred)
df_pred_mins = df_pred_mins.drop(['Season', 'Date', 'MP'], axis=1)
df_pred_mins["Team"] = team_encoder.transform(df_pred_mins["Team"])
df_pred_mins["Opp"] = team_encoder.transform(df_pred_mins["Opp"])
df_pred_mins['Team_type'] = team_type_encoder.transform(df_pred_mins['Team_type'])
df_pred_mins["Player"] = player_encoder.transform(df_pred_mins["Player"])
df_pred_mins["Pos"] = position_encoder.transform(df_pred_mins["Pos"])
X_pred = df_pred_mins.values
X_pred_tensor = torch.tensor(X_pred, dtype=torch.float32).to(device)
mins_model.eval()
with torch.no_grad():
    mins_preds = mins_model(X_pred_tensor)          # PyTorch tensor predictions
    df_pred['MP'] = mins_preds.cpu().numpy()       # Convert to NumPy array if needed

df_pred['N_TPM'] = df_pred.FG - df_pred.TPM
df_pred['PTS'] = (df_pred.FT * 1) + (df_pred.N_TPM * 2) + (df_pred.TPM * 3)
df_pred = setup_df_main(df_pred)
feature_cols = [col for col in df_pred.columns if col not in ['Date', tgt_stat]]
df_pred = df_pred[df_pred.Date == now][feature_cols]

# Predict stat
df_pred["Team"] = team_encoder.transform(df_pred["Team"])
df_pred["Opp"] = team_encoder.transform(df_pred["Opp"])
df_pred["Player"] = player_encoder.transform(df_pred["Player"])
df_pred['Pos'] = position_encoder.transform(df_pred["Pos"])
X_pred = df_pred.values
X_pred_tensor = torch.tensor(X_pred, dtype=torch.float32).to(device)
stat_model.eval()
with torch.no_grad():
    stat_preds = stat_model(X_pred_tensor)          # PyTorch tensor predictions
    df_pred[f"{tgt_stat}_proj"] = stat_preds.cpu().numpy()       # Convert to NumPy array if needed

df_pred['Team'] = team_encoder.inverse_transform(df_pred["Team"])
df_pred['Opp'] = team_encoder.inverse_transform(df_pred["Opp"])
df_pred['Player'] = player_encoder.inverse_transform(df_pred["Player"])
df_pred['Pos'] = position_encoder.inverse_transform(df_pred["Pos"])

df_lines = df_lines[df_lines.Date == now][['Team', 'Player', f'{tgt_stat}_line']]
df_pred = df_pred.merge(df_lines, on=['Team', 'Player'])

tds_picks = df_pred[~(df_pred[f'{tgt_stat}_line'].isnull())]\
            [['Team', 'Player', 'MP', 'MP_last_5_avg', f'{tgt_stat}_line', f'{tgt_stat}_proj']]
tds_picks['Diff'] = abs((df_pred[f'{tgt_stat}_line'] - df_pred[f'{tgt_stat}_proj']))
tds_picks['Diff2'] = abs((df_pred['MP'] - df_pred['MP_last_5_avg']))
tds_picks = tds_picks.sort_values('Diff', ascending=False).drop(['Diff', 'Diff2'], axis=1)
display(tds_picks)
# tds_picks.insert(0, 'Date', pd.to_datetime(now))
# partition_save_df(tds_picks, f"../tables/2025/gmday_preds_{tgt_stat}.csv")

Unnamed: 0,Team,Player,MP,MP_last_5_avg,PRA_line,PRA_proj
0,GSW,Stephen Curry,32.35358,79.525756,37.5,-10.709106


In [104]:
now = '2025-12-25'