# To do:

 - Figure out how to signal injuries
 - Learn basic PyTorch

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import duckdb
import warnings
import math         # haversine_km()
import os

import ray
from ray import tune
from ray import train
import torch
import torch.nn as nn
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# import xgboost as xgb
# from xgboost import XGBRegressor
# from scipy.stats import randint, uniform

from sklearn.preprocessing import LabelEncoder
# from sklearn.model_selection import train_test_split
# from sklearn.model_selection import RandomizedSearchCV
# from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import joblib
import warnings
from datetime import datetime, timedelta
from haversine import haversine

pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")

categories = ['PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK']
con = duckdb.connect(database=":memory:")
now = str(datetime.now().date())
print(f"Today's date:", now)

Using device: cuda
Today's date: 2025-12-19


In [2]:
%run ./common_utils.ipynb

# ML Functions

In [3]:
def feature_importance(model):
    importance = model.get_score(importance_type='gain')

    # Convert to table
    df_importance = (
        pd.DataFrame({
            'feature': list(importance.keys()),
            'importance': list(importance.values())
        })
        .sort_values(by='importance', ascending=False)
        .reset_index(drop=True)
    )

    df_importance['pct'] = df_importance.importance.cumsum() / df_importance.importance.sum()
    display(df_importance)

    xgb.plot_importance(model)
    plt.show()

In [4]:
class BaselineRegression(nn.Module):
    def __init__(self, input_dim, hidden_units=None):
        super().__init__()
        
        if hidden_units:  # if hidden_units is provided, create a hidden layer
            self.network = nn.Sequential(
                nn.Linear(input_dim, hidden_units),
                nn.ReLU(),                 # activation for hidden layer
                nn.Linear(hidden_units, 1) # output layer
            )
        else:  # if no hidden_units, just a simple linear regression
            self.network = nn.Linear(input_dim, 1)

    def forward(self, x):
        return self.network(x)

In [5]:
def create_baseline_model(df, pred_col):

    if pred_col == 'MP':
        print('Minutes Model')
        feature_cols = [
            'Team', 'Player', 
            'MP', 'MP_lst_gm', 'MP_last_5_avg', 'MP_last_10_avg',
            'starter', 'bench', 'reserve'
        ]
    else:
        print('Stats Model')
        feature_cols = [
            tgt_stat,
            'MP_lst_gm',
            'MP_last_5_avg',
            'MP_last_10_avg',
            f'Off_{tgt_stat}', f'Off_L5_{tgt_stat}',
            f'Def_{tgt_stat}', f'Def_L5_{tgt_stat}',
            'DaysLstGm'
        ]

    X = df[feature_cols].values
    y = df[pred_col].values

    X = torch.tensor(X, dtype=torch.float32).to(device)
    y = torch.tensor(y, dtype=torch.float32).unsqueeze(1).to(device)

    print("tensor X", X.shape)
    print("tensor y", y.shape)

    N = len(X)
    split = int(0.8 * N)

    X_train, X_val = X[:split], X[split:]
    y_train, y_val = y[:split], y[split:]

    model = BaselineRegression(input_dim=X.shape[1]).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    num_epochs = 100

    for epoch in range(num_epochs):
        model.train()

        optimizer.zero_grad()
        preds = model(X_train)
        loss = criterion(preds, y_train)
        loss.backward()
        optimizer.step()

        if epoch % 10 == 0:
            model.eval()
            with torch.no_grad():
                val_preds = model(X_val)
                val_loss = criterion(val_preds, y_val)

            print(
                f"Epoch {epoch:03d} | "
                f"Train MSE: {loss.item():.4f} | "
                f"Val MSE: {val_loss.item():.4f}"
            )

In [12]:
def train_mdl(model, X_train, y_train, num_epochs=50, lr=0.001):
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()

    for epoch in range(num_epochs):
        optimizer.zero_grad()
        preds = model(X_train)
        loss = criterion(preds, y_train)
        loss.backward()
        optimizer.step()
        
    return loss.item()
    
def eval_mdl(model, X_val, y_val):
    model.eval()
    with torch.no_grad():
        preds = model(X_val)
        criterion = torch.nn.MSELoss()
        val_loss = criterion(preds, y_val)
    return val_loss.item()
def train_and_eval(config):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    model = BaselineRegression(input_dim=config["input_dim"], hidden_units=config["hidden_units"]).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])
    criterion = nn.MSELoss()
    
    X_train_device = config["X_train"].to(device)
    y_train_device = config["y_train"].to(device)
    X_val_device = config["X_val"].to(device)
    y_val_device = config["y_val"].to(device)
    
    train_loss = train_mdl(model, X_train_device, y_train_device, num_epochs=50, lr=config["lr"])
    val_loss = eval_mdl(model, X_val_device, y_val_device)
    
    tune.report({"train_loss": train_loss, "val_loss": val_loss})

def hyperparam_tuning(X, y, num_samples):

    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)

    N = len(X)
    train_end = int(0.7 * N)
    val_end = int(0.85 * N)

    X_train, y_train = X[:train_end], y[:train_end]
    X_val, y_val = X[train_end:val_end], y[train_end:val_end]
    X_test, y_test = X[val_end:], y[val_end:]

    search_space = {
        "lr": tune.loguniform(1e-4, 1e-2),
        "hidden_units": tune.choice([8,16,32]),
        "input_dim": X.shape[1],
        "X_train": X_train,
        "y_train": y_train,
        "X_val": X_val,
        "y_val": y_val
    }

    abs_path = os.path.abspath("../ray_temp_outputs")  # convert to absolute path
    analysis = tune.run(train_and_eval, config=search_space, num_samples=num_samples, storage_path=abs_path)
    best_trial = analysis.get_best_trial(
        metric="val_loss",  # the metric you want to optimize
        mode="min",         # 'min' because lower val_loss is better
        scope="all"         # look across all reported steps
    )
    # Select device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Move final model to device
    best_config = best_trial.config
    final_model = BaselineRegression(input_dim=X.shape[1], 
                                     hidden_units=best_config["hidden_units"]).to(device)

    # Combine training + validation data optionally (common practice for final training)
    X_trainval = torch.cat([X_train, X_val], dim=0).to(device)
    y_trainval = torch.cat([y_train, y_val], dim=0).to(device)

    # Train final model with best hyperparameters
    train_mdl(final_model, X_trainval, y_trainval, num_epochs=100, lr=best_config["lr"])

    # Evaluate on test set
    X_test_device = X_test.to(device)
    y_test_device = y_test.to(device)
    test_loss = eval_mdl(final_model, X_test_device, y_test_device)
    print("Final test loss:", test_loss)

    # Get predictions (e.g., on test or new data)
    final_model.eval()
    with torch.no_grad():
        preds = final_model(X_test_device)
        preds_np = preds.cpu().numpy()
    print(preds_np)

### Create Base df

In [7]:
df = pd.DataFrame()
df2 = pd.DataFrame()
df3 = pd.DataFrame()
for i in [2022, 2023, 2024, 2025]:
    df_actuals = pd.read_csv(f"../tables/{i}/parlay_actuals.csv")
    df_actuals['Season'] = i
    df = pd.concat([df, df_actuals])

    df_schd = pd.read_csv(f"../tables/{i}/nba_schedule.csv")
    df_schd['Season'] = i
    df2 = pd.concat([df2, df_schd])
    
    df_gms = pd.read_csv(f"../tables/{i}/season_gamelogs.csv")
    df_gms['Season'] = i
    df3 = pd.concat([df3, df_gms])

df['Date'] = pd.to_datetime(df.Date)
df2['Date'] = pd.to_datetime(df2.Date)
df3['Date'] = pd.to_datetime(df3.Date)

df['Tms'] = df['game_id'].apply(lambda x: x.split("_")[1:3])
df['WrngTm'] = df.apply(lambda row: 0 if row['Team'] in row['Tms'] else 1, axis=1)
df['WrngOpp'] = df.apply(lambda row: 0 if row['Opp'] in row['Tms'] else 1, axis=1)
df = df[(df.WrngTm == 0) & (df.WrngOpp == 0)].drop(['WrngTm', 'WrngOpp', 'Tms'], axis=1)

df3 = df3[['game_id', 'Date', 'Team', 'Player', 'FG', 'FGA', 'FG%', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'TOV', 'PF', '+/-']]\
        .rename(columns={"3PA": "TPA", "3P%": "TP%"})
df3 = df3[~df3[['Date', 'Team', 'Player']].duplicated(keep='last')]
df = df.merge(df3, on=['game_id', 'Date', 'Team', 'Player'])

df_mtch = df2[['Season', 'Date', 'AwayABV', 'HomeABV', 'AwayPTS', 'HomePTS', 'AwayB2B', 'HomeB2B', 'cup_gm', 'pstszn_gm']]
df_mtch['Team_type'] = 'Away'
df_mtch = df_mtch.rename(columns={"AwayABV": "Team", "HomeABV": "Opp", "AwayB2B": "B2B"})[['Season', 'Date', 'Team', 'AwayPTS', 'HomePTS', 'Opp', 'B2B', 'cup_gm', 'pstszn_gm', 'Team_type']]
df_mtch2 = df_mtch.copy().rename(columns={"Team": "Opp", "Opp": "Team", "HomeB2B": "B2B"})[['Season', 'Date', 'Team', 'AwayPTS', 'HomePTS', 'Opp', 'B2B', 'cup_gm', 'pstszn_gm']]
df_mtch2['Team_type'] = 'Home'
df_mtch = pd.concat([df_mtch, df_mtch2])
df_mtch = df_mtch[['Season', 'Date', 'Team', 'Team_type', 'AwayPTS', 'HomePTS', 'cup_gm', 'pstszn_gm']]
df_mtch = df_mtch.sort_values(["Team", "Date"])
df_mtch['team_game_num'] = df_mtch.groupby(["Team", "Season"]).cumcount() + 1
df_mtch['Spread'] = np.where(df_mtch.Team_type == 'Home', df_mtch.HomePTS - df_mtch.AwayPTS, df_mtch.AwayPTS - df_mtch.HomePTS)
df_mtch['Total'] = df_mtch.AwayPTS + df_mtch.HomePTS
df_mtch['is_Win'] = np.where(df_mtch.Spread > 0, 1, 0)
df_mtch['Szn_Wins'] = df_mtch.groupby(['Season', 'Team'])['is_Win'].cumsum()
df = df.drop(['Spread', 'Total'], axis=1).merge(df_mtch, on=['Season', 'Date', 'Team'])

team_encoder = LabelEncoder()
player_encoder = LabelEncoder()
team_type_encoder = LabelEncoder()
position_encoder = LabelEncoder()

# Encode string cols
team_encoder.fit(pd.concat([df["Team"], df["Opp"]], axis=0))
df["Team"] = team_encoder.transform(df["Team"])
df["Opp"] = team_encoder.transform(df["Opp"])
df["Player_name"] = df.Player
df["Player"] = player_encoder.fit_transform(df["Player"])
df["Pos"] = position_encoder.fit_transform(df["Pos"])
df['Team_type'] = team_type_encoder.fit_transform(df['Team_type'])
df = df.sort_values(['Season', 'Date', 'Team', 'Player']).reset_index(drop=True)
print('base df created', datetime.now())

base df created 2025-12-19 22:25:43.799450


# Minutes Projection Model

In [8]:
def setup_df_mins(con, df):
    
    df = df[['Season', 'Date', 'game_id', 'Team', 'Team_type', 'Opp', 'Player', 'Pos', 'B2B', 'MP', 'TOV', 'PF', '+/-',
             'Spread', 'Total', 'team_game_num', 'Szn_Wins', 'cup_gm', 'pstszn_gm']]
    
    for col in ['MP', 'TOV', 'PF', '+/-']:
        df[f'{col}_lst_gm'] = (
            df
            .groupby(['Player', 'Season'])[col]
            .shift(1)
        )

        df[f'{col}_last_5_avg'] = (
            df.groupby(['Player', 'Season'])[col]
              .rolling(window=5, min_periods=1)
              .mean()
              .shift(1)
              .reset_index(level=[0, 1], drop=True)
        )

        df[f'{col}_last_10_avg'] = (
            df.groupby(['Player', 'Season'])[col]
              .rolling(window=10, min_periods=1)
              .mean()
              .shift(1)
              .reset_index(level=[0, 1], drop=True)
        )

    games_last_7_days = df.groupby(['Player', 'Season']).rolling('7D', on='Date')['MP'].count().shift(1).to_frame(name='games_last_7_days').reset_index()
    df = df.merge(games_last_7_days, on=['Player', 'Season', 'Date'])
    df['games_last_7_days'] = df.games_last_7_days.fillna(0).astype(int)
    
    df['prev_team_mins_pct'] = (df.groupby(['Player', 'Season'])['MP'].shift(1)) / 240
        
    
    df['reserve_td'] = (df.MP < 8).astype(int)
    df['bench_td']   = ((df.MP >= 8) & (df.MP <= 25)).astype(int)
    df['starter_td'] = (df.MP > 25).astype(int)
    role_counts = df.groupby(['Season', 'Player'])[['reserve_td', 'bench_td', 'starter_td']].sum()
    role_counts['most_common_role'] = role_counts[['reserve_td', 'bench_td', 'starter_td']].idxmax(axis=1)
    role_counts['reserve'] = (role_counts['most_common_role'] == 'reserve_td').astype(int)
    role_counts['bench']   = (role_counts['most_common_role'] == 'bench_td').astype(int)
    role_counts['starter'] = (role_counts['most_common_role'] == 'starter_td').astype(int)
    df = df.merge(role_counts[['reserve', 'bench', 'starter']], on=['Season', 'Player'], how='left')
      
    df['missed_games'] = (
        df.groupby(['Player', 'Team', 'Season'])['team_game_num']      
          .diff()
          .sub(1)
          .fillna(0)
          .astype(int)
    )

#     df["career_min_mins"] = (
#         df.assign(MP_nonzero=df["MP"].replace(0, np.nan))
#           .groupby("Player")["MP_nonzero"]
#           .cummin()
#           .shift(1)
#     )
#     df["career_max_mins"] = df.groupby("Player")["MP"].cummax().shift(1)
    
    df['blowout'] = np.where(abs(df.Spread >= 15), 1, 0)
    
#     df['Szn_Wins'] = df.groupby(['Player', 'Season', 'Team'])['Szn_Wins'].shift(1).fillna(0)
#     df['Win_Pct'] = df.Szn_Wins / df.team_game_num
    
    # Have to derive OppSzn_Wins and then add it to the df
#     df['OppSzn_Wins'] = df.groupby(['Player', 'Season', 'Opp'])['Szn_Wins'].shift(1).fillna(0)
    
    df = df.drop(['reserve_td', 'bench_td', 'starter_td', 'Szn_Wins', 'TOV', 'PF', '+/-'], axis=1)    
    
    return df

In [9]:
df_mins = df.copy()
df_mins = setup_df_mins(con, df_mins)
df_mins = df_mins.dropna() # TEMP SOLUTION TO NULLS
display(df_mins)

create_baseline_model(df_mins, 'MP')

Unnamed: 0,Season,Date,game_id,Team,Team_type,Opp,Player,Pos,B2B,MP,Spread,Total,team_game_num,cup_gm,pstszn_gm,MP_lst_gm,MP_last_5_avg,MP_last_10_avg,TOV_lst_gm,TOV_last_5_avg,TOV_last_10_avg,PF_lst_gm,PF_last_5_avg,PF_last_10_avg,+/-_lst_gm,+/-_last_5_avg,+/-_last_10_avg,games_last_7_days,prev_team_mins_pct,reserve,bench,starter,missed_games,blowout
281,2022,2022-10-22,20221022_BOS_ORL,1,0,21,61,0,1,9.55,6.0,246.0,3,0,0,0.00,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.000000,1,0,0,0,0
282,2022,2022-10-22,20221022_BOS_ORL,1,0,21,188,4,1,36.95,6.0,246.0,3,0,0,24.65,24.650,24.650,1.0,1.0,1.0,4.0,4.0,4.0,1.0,1.0,1.0,1,0.102708,0,0,1,0,0
283,2022,2022-10-22,20221022_BOS_ORL,1,0,21,256,1,1,31.05,6.0,246.0,3,0,0,25.40,25.400,25.400,1.0,1.0,1.0,1.0,1.0,1.0,15.0,15.0,15.0,1,0.105833,0,0,1,0,0
284,2022,2022-10-22,20221022_BOS_ORL,1,0,21,291,4,1,0.00,6.0,246.0,3,0,0,0.00,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.000000,1,0,0,0,0
285,2022,2022-10-22,20221022_BOS_ORL,1,0,21,356,3,1,37.50,6.0,246.0,3,0,0,34.02,34.020,34.020,3.0,3.0,3.0,5.0,5.0,5.0,-4.0,-4.0,-4.0,1,0.141750,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95085,2025,2025-12-18,20251218_LAL_UTA,28,1,13,704,3,0,16.98,-8.0,278.0,26,0,0,0.00,20.166,22.319,0.0,1.2,0.9,0.0,0.8,1.2,0.0,-1.4,-2.7,2,0.000000,0,1,0,0,0
95086,2025,2025-12-18,20251218_WAS_SAS,29,0,26,59,4,0,22.57,-25.0,213.0,25,0,0,24.82,29.488,27.462,1.0,2.0,1.8,0.0,3.2,2.9,-20.0,-1.6,-5.1,3,0.103417,0,0,1,4,0
95087,2025,2025-12-18,20251218_WAS_SAS,29,0,26,89,4,0,29.10,-25.0,213.0,25,0,0,34.13,32.222,31.599,1.0,2.6,1.8,2.0,2.8,2.4,10.0,-8.6,-5.9,2,0.142208,0,0,1,0,0
95088,2025,2025-12-18,20251218_WAS_SAS,29,0,26,491,3,0,36.08,-25.0,213.0,25,0,0,35.88,32.040,31.974,3.0,3.0,3.3,4.0,4.0,3.5,13.0,-5.2,-6.7,2,0.149500,0,0,1,0,0


Minutes Model
tensor X torch.Size([93006, 9])
tensor y torch.Size([93006, 1])
Epoch 000 | Train MSE: 13359.7246 | Val MSE: 12483.2119
Epoch 010 | Train MSE: 3987.5242 | Val MSE: 3489.2412
Epoch 020 | Train MSE: 382.0411 | Val MSE: 274.4695
Epoch 030 | Train MSE: 97.7389 | Val MSE: 125.1749
Epoch 040 | Train MSE: 211.2621 | Val MSE: 207.6669
Epoch 050 | Train MSE: 80.2512 | Val MSE: 72.8527
Epoch 060 | Train MSE: 37.4241 | Val MSE: 41.7847
Epoch 070 | Train MSE: 44.5399 | Val MSE: 48.2944
Epoch 080 | Train MSE: 37.8444 | Val MSE: 40.8183
Epoch 090 | Train MSE: 35.8572 | Val MSE: 39.0529


In [14]:
df_mins = df.copy()
df_mins = setup_df_mins(con, df_mins)
df_mins = df_mins.dropna() # TEMP SOLUTION TO NULLS
df_mins = df_mins.drop(['Season', 'Date', 'game_id'], axis=1)
for col in df_mins.columns:
    if not pd.api.types.is_float_dtype(df_mins[col]):
        df_mins[col] = df_mins[col].astype(float)

X = df_mins.drop('MP', axis=1).values
y = df_mins['MP'].values
hyperparam_tuning(X, y, 10)

2025-12-19 22:27:35,788	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2025-12-19 22:28:00
Running for:,00:00:24.88
Memory:,25.8/63.9 GiB

Trial name,status,loc,hidden_units,lr,iter,total time (s),train_loss,val_loss
train_and_eval_f9580_00000,TERMINATED,127.0.0.1:102856,8,0.000801581,1,1.62539,656.307,641.867
train_and_eval_f9580_00001,TERMINATED,127.0.0.1:28428,8,0.000626826,1,1.65317,195.782,184.507
train_and_eval_f9580_00002,TERMINATED,127.0.0.1:92416,16,0.00074841,1,1.62589,176.717,172.894
train_and_eval_f9580_00003,TERMINATED,127.0.0.1:103588,16,0.000215781,1,1.76328,404.082,378.514
train_and_eval_f9580_00004,TERMINATED,127.0.0.1:104000,32,0.00422903,1,1.65267,46.4583,47.5831
train_and_eval_f9580_00005,TERMINATED,127.0.0.1:108464,32,0.000341047,1,1.72608,167.818,155.295
train_and_eval_f9580_00006,TERMINATED,127.0.0.1:8800,32,0.00874704,1,1.74394,46.015,47.1273
train_and_eval_f9580_00007,TERMINATED,127.0.0.1:101864,8,0.00701523,1,1.65366,88.7405,83.317
train_and_eval_f9580_00008,TERMINATED,127.0.0.1:80560,8,0.000696234,1,1.77965,328.974,328.822
train_and_eval_f9580_00009,TERMINATED,127.0.0.1:58288,8,0.0076273,1,1.69483,45.7641,45.9884


Trial name,train_loss,val_loss
train_and_eval_f9580_00000,656.307,641.867
train_and_eval_f9580_00001,195.782,184.507
train_and_eval_f9580_00002,176.717,172.894
train_and_eval_f9580_00003,404.082,378.514
train_and_eval_f9580_00004,46.4583,47.5831
train_and_eval_f9580_00005,167.818,155.295
train_and_eval_f9580_00006,46.015,47.1273
train_and_eval_f9580_00007,88.7405,83.317
train_and_eval_f9580_00008,328.974,328.822
train_and_eval_f9580_00009,45.7641,45.9884


2025-12-19 22:28:00,681	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/Rodolfo/Jupyter_files/FantasyBasketball/ray_temp_outputs/train_and_eval_2025-12-19_22-27-35' in 11.7699s.
2025-12-19 22:28:00,708	INFO tune.py:1041 -- Total run time: 24.92 seconds (13.11 seconds for the tuning loop).


Final test loss: 48.07678985595703
[[35.287098]
 [35.190907]
 [19.317976]
 ...
 [29.821783]
 [34.515034]
 [18.184652]]


In [None]:
# rmse = np.sqrt(mean_squared_error(mins_splits[5], mins_preds)) # splits[5] = y_test
# mins_test_df['MP_pred'] = mins_preds
# df_test = mins_test_df.copy()

# df_test['Team'] = team_encoder.inverse_transform(df_test['Team'])
# df_test['Opp'] = team_encoder.inverse_transform(df_test['Opp'])
# df_test['Player'] = player_encoder.inverse_transform(df_test['Player'])
# df_test['Pos'] = position_encoder.inverse_transform(df_test['Pos'])

# df_test['Diff'] = abs(df_test['MP_pred'] - df_test['MP'])
# df_test['InRMSE_Range'] = np.where(df_test['Diff'] <= rmse, 1, 0)

# print("Total Accuracy (InRMSE_Range):", ((df_test.InRMSE_Range == 1).sum() / df_test.shape[0]))
# print((df_test.InRMSE_Range == 1).sum(), "/", df_test.shape[0])

# df_ystrday = df_test[(df_test.Date == (datetime.strptime(now, "%Y-%m-%d") - timedelta(days=1)).strftime("%Y-%m-%d"))]\
#             [['Team', 'Player', 'Pos', 'Opp', 'MP', 'MP_pred', 'InRMSE_Range', 'Diff', 'Spread']]
# print("\nYesterday's Results:")
# print("Total Accuracy (InRMSE_Range):", ((df_ystrday.InRMSE_Range == 1).sum() / df_ystrday.shape[0]))
# if df_ystrday.shape[0] >= 50:
#     for tm in df_ystrday.Team.unique():
#         display(df_ystrday[df_ystrday.Team == tm])
# else:
#     display(df_ystrday)

# Main Model

In [15]:
def haversine_km(lat1, lon1, lat2, lon2):
    R = 6371.0  # Earth radius in km
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon/2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

def travel_km_from_row(row):
    prev = row['PrevLocation']
    cur  = row['Location']
    # missing prev => first game => no travel
    if pd.isna(prev) or pd.isna(cur):
        return 0.0
    # same arena => 0
    if prev == cur:
        return 0.0
    # lookup coords
    prev_coords = arenas.get(prev)
    cur_coords  = arenas.get(cur)
    if not prev_coords or not cur_coords:
        # fallback if code not found
        return 0.0
    return haversine_km(prev_coords[0], prev_coords[1], cur_coords[0], cur_coords[1])

In [16]:
def setup_df_main(df):
    
    # Minutes based Features
    df['MP_lst_gm'] = (
        df
        .groupby(['Player', 'Season'])['MP']
        .shift(1)
    )

    df['MP_last_5_avg'] = (
        df.groupby(['Player', 'Season'])['MP']
          .rolling(window=5, min_periods=1)
          .mean()
          .shift(1)
          .reset_index(level=[0, 1], drop=True)
    )
    
    df['MP_last_10_avg'] = (
        df.groupby(['Player', 'Season'])['MP']
          .rolling(window=10, min_periods=1)
          .mean()
          .shift(1)
          .reset_index(level=[0, 1], drop=True)
    )
    
    # Location based features
    df["PrevOpp"] = df.groupby("Player")["Opp"].shift(1)
    df["DaysLstGm"] = (df.groupby("Player")["Date"].diff().dt.days).fillna(0).astype(int)
    df['Location'] = df.apply(lambda r: r['Team'] if r['Team_type'] == 'Home' else r['Opp'], axis=1)
    df['PrevLocation'] = df.groupby('Player')['Location'].shift(1)
    df['travel_km'] = df.apply(travel_km_from_row, axis=1).fillna(0)
    df['travel_hours'] = df['travel_km'] / 800.0      # approximate flight hours
    df['is_long_trip'] = (df['travel_km'] > 1500).astype(int)
    df['same_arena'] = (df['PrevLocation'] == df['Location']).astype(int)
    
    # Efficiency metrics
    df['three_rate_raw'] =  np.where(df.FGA > 0, df['TPA'] / df['FGA'], 0)
    df['ft_rate_raw']    =  np.where(df.FGA > 0, df['FTA'] / df['FGA'], 0)
    df['eFG_raw'] = (df['FG'] + 0.5 * df['TPM']) / df['FGA']
    df['TS_raw'] = df['PTS'] / (2 * (df['FGA'] + 0.44 * df['FTA']))    
    df['usage_proxy_raw'] =  np.where(df.MP > 0, (df['FGA'] + 0.44 * df['FTA']) / df['MP'], 0)
    eff_cols = []
    for w in [3, 5, 10]:
        for metric in ['three_rate', 'ft_rate', 'eFG', 'TS', 'usage_proxy']:
            col = f"{metric}_L{w}"
            df[col] = (
                df.groupby(['Player','Season'])[f'{metric}_raw']
                  .rolling(w, min_periods=1)
                  .mean()
                  .shift(1)
                  .reset_index(level=[0,1], drop=True)
            )
            eff_cols.append(col)
    for metric in ['three_rate', 'ft_rate', 'eFG', 'TS', 'usage_proxy']:
        col = f'{metric}_weighted'
        df[col] = (
            0.6 * df[f'{metric}_L3'] +
            0.3 * df[f'{metric}_L5'] +
            0.1 * df[f'{metric}_L10']
        )
        eff_cols.append(col)
    
    df['FGA_L5_avg'] = df.groupby(['Player', 'Season'])['FGA'].rolling(5, min_periods=1).mean().shift(1).reset_index(level=[0, 1], drop=True)
    df['TPA_L5_avg'] = df.groupby(['Player', 'Season'])['TPA'].rolling(5, min_periods=1).mean().shift(1).reset_index(level=[0, 1], drop=True)
    df['FTA_L5_avg'] = df.groupby(['Player', 'Season'])['FTA'].rolling(5, min_periods=1).mean().shift(1).reset_index(level=[0, 1], drop=True)
    
    stat_cols = []
    for col in [f'Off_{tgt_stat}', f'Off_L5_{tgt_stat}', f'Def_{tgt_stat}', f'Def_L5_{tgt_stat}']:
        stat_cols.append(col)

    final_cols = ['Date', 'Team', 'Team_type', 'B2B', 'cup_gm', 'pstszn_gm', 'Player', 'Pos', 'Opp', 
                  'MP', 'MP_lst_gm', 'MP_last_5_avg', 'MP_last_10_avg', 'Spread', 'Total',
                  'DaysLstGm', 'travel_km', 'travel_hours', 'PrevLocation', 'is_long_trip', 'same_arena', 
                  'FGA_L5_avg', 'TPA_L5_avg', 'FTA_L5_avg',
                  tgt_stat] + stat_cols + eff_cols
    df = df[final_cols]
    
    for col in df.select_dtypes(include='number').columns:
        df[col] = df[col].fillna(0)
        
    # PRA features
    if tgt_stat == 'PTS':
        pass
    elif tgt_stat == 'PRA':
        df['PRA_per_min'] = np.where(df.MP > 0, df.PRA / df['MP'], 0)
        df['PRA_last_5_per_min_avg'] = df.groupby('Player')['PRA_per_min'].rolling(5, min_periods=1).mean().shift(1).reset_index(level=[0,1], drop=True)
        df = df.drop(['PRA_per_min'], axis=1)
        
    return df

In [17]:
tgt_stat = "PRA"
df_main = df.copy()
df_main = setup_df_main(df_main)
df_main = df_main.dropna() # TEMP SOLUTION TO NULLS
display(df_main)

create_baseline_model(df_main, tgt_stat)

Unnamed: 0,Date,Team,Team_type,B2B,cup_gm,pstszn_gm,Player,Pos,Opp,MP,MP_lst_gm,MP_last_5_avg,MP_last_10_avg,Spread,Total,DaysLstGm,travel_km,travel_hours,PrevLocation,is_long_trip,same_arena,FGA_L5_avg,TPA_L5_avg,FTA_L5_avg,PRA,Off_PRA,Off_L5_PRA,Def_PRA,Def_L5_PRA,three_rate_L3,ft_rate_L3,eFG_L3,TS_L3,usage_proxy_L3,three_rate_L5,ft_rate_L5,eFG_L5,TS_L5,usage_proxy_L5,three_rate_L10,ft_rate_L10,eFG_L10,TS_L10,usage_proxy_L10,three_rate_weighted,ft_rate_weighted,eFG_weighted,TS_weighted,usage_proxy_weighted,PRA_last_5_per_min_avg
1,2022-10-21,0,1,0,0,0,5,2,21,14.37,0.00,24.286,26.067000,10.0,206.0,0,0.0,0.0,0.0,0,0,11.4,4.4,4.2,7,6.000000,6.0,29.00000,29.0,0.559259,0.829630,0.416667,0.519801,0.616138,0.508283,0.691717,0.475758,0.563433,0.552491,0.479785,0.587601,0.523390,0.606242,0.509510,0.536019,0.764053,0.445066,0.541535,0.586381,0.448430
2,2022-10-21,0,1,0,0,0,120,0,21,31.62,0.00,0.934,3.465000,10.0,206.0,0,0.0,0.0,0.0,0,0,0.2,0.2,0.0,15,10.000000,10.0,23.50000,23.5,0.333333,0.000000,1.500000,1.500000,0.071378,0.200000,0.000000,1.500000,1.500000,0.042827,0.300000,0.000000,0.600000,0.600000,0.088741,0.290000,0.000000,1.410000,1.410000,0.064549,0.420937
3,2022-10-21,0,1,0,0,0,171,3,21,32.53,0.00,31.028,31.442000,10.0,206.0,0,0.0,0.0,0.0,0,0,12.8,6.0,3.6,19,26.000000,26.0,17.00000,17.0,0.476068,0.409402,0.427457,0.491737,0.447576,0.470668,0.318368,0.465031,0.522407,0.460416,0.407527,0.324184,0.507822,0.559126,0.563756,0.467594,0.373570,0.446766,0.507677,0.463046,0.280624
4,2022-10-21,0,1,0,0,0,178,4,21,39.62,0.00,29.280,28.101000,10.0,206.0,0,0.0,0.0,0.0,0,0,9.4,0.0,1.0,38,36.000000,36.0,26.00000,26.0,0.000000,0.134680,0.821549,0.799477,0.334001,0.000000,0.098990,0.705051,0.694954,0.338075,0.000000,0.175991,0.721115,0.718415,0.332501,0.000000,0.128104,0.776556,0.760014,0.335073,0.210468
5,2022-10-21,0,1,0,0,0,235,0,21,0.00,0.00,1.858,1.327143,10.0,206.0,0,0.0,0.0,0.0,0,0,0.4,0.0,0.4,0,0.000000,0.0,23.50000,23.5,0.000000,0.333333,0.500000,0.520833,0.149300,0.000000,0.200000,0.500000,0.520833,0.089580,0.000000,0.142857,0.500000,0.520833,0.063986,0.000000,0.274286,0.500000,0.520833,0.122853,0.168375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95085,2025-12-18,28,1,0,0,0,704,3,13,16.98,0.00,20.166,22.319000,-8.0,278.0,3,0.0,0.0,6.0,0,0,4.4,3.2,0.0,12,13.280000,12.8,30.95000,33.2,0.666667,0.000000,0.800000,0.800000,0.100368,0.616667,0.000000,0.655208,0.655208,0.193944,0.612143,0.033333,0.600904,0.607579,0.246628,0.646214,0.003333,0.736653,0.737320,0.143067,0.988909
95086,2025-12-18,29,0,0,0,0,59,4,26,22.57,24.82,29.488,27.462000,-25.0,213.0,16,0.0,0.0,22.0,0,0,7.0,3.2,2.2,9,15.833333,13.6,26.96000,31.6,0.533333,0.111111,0.381481,0.375814,0.270905,0.453333,0.333333,0.378889,0.431399,0.267869,0.465455,0.417273,0.398535,0.476885,0.297470,0.502545,0.208394,0.382409,0.402597,0.272651,0.965009
95087,2025-12-18,29,0,0,0,0,89,4,26,29.10,34.13,32.222,31.599000,-25.0,213.0,4,0.0,0.0,11.0,0,0,16.8,7.8,3.6,18,25.541667,27.6,26.96000,31.6,0.440850,0.228431,0.586765,0.603236,0.568000,0.471176,0.203725,0.536821,0.561277,0.570557,0.458431,0.158815,0.577289,0.591184,0.584800,0.451706,0.214058,0.570834,0.589443,0.570447,0.000000
95088,2025-12-18,29,0,0,0,0,491,3,26,36.08,35.88,32.040,31.974000,-25.0,213.0,4,0.0,0.0,11.0,0,0,10.0,6.0,2.2,24,25.545455,23.4,28.28125,29.8,0.660606,0.157576,0.565152,0.578805,0.353568,0.601558,0.224416,0.597532,0.623845,0.343995,0.530955,0.184477,0.515166,0.541190,0.352268,0.629927,0.180318,0.569867,0.588556,0.350566,0.000000


Stats Model
tensor X torch.Size([95089, 9])
tensor y torch.Size([95089, 1])
Epoch 000 | Train MSE: 119.2222 | Val MSE: 101.4078
Epoch 010 | Train MSE: 55.4220 | Val MSE: 57.7664
Epoch 020 | Train MSE: 40.3923 | Val MSE: 44.4327
Epoch 030 | Train MSE: 32.4626 | Val MSE: 34.2135
Epoch 040 | Train MSE: 26.7334 | Val MSE: 28.0143
Epoch 050 | Train MSE: 21.4246 | Val MSE: 22.8053
Epoch 060 | Train MSE: 16.9072 | Val MSE: 17.9248
Epoch 070 | Train MSE: 13.2007 | Val MSE: 14.0883
Epoch 080 | Train MSE: 10.2103 | Val MSE: 10.9497
Epoch 090 | Train MSE: 7.8515 | Val MSE: 8.4777


In [20]:
df_main = df.copy()
df_main = setup_df_main(df_main)
df_main = df_main.dropna() # TEMP SOLUTION TO NULLS
df_main = df_main.drop(['Date'], axis=1)
for col in df_main.columns:
    if not pd.api.types.is_float_dtype(df_main[col]):
        df_main[col] = df_main[col].astype(float)

X = df_main.drop(tgt_stat, axis=1).values
y = df_main[tgt_stat].values
hyperparam_tuning(X, y, 10)

2025-12-19 22:28:56,429	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2025-12-19 22:29:32
Running for:,00:00:36.35
Memory:,27.3/63.9 GiB

Trial name,status,loc,hidden_units,lr,iter,total time (s),train_loss,val_loss
train_and_eval_29692_00000,TERMINATED,127.0.0.1:83728,32,0.000230965,1,1.69731,133.679,129.057
train_and_eval_29692_00001,TERMINATED,127.0.0.1:102480,16,0.00265986,1,1.82479,51.3209,51.325
train_and_eval_29692_00002,TERMINATED,127.0.0.1:2628,8,0.000373557,1,1.70822,636.996,642.641
train_and_eval_29692_00003,TERMINATED,127.0.0.1:52584,16,0.00015923,1,1.74443,358.297,361.023
train_and_eval_29692_00004,TERMINATED,127.0.0.1:100580,16,0.00114968,1,1.79155,128.975,122.73
train_and_eval_29692_00005,TERMINATED,127.0.0.1:78648,16,0.00295819,1,1.78808,71.4623,70.3135
train_and_eval_29692_00006,TERMINATED,127.0.0.1:104928,16,0.000106954,1,1.75683,233.481,240.514
train_and_eval_29692_00007,TERMINATED,127.0.0.1:34196,32,0.000597595,1,1.5872,114.613,110.914
train_and_eval_29692_00008,TERMINATED,127.0.0.1:83332,32,0.000372114,1,1.6993,178.333,174.13
train_and_eval_29692_00009,TERMINATED,127.0.0.1:18420,32,0.00632962,1,1.68293,51.9115,51.9807


Trial name,train_loss,val_loss
train_and_eval_29692_00000,133.679,129.057
train_and_eval_29692_00001,51.3209,51.325
train_and_eval_29692_00002,636.996,642.641
train_and_eval_29692_00003,358.297,361.023
train_and_eval_29692_00004,128.975,122.73
train_and_eval_29692_00005,71.4623,70.3135
train_and_eval_29692_00006,233.481,240.514
train_and_eval_29692_00007,114.613,110.914
train_and_eval_29692_00008,178.333,174.13
train_and_eval_29692_00009,51.9115,51.9807


2025-12-19 22:29:32,791	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/Rodolfo/Jupyter_files/FantasyBasketball/ray_temp_outputs/train_and_eval_2025-12-19_22-28-56' in 20.4490s.
2025-12-19 22:29:32,818	INFO tune.py:1041 -- Total run time: 36.39 seconds (15.90 seconds for the tuning loop).


Final test loss: 46.08809280395508
[[ 0.7903317]
 [ 5.093766 ]
 [22.757301 ]
 ...
 [25.243507 ]
 [24.889126 ]
 [ 9.194701 ]]


In [18]:
# rmse = np.sqrt(mean_squared_error(main_splits[5], stat_preds)) # splits[5] = y_test
# mae = mean_absolute_error(main_splits[5], stat_preds)
# pred_col = f'{tgt_stat}_Pred'
# df_lines = pd.read_csv(f"../tables/2025/parlay_lines.csv")
# df_lines['Date'] = pd.to_datetime(df_lines.Date)
# df_lines = df_lines[['Date', 'Team', 'Player', f'{tgt_stat}_line']]
# main_test_df[pred_col] = stat_preds
# main_test_df['Team'] = team_encoder.inverse_transform(main_test_df["Team"])
# main_test_df['Player'] = player_encoder.inverse_transform(main_test_df["Player"])

# df_test = main_test_df.merge(df_lines, on=['Date', 'Team', 'Player'])
# df_test = df_test[[c for c in df_test.columns if c != pred_col] + [pred_col]]

# df_test['Diff'] = df_test[f'{tgt_stat}_Pred'] - df_test[f'{tgt_stat}_line']
# df_test['Act_Res'] = np.where(df_test[tgt_stat] > df_test[f'{tgt_stat}_line'], 'O', 'U')
# df_test['Pred_Res'] = np.where(df_test[pred_col] > df_test[f'{tgt_stat}_line'], 'O', 'U')
# df_test['ParlayHit'] = np.where(df_test['Act_Res'] == df_test['Pred_Res'], 1, 0)

# df_test['Diff2'] = abs(df_test[f'{tgt_stat}_Pred'] - df_test[tgt_stat])
# df_test['InRMSE_Range'] = np.where(df_test['Diff2'] <= rmse, 1, 0)

# print("Total Accuracy (ParlayHit):", ((df_test.ParlayHit == 1).sum() / df_test.shape[0]))
# print((df_test.ParlayHit == 1).sum(), "/", df_test.shape[0])

# print("\nTotal Accuracy (InRMSE_Range):", ((df_test.InRMSE_Range == 1).sum() / df_test.shape[0]))
# print((df_test.InRMSE_Range == 1).sum(), "/", df_test.shape[0])

# df_ystrday = df_test[(df_test.Date == (datetime.strptime(now, "%Y-%m-%d") - timedelta(days=1)).strftime("%Y-%m-%d")) & ~(df_test[f'{tgt_stat}_line'].isnull())]\
#             [['Team', 'Player', tgt_stat, f'{tgt_stat}_line', f'{tgt_stat}_Pred', 'ParlayHit', 'Diff', 'InRMSE_Range', 'Diff2']]\
#             .sort_values(f'{tgt_stat}_line', ascending=False)

# print("\nYesterday's Results:")
# print("Total Accuracy (ParlayHit):", ((df_ystrday.ParlayHit == 1).sum() / df_ystrday.shape[0]))
# print("Total Accuracy (InRMSE_Range):", ((df_ystrday.InRMSE_Range == 1).sum() / df_ystrday.shape[0]))
# if df_ystrday.shape[0] >= 50:
#     for tm in df_ystrday.Team.unique():
#         display(df_ystrday[df_ystrday.Team == tm])
# else:
#     display(df_ystrday)

### Today's predictions

In [19]:
# df_lines = pd.read_csv(f"../tables/2025/parlay_lines.csv")
# df_lines['Date'] = pd.to_datetime(df_lines.Date)

# df_pred = pd.read_csv("../tables/2025/parlay_stats.csv")
# df_pred['Date'] = pd.to_datetime(df_pred.Date)
# df_pred['Season'] = 2025
# for col in df_pred.select_dtypes(include='object').columns:
#     df_pred[col] = df_pred[col].astype('category')
# df_pred = df_pred.drop(['Spread', 'Total'], axis=1).merge(df_mtch, on=['Season', 'Date', 'Team'])
# df_pred[tgt_stat] = 0

# # Predict minutes
# df_act_mins = pd.read_csv("../tables/2025/parlay_actuals.csv")
# df_act_mins['Date'] = pd.to_datetime(df_act_mins.Date)
# df_pred = df_pred[df_pred.Player.isin(df.Player_name.unique())].merge(df_act_mins[['Date', 'Team', 'Player', 'MP', 'TPM']], on=['Date', 'Team', 'Player'], how='left')
# df_pred = df_pred.merge(df3[['Date', 'Team', 'Player', 'TOV', 'PF', '+/-', 'FGA', 'FG', 'TPA', 'FT', 'FTA']], on=['Date', 'Team', 'Player'], how='left')

# df_pred = df_pred.merge(df_lines, on=['Date', 'Team', 'Player'], how='left')
# df_pred['Spread_x'] = np.where(df_pred.Spread_x.isnull(), df_pred.Spread_y, df_pred.Spread_x)
# df_pred['Total_x'] = np.where(df_pred.Total_x.isnull(), df_pred.Total_y, df_pred.Total_x)
# df_pred = df_pred.rename(columns={"Spread_x": "Spread", "Total_x": "Total"}).drop(['Spread_y', 'Total_y'], axis=1)
# df_pred_mins = setup_df_mins(con, df_pred)

# df_pred_mins = df_pred_mins.drop(['Date', 'MP'], axis=1)
# df_pred_mins["Team"] = team_encoder.transform(df_pred_mins["Team"])
# df_pred_mins["Opp"] = team_encoder.transform(df_pred_mins["Opp"])
# df_pred_mins['Team_type'] = team_type_encoder.transform(df_pred_mins['Team_type'])
# df_pred_mins["Player"] = player_encoder.transform(df_pred_mins["Player"])
# df_pred_mins["Pos"] = position_encoder.transform(df_pred_mins["Pos"])
# DM_mins = xgb.DMatrix(df_pred_mins)
# df_pred['MP'] = mins_model.predict(DM_mins)
# df_pred['N_TPM'] = df_pred.FG - df_pred.TPM
# df_pred['PTS'] = (df_pred.FT * 1) + (df_pred.N_TPM * 2) + (df_pred.TPM * 3)
# df_pred = setup_df_main(df_pred)
# feature_cols = [col for col in df_pred.columns if col not in ['Date', tgt_stat]]
# df_pred = df_pred[df_pred.Date == now][feature_cols]

# # Predict stat
# df_pred["Team"] = team_encoder.transform(df_pred["Team"])
# df_pred["Opp"] = team_encoder.transform(df_pred["Opp"])
# df_pred = df_pred[~(df_pred.PrevLocation.isnull())] # Filters out players who are debuting on the year
# df_pred["PrevLocation"] = team_encoder.transform(df_pred["PrevLocation"])
# df_pred["Player"] = player_encoder.transform(df_pred["Player"])
# df_pred["Pos"] = position_encoder.transform(df_pred["Pos"])
# df_pred['Team_type'] = team_type_encoder.transform(df_pred['Team_type'])
# DM_stats = xgb.DMatrix(df_pred)
# df_pred[f"{tgt_stat}_proj"] = stat_model.predict(DM_stats)

# df_pred['Team'] = team_encoder.inverse_transform(df_pred["Team"])
# df_pred['Opp'] = team_encoder.inverse_transform(df_pred["Opp"])
# df_pred['Player'] = player_encoder.inverse_transform(df_pred["Player"])
# df_pred['Pos'] = position_encoder.inverse_transform(df_pred["Pos"])

# df_lines = df_lines[df_lines.Date == now][['Team', 'Player', f'{tgt_stat}_line']]
# df_pred = df_pred.merge(df_lines, on=['Team', 'Player'])

# tds_picks = df_pred[~(df_pred[f'{tgt_stat}_line'].isnull())]\
#             [['Team', 'Player', 'Pos', 'Opp', 'MP', 'MP_last_5_avg', f'{tgt_stat}_line', f'{tgt_stat}_proj']]
# tds_picks['Diff'] = abs((df_pred[f'{tgt_stat}_line'] - df_pred[f'{tgt_stat}_proj']))
# tds_picks['Diff2'] = abs((df_pred['MP'] - df_pred['MP_last_5_avg']))
# tds_picks = tds_picks[(tds_picks.Diff >= mae) & (tds_picks.Diff2 <= 5)].sort_values('Diff', ascending=False).drop(['Diff', 'Diff2'], axis=1)
# display(tds_picks)
# tds_picks.insert(0, 'Date', pd.to_datetime(now))
# partition_save_df(tds_picks, f"../tables/2025/gmday_preds_{tgt_stat}.csv")