In [2]:
import torch
import numpy as np
import pandas as pd

import os, json
from datetime import datetime

from lstm import train_dual_head_classifier, TrainConfig
from data_prep import add_over_under_label, prepare_receiving_sequences
from metrics import compute_ece, compute_pace

from sklearn.metrics import roc_auc_score
from player_utils import predict_player_over_prob


In [4]:
# Raw game logs: 2019-2023 seasons for training, 2024-to-date for testing.
train_df = pd.read_csv("data/receiving_2019_2023.csv")
test_df  = pd.read_csv("data/receiving_24tocurrent.csv")

# Experiment configuration shared by every model cell in this notebook.
# LINE_VALUE is the prop line the over/under label is built against. The
# recorded outputs and saved artifact names in this notebook ("..._line_37.5_...",
# "Prop: YDS over 37.5") were all produced with 37.5, so the constant must
# match them; a 1-yard line would no longer describe those results.
LINE_VALUE = 37.5
N_PAST_GAMES = 5      # sequence length handed to prepare_receiving_sequences
HIDDEN_SIZE = 128     # hidden size passed to the LSTM trainer
STAT_COL  = "YDS"     # stat column the prop is defined on

# Attach the binary target column "over_label" to both frames.
# NOTE(review): presumably 1 when STAT_COL exceeds LINE_VALUE, else 0 —
# confirm against data_prep.add_over_under_label.
train_df = add_over_under_label(train_df, STAT_COL, line_value=LINE_VALUE, new_col="over_label")
test_df  = add_over_under_label(test_df,  STAT_COL, line_value=LINE_VALUE, new_col="over_label")

# LSTM TEST

In [None]:
# ---------------------------------------------------------------------------
# LSTM dual-head classifier: build sequences, train, evaluate, persist.
# ---------------------------------------------------------------------------

# Fixed seed so the training run in this cell is repeatable after a kernel
# restart and does not depend on which cells ran before it.
# NOTE(review): compute_pace scores *random* parlays; seeding NumPy's global
# RNG only pins that metric if metrics.py draws from it — confirm.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Turn the labelled game logs into per-sample sequences of past games.
X_train, y_train, lengths_train, meta_train = prepare_receiving_sequences(
    train_df,
    n_past_games=N_PAST_GAMES,
    target_col="over_label",
)

X_test, y_test, lengths_test, meta_test = prepare_receiving_sequences(
    test_df,
    n_past_games=N_PAST_GAMES,
    target_col="over_label",
)

cfg = TrainConfig(
    n_epochs=10,
    batch_size=64,
    lr=1e-3,
    device="auto",
    verbose=True,
)

train_result = train_dual_head_classifier(
    X=X_train,
    y=y_train,
    lengths=lengths_train,
    hidden_size=HIDDEN_SIZE,
    cfg=cfg,
)

model = train_result["model"]
history = train_result["history"]
# Stable handle for the LSTM: the TFT cell further down rebinds `model`,
# so anything that must use the LSTM specifically should use `lstm_model`.
lstm_model = model

# Evaluate on GPU when available, otherwise on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

# ---- In-sample metrics ----------------------------------------------------
X_train_t = torch.tensor(X_train, dtype=torch.float32).to(device)
len_train_t = torch.tensor(lengths_train, dtype=torch.long).to(device)

with torch.no_grad():
    # The model returns a pair; only the classification logits are scored here.
    y_reg_train, logits_train = model(X_train_t, len_train_t)
    probs_train = torch.sigmoid(logits_train).cpu().numpy()

y_true_train = np.asarray(y_train)

auc_train   = roc_auc_score(y_true_train, probs_train)
ece_train   = compute_ece(y_true_train, probs_train)
pace2_train = compute_pace(y_true_train, probs_train, L=2)

print("\n=== Train Metrics (Single-Leg + Parlay) ===")
print(f"AUC_train   : {auc_train:.4f}")
print(f"ECE_train   : {ece_train:.4f}")
print(f"PaCE2_train : {pace2_train:.4f}  (random 2-leg parlays)")

# ---- Held-out metrics -----------------------------------------------------
X_test_t = torch.tensor(X_test, dtype=torch.float32).to(device)
len_test_t = torch.tensor(lengths_test, dtype=torch.long).to(device)

with torch.no_grad():
    y_reg_test, logits_test = model(X_test_t, len_test_t)
    probs_test = torch.sigmoid(logits_test).cpu().numpy()

y_true_test = np.asarray(y_test)

auc_test   = roc_auc_score(y_true_test, probs_test)
ece_test   = compute_ece(y_true_test, probs_test)
pace2_test = compute_pace(y_true_test, probs_test, L=2)

print("\n=== Test Metrics (Single-Leg + Parlay) ===")
print(f"AUC_test   : {auc_test:.4f}")
print(f"ECE_test   : {ece_test:.4f}")
print(f"PaCE2_test : {pace2_test:.4f}  (random 2-leg parlays)")


# ---- Persist weights and metrics -------------------------------------------
os.makedirs("models", exist_ok=True)
os.makedirs("metrics", exist_ok=True)

# The tag encodes the experiment settings so runs with different settings
# do not overwrite each other's artifacts.
model_tag = f"lstm_dual_receiving_{STAT_COL.lower()}_line_{LINE_VALUE:.1f}_past{N_PAST_GAMES}_hid{HIDDEN_SIZE}"

model_path   = os.path.join("models",  model_tag + ".pt")
metrics_path = os.path.join("metrics", model_tag + "_metrics.json")

# nn.Module.to() moves the parameters in place and returns the same module,
# so `model` itself lives on the CPU from this line on (model_cpu is model).
model_cpu = model.to("cpu")
torch.save(model_cpu.state_dict(), model_path)
print(f"Saved model to {model_path}")

metrics_payload = {
    "timestamp": datetime.now().isoformat(),
    "stat_col": STAT_COL,
    "line_value": LINE_VALUE,
    "n_past_games": N_PAST_GAMES,
    "hidden_size": HIDDEN_SIZE,
    "seed": SEED,
    "train_cfg": {
        "n_epochs": cfg.n_epochs,
        "batch_size": cfg.batch_size,
        "lr": cfg.lr,
        "device": cfg.device,
    },
    "train_history": history,  # per-epoch losses
    "train_metrics": {
        "auc": float(auc_train),
        "ece": float(ece_train),
        "pace2": float(pace2_train),
        "n_train": int(len(y_true_train)),
    },
    "test_metrics": {
        "auc": float(auc_test),
        "ece": float(ece_test),
        "pace2": float(pace2_test),
        "n_test": int(len(y_true_test)),
    },
}

with open(metrics_path, "w") as f:
    json.dump(metrics_payload, f, indent=2)

print(f"Saved metrics to {metrics_path}")

Epoch 01 | Train BCE loss: 0.5255
Epoch 02 | Train BCE loss: 0.5176
Epoch 03 | Train BCE loss: 0.5154
Epoch 04 | Train BCE loss: 0.5135
Epoch 05 | Train BCE loss: 0.5122
Epoch 06 | Train BCE loss: 0.5125
Epoch 07 | Train BCE loss: 0.5114
Epoch 08 | Train BCE loss: 0.5102
Epoch 09 | Train BCE loss: 0.5106
Epoch 10 | Train BCE loss: 0.5094

=== Train Metrics (Single-Leg + Parlay) ===
AUC_train   : 0.7916
ECE_train   : 0.0303
PaCE2_train : 0.1554  (random 2-leg parlays)

=== Test Metrics (Single-Leg + Parlay) ===
AUC_test   : 0.7894
ECE_test   : 0.0371
PaCE2_test : 0.1596  (random 2-leg parlays)
Saved model to models/lstm_dual_receiving_yds_line_37.5_past5_hid128.pt
Saved metrics to metrics/lstm_dual_receiving_yds_line_37.5_past5_hid128_metrics.json


In [15]:
# Single-player sanity check with the sequence model bound to `model`.
# NOTE(review): `model` is a shared name that the TFT training cell rebinds.
# This cell's execution count (15) is later than the TFT cells' (11, 14) and
# its recorded probability equals the TFT cell's, so the saved output was most
# likely produced by the TFT model. Re-run this cell right after the LSTM
# training cell to query the LSTM.
player_name = "George Kittle"
# Query the same stat and line the training labels were built from, instead
# of repeating the literals here where they can drift from the config cell.
stat_col = STAT_COL
line_value = LINE_VALUE

prob = predict_player_over_prob(
    model=model,
    df=test_df,
    player_name=player_name,
    stat_col=stat_col,
    line_value=line_value,
    n_past_games=N_PAST_GAMES,
)

print(f"Player: {player_name}")
print(f"Prop: {stat_col} over {line_value}")
print(f"Predicted probability (model): {prob:.3f}")

Player: George Kittle
Prop: YDS over 37.5
Predicted probability (model): 0.355


# TFT TEST

In [11]:
# ---------------------------------------------------------------------------
# TFT classifier: build sequences, train, evaluate, persist.
# ---------------------------------------------------------------------------

# Import the TFT config under an alias: importing it as plain `TrainConfig`
# would shadow lstm.TrainConfig from the import cell, and re-running the LSTM
# cell afterwards would silently build its config from the wrong class.
from tft import train_tft_classifier, TrainConfig as TFTTrainConfig

D_MODEL = 128

# Fixed seed so the training run in this cell is repeatable after a kernel
# restart and does not depend on which cells ran before it.
# NOTE(review): compute_pace scores *random* parlays; seeding NumPy's global
# RNG only pins that metric if metrics.py draws from it — confirm.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Sequences are rebuilt here so the cell runs without the LSTM cell.
X_train, y_train, lengths_train, meta_train = prepare_receiving_sequences(
    train_df,
    n_past_games=N_PAST_GAMES,
    target_col="over_label",
)

X_test, y_test, lengths_test, meta_test = prepare_receiving_sequences(
    test_df,
    n_past_games=N_PAST_GAMES,
    target_col="over_label",
)

cfg = TFTTrainConfig(
    n_epochs=10,
    batch_size=64,
    lr=1e-3,
    device="auto",
    verbose=True,
)

train_result = train_tft_classifier(
    X=X_train,
    y=y_train,
    lengths=lengths_train,
    d_model=D_MODEL,
    n_heads=4,
    num_layers=2,
    dropout=0.1,
    cfg=cfg,
)

# NOTE: this rebinds `model`; any earlier model held under that name is no
# longer reachable through it. `tft_model` is an unambiguous handle.
model = train_result["model"]
history = train_result["history"]
tft_model = model

# Evaluate on GPU when available, otherwise on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

# ---- In-sample metrics ----------------------------------------------------
X_train_t = torch.tensor(X_train, dtype=torch.float32).to(device)
len_train_t = torch.tensor(lengths_train, dtype=torch.long).to(device)

with torch.no_grad():
    # The model returns a pair; only the classification logits are scored here.
    y_reg_train, logits_train = model(X_train_t, len_train_t)
    probs_train = torch.sigmoid(logits_train).cpu().numpy()

y_true_train = np.asarray(y_train)

auc_train   = roc_auc_score(y_true_train, probs_train)
ece_train   = compute_ece(y_true_train, probs_train)
pace2_train = compute_pace(y_true_train, probs_train, L=2)

print("\n=== TFT Train Metrics (Single-Leg + Parlay) ===")
print(f"AUC_train   : {auc_train:.4f}")
print(f"ECE_train   : {ece_train:.4f}")
print(f"PaCE2_train : {pace2_train:.4f}  (random 2-leg parlays)")

# ---- Held-out metrics -----------------------------------------------------
X_test_t = torch.tensor(X_test, dtype=torch.float32).to(device)
len_test_t = torch.tensor(lengths_test, dtype=torch.long).to(device)

with torch.no_grad():
    y_reg_test, logits_test = model(X_test_t, len_test_t)
    probs_test = torch.sigmoid(logits_test).cpu().numpy()

y_true_test = np.asarray(y_test)

auc_test   = roc_auc_score(y_true_test, probs_test)
ece_test   = compute_ece(y_true_test, probs_test)
pace2_test = compute_pace(y_true_test, probs_test, L=2)

print("\n=== TFT Test Metrics (Single-Leg + Parlay) ===")
print(f"AUC_test   : {auc_test:.4f}")
print(f"ECE_test   : {ece_test:.4f}")
print(f"PaCE2_test : {pace2_test:.4f}  (random 2-leg parlays)")

# ---- Persist weights and metrics -------------------------------------------
os.makedirs("models", exist_ok=True)
os.makedirs("metrics", exist_ok=True)

# The tag encodes the experiment settings so runs with different settings
# do not overwrite each other's artifacts.
model_tag = f"tft_dual_receiving_{STAT_COL.lower()}_line_{LINE_VALUE:.1f}_past{N_PAST_GAMES}_dmodel{D_MODEL}"

model_path   = os.path.join("models",  model_tag + ".pt")
metrics_path = os.path.join("metrics", model_tag + "_metrics.json")

# nn.Module.to() moves the parameters in place and returns the same module,
# so `model` itself lives on the CPU from this line on (model_cpu is model).
model_cpu = model.to("cpu")
torch.save(model_cpu.state_dict(), model_path)
print(f"Saved TFT model to {model_path}")

metrics_payload = {
    "timestamp": datetime.now().isoformat(),
    "stat_col": STAT_COL,
    "line_value": LINE_VALUE,
    "n_past_games": N_PAST_GAMES,
    "d_model": D_MODEL,
    "seed": SEED,
    "train_cfg": {
        "n_epochs": cfg.n_epochs,
        "batch_size": cfg.batch_size,
        "lr": cfg.lr,
        "device": cfg.device,
    },
    "train_history": history,
    "train_metrics": {
        "auc": float(auc_train),
        "ece": float(ece_train),
        "pace2": float(pace2_train),
        "n_train": int(len(y_true_train)),
    },
    "test_metrics": {
        "auc": float(auc_test),
        "ece": float(ece_test),
        "pace2": float(pace2_test),
        "n_test": int(len(y_true_test)),
    },
}

with open(metrics_path, "w") as f:
    json.dump(metrics_payload, f, indent=2)

print(f"Saved TFT metrics to {metrics_path}")



[TFT] Epoch 01 | Train BCE loss: 0.5420
[TFT] Epoch 02 | Train BCE loss: 0.5294
[TFT] Epoch 03 | Train BCE loss: 0.5276
[TFT] Epoch 04 | Train BCE loss: 0.5246
[TFT] Epoch 05 | Train BCE loss: 0.5230
[TFT] Epoch 06 | Train BCE loss: 0.5220
[TFT] Epoch 07 | Train BCE loss: 0.5236
[TFT] Epoch 08 | Train BCE loss: 0.5231
[TFT] Epoch 09 | Train BCE loss: 0.5226
[TFT] Epoch 10 | Train BCE loss: 0.5207

=== TFT Train Metrics (Single-Leg + Parlay) ===
AUC_train   : 0.7832
ECE_train   : 0.0080
PaCE2_train : 0.1638  (random 2-leg parlays)

=== TFT Test Metrics (Single-Leg + Parlay) ===
AUC_test   : 0.7829
ECE_test   : 0.0202
PaCE2_test : 0.1710  (random 2-leg parlays)
Saved TFT model to models/tft_dual_receiving_yds_line_37.5_past5_dmodel128.pt
Saved TFT metrics to metrics/tft_dual_receiving_yds_line_37.5_past5_dmodel128_metrics.json


In [14]:
# Single-player sanity check with the TFT model currently bound to `model`.
player_name = "George Kittle"
# Query the same stat and line the training labels were built from. A
# hard-coded line here would let the queried prop drift away from the line
# the model was actually trained on whenever the config cell changes.
stat_col = STAT_COL
line_value = LINE_VALUE

prob = predict_player_over_prob(
    model=model,
    df=test_df,
    player_name=player_name,
    stat_col=stat_col,
    line_value=line_value,
    n_past_games=N_PAST_GAMES,
    model_type="tft"
)

print(f"\n[TFT] Player: {player_name}")
print(f"Prop: {stat_col} over {line_value}")
print(f"Predicted probability (model): {prob:.3f}")


[TFT] Player: George Kittle
Prop: YDS over 37.5
Predicted probability (model): 0.355


# XGBoost TEST

In [None]:
# ---------------------------------------------------------------------------
# XGBoost baseline: build sequences, train, evaluate, persist.
# ---------------------------------------------------------------------------
from xgb import XGBTrainConfig, train_xgb_classifier, flatten_sequences

# NOTE(review): compute_pace scores *random* parlays; seeding NumPy's global
# RNG only makes that metric repeatable if metrics.py draws from it — confirm.
# XGBoost's own randomness is governed by XGBTrainConfig, not by this seed.
SEED = 42
np.random.seed(SEED)

# Sequences are rebuilt here so the cell runs without the neural-net cells.
X_train, y_train, lengths_train, meta_train = prepare_receiving_sequences(
    train_df,
    n_past_games=N_PAST_GAMES,
    target_col="over_label",
)

X_test, y_test, lengths_test, meta_test = prepare_receiving_sequences(
    test_df,
    n_past_games=N_PAST_GAMES,
    target_col="over_label",
)

xgb_cfg = XGBTrainConfig(
    n_estimators=300,
    max_depth=4,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    eval_metric="logloss",
    verbose=False,
)

xgb_result = train_xgb_classifier(
    X=X_train,
    y=y_train,
    lengths=lengths_train,
    cfg=xgb_cfg,
)

xgb_model   = xgb_result["model"]
xgb_history = xgb_result["history"]

# ---- In-sample metrics ----------------------------------------------------
X_train_flat = flatten_sequences(X_train)  # (N_train, T*E)

# Labels may arrive as a torch tensor or as an array-like. Use a real type
# check: sniffing "torch" in the type's repr also matches non-tensor torch
# objects that have no .detach().
if isinstance(y_train, torch.Tensor):
    y_train_np = y_train.detach().cpu().numpy().astype(float)
else:
    y_train_np = np.asarray(y_train, dtype=float)

# Column 1 of predict_proba is the positive ("over") class probability.
train_probs = xgb_model.predict_proba(X_train_flat)[:, 1]

auc_train   = roc_auc_score(y_train_np, train_probs)
ece_train   = compute_ece(y_train_np, train_probs)
pace2_train = compute_pace(y_train_np, train_probs, L=2)

print("\n=== XGB Train Metrics (Single-Leg + Parlay) ===")
print(f"AUC_train   : {auc_train:.4f}")
print(f"ECE_train   : {ece_train:.4f}")
print(f"PaCE2_train : {pace2_train:.4f}  (random 2-leg parlays)")

# ---- Held-out metrics -----------------------------------------------------
X_test_flat = flatten_sequences(X_test)  # (N_test, T*E)

if isinstance(y_test, torch.Tensor):
    y_test_np = y_test.detach().cpu().numpy().astype(float)
else:
    y_test_np = np.asarray(y_test, dtype=float)

test_probs = xgb_model.predict_proba(X_test_flat)[:, 1]

auc_test   = roc_auc_score(y_test_np, test_probs)
ece_test   = compute_ece(y_test_np, test_probs)
pace2_test = compute_pace(y_test_np, test_probs, L=2)

print("\n=== XGB Test Metrics (Single-Leg + Parlay) ===")
print(f"AUC_test   : {auc_test:.4f}")
print(f"ECE_test   : {ece_test:.4f}")
print(f"PaCE2_test : {pace2_test:.4f}  (random 2-leg parlays)")

# ---- Persist model and metrics ---------------------------------------------
os.makedirs("models", exist_ok=True)
os.makedirs("metrics", exist_ok=True)

# The tag encodes the experiment settings so runs with different settings
# do not overwrite each other's artifacts.
model_tag = f"xgb_dual_receiving_{STAT_COL.lower()}_line_{LINE_VALUE:.1f}_past{N_PAST_GAMES}"

model_path   = os.path.join("models",  model_tag + ".json")
metrics_path = os.path.join("metrics", model_tag + "_metrics.json")

# save XGBoost model
xgb_model.save_model(model_path)
print(f"Saved XGB model to {model_path}")

metrics_payload = {
    "timestamp": datetime.now().isoformat(),
    "stat_col": STAT_COL,
    "line_value": LINE_VALUE,
    "n_past_games": N_PAST_GAMES,
    "xgb_cfg": {
        "n_estimators": xgb_cfg.n_estimators,
        "max_depth": xgb_cfg.max_depth,
        "learning_rate": xgb_cfg.learning_rate,
        "subsample": xgb_cfg.subsample,
        "colsample_bytree": xgb_cfg.colsample_bytree,
        # reg_lambda / reg_alpha are not set above, so these record whatever
        # defaults XGBTrainConfig supplies.
        "reg_lambda": xgb_cfg.reg_lambda,
        "reg_alpha": xgb_cfg.reg_alpha,
        "eval_metric": xgb_cfg.eval_metric,
    },
    "train_history": xgb_history,  # per-iteration logloss
    "train_metrics": {
        "auc": float(auc_train),
        "ece": float(ece_train),
        "pace2": float(pace2_train),
        "n_train": int(len(y_train_np)),
    },
    "test_metrics": {
        "auc": float(auc_test),
        "ece": float(ece_test),
        "pace2": float(pace2_test),
        "n_test": int(len(y_test_np)),
    },
}

with open(metrics_path, "w") as f:
    json.dump(metrics_payload, f, indent=2)

print(f"Saved XGB metrics to {metrics_path}")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



=== XGB Train Metrics (Single-Leg + Parlay) ===
AUC_train   : 0.8342
ECE_train   : 0.0265
PaCE2_train : 0.1548  (random 2-leg parlays)

=== XGB Test Metrics (Single-Leg + Parlay) ===
AUC_test   : 0.7821
ECE_test   : 0.0132
PaCE2_test : 0.1692  (random 2-leg parlays)
Saved XGB model to models/xgb_dual_receiving_yds_line_37.5_past5.json
Saved XGB metrics to metrics/xgb_dual_receiving_yds_line_37.5_past5_metrics.json


In [None]:
# Single-player sanity check with the XGBoost baseline.
from player_utils import predict_player_over_prob

player_name = "George Kittle"

# Collect the query once so the call and the printed summary read from the
# same notebook-level constants.
xgb_query = dict(
    model=xgb_model,
    df=test_df,
    player_name=player_name,
    stat_col=STAT_COL,
    line_value=LINE_VALUE,
    n_past_games=N_PAST_GAMES,
    model_type="xgboost",
)
prob_xgb = predict_player_over_prob(**xgb_query)

for summary_line in (
    f"[XGBoost] Player: {player_name}",
    f"Prop: {STAT_COL} over {LINE_VALUE}",
    f"Predicted probability (model): {prob_xgb:.3f}",
):
    print(summary_line)

[XGBoost] Player: George Kittle
Prop: YDS over 37.5
Predicted probability (model): 0.313
