In [2]:
import torch
import numpy as np
import pandas as pd

import os, json
from datetime import datetime

from lstm import train_dual_head_classifier, TrainConfig
from data_prep import add_over_under_label, prepare_receiving_sequences
from metrics import compute_ece, compute_pace

from sklearn.metrics import roc_auc_score
from player_utils import predict_player_over_prob


In [6]:
train_df = pd.read_csv("data/receiving_2019_2023.csv")
test_df  = pd.read_csv("data/receiving_24tocurrent.csv")

LINE_VALUE = 37.5      
N_PAST_GAMES = 5
HIDDEN_SIZE = 128
STAT_COL  = "YDS"


train_df = add_over_under_label(train_df, STAT_COL, line_value=LINE_VALUE, new_col="over_label")
test_df  = add_over_under_label(test_df,  STAT_COL, line_value=LINE_VALUE, new_col="over_label")

# LSTM TEST

In [None]:
X_train, y_train, lengths_train, meta_train = prepare_receiving_sequences(
    train_df,
    n_past_games=N_PAST_GAMES,
    target_col="over_label",
)

X_test, y_test, lengths_test, meta_test = prepare_receiving_sequences(
    test_df,
    n_past_games=N_PAST_GAMES,
    target_col="over_label",
)

cfg = TrainConfig(
    n_epochs=10,
    batch_size=64,
    lr=1e-3,
    device="auto",
    verbose=True,
)

train_result = train_dual_head_classifier(
    X=X_train,
    y=y_train,
    lengths=lengths_train,
    hidden_size=HIDDEN_SIZE,
    cfg=cfg,
)

model = train_result["model"]
history = train_result["history"]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

X_test_t = torch.tensor(X_test, dtype=torch.float32).to(device)
len_test_t = torch.tensor(lengths_test, dtype=torch.long).to(device)

with torch.no_grad():
    y_reg_test, logits_test = model(X_test_t, len_test_t)
    probs_test = torch.sigmoid(logits_test).cpu().numpy()

y_true_test = np.asarray(y_test)

auc  = roc_auc_score(y_true_test, probs_test)
ece  = compute_ece(y_true_test, probs_test)
pace2 = compute_pace(y_true_test, probs_test, L=2)

print("\n=== Test Metrics (Single-Leg + Parlay) ===")
print(f"AUC   : {auc:.4f}")
print(f"ECE   : {ece:.4f}")
print(f"PaCE2 : {pace2:.4f}  (random 2-leg parlays)")

os.makedirs("models", exist_ok=True)
os.makedirs("metrics", exist_ok=True)

model_tag = f"lstm_dual_receiving_{STAT_COL.lower()}_line_{LINE_VALUE:.1f}_past{N_PAST_GAMES}_hid{HIDDEN_SIZE}"

model_path   = os.path.join("models",  model_tag + ".pt")
metrics_path = os.path.join("metrics", model_tag + "_metrics.json")

model_cpu = model.to("cpu")
torch.save(model_cpu.state_dict(), model_path)
print(f"Saved model to {model_path}")

metrics_payload = {
    "timestamp": datetime.now().isoformat(),
    "stat_col": STAT_COL,
    "line_value": LINE_VALUE,
    "n_past_games": N_PAST_GAMES,
    "hidden_size": HIDDEN_SIZE,
    "train_cfg": {
        "n_epochs": cfg.n_epochs,
        "batch_size": cfg.batch_size,
        "lr": cfg.lr,
        "device": cfg.device,
    },
    "train_history": history,     # per-epoch losses
    "test_metrics": {
        "auc": float(auc),
        "ece": float(ece),
        "pace2": float(pace2),
        "n_test": int(len(y_true_test)),
    },
}

with open(metrics_path, "w") as f:
    json.dump(metrics_payload, f, indent=2)

print(f"Saved metrics to {metrics_path}")

Epoch 01 | Train BCE loss: 0.5257
Epoch 02 | Train BCE loss: 0.5160
Epoch 03 | Train BCE loss: 0.5150
Epoch 04 | Train BCE loss: 0.5138
Epoch 05 | Train BCE loss: 0.5119
Epoch 06 | Train BCE loss: 0.5117
Epoch 07 | Train BCE loss: 0.5100
Epoch 08 | Train BCE loss: 0.5100
Epoch 09 | Train BCE loss: 0.5101
Epoch 10 | Train BCE loss: 0.5099

=== Test Metrics (Single-Leg + Parlay) ===
AUC   : 0.7889
ECE   : 0.0323
PaCE2 : 0.1524  (random 2-leg parlays)
Saved model to models/lstm_dual_receiving_yds_line_37.5_past5_hid128.pt
Saved metrics to metrics/lstm_dual_receiving_yds_line_37.5_past5_hid128_metrics.json
Player: George Kittle
Prop: YDS over 37.5
Predicted probability (model): 0.223


In [None]:
player_name = "George Kittle"
stat_col = "YDS"
line_value = LINE_VALUE       # MUST match training line (for now)

prob = predict_player_over_prob(
    model=model,
    df=test_df,               # or train_df, or combined df
    player_name=player_name,
    stat_col=stat_col,
    line_value=line_value,
    n_past_games=N_PAST_GAMES,
)

print(f"Player: {player_name}")
print(f"Prop: {stat_col} over {line_value}")
print(f"Predicted probability (model): {prob:.3f}")

# TFT TEST

In [9]:
from tft import train_tft_classifier, TrainConfig   # <-- TFT imports

D_MODEL = 128

X_train, y_train, lengths_train, meta_train = prepare_receiving_sequences(
    train_df,
    n_past_games=N_PAST_GAMES,
    target_col="over_label",
)

X_test, y_test, lengths_test, meta_test = prepare_receiving_sequences(
    test_df,
    n_past_games=N_PAST_GAMES,
    target_col="over_label",
)

cfg = TrainConfig(
    n_epochs=10,
    batch_size=64,
    lr=1e-3,
    device="auto",
    verbose=True,
)

train_result = train_tft_classifier(
    X=X_train,
    y=y_train,
    lengths=lengths_train,
    d_model=D_MODEL,
    n_heads=4,
    num_layers=2,
    dropout=0.1,
    cfg=cfg,
)

model = train_result["model"]
history = train_result["history"]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

X_test_t = torch.tensor(X_test, dtype=torch.float32).to(device)
len_test_t = torch.tensor(lengths_test, dtype=torch.long).to(device)

with torch.no_grad():
    y_reg_test, logits_test = model(X_test_t, len_test_t)
    probs_test = torch.sigmoid(logits_test).cpu().numpy()

y_true_test = np.asarray(y_test)

auc   = roc_auc_score(y_true_test, probs_test)
ece   = compute_ece(y_true_test, probs_test)
pace2 = compute_pace(y_true_test, probs_test, L=2)

print("\n=== TFT Test Metrics (Single-Leg + Parlay) ===")
print(f"AUC   : {auc:.4f}")
print(f"ECE   : {ece:.4f}")
print(f"PaCE2 : {pace2:.4f}  (random 2-leg parlays)")

os.makedirs("models", exist_ok=True)
os.makedirs("metrics", exist_ok=True)

model_tag = f"tft_dual_receiving_{STAT_COL.lower()}_line_{LINE_VALUE:.1f}_past{N_PAST_GAMES}_dmodel{D_MODEL}"

model_path   = os.path.join("models",  model_tag + ".pt")
metrics_path = os.path.join("metrics", model_tag + "_metrics.json")

model_cpu = model.to("cpu")
torch.save(model_cpu.state_dict(), model_path)
print(f"Saved TFT model to {model_path}")

metrics_payload = {
    "timestamp": datetime.now().isoformat(),
    "stat_col": STAT_COL,
    "line_value": LINE_VALUE,
    "n_past_games": N_PAST_GAMES,
    "d_model": D_MODEL,
    "train_cfg": {
        "n_epochs": cfg.n_epochs,
        "batch_size": cfg.batch_size,
        "lr": cfg.lr,
        "device": cfg.device,
    },
    "train_history": history,  
    "test_metrics": {
        "auc": float(auc),
        "ece": float(ece),
        "pace2": float(pace2),
        "n_test": int(len(y_true_test)),
    },
}

with open(metrics_path, "w") as f:
    json.dump(metrics_payload, f, indent=2)

print(f"Saved TFT metrics to {metrics_path}")



[TFT] Epoch 01 | Train BCE loss: 0.5485
[TFT] Epoch 02 | Train BCE loss: 0.5311
[TFT] Epoch 03 | Train BCE loss: 0.5256
[TFT] Epoch 04 | Train BCE loss: 0.5228
[TFT] Epoch 05 | Train BCE loss: 0.5236
[TFT] Epoch 06 | Train BCE loss: 0.5247
[TFT] Epoch 07 | Train BCE loss: 0.5239
[TFT] Epoch 08 | Train BCE loss: 0.5229
[TFT] Epoch 09 | Train BCE loss: 0.5216
[TFT] Epoch 10 | Train BCE loss: 0.5207

=== TFT Test Metrics (Single-Leg + Parlay) ===
AUC   : 0.7848
ECE   : 0.0765
PaCE2 : 0.1518  (random 2-leg parlays)
Saved TFT model to models/tft_dual_receiving_yds_line_37.5_past5_dmodel128.pt
Saved TFT metrics to metrics/tft_dual_receiving_yds_line_37.5_past5_dmodel128_metrics.json


In [None]:
player_name = "George Kittle"
stat_col = "YDS"
line_value = LINE_VALUE      

prob = predict_player_over_prob(
    model=model,
    df=test_df,
    player_name=player_name,
    stat_col=stat_col,
    line_value=line_value,
    n_past_games=N_PAST_GAMES,
    model_type="tft"
)

print(f"\n[TFT] Player: {player_name}")
print(f"Prop: {stat_col} over {line_value}")
print(f"Predicted probability (model): {prob:.3f}")


[TFT] Player: George Kittle
Prop: YDS over 37.5
Predicted probability (model): 0.361


# XGBoost TEST

In [7]:
from xgb import XGBTrainConfig, train_xgb_classifier

X_train, y_train, lengths_train, meta_train = prepare_receiving_sequences(
    train_df,
    n_past_games=N_PAST_GAMES,
    target_col="over_label",
)

X_test, y_test, lengths_test, meta_test = prepare_receiving_sequences(
    test_df,
    n_past_games=N_PAST_GAMES,
    target_col="over_label",
)

xgb_cfg = XGBTrainConfig(
    n_estimators=300,
    max_depth=4,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    eval_metric="logloss",
    verbose=False,
)

xgb_result = train_xgb_classifier(
    X=X_train,
    y=y_train,
    lengths=lengths_train,
    cfg=xgb_cfg,
)

xgb_model = xgb_result["model"]
xgb_history = xgb_result["history"]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [8]:
from player_utils import predict_player_over_prob

player_name = "George Kittle"

prob_xgb = predict_player_over_prob(
    model=xgb_model,
    df=test_df,
    player_name=player_name,
    stat_col=STAT_COL,        # "YDS"
    line_value=LINE_VALUE,    # same line you trained on, e.g. 37.5
    n_past_games=N_PAST_GAMES,
    model_type="xgboost",     # ðŸ‘ˆ key difference
)

print(f"[XGBoost] Player: {player_name}")
print(f"Prop: {STAT_COL} over {LINE_VALUE}")
print(f"Predicted probability (model): {prob_xgb:.3f}")

[XGBoost] Player: George Kittle
Prop: YDS over 37.5
Predicted probability (model): 0.313
