In [7]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

from bdb2026.data_io import load_bdb_csvs
from bdb2026.preprocess import add_postthrow_features, merge_route_embeddings, normalize_coordinates, add_postthrow_features,filter_targeted_wr_routes
from bdb2026.routes import cluster_routes_kmeans, make_route_embedding_table
from bdb2026.sequences import build_sequences
from bdb2026.models import build_lstm, train_lstm
from bdb2026.metrics import compute_truespeed
from bdb2026.viz import *

In [19]:
# Directory holding the three combined competition CSVs.
# Override by setting the BDB_DATA_DIR environment variable before starting the
# kernel; the fallback is the original author's local Windows location, so the
# paths passed below are unchanged on that machine.
DATA_DIR = Path(
    os.environ.get(
        "BDB_DATA_DIR",
        r"C:\Users\rajas\Documents\ADS\SII\Big_Data_Bowl\data\114239_nfl_competition_files_published_analytics_final\combined",
    )
)

# Paths are handed over as plain strings, exactly as before.
data = load_bdb_csvs(
    input_path=str(DATA_DIR / "group_input.csv"),
    output_path=str(DATA_DIR / "group_output.csv"),
    supplementary_path=str(DATA_DIR / "supplementary_data.csv"),
)

In [26]:
# Tally how often each pass_result code occurs in the supplementary table,
# then show at most the first 20 rows of that tally.
pass_result_counts = data.df_supp["pass_result"].value_counts()
pass_result_counts.iloc[:20]

pass_result
C     12470
I      5106
IN      433
Name: count, dtype: int64

In [10]:
# Stage 1: normalize the output-frame coordinates (offense_left_to_right=True).
df_out = normalize_coordinates(data.df_output, offense_left_to_right=True)

# Stage 2: engineer the post-throw feature columns on the normalized frames.
df_postthrow = df_out.pipe(add_postthrow_features)

# Stage 3: keep only targeted WR routes
# (filter semantics live in bdb2026.preprocess — not visible from this notebook).
df_postthrow_wr = filter_targeted_wr_routes(df_postthrow)

In [12]:
# Sanity check: list the columns available on the normalized output frame.
df_out.columns

Index(['game_id', 'play_id', 'nfl_id', 'frame_id', 'x', 'y', 'y_norm',
       'x_norm'],
      dtype='object')

In [None]:
# Cluster the targeted-receiver routes, then turn the cluster assignments into
# a table of route_emb* columns.
# NOTE(review): the frame passed in is df_postthrow_wr (post-throw features),
# not a pre-throw window — confirm which window the clustering should use.
N_ROUTE_CLUSTERS = 12  # shared by both calls so the embedding width always matches the clustering

route_result = cluster_routes_kmeans(df_postthrow_wr, n_clusters=N_ROUTE_CLUSTERS)
route_emb = make_route_embedding_table(
    route_result.assignments,
    n_clusters=N_ROUTE_CLUSTERS,
    prefix='route_emb',
)

# Merge the static embedding columns onto the per-frame feature table
df_features = merge_route_embeddings(df_postthrow_wr, route_emb)

In [None]:
# Assemble the LSTM input columns: the hand-picked feature groups below,
# followed by every route_emb_* column present in df_features.
position_cols = ['x_norm', 'y_norm']
motion_cols = ['dx', 'dy', 'speed']
geometry_cols = ['dist_to_land', 'bearing_to_land', 'heading', 'heading_align_cos']
temporal_cols = ['time_since_throw']
# Context (constant per play).
# 'defender_separation' removed - will use in Phase 3 for context scoring
context_cols = ['initial_separation', 'ball_land_x_norm', 'ball_land_y_norm']
route_emb_cols = [col for col in df_features.columns if col.startswith('route_emb_')]

feature_cols = (
    position_cols
    + motion_cols
    + geometry_cols
    + temporal_cols
    + context_cols
    + route_emb_cols
)

# Build the model tensors with 'converge_rate' as the target column.
X, y, keys = build_sequences(df_features, feature_cols=feature_cols, target_col='converge_rate')

NameError: name 'df_prethrow_wr' is not defined

In [None]:
# Build an LSTM whose input width matches the last axis of X, then fit it on (X, y).
lstm_kwargs = dict(hidden_units=64, dropout=0.2, mask_value=0.0)
fit_kwargs = dict(epochs=20, batch_size=64, verbose=1)

model = build_lstm(n_features=X.shape[-1], **lstm_kwargs)
result = train_lstm(model, X, y, **fit_kwargs)

In [None]:
# Predict, flatten to one row per (play, timestep), and compute TrueSpeed per play.
# NOTE(review): predictions are made on the same X the model was fitted on — confirm
# whether a held-out split is intended.
y_hat = result.model.predict(X)

n_plays, n_steps = len(keys), X.shape[1]
play_idx = np.arange(n_plays)

# One row per play, tagged with its position in `keys`.
plays = pd.DataFrame(keys, columns=["game_id","play_id"]).assign(idx=play_idx)

# One row per (play, timestep); `idx` repeats each play position n_steps times.
# NOTE(review): every timestep is kept, presumably including padded ones — verify
# against build_sequences before trusting the per-play mean.
frames = pd.DataFrame({
    "idx": np.repeat(play_idx, n_steps),
    "t": np.tile(np.arange(n_steps), n_plays),
    "actual": y.reshape(-1),
    "pred": y_hat.reshape(-1),
})

df_pred = plays.merge(frames, on="idx")

ts = compute_truespeed(
    df_pred,
    actual_col="actual",
    pred_col="pred",
    id_cols=("game_id","play_id"),
    agg="mean",
)
ts.head()