In [1]:
# ============================================================
# GRU + Off-Season Cross-Attention (Opponent Scouting)
# ============================================================

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Layer, MultiHeadAttention, LayerNormalization, Dropout
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

In [2]:
# ---------------------------
# Load Data
# ---------------------------
# Read both inputs up front: weekly GRU team embeddings and the game schedule.
gru_df = pd.read_csv("gru_team_embeddings_by_week.csv")
games_df = pd.read_excel("nfl_2024_schedule.xlsx")

# Every column whose name begins with "emb_" is treated as one embedding dimension.
gru_cols = [name for name in gru_df.columns if name.startswith("emb_")]
gru_dim = len(gru_cols)

# Normalise the schedule's column names to the short keys used downstream and
# force the week key to an integer so it can be compared numerically.
# NOTE(review): week keys such as 202318 look like season*100 + week — confirm.
games_df = (
    games_df
    .rename(columns={"Home Team": "home", "Away Team": "away", "Seasonweek": "week"})
    .astype({"week": int})
)

# ✅ Keep only games whose week key is at or after the cutoff.
start_week = 202318
games_df = games_df.loc[games_df["week"] >= start_week].copy()

print(f"✅ Loaded GRU embeddings ({len(gru_df)} rows, {gru_dim} features)")
print(f"✅ Loaded {len(games_df)} games from week {start_week} onward")

✅ Loaded GRU embeddings (1598 rows, 20 features)
✅ Loaded 272 games from week 202318 onward


In [3]:
# ------------------------------------------------------------
# 2. Load and Scale Off-Season Features
# ------------------------------------------------------------
off_df = pd.read_csv("offseason_2024_labeled.csv")

# Team codes are attached purely by row position.
# NOTE(review): this presumes the CSV has exactly these 32 rows in this order —
# verify against the file, otherwise features are silently paired with the wrong team.
off_df["Team"] = [
    "CRD", "ATL", "RAV", "BUF", "CAR", "CHI", "CIN", "CLE",
    "DAL", "DEN", "DET", "GNB", "HTX", "CLT", "JAX", "KAN",
    "RAI", "SDG", "RAM", "MIA", "MIN", "NWE", "NOR", "NYG",
    "NYJ", "PHI", "PIT", "SFO", "SEA", "TAM", "OTI", "WAS",
]

# Remove the label column when it exists; the team column is kept as the lookup key.
off_df = off_df.drop(columns=["Improvement?"], errors="ignore")

# "Team" was assigned just above, so it is always the key column here.
team_col = "Team"
off_df[team_col] = off_df[team_col].str.strip()

# Split columns: the two flag columns stay unscaled, the team key is excluded,
# and everything else (in original column order) is standardised.
cat_features = ["New Coach", "New QB"]
non_numeric = {*cat_features, team_col}
num_features = [col for col in off_df.columns if col not in non_numeric]

scaler = StandardScaler()
scaled_values = scaler.fit_transform(off_df[num_features])
off_df_scaled = off_df.copy()
off_df_scaled[num_features] = scaled_values

# Feature order used when building team vectors: scaled numerics first, then flags.
off_cols = [*num_features, *cat_features]
print(f"✅ Off-season data scaled ({len(off_cols)} features, {len(cat_features)} categorical untouched)")

✅ Off-season data scaled (12 features, 2 categorical untouched)


In [4]:
# ------------------------------------------------------------
# 3. Define Mutual Cross-Attention Layer
# ------------------------------------------------------------
class ScoutingAttention(Layer):
    """Mutual cross-attention where Away and Home teams contextualize each other.

    One shared ``MultiHeadAttention`` block is applied in both directions:
    away-as-query over home-as-key/value, and home-as-query over
    away-as-key/value. The two results are concatenated on the last axis,
    away-query result first.

    Args:
        num_heads: Number of attention heads passed to ``MultiHeadAttention``.
        key_dim: Per-head key/query size passed to ``MultiHeadAttention``.
        dropout: Dropout rate used inside the attention block and on its outputs.
        **kwargs: Forwarded to the base ``Layer`` constructor (e.g. ``name``).

    NOTE(review): when each team is passed as a single token (sequence length 1,
    which is how this notebook builds its inputs), the attention softmax runs over
    exactly one key, so every attention weight is 1 and each direction's output is
    just a projection of the *other* team's normalised vector — it does not depend
    on the query. Splitting the features into several tokens would be needed for
    the query to have any influence.
    """

    def __init__(self, num_heads=4, key_dim=8, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        self.attn = MultiHeadAttention(num_heads=num_heads, key_dim=key_dim, dropout=dropout)
        # Separate normalisations for the query role and the key/value role.
        self.norm_q = LayerNormalization()
        self.norm_kv = LayerNormalization()
        self.dropout = Dropout(dropout)

    def call(self, away_team, home_team, training=None):
        """Run attention in both directions and concatenate the results.

        Args:
            away_team: Away-team tensor; assumes (batch, seq, dim) — TODO confirm
                for callers outside this notebook.
            home_team: Home-team tensor with the same layout as ``away_team``.
            training: Optional flag forwarded to the attention and dropout
                sublayers; ``None`` leaves the decision to Keras.

        Returns:
            ``tf.concat([away_query_result, home_query_result], axis=-1)``.
        """
        # Normalise each tensor once per role instead of once per keyword argument.
        away_as_query = self.norm_q(away_team)
        home_as_query = self.norm_q(home_team)
        away_as_context = self.norm_kv(away_team)
        home_as_context = self.norm_kv(home_team)

        attn_away = self.attn(
            query=away_as_query,
            value=home_as_context,
            key=home_as_context,
            training=training,
        )
        attn_home = self.attn(
            query=home_as_query,
            value=away_as_context,
            key=away_as_context,
            training=training,
        )
        return tf.concat(
            [
                self.dropout(attn_away, training=training),
                self.dropout(attn_home, training=training),
            ],
            axis=-1,
        )


# The layer's weights are randomly initialised and, in the cells shown, never
# trained. Seed every RNG first so a fresh "Restart & Run All" reproduces the
# same exported embeddings instead of a different random projection each run.
SCOUTING_SEED = 42
tf.keras.utils.set_random_seed(SCOUTING_SEED)

scouting_layer = ScoutingAttention(num_heads=4, key_dim=8, dropout=0.1)

In [5]:
import warnings
# Blanket suppression kept from the original cell so the progress bar output stays clean.
# NOTE(review): this silences every warning for the rest of the session.
warnings.filterwarnings('ignore')
# ------------------------------------------------------------
# 4. Build Fused Embeddings (GRU + Off-Season)
# ------------------------------------------------------------


def _team_vector(team, week):
    """Return the (1, gru_dim + off-season dim) feature row for one team and week.

    Concatenates the team's GRU embedding for ``week`` with its scaled
    off-season features. Returns ``None`` when either lookup finds no rows.
    """
    gru_part = gru_df[(gru_df["team"] == team) & (gru_df["seasonweek"] == week)][gru_cols].values
    off_part = off_df_scaled[off_df_scaled[team_col] == team][off_cols].values
    if gru_part.size == 0 or off_part.size == 0:
        return None
    return np.concatenate([gru_part.flatten(), off_part.flatten()])[None, :]


results = []
skipped_games = []  # (week, away, home) for games that lacked features
for _, row in tqdm(games_df.iterrows(), total=len(games_df), desc=f"Generating Cross-Attention from {start_week}"):
    week = int(row["week"])
    away, home = row["away"], row["home"]

    # --- Build GRU + off-season vectors; record the game if either side is missing ---
    away_vec = _team_vector(away, week)
    home_vec = _team_vector(home, week)
    if away_vec is None or home_vec is None:
        skipped_games.append((week, away, home))
        continue

    # --- Tensor format for attention: (batch=1, seq=1, features) ---
    away_tf = tf.expand_dims(tf.convert_to_tensor(away_vec, dtype=tf.float32), axis=1)
    home_tf = tf.expand_dims(tf.convert_to_tensor(home_vec, dtype=tf.float32), axis=1)

    # --- Mutual attention; drop the singleton batch/sequence axes ---
    attended = scouting_layer(away_tf, home_tf)
    attended_np = tf.squeeze(attended).numpy()

    # --- Split back into halves: first half is the away-as-query result,
    #     second half is the home-as-query result (layer concat order) ---
    half = attended_np.shape[0] // 2
    away_half, home_half = attended_np[:half], attended_np[half:]

    # Accept either capitalisation of the winner column; None when neither exists.
    winner = row.get("Winner", row.get("winner"))

    results.append({
        "week": week,
        "away_team": away,
        "home_team": home,
        "winner": winner,
        **{f"A_attn_{i}": value for i, value in enumerate(away_half)},
        **{f"B_attn_{i}": value for i, value in enumerate(home_half)},
    })

# Surface dropped games instead of losing them silently.
if skipped_games:
    print(
        f"⚠️ Skipped {len(skipped_games)} game(s) with missing GRU or off-season "
        f"features, e.g. {skipped_games[:5]}"
    )

Generating Cross-Attention from 202318: 100%|████████████████████████████████████████| 272/272 [00:06<00:00, 44.26it/s]


In [6]:
# ------------------------------------------------------------
# 5. Save to CSV
# ------------------------------------------------------------
# An empty result list would otherwise fail inside sort_values with an opaque
# KeyError on "week"; fail early with a message that names the real problem.
if not results:
    raise ValueError(
        "No games produced cross-attention embeddings; check that team codes and "
        "week keys match between the schedule, GRU embeddings and off-season data."
    )

# mergesort is stable, so games within the same week keep their schedule order
# (the default quicksort may shuffle ties).
crossattn_df = (
    pd.DataFrame(results)
    .sort_values("week", kind="mergesort")
    .reset_index(drop=True)
)
crossattn_df.to_csv("crossattn_gru_offseason_embeddings.csv", index=False)

print(f"📁 Saved {len(crossattn_df)} rows → crossattn_gru_offseason_embeddings.csv")
print("Columns:", crossattn_df.columns.tolist()[:10], "...")

📁 Saved 272 rows → crossattn_gru_offseason_embeddings.csv
Columns: ['week', 'away_team', 'home_team', 'winner', 'A_attn_0', 'A_attn_1', 'A_attn_2', 'A_attn_3', 'A_attn_4', 'A_attn_5'] ...


In [7]:
# Preview the first rows of the exported cross-attention table.
crossattn_df.head()

Unnamed: 0,week,away_team,home_team,winner,A_attn_0,A_attn_1,A_attn_2,A_attn_3,A_attn_4,A_attn_5,...,B_attn_22,B_attn_23,B_attn_24,B_attn_25,B_attn_26,B_attn_27,B_attn_28,B_attn_29,B_attn_30,B_attn_31
0,202401,WAS,TAM,TAM,0.200131,0.099851,-0.139041,-0.553309,0.123657,0.20152,...,-0.196663,-0.433345,-0.695829,0.148547,0.159381,0.114307,0.703918,-0.370277,-0.48261,-0.411924
1,202401,RAM,DET,DET,0.025118,-0.119327,0.138923,-0.401957,0.116467,-0.278825,...,0.110287,-0.562697,0.43096,-0.032633,0.130994,-0.432125,0.016391,-0.312047,1.013716,0.464286
2,202401,NYJ,SFO,SFO,-0.057891,0.084669,0.35634,0.257793,0.140459,0.263779,...,-0.123121,-0.085181,-0.058065,-0.062812,0.279438,-0.020313,0.018604,-0.407296,-0.110128,-0.52778
3,202401,OTI,CHI,CHI,-0.121926,-0.160202,0.169817,0.500754,0.252079,-0.110317,...,-0.129292,0.223754,0.068704,0.01087,-0.389761,-0.194123,-0.053506,0.259154,0.150332,0.148205
4,202401,JAX,MIA,MIA,0.297381,0.086934,-0.084948,0.088505,-0.187295,0.049694,...,-0.024766,0.256065,0.560577,-0.348687,-0.094369,-0.214414,-0.341682,0.333322,0.469406,0.237827
