# Player Performance Prediction (S - Safety) - Transformer + Time2Vec

This notebook implements a Transformer-based model with Time2Vec embeddings to predict future safety performance (e.g., weighted average grade) based on historical data from **SPFF.csv**.

## Architecture
1. **Time2Vec Embedding**: Captures periodic and linear temporal patterns.
2. **Transformer Encoder**: Captures long-range dependencies and interactions between features.
3. **Regression Head**: Predicts the target metric (e.g., next-year grade).

In [8]:
# Install dependencies if not already installed
!pip install tensorflow pandas numpy scikit-learn matplotlib



In [9]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, models

## 1. Time2Vec Layer

In [10]:
class Time2Vec(layers.Layer):
    """Time2Vec embedding: a linear term plus per-feature periodic terms.

    For an input of shape (batch, time, F) the layer returns
    (batch, time, F * (kernel_size + 1)): the first F channels are the
    linear term ``wb * x``; the remaining F * kernel_size channels are
    ``sin(wa * x + ba)`` with ``kernel_size`` learned frequency/phase pairs
    per input feature.

    Args:
        kernel_size: number of periodic (sine) components per input feature.
        **kwargs: forwarded to ``layers.Layer``.
    """

    def __init__(self, kernel_size=1, **kwargs):
        super().__init__(**kwargs)
        self.k = kernel_size

    def build(self, input_shape):
        """Create the linear weights (F,) and the periodic weights/biases (1, F, k)."""
        F = input_shape[-1]
        self.wb = self.add_weight(name='wb', shape=(F,), initializer='uniform', trainable=True)
        self.wa = self.add_weight(name='wa', shape=(1, F, self.k), initializer='uniform', trainable=True)
        self.ba = self.add_weight(name='ba', shape=(1, F, self.k), initializer='uniform', trainable=True)
        super().build(input_shape)

    def call(self, inputs, **kwargs):
        """Return concat([wb * x, sin(wa * x + ba)]) along the last axis."""
        # inputs: (batch, time, features)
        bias = self.wb * inputs  # (batch, time, F)
        # Broadcast (batch, time, F, 1) against wa/ba (1, F, k) -> (batch, time, F, k)
        x = inputs[..., tf.newaxis]
        pattern = tf.math.sin(x * self.wa + self.ba)
        # Flatten the per-feature periodic terms: (batch, time, F, k) -> (batch, time, F*k).
        # The time dimension is read dynamically; the feature dimension is static.
        pattern = tf.reshape(pattern, (-1, tf.shape(inputs)[1], inputs.shape[2] * self.k))
        return tf.concat([bias, pattern], axis=-1)

    def compute_output_shape(self, input_shape):
        """Output keeps batch/time and widens features to F * (k + 1)."""
        return (input_shape[0], input_shape[1], input_shape[2] * (self.k + 1))

    def get_config(self):
        """Include ``kernel_size`` so the layer can be re-created from its config."""
        config = super().get_config()
        config.update({'kernel_size': self.k})
        return config

## 2. Transformer Block

In [11]:
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention + feed-forward, post-norm.

    Each sublayer output goes through dropout, is added to its input
    (residual connection) and is then layer-normalized.

    Args:
        embed_dim: width of the block's input/output; also passed as
            ``key_dim`` to ``MultiHeadAttention``.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward sublayer.
        rate: dropout rate applied after attention and after the feed-forward.
        **kwargs: forwarded to ``layers.Layer``.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the hyperparameters so get_config() can reproduce the layer.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        # NOTE(review): key_dim is set to embed_dim (not embed_dim // num_heads);
        # kept as-is to preserve the existing model - confirm this is intended.
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = models.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=False):
        """Apply self-attention then the feed-forward net, each with residual + norm."""
        # Self-attention: the sequence attends to itself (query = value = inputs).
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Include the constructor arguments so the layer can be re-created from its config."""
        config = super().get_config()
        config.update({
            'embed_dim': self.embed_dim,
            'num_heads': self.num_heads,
            'ff_dim': self.ff_dim,
            'rate': self.rate,
        })
        return config

## 3. Data Loading & Sliding Window Generation

Load **SPFF.csv** (safety data), group by player, and create sliding-window sequences: the previous `SEQUENCE_LENGTH` seasons predict the next one (with the default of 2, Years 1–2 predict Year 3).

In [12]:
# Locate SPFF.csv (safety data). Several relative locations are probed so the
# notebook runs regardless of the current working directory.
_candidates = ['../SPFF.csv', 'SPFF.csv', 'backend/ML/SPFF.csv']
data_path = next((candidate for candidate in _candidates if os.path.exists(candidate)), None)
if data_path is None:
    raise FileNotFoundError(f"SPFF.csv not found. Tried: {_candidates}")

print(f"Reading data from: {data_path}")
# Normalise the season column name to 'Year' (a no-op when there is no 'year'
# column) and order rows by player, then season.
df = (
    pd.read_csv(data_path)
    .rename(columns={'year': 'Year'})
    .sort_values(by=['player', 'Year'])
)
print(f"Loaded: {df.shape[0]} rows, {df['player'].nunique()} players, years {df['Year'].min()}-{df['Year'].max()}")
df.head()

Reading data from: ../SPFF.csv
Loaded Data Shape: (2427, 38)
Columns: ['Unnamed: 0', 'player', 'team_name', 'position', 'snap_counts_defense', 'player_id', 'player_game_count', 'assists', 'declined_penalties', 'forced_fumbles', 'franchise_id', 'fumble_recoveries', 'grades_coverage_defense', 'grades_defense', 'grades_defense_penalty', 'grades_tackle', 'interceptions', 'interception_touchdowns', 'missed_tackle_rate', 'missed_tackles', 'pass_break_ups', 'penalties', 'qb_rating_against', 'receptions', 'snap_counts_fs', 'snap_counts_box', 'snap_counts_coverage', 'snap_counts_slot', 'stops', 'tackles', 'tackles_for_loss', 'targets', 'touchdowns', 'yards', 'Team', 'Year', 'weighted_grade', 'weighted_average_grade']
Years covered: 2010 - 2024
Unique Players: 694


Unnamed: 0.1,Unnamed: 0,player,team_name,position,snap_counts_defense,player_id,player_game_count,assists,declined_penalties,forced_fumbles,...,stops,tackles,tackles_for_loss,targets,touchdowns,yards,Team,Year,weighted_grade,weighted_average_grade
1395,1395,A.J. Moore,HST,S,1,51146,1,0,0,0,...,0,0,0,0,0,0,Texans,2018,60.0,
1548,1548,A.J. Moore,HST,S,22,51146,5,0,0,0,...,1,2,1,1,0,-2,Texans,2019,2083.4,
1717,1717,A.J. Moore,HST,S,355,51146,10,9,0,1,...,4,20,0,10,2,87,Texans,2020,18779.5,
1874,1874,A.J. Moore,HST,S,44,51146,6,2,0,0,...,0,0,0,1,0,13,Texans,2021,1276.0,
2055,2055,A.J. Thomas,CHI,S,13,56309,1,0,0,0,...,0,2,0,0,0,0,Bears,2022,872.3,


In [13]:
# Number of past seasons used as context for one prediction.
# Comparison (run compare_sequence_lengths.py): 2y best Test R² (~0.31), 3y ~0.06, 4–5y negative.
SEQUENCE_LENGTH = 2

# Target: weighted_grade (full coverage in SPFF); predictions are clipped to >= 0 downstream.
target_col = 'weighted_grade'

# Candidate model inputs; any column missing from the loaded frame is dropped.
features = [
    col for col in (
        'grades_coverage_defense', 'grades_defense', 'grades_defense_penalty', 'grades_tackle',
        'missed_tackle_rate', 'qb_rating_against', 'interceptions', 'pass_break_ups',
        'stops', 'tackles', 'snap_counts_defense',
    )
    if col in df.columns
]
print(f"Using {len(features)} features, target: {target_col}")

# Impute missing feature values with the per-column median; the target is never imputed.
medians = df[features].median()
df_filled = df.fillna(medians)

# Only rows with a known target are usable for training.
df_clean = df_filled[df_filled[target_col].notna()].copy()
print(f"Rows with non-null target (for training): {len(df_clean)}")

# Standardise the features. The fitted scaler is reused later at prediction time.
# NOTE(review): the scaler (and the medians above) are fitted on all rows before
# the train/test split, so test-set statistics leak into preprocessing.
scaler = StandardScaler()
scaler.fit(df_clean[features])
df_clean[features] = scaler.transform(df_clean[features])

def create_sequences(dataset, seq_len, feats, target, require_consecutive=True):
    """Build sliding-window (context -> next-season target) training pairs per player.

    For every player (rows grouped by the 'player' column and ordered by
    'Year'), each run of ``seq_len`` seasons becomes one input sequence and the
    target value of the season right after it becomes the label.

    Args:
        dataset: DataFrame with 'player', 'Year', the ``feats`` columns and ``target``.
        seq_len: number of context seasons per sequence.
        feats: list of feature column names.
        target: name of the target column.
        require_consecutive: when True (default) a window is kept only if its
            ``seq_len`` context seasons and the target season are consecutive
            years. This matches the prediction cell, which only scores players
            that have exactly the consecutive context years; without it a
            player with a gap (e.g. 2018, 2019, 2022) would yield a pair that
            "predicts" 2022 from 2018-2019. Pass False for the old
            position-based windows.

    Returns:
        (X, y): X has shape (n, seq_len, len(feats)), y has shape (n,).
        When no window qualifies, X is (0, seq_len, len(feats)) and y is (0,).
    """
    X, y = [], []
    for _, group in dataset.groupby('player'):
        group = group.sort_values('Year')
        if len(group) <= seq_len:
            continue  # not enough seasons for one context window plus a target
        vals = group[feats].to_numpy()
        targs = group[target].to_numpy()
        years = group['Year'].to_numpy()
        for i in range(len(group) - seq_len):
            if require_consecutive:
                window_years = years[i:i + seq_len + 1]  # context seasons + target season
                # Any gap or duplicate year (or a NaN year) disqualifies the window.
                if not all(b - a == 1 for a, b in zip(window_years[:-1], window_years[1:])):
                    continue
            X.append(vals[i:i + seq_len])
            y.append(targs[i + seq_len])
    if not X:
        # Keep the rank consistent so downstream shape prints/indexing still work.
        return np.empty((0, seq_len, len(feats))), np.empty((0,))
    return np.array(X), np.array(y)

# Turn the cleaned, scaled frame into (context window, next-season target) pairs.
X, y = create_sequences(df_clean, SEQUENCE_LENGTH, features, target_col)
print(f"Sequences: X={X.shape}, y={y.shape}")

if len(X) > 0:
    # train_test_split comes from the notebook's import cell, so the dependency
    # is visible up front and survives Restart & Run All.
    # NOTE(review): this is a random split over windows, so windows from the same
    # player can land in both train and test; consider a player-level split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print(f"Train: {X_train.shape}, Test: {X_test.shape}")
else:
    print("No sequences: need players with >", SEQUENCE_LENGTH, "years of data.")

Rows after dropping NaN in features/target: 68
Generated Sequences Shape: X=(4, 3, 11), y=(4,)
Train: (3, 3, 11), Test: (1, 3, 11)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean[features] = scaler.fit_transform(df_clean[features])


## 4. Build & Train Model

In [14]:
def build_transformer_model(input_shape, embed_dim=32, num_heads=2, ff_dim=32,
                            num_transformer_blocks=1, mlp_units=(64,),
                            time2vec_kernel_size=2, dropout_rate=0.1, mlp_dropout=0.2):
    """Build the Time2Vec + Transformer regression model.

    Pipeline: Time2Vec embedding -> Dense projection to ``embed_dim`` ->
    ``num_transformer_blocks`` TransformerBlocks -> average pooling over time ->
    MLP head (Dense + Dropout per entry of ``mlp_units``) -> single linear output.

    Args:
        input_shape: (sequence_length, n_features) of one sample.
        embed_dim: width of the token representation fed to the Transformer.
        num_heads: attention heads per block; must divide ``embed_dim``.
        ff_dim: hidden width of each block's feed-forward sublayer.
        num_transformer_blocks: number of stacked TransformerBlocks.
        mlp_units: iterable of hidden widths for the regression head.
        time2vec_kernel_size: periodic components per feature in Time2Vec.
        dropout_rate: dropout rate inside each TransformerBlock.
        mlp_dropout: dropout rate after each hidden layer of the head.

    Returns:
        An uncompiled Keras ``Model`` mapping (batch, *input_shape) -> (batch, 1).

    Raises:
        AssertionError: if ``embed_dim`` is not divisible by ``num_heads``.
    """
    assert embed_dim % num_heads == 0, (
        f"embed_dim ({embed_dim}) must be divisible by num_heads ({num_heads})"
    )
    inputs = layers.Input(shape=input_shape)
    x = Time2Vec(kernel_size=time2vec_kernel_size)(inputs)
    # Project the widened Time2Vec output down to the Transformer width.
    x = layers.Dense(embed_dim)(x)
    for _ in range(num_transformer_blocks):
        x = TransformerBlock(embed_dim, num_heads, ff_dim, rate=dropout_rate)(x)
    # Collapse the time axis so the head sees one vector per sequence.
    x = layers.GlobalAveragePooling1D()(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(1)(x)
    return models.Model(inputs=inputs, outputs=outputs)

if len(X) > 0:
    # Seed Python, NumPy and TensorFlow so weight init, shuffling and dropout
    # are repeatable across Restart & Run All.
    SEED = 42
    tf.keras.utils.set_random_seed(SEED)

    # Take the input shape from the data itself so the model always matches X,
    # even if SEQUENCE_LENGTH was edited after the sequences were built.
    input_shape = X_train.shape[1:]
    model = build_transformer_model(input_shape, embed_dim=32, num_heads=2)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')
    model.summary()

    # Stop once validation loss has not improved for 20 epochs and roll back
    # to the best weights seen.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    history = model.fit(
        X_train, y_train,
        epochs=200,
        batch_size=16,
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=1
    )

    train_pred = model.predict(X_train).flatten()
    test_pred = model.predict(X_test).flatten()
    # Clip to non-negative (weighted grades are non-negative unless data has negatives)
    train_pred = np.maximum(train_pred, 0.0)
    test_pred = np.maximum(test_pred, 0.0)
    train_r2 = r2_score(y_train, train_pred)
    test_r2 = r2_score(y_test, test_pred)
    print("\n--- Results ---")
    print(f"Train R²: {train_r2:.4f}")
    print(f"Test  R²: {test_r2:.4f}")

    # Predicted vs. actual on the held-out windows.
    plt.figure(figsize=(6, 5))
    plt.scatter(y_test, test_pred)
    plt.xlabel('Actual ' + target_col)
    plt.ylabel('Predicted')
    plt.title('Safety Performance Prediction')
    plt.show()
else:
    print("Skipping model build: no sequences generated.")

Epoch 1/200


ValueError: Exception encountered when calling Time2Vec.call().

[1mDimensions must be equal, but are 3 and 11 for '{{node functional_3_1/time2_vec_1_1/add}} = AddV2[T=DT_FLOAT](functional_3_1/time2_vec_1_1/MatMul, functional_3_1/time2_vec_1_1/add/ReadVariableOp)' with input shapes: [?,3,2], [1,11,2].[0m

Arguments received by Time2Vec.call():
  • inputs=tf.Tensor(shape=(None, 3, 11), dtype=float32)
  • kwargs=<class 'inspect._empty'>

## 5. Multi-year predictions CSV

Predictions for **2015–2025** using the previous **2** years of data each time (e.g. 2013–2014 → 2015). *(2 years gave best test R² in comparison; see `compare_sequence_lengths.py`.)* CSV has top scores first for each year. Printed view shows 2015–2024 only (2025 omitted from per-year samples).

In [None]:
# Multi-year predictions: 2015–2025, using previous SEQUENCE_LENGTH years (e.g. 2: 2013-2014 -> 2015)
min_year = df_filled['Year'].min()
max_year = df_filled['Year'].max()
prediction_years = list(range(2015, 2026))
# Players present in the most recent season of the data. Only they receive a
# forecast for the first season beyond the data (max_year + 1).
players_latest = set(df_filled.loc[df_filled['Year'] == max_year, 'player'].unique())
players_2024 = players_latest  # previous name, kept for any cell that still refers to it
# (player, year) -> actual target value, used to attach ground truth where it exists.
actual_lookup = df_filled.set_index(['player', 'Year'])[target_col].to_dict()

if len(X) > 0 and 'model' in globals():
    all_rows = []
    for year_pred in prediction_years:
        context_years = list(range(year_pred - SEQUENCE_LENGTH, year_pred))
        subset = df_filled[df_filled['Year'].isin(context_years)].copy()
        if subset.empty:
            continue  # no data for these context years; scaler.transform would fail on 0 rows
        subset[features] = scaler.transform(subset[features])

        # Collect every eligible player's sequence first, then predict once per
        # year instead of calling model.predict separately for each player.
        year_seqs, year_meta = [], []
        for player, group in subset.groupby('player'):
            group = group.sort_values('Year')
            # Require exactly one row for each context year, in order.
            if list(group['Year'].values) != context_years:
                continue
            if year_pred == max_year + 1 and player not in players_latest:
                continue
            last = group.iloc[-1]
            year_seqs.append(group[features].values)
            year_meta.append((player, last.get('team_name', last.get('Team', ''))))
        if not year_seqs:
            continue

        year_preds = model.predict(np.array(year_seqs), verbose=0).flatten()
        for (player, team), pred in zip(year_meta, year_preds):
            pred = max(0.0, float(pred))  # grades are clipped to be non-negative
            actual = actual_lookup.get((player, year_pred), np.nan)
            all_rows.append({
                'player': player,
                'year_predicted': year_pred,
                'predicted_weighted_grade': round(pred, 4),
                'actual_weighted_grade': round(actual, 4) if not np.isnan(actual) else np.nan,
                'team': team
            })

    if not all_rows:
        print("No predictions generated: no player has a complete context window for the requested years.")
    else:
        pred_df = pd.DataFrame(all_rows)
        # Top scores first within each year: sort by year, then by predicted grade descending
        pred_df = pred_df.sort_values(['year_predicted', 'predicted_weighted_grade'], ascending=[True, False]).reset_index(drop=True)
        pred_df['rank_in_year'] = pred_df.groupby('year_predicted')['predicted_weighted_grade'].rank(method='first', ascending=False).astype(int)
        pred_df = pred_df[['player', 'year_predicted', 'predicted_weighted_grade', 'actual_weighted_grade', 'rank_in_year', 'team']]

        # Write the CSV next to the input data file.
        out_dir = os.path.dirname(data_path) if os.path.dirname(data_path) else '.'
        out_path = os.path.join(out_dir, 'S_predictions_by_year.csv')
        pred_df.to_csv(out_path, index=False)
        print(f"Saved {len(pred_df)} predictions to: {out_path}")
        print(f"Years in CSV: {sorted(pred_df['year_predicted'].unique())} (top scores listed first for each year)")

        with_actual = pred_df[pred_df['actual_weighted_grade'].notna()]
        if len(with_actual) > 0:
            corr = np.corrcoef(with_actual['predicted_weighted_grade'], with_actual['actual_weighted_grade'])[0, 1]
            r2 = r2_score(with_actual['actual_weighted_grade'], with_actual['predicted_weighted_grade'])
            print(f"\nPredicted vs actual (where available): correlation = {corr:.4f}, R² = {r2:.4f}")

        print("\n--- Individual year predictions (2015–2024 only; 2025 omitted from this view) ---")
        # The loop variable is `yr`, not `y`, so the training target array `y` is not overwritten.
        for yr in range(2015, 2025):
            sample = pred_df[pred_df['year_predicted'] == yr].head(8 if yr == 2015 else 5)
            if len(sample) > 0:
                print(f"\n{yr} (top {len(sample)} by predicted grade):")
                print(sample.to_string())
else:
    print("Run training cell first (need model and data).")