In [None]:
# imports

import os
import re
import glob  

# NOTE(review): machine-specific absolute path. The relative "data/" path passed
# to Data(...) below resolves against this working directory, so the notebook
# only runs on a machine where this folder exists.
os.chdir(r"C:\Users\Aglaia\Documents\kaggle-nfl-big-data-bowl-2026")

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import GroupKFold
from sklearn.metrics import mean_squared_error
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import ElasticNet, Ridge, Lasso, LinearRegression
from sklearn.preprocessing import StandardScaler
from project.utils.data import Data, PredictionSequencer

In [2]:
# Build the project's Data helper on the relative "data/" folder and run its
# preprocessing step; the result is read from `d.preprocessed` in the next cell.
d = Data(path="data/")
d.preproc()

data/train\input_2023_w01.csv: (285714, 24), data/train\output_2023_w01.csv: (32088, 7), week: 1
data/train\input_2023_w02.csv: (288586, 24), data/train\output_2023_w02.csv: (32180, 7), week: 2
data/train\input_2023_w03.csv: (297757, 24), data/train\output_2023_w03.csv: (36080, 7), week: 3
data/train\input_2023_w04.csv: (272475, 24), data/train\output_2023_w04.csv: (30147, 7), week: 4
data/train\input_2023_w05.csv: (254779, 24), data/train\output_2023_w05.csv: (29319, 7), week: 5
data/train\input_2023_w06.csv: (270676, 24), data/train\output_2023_w06.csv: (31162, 7), week: 6
data/train\input_2023_w07.csv: (233597, 24), data/train\output_2023_w07.csv: (27443, 7), week: 7
data/train\input_2023_w08.csv: (281011, 24), data/train\output_2023_w08.csv: (33017, 7), week: 8
data/train\input_2023_w09.csv: (252796, 24), data/train\output_2023_w09.csv: (28291, 7), week: 9
data/train\input_2023_w10.csv: (260372, 24), data/train\output_2023_w10.csv: (29008, 7), week: 10
data/train\input_2023_w11.csv

In [3]:
# Work on a private copy of the preprocessed frame.
df = d.preprocessed.copy()

# When the flag column is present, keep only the rows marked for prediction.
if "player_to_predict" in df:
    df = df.loc[df["player_to_predict"].eq(True)].copy()

# 2. Feature Engineering

In order to proceed with the model, it would be wise to first engineer new features from the information available in the input dataframe.

For the baseline model, we are creating features associated with the direction of the players right before the ball is thrown, their body orientation, their velocity, their acceleration, features relevant to ball geometry and player alignment with the ball. Features are added as new columns in the df dataframe which will be later split into two separate player-specific sets. 

In [4]:
def add_features(df: pd.DataFrame, type = ""):
    """Add engineered kinematic and ball-geometry columns to ``df`` in place.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``dir``, ``o``, ``s``, ``a``, ``x``, ``y``, ``ball_land_x``
        and ``ball_land_y``. With ``type == "extended"`` it must also contain
        ``delta_t``, ``num_frames_output``, ``frame_id``, ``player_height`` and
        ``player_birth_date``.
    type : str, default ""
        Pass ``"extended"`` to add the additional features; any other value
        adds the baseline features only. The name shadows the builtin but is
        kept so existing keyword calls keep working.

    Returns
    -------
    None
        The new columns are written directly onto ``df``.

    Notes
    -----
    The extended branch reads the module-level names ``X_LIMIT`` and
    ``Y_LIMIT``; they must be defined before that branch is used.
    """

    ###### Baseline Model #######

    # Direction of motion, converted from degrees to radians
    df["dir_rad"] = np.deg2rad(df["dir"])

    # Body orientation, converted from degrees to radians
    df["o_rad"] = np.deg2rad(df["o"])

    # Velocity components: speed projected on the direction of motion
    df["vx"] = df["s"] * np.cos(df["dir_rad"])
    df["vy"] = df["s"] * np.sin(df["dir_rad"])

    # Acceleration components, projected on the same direction
    df["ax"] = df["a"] * np.cos(df["dir_rad"])
    df["ay"] = df["a"] * np.sin(df["dir_rad"])

    # Ball geometry: offset from the player to the ball landing point
    dx_ball = df["ball_land_x"] - df["x"]
    dy_ball = df["ball_land_y"] - df["y"]

    df["dist_to_ball"]  = np.sqrt(dx_ball**2 + dy_ball**2)
    df["angle_to_ball"] = np.arctan2(dy_ball, dx_ball)

    # Alignment with ball: heading minus bearing, wrapped into [-pi, pi)
    df["angle_diff"] = (
        (df["dir_rad"] - df["angle_to_ball"] + np.pi)
        % (2*np.pi)
        - np.pi
    )
    df["cos_angle_diff"] = np.cos(df["angle_diff"])
    df["sin_angle_diff"] = np.sin(df["angle_diff"])

    ####### Additional Features ########

    if type == "extended":
        # Interactions of speed / velocity with the prediction horizon
        df["s_delta_t"]  = df["s"]  * df["delta_t"]
        df["vx_delta_t"] = df["vx"] * df["delta_t"]
        df["vy_delta_t"] = df["vy"] * df["delta_t"]

        # Distance to sidelines/endzone in the normalized frame.
        # NOTE(review): X_LIMIT is subtracted from y and Y_LIMIT from x -
        # confirm the constants are named for the axis they actually bound.
        df["dist_to_left_sideline"]  = df["y"]  # y grows upward
        df["dist_to_right_sideline"] = X_LIMIT - df["y"]  # distance to top boundary
        df["dist_to_endzone"] = Y_LIMIT - df["x"]  # moving toward right endzone

        # Normalized frame index within predicted horizon (0..1);
        # a zero horizon becomes NaN instead of dividing by zero
        den = pd.to_numeric(df["num_frames_output"], errors="coerce").replace(0, np.nan)
        df["frame_norm"] = df["frame_id"] / den

        # Static player features
        def _h_m(h):
            """Convert a "feet-inches" height string to meters (NaN if malformed)."""
            try:
                feet, inch = str(h).split("-")
                return (int(feet)*12 + int(inch)) * 0.0254
            except (TypeError, ValueError):
                # Only parsing failures map to NaN; anything else
                # (e.g. KeyboardInterrupt) is allowed to propagate.
                return np.nan
        df["height_m"] = df["player_height"].map(_h_m) # in meters

        # Age in years relative to a fixed reference date
        birth = pd.to_datetime(df["player_birth_date"], errors="coerce")
        df["age_years"] = (pd.Timestamp("2025-01-01") - birth).dt.days / 365.25
         
       


In [5]:
# Baseline features only. Passing the expression `type == "extended"` as the
# second argument compares the builtin `type` with a string, which is always
# False, so it never selected the extended branch. The explicit call below has
# the same effect without the misleading expression.
# To opt in to the extended features, call add_features(df, type="extended")
# after making sure X_LIMIT and Y_LIMIT are defined.
add_features(df)

# 3. Create two separate datasets according to player side

Because the side a player is on influences their movement trajectory, we split the dataset into two dataframes and train two separate models.
As additional reasoning for this approach, modelling the two sides separately means each model only has to handle the data for its own side rather than the combined dataset.

In [6]:
# Defensive players flagged for prediction, copied so that later edits to this
# frame never touch `df`.
df_def_players = df.loc[
    df["player_side"].eq("Defense") & df["player_to_predict"].eq(True)
].copy()

df_def_players.head()

Unnamed: 0,game_id,play_id,nfl_id,frame_id,target_x,target_y,week,id,player_to_predict,last_frame_id,...,o_rad,vx,vy,ax,ay,dist_to_ball,angle_to_ball,angle_diff,cos_angle_diff,sin_angle_diff
0,2023090700,101,46137,1,56.22,17.28,1,2023090700_101_46137_1,True,26,...,3.228685,-3.720857,3.830251,-1.254221,1.291096,19.375388,-1.17668,-2.764797,-0.929848,-0.367943
1,2023090700,101,46137,2,56.63,16.88,1,2023090700_101_46137_2,True,26,...,3.228685,-3.720857,3.830251,-1.254221,1.291096,19.375388,-1.17668,-2.764797,-0.929848,-0.367943
2,2023090700,101,46137,3,57.06,16.46,1,2023090700_101_46137_3,True,26,...,3.228685,-3.720857,3.830251,-1.254221,1.291096,19.375388,-1.17668,-2.764797,-0.929848,-0.367943
3,2023090700,101,46137,4,57.48,16.02,1,2023090700_101_46137_4,True,26,...,3.228685,-3.720857,3.830251,-1.254221,1.291096,19.375388,-1.17668,-2.764797,-0.929848,-0.367943
4,2023090700,101,46137,5,57.91,15.56,1,2023090700_101_46137_5,True,26,...,3.228685,-3.720857,3.830251,-1.254221,1.291096,19.375388,-1.17668,-2.764797,-0.929848,-0.367943


In [7]:
# Offensive players flagged for prediction, copied into their own dataframe.
df_off_players = df.loc[
    df["player_side"].eq("Offense") & df["player_to_predict"].eq(True)
].copy()

df_off_players.head()

Unnamed: 0,game_id,play_id,nfl_id,frame_id,target_x,target_y,week,id,player_to_predict,last_frame_id,...,o_rad,vx,vy,ax,ay,dist_to_ball,angle_to_ball,angle_diff,cos_angle_diff,sin_angle_diff
42,2023090700,101,44930,1,53.2,13.98,1,2023090700_101_44930_1,True,26,...,1.864012,-1.269866,7.797271,-0.43079,2.64515,17.986063,-0.924627,2.656866,-0.884802,0.465967
43,2023090700,101,44930,2,53.96,13.78,1,2023090700_101_44930_2,True,26,...,1.864012,-1.269866,7.797271,-0.43079,2.64515,17.986063,-0.924627,2.656866,-0.884802,0.465967
44,2023090700,101,44930,3,54.7,13.54,1,2023090700_101_44930_3,True,26,...,1.864012,-1.269866,7.797271,-0.43079,2.64515,17.986063,-0.924627,2.656866,-0.884802,0.465967
45,2023090700,101,44930,4,55.41,13.27,1,2023090700_101_44930_4,True,26,...,1.864012,-1.269866,7.797271,-0.43079,2.64515,17.986063,-0.924627,2.656866,-0.884802,0.465967
46,2023090700,101,44930,5,56.09,12.95,1,2023090700_101_44930_5,True,26,...,1.864012,-1.269866,7.797271,-0.43079,2.64515,17.986063,-0.924627,2.656866,-0.884802,0.465967


# 4. Training the models
## 4.1 Cross Validation 
### 4.1.1 Linear Model for Targeted Receivers

We expect this type of player to move mainly in the direction of the ball, which makes this subset an excellent dataset for a baseline model.

In [8]:
# keep only the targeted receivers; .copy() makes this an independent frame so
# the x_phys / y_phys columns assigned to it in the next cell do not raise
# SettingWithCopyWarning
df_off_players_tr = df_off_players[
    df_off_players["player_role"] == "Targeted Receiver"
].copy()

df_off_players_tr.head()

Unnamed: 0,game_id,play_id,nfl_id,frame_id,target_x,target_y,week,id,player_to_predict,last_frame_id,...,o_rad,vx,vy,ax,ay,dist_to_ball,angle_to_ball,angle_diff,cos_angle_diff,sin_angle_diff
42,2023090700,101,44930,1,53.2,13.98,1,2023090700_101_44930_1,True,26,...,1.864012,-1.269866,7.797271,-0.43079,2.64515,17.986063,-0.924627,2.656866,-0.884802,0.465967
43,2023090700,101,44930,2,53.96,13.78,1,2023090700_101_44930_2,True,26,...,1.864012,-1.269866,7.797271,-0.43079,2.64515,17.986063,-0.924627,2.656866,-0.884802,0.465967
44,2023090700,101,44930,3,54.7,13.54,1,2023090700_101_44930_3,True,26,...,1.864012,-1.269866,7.797271,-0.43079,2.64515,17.986063,-0.924627,2.656866,-0.884802,0.465967
45,2023090700,101,44930,4,55.41,13.27,1,2023090700_101_44930_4,True,26,...,1.864012,-1.269866,7.797271,-0.43079,2.64515,17.986063,-0.924627,2.656866,-0.884802,0.465967
46,2023090700,101,44930,5,56.09,12.95,1,2023090700_101_44930_5,True,26,...,1.864012,-1.269866,7.797271,-0.43079,2.64515,17.986063,-0.924627,2.656866,-0.884802,0.465967


In [9]:
# Constant-acceleration extrapolation from the last observed state:
# position + velocity * dt + 0.5 * acceleration * dt**2, per axis.
for _axis in ("x", "y"):
    df_off_players_tr[f"{_axis}_phys"] = (
        df_off_players_tr[_axis]
        + df_off_players_tr[f"v{_axis}"] * df_off_players_tr["delta_t"]
        + 0.5 * df_off_players_tr[f"a{_axis}"] * (df_off_players_tr["delta_t"] ** 2)
    )


In [22]:
# Candidate predictors. Names that are not columns of the frame are dropped by
# the filter below, so the list may mention features that were never created.
# Every entry needs its own trailing comma: two adjacent string literals are
# silently concatenated into a single (non-existent) column name.
feature_cols = [
    "x", "y", "s", "a",
    "vx", "vy", "ax", "ay",
    "dir_rad", "o_rad",
    "dist_to_ball", "angle_to_ball",
    "angle_diff", "cos_angle_diff", "sin_angle_diff",
    "ball_land_x", "ball_land_y",
    "delta_t",
    "s_delta_t", "vx_delta_t", "vy_delta_t",
    "x_phys", "y_phys",
    "dist_to_left_sideline", "dist_to_right_sideline", "dist_to_endzone",
    "frame_norm", "height_m", "age_years",
]

# keep each available column exactly once, in list order
feature_cols = [c for c in dict.fromkeys(feature_cols) if c in df_off_players_tr.columns]

X_tr = df_off_players_tr[feature_cols].astype(float).values
y_tr = df_off_players_tr[["target_x", "target_y"]].astype(float).values

# one group per (game_id, nfl_id) pair so that the cross-validation split never
# puts the same player of the same game on both sides of a fold
groups = (
    df_off_players_tr[["game_id", "nfl_id"]]
    .astype(str)
    .agg("_".join, axis=1)
    .to_numpy()
)

In [23]:
def combined_rmse(y_true, y_pred):
    """Root of the average of the x-column MSE and the y-column MSE.

    Both arguments are 2-D arrays whose column 0 holds x and column 1 holds y.
    """
    per_axis_mse = [
        np.mean((y_true[:, col] - y_pred[:, col]) ** 2)
        for col in (0, 1)
    ]
    return np.sqrt((per_axis_mse[0] + per_axis_mse[1]) / 2.0)

gkf = GroupKFold(n_splits=5)
alpha = 1.0

cv_scores = []

# Grouped 5-fold cross-validation of a plain linear regression on the
# targeted-receiver matrix; rows sharing a group label stay in one fold.
for fold, (train_idx, val_idx) in enumerate(gkf.split(X_tr, y_tr, groups), start=1):
    print(f"\n--- Fold {fold} ---")

    X_train = X_tr[train_idx]
    X_val = X_tr[val_idx]
    y_train = y_tr[train_idx]
    y_val = y_tr[val_idx]

    # scaling statistics come from the training fold only
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    model = LinearRegression().fit(X_train_scaled, y_train)

    y_pred = model.predict(X_val_scaled)
    rmse = combined_rmse(y_val, y_pred)
    cv_scores.append(rmse)

    print(f"Fold {fold} RMSE: {rmse:.4f}")

print("\nMean CV RMSE:", np.mean(cv_scores))
print("Std  CV RMSE:", np.std(cv_scores))


--- Fold 1 ---
Fold 1 RMSE: 2.4054

--- Fold 2 ---
Fold 2 RMSE: 2.4100

--- Fold 3 ---
Fold 3 RMSE: 2.2681

--- Fold 4 ---
Fold 4 RMSE: 3.5231

--- Fold 5 ---
Fold 5 RMSE: 2.3436

Mean CV RMSE: 2.5900267928064844
Std  CV RMSE: 0.4693609364626003


### 4.1.2 Linear Model for all offense players

In [24]:
# Candidate predictors for the all-offense model, each listed once so that no
# column can end up duplicated in X_off.
# x_phys / y_phys are only assigned on df_off_players_tr, so the filter below
# drops them for df_off_players. This rebinding of `feature_cols` is also the
# list that later cells indexing with `feature_cols` will see.
feature_cols = [
    "x", "y", "s", "a",
    "vx", "vy", "ax", "ay",
    "dir_rad", "o_rad",
    "dist_to_ball", "angle_to_ball",
    "angle_diff", "cos_angle_diff", "sin_angle_diff",
    "ball_land_x", "ball_land_y",
    "delta_t",
    "s_delta_t", "vx_delta_t", "vy_delta_t",
    "x_phys", "y_phys",
    "dist_to_left_sideline", "dist_to_right_sideline", "dist_to_endzone",
    "frame_norm", "height_m", "age_years",
]

# keep each available column exactly once, in list order
feature_cols = [c for c in dict.fromkeys(feature_cols) if c in df_off_players.columns]


# define the X matrix and the two-column (x, y) target matrix
X_off = df_off_players[feature_cols].astype(float).values
y_off = df_off_players[["target_x", "target_y"]].astype(float).values

In [25]:
def combined_rmse(y_true, y_pred):
    """RMSE over both coordinates: sqrt of the mean of the x-MSE and the y-MSE.

    Column 0 of each 2-D array is treated as x and column 1 as y.
    """
    err_x = y_true[:, 0] - y_pred[:, 0]
    err_y = y_true[:, 1] - y_pred[:, 1]
    mean_sq = (np.mean(err_x ** 2) + np.mean(err_y ** 2)) / 2.0
    return np.sqrt(mean_sq)

gkf = GroupKFold(n_splits=5)
alpha = 1.0  # not read by the LinearRegression fit below

# Group labels must line up row-for-row with X_off, so build them from
# df_off_players itself instead of reusing the `groups` array that was derived
# from the targeted-receiver frame.
groups_off = (
    df_off_players[["game_id", "nfl_id"]]
    .astype(str)
    .agg("_".join, axis=1)
    .to_numpy()
)

cv_scores = []

for fold, (train_idx, val_idx) in enumerate(gkf.split(X_off, y_off, groups_off), 1):
    print(f"\n--- Fold {fold} ---")

    X_train_off, X_val_off = X_off[train_idx], X_off[val_idx]
    y_train_off, y_val_off = y_off[train_idx], y_off[val_idx]

    # fit the scaler on the training fold only, then apply it to the validation fold
    scaler = StandardScaler()
    X_train_scaled_off = scaler.fit_transform(X_train_off)
    X_val_scaled_off   = scaler.transform(X_val_off)

    model = LinearRegression()
    model.fit(X_train_scaled_off, y_train_off)

    y_pred_off = model.predict(X_val_scaled_off)
    rmse   = combined_rmse(y_val_off, y_pred_off)
    cv_scores.append(rmse)

    print(f"Fold {fold} RMSE: {rmse:.4f}")

print("\nMean CV RMSE for offense positioning model:", np.mean(cv_scores))
print("Std  CV RMSE for offense positioning model:", np.std(cv_scores))


--- Fold 1 ---
Fold 1 RMSE: 2.4123

--- Fold 2 ---
Fold 2 RMSE: 2.4204

--- Fold 3 ---
Fold 3 RMSE: 2.2783

--- Fold 4 ---
Fold 4 RMSE: 2.6434

--- Fold 5 ---
Fold 5 RMSE: 2.3440

Mean CV RMSE for offense positioning model: 2.4196712634113666
Std  CV RMSE for offense positioning model: 0.12313554467563176


## 4.2 Fit the models for targeted receivers and all offense players

In [26]:
# Targeted-receiver model: standardise X_tr, then fit ordinary least squares
# on every available row.
scaler_final_tr = StandardScaler()
X_tr_scaled = scaler_final_tr.fit_transform(X_tr)
final_model_tr = LinearRegression().fit(X_tr_scaled, y_tr)

# All-offense model: same recipe on X_off / y_off with its own scaler.
scaler_final_off = StandardScaler()
X_scaled_off = scaler_final_off.fit_transform(X_off)
final_model_off = LinearRegression().fit(X_scaled_off, y_off)

print("\nFinal LinearRegression model fitted on all training rows.")


Final LinearRegression model fitted on all training rows.


## 4.3 Alternative Linear Model Approach for TRs
### 4.3.1 Regularization with Ridge, Lasso and Elastic Net

In [None]:
# ---------------------------------------------------------
# RMSE metric used for both x,y (same as your combined_rmse)
# ---------------------------------------------------------
def combined_rmse(y_true, y_pred):
    """Single RMSE for 2-D targets: sqrt of the average of the per-column MSEs.

    Column 0 is the x coordinate and column 1 the y coordinate.
    """
    def _column_mse(col):
        # mean squared error of one coordinate
        return np.mean((y_true[:, col] - y_pred[:, col]) ** 2)

    return np.sqrt((_column_mse(0) + _column_mse(1)) / 2.0)


# ---------------------------------------------------------
# Groups (each unique player-play is a group)
# ---------------------------------------------------------
# One label per (game_id, play_id, nfl_id) triple, so all rows of one player
# in one play fall into the same fold.
groups = (
    df_off_players_tr["game_id"].astype(str)
    + "_" + df_off_players_tr["play_id"].astype(str)
    + "_" + df_off_players_tr["nfl_id"].astype(str)
).to_numpy()

gkf = GroupKFold(n_splits=5)

# ---------------------------------------------------------
# Search grids: `alphas` is shared by Ridge and Lasso,
# ElasticNet has its own alpha / l1_ratio grid
# ---------------------------------------------------------
alphas = [0.0001, 0.001, 0.01, 0.1, 1, 3, 10, 30, 100]

elastic_alphas = [0.01, 0.1, 1, 10]
elastic_l1_ratios = [0.1, 0.5, 0.7, 0.9]


# ---------------------------------------------------------
# Column-oriented store: one list per result field
# ---------------------------------------------------------
results = {field: [] for field in ("model", "alpha", "l1_ratio", "mean_rmse")}


# ---------------------------------------------------------
# Helper for CV evaluation
# ---------------------------------------------------------
def evaluate_model_cv(model, X, y, groups):
    """Return the mean combined RMSE of ``model`` over the folds of ``gkf``.

    ``model`` is refitted on every training fold; features are standardised
    with statistics taken from that training fold only. Relies on the
    module-level ``gkf`` splitter and ``combined_rmse`` metric.
    """
    fold_scores = []

    for fit_rows, holdout_rows in gkf.split(X, y, groups):
        # the scaler never sees the held-out rows while being fitted
        standardizer = StandardScaler()
        fit_features = standardizer.fit_transform(X[fit_rows])
        holdout_features = standardizer.transform(X[holdout_rows])

        model.fit(fit_features, y[fit_rows])
        holdout_pred = model.predict(holdout_features)

        fold_scores.append(combined_rmse(y[holdout_rows], holdout_pred))

    return np.mean(fold_scores)


# ---------------------------------------------------------
# 1) GRID SEARCH - RIDGE
# ---------------------------------------------------------
print("\n🔵 Searching RIDGE...")
for a in alphas:
    model = Ridge(alpha=a, random_state=42)
    rmse = evaluate_model_cv(model, X_tr, y_tr, groups)
    results["model"].append("Ridge")
    results["alpha"].append(a)
    results["l1_ratio"].append(None)  # Ridge has no l1_ratio
    results["mean_rmse"].append(rmse)
    print(f"Ridge(alpha={a}): RMSE = {rmse:.4f}")


# ---------------------------------------------------------
# 2) GRID SEARCH - LASSO
# ---------------------------------------------------------
# NOTE(review): the recorded run prints coordinate-descent warnings for the
# smallest alphas - presumably non-convergence within max_iter; confirm and
# consider raising max_iter.
print("\n🟠 Searching LASSO...")
for a in alphas:
    model = Lasso(alpha=a, random_state=42, max_iter=5000)
    rmse = evaluate_model_cv(model, X_tr, y_tr, groups)
    results["model"].append("Lasso")
    results["alpha"].append(a)
    results["l1_ratio"].append(None)  # Lasso has no l1_ratio
    results["mean_rmse"].append(rmse)
    print(f"Lasso(alpha={a}): RMSE = {rmse:.4f}")


# ---------------------------------------------------------
# 3) GRID SEARCH - ELASTIC NET
# ---------------------------------------------------------
print("\n🟣 Searching ElasticNet...")
for a in elastic_alphas:
    for l1 in elastic_l1_ratios:
        model = ElasticNet(alpha=a, l1_ratio=l1, random_state=42, max_iter=5000)
        rmse = evaluate_model_cv(model, X_tr, y_tr, groups)
        results["model"].append("ElasticNet")
        results["alpha"].append(a)
        results["l1_ratio"].append(l1)
        results["mean_rmse"].append(rmse)
        print(f"ElasticNet(alpha={a}, l1_ratio={l1}): RMSE = {rmse:.4f}")


# ---------------------------------------------------------
# Convert to DataFrame + sort (lowest mean RMSE first)
# ---------------------------------------------------------
df_results = pd.DataFrame(results)
df_results = df_results.sort_values("mean_rmse", ascending=True)
print("\n\n🏆 BEST MODELS FOUND:")
print(df_results.head(10))



🔵 Searching RIDGE...
Ridge(alpha=0.0001): RMSE = 2.5697
Ridge(alpha=0.001): RMSE = 2.5692
Ridge(alpha=0.01): RMSE = 2.5639
Ridge(alpha=0.1): RMSE = 2.5203
Ridge(alpha=1): RMSE = 2.3926
Ridge(alpha=3): RMSE = 2.3734
Ridge(alpha=10): RMSE = 2.3777
Ridge(alpha=30): RMSE = 2.3820
Ridge(alpha=100): RMSE = 2.3840

🟠 Searching LASSO...


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Lasso(alpha=0.0001): RMSE = 2.4283


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Lasso(alpha=0.001): RMSE = 2.4116
Lasso(alpha=0.01): RMSE = 2.4079
Lasso(alpha=0.1): RMSE = 2.3929
Lasso(alpha=1): RMSE = 2.6487
Lasso(alpha=3): RMSE = 3.9061
Lasso(alpha=10): RMSE = 10.4418
Lasso(alpha=30): RMSE = 19.9764
Lasso(alpha=100): RMSE = 19.9764

🟣 Searching ElasticNet...
ElasticNet(alpha=0.01, l1_ratio=0.1): RMSE = 2.3855
ElasticNet(alpha=0.01, l1_ratio=0.5): RMSE = 2.3837
ElasticNet(alpha=0.01, l1_ratio=0.7): RMSE = 2.3831
ElasticNet(alpha=0.01, l1_ratio=0.9): RMSE = 2.3831
ElasticNet(alpha=0.1, l1_ratio=0.1): RMSE = 2.4637
ElasticNet(alpha=0.1, l1_ratio=0.5): RMSE = 2.4098
ElasticNet(alpha=0.1, l1_ratio=0.7): RMSE = 2.3913
ElasticNet(alpha=0.1, l1_ratio=0.9): RMSE = 2.3811
ElasticNet(alpha=1, l1_ratio=0.1): RMSE = 5.2166
ElasticNet(alpha=1, l1_ratio=0.5): RMSE = 4.0484
ElasticNet(alpha=1, l1_ratio=0.7): RMSE = 3.4350
ElasticNet(alpha=1, l1_ratio=0.9): RMSE = 2.8908
ElasticNet(alpha=10, l1_ratio=0.1): RMSE = 15.0571
ElasticNet(alpha=10, l1_ratio=0.5): RMSE = 14.4997
Elas

### 4.3.2 Evaluate feature importance of Ridge

Evaluate the importance of each feature after fitting the Ridge model with the best alpha on the Targeted Receiver subset.

In [None]:
# 1) Re-extract the design matrix and targets using whatever `feature_cols`
#    currently holds, so the coefficient rows line up with the feature names.
X_tr = df_off_players_tr[feature_cols].astype(float).values
y_tr = df_off_players_tr[["target_x", "target_y"]].astype(float).values

print("X_tr shape:", X_tr.shape)
print("y_tr shape:", y_tr.shape)

# 2) Regularisation strength for the final Ridge fit. It is hard-coded here;
#    the best Ridge row of df_results could be used instead:
#    df_results[df_results.model=="Ridge"].sort_values("mean_rmse").iloc[0]["alpha"]
best_alpha = 1.0

scaler_final = StandardScaler()
X_tr_scaled = scaler_final.fit_transform(X_tr)

ridge_final = Ridge(alpha=best_alpha, random_state=42).fit(X_tr_scaled, y_tr)

print("ridge_final.coef_.shape:", ridge_final.coef_.shape)

# 3) One coefficient row per target: row 0 -> target_x, row 1 -> target_y
coef_x, coef_y = ridge_final.coef_

print("len(feature_cols):", len(feature_cols))
print("len(coef_x):", len(coef_x))

# Rank features by the Euclidean norm of their (coef_x, coef_y) pair
feat_importance = (
    pd.DataFrame({"feature": feature_cols, "coef_x": coef_x, "coef_y": coef_y})
    .assign(coef_norm=lambda t: np.sqrt(t["coef_x"]**2 + t["coef_y"]**2))
    .sort_values("coef_norm", ascending=False)
)

print("\nTop 20 most important features:")
print(feat_importance.head(20))


X_tr shape: (160360, 18)
y_tr shape: (160360, 2)
ridge_final.coef_.shape: (2, 18)
len(feature_cols): 18
len(coef_x): 18

Top 20 most important features:
           feature     coef_x    coef_y  coef_norm
0                x  15.859985  0.057327  15.860088
1                y   0.101969  8.637166   8.637768
15     ball_land_x   7.897646 -0.061233   7.897883
16     ball_land_y  -0.123550  5.844248   5.845554
5               vy   1.351062  0.006423   1.351077
4               vx   0.081998  1.151884   1.154798
10    dist_to_ball   0.295295  0.006763   0.295372
7               ay  -0.271445 -0.002239   0.271454
11   angle_to_ball  -0.017169 -0.257182   0.257754
14  sin_angle_diff  -0.196710  0.026020   0.198423
17         delta_t   0.150568 -0.023199   0.152345
2                s  -0.144021  0.015154   0.144816
8          dir_rad   0.111827  0.039926   0.118741
12      angle_diff   0.073462 -0.068151   0.100206
6               ax   0.002015  0.096025   0.096046
13  cos_angle_diff   0.046750  

The table above lists the top features (at most 20) together with the coefficients each one has in the model (coef_x and coef_y). Ridge shrinks the coefficients of variables that contribute little to predictive accuracy, but without completely nullifying them (as Lasso regularization does). In terms of the outcome, as expected, the landing position of the ball is of high importance for predicting the final x and y coordinates of the TR players, alongside the velocity parameters and the distance to the ball.