## RBFNs (Radial Basis Function Networks) - Time-Series

In [23]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

# ------------------------------------------------------------
# 1) Custom RBF Layer
# ------------------------------------------------------------
class RBFLayer(tf.keras.layers.Layer):
    """Gaussian radial-basis-function layer with trainable centers and widths.

    For each input row ``x`` and each unit ``j`` the layer outputs
    ``exp(-gamma * betas[j] * ||x - centers[j]||^2)``.

    Args:
        units: Number of RBF units; one center vector and one beta per unit.
        gamma: Fixed (non-trainable) scalar multiplied into every unit's beta.
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, units, gamma=1.0, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.gamma = gamma

    def build(self, input_shape):
        """Create the trainable ``centers`` and ``betas`` weights."""
        feature_dim = input_shape[-1]
        # One center per unit, living in the same space as the input features.
        self.centers = self.add_weight(
            name='centers',
            shape=(self.units, feature_dim),
            initializer='glorot_uniform',
            trainable=True
        )
        # Per-unit width factor, initialised to 1 so that gamma alone sets the
        # initial kernel width.
        # NOTE(review): betas are unconstrained; if training drives one
        # negative, that unit's output grows with distance instead of decaying.
        self.betas = self.add_weight(
            name='betas',
            shape=(self.units,),
            initializer='ones',
            trainable=True
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return the RBF activations, one column per unit."""
        # Insert singleton axes so inputs and centers broadcast against each
        # other: (batch, 1, features) - (1, units, features).
        expanded_inputs = tf.expand_dims(inputs, axis=1)
        expanded_centers = tf.expand_dims(self.centers, axis=0)

        # Squared Euclidean distance from every input row to every center.
        distances = tf.reduce_sum(tf.square(expanded_inputs - expanded_centers), axis=-1)
        rbfs = tf.exp(-self.gamma * tf.expand_dims(self.betas, 0) * distances)  # [batch_size, units]
        return rbfs

    def compute_output_shape(self, input_shape):
        """Output keeps the leading (batch) dimension and has ``units`` columns."""
        return (input_shape[0], self.units)

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer.

        Without this, Keras cannot serialize a model containing the layer,
        because ``units`` and ``gamma`` would be missing from the saved config.
        """
        config = super().get_config()
        config.update({'units': self.units, 'gamma': self.gamma})
        return config


# ------------------------------------------------------------
# 2) Build RBF Model
# ------------------------------------------------------------
def build_rbf_model(input_dim, rbf_units=30, gamma=0.1):
    """Build and compile an RBF regression network.

    The model is an ``RBFLayer`` followed by a single linear ``Dense`` output
    unit, compiled with the Adam optimizer, MSE loss and an MAE metric.

    Args:
        input_dim: Number of input features per sample.
        rbf_units: Number of units in the RBF layer.
        gamma: Fixed scale passed to ``RBFLayer``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Declare the input with an explicit Input object instead of passing the
    # deprecated `input_shape=` kwarg to the first layer.
    model = Sequential([
        tf.keras.Input(shape=(input_dim,)),
        RBFLayer(units=rbf_units, gamma=gamma),
        Dense(1),  # Single output for regression
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model


# ------------------------------------------------------------
# 3) Load & Prepare Data (Now with Lagged Features)
# ------------------------------------------------------------
DB_NAME = "nba_data.db"
DB_URI = f"sqlite:///{DB_NAME}"
engine = create_engine(DB_URI, echo=False)

df = pd.read_sql("SELECT * FROM player_game_features", engine)

# Parse the dates once; the parsed values drive both the split year and the sort.
game_dates = pd.to_datetime(df['game_date'])

# Create a 'game_year' for train/val splits
df['game_year'] = game_dates.dt.year

# Sort by player and date to maintain chronological order.
# Sorting on the parsed timestamps (via a temporary column) rather than on the
# raw 'game_date' values keeps the order chronological even if the stored
# dates are not ISO-formatted strings. 'game_date' itself is left untouched.
df = (
    df.assign(_sort_date=game_dates)
      .sort_values(by=["player_id", "_sort_date"])
      .drop(columns="_sort_date")
)

# --- Create Lagged Features ---
# Each listed stat is shifted by 1 game within each player group, so e.g.
# 'pts' from the previous game becomes 'lag_pts', 'min' becomes 'lag_min', etc.
# A single grouped shift handles all columns at once.
lag_source_columns = ['pts', 'min', 'fgm', 'fga', 'pts_per_min', 'fg_pct']
lag_columns = [f"lag_{column}" for column in lag_source_columns]

shifted = df.groupby('player_id')[lag_source_columns].shift(1)
df = df.join(shifted.add_prefix("lag_"))

# Drop rows that don't have valid lag values (each player's first game has
# no previous game to shift from).
df = df.dropna(subset=lag_columns)

# The target is still the current game points...
target = 'pts'

# Now, our input features are from the *previous* game (no data leakage!)
# We can still include 'player_id' if desired (though it's categorical in nature).
features = ["player_id", *lag_columns]

# ------------------------------------------------------------
# 4) Train on 2015-2022, Validate on 2023
# ------------------------------------------------------------
# Year-based split: 2015-2022 (inclusive) for training, 2023 held out.
game_years = df['game_year']
train_mask = game_years.between(2015, 2022)
val_mask   = game_years.eq(2023)

train_data = df.loc[train_mask]
val_data   = df.loc[val_mask]

# Stop right away if either split came back with no rows.
if 0 in (len(train_data), len(val_data)):
    raise ValueError("No valid training/validation data for the selected years.")

print("Training on years 2015 through 2022.")
print("Validating on year 2023.")
print(f"Training Samples: {len(train_data)}, Validation Samples: {len(val_data)}")

# ------------------------------------------------------------
# 5) Scale Data (No Sequences, Just Flattened Observations)
# ------------------------------------------------------------
# Targets are used as-is (unscaled).
y_train = train_data[target].values
y_val   = val_data[target].values

# Raw feature matrices, one row per observation.
X_train_raw = train_data[features].values
X_val_raw   = val_data[features].values

# Fit the scaler on the training rows only, then apply the same fitted
# scaling to both splits.
scaler = MinMaxScaler()
scaler.fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_val   = scaler.transform(X_val_raw)

# ------------------------------------------------------------
# 6) Build & Train the RBF Model
# ------------------------------------------------------------
# The network's input width is the number of feature columns.
input_dim = X_train.shape[-1]
model = build_rbf_model(input_dim, rbf_units=30, gamma=0.1)

# Stop once validation loss has not improved for 5 epochs and roll back to
# the best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

fit_options = dict(
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    verbose=1,
    callbacks=[early_stop],
)
history = model.fit(X_train, y_train, **fit_options)

# ------------------------------------------------------------
# 7) Predictions & Evaluation
# ------------------------------------------------------------
# Collapse the model output to a 1-D vector aligned with y_val.
val_predictions = model.predict(X_val)
y_pred = val_predictions.flatten()

# Error metrics on the held-out year; RMSE is derived from the MSE.
mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)

print("\n--- Results for 2023 Validation ---")
print(f"MAE:  {mae:.2f}")
print(f"RMSE: {rmse:.2f}")

# ------------------------------------------------------------
# 8) Create & Save the 2023 CSV with All Attributes + Predictions
# ------------------------------------------------------------
# Build a new frame (val_data itself is not modified) holding every column of
# the validation split plus the predicted and actual points.
val_data_copy = val_data.assign(
    **{"predicted_pts": y_pred, "actual_pts": y_val}
)

# Write everything out without the index column.
val_data_copy.to_csv("predictions_2023.csv", index=False)
print("\nSaved predictions to predictions_2023.csv")


Training on years 2015 through 2022.
Validating on year 2023.
Training Samples: 186305, Validation Samples: 14254
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50

--- Results for 2023 Validation ---
MAE:  5.25
RMSE: 6.87

Saved predictions to predictions_2023.csv
