## RBFNs (Radial Basis Function Networks) - Time Series

In [8]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

In [9]:
# ------------------------------------------------------------
# 1) Custom RBF Layer
# ------------------------------------------------------------
class RBFLayer(tf.keras.layers.Layer):
    """Gaussian radial-basis-function layer with trainable centers and widths.

    For a 2-D input of shape ``(batch, features)`` the layer outputs, for each
    of its ``units`` centers ``c_j``::

        exp(-gamma * betas[j] * ||x - c_j||^2)

    giving a tensor of shape ``(batch, units)``.

    Args:
        units: Number of RBF centers, i.e. the output width.
        gamma: Fixed (non-trainable) multiplier applied on top of every
            unit's trainable ``beta``.
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, units, gamma=1.0, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.gamma = gamma

    def build(self, input_shape):
        """Create the trainable ``centers`` and ``betas`` weights."""
        features = input_shape[-1]
        # One center per unit, living in the same space as the inputs.
        self.centers = self.add_weight(
            name='centers',
            shape=(self.units, features),
            initializer='glorot_uniform',
            trainable=True
        )
        # Per-unit width factor, starting at 1. No constraint is applied, so
        # training is free to move a beta to zero or below.
        self.betas = self.add_weight(
            name='betas',
            shape=(self.units,),
            initializer='ones',
            trainable=True
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return the RBF activation of every input row against every center."""
        # (batch, 1, features) - (1, units, features) broadcasts to
        # (batch, units, features); summing squares over the last axis yields
        # the squared Euclidean distance to each center.
        expanded_inputs = tf.expand_dims(inputs, axis=1)
        expanded_centers = tf.expand_dims(self.centers, axis=0)
        distances = tf.reduce_sum(tf.square(expanded_inputs - expanded_centers), axis=-1)
        rbfs = tf.exp(-self.gamma * tf.expand_dims(self.betas, 0) * distances)
        return rbfs

    def compute_output_shape(self, input_shape):
        """Output keeps the leading (batch) dimension and has ``units`` columns."""
        return (input_shape[0], self.units)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        Without this, a saved model cannot restore ``units`` and ``gamma``
        because they are required/non-default constructor parameters.
        """
        config = super().get_config()
        config.update({"units": self.units, "gamma": self.gamma})
        return config



In [10]:
# ------------------------------------------------------------
# 2) Configuration
# ------------------------------------------------------------
DB_NAME = "../../nba_data.db"
DB_URI = f"sqlite:///{DB_NAME}"
engine = create_engine(DB_URI, echo=False)

# ------------------------------------------------------------
# 3) Load Data & Sort
# ------------------------------------------------------------
df = pd.read_sql("SELECT * FROM player_game_features", engine)

# Order every player's games chronologically. 'game_date' is compared as a
# parsed datetime rather than in the dtype the database returned, so that
# string dates in a non-ISO format cannot be mis-ordered lexically.
# The key function is applied to each sort column separately; 'player_id'
# is passed through untouched.
df = df.sort_values(
    by=["player_id", "game_date"],
    key=lambda col: pd.to_datetime(col) if col.name == "game_date" else col,
)

# Calendar year of each game (taken from 'game_date' via .dt.year; this is
# the calendar year, not a season label).
df['game_year'] = pd.to_datetime(df['game_date']).dt.year

# Features and target. "pts" appears in both: the sequence builder used later
# feeds earlier games as inputs and uses a later game's "pts" as the label.
features = ["player_id", "pts", "min", "fgm", "fga", "pts_per_min", "fg_pct"]
target = "pts"

# Drop rows that have a missing value in any modelling column.
df = df.dropna(subset=features + [target])

X = df[features]
y = df[target]

In [11]:
# ------------------------------------------------------------
# 4) Helper Function: Create Sequences
# ------------------------------------------------------------
def create_player_sequences_fixed_length(data, target, player_column, max_length):
    """
    Build one sample per (player, game) from that player's earlier games.

    Rows are grouped by ``player_column`` and kept in the row order of
    ``data``. For every game index ``i >= 1`` within a player, the sample is
    the feature rows of games ``0 .. i-1`` and the label is ``target`` at
    game ``i``. Histories shorter than ``max_length`` are left-padded with
    zeros; longer histories keep only their most recent ``max_length`` rows.

    Args:
        data: DataFrame containing ``player_column`` plus the feature columns.
            Rows are expected to already be in chronological order per player.
        target: Series of labels indexed by the same labels as ``data``.
        player_column: Name of the grouping column; it is excluded from the
            feature values.
        max_length: Number of time steps in every returned sequence (>= 0).

    Returns:
        Tuple ``(X, y)``. ``X`` is a float32 array of shape
        ``(n_samples, max_length, n_features)`` and ``y`` is an array of
        ``n_samples`` labels. If no player has at least two rows, ``X`` has
        shape ``(0, max_length, 0)`` and ``y`` is empty.

    Raises:
        ValueError: If ``max_length`` is negative.
    """
    if max_length < 0:
        raise ValueError("max_length must be non-negative")

    histories, labels = [], []

    for _, group in data.groupby(player_column):
        player_features = group.drop(columns=[player_column]).values
        # Explicit label-based lookup keeps targets aligned with this
        # player's rows regardless of the index dtype.
        player_target = target.loc[group.index].values

        for i in range(1, len(player_features)):
            # Only the last `max_length` earlier games can fit in the output
            # window. Using an explicit start index (instead of a negative
            # slice) also behaves correctly when max_length is 0, where
            # `seq[-0:]` would wrongly select the whole history.
            start = max(0, i - max_length)
            histories.append(player_features[start:i])
            labels.append(player_target[i])

    num_features = histories[0].shape[1] if histories else 0
    X_padded = np.zeros((len(histories), max_length, num_features), dtype=np.float32)

    for row, history in enumerate(histories):
        if len(history):
            # Right-align the history so the most recent game is the last
            # time step; leading rows stay zero as padding.
            X_padded[row, max_length - len(history):, :] = history

    return X_padded, np.array(labels)

In [12]:
# ------------------------------------------------------------
# 5) RBF Model Builder
# ------------------------------------------------------------
def build_rbf_model(input_dim, rbf_units=20, gamma=1.0):
    """
    Build and compile a minimal RBF network: Input -> RBFLayer -> Dense(1).

    Args:
        input_dim: Length of each (already flattened) input vector.
        rbf_units: Number of units in the RBF hidden layer.
        gamma: Value forwarded to ``RBFLayer(gamma=...)``.

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer, MSE loss
        and MAE as an additional metric.
    """
    model = Sequential()
    # Declare the input with an explicit Input object instead of passing
    # `input_shape` to the first layer, which current Keras releases
    # discourage for Sequential models.
    model.add(tf.keras.Input(shape=(input_dim,)))
    model.add(RBFLayer(units=rbf_units, gamma=gamma))
    # Single linear output unit for the regression target.
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model


In [13]:
# ------------------------------------------------------------
# 6) Train on 2015-2022, Validate on 2023
# ------------------------------------------------------------
train_mask = (df['game_year'] >= 2015) & (df['game_year'] <= 2022)
val_mask   = (df['game_year'] == 2023)

train_data = df[train_mask]
val_data   = df[val_mask]

# Fail fast if either split is empty; nothing below can work without both.
if len(train_data) == 0 or len(val_data) == 0:
    raise ValueError("No data found in train or validation sets with the specified year filters.")

# Columns to scale: every feature except the identifier. They are selected by
# name so the labels below stay correct wherever "player_id" sits in `features`
# (slicing with features[1:] would silently mislabel columns otherwise).
scaled_cols = [col for col in features if col != "player_id"]

# Fit the scaler on the training split only, then reuse it for validation.
scaler = MinMaxScaler()
scaled_features_train = scaler.fit_transform(train_data[scaled_cols])
scaled_features_val   = scaler.transform(val_data[scaled_cols])

# Rebuild DataFrames on the original indices and re-attach the unscaled id.
train_scaled = pd.DataFrame(scaled_features_train, index=train_data.index, columns=scaled_cols)
train_scaled["player_id"] = train_data["player_id"].values

val_scaled = pd.DataFrame(scaled_features_val, index=val_data.index, columns=scaled_cols)
val_scaled["player_id"] = val_data["player_id"].values

def find_player_longest_sequence(data_df, id_col="player_id"):
    """
    Return the longest history length any single id can produce.

    A group with ``n`` rows yields histories of at most ``n - 1`` rows,
    because the last row of a history is always the one before the target
    row. Returns 0 when ``data_df`` contains no groups.
    """
    rows_per_id = data_df.groupby(id_col).size()
    if rows_per_id.empty:
        return 0
    # Every group has at least one row, so the result is never negative.
    return int(rows_per_id.max()) - 1

# Use one common sequence length for both splits so the flattened inputs
# have the same width for training and validation.
max_len_train = find_player_longest_sequence(train_scaled, "player_id")
max_len_val   = find_player_longest_sequence(val_scaled, "player_id")
max_len_both  = max(max_len_train, max_len_val)
if max_len_both < 1:
    raise ValueError("Not enough data to create sequences.")

# Create sequences for RBF. Inputs come from the scaled frames; labels are
# the unscaled target column, so predictions are in the target's own units.
X_train_3D, y_train = create_player_sequences_fixed_length(
    train_scaled, train_data[target], "player_id", max_len_both
)
X_val_3D, y_val = create_player_sequences_fixed_length(
    val_scaled, val_data[target], "player_id", max_len_both
)

# Ensure we have data after sequence creation
if len(X_train_3D) == 0 or len(X_val_3D) == 0:
    raise ValueError("No sequences were created for training/validation.")

# RBFN requires 2D input: flatten [batch, timesteps, features] -> [batch, timesteps*features]
X_train = X_train_3D.reshape((X_train_3D.shape[0], -1))
X_val   = X_val_3D.reshape((X_val_3D.shape[0], -1))

input_dim = X_train.shape[1]  # timesteps * features

# Seed Python, NumPy and the backend before any weights are created so that
# initialisation and batch shuffling are repeatable across kernel restarts.
# NOTE(review): bit-exact results on GPU may additionally need op determinism.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Build the RBF model
model = build_rbf_model(input_dim=input_dim, rbf_units=30, gamma=0.1)

# Stop once validation loss has not improved for 5 epochs and roll back to
# the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50, batch_size=32,
    verbose=1,
    callbacks=[early_stop]
)

# Predict on the validation split; flatten (n, 1) -> (n,) to match y_val.
y_pred = model.predict(X_val).flatten()

mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50


In [14]:
# Report the validation metrics together with the fixed year split they
# were computed on.
report_lines = (
    "Validation Year: 2023",
    "Train Years: 2015 to 2022",
    f"MAE:  {mae:.2f}",
    f"RMSE: {rmse:.2f}",
)
for report_line in report_lines:
    print(report_line)

Validation Year: 2023
Train Years: 2015 to 2022
MAE:  5.00
RMSE: 6.61
