In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# ------------------------------------------------------------
# Database connection settings
# ------------------------------------------------------------
# SQLite file the notebook reads from. The path is relative, so it is
# resolved against the kernel's current working directory.
DB_NAME = "nba_data.db"

# SQLAlchemy connection URL derived from the file name above.
DB_URI = f"sqlite:///{DB_NAME}"

# Shared engine handed to pandas when querying the database.
# echo=False keeps SQLAlchemy from logging each SQL statement it emits.
engine = create_engine(
    DB_URI,
    echo=False,
)

In [None]:
# ------------------------------------------------------------
# Load Data
# ------------------------------------------------------------
# Keep the untouched query result under its own name so this cell can be
# re-run without depending on an already-modified `df`.
df_raw = pd.read_sql("SELECT * FROM player_game_features", engine)

# Parse the dates once. Sorting is done on the parsed timestamps rather than
# on whatever textual form 'game_date' is stored in; the stored column itself
# is left exactly as loaded.
game_dates = pd.to_datetime(df_raw["game_date"])

# Order rows chronologically (game_date first, player_id as tie-breaker).
# The later 80/20 positional split relies on this global time ordering, so
# the date must be the primary sort key.
df = (
    df_raw
    .assign(game_year=game_dates.dt.year, _game_dt=game_dates)
    .sort_values(by=["_game_dt", "player_id"])
    .drop(columns="_game_dt")  # helper column only needed for sorting
)

# Column the model is trained to predict.
TARGET = "pts"

# Model inputs. The target column is deliberately NOT listed here: feeding
# 'pts' in as a feature would hand the model the answer (target leakage) and
# make the validation metrics meaningless.
# NOTE(review): 'reb' and 'ast' look like same-game box-score values, which
# would also be unknown before tip-off — confirm whether they should be
# replaced by rolling/lagged versions.
features = [
    "rolling_pts_5",
    "rolling_min_5",
    "rolling_fg_pct_5",
    "rolling_ppm_5",
    "rolling_fgm_5",
    "rolling_fga_5",
    "reb",
    "ast",
]

# Drop rows with NaNs in any feature or in the target
df = df.dropna(subset=features + [TARGET])

if df.empty:
    raise ValueError(
        "No rows left in player_game_features after dropping NaNs in "
        "features/target; cannot build a training set."
    )

X = df[features]
y = df[TARGET]

In [None]:
# Seed NumPy and TensorFlow so that repeated runs are as reproducible as the
# backend allows.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Time-based split: first 80% of rows for training, last 20% for validation.
# This is a positional split, so the rows must already be in chronological
# order when this cell runs.
split_index = int(len(df) * 0.8)
if split_index == 0 or split_index == len(df):
    raise ValueError(
        f"Need at least one row on each side of the split; got {len(df)} rows."
    )

# Scale the features for deep learning.
# The scaler is fitted on the TRAINING rows only, so the min/max of the
# validation period never influences the transformation (fitting on all rows
# would leak validation statistics into training). Validation values may
# therefore fall slightly outside [0, 1]; that is expected.
scaler = MinMaxScaler()
scaler.fit(X.iloc[:split_index])
X_scaled = scaler.transform(X)

X_train, X_val = X_scaled[:split_index], X_scaled[split_index:]
y_train, y_val = y.iloc[:split_index], y.iloc[split_index:]

# Build a simple deep learning model: two hidden ReLU layers and a single
# linear output unit for regression.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)  # Single output for regression
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Stop once validation loss has not improved for 5 epochs and roll back to
# the best weights seen.
# NOTE(review): the same validation set drives early stopping and the final
# metrics below, so those metrics are somewhat optimistic; a separate
# held-out test period would give a cleaner estimate.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50, batch_size=32,
    verbose=1, callbacks=[early_stop]
)

# Predict on validation set; flatten the (n, 1) output to a 1-D array
y_pred = model.predict(X_val).flatten()

# Calculate evaluation metrics
mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)

# Print results
print("Time-based validation results (Deep Learning):")
print(f"MAE: {mae:.2f}")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")

In [None]:
# Line up each validation target with the model's prediction for that row
results = pd.DataFrame({'Actual': y_val.values, 'Predicted': y_pred})

# Preview the first 10 rows
print(results.head(n=10))

# Persist the comparison table; the row index is left out of the file
results.to_csv(
    'predicted_vs_actual.csv',
    index=False,
)
print("Results saved to 'predicted_vs_actual.csv'.")