In [None]:
import pandas as pd
from sqlalchemy import create_engine
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# ------------------------------------------------------------
# Configuration
# ------------------------------------------------------------
# Path to the SQLite file; a relative path here is presumably resolved
# against the process working directory -- confirm where the notebook runs.
DB_NAME = "../../nba_data.db"
DB_URI = "sqlite:///" + DB_NAME
# Shared engine used by every query below; echo=False is passed explicitly.
engine = create_engine(DB_URI, echo=False)

# ------------------------------------------------------------
# Load Data & Sort
# ------------------------------------------------------------
# Pull the whole feature table, order it by player then game date, and derive
# the calendar year of each game (used later to split train/validation folds).
df = (
    pd.read_sql("SELECT * FROM player_game_features", engine)
    .sort_values(by=["player_id", "game_date"])
    .assign(game_year=lambda frame: pd.to_datetime(frame['game_date']).dt.year)
)

# Model inputs.
# NOTE(review): "reb" and "ast" look like same-game box-score values rather
# than rolling aggregates -- confirm they are known before the game, otherwise
# they leak information about the target.
features = [
    "rolling_pts_5",
    "rolling_min_5",
    "rolling_fg_pct_5",
    "rolling_ppm_5",
    "rolling_fgm_5",
    "rolling_fga_5",
    "reb",
    "ast",
]

# Keep only rows where every feature and the target are present.
df = df.dropna(subset=[*features, "pts"])
X, y = df[features], df["pts"]

# ------------------------------------------------------------
# Expanding Window Validation Function
# ------------------------------------------------------------
def expanding_window_validation(model, model_name):
    """Score ``model`` one year at a time, training only on earlier years.

    For each distinct ``game_year`` in the module-level ``df``, the model is
    refit on the rows of ``X`` / ``y`` from strictly earlier years and then
    evaluated on that year's rows. Years with no training rows or no
    validation rows are skipped. MAE and RMSE are printed per year, followed
    by a per-year summary and the averages over all scored years.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``; it is refit
            in place on every fold.
        model_name: Label shown in the report header.

    Returns:
        None. All results are written to stdout.
    """
    year_of_game = df['game_year']
    candidate_years = sorted(year_of_game.unique())
    fold_results = []  # one (year, mae, rmse) tuple per scored year

    print(f"\n===== Model: {model_name} =====\n")
    for validate_year in candidate_years:
        is_train = (year_of_game < validate_year)
        is_val = (year_of_game == validate_year)

        X_train, X_val = X[is_train], X[is_val]
        y_train, y_val = y[is_train], y[is_val]

        # Both sides of the split need at least one row to be usable.
        if not (len(X_train) and len(X_val)):
            continue

        model.fit(X_train, y_train)
        predicted = model.predict(X_val)

        fold_mae = mean_absolute_error(y_val, predicted)
        fold_rmse = np.sqrt(mean_squared_error(y_val, predicted))
        fold_results.append((validate_year, fold_mae, fold_rmse))

        # The trailing "\n" on the last item leaves a blank line between folds.
        print(
            f"Validation Year: {validate_year}",
            f"Train Years: Before {validate_year}",
            f"MAE:  {fold_mae:.2f}",
            f"RMSE: {fold_rmse:.2f}\n",
            sep="\n",
        )

    if fold_results:
        print("\nSummary of Expanding Window Validation:")
        for yr, mae_score, rmse_score in fold_results:
            print(f"Year: {yr}, MAE: {mae_score:.2f}, RMSE: {rmse_score:.2f}")
        print("\nAverage MAE:", np.mean([mae for _, mae, _ in fold_results]))
        print("Average RMSE:", np.mean([rmse for _, _, rmse in fold_results]))
    print("\n========================================\n")


# ------------------------------------------------------------
# Model Training and Validation
# ------------------------------------------------------------
# Shared seed so the tree-based models give repeatable results across runs.
_SEED = 42

# (estimator, display label) pairs, evaluated in this order.
models = [
    (
        LinearRegression(),
        "Linear Regression",
    ),
    (
        RandomForestRegressor(n_estimators=100, random_state=_SEED),
        "Random Forest",
    ),
    (
        GradientBoostingRegressor(
            n_estimators=100,
            learning_rate=0.1,
            random_state=_SEED,
        ),
        "Gradient Boosting",
    ),
]

# Run the year-by-year validation report for each candidate model.
for model, model_name in models:
    expanding_window_validation(model=model, model_name=model_name)



===== Model: Linear Regression =====

Validation Year: 2001
Train Years: Before 2001
MAE:  3.39
RMSE: 4.52

Validation Year: 2002
Train Years: Before 2002
MAE:  3.38
RMSE: 4.49

Validation Year: 2003
Train Years: Before 2003
MAE:  3.32
RMSE: 4.42

Validation Year: 2004
Train Years: Before 2004
MAE:  3.37
RMSE: 4.51

Validation Year: 2005
Train Years: Before 2005
MAE:  3.39
RMSE: 4.53

Validation Year: 2006
Train Years: Before 2006
MAE:  3.43
RMSE: 4.61

Validation Year: 2007
Train Years: Before 2007
MAE:  3.52
RMSE: 4.71

Validation Year: 2008
Train Years: Before 2008
MAE:  3.52
RMSE: 4.66

Validation Year: 2009
Train Years: Before 2009
MAE:  3.53
RMSE: 4.69

Validation Year: 2010
Train Years: Before 2010
MAE:  3.49
RMSE: 4.61

Validation Year: 2011
Train Years: Before 2011
MAE:  3.50
RMSE: 4.63

Validation Year: 2012
Train Years: Before 2012
MAE:  3.40
RMSE: 4.51

Validation Year: 2013
Train Years: Before 2013
MAE:  3.40
RMSE: 4.50

Validation Year: 2014
Train Years: Before 2014
MAE: