# NBA Prediction: Comprehensive Model Comparison

This notebook compares traditional baselines, Machine Learning models, and Deep Learning models.

## Models Evaluated
### Baselines & ML
1.  **Linear Regression**: Simple linear relationship.
2.  **Rolling Average (Last 5)**: Moving average of recent performance.
3.  **Random Forest**: Bagging ensemble.
4.  **XGBoost**: Gradient Boosting.

### Deep Learning
5.  **SeqModel (Transformer)**: Time-series transformer using numerical stats sequences.
6.  **GraphModel (CNN)**: Spatial-temporal CNN using shot chart heatmaps.

## Metrics
-   **RMSE**: Root Mean Squared Error (Primary Metric)
-   **MAE**: Mean Absolute Error
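-   **Std Dev (Reference)**: Standard deviation of the test-set targets, which equals the RMSE of always predicting the test-set mean. A model whose RMSE is below this value beats that constant baseline, i.e. it has predictive power.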


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import json
import torch
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler

# Import Local Deep Learning Modules
import sys
# Make modules in the current working directory importable.
sys.path.append('.')
# Note: These imports assume seqModel.py and graphModel.py are in the same folder
# If either import fails, the notebook keeps running: DL_AVAILABLE is set to
# False and the deep-learning evaluation (guarded by that flag) is skipped.
try:
    from seqModel import NbaTransformer, createSequences
    from graphModel import NbaCnn, createCnnSequences, loadAndPreprocessData
    DL_AVAILABLE = True
except ImportError as e:
    # Report the missing module instead of aborting the whole notebook.
    print(f"Deep Learning modules not found: {e}")
    DL_AVAILABLE = False

# Configuration
# CSV inputs, given relative to the notebook's working directory.
GAMES_PATH = 'dataset/games.csv'
SHOTS_PATH = 'dataset/shots.csv'
# Stat columns every model predicts; also the prefixes of the lag/rolling features.
TARGET_COLS = ['PTS', 'AST', 'REB']
# SEASON_ID value held out as the test season; the deep-learning evaluation
# treats every other season as training data.
TEST_SEASON_ID = 22024

# Set Style
sns.set_theme(style="whitegrid")
# Default figure size (width, height) for matplotlib figures.
plt.rcParams['figure.figsize'] = (14, 7)

## 1. Data Loading & Feature Engineering (ML Baselines)

In [None]:
def create_features(df, target_cols=None):
    """Add rolling-average and lag features that never cross a player-season.

    For every target column the following columns are appended to a copy of
    ``df`` (the input frame itself is left untouched):

    * ``<target>_Roll5`` -- mean of the five previous games of the same
      player in the same season (the current game is excluded).
    * ``<target>_Lag1`` .. ``<target>_Lag3`` -- the value 1, 2 and 3 games
      earlier for the same player in the same season.

    "Previous" follows the row order of ``df`` inside each
    (Player_ID, SEASON_ID) group, so ``df`` should already be in
    chronological order per player. Games that do not have enough history
    inside their own group get NaN.

    Args:
        df: DataFrame with ``Player_ID``, ``SEASON_ID`` and the target columns.
        target_cols: Columns to build features for. Defaults to the
            module-level ``TARGET_COLS``.

    Returns:
        A new DataFrame with the feature columns appended.
    """
    if target_cols is None:
        target_cols = TARGET_COLS

    df_eng = df.copy()
    group_keys = ['Player_ID', 'SEASON_ID']

    def prior_five_mean(series):
        # shift(1) drops the current game from its own window; rolling(5)
        # (min_periods defaults to the window size) then needs five earlier
        # games before it yields a value.
        return series.shift(1).rolling(5).mean()

    for target in target_cols:
        per_player_season = df_eng.groupby(group_keys)[target]

        # Baseline: Rolling Average (Last 5) - STRICTLY WITHIN SEASON.
        # Shift AND roll must both happen inside the group: rolling over the
        # whole shifted column would run in plain row order and blend games
        # of different players whenever a group's rows are not contiguous.
        df_eng[f'{target}_Roll5'] = per_player_season.transform(prior_five_mean)

        # ML Features: Lags - STRICTLY WITHIN SEASON.
        # Grouping by season keeps last season's final games from leaking
        # into the first games of the current season.
        for lag in [1, 2, 3]:
            df_eng[f'{target}_Lag{lag}'] = per_player_season.shift(lag)

    return df_eng


## 2. Train & Evaluate ML Baselines

In [None]:
# Shared accumulators for the rest of the notebook:
#   results     -> one metrics row per (target, model)
#   predictions -> test-set predictions kept for visualization
results = []
predictions = {}

# Every model sees the lag/rolling features of ALL targets, not only its own.
feature_cols = [
    f'{stat}_{suffix}'
    for stat in TARGET_COLS
    for suffix in ('Lag1', 'Lag2', 'Lag3', 'Roll5')
]

models = {
    'Linear Regression': LinearRegression(),
    'RandomForest': RandomForestRegressor(n_estimators=50, max_depth=8, n_jobs=-1, random_state=42),
    'XGBoost': XGBRegressor(n_estimators=100, learning_rate=0.1, n_jobs=-1, random_state=42),
}


def _metric_row(target, model_name, y_true, y_hat, spread):
    """Build one leaderboard row (RMSE, MAE, reference std dev) for a model."""
    return {
        'Target': target, 'Model': model_name,
        'RMSE': np.sqrt(mean_squared_error(y_true, y_hat)),
        'MAE': mean_absolute_error(y_true, y_hat),
        'StdDev': spread,
    }


for target in TARGET_COLS:
    print(f"Evaluating {target}...")
    y_test = test_df[target].values
    std_dev = np.std(y_test)

    # 1. Rolling baseline: the precomputed 5-game average is the prediction.
    pred_roll = test_df[f'{target}_Roll5'].values
    results.append(_metric_row(target, 'Rolling Avg (5)', y_test, pred_roll, std_dev))

    # 2. ML models: each estimator is refit from scratch for this target.
    X_train, y_train = train_df[feature_cols], train_df[target]
    X_test = test_df[feature_cols]

    for name, model in models.items():
        pred = model.fit(X_train, y_train).predict(X_test)
        results.append(_metric_row(target, name, y_test, pred, std_dev))

        # Only the XGBoost predictions are kept for the later plots.
        if name == 'XGBoost':
            predictions[f'{target}_XGB'] = pred

## 3. Evaluate Deep Learning Models
We load the most recently modified run directory under `savedSeqModels` and `savedCnnModels`; each run directory is expected to contain a `config.json` and a `model.ckpt`.

In [None]:
# --- STRICT ALIGNMENT HELPER ---
def get_valid_dl_indices(df, seq_length):
    """Return the keys of the games a sliding-window DL model can predict.

    Games are ordered by (Player_ID, GAME_DATE) and grouped per
    (Player_ID, SEASON_ID). Inside each group the first ``seq_length`` games
    serve only as model input, so every later game is a prediction target.
    Groups with at most ``seq_length`` games contribute nothing.

    Returns:
        A set of ``"<Player_ID>_<Game_ID>"`` strings, one per target game.
    """
    ordered = df.sort_values(['Player_ID', 'GAME_DATE'])
    target_keys = set()

    for _, season_games in ordered.groupby(['Player_ID', 'SEASON_ID']):
        # Too short to fill even one input window plus a target.
        if len(season_games) <= seq_length:
            continue

        # e.g. seq_length=5: positions 0-4 are input, position 5 is the
        # first target.
        targets = season_games.iloc[seq_length:]
        player_part = targets['Player_ID'].astype(str)
        game_part = targets['Game_ID'].astype(str)
        target_keys.update((player_part + '_' + game_part).tolist())

    return target_keys

# 1. Determine the 'Common Valid Set'
# Goal: the ML baselines and the DL models should be scored on the same
# test games.
#
# ML side: create_features builds <target>_Roll5 from shift(1) followed by
# rolling(5), grouped by (Player_ID, SEASON_ID). At game T the window
# therefore covers games T-1 .. T-5, so (with a player-season's rows
# contiguous and in date order) positions 0-4 of each player-season are NaN
# and position 5 is the first usable row.
#
# DL side: get_valid_dl_indices treats the first seq_length games of each
# player-season as inputs only; targets start at position seq_length.
#
# With seq_length == 5 both sides start at position 5, so test_df should
# already be aligned -- provided the NaN feature rows were dropped before
# test_df was built (presumably in the 'df_clean' step, which is not shown
# in this cell -- TODO confirm).
#
# NOTE(review): this cell only prints a banner. No verification or forced
# filtering is performed here and get_valid_dl_indices is not called, so a
# checkpoint whose seqLength differs from 5 would be scored on a different
# set of games than the ML baselines -- confirm whether that is intended.
print("Alignment Check...")


In [None]:
def get_latest_model_path(base_dir):
    """Return the most recently modified immediate subdirectory of ``base_dir``.

    Args:
        base_dir: Directory that holds one subdirectory per saved run.

    Returns:
        The subdirectory path (``base_dir`` joined with the entry name) with
        the newest modification time, or ``None`` when ``base_dir`` is
        missing, is not a directory, or has no subdirectories.
    """
    # isdir (rather than exists) also rejects a regular file at this path,
    # which would otherwise make the directory listing raise.
    if not os.path.isdir(base_dir):
        return None

    with os.scandir(base_dir) as entries:
        subdirs = [entry.path for entry in entries if entry.is_dir()]

    if not subdirs:
        return None
    return max(subdirs, key=os.path.getmtime)

def evaluate_dl_model(model_type, save_dir):
    """Evaluate the newest saved deep-learning checkpoint on the test season.

    Picks the most recently modified run directory under ``save_dir``, reads
    its ``config.json``, rebuilds the test sequences for ``TEST_SEASON_ID``,
    loads ``model.ckpt``, runs inference and appends one metrics row per
    target (RMSE, MAE, StdDev) to the module-level ``results`` list.

    This is best-effort: a missing run directory, an unknown ``model_type``
    or any exception during evaluation is reported with ``print`` and the
    function returns without raising.

    Args:
        model_type: ``'SeqModel'`` (transformer on stat sequences) or
            ``'GraphModel'`` (CNN on shot data).
        save_dir: Directory containing one subdirectory per saved run.

    Returns:
        None.
    """
    # Reject unsupported types up front. Previously they fell through both
    # branches below and surfaced as a confusing NameError on `model`.
    if model_type not in ('SeqModel', 'GraphModel'):
        print(f"Unknown model_type: {model_type}")
        return

    best_path = get_latest_model_path(save_dir)
    if not best_path:
        print(f"No saved models found in {save_dir}")
        return

    print(f"Time to evaluate {model_type}... Loading from {best_path}")

    try:
        with open(os.path.join(best_path, 'config.json'), 'r') as f:
            config = json.load(f)

        # Data is rebuilt from the raw CSVs with the DL preprocessing, so the
        # evaluated rows may not match the ML dataframe exactly; only the
        # test season is guaranteed to be the same.

        if model_type == 'SeqModel':
            gamesData = pd.read_csv(GAMES_PATH, low_memory=False)
            # Minimal inline cleaning: parse dates, force targets to numeric
            # and drop rows whose targets could not be parsed.
            gamesData['GAME_DATE'] = pd.to_datetime(gamesData['GAME_DATE'])
            for col in TARGET_COLS:
                gamesData[col] = pd.to_numeric(gamesData[col], errors='coerce')
            gamesData = gamesData.dropna(subset=TARGET_COLS).sort_values(['Player_ID', 'GAME_DATE'])

            # Compare as strings so the split works whether SEASON_ID was
            # read as int or str.
            is_test_season = gamesData['SEASON_ID'].astype(str) == str(TEST_SEASON_ID)
            test_season_data = gamesData[is_test_season]
            train_season_data = gamesData[~is_test_season]

            # The training-time scaler is not stored with the checkpoint, so
            # it is re-fit on the training seasons' raw targets.
            # NOTE(review): assumes training standardized targets the same
            # way -- confirm against seqModel.py.
            scaler = StandardScaler()
            scaler.fit(train_season_data[TARGET_COLS])

            featureCols = ['PTS', 'AST', 'REB', 'FG_PCT', 'FG3_PCT', 'FT_PCT', 'PLUS_MINUS']
            # Generate Test Sequences
            xTest, yTest = createSequences(test_season_data, config['seqLength'], featureCols, TARGET_COLS)

            # Model (missing architecture keys fall back to the defaults below)
            model = NbaTransformer(
                numFeatures=len(featureCols), seqLength=config['seqLength'], outputDim=len(TARGET_COLS),
                d_model=config.get('d_model', 64), nhead=config.get('nHead', 4),
                numLayers=config.get('numLayers', 2), dropout=config.get('dropout', 0.1)
            )

        else:  # 'GraphModel'
            # Load Data using helper; its third return value is not needed
            # because TARGET_COLS is used throughout.
            gamesData, shotsGrouped, _ = loadAndPreprocessData(GAMES_PATH, SHOTS_PATH)
            testGames = gamesData[gamesData['SEASON_ID'].isin([TEST_SEASON_ID])].copy()
            trainGames = gamesData[~gamesData['SEASON_ID'].isin([TEST_SEASON_ID])].copy()

            xTest, yTest = createCnnSequences(testGames, shotsGrouped, config['seqLength'], TARGET_COLS)

            # Scaler: fit on the targets of the training sequences; only the
            # second element (targets) of the returned pair is used.
            yTrain = createCnnSequences(trainGames, shotsGrouped, config['seqLength'], TARGET_COLS)[1]
            scaler = StandardScaler()
            scaler.fit(yTrain)

            # Model
            # Plan A Check: Input Channels = seqLength * 2
            inputCh = config['seqLength'] * 2
            model = NbaCnn(outputDim=len(TARGET_COLS), inputChannels=inputCh)

        # Load Weights
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # SECURITY NOTE: torch.load may unpickle arbitrary objects depending
        # on the torch version's weights_only default; only load checkpoints
        # produced by this project.
        model.load_state_dict(torch.load(os.path.join(best_path, 'model.ckpt'), map_location=device))
        model.to(device)
        model.eval()

        # Inference (the whole test set is run as a single batch)
        test_inputs = torch.FloatTensor(xTest).to(device)
        with torch.no_grad():
            preds_scaled = model(test_inputs).cpu().numpy()

        # NOTE(review): assumes the network outputs standardized targets while
        # yTest is in original units -- confirm against the training scripts.
        preds_original = scaler.inverse_transform(preds_scaled)

        # Metrics: column i of the targets/predictions belongs to TARGET_COLS[i].
        for i, target in enumerate(TARGET_COLS):
            y_true = yTest[:, i]
            y_pred = preds_original[:, i]

            rmse = np.sqrt(mean_squared_error(y_true, y_pred))
            mae = mean_absolute_error(y_true, y_pred)
            std_dev = np.std(y_true)

            results.append({
                'Target': target, 'Model': model_type,
                'RMSE': rmse, 'MAE': mae, 'StdDev': std_dev
            })

    except Exception as e:
        # Deliberate best-effort boundary: one failing model must not stop the
        # rest of the comparison notebook.
        print(f"Failed to evaluate {model_type}: {e}")

# Evaluate each deep-learning model from its checkpoint directory, but only
# when the local DL modules were importable.
if DL_AVAILABLE:
    for dl_model_type, checkpoint_dir in (
        ('SeqModel', 'savedSeqModels'),
        ('GraphModel', 'savedCnnModels'),
    ):
        evaluate_dl_model(dl_model_type, checkpoint_dir)

## 4. Final Comparison Results

In [None]:
# Collect every metrics row gathered so far into a single table.
results_df = pd.DataFrame(results)

# Per-target leaderboard: the three models with the lowest RMSE.
print("Leaderboard (Lowest RMSE by Target):")
for target in TARGET_COLS:
    target_rows = results_df['Target'] == target
    sub = results_df.loc[target_rows].sort_values('RMSE')
    display(sub.head(3))

# Grouped bar chart: one cluster per target, one bar per model.
bar_options = dict(
    kind="bar",
    x="Target", y="RMSE", hue="Model",
    height=6, aspect=2, palette="magma",
)
g = sns.catplot(data=results_df, **bar_options)
plt.title("Model Comparison: Baseline vs ML vs Deep Learning")
plt.show()