# NBA Prediction: Comprehensive Model Comparison

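This notebook compares traditional baselines, Machine Learning models, and Deep Learning models for predicting a player's points (PTS), assists (AST), and rebounds (REB) in a game, using a single held-out season as the test set.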

## Models Evaluated
### Baselines & ML
1.  **Linear Regression**: Simple linear relationship.
2.  **Rolling Average (Last 5)**: Moving average of recent performance.
3.  **Random Forest**: Bagging ensemble.
4.  **XGBoost**: Gradient Boosting.

### Deep Learning
5.  **SeqModel (Transformer)**: Time-series transformer using numerical stats sequences.
6.  **GraphModel (CNN)**: Spatial-temporal CNN using shot chart heatmaps.
7.  **MultiModal (CRNN)**: Hybrid model combining Shot Charts (CNN) and Stats (Transformer).

## Metrics
-   **RMSE**: Root Mean Squared Error (Lower is Better)
-   **MAE**: Mean Absolute Error (Lower is Better)
-   **R2 Score**: Coefficient of Determination (Higher is Better, max 1.0)
-   **Std Dev**: Standard Deviation of true values (Target Variability)
-   **NRMSE (norm)**: RMSE / Std Dev. A value below 1.0 means the model beats always predicting the mean of the true values.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import json
import torch
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler

# The deep learning code lives in local modules (seqModel, graphModel,
# multiModel); add the current working directory to the import path first.
import sys
sys.path.append('.')

# DL_AVAILABLE is True only when all three local modules import cleanly.
# A failed import is reported and leaves the notebook usable for the ML
# baselines alone.
try:
    from seqModel import NbaTransformer, createSequences
    from graphModel import NbaCnn, createCnnSequences, loadAndPreprocessData
    from multiModel import (
        NbaMultimodal,
        createMultimodalSequences,
        loadAndPreprocessData as loadMultimodalData,
    )
except ImportError as import_error:
    print(f"Deep Learning modules not found: {import_error}")
    DL_AVAILABLE = False
else:
    DL_AVAILABLE = True

# Configuration
# Input CSV locations (relative paths, resolved against the working directory).
GAMES_PATH = 'dataset/games.csv'
SHOTS_PATH = 'dataset/shots.csv'
# Columns that every model in this notebook predicts and is scored on.
TARGET_COLS = ['PTS', 'AST', 'REB']
# SEASON_ID value held out as the test set; every other season is training data.
# NOTE(review): compared as an int against the raw SEASON_ID column in the ML
# split but as a string in the SeqModel branch — confirm the column's dtype.
TEST_SEASON_ID = 22024

# Set Style
# Seaborn theme and default figure size used by the plots in this notebook.
sns.set_theme(style="whitegrid")
plt.rcParams['figure.figsize'] = (14, 7)

## 1. Data Loading & Feature Engineering (ML Baselines)

In [None]:
print("Loading Data for ML Baselines...")

# Read the per-game log, parse dates, and order each player's games
# chronologically so that lag/rolling features look strictly backwards.
df = pd.read_csv(GAMES_PATH, low_memory=False)
df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'], format='mixed')
df = df.sort_values(by=['Player_ID', 'GAME_DATE']).reset_index(drop=True)

# Coerce targets to numbers (unparseable values become NaN) and discard rows
# where any target is missing.
numeric_targets = {
    name: pd.to_numeric(df[name], errors='coerce') for name in TARGET_COLS
}
df = df.assign(**numeric_targets).dropna(subset=TARGET_COLS)

def create_features(df, target_cols=None):
    """Add within-season lag and rolling-average features for each target.

    For every target column ``T`` the returned frame gains, in this order:

    * ``T_Roll5`` - mean of the player's previous five games in the same
      season (the current game is excluded); NaN until five prior games exist.
    * ``T_Lag1``, ``T_Lag2``, ``T_Lag3`` - the value from 1, 2 and 3 games
      earlier in the same season; NaN when that game does not exist.

    Rows are expected to be ordered chronologically within each
    (Player_ID, SEASON_ID) group; "previous" means earlier row position.

    Args:
        df: Game log containing ``Player_ID``, ``SEASON_ID`` and the target
            columns. It is not modified.
        target_cols: Columns to build features for. Defaults to the
            module-level ``TARGET_COLS``.

    Returns:
        A copy of ``df`` with the feature columns appended.
    """
    if target_cols is None:
        target_cols = TARGET_COLS

    df_eng = df.copy()
    # Group the untouched input: its index matches df_eng, and grouping is
    # unaffected by the columns being appended to df_eng below.
    by_player_season = df.groupby(['Player_ID', 'SEASON_ID'])

    for target in target_cols:
        per_group = by_player_season[target]

        # Baseline: rolling average of the last 5 games, strictly within season.
        # The shift AND the rolling window both run inside the group. Chaining
        # .rolling() onto the result of groupby().shift() would instead roll
        # over the whole column and could mix rows from neighbouring
        # players/seasons whenever a group's rows are not contiguous.
        df_eng[f'{target}_Roll5'] = per_group.transform(
            lambda games: games.shift(1).rolling(5).mean()
        )

        # ML features: simple lags, strictly within season.
        for lag in (1, 2, 3):
            df_eng[f'{target}_Lag{lag}'] = per_group.shift(lag)

    return df_eng

print("Generating Features (Strict Season)...")
df_features = create_features(df)

# Keep only rows with a complete set of engineered lag/rolling features.
# The drop is limited to the columns the baselines actually use (the columns
# create_features added, plus the targets). A bare dropna() would also discard
# every game that has a NaN in any unrelated column of the raw game log.
engineered_cols = [c for c in df_features.columns if c not in df.columns]
df_clean = (
    df_features
    .dropna(subset=engineered_cols + list(TARGET_COLS))
    .reset_index(drop=True)
)

# Hold out the test season; every other season is training data.
is_test_season = df_clean['SEASON_ID'] == TEST_SEASON_ID
train_df = df_clean[~is_test_season].copy()
test_df = df_clean[is_test_season].copy()

print(f"ML Train: {len(train_df)}, ML Test: {len(test_df)}")


## 2. Train & Evaluate ML Baselines

In [None]:
# Leaderboard rows; one dict per (target, model) pair.
results = []

# Prepare features: three lags and the rolling average for every target.
feature_cols = [
    f'{stat}_{suffix}'
    for stat in TARGET_COLS
    for suffix in ('Lag1', 'Lag2', 'Lag3', 'Roll5')
]

models = {
    'Linear Regression': LinearRegression(),
    'RandomForest': RandomForestRegressor(n_estimators=50, max_depth=8, n_jobs=-1, random_state=42),
    'XGBoost': XGBRegressor(n_estimators=100, learning_rate=0.1, n_jobs=-1, random_state=42)
}


def _metrics_row(target, model_name, y_true, y_pred):
    """Build one leaderboard row scoring `y_pred` against `y_true`."""
    std_dev = np.std(y_true)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return {
        'Target': target, 'Model': model_name,
        'RMSE': rmse,
        'MAE': mean_absolute_error(y_true, y_pred),
        'R2': r2_score(y_true, y_pred),
        'StdDev': std_dev,
        'NRMSE': rmse / std_dev
    }


# The feature matrices are the same for every target, so build them once.
X_train = train_df[feature_cols]
X_test = test_df[feature_cols]

for target in TARGET_COLS:
    print(f"Evaluating {target}...")
    y_train = train_df[target]
    y_test = test_df[target].values

    # 1. Rolling baseline: the rolling-average feature is itself the prediction.
    pred_roll = test_df[f'{target}_Roll5'].values
    results.append(_metrics_row(target, 'Rolling Avg (5)', y_test, pred_roll))

    # 2. ML models: each estimator is re-fit from scratch for this target.
    for name, model in models.items():
        model.fit(X_train, y_train)
        pred = model.predict(X_test)
        results.append(_metrics_row(target, name, y_test, pred))


## 3. Evaluate Deep Learning Models
For each model type we pick the most recently modified run directory under `savedSeqModels`, `savedCnnModels`, and `savedMultimodalModels`, then load its `config.json` and `model.ckpt`.

In [None]:
def get_latest_model_path(base_dir):
    """Return the most recently modified subdirectory of `base_dir`.

    Only immediate children that are directories are considered, ranked by
    modification time. Returns None when `base_dir` does not exist or has no
    subdirectories.
    """
    if not os.path.exists(base_dir):
        return None

    child_paths = (os.path.join(base_dir, entry) for entry in os.listdir(base_dir))
    run_dirs = filter(os.path.isdir, child_paths)
    return max(run_dirs, key=os.path.getmtime, default=None)

def predict_batch(model, device, *inputs, batch_size=32):
    """Run `model` over `inputs` in mini-batches and return stacked predictions.

    Batching keeps peak memory bounded so large test sets do not cause
    out-of-memory errors on the device.

    Args:
        model: Callable invoked as ``model(*batch_tensors)``. It must return a
            tensor whose first dimension is the batch dimension.
        device: Device passed to ``Tensor.to`` (e.g. ``'cpu'`` or ``'cuda'``).
        *inputs: One or more array-likes that share the same first dimension,
            e.g. ``(xImg, xStat)`` or ``(xSeq,)``. Each is sliced along axis 0
            and passed to the model positionally, in the order given.
        batch_size: Number of samples per forward pass (keyword-only).

    Returns:
        A NumPy array with the per-batch outputs concatenated along axis 0.

    Raises:
        ValueError: If the first input contains no samples.
    """
    n_samples = len(inputs[0])
    if n_samples == 0:
        # np.concatenate would raise a ValueError on an empty list anyway;
        # raise the same type with a message that names the real problem.
        raise ValueError("predict_batch received no samples to run inference on")

    preds_list = []
    # Inference only: gradients are never needed, so skip building the graph.
    with torch.no_grad():
        for start in range(0, n_samples, batch_size):
            batch_tensors = [
                torch.FloatTensor(x[start:start + batch_size]).to(device)
                for x in inputs
            ]
            # Move each result back to host memory right away so device
            # memory is freed between batches.
            preds_list.append(model(*batch_tensors).cpu().numpy())

    return np.concatenate(preds_list, axis=0)

def evaluate_dl_model(model_type: str, save_dir: str) -> None:
    """Score one saved deep learning model on the test season.

    Picks the most recently modified run directory under `save_dir`, reads its
    `config.json` and `model.ckpt`, rebuilds the test inputs for the requested
    model type, runs batched inference, and appends one metrics row per target
    (RMSE, MAE, R2, StdDev, NRMSE) to the module-level `results` list.

    Args:
        model_type: 'SeqModel', 'GraphModel' or 'MultiModal'. Any other value
            matches no branch, so nothing is appended to `results`.
        save_dir: Directory that holds one subdirectory per saved run.

    Returns:
        None. Results are reported through `results` and printed messages.

    Any exception raised after the run directory is found is printed with its
    traceback and swallowed, so later evaluations in the notebook still run.
    """
    # Chosen by directory modification time (see get_latest_model_path); the
    # "best model" wording in the message below does not reflect any metric.
    best_path = get_latest_model_path(save_dir)
    if not best_path:
        print(f"No saved models found in {save_dir}")
        return

    print(f"[{model_type}] Finding best model... Found: {best_path}")

    try:
        # Hyperparameters saved alongside the checkpoint.
        with open(os.path.join(best_path, 'config.json'), 'r') as f:
            config = json.load(f)

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        model = None
        # preds / y_truth stay None when model_type matches no branch below.
        preds = None
        y_truth = None

        # Step 1: Load & Preprocess Data
        if model_type == 'SeqModel':
            print(f"[{model_type}] Step 1: Loading Data...")
            gamesData = pd.read_csv(GAMES_PATH, low_memory=False)
            gamesData['GAME_DATE'] = pd.to_datetime(gamesData['GAME_DATE'], format='mixed')
            for col in TARGET_COLS: gamesData[col] = pd.to_numeric(gamesData[col], errors='coerce')
            gamesData = gamesData.dropna(subset=TARGET_COLS).sort_values(['Player_ID', 'GAME_DATE'])

            # Season IDs are compared as strings here, so the split works
            # whether the column was parsed as int or as str.
            test_season_data = gamesData[gamesData['SEASON_ID'].astype(str) == str(TEST_SEASON_ID)]
            train_season_data = gamesData[gamesData['SEASON_ID'].astype(str) != str(TEST_SEASON_ID)]

            # FULL Feature List (Must match training!)
            featureCols = ['PTS', 'AST', 'REB', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 
                           'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'STL', 'BLK', 'TOV', 'PF', 
                           'PLUS_MINUS', 'MIN', 'USG_PCT', 'OFF_RATING', 'DEF_RATING', 'PACE', 'TS_PCT']

            # The list above is hardcoded here rather than returned by a
            # loader (unlike the GraphModel / MultiModal branches).
            # NOTE(review): the model is built with inputDim=len(featureCols),
            # so this list presumably has to match the features the checkpoint
            # was trained on — confirm against the seqModel training code.

            print(f"[{model_type}] Step 2: Generating Sequences & Scaling...")
            xTrain, yTrain = createSequences(train_season_data, config['seqLength'], featureCols, TARGET_COLS)
            xTest, yTest = createSequences(test_season_data, config['seqLength'], featureCols, TARGET_COLS)

            # Scale Features
            # The scaler is fit on training-season sequences only and then
            # applied to the test sequences. The 3-D array is flattened to
            # (-1, F) so every feature is standardized across all timesteps.
            scalerX = StandardScaler()
            N_train, S, F = xTrain.shape
            xTrainReshaped = xTrain.reshape(-1, F)
            scalerX.fit(xTrainReshaped)

            N_test, S_test, F_test = xTest.shape
            xTestScaled = scalerX.transform(xTest.reshape(-1, F)).reshape(N_test, S_test, F_test)

            # Scale Targets
            # Fit only; used below to map model outputs back to raw units.
            # NOTE(review): assumes the checkpoint was trained on targets
            # standardized with equivalent statistics — TODO confirm.
            scalerY = StandardScaler()
            scalerY.fit(yTrain)

            print(f"[{model_type}] Step 3: Loading Model...")
            # Architecture hyperparameters fall back to defaults when the
            # saved config does not list them.
            model = NbaTransformer(
                inputDim=len(featureCols),
                dModel=config.get('dModel', 64),
                nHead=config.get('nHead', 4),
                numLayers=config.get('numLayers', 2),
                outputDim=len(TARGET_COLS),
                dropout=config.get('dropout', 0.1)
            )
            model.load_state_dict(torch.load(os.path.join(best_path, 'model.ckpt'), map_location=device))
            model.to(device).eval()

            print(f"[{model_type}] Step 4: Inference (Batched)...")
            preds = predict_batch(model, device, xTestScaled)
            preds = scalerY.inverse_transform(preds)
            y_truth = yTest

        elif model_type == 'GraphModel':
            print(f"[{model_type}] Step 1: Loading Data...")
            # targetCols returned by the loader is not used; TARGET_COLS is
            # passed to the sequence builder instead.
            gamesData, shotsGrouped, targetCols = loadAndPreprocessData(GAMES_PATH, SHOTS_PATH)
            testGames = gamesData[gamesData['SEASON_ID'].isin([TEST_SEASON_ID])].copy()
            trainGames = gamesData[~gamesData['SEASON_ID'].isin([TEST_SEASON_ID])].copy()

            print(f"[{model_type}] Step 2: Generating Heatmap Sequences...")
            xTest, yTest = createCnnSequences(testGames, shotsGrouped, config['seqLength'], TARGET_COLS)
            # Training inputs are discarded; only the training targets are
            # needed, to fit the target scaler.
            _, yTrain = createCnnSequences(trainGames, shotsGrouped, config['seqLength'], TARGET_COLS)

            scalerY = StandardScaler()
            scalerY.fit(yTrain)

            print(f"[{model_type}] Step 3: Loading Model...")
            # NOTE(review): presumably two heatmap channels per game in the
            # sequence — confirm against graphModel.
            inputCh = config['seqLength'] * 2
            model = NbaCnn(outputDim=len(TARGET_COLS), inputChannels=inputCh)
            model.load_state_dict(torch.load(os.path.join(best_path, 'model.ckpt'), map_location=device))
            model.to(device).eval()

            print(f"[{model_type}] Step 4: Inference (Batched)...")
            preds = predict_batch(model, device, xTest)
            preds = scalerY.inverse_transform(preds)
            y_truth = yTest

        elif model_type == 'MultiModal':
            print(f"[{model_type}] Step 1: Loading Data...")
            # The loader supplies the stat feature columns (fCols) and target
            # columns (tCols) used to build the sequences.
            gamesData, shotsGrouped, fCols, tCols = loadMultimodalData(GAMES_PATH, SHOTS_PATH, config['seqLength'])
            testGames = gamesData[gamesData['SEASON_ID'].isin([TEST_SEASON_ID])].copy()
            trainGames = gamesData[~gamesData['SEASON_ID'].isin([TEST_SEASON_ID])].copy()

            print(f"[{model_type}] Step 2: Generating Sequences & Scaling...")
            xImgTest, xStatTest, yTest = createMultimodalSequences(testGames, shotsGrouped, config['seqLength'], fCols, tCols)
            # Training images are discarded; training stats and targets are
            # kept only to fit the scalers.
            _, xStatTrain, yTrain = createMultimodalSequences(trainGames, shotsGrouped, config['seqLength'], fCols, tCols)

            # Standardize stat features with training-season statistics only.
            scalerX = StandardScaler()
            N, S, F = xStatTrain.shape
            scalerX.fit(xStatTrain.reshape(-1, F))

            N_t, S_t, F_t = xStatTest.shape
            xStatTest = scalerX.transform(xStatTest.reshape(-1, F)).reshape(N_t, S_t, F_t)

            scalerY = StandardScaler()
            scalerY.fit(yTrain)

            print(f"[{model_type}] Step 3: Loading Model...")

            # --- FIX: Handle Feature Mismatch for Old Models ---
            # Read the stat encoder's first weight from the checkpoint to learn
            # how many stat features the saved model was built for.
            checkpoint = torch.load(os.path.join(best_path, 'model.ckpt'), map_location=device)
            stat_weight_shape = checkpoint['statEncoder.0.weight'].shape # presumably (out_features, input_dim) — TODO confirm
            ckpt_input_dim = stat_weight_shape[1]

            current_input_dim = len(fCols)

            if ckpt_input_dim != current_input_dim:
                print(f"⚠️ Warning: Model expects {ckpt_input_dim} features but data has {current_input_dim}.")
                print(f"cutting features to match model...")
                # Keep only the leading ckpt_input_dim features (already scaled).
                # NOTE(review): assumes the checkpoint's features are a prefix
                # of fCols in the same order — verify. Slicing can only drop
                # trailing features; it cannot supply missing ones when the
                # checkpoint expects more than the data has.
                xStatTest = xStatTest[:, :, :ckpt_input_dim]
                # Update input dimension for model init
                input_dim_to_use = ckpt_input_dim
            else:
                input_dim_to_use = current_input_dim

            # Unlike the SeqModel branch, every hyperparameter here must be
            # present in the config (no fallback defaults).
            model = NbaMultimodal(
                numStatFeatures=input_dim_to_use, 
                seqLength=config['seqLength'], outputDim=len(TARGET_COLS),
                cnnEmbedDim=config['cnnEmbedDim'], statEmbedDim=config['statEmbedDim'],
                dModel=config['dModel'], nHead=config['nHead'],
                numLayers=config['numLayers'], dropout=config['dropout']
            )
            model.load_state_dict(checkpoint)
            model.to(device).eval()

            print(f"[{model_type}] Step 4: Inference (Batched)...")
            preds = predict_batch(model, device, xImgTest, xStatTest)
            preds = scalerY.inverse_transform(preds)
            y_truth = yTest

        # Calculate Metrics
        # Column i of both arrays is scored as TARGET_COLS[i].
        # NOTE(review): assumes the sequence builders emit targets in
        # TARGET_COLS order (the MultiModal branch builds them from tCols) —
        # confirm.
        if preds is not None:
            for i, target in enumerate(TARGET_COLS):
                y_true_col = y_truth[:, i]
                y_pred_col = preds[:, i]
                rmse = np.sqrt(mean_squared_error(y_true_col, y_pred_col))
                mae = mean_absolute_error(y_true_col, y_pred_col)
                r2 = r2_score(y_true_col, y_pred_col)
                std_dev = np.std(y_true_col)

                results.append({
                    'Target': target, 'Model': model_type,
                    'RMSE': rmse, 'MAE': mae, 'R2': r2,
                    'StdDev': std_dev, 'NRMSE': rmse/std_dev
                })
            print(f"[{model_type}] Evaluation Complete.")

    except Exception as e:
        # Report the failure with its traceback and carry on without
        # re-raising, so the other model evaluations are not blocked.
        print(f"Failed to evaluate {model_type}: {e}")
        import traceback
        traceback.print_exc()

# Evaluate each deep learning model from its own checkpoint directory, but
# only when the local model modules were importable.
if DL_AVAILABLE:
    for dl_model_type, dl_save_dir in (
        ('SeqModel', 'savedSeqModels'),
        ('GraphModel', 'savedCnnModels'),
        ('MultiModal', 'savedMultimodalModels'),
    ):
        evaluate_dl_model(dl_model_type, dl_save_dir)

In [None]:
# Collect every (target, model) metrics row into a single table.
results_df = pd.DataFrame(results)

# Top five models per target, best (lowest RMSE) first.
print("Leaderboard (Lowest RMSE by Target):")
for target in TARGET_COLS:
    target_rows = results_df[results_df['Target'] == target]
    display(target_rows.sort_values('RMSE').head(5))

# Full table, grouped by target and ordered by RMSE within each target.
print("\nDetailed Statistical Metrics:")
summary_cols = ['Target', 'Model', 'RMSE', 'StdDev', 'NRMSE', 'R2']
display(results_df[summary_cols].sort_values(['Target', 'RMSE']))

# Grouped bar chart: one cluster per target, one bar per model.
catplot_options = {
    'kind': "bar",
    'x': "Target", 'y': "RMSE", 'hue': "Model",
    'height': 6, 'aspect': 2, 'palette': "magma",
}
g = sns.catplot(data=results_df, **catplot_options)
plt.title("Model Comparison: Baseline vs ML vs Deep Learning")
plt.show()