# Benchmark Evaluation Script (Production v2)
# Updated for OAI_mega_cohort_v2.parquet

Evaluates model performance with:
- 15 biomarkers (expanded from 5)
- Random Survival Forest baseline
- C-index metric (Brier score is imported but not yet computed in this script)

In [None]:
Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# scikit-survival is treated as an optional dependency: HAS_SKSURV records
# whether the import succeeded so that later cells can skip model training
# instead of failing with a NameError.
try:
    # Keep the try body limited to the imports that can actually raise.
    from sksurv.ensemble import RandomSurvivalForest
    from sksurv.metrics import concordance_index_censored, brier_score
except ImportError:
    HAS_SKSURV = False
    # The warning glyph was previously stored as mis-decoded bytes
    # (UTF-8 read as Mac Roman); it is restored to the intended character.
    print("⚠️ scikit-survival not installed")
else:
    HAS_SKSURV = True

In [None]:
Configuration
# Location of the preprocessed v2 cohort file read by the data-loading cell.
DATA_PATH = '/kaggle/input/oai-preprocessed-data/OAI_mega_cohort_v2.parquet'

# V2 feature sets.
# Both lists are candidate column names; the loading cell keeps only those
# that are present in the loaded DataFrame.
CLINICAL_FEATURES = [
    'V00AGE',
    'V00PASE',
    'V00KOOSQOL',
    'WOMAC_Score',
    'WOMAC_Stiffness',
    'KOOS_Symptoms',
    'V00MACLBML',
    'V00WMTMTH',
    'V00WLTMTH',
    'KL_Grade',
    'Knee_Side',
]

# 15 biomarker columns, all sharing the 'Bio_' prefix.
BIOMARKER_FEATURES = [
    'Bio_C1_2C',
    'Bio_C2C',
    'Bio_CPII',
    'Bio_COMP',
    'Bio_CS846',
    'Bio_COLL2_1_NO2',
    'Bio_CTXI',
    'Bio_NTXI',
    'Bio_PIIANP',
    'Bio_HA',
    'Bio_MMP3',
    'Bio_uCTXII',
    'Bio_uC1_2C',
    'Bio_uC2C',
    'Bio_uNTXI',
]

In [None]:
Load Data
# Load the cohort and build the inputs for survival modelling:
#   df                : the full cohort as read from DATA_PATH
#   feature_cols      : configured feature names that exist in df
#   X                 : feature matrix with missing values median-imputed
#   y_event / y_time  : event indicator (bool) and follow-up time columns
#   y, X_train, X_test, y_train, y_test : only defined when HAS_SKSURV is True
df = pd.read_parquet(DATA_PATH)
print(f"📂 Loaded: {df.shape}")

# Prepare features: configured columns that are absent from the file are
# silently skipped, so the feature count printed below may be smaller than
# the configured lists.
feature_cols = [c for c in CLINICAL_FEATURES + BIOMARKER_FEATURES if c in df.columns]
print(f"📊 Using {len(feature_cols)} features")

X = df[feature_cols].copy()
y_event = df['event'].astype(bool)
y_time = df['time_to_event']

if HAS_SKSURV:
    # Structured (event, time) array in the layout scikit-survival expects.
    # Filled field-by-field instead of looping over rows in Python.
    y = np.empty(len(df), dtype=[('event', bool), ('time', float)])
    y['event'] = y_event.to_numpy()
    y['time'] = y_time.to_numpy(dtype=float)

    # Train/Test split. This happens BEFORE imputation so that held-out rows
    # cannot influence the fill values. The split uses the same sizes and
    # seed as before, so row assignment is unchanged.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Handle missing values with medians learned from the training rows only;
    # computing them on the full matrix would leak test-set information into
    # the benchmark.
    train_medians = X_train.median()
    X_train = X_train.fillna(train_medians)
    X_test = X_test.fillna(train_medians)
    # Keep the full matrix NaN-free and consistent with the two partitions.
    X = X.fillna(train_medians)

    print(f"📊 Train: {len(X_train)} | Test: {len(X_test)}")
    print(
        f"   Events (train): {int(y_train['event'].sum())} | "
        f"Events (test): {int(y_test['event'].sum())}"
    )
else:
    # No model is trained without scikit-survival, so there is no held-out
    # set to protect; impute over the full matrix as before.
    X = X.fillna(X.median())

In [None]:
Train Random Survival Forest
# Train the Random Survival Forest baseline, report the test-set C-index and
# rank the features by importance. Skipped entirely when scikit-survival is
# not installed.
if HAS_SKSURV:
    # Imported here because it is only needed for the importance fallback.
    from sklearn.inspection import permutation_importance

    print("\n🌲 Training Random Survival Forest...")
    rsf = RandomSurvivalForest(
        n_estimators=100,
        min_samples_split=10,
        min_samples_leaf=5,
        max_features='sqrt',
        n_jobs=-1,
        random_state=42
    )
    rsf.fit(X_train, y_train)

    # Evaluate: the value is reported below as the C-index.
    c_index = rsf.score(X_test, y_test)
    print("\n📈 Results:")
    print(f"   C-Index: {c_index:.4f}")

    # Feature importance.
    # scikit-survival documents that impurity-based `feature_importances_` is
    # not available for RandomSurvivalForest (accessing it raises
    # NotImplementedError) and recommends permutation importance instead.
    # Try the attribute first so a release that provides it keeps working,
    # then fall back to the permutation-based estimate on the test set.
    try:
        importance_values = rsf.feature_importances_
    except (NotImplementedError, AttributeError):
        perm_result = permutation_importance(
            rsf, X_test, y_test, n_repeats=5, random_state=42
        )
        # Mean drop in the model score when each feature is shuffled.
        importance_values = perm_result.importances_mean

    importances = pd.DataFrame({
        'feature': feature_cols,
        'importance': importance_values
    }).sort_values('importance', ascending=False)

    print("\n🔝 Top 10 Features:")
    print(importances.head(10).to_string(index=False))

In [None]:
Comparison Summary
# Final report: feature counts (split by clinical / biomarker), cohort size
# and event rate, followed by the fixed target and previous-best C-index.
n_clinical = sum(1 for col in CLINICAL_FEATURES if col in df.columns)
n_biomarker = sum(1 for col in BIOMARKER_FEATURES if col in df.columns)
event_column = df['event']
rule = '=' * 50

# The leading and trailing empty entries reproduce the blank line that the
# report starts and ends with.
summary_lines = [
    "",
    rule,
    "BENCHMARK SUMMARY (v2 Cohort)",
    rule,
    f"Features: {len(feature_cols)} ({n_clinical} clinical + {n_biomarker} biomarker)",
    f"Samples: {len(df)}",
    f"Events: {event_column.sum()} ({event_column.mean()*100:.1f}%)",
    "",
    "Target C-Index: 0.75+",
    "Previous Best: 0.65",
    rule,
    "",
]
print("\n".join(summary_lines))