# Model Usage and Inference for Alzheimer's Disease Prediction

This notebook loads trained models and performs predictions on new data, providing a complete inference pipeline.

## Features:
- Load the most recent trained model (tuned models are preferred over basic ones) from the `results/` directory
- Load new data for prediction
- Generate predictions with confidence scores
- Save prediction results
- Model performance evaluation on new data (only when ground-truth labels are available)

## Usage:
1. Ensure you have trained models in `results/` directory
2. Prepare new data in the same format as training data
3. Run inference and get predictions
4. Save results for further analysis

## Outputs:
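- `results/predictions_<timestamp>.csv` with predictions and, when the model exposes `predict_proba`, confidence scores
- `results/predictions_summary_<timestamp>.json` with the model path, the number of predictions and, when labels are available, the accuracy
- Model performance metrics on new data (printed when ground-truth labels are available)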


In [None]:
# Setup
# Notebook bootstrap: silences warnings, extends the import path, caps the
# native thread pools, imports the analysis stack and creates `results/`.
import os
import sys
import warnings
# Suppresses every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Add src to path
# NOTE: './src' is resolved against the current working directory, so the
# notebook is expected to be started from the project root.
sys.path.append('./src')

# Set thread limits for stability
# These are assigned before numpy/pandas are imported below; presumably the
# underlying OpenMP/BLAS runtimes read them when they are first loaded, so the
# order matters - TODO confirm for the installed builds.
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['NUMEXPR_MAX_THREADS'] = '1'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import joblib
import json
import glob

# Create results directory
# exist_ok=True makes re-running this cell a no-op when the directory exists.
os.makedirs('results', exist_ok=True)

print("✅ Setup complete - Ready for model inference")


In [None]:
# Find and Load Best Model
# Selects the newest tuned model if one exists, otherwise the newest basic
# model, otherwise falls back to an *untrained* sample estimator so the rest of
# the notebook can still be stepped through.
#
# Defines: model, model_type ("tuned" | "basic" | "sample"),
#          model_path (None for the sample model), metadata (dict, possibly empty).
print("🔍 Finding best trained model...")

# Look for model files in results directory
model_files = glob.glob('results/best_model_*.pkl')
tuned_model_files = glob.glob('results/best_tuned_model_*.pkl')
fused_model_files = glob.glob('results/best_fused_dataset_*.npz')

print(f"📊 Found {len(model_files)} basic models")
print(f"📊 Found {len(tuned_model_files)} tuned models")
print(f"📊 Found {len(fused_model_files)} fused datasets")

# Load the most recent model (tuned models take precedence over basic ones).
# NOTE: os.path.getctime is the creation time on Windows but the last
# metadata-change time on Unix, so "most recent" is platform dependent.
# model_path is always bound so later cells never see a stale or missing name.
model_path = None
if tuned_model_files:
    model_path = max(tuned_model_files, key=os.path.getctime)
    model_type = "tuned"
elif model_files:
    model_path = max(model_files, key=os.path.getctime)
    model_type = "basic"
else:
    model_type = "sample"

if model_path is not None:
    print(f"📊 Loading {model_type} model from: {model_path}")
    # SECURITY: joblib.load unpickles arbitrary Python objects and can execute
    # code embedded in the file. Only load model files from a trusted source.
    model = joblib.load(model_path)
    print(f"✅ Model loaded successfully")
    print(f"📊 Model type: {type(model).__name__}")
else:
    print("⚠️ No trained models found. Creating sample model...")
    from sklearn.ensemble import RandomForestClassifier
    # This estimator is NOT fitted here; it has to be trained before predict().
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    print("📊 Using sample Random Forest model")

# Load model metadata if available.
# The pattern also matches the `predictions_summary_*.json` files written by the
# save cell of this notebook; those describe predictions, not a model, so they
# are excluded to avoid picking one up as "model metadata" on a second run.
metadata = {}
metadata_files = [
    path for path in glob.glob('results/*_summary_*.json')
    if not os.path.basename(path).startswith('predictions_summary_')
]
if metadata_files:
    metadata_path = max(metadata_files, key=os.path.getctime)
    try:
        with open(metadata_path, 'r') as f:
            metadata = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        # Metadata is optional, so report the problem and carry on without it.
        print(f"⚠️ Could not read model metadata from {metadata_path}: {e}")
        metadata = {}
    else:
        print(f"📊 Model metadata loaded from: {metadata_path}")

print(f"✅ Model loading complete")


In [None]:
# Load New Data for Prediction
# Tries each data source in order and stops at the first one that loads:
#   1. newest fused dataset in results/   2. preprocessed NPZ test split
#   3. cleaned CSV                        4. synthetic sample data
# Defines: X_new (features), y_new (labels or None), new_data_loaded (bool).
print("📊 Loading new data for prediction...")

# Try to load new data from various sources
new_data_loaded = False

# Option 1: Try to load fused dataset
# Re-globbed here so the cell does not depend on a list built by another cell.
fused_model_files = glob.glob('results/best_fused_dataset_*.npz')
if fused_model_files:
    try:
        fused_path = max(fused_model_files, key=os.path.getctime)
        # SECURITY: allow_pickle=True lets np.load unpickle object arrays, which
        # can execute arbitrary code. Only load archives from a trusted source.
        # The `with` block closes the archive's file handle; the arrays are
        # read into memory while it is still open.
        with np.load(fused_path, allow_pickle=True) as fused_data:
            X_new = fused_data['X_test']
            y_new = fused_data['y_test'] if 'y_test' in fused_data else None
        print(f"✅ Loaded fused dataset: {X_new.shape}")
        new_data_loaded = True
    except Exception as e:
        # Best-effort: any failure here just falls through to the next source.
        print(f"⚠️ Fused dataset loading failed: {e}")

# Option 2: Try to load NPZ data
if not new_data_loaded:
    try:
        # SECURITY: see the allow_pickle note above.
        with np.load('data/processed/preprocessed_alz_data.npz', allow_pickle=True) as npz_data:
            X_new = npz_data['X_test']
            y_new = npz_data['y_test']
        print(f"✅ Loaded NPZ test data: {X_new.shape}")
        new_data_loaded = True
    except Exception as e:
        print(f"⚠️ NPZ loading failed: {e}")

# Option 3: Try to load CSV data
if not new_data_loaded:
    try:
        df = pd.read_csv('data/processed/alz_clean.csv')
        # Assume last column is target, rest are features
        X_new = df.iloc[:, :-1].values
        y_new = df.iloc[:, -1].values
        print(f"✅ Loaded CSV data: {X_new.shape}")
        new_data_loaded = True
    except Exception as e:
        print(f"⚠️ CSV loading failed: {e}")

# Option 4: Create sample data
if not new_data_loaded:
    print("🔄 Creating sample data for demonstration...")
    # Seeds NumPy's global RNG so the synthetic features are reproducible.
    np.random.seed(42)
    X_new = np.random.randn(100, 50)  # 100 samples, 50 features
    y_new = None  # synthetic data has no ground truth
    print(f"✅ Created sample data: {X_new.shape}")
    new_data_loaded = True
# Clean the data
def clean_data(X):
    """Return a float64 copy of *X* with unusable entries median-imputed.

    NaN, +/-inf and values whose magnitude exceeds 1e10 are treated as
    missing and replaced by the median of the remaining values in the same
    column. The medians are computed from *X* itself.

    The output always has the same shape as the input: a column with no
    usable value at all is filled with 0.0 instead of being dropped, so the
    feature count keeps matching what a trained model expects.

    Args:
        X: 2D array-like of shape (n_samples, n_features), convertible to float.

    Returns:
        A new ``numpy.ndarray`` of dtype float64; the input is not modified.

    Raises:
        ValueError: if *X* is not two-dimensional or cannot be cast to float.
    """
    X = np.array(X, dtype=np.float64)  # np.array copies, so edits stay local
    if X.ndim != 2:
        raise ValueError(
            f"Expected a 2D feature matrix, got an array with shape {X.shape}"
        )

    # NaN, infinities and implausibly large magnitudes all count as missing.
    missing = np.isnan(X) | np.isinf(X) | (np.abs(X) > 1e10)
    if not missing.any():
        return X
    X[missing] = np.nan

    # Column medians over the observed values only. Columns without a single
    # observed value keep the 0.0 default (and are skipped to avoid the
    # all-NaN warning from nanmedian).
    medians = np.zeros(X.shape[1], dtype=np.float64)
    observed_cols = ~missing.all(axis=0)
    if observed_cols.any():
        medians[observed_cols] = np.nanmedian(X[:, observed_cols], axis=0)

    # `medians` broadcasts across rows, filling each gap with its column value.
    return np.where(missing, medians, X)

# Sanitize the features, then normalise and summarise the labels (if any).
X_new = clean_data(X_new)

print(f"📊 New data shape: {X_new.shape}")
if y_new is not None:
    y_new = np.asarray(y_new)
    if y_new.ndim > 1 and y_new.shape[1] > 1:
        # If one-hot, convert to class indices
        y_new = np.argmax(y_new, axis=1)
    else:
        # Flatten (n, 1) column vectors so the counting below accepts them.
        y_new = y_new.ravel()

    classes, counts = np.unique(y_new, return_counts=True)
    if np.issubdtype(y_new.dtype, np.integer) and (y_new.size == 0 or y_new.min() >= 0):
        # Non-negative integer labels: keep the per-index count array.
        distribution = np.bincount(y_new)
    else:
        # np.bincount rejects float, string and negative labels (for example
        # a CSV target column), so report per-class counts instead.
        distribution = dict(zip(classes.tolist(), counts.tolist()))
    print(f"📊 Target distribution: {distribution}")
    print(f"📊 Classes: {len(classes)}")
else:
    print("📊 No ground truth labels available (prediction mode)")


In [None]:
# Run Predictions
# Produces y_pred, y_proba / max_proba (None when the model has no
# predict_proba), proba_available and the results_df table.
print("🤖 Running predictions...")

# The fallback "sample" model built when no saved model exists is created
# without being trained, and predict() on an unfitted estimator raises
# NotFittedError. Fit it on the loaded data so the demo can run end to end.
if globals().get('model_type') == 'sample':
    from sklearn.exceptions import NotFittedError
    from sklearn.utils.validation import check_is_fitted
    try:
        check_is_fitted(model)
    except NotFittedError:
        print("⚠️ Sample model is untrained - fitting it on the loaded data for demonstration only; "
              "its predictions and metrics are not meaningful")
        if y_new is not None:
            demo_labels = np.asarray(y_new)
        else:
            # No labels available: draw reproducible placeholder classes.
            demo_labels = np.random.default_rng(42).integers(0, 3, size=len(X_new))
        model.fit(X_new, demo_labels)

# Predict classes
y_pred = model.predict(X_new)

# Predict probabilities if available
proba_available = hasattr(model, 'predict_proba')
if proba_available:
    y_proba = model.predict_proba(X_new)
    # Confidence = probability of the most likely class for each sample.
    max_proba = np.max(y_proba, axis=1)
else:
    y_proba = None
    max_proba = None

print(f"✅ Predictions complete: {y_pred.shape}")

# Build results DataFrame
results_df = pd.DataFrame({'prediction': y_pred})
if max_proba is not None:
    results_df['confidence'] = max_proba

print(results_df.head())


In [None]:
# Evaluate (if labels available)
# Scores y_pred against y_new: prints accuracy and the per-class report, then
# draws the confusion matrix. Leaves `acc` and `cm` defined for later cells.
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

if y_new is None:
    # Pure prediction mode: there is nothing to score against.
    print("⚠️ Ground truth labels not provided. Skipping evaluation.")
else:
    print("📊 Evaluating predictions...")

    # Overall fraction of correct predictions
    acc = accuracy_score(y_new, y_pred)
    print(f"Accuracy: {acc:.4f}")

    # Per-class precision / recall / F1
    print("\nClassification Report:")
    report_text = classification_report(y_new, y_pred)
    print(report_text)

    # Confusion Matrix (rows: true class, columns: predicted class)
    cm = confusion_matrix(y_new, y_pred)
    heatmap_options = {'annot': True, 'fmt': 'd', 'cmap': 'Blues'}
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, **heatmap_options)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.tight_layout()
    plt.show()


In [None]:
# Save Predictions
# Writes the prediction table to a timestamped CSV and a small JSON summary
# next to it, both under results/.
print("💾 Saving predictions...")

from datetime import datetime
import json

# Make the cell runnable on its own even if results/ was removed meanwhile.
os.makedirs('results', exist_ok=True)

# One timestamp shared by both files so they can be paired up afterwards.
ts = datetime.now().strftime("%Y%m%d_%H%M%S")

# Save CSV
pred_csv = f'results/predictions_{ts}.csv'
results_df.to_csv(pred_csv, index=False)
print(f"✅ Predictions saved to: {pred_csv}")

# Save JSON summary
summary = {
    # model_path may be undefined when the fallback sample model is in use.
    'model_path': globals().get('model_path'),
    'num_predictions': int(len(results_df)),
    'proba_available': bool(proba_available),
}

if y_new is not None:
    if 'acc' in globals():
        summary['accuracy'] = float(acc)
    else:
        # Labels exist but the evaluation cell was not run, so `acc` is not
        # defined; leave accuracy out instead of failing with a NameError.
        print("⚠️ Accuracy not computed yet (run the evaluation cell) - omitted from summary")

pred_json = f'results/predictions_summary_{ts}.json'
with open(pred_json, 'w') as f:
    json.dump(summary, f, indent=2)
print(f"✅ Prediction summary saved to: {pred_json}")

print("\n🎉 Inference complete!")


In [None]:
# Load New Data for Prediction
# Same source order as the earlier loading cell (fused dataset, preprocessed
# NPZ, cleaned CSV, synthetic data); the synthetic fallback here also
# generates random labels so the evaluation code has something to score.
# Defines: X_new (features), y_new (labels or None), new_data_loaded (bool).
print("📊 Loading new data for prediction...")

# Try to load new data from various sources
new_data_loaded = False

# Option 1: Try to load fused dataset
# Re-globbed here so the cell does not rely on a (possibly stale or missing)
# list left behind by a previously executed cell.
fused_model_files = glob.glob('results/best_fused_dataset_*.npz')
if fused_model_files:
    try:
        fused_path = max(fused_model_files, key=os.path.getctime)
        # SECURITY: allow_pickle=True lets np.load unpickle object arrays, which
        # can execute arbitrary code. Only load archives from a trusted source.
        # The `with` block closes the archive's file handle; the arrays are
        # read into memory while it is still open.
        with np.load(fused_path, allow_pickle=True) as fused_data:
            X_new = fused_data['X_test']
            y_new = fused_data['y_test'] if 'y_test' in fused_data else None
        print(f"✅ Loaded fused dataset: {X_new.shape}")
        new_data_loaded = True
    except Exception as e:
        # Best-effort: any failure here just falls through to the next source.
        print(f"⚠️ Fused dataset loading failed: {e}")

# Option 2: Try to load NPZ data
if not new_data_loaded:
    try:
        # SECURITY: see the allow_pickle note above.
        with np.load('data/processed/preprocessed_alz_data.npz', allow_pickle=True) as npz_data:
            X_new = npz_data['X_test']
            y_new = npz_data['y_test']
        print(f"✅ Loaded NPZ test data: {X_new.shape}")
        new_data_loaded = True
    except Exception as e:
        print(f"⚠️ NPZ loading failed: {e}")

# Option 3: Try to load CSV data
if not new_data_loaded:
    try:
        df = pd.read_csv('data/processed/alz_clean.csv')
        # Assume last column is target, rest are features
        X_new = df.iloc[:, :-1].values
        y_new = df.iloc[:, -1].values
        print(f"✅ Loaded CSV data: {X_new.shape}")
        new_data_loaded = True
    except Exception as e:
        print(f"⚠️ CSV loading failed: {e}")

# Option 4: Create sample data
if not new_data_loaded:
    print("🔄 Creating sample data for demonstration...")
    # Seeds NumPy's global RNG so features and labels are reproducible.
    np.random.seed(42)
    X_new = np.random.randn(100, 50)  # 100 samples, 50 features
    y_new = np.random.choice([0, 1, 2], 100)  # 3 classes
    print(f"✅ Created sample data: {X_new.shape}")
    new_data_loaded = True

# Clean the data
def clean_data(X):
    """Return a float64 copy of *X* with unusable entries median-imputed.

    NaN, +/-inf and values whose magnitude exceeds 1e10 are treated as
    missing and replaced by the median of the remaining values in the same
    column. The medians are computed from *X* itself.

    The output always has the same shape as the input: a column with no
    usable value at all is filled with 0.0 instead of being dropped, so the
    feature count keeps matching what a trained model expects.

    Args:
        X: 2D array-like of shape (n_samples, n_features), convertible to float.

    Returns:
        A new ``numpy.ndarray`` of dtype float64; the input is not modified.

    Raises:
        ValueError: if *X* is not two-dimensional or cannot be cast to float.
    """
    X = np.array(X, dtype=np.float64)  # np.array copies, so edits stay local
    if X.ndim != 2:
        raise ValueError(
            f"Expected a 2D feature matrix, got an array with shape {X.shape}"
        )

    # NaN, infinities and implausibly large magnitudes all count as missing.
    missing = np.isnan(X) | np.isinf(X) | (np.abs(X) > 1e10)
    if not missing.any():
        return X
    X[missing] = np.nan

    # Column medians over the observed values only. Columns without a single
    # observed value keep the 0.0 default (and are skipped to avoid the
    # all-NaN warning from nanmedian).
    medians = np.zeros(X.shape[1], dtype=np.float64)
    observed_cols = ~missing.all(axis=0)
    if observed_cols.any():
        medians[observed_cols] = np.nanmedian(X[:, observed_cols], axis=0)

    # `medians` broadcasts across rows, filling each gap with its column value.
    return np.where(missing, medians, X)

# Sanitize the features, then normalise and summarise the labels (if any).
X_new = clean_data(X_new)

print(f"📊 New data shape: {X_new.shape}")
if y_new is not None:
    y_new = np.asarray(y_new)
    if y_new.ndim > 1 and y_new.shape[1] > 1:
        # If one-hot, convert to class indices (same handling as the earlier
        # loading cell, which this cell otherwise mirrors).
        y_new = np.argmax(y_new, axis=1)
    else:
        # Flatten (n, 1) column vectors so the counting below accepts them.
        y_new = y_new.ravel()

    classes, counts = np.unique(y_new, return_counts=True)
    if np.issubdtype(y_new.dtype, np.integer) and (y_new.size == 0 or y_new.min() >= 0):
        # Non-negative integer labels: keep the per-index count array.
        distribution = np.bincount(y_new)
    else:
        # np.bincount rejects float, string and negative labels (for example
        # a CSV target column), so report per-class counts instead.
        distribution = dict(zip(classes.tolist(), counts.tolist()))
    print(f"📊 Target distribution: {distribution}")
    print(f"📊 Classes: {len(classes)}")
else:
    print("📊 No ground truth labels available (prediction mode)")
