# In [None]:
# %% [markdown]
"""
# Model Evaluation Notebook
**Team:** [Your Team Name]  
**Authors:** [Team Members]  
**Date:** [Date]

## Objective
This notebook demonstrates loading a trained pipeline, making predictions on test data, and evaluating model performance.
"""
# %%
# =====================
# IMPORTS
# =====================
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import (
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score,
    roc_auc_score,
    mean_squared_error,
    r2_score
)
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import os

# %% [markdown]
"""
## 1. Model Loading
Utility function for loading the persisted pipeline
"""
# %%
def load_model(file_path: str):
    """
    Read a persisted pipeline back into memory

    Parameters:
    file_path (str): Location of the serialized model file

    Returns:
    The object stored in the file, exactly as returned by joblib.load

    Raises:
    FileNotFoundError: If nothing exists at file_path
    """
    model_is_present = os.path.exists(file_path)
    if model_is_present:
        pipeline = joblib.load(file_path)
        print(f"Successfully loaded model from {file_path}")
        return pipeline

    # Fail before joblib is involved so the caller sees which path was tried
    raise FileNotFoundError(f"No model found at {file_path}")

# %% [markdown]
"""
## 2. Data Loading
Load and prepare test data for prediction
"""
# %%
def load_test_data(
    file_path: str, target_column: str = 'target'
) -> "tuple[pd.DataFrame, pd.Series]":
    """
    Load the test dataset and split it into features and target

    Parameters:
    file_path (str): Path to test CSV file
    target_column (str): Name of the column holding the target values
        (defaults to 'target')

    Returns:
    tuple: (features, target) where features is a pandas DataFrame without
        the target column and target is a pandas Series

    Raises:
    KeyError: If target_column is not a column of the CSV file
    """
    test_df = pd.read_csv(file_path)
    print(f"Loaded test data with shape: {test_df.shape}")

    # Fail with a message that names the file and the columns actually found,
    # instead of the generic KeyError raised by DataFrame.drop
    if target_column not in test_df.columns:
        raise KeyError(
            f"Column '{target_column}' not found in {file_path}; "
            f"available columns: {list(test_df.columns)}"
        )

    # Only the target is split off here; no feature preprocessing is applied.
    # NOTE(review): presumably the persisted pipeline handles preprocessing
    # itself -- confirm against the training code.
    X_test = test_df.drop(columns=[target_column])
    y_test = test_df[target_column]

    return X_test, y_test

# %% [markdown]
"""
## 3. Prediction & Evaluation
Generate predictions and calculate performance metrics
"""
# %%
def make_predictions(model, X_test: pd.DataFrame) -> np.ndarray:
    """
    Run the fitted pipeline over the test features

    Parameters:
    model: Object exposing a predict method (e.g. a fitted sklearn pipeline)
    X_test (pd.DataFrame): Feature rows to score

    Returns:
    np.ndarray: Whatever model.predict returns for X_test
    """
    y_hat = model.predict(X_test)
    n_generated = len(y_hat)
    print(f"Generated {n_generated} predictions")
    return y_hat

def evaluate_model(y_true: pd.Series, y_pred: np.ndarray, proba_pred: np.ndarray = None):
    """
    Print evaluation metrics and, for classification, plot a confusion matrix

    The task type is inferred from y_true: a numeric target with more than
    two distinct values is scored as regression, anything else as
    classification.

    Parameters:
    y_true (pd.Series): True target values
    y_pred (np.ndarray): Model predictions
    proba_pred (np.ndarray): Predicted probabilities (for classification).
        For a two-class target this is passed to roc_auc_score as given
        (typically the 1-D positive-class probabilities). For a multiclass
        target it is used only when it is 2-D with one column per class in
        y_true; otherwise ROC AUC is skipped.

    Returns:
    None: Results are printed and the confusion matrix is shown with
        matplotlib.
    """
    true_values = np.asarray(y_true)
    class_labels = np.unique(true_values)
    n_classes = len(class_labels)

    if np.issubdtype(true_values.dtype, np.number) and n_classes > 2:
        # Regression Metrics
        mse = mean_squared_error(y_true, y_pred)  # reused for RMSE below
        print("Regression Metrics:")
        print(f"MSE: {mse:.3f}")
        print(f"RMSE: {np.sqrt(mse):.3f}")
        print(f"R²: {r2_score(y_true, y_pred):.3f}")
        return

    # Classification Metrics
    # sklearn's defaults (average='binary', pos_label=1) raise ValueError for
    # multiclass labels and for two-class labels that do not include 1, so
    # override them only in those cases and keep the defaults otherwise.
    present_labels = set(class_labels.tolist()) | set(
        np.unique(np.asarray(y_pred)).tolist()
    )
    if len(present_labels) > 2:
        score_options = {'average': 'weighted'}
    elif len(present_labels) == 2 and 1 not in present_labels:
        # Treat the greater label as positive, matching the class that
        # roc_auc_score and predict_proba(...)[:, 1] refer to
        score_options = {'pos_label': max(present_labels)}
    else:
        score_options = {}

    print("Classification Metrics:")
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.3f}")
    print(f"Precision: {precision_score(y_true, y_pred, **score_options):.3f}")
    print(f"Recall: {recall_score(y_true, y_pred, **score_options):.3f}")
    print(f"F1 Score: {f1_score(y_true, y_pred, **score_options):.3f}")

    if proba_pred is not None:
        proba_shape = np.shape(proba_pred)
        if n_classes == 2:
            print(f"ROC AUC: {roc_auc_score(y_true, proba_pred):.3f}")
        elif n_classes > 2 and len(proba_shape) == 2 and proba_shape[1] == n_classes:
            # Multiclass needs the full probability matrix and an explicit
            # multi_class strategy
            auc = roc_auc_score(y_true, proba_pred, multi_class='ovr')
            print(f"ROC AUC: {auc:.3f}")
        else:
            # roc_auc_score would raise here (single class in y_true, or
            # scores that do not line up with the classes)
            print("ROC AUC: skipped (needs two classes in y_true, "
                  "or one probability column per class)")

    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))

    # Confusion Matrix Visualization
    plt.figure(figsize=(8, 6))
    sns.heatmap(confusion_matrix(y_true, y_pred),
                annot=True, fmt='d', cmap='Blues')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()

# %% [markdown]
"""
## 4. Results Saving
Save predictions and metrics for future reference
"""
# %%
def save_results(y_pred: np.ndarray, file_path: str):
    """
    Save predictions to CSV file

    The file has a single 'predictions' column and no index column. Missing
    parent directories of file_path are created.

    Parameters:
    y_pred (np.ndarray): Array of predictions; it must fit into one column
    file_path (str): Path to save predictions; may be a bare file name, in
        which case the file is written to the current working directory
    """
    results_df = pd.DataFrame(y_pred, columns=['predictions'])

    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create a directory when there is one
    output_dir = os.path.dirname(file_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    results_df.to_csv(file_path, index=False)
    print(f"Predictions saved to {file_path}")

# %% [markdown]
"""
## 5. Execution
Main execution flow for end-to-end evaluation
"""
# %%
if __name__ == "__main__":
    # End-to-end run: load the pipeline and test set, predict, evaluate, save

    # Load artifacts
    model = load_model('../models/best_pipeline.pkl')
    X_test, y_test = load_test_data('../data/test.csv')

    # Generate predictions
    predictions = make_predictions(model, X_test)

    # For classification: get positive-class probabilities if available
    proba_predictions = None
    if hasattr(model, 'predict_proba'):
        class_probabilities = model.predict_proba(X_test)
        # Column 1 is "the positive class" only when there are exactly two
        # columns; indexing it blindly raises IndexError for a one-column
        # output and picks an arbitrary class for multiclass models, so
        # leave the probabilities unset in those cases.
        proba_shape = np.shape(class_probabilities)
        if len(proba_shape) == 2 and proba_shape[1] == 2:
            proba_predictions = class_probabilities[:, 1]

    # Evaluate performance
    evaluate_model(y_test, predictions, proba_predictions)

    # Save results
    save_results(predictions, '../results/predictions.csv')
    print("Evaluation process completed!")
