In [4]:
"""
Model Evaluation Script
Evaluate model performance and generate metrics
"""

import numpy as np
import sys
from pathlib import Path
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

# Add backend to path
sys.path.insert(0, str(Path(__file__).parent))

from model_training.train_model import FashionModel
from model_training.dataset_preprocess import FashionDatasetPreprocessor

class ModelEvaluator:
    """Evaluate fashion model performance"""
    
    def __init__(self, model=None):
        self.model = model
    
    def evaluate_on_test_set(self, X_test, y_test):
        """
        Evaluate model on test dataset
        """
        if self.model is None or self.model.model is None:
            print("✗ Model not loaded")
            return None
        
        # Get predictions
        predictions = []
        for img in X_test:
            pred = self.model.predict(np.expand_dims(img, axis=0))
            predictions.append(pred['style_match'] * 10)
        
        predictions = np.array(predictions).reshape(-1, 1)
        y_true = y_test * 10
        
        # Calculate metrics
        mse = mean_squared_error(y_true, predictions)
        mae = mean_absolute_error(y_true, predictions)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_true, predictions)
        
        metrics = {
            'mse': mse,
            'rmse': rmse,
            'mae': mae,
            'r2': r2
        }
        
        print("\n" + "="*50)
        print("📊 MODEL EVALUATION RESULTS")
        print("="*50)
        print(f"Mean Squared Error (MSE):  {mse:.4f}")
        print(f"Root Mean Squared Error:   {rmse:.4f}")
        print(f"Mean Absolute Error:       {mae:.4f}")
        print(f"R² Score:                  {r2:.4f}")
        print("="*50 + "\n")
        
        return metrics, predictions
    
    def plot_predictions(self, y_true, y_pred, save_path='evaluation/predictions_plot.png'):
        """Plot predicted vs actual scores"""
        plt.figure(figsize=(10, 6))
        
        plt.scatter(y_true, y_pred, alpha=0.6, s=50)
        plt.plot([0, 10], [0, 10], 'r--', lw=2, label='Perfect prediction')
        
        plt.xlabel('True Fashion Score', fontsize=12)
        plt.ylabel('Predicted Fashion Score', fontsize=12)
        plt.title('Fashion Score Predictions vs Ground Truth', fontsize=14, fontweight='bold')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.xlim(0, 10)
        plt.ylim(0, 10)
        
        # Save
        import os
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"✓ Plot saved to {save_path}")
        plt.close()
    
    def plot_training_history(self, history, save_path='evaluation/training_history.png'):
        """Plot training history"""
        if history is None:
            print("⚠ No training history available")
            return
        
        fig, axes = plt.subplots(1, 2, figsize=(14, 5))
        
        # Loss plot
        axes[0].plot(history.history['loss'], label='Train Loss', linewidth=2)
        axes[0].plot(history.history['val_loss'], label='Val Loss', linewidth=2)
        axes[0].set_xlabel('Epoch', fontsize=11)
        axes[0].set_ylabel('Loss', fontsize=11)
        axes[0].set_title('Model Loss', fontsize=12, fontweight='bold')
        axes[0].legend()
        axes[0].grid(True, alpha=0.3)
        
        # MAE plot
        axes[1].plot(history.history['mae'], label='Train MAE', linewidth=2)
        axes[1].plot(history.history['val_mae'], label='Val MAE', linewidth=2)
        axes[1].set_xlabel('Epoch', fontsize=11)
        axes[1].set_ylabel('MAE', fontsize=11)
        axes[1].set_title('Mean Absolute Error', fontsize=12, fontweight='bold')
        axes[1].legend()
        axes[1].grid(True, alpha=0.3)
        
        # Save
        import os
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"✓ Training history plot saved to {save_path}")
        plt.close()
    
    def generate_confusion_matrix(self, y_true, y_pred, bins=5):
        """Generate confusion matrix for binned scores"""
        from sklearn.metrics import confusion_matrix
        
        # Bin scores into categories
        y_true_binned = np.digitize(y_true, bins=np.linspace(0, 10, bins+1)) - 1
        y_pred_binned = np.digitize(y_pred, bins=np.linspace(0, 10, bins+1)) - 1
        
        cm = confusion_matrix(y_true_binned, y_pred_binned)
        
        # Plot
        import seaborn as sns
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=True)
        plt.xlabel('Predicted Category', fontsize=11)
        plt.ylabel('True Category', fontsize=11)
        plt.title('Confusion Matrix (Score Categories)', fontsize=12, fontweight='bold')
        
        import os
        os.makedirs('evaluation', exist_ok=True)
        plt.savefig('evaluation/confusion_matrix.png', dpi=300, bbox_inches='tight')
        print("✓ Confusion matrix saved to evaluation/confusion_matrix.png")
        plt.close()
    
    def get_error_analysis(self, y_true, y_pred):
        """Analyze prediction errors"""
        errors = np.abs(y_true - y_pred).flatten()
        
        print("\n" + "="*50)
        print("🔍 ERROR ANALYSIS")
        print("="*50)
        print(f"Mean Error:                {np.mean(errors):.4f}")
        print(f"Median Error:              {np.median(errors):.4f}")
        print(f"Std Dev of Errors:         {np.std(errors):.4f}")
        print(f"Min Error:                 {np.min(errors):.4f}")
        print(f"Max Error:                 {np.max(errors):.4f}")
        print(f"% Predictions within ±1:   {100*np.sum(errors <= 1) / len(errors):.2f}%")
        print(f"% Predictions within ±0.5: {100*np.sum(errors <= 0.5) / len(errors):.2f}%")
        print("="*50 + "\n")
        
        return {
            'mean_error': np.mean(errors),
            'median_error': np.median(errors),
            'std_error': np.std(errors),
            'max_error': np.max(errors)
        }

# Example usage
if __name__ == "__main__":
    print("🚀 Starting Model Evaluation...\n")
    
    # Load dataset
    processor = FashionDatasetPreprocessor()
    dataset = processor.prepare_fashion_mnist()
    
    # Load model
    model = FashionModel()
    model.load_model("models/fashion_model.h5")
    
    # Evaluate
    evaluator = ModelEvaluator(model)
    metrics, predictions = evaluator.evaluate_on_test_set(
        dataset['X_test'], 
        dataset['y_test']
    )
    
    # Analysis
    error_stats = evaluator.get_error_analysis(
        dataset['y_test'] * 10,
        predictions
    )
    
    # Plots
    if model.history:
        evaluator.plot_training_history(model.history)
    
    evaluator.plot_predictions(dataset['y_test'] * 10, predictions)
    evaluator.generate_confusion_matrix(dataset['y_test'] * 10, predictions)
    
    print("✓ Evaluation complete!")

NameError: name '__file__' is not defined