# Model Comparison Results

This notebook visualizes and analyzes the results of our model comparison for software defect prediction.

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from src.models.model_comparison import ModelComparison

## 1. Load Model Comparison Results

In [None]:
# Point ModelComparison at the 'reports' directory one level above the
# current working directory (relative path '../reports').
reports_dir = os.path.join('..', 'reports')
model_comparison = ModelComparison(reports_dir)

# Load the comparison results. Later cells use results_df as a DataFrame with
# model names in the index and one column per metric -- TODO confirm against
# ModelComparison.load_results.
results_df = model_comparison.load_results()
# Last expression of the cell: displays a copy rounded to 3 decimals;
# results_df itself keeps full precision.
results_df.round(3)

## 2. Visualize Model Performance

In [None]:
def plot_model_comparison(results_df, metrics=None):
    """Draw one bar chart per metric comparing every model in ``results_df``.

    Args:
        results_df: DataFrame whose index supplies the bar labels (model
            names) and whose columns include every requested metric.
        metrics: Column names to plot, one subplot each. Defaults to
            Precision, Recall, F-beta Score and PR-AUC.

    Returns:
        The matplotlib Figure holding the subplots.

    Raises:
        ValueError: If ``metrics`` is empty.
        KeyError: If a requested metric is not a column of ``results_df``.
    """
    if metrics is None:
        metrics = ["Precision", "Recall", "F-beta Score", "PR-AUC"]
    else:
        metrics = list(metrics)
    if not metrics:
        raise ValueError("metrics must contain at least one column name")

    # Validate before touching pyplot so a bad column name neither leaves a
    # half-built figure behind nor surfaces as a bare KeyError mid-loop.
    missing = [name for name in metrics if name not in results_df.columns]
    if missing:
        raise KeyError(f"results_df is missing metric column(s): {missing}")

    model_names = results_df.index
    n_models = len(model_names)
    n_metrics = len(metrics)
    positions = range(n_models)

    # squeeze=False always yields a 2-D axes array, so a single metric works
    # too. Width scales with the metric count (20 for the default four).
    fig, axes = plt.subplots(
        1, n_metrics, figsize=(5 * n_metrics, 6), sharey=True, squeeze=False
    )
    fig.suptitle("Model Comparison Across Metrics", fontsize=16, y=1.05)

    # One colour per model, reused in every subplot.
    colors = plt.cm.viridis(np.linspace(0, 1, n_models))

    for i, (ax, metric) in enumerate(zip(axes[0], metrics)):
        bars = ax.bar(positions, results_df[metric], color=colors)

        ax.set_title(metric)
        ax.set_xticks(positions)
        ax.set_xticklabels(model_names, rotation=45, ha='right')
        if i == 0:
            # The y-axis is shared, so only the leftmost subplot is labelled.
            ax.set_ylabel('Score')

        # Print each bar's value just above its top edge.
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., height,
                    f'{height:.3f}',
                    ha='center', va='bottom')

    plt.tight_layout()
    return fig

# Build the comparison figure from the loaded results, then render it.
fig = plot_model_comparison(results_df)
plt.show()

## 3. Model Ranking Analysis

In [None]:
# Rank the models within every column so the highest value gets rank 1, then
# average each model's ranks across columns and sort ascending, which puts the
# model with the smallest mean rank first.
rankings = results_df.rank(ascending=False)
mean_ranks = rankings.mean(axis=1).sort_values()

print("Model Rankings (lower is better):", mean_ranks, sep="\n")

# The first entry of the sorted series is the overall winner; report its name
# followed by its row of scores rounded to 3 decimals.
best_model = mean_ranks.index[0]
print(
    f"\nBest Overall Model: {best_model}",
    "\nPerformance Metrics:",
    results_df.loc[best_model].round(3),
    sep="\n",
)