# Result Analysis

In this notebook, we will analyze the evaluation results of multiple image classification models. We will compare their overall performance, examine confusion matrices, and investigate common misclassifications, especially for challenging or similar classes. This analysis will help us understand the strengths and weaknesses of each model and guide future improvements.

In [4]:
# Import necessary libraries
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import glob
import os
import numpy as np


In [None]:

# --- Configuration ---
MODEL_CONFIGS = [
    {
        "name": "MobileNetV2 (Head Trained, Augmentation)",
        "prefix": "mobilenet_head_aug",
        "metrics_file": "eval_metrics_mobilenet_head_aug.csv",
        "predictions_file": "predictions_mobilenet_head_aug.csv"
    },
    {
        "name": "MobileNetV2 (Mid Trained, No Augmentation)",
        "prefix": "mobilenet_mid_noaug",
        "metrics_file": "eval_metrics_mobilenet_mid_noaug.csv",
        "predictions_file": "predictions_mobilenet_mid_noaug.csv"
    },{"name": "MobileNetV2 (Head Trained, No Augmentation)",
        "prefix": "mobilenet_head_noaug",
        "metrics_file": "eval_metrics_mobilenet_head_noaug.csv",
        "predictions_file": "predictions_mobilenet_head_noaug.csv"
    },{"name": "MobileNetV2 (Mid Trained, Augmentation)",
        "prefix": "mobilenet_mid_aug",
        "metrics_file": "eval_metrics_mobilenet_mid_aug.csv",
        "predictions_file": "predictions_mobilenet_mid_aug.csv"
    },
    {
        "name": "ResNet50 (Mid Trained, No Augmentation)",
        "prefix": "resnet_mid_noaug",
        "metrics_file": "eval_metrics_resnet_mid_noaug.csv",
        "predictions_file": "predictions_resnet_mid_noaug.csv"
    },
    {
        "name": "ResNet50 (Head Trained, Augmentation)",
        "prefix": "resnet_head_aug",
        "metrics_file": "eval_metrics_resnet_head_aug.csv",
        "predictions_file": "predictions_resnet_head_aug.csv"
    },
    {
        "name": "ResNet50 (Head Trained, No Augmentation)",
        "prefix": "resnet_head_noaug",
        "metrics_file": "eval_metrics_resnet_head_noaug.csv",
        "predictions_file": "predictions_resnet_head_noaug.csv"
    },
    {
        "name": "ResNet50 (Mid Trained, Augmentation)",
        "prefix": "resnet_mid_aug",
        "metrics_file": "eval_metrics_resnet_mid_aug.csv",
        "predictions_file": "predictions_resnet_mid_aug.csv"
    },
    


    
]
OUTPUT_DIR = "analysis_outputs"
N_TOP_CLASSES = 5
N_WORST_CLASSES = 5
CM_FIG_SIZE = (18, 15) # Adjusted for ~37 classes


In [2]:

# --- Helper Functions ---
def load_metrics(filepath):
    """Loads evaluation metrics, setting class names as index."""
    try:
        df = pd.read_csv(filepath)
        df = df.rename(columns={df.columns[0]: 'class_name'})
        df.set_index('class_name', inplace=True)
        # Convert metric columns to numeric, coercing errors
        for col in ['precision', 'recall', 'f1-score', 'support']:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')
        return df
    except FileNotFoundError:
        print(f"Warning: Metrics file not found: {filepath}")
        return None

def load_predictions(filepath):
    """Loads prediction details."""
    try:
        return pd.read_csv(filepath)
    except FileNotFoundError:
        print(f"Warning: Predictions file not found: {filepath}")
        return None

def get_class_names(metrics_df):
    """Extracts class names, excluding summary rows."""
    if metrics_df is None:
        return []
    class_names = metrics_df.index.tolist()
    return [name for name in class_names if name not in ['macro avg', 'weighted avg']]

def plot_confusion_matrix_custom(y_true_idx, y_pred_idx, display_labels, model_name, prefix):
    """Generates and saves a confusion matrix plot."""
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    cm = confusion_matrix(y_true_idx, y_pred_idx, labels=np.arange(len(display_labels)))

    plt.figure(figsize=CM_FIG_SIZE)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
    disp.plot(cmap=plt.cm.Blues, xticks_rotation='vertical', values_format='d')
    plt.title(f"Confusion Matrix: {model_name}", fontsize=16)
    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, f"confusion_matrix_{prefix}.png"), dpi=300)
    plt.close()
    print(f"Saved confusion matrix for {model_name} to {OUTPUT_DIR}/confusion_matrix_{prefix}.png")

def analyze_specific_confusion(predictions_df, class1_name, class2_name):
    """Analyzes confusion between two specific classes."""
    if predictions_df is None:
        return {}

    confusion_counts = {}
    # Class1 misclassified as Class2
    misclassified_c1_as_c2 = predictions_df[
        (predictions_df['true_label_name'] == class1_name) &
        (predictions_df['predicted_label_name'] == class2_name)
    ].shape[0]
    confusion_counts[f"{class1_name}_as_{class2_name}"] = misclassified_c1_as_c2

    # Class2 misclassified as Class1
    misclassified_c2_as_c1 = predictions_df[
        (predictions_df['true_label_name'] == class2_name) &
        (predictions_df['predicted_label_name'] == class1_name)
    ].shape[0]
    confusion_counts[f"{class2_name}_as_{class1_name}"] = misclassified_c2_as_c1
    return confusion_counts

def get_common_misclassifications(predictions_df, true_class_name, top_n=3):
    """Identifies the most common incorrect predictions for a given true class."""
    if predictions_df is None:
        return pd.Series(dtype='int64')

    misclassifications = predictions_df[
        (predictions_df['true_label_name'] == true_class_name) &
        (predictions_df['true_label_name'] != predictions_df['predicted_label_name'])
    ]
    return misclassifications['predicted_label_name'].value_counts().nlargest(top_n)


In [5]:

# --- Main Analysis ---
if __name__ == "__main__":
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    all_model_summary_metrics = []

    for config in MODEL_CONFIGS:
        print(f"\n{'='*20} Analyzing: {config['name']} {'='*20}")
        metrics_df = load_metrics(config["metrics_file"])
        predictions_df = load_predictions(config["predictions_file"])

        if metrics_df is None:
            print(f"Skipping {config['name']} due to missing metrics file.")
            continue

        class_names_ordered = get_class_names(metrics_df) # Get class names before dropping summary rows

        # Store overall metrics
        summary_metrics = {
            "Model": config["name"],
            "Macro Avg F1-Score": metrics_df.loc['macro avg', 'f1-score'] if 'macro avg' in metrics_df.index else np.nan,
            "Weighted Avg F1-Score": metrics_df.loc['weighted avg', 'f1-score'] if 'weighted avg' in metrics_df.index else np.nan,
            "Macro Avg Precision": metrics_df.loc['macro avg', 'precision'] if 'macro avg' in metrics_df.index else np.nan,
            "Macro Avg Recall": metrics_df.loc['macro avg', 'recall'] if 'macro avg' in metrics_df.index else np.nan,
        }
        all_model_summary_metrics.append(summary_metrics)

        print("\n--- Overall Performance ---")
        print(f"Macro Average F1-Score: {summary_metrics['Macro Avg F1-Score']:.4f}")
        print(f"Weighted Average F1-Score: {summary_metrics['Weighted Avg F1-Score']:.4f}")

        # Filter out summary rows for class-specific analysis
        class_metrics_df = metrics_df.drop(['macro avg', 'weighted avg'], errors='ignore')

        print(f"\n--- Top {N_TOP_CLASSES} Performing Classes (by F1-score) ---")
        top_classes = class_metrics_df['f1-score'].nlargest(N_TOP_CLASSES)
        print(top_classes)

        print(f"\n--- Worst {N_WORST_CLASSES} Performing Classes (by F1-score) ---")
        worst_classes = class_metrics_df['f1-score'].nsmallest(N_WORST_CLASSES)
        print(worst_classes)

        if predictions_df is not None and class_names_ordered:
            # Ensure label indices are present and map correctly
            if 'true_label_idx' in predictions_df.columns and 'predicted_label_idx' in predictions_df.columns:
                # Create a mapping if prediction_df doesn't have consistent label names for indices
                # For this script, we assume the indices in predictions_df correspond to an alphabetical sort of class_names_ordered
                # or are already correctly mapped. If not, a more robust mapping is needed.
                # For simplicity, let's try to get unique sorted names from predictions.
                unique_true_labels = sorted(predictions_df['true_label_name'].unique())
                if not all(item in unique_true_labels for item in class_names_ordered) or not all(item in class_names_ordered for item in unique_true_labels):
                     print("Warning: Class name mismatch between metrics and predictions. Using names from predictions for CM.")
                     cm_display_labels = unique_true_labels
                else:
                     cm_display_labels = class_names_ordered # Prefer names from metrics for consistency

                max_idx = max(predictions_df['true_label_idx'].max(), predictions_df['predicted_label_idx'].max())
                if max_idx >= len(cm_display_labels):
                    print(f"Warning: Max label index ({max_idx}) exceeds number of display labels ({len(cm_display_labels)}). CM might be incorrect.")
                    # Fallback or error handling could be added here. For now, proceed with caution.

                plot_confusion_matrix_custom(
                    predictions_df['true_label_idx'],
                    predictions_df['predicted_label_idx'],
                    display_labels=cm_display_labels,
                    model_name=config['name'],
                    prefix=config['prefix']
                )
            else:
                print("Skipping Confusion Matrix: 'true_label_idx' or 'predicted_label_idx' not found in predictions.")


            print("\n--- Specific Confusions for Lowest Performing Classes ---")
            for class_name, f1 in worst_classes.items():
                print(f"  {class_name} (F1: {f1:.2f}):")
                common_mispreds = get_common_misclassifications(predictions_df, class_name, top_n=3)
                if not common_mispreds.empty:
                    for pred_class, count in common_mispreds.items():
                        print(f"    Predicted as {pred_class}: {count} times")
                else:
                    print("    No specific misclassifications data available or class perfectly recalled among errors.")

            print("\n--- American Pit Bull Terrier vs. Staffordshire Bull Terrier Confusion ---")
            apbt_sbt_confusion = analyze_specific_confusion(predictions_df, "American Pit Bull Terrier", "Staffordshire Bull Terrier")
            if apbt_sbt_confusion:
                print(f"  American Pit Bull Terrier misclassified as Staffordshire Bull Terrier: {apbt_sbt_confusion.get('American Pit Bull Terrier_as_Staffordshire Bull Terrier', 0)} times")
                print(f"  Staffordshire Bull Terrier misclassified as American Pit Bull Terrier: {apbt_sbt_confusion.get('Staffordshire Bull Terrier_as_American Pit Bull Terrier', 0)} times")
            else:
                print("  Could not analyze APBT vs SBT confusion (predictions_df might be missing).")

    print("\n\n" + "="*20 + " Overall Model Comparison " + "="*20)
    summary_df = pd.DataFrame(all_model_summary_metrics)
    summary_df.set_index("Model", inplace=True)
    print(summary_df.sort_values(by="Macro Avg F1-Score", ascending=False))

    print("\nNote: Loss/Accuracy plots per epoch require training history data, which is not available in the provided CSVs.")
    print(f"Analysis outputs (like confusion matrices) saved to '{OUTPUT_DIR}' directory.")


Skipping MobileNetV2 (Head Trained, Augmentation) due to missing metrics file.

Skipping MobileNetV2 (Mid Trained, No Augmentation) due to missing metrics file.

Skipping ResNet50 (Mid Trained, No Augmentation) due to missing metrics file.




KeyError: "None of ['Model'] are in the columns"