In [16]:
import os

import pandas as pd
import numpy as np

def calculate_model_metric(models_path, model_names, metric):
    """
    Build a per-model summary table for a single metric.

    For every name in ``model_names`` the files ``val_metrics.csv`` and
    ``train_metrics.csv`` are read from ``<models_path>/<model_name>/``.
    The validation file must contain a ``val_<metric>`` column and the
    training file must contain ``epoch`` and ``<metric>`` columns.
    "Best" always means the maximum value.

    :param models_path: (str) base directory holding one sub-directory per model
    :param model_names: (list) model sub-directory names, reported in this order
    :param metric: (str) name of the metric to summarise, e.g. ``'recall'``
    :returns: pd.DataFrame with one row per model and the columns ``Model``,
        ``Metric``, ``Best Val``, ``Avg Val``, ``Training`` and
        ``Epochs to Best``; the three score columns hold strings formatted
        to four decimal places
    """
    validation_column = f"val_{metric}"

    def summarise(name):
        # Collect the summary row for one model directory.
        folder = os.path.join(models_path, name)
        validation_log = pd.read_csv(os.path.join(folder, "val_metrics.csv"))
        training_log = pd.read_csv(os.path.join(folder, "train_metrics.csv"))

        validation_scores = validation_log[validation_column]
        top_validation = validation_scores.max()
        mean_validation = validation_scores.mean()
        # idxmax() gives the 0-based row label of the first maximum (read_csv
        # assigns a default integer index); +1 converts it to a 1-based count.
        # Presumably each validation row is one epoch - confirm against the logger.
        rows_until_top = validation_scores.idxmax() + 1

        # Training rows are averaged within each epoch first; the reported
        # value is the highest of those per-epoch means.
        top_training = training_log.groupby('epoch')[metric].mean().max()

        return {
            'Model': name,
            'Metric': metric,
            'Best Val': f"{top_validation:.4f}",
            'Avg Val': f"{mean_validation:.4f}",
            'Training': f"{top_training:.4f}",
            'Epochs to Best': rows_until_top,
        }

    return pd.DataFrame([summarise(name) for name in model_names])


In [20]:
from src.utils.consts import MODELS_PATH
# Compare the four Simplified DenseNet variants on recall; the call is the
# cell's last expression so the resulting table is rendered as output.
model_names = [
    'Simplified_DenseNet_v1',
    'Simplified_DenseNet_v2',
    'Simplified_DenseNet_v3',
    'Simplified_DenseNet_v4',
]
calculate_model_metric(MODELS_PATH, model_names, metric='recall')

Unnamed: 0,Model,Metric,Best Val,Avg Val,Training,Epochs to Best
0,Simplified_DenseNet_v1,recall,0.0536,0.0171,0.0316,4
1,Simplified_DenseNet_v2,recall,0.0907,0.0088,0.0474,17
2,Simplified_DenseNet_v3,recall,0.1047,0.0121,0.0424,9
3,Simplified_DenseNet_v4,recall,0.0494,0.0098,0.0469,23


In [None]:
def analyze_class_performance(model, x_test, y_test, class_names=None):
    """Analyze per-class performance with both default and optimized thresholds.

    Each column of ``y_test`` / ``model.predict(x_test)`` is treated as an
    independent binary problem. For every column the function reports the
    metrics at the default 0.5 cut-off and at the cut-off (searched over
    0.05, 0.10, ..., 0.90) that gives the highest F1 score.

    :param model: object exposing ``predict(x_test)``; the result is indexed
        as ``[:, idx]``, so it must be 2-D with one column per class
    :param x_test: inputs passed unchanged to ``model.predict``
    :param y_test: 2-D array of true labels, one column per class
        (assumed to be 0/1 indicators, since positives are counted with a sum)
    :param class_names: optional sequence of names, one per column; ``None``
        or an empty sequence falls back to ``"Class_<idx>"``
    :returns: dict mapping class name to a dict with ``prevalence``, ``count``,
        ``default`` (precision / recall / f1 / auc at threshold 0.5) and
        ``optimal`` (threshold / precision / recall / f1 at the best-F1
        threshold). ``auc`` is NaN when the column holds a single label value.
    """
    # Get model predictions
    y_pred = model.predict(x_test)

    # Explicit None/length test: the truthiness of a multi-element NumPy array
    # raises ValueError, so `if class_names` is not safe for array inputs.
    use_names = class_names is not None and len(class_names) > 0

    # The candidate thresholds are identical for every class; build them once.
    thresholds = np.arange(0.05, 0.95, 0.05)

    # Initialize results dictionary
    results = {}

    # For each class
    for idx in range(y_test.shape[1]):
        class_name = class_names[idx] if use_names else f"Class_{idx}"

        # Get class-specific data
        y_true_class = y_test[:, idx]
        y_pred_class = y_pred[:, idx]

        # Class statistics
        class_size = np.sum(y_true_class)
        prevalence = class_size / len(y_true_class)

        # ROC AUC is undefined unless more than one label value is present;
        # this covers the all-negative AND the all-positive column.
        auc_defined = np.unique(y_true_class).size > 1

        # Default threshold metrics (zero_division=0 everywhere, so classes
        # with no positives or no positive predictions score 0 without warnings)
        y_pred_default = (y_pred_class > 0.5).astype(int)
        default_metrics = {
            'precision': precision_score(y_true_class, y_pred_default, zero_division=0),
            'recall': recall_score(y_true_class, y_pred_default, zero_division=0),
            'f1': f1_score(y_true_class, y_pred_default, zero_division=0),
            'auc': roc_auc_score(y_true_class, y_pred_class) if auc_defined else np.nan
        }

        # Find optimal threshold; 0.5 is kept when no candidate reaches F1 > 0,
        # and ties keep the lowest threshold because the comparison is strict.
        best_f1 = 0
        best_threshold = 0.5
        for t in thresholds:
            y_pred_t = (y_pred_class > t).astype(int)
            f1 = f1_score(y_true_class, y_pred_t, zero_division=0)
            if f1 > best_f1:
                best_f1 = f1
                best_threshold = t

        # Optimal threshold metrics
        y_pred_optimal = (y_pred_class > best_threshold).astype(int)
        optimal_metrics = {
            'threshold': best_threshold,
            'precision': precision_score(y_true_class, y_pred_optimal, zero_division=0),
            'recall': recall_score(y_true_class, y_pred_optimal, zero_division=0),
            'f1': f1_score(y_true_class, y_pred_optimal, zero_division=0)
        }

        # Store results
        results[class_name] = {
            'prevalence': prevalence,
            'count': int(class_size),
            'default': default_metrics,
            'optimal': optimal_metrics
        }

    return results