In [23]:
import json
import numpy as np
import pandas as pd

In [24]:
def compute_avg_and_std(metrics):
    """
    Reduce a collection of metric values to two summary numbers.

    Returns a ``(mean, standard deviation)`` tuple. The standard deviation
    is whatever ``np.std`` yields with its default arguments.
    """
    return np.mean(metrics), np.std(metrics)

In [25]:
def process_model_data(model_data):
    """
    Summarise one model's raw metric values, label by label.

    For every label in ``model_data`` the value lists stored under
    'accuracy', 'sensitivity', 'specificity' and 'time' are each reduced
    with ``compute_avg_and_std``.

    Returns a dict of the form
    ``{label: {metric: {'average': ..., 'std_dev': ...}}}``.
    """
    metric_names = ('accuracy', 'sensitivity', 'specificity', 'time')
    summary = {}

    for label, metrics in model_data.items():
        per_metric = {}
        for name in metric_names:
            mean_value, spread = compute_avg_and_std(metrics[name])
            per_metric[name] = {'average': mean_value, 'std_dev': spread}
        # Store the label only once all four metrics have been computed.
        summary[label] = per_metric

    return summary



In [26]:

def parse_and_compute(json_file_path):
    """
    Read a results JSON file and compute per-label statistics for every model.

    The top level of the file must be a JSON object; each of its values is
    handed to ``process_model_data`` and the outcome is stored under the
    same key.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        dict mapping each top-level key (model name) to the result of
        ``process_model_data`` for that model's data.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; pin the encoding so decoding does not depend on
    # the platform's default locale (e.g. cp1252 on Windows).
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    return {
        model: process_model_data(model_data)
        for model, model_data in data.items()
    }



In [29]:

# Usage example
# Compute the per-model, per-label statistics from the results file.
# Replace 'embedding_model_results.json' with the path of your own JSON file if it differs.
result = parse_and_compute('embedding_model_results.json')

In [30]:
def create_label_wise_table(data):
    """
    Flatten the nested statistics into one row per (model, label) pair.

    ``data`` maps model name -> label -> metric name -> a dict holding
    'average' and 'std_dev'. Each of sensitivity, specificity and accuracy
    is rendered as the string "<average> ± <std_dev>".

    Returns a DataFrame with the columns
    Model, Label, Sensitivity, Specificity, Accuracy.
    """
    def _plus_minus(stats):
        # Render one metric's statistics as "average ± std_dev".
        return f"{stats['average']} ± {stats['std_dev']}"

    column_names = ["Model", "Label", "Sensitivity", "Specificity", "Accuracy"]
    rows = [
        [
            model,
            label,
            _plus_minus(scores['sensitivity']),
            _plus_minus(scores['specificity']),
            _plus_minus(scores['accuracy']),
        ]
        for model, per_label in data.items()
        for label, scores in per_label.items()
    ]
    return pd.DataFrame(rows, columns=column_names)


In [31]:
def create_aggregated_model_table(data):
    """
    Build a per-model summary table by averaging the per-label statistics.

    For every model, the per-label 'average' values of accuracy,
    sensitivity and specificity are averaged over that model's labels, and
    the per-label 'std_dev' values are averaged the same way. The reported
    spread is therefore the mean of the per-label standard deviations, not
    a standard deviation computed across labels.

    Args:
        data: Mapping of model name -> label -> metric name -> a dict with
            'average' and 'std_dev' entries.

    Returns:
        DataFrame indexed by model name with the columns average_accuracy,
        average_sensitivity and average_specificity, each cell formatted as
        "<mean> ± <mean std_dev>". A model with no labels yields
        "nan ± nan" in every column instead of raising ZeroDivisionError.
    """
    metric_names = ('accuracy', 'sensitivity', 'specificity')

    def _mean_over_labels(label_metrics, metric, stat):
        # Average one statistic ('average' or 'std_dev') of one metric over labels.
        if not label_metrics:
            # No labels recorded for this model: there is nothing to average.
            return float('nan')
        # Built-in sum adds left to right starting from 0, matching a manual
        # running total, so the formatted digits are unchanged.
        return sum(entry[metric][stat] for entry in label_metrics) / len(label_metrics)

    aggregated_data = {}
    for model_name, diseases in data.items():
        label_metrics = list(diseases.values())

        # Store result with ± formatting
        aggregated_data[model_name] = {
            f"average_{metric}": (
                f"{_mean_over_labels(label_metrics, metric, 'average')}"
                f" ± {_mean_over_labels(label_metrics, metric, 'std_dev')}"
            )
            for metric in metric_names
        }

    # Outer keys (model names) become columns; transpose so each model is a row.
    return pd.DataFrame(aggregated_data).T


In [32]:
# Create the tables
# Label-wise table: one row per (model, label) pair.
df_label_wise = create_label_wise_table(result)
# Aggregated table: one row per model, with metrics averaged over that model's labels.
df_aggregated_model = create_aggregated_model_table(result)


In [34]:
# Persist both tables as UTF-8 encoded CSV files.
# The label-wise frame only carries a default positional index, so it is left out of the file.
df_label_wise.to_csv('embedding_model_label_wise_comparison.csv', index=False, encoding='utf-8')
# The aggregated frame is indexed by model name, so the index is written as the first column.
df_aggregated_model.to_csv('embedding_model_aggregated_comparison.csv', encoding='utf-8')