In [27]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from pathlib import Path
from collections import defaultdict

Dataset-agnostic code

In [41]:
def build_image_id_to_pathology_dict(file_path, pathologies, gt_header = False):
    """Parse a comma-separated label file into ``{image_id: {label, ...}}``.

    Every non-empty row is read as ``image_id,label[,label...]``. Each field is
    stripped of surrounding whitespace and labels are lower-cased; empty fields
    (for example the one produced by a trailing comma) are dropped. A row that
    lists no labels does not create an entry for its image id.

    Args:
        file_path: Path of the text/CSV file to read.
        pathologies: Accepted for interface compatibility but not used for
            filtering: every label found in the file is kept, including labels
            outside this list, so callers can reconcile synonyms afterwards.
        gt_header: When True, the first line of the file is skipped as a header.

    Returns:
        A ``defaultdict(set)`` mapping each image id to its set of lower-cased
        labels.
    """
    image_ids_to_pathologies = defaultdict(set)
    with open(file_path, 'r') as f:
        if gt_header:
            next(f, None)  # discard the header row; tolerates an empty file
        # Iterate lazily instead of materialising the whole file in memory.
        for line in f:
            fields = [field.strip() for field in line.split(',')]
            image_id = fields[0]
            for pathology in fields[1:]:
                # Skip blanks so "img,edema," does not record an empty label.
                if pathology:
                    image_ids_to_pathologies[image_id].add(pathology.lower())

    return image_ids_to_pathologies

In [121]:
def normalize_labels(labels, synonym_mapping):
    """Collapse labels that belong to a synonym group into one shared token.

    Args:
        labels: Iterable of label values to normalise.
        synonym_mapping: Iterable of synonym groups; groups are checked in
            order and the first one containing a label wins.

    Returns:
        A set in which every label found in a synonym group is replaced by the
        ``frozenset`` of that whole group, and every other label is kept as is.
    """
    canonical = set()
    for raw_label in labels:
        # First group that mentions this label, or None when there is none.
        group = next(
            (candidate for candidate in synonym_mapping if raw_label in candidate),
            None,
        )
        canonical.add(raw_label if group is None else frozenset(group))
    return canonical

def contains_no_finding(label_set, synonym_mappings):
    """Tell whether a normalised label set holds a "no finding" synonym group.

    Args:
        label_set: Container of normalised labels (plain labels and/or
            ``frozenset`` synonym groups).
        synonym_mappings: Iterable of synonym groups.

    Returns:
        True when some group that includes ``"no finding"`` is present in
        ``label_set`` as a ``frozenset``; False otherwise. A bare
        ``"no finding"`` string in ``label_set`` is not detected here.
    """
    return any(
        "no finding" in group and frozenset(group) in label_set
        for group in synonym_mappings
    )

def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0.0


def calculate_metrics(ground_truth, predictions, synonym_mappings = None):
    """Print and return exact-match and per-label agreement between two label maps.

    Both inputs are normalised with ``normalize_labels`` so that synonymous
    labels compare equal. Only image ids present in ``ground_truth`` are
    scored; an image without a prediction is scored against an empty set.

    Each ground-truth image is placed in one of three groups: "no finding",
    exactly one label, or everything else ("multiple pathologies").
    NOTE(review): an empty ground-truth label set falls into the last group —
    confirm callers never pass one.

    Args:
        ground_truth: Mapping of image id to an iterable of reference labels.
        predictions: Mapping of image id to an iterable of predicted labels.
        synonym_mappings: Optional iterable of synonym groups; ``None`` (the
            default) means no synonyms.

    Returns:
        A 7-tuple of fractions:
        ``(exact, exact_no_finding, exact_one, exact_multiple)`` each divided
        by the number of ground-truth images, followed by
        ``(correct, correct_no_finding, correct_pathology)`` each divided by
        the total number of normalised ground-truth labels. These differ from
        the printed per-group rates, which use each group's own size as the
        denominator. Every ratio is 0.0 when its denominator is zero.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if synonym_mappings is None:
        synonym_mappings = []

    # Normalize each set of pathologies in the ground truth and predictions using the provided synonym mappings
    normalized_ground_truth = {image_id: normalize_labels(gt_labels, synonym_mappings) for image_id, gt_labels in ground_truth.items()}
    normalized_predictions = {image_id: normalize_labels(pred_labels, synonym_mappings) for image_id, pred_labels in predictions.items()}

    def is_no_finding_label(label):
        # A single label is "no finding" either literally or as a synonym group.
        return label == "no finding" or contains_no_finding({label}, synonym_mappings)

    exact_matches = 0
    exact_no_finding = 0
    exact_one_pathology = 0
    exact_multiple_pathologies = 0

    total_no_finding = 0
    total_one_pathology = 0
    total_multiple_pathologies = 0

    correct_matches = 0
    correct_no_finding = 0
    correct_pathology = 0

    for image_id, ground_truth_labels in normalized_ground_truth.items():
        pred_labels = normalized_predictions.get(image_id, set())

        # Classify the image once and reuse the result for both tallies below.
        is_no_finding = "no finding" in ground_truth_labels or contains_no_finding(ground_truth_labels, synonym_mappings)
        is_single = len(ground_truth_labels) == 1

        if is_no_finding:
            total_no_finding += 1
        elif is_single:
            total_one_pathology += 1
        else:
            total_multiple_pathologies += 1

        # Calculate exact matches (Metric 1) and their per-group split (Metric 2)
        if ground_truth_labels == pred_labels:
            exact_matches += 1
            if is_no_finding:
                exact_no_finding += 1
            elif is_single:
                exact_one_pathology += 1
            else:
                exact_multiple_pathologies += 1

        # Calculate individual correct matches (Metric 3)
        matched_pathologies = ground_truth_labels.intersection(pred_labels)
        correct_matches += len(matched_pathologies)

        # Split matches by label kind (Metric 4). The check is made on the
        # matched label itself, not on the whole image, so a pathology matched
        # on an image that also carries "no finding" is still a pathology.
        for pathology in matched_pathologies:
            if is_no_finding_label(pathology):
                correct_no_finding += 1
            else:
                correct_pathology += 1

    # Calculate metrics
    n = len(normalized_ground_truth)
    exact_matches_percentage = _safe_ratio(exact_matches, n)
    exact_no_finding_percentage = _safe_ratio(exact_no_finding, total_no_finding)
    exact_one_pathology_percentage = _safe_ratio(exact_one_pathology, total_one_pathology)
    exact_multiple_pathologies_percentage = _safe_ratio(exact_multiple_pathologies, total_multiple_pathologies)

    print("Dataset Characteristics:")
    print(f"No finding proportion: {_safe_ratio(total_no_finding, n):.2f}")
    print(f"One pathology proportion: {_safe_ratio(total_one_pathology, n):.2f}")
    print(f"Multiple pathologies proportion: {_safe_ratio(total_multiple_pathologies, n):.2f}")

    print("\nExact Matches Characteristics:")
    print(f"Exact matches: {exact_matches_percentage:.2f}")
    print(f"Exact no finding: {exact_no_finding_percentage:.2f}")
    print(f"Exact one pathology: {exact_one_pathology_percentage:.2f}")
    print(f"Exact multiple pathologies: {exact_multiple_pathologies_percentage:.2f}")

    # Count labels after normalisation so the denominator uses the same units
    # as `correct_matches` (synonyms collapse into a single group).
    total_num_of_pathologies = sum(len(labels) for labels in normalized_ground_truth.values())
    correct_matches_percentage = _safe_ratio(correct_matches, total_num_of_pathologies)
    correct_no_finding_percentage = _safe_ratio(correct_no_finding, total_no_finding)
    correct_pathology_percentage = _safe_ratio(correct_pathology, total_num_of_pathologies - total_no_finding)

    print("\nIndividual Pathology Characteristics:")
    print(f"Correct matches: {correct_matches_percentage:.2f}")
    print(f"Correct no finding: {correct_no_finding_percentage:.2f}")
    print(f"Correct pathology: {correct_pathology_percentage:.2f}")

    return (
        _safe_ratio(exact_matches, n),
        _safe_ratio(exact_no_finding, n),
        _safe_ratio(exact_one_pathology, n),
        _safe_ratio(exact_multiple_pathologies, n),
        _safe_ratio(correct_matches, total_num_of_pathologies),
        _safe_ratio(correct_no_finding, total_num_of_pathologies),
        _safe_ratio(correct_pathology, total_num_of_pathologies),
    )


CheXpert-specific

In [43]:
# Shared root directory that every path below lives under. Keeping it in one
# place means relocating the data only requires editing this line.
PROJECT_ROOT = Path("/vol/biomedic3/bglocker/ugproj2324/nns20")

# Directories derived from the root.
CHEXAGENT_CHEXPERT_EVAL_DIR = PROJECT_ROOT / "CheXagent" / "evaluation" / "CheXpert"
CHEXPERT_PROBE_RESULTS_DIR = CHEXAGENT_CHEXPERT_EVAL_DIR / "probe_results"
XRV_CHEXPERT_TEST_DIR = PROJECT_ROOT / "torchxrayvision" / "evaluations" / "CheXpert_evaluation_results" / "test_set"

# CheXpert paths
cheXpert_test_ground_truth_path = PROJECT_ROOT / "datasets" / "CheXpert" / "test_written_pathologies"
cheXpert_layer_norm_predictions_path = CHEXPERT_PROBE_RESULTS_DIR / "chexpert_layer_norm_predictions.csv"
cheXpert_q_former_predictions_path = CHEXPERT_PROBE_RESULTS_DIR / "chexpert_q_former_predictions.csv"
cheXpert_cheXagent_predictions_path = CHEXAGENT_CHEXPERT_EVAL_DIR / "identify_pathologies"

# torchxrayvision result files for the CheXpert test set
xrv_224_chex_path = XRV_CHEXPERT_TEST_DIR / "xrv_224_chex.txt"
xrv_224_path = XRV_CHEXPERT_TEST_DIR / "xrv_224.txt"
xrv_512_path = XRV_CHEXPERT_TEST_DIR / "xrv_512.txt"

# CheXpert pathologies
cheXpert_pathologies = ['No Finding','Enlarged Cardiomediastinum','Cardiomegaly','Lung Opacity',
        'Lung Lesion','Edema','Consolidation','Pneumonia','Atelectasis','Pneumothorax',
        'Pleural Effusion','Pleural Other','Fracture','Support Devices']

In [52]:
def _load_cheXpert_labels(label_file):
    """Read one CheXpert label file; none of these files is read with a header row."""
    return build_image_id_to_pathology_dict(label_file, cheXpert_pathologies, gt_header = False)


# Reference labels for the CheXpert test set.
cheXpert_test_ground_truth = _load_cheXpert_labels(cheXpert_test_ground_truth_path)

# Predictions from the probes and from CheXagent.
cheXpert_layer_norm_predictions = _load_cheXpert_labels(cheXpert_layer_norm_predictions_path)
cheXpert_q_former_predictions = _load_cheXpert_labels(cheXpert_q_former_predictions_path)
cheXpert_cheXagent_predictions = _load_cheXpert_labels(cheXpert_cheXagent_predictions_path)

# Predictions from the three XRV result files.
xrv_224_chex = _load_cheXpert_labels(xrv_224_chex_path)
xrv_224 = _load_cheXpert_labels(xrv_224_path)
xrv_512 = _load_cheXpert_labels(xrv_512_path)

In [124]:
# Report agreement of the layer-norm predictions with the CheXpert test labels.
# The returned tuple is discarded; only the printed report is used here.
print("CheXpert Layer Norm:")
_ = calculate_metrics(
    ground_truth=cheXpert_test_ground_truth,
    predictions=cheXpert_layer_norm_predictions,
)

CheXpert Layer Norm:
Dataset Characteristics:
No finding proportion: 0.25
One pathology proportion: 0.18
Multiple pathologies proportion: 0.57

Exact Matches Characteristics:
Exact matches: 0.22
Exact no finding: 0.61
Exact one pathology: 0.26
Exact multiple pathologies: 0.03

Individual Pathology Characteristics:
Correct matches: 0.54
Correct no finding: 0.62
Correct pathology: 0.54


In [125]:
# Report agreement of the Q-Former predictions with the CheXpert test labels.
# The returned tuple is discarded; only the printed report is used here.
print("CheXpert Q-Former:")
_ = calculate_metrics(
    ground_truth=cheXpert_test_ground_truth,
    predictions=cheXpert_q_former_predictions,
)


CheXpert Q-Former:
Dataset Characteristics:
No finding proportion: 0.25
One pathology proportion: 0.18
Multiple pathologies proportion: 0.57

Exact Matches Characteristics:
Exact matches: 0.29
Exact no finding: 0.90
Exact one pathology: 0.31
Exact multiple pathologies: 0.02

Individual Pathology Characteristics:
Correct matches: 0.44
Correct no finding: 0.91
Correct pathology: 0.39


In [126]:
# CheXagent words some findings differently from the CheXpert label names, so
# each group below lists spellings that should be scored as the same label.
print("CheXagent:")
synonym_mappings = [
    {'no finding', 'no pathologies'},
    {'enlarged cardiomediastinum', 'enlarged cardiac silhouette'},
    {'edema', 'pulmonary edema/hazy opacity'},
]
_ = calculate_metrics(
    ground_truth=cheXpert_test_ground_truth,
    predictions=cheXpert_cheXagent_predictions,
    synonym_mappings=synonym_mappings,
)

CheXagent:
Dataset Characteristics:
No finding proportion: 0.25
One pathology proportion: 0.18
Multiple pathologies proportion: 0.57

Exact Matches Characteristics:
Exact matches: 0.12
Exact no finding: 0.43
Exact one pathology: 0.06
Exact multiple pathologies: 0.00

Individual Pathology Characteristics:
Correct matches: 0.20
Correct no finding: 0.43
Correct pathology: 0.18


In [127]:
# Report agreement of the xrv_224_chex predictions with the CheXpert test labels.
# The returned tuple is discarded; only the printed report is used here.
print("XRV-224-CheX:")
_ = calculate_metrics(
    ground_truth=cheXpert_test_ground_truth,
    predictions=xrv_224_chex,
)

XRV-224-CheX:
Dataset Characteristics:
No finding proportion: 0.25
One pathology proportion: 0.18
Multiple pathologies proportion: 0.57

Exact Matches Characteristics:
Exact matches: 0.07
Exact no finding: 0.30
Exact one pathology: 0.00
Exact multiple pathologies: 0.00

Individual Pathology Characteristics:
Correct matches: 0.64
Correct no finding: 0.30
Correct pathology: 0.68


In [129]:
# Report agreement of the xrv_224 predictions with the CheXpert test labels.
# The returned tuple is discarded; only the printed report is used here.
print("XRV-224:")
_ = calculate_metrics(
    ground_truth=cheXpert_test_ground_truth,
    predictions=xrv_224,
)

XRV-224:
Dataset Characteristics:
No finding proportion: 0.25
One pathology proportion: 0.18
Multiple pathologies proportion: 0.57

Exact Matches Characteristics:
Exact matches: 0.01
Exact no finding: 0.05
Exact one pathology: 0.00
Exact multiple pathologies: 0.00

Individual Pathology Characteristics:
Correct matches: 0.64
Correct no finding: 0.05
Correct pathology: 0.70


In [130]:
# Report agreement of the xrv_512 predictions with the CheXpert test labels.
# The returned tuple is discarded; only the printed report is used here.
print("XRV-512:")
_ = calculate_metrics(
    ground_truth=cheXpert_test_ground_truth,
    predictions=xrv_512,
)

XRV-512:
Dataset Characteristics:
No finding proportion: 0.25
One pathology proportion: 0.18
Multiple pathologies proportion: 0.57

Exact Matches Characteristics:
Exact matches: 0.23
Exact no finding: 0.94
Exact one pathology: 0.00
Exact multiple pathologies: 0.00

Individual Pathology Characteristics:
Correct matches: 0.11
Correct no finding: 0.94
Correct pathology: 0.02


VinDr-specific

In [None]:
# VinDr label names, one per line; the list ends with the "No finding" class.
vindr_pathologies = [
    "Aortic enlargement",
    "Atelectasis",
    "Calcification",
    "Cardiomegaly",
    "Clavicle fracture",
    "Consolidation",
    "Emphysema",
    "Enlarged PA",
    "ILD",
    "Infiltration",
    "Lung Opacity",
    "Lung cavity",
    "Lung cyst",
    "Mediastinal shift",
    "Nodule/Mass",
    "Pleural effusion",
    "Pleural thickening",
    "Pneumothorax",
    "Pulmonary fibrosis",
    "Rib fracture",
    "Other lesion",
    "No finding",
]
