In [29]:
from pathlib import Path
from collections import defaultdict
import pandas as pd

Dataset-agnostic code

In [106]:
def build_image_id_to_pathology_dict(file_path, pathologies, gt_header = False):
    """Map each image ID in a comma-separated file to its set of recognised pathologies.

    Every line is split on commas: the first field is used as the image ID and
    the remaining fields are treated as labels. Labels are compared
    case-insensitively, ignoring surrounding whitespace, against ``pathologies``;
    labels that are not listed are dropped.

    Args:
        file_path: Path of the text/CSV file to read.
        pathologies: Iterable of pathology names to keep.
        gt_header: If True, the first line is treated as a header and skipped.

    Returns:
        A ``defaultdict(set)`` from image ID to the lower-cased matching labels.
        Only images with at least one matching label get an explicit key;
        indexing any other image ID yields (and inserts) an empty set.
    """
    # A set gives O(1) membership tests; normalise once up front.
    wanted = {pathology.strip().lower() for pathology in pathologies}
    image_ids_to_pathologies = defaultdict(set)
    with open(file_path, 'r') as f:
        if gt_header:
            next(f, None)  # skip header (no error on an empty file)
        # Iterate lazily instead of loading the whole file with readlines().
        for line in f:
            # Strip every field so "id, Cardiomegaly ,x" still matches.
            image_id, *labels = (field.strip() for field in line.split(','))
            for label in labels:
                label = label.lower()
                if label in wanted:
                    image_ids_to_pathologies[image_id].add(label)

    return image_ids_to_pathologies

In [107]:
def calculate_exact_matches(gt_dict, pred_dict):
    """Return the fraction of ground-truth images whose predicted label set matches exactly.

    Args:
        gt_dict: Mapping from image ID to the set of ground-truth labels.
        pred_dict: Mapping from image ID to the set of predicted labels. Image
            IDs missing from it are treated as having an empty prediction set.

    Returns:
        The exact-match ratio as a float in [0, 1]; 0.0 when ``gt_dict`` is empty.
    """
    if not gt_dict:
        # Nothing to score; avoid a ZeroDivisionError.
        return 0.0

    no_prediction = set()
    # Use .get() rather than indexing: indexing a defaultdict would insert a new
    # key on every miss, and indexing a plain dict would raise KeyError.
    exact_matches = sum(
        1
        for image_id, gt_labels in gt_dict.items()
        if gt_labels == pred_dict.get(image_id, no_prediction)
    )

    return exact_matches / len(gt_dict)

VinDr-specific code

In [108]:
# Label file for the VinDr test split; parsed by build_image_id_to_pathology_dict in the next cell.
vindr_test_split_ground_truth_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/datasets/VinDr-CXR/test_set_three_splits/VinDr_test_test_split_with_labels.csv")
# Prediction files, parsed the same way as the label file.
# presumably one file per torchxrayvision model variant (see file stems) — TODO confirm
xrv_224_chex_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/torchxrayvision/evaluations/VinDr_evaluation_results/xrv_224_chex.txt")
xrv_224_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/torchxrayvision/evaluations/VinDr_evaluation_results/xrv_224.txt")
xrv_512_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/torchxrayvision/evaluations/VinDr_evaluation_results/xrv_512.txt")    

# Label names used to filter both the ground truth and the predictions for VinDr.
# Matching is case-insensitive: build_image_id_to_pathology_dict lower-cases both sides.
# presumably the subset of labels VinDr shares with CheXpert — TODO confirm
cheXpert_same_pathologies = ["Atelectasis", "Cardiomegaly", "Consolidation", "Lung Opacity", "Pleural Effusion", "Pneumothorax", "No finding"]

In [109]:
# Ground-truth label sets per image ID, restricted to cheXpert_same_pathologies.
# NOTE(review): the file name ends in "_with_labels.csv"; if that file has a
# header row, gt_header should be True — confirm against the file.
ground_truth = build_image_id_to_pathology_dict(
    file_path=vindr_test_split_ground_truth_path,
    pathologies=cheXpert_same_pathologies,
    gt_header=False,
)

# Predicted label sets per image ID, one dict per prediction file.
xrv_224_chex_predictions = build_image_id_to_pathology_dict(
    file_path=xrv_224_chex_path,
    pathologies=cheXpert_same_pathologies,
)
xrv_224_predictions = build_image_id_to_pathology_dict(
    file_path=xrv_224_path,
    pathologies=cheXpert_same_pathologies,
)
xrv_512_predictions = build_image_id_to_pathology_dict(
    file_path=xrv_512_path,
    pathologies=cheXpert_same_pathologies,
)

In [118]:
# Print the exact-match ratio for each prediction set: the fraction of ground-truth
# image IDs whose predicted label set equals the ground-truth label set
# (computed by calculate_exact_matches).
print("VinDr")
print(f"xrv_224_chex: {calculate_exact_matches(ground_truth, xrv_224_chex_predictions)}")
print(f"xrv_224: {calculate_exact_matches(ground_truth, xrv_224_predictions)}")
print(f"xrv_512: {calculate_exact_matches(ground_truth, xrv_512_predictions)}")


VinDr
xrv_224_chex: 0.38402061855670105
xrv_224: 0.19072164948453607
xrv_512: 0.7654639175257731


CheXpert evaluation

In [112]:
def convert_zeros_and_ones_to_written_form(file_path, num_label_columns=14):
    """Turn a CSV of 0/1 label columns into a mapping of image ID to label names.

    The CSV must have a ``Path`` column; the image ID is the last three
    '/'-separated components of that path. The trailing ``num_label_columns``
    columns are treated as label columns, and a label is included for a row
    when its cell equals 1.0. Label names are the lower-cased column headers.

    Args:
        file_path: Path of the CSV file to read with pandas.
        num_label_columns: How many trailing columns hold labels (default 14).
            If the file has fewer columns than this, all columns are scanned.

    Returns:
        A ``defaultdict(set)`` from image ID to the set of lower-cased label
        names marked 1.0 for that row (an empty set when none are). If an
        image ID occurs more than once, the last row wins.
    """
    df = pd.read_csv(file_path)

    # Compute the slice start explicitly: a plain ``columns[-n:]`` would return
    # every column when n == 0.
    first_label_idx = max(len(df.columns) - num_label_columns, 0)
    label_columns = list(df.columns[first_label_idx:])

    image_id_to_pathology_dict = defaultdict(set)
    for _, row in df.iterrows():
        image_id = "/".join(row['Path'].split('/')[-3:])  # last three path components
        image_id_to_pathology_dict[image_id] = {
            col.lower() for col in label_columns if row[col] == 1.0
        }

    return image_id_to_pathology_dict

In [113]:
# Label CSVs for the CheXpert "small" test and validation sets; read by
# convert_zeros_and_ones_to_written_form in the next cell.
chexpert_small_test_ground_truth_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/datasets/CheXpert/small/test.csv")
chexpert_small_valid_ground_truth_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/datasets/CheXpert/small/valid.csv")

# Prediction files for the test set; parsed by build_image_id_to_pathology_dict further down.
chexpert_small_test_xrv_224_chex_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/torchxrayvision/evaluations/CheXpert_evaluation_results/test_set/xrv_224_chex.txt")
chexpert_small_test_xrv_224_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/torchxrayvision/evaluations/CheXpert_evaluation_results/test_set/xrv_224.txt")
chexpert_small_test_xrv_512_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/torchxrayvision/evaluations/CheXpert_evaluation_results/test_set/xrv_512.txt")

# Prediction files for the validation set; parsed the same way.
chexpert_small_valid_xrv_224_chex_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/torchxrayvision/evaluations/CheXpert_evaluation_results/validation_set/xrv_224_chex.txt")
chexpert_small_valid_xrv_224_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/torchxrayvision/evaluations/CheXpert_evaluation_results/validation_set/xrv_224.txt")
chexpert_small_valid_xrv_512_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/torchxrayvision/evaluations/CheXpert_evaluation_results/validation_set/xrv_512.txt")


# The 14 label names kept when parsing the prediction files (matched case-insensitively).
# NOTE(review): convert_zeros_and_ones_to_written_form takes the last 14 CSV columns as
# labels — presumably those columns carry exactly these names; verify against the CSV header.
all_cheXpert_pathologies = ['No Finding','Enlarged Cardiomediastinum','Cardiomegaly','Lung Opacity',
        'Lung Lesion','Edema','Consolidation','Pneumonia','Atelectasis','Pneumothorax',
        'Pleural Effusion','Pleural Other','Fracture','Support Devices']


In [114]:
# Ground-truth label sets per image ID, built from the 0/1 label columns of each CSV.
chexpert_small_test_ground_truth = convert_zeros_and_ones_to_written_form(
    file_path=chexpert_small_test_ground_truth_path,
)
chexpert_small_valid_ground_truth = convert_zeros_and_ones_to_written_form(
    file_path=chexpert_small_valid_ground_truth_path,
)

In [115]:
# Load the predicted label sets for the test set, one dict per prediction file.
# (No scoring happens here; the exact-match ratios are computed in the next cell.)
chexpert_small_test_xrv_224_chex_predictions = build_image_id_to_pathology_dict(
    file_path=chexpert_small_test_xrv_224_chex_path,
    pathologies=all_cheXpert_pathologies,
)
chexpert_small_test_xrv_224_predictions = build_image_id_to_pathology_dict(
    file_path=chexpert_small_test_xrv_224_path,
    pathologies=all_cheXpert_pathologies,
)
chexpert_small_test_xrv_512_predictions = build_image_id_to_pathology_dict(
    file_path=chexpert_small_test_xrv_512_path,
    pathologies=all_cheXpert_pathologies,
)

# Load the predicted label sets for the validation set in the same way.
chexpert_small_valid_xrv_224_chex_predictions = build_image_id_to_pathology_dict(
    file_path=chexpert_small_valid_xrv_224_chex_path,
    pathologies=all_cheXpert_pathologies,
)
chexpert_small_valid_xrv_224_predictions = build_image_id_to_pathology_dict(
    file_path=chexpert_small_valid_xrv_224_path,
    pathologies=all_cheXpert_pathologies,
)
chexpert_small_valid_xrv_512_predictions = build_image_id_to_pathology_dict(
    file_path=chexpert_small_valid_xrv_512_path,
    pathologies=all_cheXpert_pathologies,
)


In [117]:
# Exact-match ratio on the validation set: the fraction of ground-truth image IDs whose
# predicted label set equals the ground-truth label set (see calculate_exact_matches).
print("CheXpert Validation Set")   
print(f"xrv_224_chex: {calculate_exact_matches(chexpert_small_valid_ground_truth, chexpert_small_valid_xrv_224_chex_predictions)}")
print(f"xrv_224: {calculate_exact_matches(chexpert_small_valid_ground_truth, chexpert_small_valid_xrv_224_predictions)}")
print(f"xrv_512: {calculate_exact_matches(chexpert_small_valid_ground_truth, chexpert_small_valid_xrv_512_predictions)}")

# Same metric on the test set.
print("CheXpert Test Set")
print(f"xrv_224_chex: {calculate_exact_matches(chexpert_small_test_ground_truth, chexpert_small_test_xrv_224_chex_predictions)}")
print(f"xrv_224: {calculate_exact_matches(chexpert_small_test_ground_truth, chexpert_small_test_xrv_224_predictions)}")
print(f"xrv_512: {calculate_exact_matches(chexpert_small_test_ground_truth, chexpert_small_test_xrv_512_predictions)}")



CheXpert Validation Set
xrv_224_chex: 0.0811965811965812
xrv_224: 0.0
xrv_512: 0.14957264957264957
CheXpert Test Set
xrv_224_chex: 0.058383233532934134
xrv_224: 0.011976047904191617
xrv_512: 0.15718562874251496
