In [1]:
import pandas as pd
import numpy as np
import ast
import re
import os
import statistics
from sklearn.metrics import roc_auc_score

In [18]:
# Function for reading the predictions, which are strings, as numpy arrays
def str2array(s):
    """Parse the textual form of a (possibly nested) numeric array.

    The input is bracketed text whose elements are separated by whitespace
    and/or commas (for example the printed form of a numpy array, which may
    span several lines). It is normalised into a Python list literal and
    evaluated with ``ast.literal_eval``.

    Parameters
    ----------
    s : str
        Text such as ``"[[0.1 0.2] [0.3 0.4]]"`` or ``"[1, 2, 3]"``.

    Returns
    -------
    numpy.ndarray
        Array built from the parsed nested list.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when the normalised text is not
        a valid literal.
    """
    # Drop any whitespace (spaces, tabs, newlines) right after an opening
    # bracket; otherwise the next substitution would turn it into a leading
    # ", " and produce an invalid literal such as "[, 0.1]".
    s = re.sub(r'\[\s+', '[', s.strip())
    # Collapse every run of commas/whitespace into a single ", " separator.
    s = re.sub(r'[,\s]+', ', ', s)
    return np.array(ast.literal_eval(s))


# Function to arrange preds nicely in a df
def get_preds_PD(orig_pred_df, true_labels_df, print_auc=True):
    """Combine ground-truth labels and per-model predictions into one DataFrame.

    Only the FIRST row of ``orig_pred_df`` is used. Its ``Preds_model1``,
    ``Preds_model2`` and ``Preds_model3`` cells are parsed with ``str2array``
    and indexed as ``array[:, k]``, so each must parse to a 2-D array whose
    column ``k`` holds the scores for the k-th pathology in the order
    Effusion, Pneumothorax, Atelectasis, Cardiomegaly, Pneumonia.

    Parameters
    ----------
    orig_pred_df : pandas.DataFrame
        Frame with string columns ``Preds_model1`` .. ``Preds_model3``.
    true_labels_df : pandas.DataFrame
        Frame with an ``ImagePath`` column and one label column per pathology.
    print_auc : bool, default True
        When true, print the mean ROC AUC (in percent, one decimal) over the
        three models for every pathology. When false, no AUC is computed.

    Returns
    -------
    pandas.DataFrame
        Columns ``ImagePath``, the five label columns, then
        ``preds_<pathology>_model<m>`` for every pathology and model. Rows
        are paired positionally; if labels and predictions differ in length
        the result is truncated to the shortest and a warning is issued.

    Raises
    ------
    IndexError
        If ``orig_pred_df`` has no rows.
    """
    import warnings

    pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
    model_ids = (1, 2, 3)

    # Parse each model's prediction string exactly once. Re-parsing every row
    # of the frame for every (row, pathology) pair is quadratic work whose
    # results beyond the first row were never used.
    first_row = orig_pred_df.iloc[0]
    model_arrays = {m: str2array(first_row["Preds_model" + str(m)]) for m in model_ids}

    # Label columns first, then predictions grouped by pathology and model.
    column_names = ['ImagePath'] + pathologies
    column_values = [list(true_labels_df[name]) for name in column_names]
    for p_idx, p in enumerate(pathologies):
        for m in model_ids:
            column_names.append('preds_' + p + '_model' + str(m))
            column_values.append(list(model_arrays[m][:, p_idx]))

    # zip() stops at the shortest column; make that visible instead of
    # silently dropping rows.
    lengths = {len(values) for values in column_values}
    if len(lengths) > 1:
        warnings.warn(
            "get_preds_PD: label and prediction columns differ in length "
            + str(sorted(lengths)) + "; rows are truncated to the shortest.",
            stacklevel=2,
        )

    # Constructing a df with the preds and 'true' labels
    preds_df = pd.DataFrame(list(zip(*column_values)), columns=column_names)

    # Computing the auc for each pathology separately (only when requested)
    if print_auc:
        for p in pathologies:
            print(p)
            auc_list = [
                roc_auc_score(preds_df[p], preds_df['preds_' + p + '_model' + str(m)])
                for m in model_ids
            ]
            print("Average auc:", round(sum(auc_list)/len(auc_list)*100, 1))
            print()

    return preds_df

In [67]:
def create_auc_table(dataframes, names):
    """Build a summary table of mean ROC AUC per pathology for several runs.

    For every frame in ``dataframes`` and every pathology, the ROC AUC of the
    columns ``preds_<pathology>_model1`` .. ``_model3`` against the label
    column ``<pathology>`` is averaged, scaled to percent and rounded to one
    decimal.

    Parameters
    ----------
    dataframes : sequence of pandas.DataFrame
        Frames holding the label and prediction columns described above.
    names : sequence of str
        ``names[k]`` labels the row produced from ``dataframes[k]``.

    Returns
    -------
    pandas.DataFrame
        One row per input frame with columns ``name`` followed by the five
        pathologies.
    """
    pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']

    # Column-oriented accumulator: "name" first, then one list per pathology.
    table = {"name": []}
    for pathology in pathologies:
        table[pathology] = []

    for position, frame in enumerate(dataframes):
        table["name"].append(names[position])

        for pathology in pathologies:
            per_model_auc = [
                roc_auc_score(frame[pathology], frame['preds_' + str(pathology) + '_model' + str(model_no)])
                for model_no in range(1, 4)
            ]
            mean_auc_percent = sum(per_model_auc) / len(per_model_auc) * 100
            table[pathology].append(round(mean_auc_percent, 1))

    return pd.DataFrame(table)

In [2]:
# Load the pathology-detection test split; the first CSV column is used as the index.
test_padchest = pd.read_csv('../Data/Data_splits/pathology_detection-test.csv', index_col=0)

In [37]:
# Root directory of the CheXmask segmentation data. Defaults to the original
# shared location; set the CHEXMASK_DIR environment variable to run the
# notebook against a copy stored elsewhere.
files = os.environ.get(
    "CHEXMASK_DIR",
    "/home/data_shares/purrlab/physionet.org/files/chexmask-cxr-segmentation-data/0.2",
)
# NOTE(review): the recorded output of this cell shows a warning was emitted
# while it ran (presumably a pandas dtype warning from read_csv) - confirm and
# consider passing an explicit dtype for the affected columns.
padchest_masks = pd.read_csv(os.path.join(files, "OriginalResolution", "Padchest.csv"))

  exec(code_obj, self.user_global_ns, self.user_ns)


In [38]:
# Keep only the test images that also appear in the mask table (inner join on ImageID).
# This cell overwrites its own input `test_padchest`, so re-running it merges
# the already-merged frame again; re-run the loading cell first.
test_padchest = padchest_masks.merge(test_padchest, how="inner", on="ImageID")

In [75]:
# List the prediction files once so that the frames and their names come from
# the same directory listing and are guaranteed to stay aligned.
pd_pred_files = [i for i in os.listdir("../Predictions/") if i.startswith("PD")]

results_df = create_auc_table(
    # Only the last row of each prediction CSV is evaluated.
    [get_preds_PD(pd.read_csv("../Predictions/" + i).iloc[-1:], test_padchest, print_auc=False) for i in pd_pred_files],
    # Row label: file name up to the first dot.
    [i.split(".")[0] for i in pd_pred_files],
)
# The last underscore-separated token of the name becomes the "type" column.
results_df["type"] = results_df["name"].apply(lambda x: x.split("_")[-1])
results_df

Unnamed: 0,name,Effusion,Pneumothorax,Atelectasis,Cardiomegaly,Pneumonia,type
0,PD_preds_original_mask_inside,66.7,55.3,68.4,74.6,56.1,inside
1,PD_preds_original_mask_outside,85.4,72.5,80.4,85.8,67.1,outside
2,PD_preds_bbox_mask_inside,87.9,79.8,82.8,87.8,75.6,inside
3,PD_preds_bbox_mask_outside,67.8,81.3,62.0,61.8,63.8,outside
4,PD_preds_bbox_both_mask_inside,79.0,73.5,76.3,81.8,69.3,inside
5,PD_preds_bbox_both_mask_outside,74.4,76.7,66.3,67.3,63.9,outside
6,PD_preds_dilated_mask_1_inside,67.4,51.9,68.5,74.1,56.9,inside
7,PD_preds_dilated_mask_1_outside,85.6,70.5,79.3,85.5,68.1,outside
8,PD_preds_dilated_mask_2_inside,68.8,54.3,68.3,72.0,59.5,inside
9,PD_preds_dilated_mask_2_outside,84.3,70.5,77.7,84.8,67.6,outside


In [76]:
# Rows whose "type" is "inside".
results_df.loc[results_df["type"].eq("inside")]

Unnamed: 0,name,Effusion,Pneumothorax,Atelectasis,Cardiomegaly,Pneumonia,type
0,PD_preds_original_mask_inside,66.7,55.3,68.4,74.6,56.1,inside
2,PD_preds_bbox_mask_inside,87.9,79.8,82.8,87.8,75.6,inside
4,PD_preds_bbox_both_mask_inside,79.0,73.5,76.3,81.8,69.3,inside
6,PD_preds_dilated_mask_1_inside,67.4,51.9,68.5,74.1,56.9,inside
8,PD_preds_dilated_mask_2_inside,68.8,54.3,68.3,72.0,59.5,inside
10,PD_preds_dilated_mask_3_inside,71.2,55.6,67.4,70.7,60.5,inside
12,PD_preds_dilated_mask_4_inside,72.7,53.3,68.6,70.1,61.0,inside


In [77]:
# Rows whose "type" is "outside".
results_df.loc[results_df["type"].eq("outside")]

Unnamed: 0,name,Effusion,Pneumothorax,Atelectasis,Cardiomegaly,Pneumonia,type
1,PD_preds_original_mask_outside,85.4,72.5,80.4,85.8,67.1,outside
3,PD_preds_bbox_mask_outside,67.8,81.3,62.0,61.8,63.8,outside
5,PD_preds_bbox_both_mask_outside,74.4,76.7,66.3,67.3,63.9,outside
7,PD_preds_dilated_mask_1_outside,85.6,70.5,79.3,85.5,68.1,outside
9,PD_preds_dilated_mask_2_outside,84.3,70.5,77.7,84.8,67.6,outside
11,PD_preds_dilated_mask_3_outside,83.0,68.2,75.6,83.2,67.1,outside
13,PD_preds_dilated_mask_4_outside,81.0,63.3,73.8,80.7,65.8,outside
