In [1]:
import pandas as pd
import numpy as np
import ast
import re
import statistics
from sklearn.metrics import roc_auc_score

In [2]:
# Label table for the test split; the first CSV column becomes the row index.
# It is later passed as the true-label frame when the AUC table is built.
test_padchest = pd.read_csv(
    filepath_or_buffer='../Data/Data_splits/pathology_detection-test.csv',
    index_col=0,
)

In [28]:
# One row per model: its name, the validation data it was scored on, and the
# predictions stored as the text form of a 2-D array (parsed later on demand).
preds_df = pd.read_csv(
    filepath_or_buffer="../Predictions/MT_preds_PD.csv",
)

Unnamed: 0.1,Unnamed: 0,Model_name,Val_data,Preds
0,0,"MT, Multiclass, gamma 0.2","PadChest, PD",[[8.28394014e-03 1.84599322e-03 3.88444727e-03...
1,1,"MT, Multiclass, gamma 0.4","PadChest, PD",[[4.34791250e-03 2.12838943e-03 3.98110878e-03...
2,2,"MT, Multiclass, gamma 0.5","PadChest, PD",[[7.59372814e-03 1.85289711e-03 3.92699335e-03...
3,3,"MT, Multiclass, gamma 0.6","PadChest, PD",[[9.90383234e-03 1.33429852e-03 7.08287489e-03...


In [42]:
# Function for reading the predictions, which are strings, as numpy arrays
def str2array(s):
    """Convert the text form of a (possibly nested) numeric array to a numpy array.

    The input is expected to look like a printed array, e.g.
    ``"[[0.1 0.2]\\n [0.3 0.4]]"``: bracketed rows whose values are separated
    by whitespace and/or commas. The text is rewritten into a comma-separated
    Python list literal and evaluated with ``ast.literal_eval`` (no arbitrary
    code is executed).

    Parameters
    ----------
    s : str
        Text representation of the array.

    Returns
    -------
    numpy.ndarray
        Array built from the parsed nested list.

    Raises
    ------
    SyntaxError, ValueError
        If the rewritten text is not a valid Python literal.
    """
    # Drop ALL whitespace (spaces, tabs, newlines) directly after an opening
    # bracket; otherwise the next substitution would turn "[\n 1" into "[, 1",
    # which is not a valid literal.
    s = re.sub(r'\[\s+', '[', s.strip())
    # Collapse every run of commas/whitespace into a single ", " separator.
    # A separator left before a closing bracket becomes a trailing comma,
    # which Python list literals accept.
    s = re.sub(r'[,\s]+', ', ', s)
    return np.array(ast.literal_eval(s))


# Function to arrange preds nicely in a df
def get_preds_multiclass(orig_pred_df, true_labels_df, print_auc=False):
    """Put one model's per-pathology predictions next to the true labels.

    Only the FIRST row of ``orig_pred_df`` is used, so callers pass a one-row
    slice per model. Its ``"Preds"`` string is parsed with ``str2array`` and
    columns 0..4 of the result are taken, in order, as the predictions for
    Effusion, Pneumothorax, Atelectasis, Cardiomegaly and Pneumonia.

    Parameters
    ----------
    orig_pred_df : pandas.DataFrame
        Frame with a ``"Preds"`` column holding the text form of a 2-D array.
    true_labels_df : pandas.DataFrame
        Frame with an ``"ImagePath"`` column and one label column per pathology.
    print_auc : bool, default False
        If True, print the ROC AUC (as a percentage, one decimal) for each
        pathology. The AUC is only computed when it is going to be printed.

    Returns
    -------
    pandas.DataFrame
        Columns ``ImagePath``, the five label columns, and
        ``preds_<pathology>_model1`` for each pathology. Rows are paired by
        position; ``zip`` stops at the shorter input, so surplus labels or
        surplus predictions are silently dropped.

    Raises
    ------
    IndexError
        If ``orig_pred_df`` has no rows, or the parsed predictions do not have
        at least five columns.
    """
    pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']

    # Parse the prediction string once. Previously every row was re-parsed on
    # each loop iteration even though only the first row's columns were used.
    pred_matrix = str2array(orig_pred_df["Preds"].iloc[0])

    # Constructing a df with the preds and 'true' labels
    label_columns = [list(true_labels_df[p]) for p in pathologies]
    pred_columns = [list(pred_matrix[:, p_idx]) for p_idx in range(len(pathologies))]
    rows = list(zip(list(true_labels_df['ImagePath']), *label_columns, *pred_columns))
    column_names = (['ImagePath']
                    + pathologies
                    + ['preds_' + p + '_model1' for p in pathologies])
    preds_df = pd.DataFrame(rows, columns=column_names)

    # Computing the auc for each pathology separately (only when requested)
    if print_auc:
        for p in pathologies:
            print(p)
            auc = roc_auc_score(preds_df[p], preds_df['preds_' + p + '_model1'])
            # Label kept as "Average auc" so the printed output is unchanged,
            # although there is a single model per call.
            print("Average auc:", round(auc * 100, 1))
            print()

    return preds_df


In [43]:
def create_auc_table(dataframes, names):
    """Build a table with one row per model and one ROC AUC column per pathology.

    Parameters
    ----------
    dataframes : sequence of pandas.DataFrame
        Each frame must contain, for every pathology, a label column named
        after the pathology and a score column ``preds_<pathology>_model1``.
    names : sequence
        ``names[k]`` is stored in the ``name`` column for ``dataframes[k]``.

    Returns
    -------
    pandas.DataFrame
        Columns ``name`` followed by the five pathology AUC columns.
    """
    label_cols = ('Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia')

    # Column-oriented accumulator; insertion order fixes the output column order.
    table = {"name": []}
    for label in label_cols:
        table[label] = []

    for position, frame in enumerate(dataframes):
        table["name"].append(names[position])
        for label in label_cols:
            score_col = f"preds_{label}_model1"
            table[label].append(roc_auc_score(frame[label], frame[score_col]))

    return pd.DataFrame(table)


In [44]:
# Build one labelled prediction frame per model row, then tabulate the AUCs.
# Rows are selected by position (.iloc) because the counter comes from
# range(len(...)); label-based .loc[i:i] is only right for a default 0..n-1 index.
create_auc_table(
    [get_preds_multiclass(preds_df.iloc[[row_pos]], test_padchest) for row_pos in range(len(preds_df))],
    list(preds_df["Model_name"])
)

Unnamed: 0,name,Effusion,Pneumothorax,Atelectasis,Cardiomegaly,Pneumonia
0,"MT, Multiclass, gamma 0.2",0.941654,0.851472,0.856338,0.895451,0.805088
1,"MT, Multiclass, gamma 0.4",0.941223,0.824509,0.838859,0.889154,0.819655
2,"MT, Multiclass, gamma 0.5",0.950246,0.799905,0.843055,0.894642,0.816714
3,"MT, Multiclass, gamma 0.6",0.940423,0.850946,0.849249,0.878993,0.817855
