In [21]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import pickle
import matplotlib.pyplot as plt

os.environ["KERAS_BACKEND"] = "jax" 


import keras
import shap

In [22]:
# Function to change the image paths
def change_paths(df, data_directory):
    """Return a copy of ``df`` whose image paths point at another data folder.

    Every occurrence of the substring "padchest-preprocessed" in the
    ``ImagePath`` column is replaced by ``data_directory``. The frame passed
    in is not modified.
    """
    def _swap_directory(path):
        return path.replace("padchest-preprocessed", data_directory)

    relocated = df.copy()
    relocated["ImagePath"] = relocated["ImagePath"].apply(_swap_directory)
    return relocated


In [23]:
# File names of the saved Keras models.
# NOTE(review): "mt" / "pd" / "td" presumably stand for multi-task, pathology
# detection and tube detection — confirm.
mt_name = 'mt_gamma_0.8_epochs_50.keras'
mt_aug_name = 'mt_aug_gamma_0.8_epochs_50.keras'

pd_name = 'pd_epochs_50.keras'
pd_aug_name = 'pd_aug_epochs_50.keras'

td_name = 'td_epochs_50.keras'
td_aug_name = 'td_aug_epochs_50.keras'

# Label column names for the two label sets.
pd_labels = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
td_labels = ['Chest_drain_tube', 'NSG_tube', 'Endotracheal_tube', 'Tracheostomy_tube']

# Project root; every other location is derived from it so that moving the
# project only requires changing this one line.
data_dir = "/dtu/p1/johlau/LabelReliability_and_PathologyDetection_in_ChestXrays/"
model_dir = data_dir + "ObjectDetection/models/"
output_dir = data_dir + "ObjectDetection/shap_plots/"

# Pathology test split; image paths are rewritten from the original storage
# location to the local copy under data_dir + "Data/".
pathology_detection_test = pd.read_csv(data_dir + 'Data/Data_splits/pathology_detection-test.csv', index_col=0)
pathology_detection_test["ImagePath"] = pathology_detection_test["ImagePath"].apply(
    lambda x: x.replace("/home/data_shares/purrlab_students/", data_dir + "Data/")
)

# Aggregated tube annotations: align the column name with `td_labels`, make the
# relative image paths absolute, and map every -1 in the frame to 0.
tube_detection_test = pd.read_csv(data_dir + "Annotation/Annotations_aggregated.csv", index_col=0)
tube_detection_test = tube_detection_test.rename({"Chest_drain": "Chest_drain_tube"}, axis=1)
tube_detection_test["ImagePath"] = tube_detection_test["ImagePath"].apply(
    lambda x: x.replace("../../", data_dir)
)
tube_detection_test = tube_detection_test.replace({-1: 0})

# Same frames, but with image paths pointing at the "padchest-cropped" folder.
pathology_detection_test_aug = change_paths(pathology_detection_test, "padchest-cropped")
tube_detection_test_aug = change_paths(tube_detection_test, "padchest-cropped")


In [24]:
# Folder holding the saved model predictions; list it through the same variable
# that is later used to build the file paths, so the two cannot drift apart.
results_dir = "/dtu/p1/johlau/LabelReliability_and_PathologyDetection_in_ChestXrays/ObjectDetection/predictions/"
result_names = os.listdir(results_dir)

In [25]:
# Prediction files whose name starts with "pd" (this also matches the
# "pd_mt_..." files), loaded into a {file name: DataFrame} mapping.
pd_result_names = [
    fname for fname in result_names
    if fname.startswith("pd") and fname.endswith(".json")
]
pd_preds = {fname: pd.read_json(results_dir + fname) for fname in pd_result_names}

In [26]:
# Prediction files whose name starts with "td", loaded into a
# {file name: DataFrame} mapping.
td_result_names = [
    fname for fname in result_names
    if fname.startswith("td") and fname.endswith(".json")
]
td_preds = {fname: pd.read_json(results_dir + fname) for fname in td_result_names}

In [27]:
def get_most_confident_correct_and_incorrect_ids(name, preds, true, labels):
    """Find, per label, the highest-scoring true-positive and false-positive rows.

    Parameters
    ----------
    name : str
        Model file name. Every "keras" substring is replaced by "json" to
        obtain the key used to look up the predictions in ``preds``.
    preds : dict
        Maps prediction-file names to DataFrames with one score column per label.
    true : pandas.DataFrame
        Ground-truth frame with one column per label; rows are selected by
        comparing these columns with 1 and 0.
    labels : list of str
        Label columns to process.

    Returns
    -------
    tuple of (list, list)
        For each label, the index label of the highest score among rows whose
        ground truth is 1, and the same among rows whose ground truth is 0.
        Both lists are also printed.
    """
    scores = preds[name.replace("keras", "json")]

    def _top_scoring_id(label, target):
        # Restrict to the rows with the requested ground-truth value, then
        # take the index label of the largest score for this label.
        subset = scores[true[label] == target]
        return subset[label].idxmax()

    most_confident_correct = [_top_scoring_id(label, 1) for label in labels]
    most_confident_incorrect = [_top_scoring_id(label, 0) for label in labels]

    print(most_confident_correct, most_confident_incorrect)
    return most_confident_correct, most_confident_incorrect



In [36]:
# Most confident correct / incorrect ids for the model saved as `pd_name`, scored against the unmodified test frame.
pd_noaug_confident_correct, pd_noaug_confident_incorrect = get_most_confident_correct_and_incorrect_ids(pd_name, pd_preds, pathology_detection_test, pd_labels)

[4713, 1646, 376, 6650, 6390] [562, 21, 4713, 3478, 21]


In [9]:
# Most confident correct / incorrect ids for the model saved as `pd_aug_name`, scored against the "padchest-cropped" test frame.
pd_aug_confident_correct, pd_aug_confident_incorrect = get_most_confident_correct_and_incorrect_ids(pd_aug_name, pd_preds, pathology_detection_test_aug, pd_labels)

[2171, 267, 9196, 6939, 6390] [8663, 3594, 3889, 5793, 5611]


In [10]:
# Same selection for the `mt_name` model; its prediction file is looked up with a "pd_" prefix.
pd_mt_noaug_confident_correct, pd_mt_noaug_confident_incorrect = get_most_confident_correct_and_incorrect_ids("pd_"+mt_name, pd_preds, pathology_detection_test, pd_labels)

[4713, 1646, 376, 8953, 6390] [8663, 9446, 4713, 5793, 21]


In [11]:
# Same selection for the `mt_aug_name` model ("pd_"-prefixed prediction file), scored against the "padchest-cropped" test frame.
pd_mt_aug_confident_correct, pd_mt_aug_confident_incorrect = get_most_confident_correct_and_incorrect_ids("pd_"+mt_aug_name, pd_preds, pathology_detection_test_aug, pd_labels)

[9160, 267, 9196, 6939, 1103] [8663, 6843, 9160, 1529, 5611]


In [12]:
# Every X-ray id selected for any model/outcome, de-duplicated and sorted ascending.
selected_id_lists = [
    pd_noaug_confident_correct, pd_noaug_confident_incorrect,
    pd_aug_confident_correct, pd_aug_confident_incorrect,
    pd_mt_noaug_confident_correct, pd_mt_noaug_confident_incorrect,
    pd_mt_aug_confident_correct, pd_mt_aug_confident_incorrect,
]
shappable_ids = sorted(set(np.array(selected_id_lists).ravel()))

In [13]:
# One row per (model, outcome) pair, one column per pathology; each cell holds
# the id of the X-ray selected for that combination.
shap_id_rows = [
    pd_noaug_confident_correct, pd_noaug_confident_incorrect,
    pd_aug_confident_correct, pd_aug_confident_incorrect,
    pd_mt_noaug_confident_correct, pd_mt_noaug_confident_incorrect,
    pd_mt_aug_confident_correct, pd_mt_aug_confident_incorrect,
]
shap_id_df = pd.DataFrame(shap_id_rows, columns=pd_labels)

# Rows come in (correct, incorrect) pairs per model, in this model order.
model_names = ("pd_noaug", "pd_aug", "mt_noaug", "mt_aug")
shap_id_df["name"] = [model for model in model_names for _repeat in range(2)]
shap_id_df["answered_correct?"] = ["correct", "incorrect"] * len(model_names)

shap_id_df

Unnamed: 0,Effusion,Pneumothorax,Atelectasis,Cardiomegaly,Pneumonia,name,answered_correct?
0,4713,1646,376,6650,6390,pd_noaug,correct
1,562,21,4713,3478,21,pd_noaug,incorrect
2,2171,267,9196,6939,6390,pd_aug,correct
3,8663,3594,3889,5793,5611,pd_aug,incorrect
4,4713,1646,376,8953,6390,mt_noaug,correct
5,8663,9446,4713,5793,21,mt_noaug,incorrect
6,9160,267,9196,6939,1103,mt_aug,correct
7,8663,6843,9160,1529,5611,mt_aug,incorrect


In [14]:
# Print the ground-truth pathology labels of every selected X-ray.
# The ids were produced by `idxmax`, i.e. they are index labels, so they are
# looked up with label-based `.loc` rather than positional `.iloc`.
for xray_id in shappable_ids:
    print(pathology_detection_test.loc[xray_id][pd_labels])

Effusion        0
Pneumothorax    0
Atelectasis     0
Cardiomegaly    0
Pneumonia       0
Name: 21, dtype: object
Effusion        0
Pneumothorax    1
Atelectasis     0
Cardiomegaly    0
Pneumonia       0
Name: 267, dtype: object
Effusion        0
Pneumothorax    0
Atelectasis     1
Cardiomegaly    0
Pneumonia       0
Name: 376, dtype: object
Effusion        0
Pneumothorax    0
Atelectasis     0
Cardiomegaly    0
Pneumonia       0
Name: 562, dtype: object
Effusion        0
Pneumothorax    0
Atelectasis     0
Cardiomegaly    0
Pneumonia       1
Name: 1103, dtype: object
Effusion        0
Pneumothorax    0
Atelectasis     0
Cardiomegaly    0
Pneumonia       0
Name: 1529, dtype: object
Effusion        1
Pneumothorax    1
Atelectasis     1
Cardiomegaly    0
Pneumonia       0
Name: 1646, dtype: object
Effusion        1
Pneumothorax    0
Atelectasis     0
Cardiomegaly    0
Pneumonia       0
Name: 2171, dtype: object
Effusion        0
Pneumothorax    0
Atelectasis     0
Cardiomegaly    0
Pneum

In [15]:
# Map each selected X-ray id to its position in `shappable_ids`.
shappable_index = dict(zip(shappable_ids, range(len(shappable_ids))))

In [16]:
shap_folder = "/dtu/p1/johlau/LabelReliability_and_PathologyDetection_in_ChestXrays/ObjectDetection/shap_values/"

# Pickled SHAP values per model, keyed by the short model name used in
# shap_id_df["name"].
# NOTE: pickle.load can execute arbitrary code — only load files you produced yourself.
shap_files = {
    "pd_noaug": "pd_epochs_50.pickle",
    "pd_aug": "pd_aug_epochs_50.pickle",
    "mt_noaug": "mt_gamma_0.8_epochs_50.pickle",
    "mt_aug": "mt_aug_gamma_0.8_epochs_50.pickle",
}

shap_dict = {}
for model_key, pickle_name in shap_files.items():
    with open(shap_folder + pickle_name, "rb") as file:
        shap_dict[model_key] = pickle.load(file)

# Individual names kept for any cell that refers to them directly.
shap_values_pd_noaug = shap_dict["pd_noaug"]
shap_values_pd_aug = shap_dict["pd_aug"]
shap_values_mt_noaug = shap_dict["mt_noaug"]
shap_values_mt_aug = shap_dict["mt_aug"]

# Split the id table by outcome.
outcome = shap_id_df["answered_correct?"]
correct_df = shap_id_df[outcome == "correct"]
incorrect_df = shap_id_df[outcome == "incorrect"]

In [17]:
# correct_df = shap_id_df[shap_id_df["answered_correct?"] == "correct"]
# incorrect_df = shap_id_df[shap_id_df["answered_correct?"] == "incorrect"]

# for label in pd_labels:
#     for i in range(3):
#         model_name = correct_df.iloc[i]["name"]
#         answer = correct_df.iloc[i]["answered_correct?"]

#         print(f"Model: {model_name}, and its confidence was {answer}")
#         shap_id = correct_df.iloc[i][label]
#         print(f"Pathology: {label}, id: {shap_id}")
#         shap_index = shappable_index[shap_id]
#         shap.image_plot(shap_dict[correct_df.iloc[i]["name"]][shap_index:(shap_index+1)])

#     for i in range(3):
#         model_name = incorrect_df.iloc[i]["name"]
#         answer = incorrect_df.iloc[i]["answered_correct?"]

#         print(f"Model: {model_name}, and its confidence was {answer}")
#         shap_id = incorrect_df.iloc[i][label]
#         print(f"Pathology: {label}, id: {shap_id}")
#         shap_index = shappable_index[shap_id]
#         shap.image_plot(shap_dict[incorrect_df.iloc[i]["name"]][shap_index:(shap_index+1)])    
        


In [39]:
# List the files in the relative "shap_plots" folder (resolved against the current working directory).
# NOTE(review): the plotting cell saves its figures to `output_dir`; confirm both refer to the same folder.
print(["shap_plots/"+i for i in os.listdir("shap_plots")])

['shap_plots/4713_pd_noaug.png', 'shap_plots/4713_pd_aug.png', 'shap_plots/4713_mt_noaug.png', 'shap_plots/4713_mt_aug.png', 'shap_plots/2171_pd_noaug.png', 'shap_plots/2171_pd_aug.png', 'shap_plots/2171_mt_noaug.png', 'shap_plots/2171_mt_aug.png', 'shap_plots/1646_pd_noaug.png', 'shap_plots/1646_pd_aug.png', 'shap_plots/1646_mt_noaug.png', 'shap_plots/1646_mt_aug.png', 'shap_plots/267_pd_noaug.png', 'shap_plots/267_pd_aug.png', 'shap_plots/267_mt_noaug.png', 'shap_plots/267_mt_aug.png', 'shap_plots/376_pd_noaug.png', 'shap_plots/376_pd_aug.png', 'shap_plots/376_mt_noaug.png', 'shap_plots/376_mt_aug.png', 'shap_plots/9196_pd_noaug.png', 'shap_plots/9196_pd_aug.png', 'shap_plots/9196_mt_noaug.png', 'shap_plots/9196_mt_aug.png', 'shap_plots/6650_pd_noaug.png', 'shap_plots/6650_pd_aug.png', 'shap_plots/6650_mt_noaug.png', 'shap_plots/6650_mt_aug.png', 'shap_plots/6939_pd_noaug.png', 'shap_plots/6939_pd_aug.png', 'shap_plots/6939_mt_noaug.png', 'shap_plots/6939_mt_aug.png', 'shap_plots/639

In [18]:
# For each pathology, take the X-rays selected as "most confident correct",
# report which models selected that X-ray (and for which labels), and save one
# SHAP image plot per model as "<xray_id>_<model>.png" in `output_dir`.

# savefig fails if the target folder is missing, so create it up front.
os.makedirs(output_dir, exist_ok=True)

seen_ids = list()

for label in pd_labels:
    print(label)

    # NOTE(review): only the first two rows of `correct_df` are visited here;
    # confirm that leaving out the remaining rows is intended.
    for i in range(0, 2):

        xray_id = correct_df.iloc[i][label]
        if xray_id in seen_ids:
            # Each X-ray is reported and plotted only once.
            continue
        print(f"For X-ray with id: {xray_id}")

        # Cells of shap_id_df equal to this id keep their value; all others
        # become NaN. Computed once and reused for every matching row.
        id_matches = shap_id_df[shap_id_df == xray_id]

        for row_label in id_matches.dropna(how="all").index:
            # `row_label` is an index label, so use label-based `.loc`.
            selection = shap_id_df.loc[row_label]
            name = selection["name"]
            was_correct = selection["answered_correct?"]
            # Columns (pathologies) in which this row selected the X-ray.
            confident_guesses = list(id_matches.loc[row_label].dropna().keys())
            if was_correct == "correct":
                print(f"Model: {name} most confident correct answer was {confident_guesses} for this x-ray")
            else:
                print(f"Model: {name} most confident wrong answers were {confident_guesses} for this x-ray")

        # Position of this X-ray within the SHAP value arrays; identical for
        # every model, so looked up once outside the loop.
        shap_index = shappable_index[xray_id]

        # NOTE(review): the recorded run of this cell stopped with a
        # MemoryError while plotting; check available memory before re-running.
        for key in shap_dict.keys():
            shap.image_plot(shap_dict[key][shap_index:(shap_index+1)], show=False)
            plt.savefig(output_dir+f"{xray_id}_{key}.png")
            plt.close()

        seen_ids.append(xray_id)



Effusion
For X-ray with id: 4713
Model: pd_noaug most confident correct answer was ['Effusion'] for this x-ray
Model: pd_noaug most confident wrong answers were ['Atelectasis'] for this x-ray
Model: mt_noaug most confident correct answer was ['Effusion'] for this x-ray
Model: mt_noaug most confident wrong answers were ['Atelectasis'] for this x-ray
For X-ray with id: 2171
Model: pd_aug most confident correct answer was ['Effusion'] for this x-ray
Pneumothorax
For X-ray with id: 1646
Model: pd_noaug most confident correct answer was ['Pneumothorax'] for this x-ray
Model: mt_noaug most confident correct answer was ['Pneumothorax'] for this x-ray
For X-ray with id: 267
Model: pd_aug most confident correct answer was ['Pneumothorax'] for this x-ray
Model: mt_aug most confident correct answer was ['Pneumothorax'] for this x-ray
Atelectasis
For X-ray with id: 376
Model: pd_noaug most confident correct answer was ['Atelectasis'] for this x-ray
Model: mt_noaug most confident correct answer was

MemoryError: Unable to allocate 24.0 MiB for an array with shape (1, 786431, 4) and data type float64

In [20]:
# Inspect the shape of a single-sample SHAP slice.
# NOTE(review): `key` and `shap_index` are not defined in this cell; they hold
# whatever values the plotting loop last assigned, so this depends on kernel state.
shap_dict[key][shap_index:(shap_index+1)].shape

MemoryError: Unable to allocate 30.0 MiB for an array with shape (1, 512, 512, 3, 5) and data type float64

In [None]:
# seen_ids = list()

# for label in pd_labels:
#     print(label)

#     for i in range(len(incorrect_df)):

#         xray_id = incorrect_df.iloc[i][label]
#         if xray_id in seen_ids:
#             continue
#         print(f"For X-ray with id: {xray_id}")

        
#         for index in  shap_id_df[shap_id_df == xray_id].dropna(how="all").dropna(axis=1, how="all").index:
            
#             name = shap_id_df.iloc[index]["name"]
#             was_correct = shap_id_df.iloc[index]["answered_correct?"]
#             confident_guesses = list(shap_id_df[shap_id_df == xray_id].iloc[index].dropna(how="all").keys())
#             if was_correct == "correct":
#                 print(f"Model: {name} most confident correct answer was {confident_guesses} for this x-ray")
#             else:
#                 print(f"Model: {name} most confident wrong answers were {confident_guesses} for this x-ray")
    
        
#         for key in shap_dict.keys():
#             print(key)
#             shap_index = shappable_index[xray_id]
#             shap.image_plot(shap_dict[key][shap_index:(shap_index+1)])  


#         seen_ids.append(xray_id)

    

1
['Pneumothorax', 'Pneumonia']
5
['Pneumonia']
