In [3]:
from collections import defaultdict
import pickle
from pathlib import Path

# Name of the stored-response pickle to load; the alternatives that were tried
# are kept in the trailing comment.
FILE_NAME = "findings_section_of_report" #"findings" #"radiology_report" = too verbose for L3
stored_responses_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/cxr-agent/frontend/stored_responses")

# Maps the first CSV column (subject id) to the second column (abnormality
# flag), kept as the raw string from the file. A defaultdict(str) yields ""
# for subjects that are not listed.
subject_to_abnormal = defaultdict(str)
with open("/vol/biomedic3/bglocker/ugproj2324/nns20/cxr-agent/frontend/evaluation_metrics/dom_mimic.csv") as f:
    lines = f.readlines()

# The first line is skipped (presumably the header row — confirm against the CSV).
for line in lines[1:]:
    # Drop the line ending before splitting: otherwise a flag stored in the
    # last column would read "True\n" and never compare equal to "True".
    fields = line.rstrip("\r\n").split(",")
    if len(fields) < 2:
        # Blank or malformed row: nothing to record.
        continue
    subject_to_abnormal[fields[0].strip()] = fields[1].strip()


mimic_findings_path = stored_responses_path / "MIMIC-CXR" / f"{FILE_NAME}.pkl"
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open(mimic_findings_path, "rb") as pickle_file:
    image_path_to_model_outputs = pickle.load(pickle_file)

In [4]:
# Build subject id -> reference report text for every entry in the evaluation
# subject list. If a subject appears on several lines, the last one wins.
subject_to_report = {}

mimic_cxr_path = Path('/vol/biodata/data/chest_xray/mimic-cxr')
mimic_cxr_jpg_path = Path('/vol/biodata/data/chest_xray/mimic-cxr-jpg')
subjects_for_eval = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/cxr-agent/evaluation_datasets/MIMIC-CXR/mimic_ii_subjects_for_eval.txt")
for line in subjects_for_eval.read_text().splitlines():
    line = line.strip()
    if not line:
        # Skip blank lines instead of failing on line[0].
        continue

    # two cases: 1) subject with single study 2) subject with multiple studies
    if line.startswith("p"):
        # Entry of the form "p<subject>/s<study>": the study is given explicitly.
        parts = line.split("/")
        subject = parts[0][1:] # remove the 'p' from the subject
        study = parts[1][1:]   # remove the leading character from the study

        subject_path = mimic_cxr_path/"files"/f"p{subject[:2]}"/f"p{subject}"
        report_path = list(subject_path.glob(f"s{study}.txt"))[0]
        report = report_path.read_text()

        # jpg_file is not used further in this cell; the lookup raises
        # IndexError when the study has no image.
        subject_path_jpg = mimic_cxr_jpg_path/"files"/f"p{subject[:2]}"/f"p{subject}"/f"s{study}"
        jpg_file = list(subject_path_jpg.glob('*.jpg'))[0]

    else:
        # Entry is a bare subject id: take the first report / study folder found.
        subject = line

        subject_path = mimic_cxr_path/"files"/ f"p{subject[:2]}/p{subject}"
        report_path = list(subject_path.glob('*.txt'))[0]
        report = report_path.read_text()

        subject_path_jpg = mimic_cxr_jpg_path/"files"/ f"p{subject[:2]}/p{subject}"

        study_folder = list(subject_path_jpg.glob('*'))[0]
        jpg_file = list(study_folder.glob('*.jpg'))[0]

    # for report keep only the text after "FINDINGS: ", if FINDINGS: is not present, keep the text after IMPRESSION:
    # NOTE(review): the check requires a trailing space ("FINDINGS: ") while the
    # split does not, so a report with "FINDINGS:" directly followed by a
    # newline falls through to IMPRESSION — confirm this is intended.
    if "FINDINGS: " in report:
        report = report.split("FINDINGS:")[1]
    elif "IMPRESSION:" in report:
        report = report.split("IMPRESSION:")[1]
    # Otherwise neither section header is present: keep the whole report.
    subject_to_report[subject] = report


    # print(image_path_to_model_outputs)




In [8]:
# Export the reference report and every model's report for each subject into
# three cohorts (all / abnormal / normal). Each cohort directory receives
# "reference.csv" plus one "<model_name>.csv" per model.
#
# with_key=True  -> one "<subject>,<report>\n" line per entry.
# with_key=False -> entries are written back to back as '"<report>",'.
# NOTE(review): neither format escapes commas or double quotes inside the
# report text; the formats are kept as-is because the consumer of these files
# is not visible here — confirm before treating them as strict CSV.
with_key = False
# NOTE(review): the two layouts order their path components differently
# ("reports/without_keys/..." vs "with_keys/reports/...") — confirm intended.
if not with_key:
    all_path_to_save = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/cxr-agent/frontend/stored_responses/MIMIC-CXR/reports/without_keys/all")
    abnormal_path_to_save = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/cxr-agent/frontend/stored_responses/MIMIC-CXR/reports/without_keys/abnormal")
    normal_path_to_save = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/cxr-agent/frontend/stored_responses/MIMIC-CXR/reports/without_keys/normal")
else:
    all_path_to_save = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/cxr-agent/frontend/stored_responses/MIMIC-CXR/with_keys/reports/all")
    abnormal_path_to_save = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/cxr-agent/frontend/stored_responses/MIMIC-CXR/with_keys/reports/abnormal")
    normal_path_to_save = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/cxr-agent/frontend/stored_responses/MIMIC-CXR/with_keys/reports/normal")

# Process subjects in sorted key order so every output file lists them identically.
sorted_subject_to_report = dict(sorted(subject_to_report.items()))

model_names = ["chexagent","gemini_agent","chexagent_agent","llama3_agent"]


def _format_entry(key, text):
    """Return one output entry for `text`, with newlines flattened to spaces.

    Flattening is applied to reference and model reports alike, so an embedded
    newline can never split a with-key record across several lines.
    """
    flat_text = str(text).replace("\n", " ")
    if with_key:
        return f"{key},{flat_text}\n"
    return f'"{flat_text}",'


# Entries are buffered per destination file and written once at the end.
# Writing with "w" (instead of appending inside the loop) makes the cell
# idempotent: re-running it no longer duplicates every entry.
pending_output = defaultdict(list)

for key, value in sorted_subject_to_report.items():
    # .get() avoids inserting unknown subjects into the defaultdict as a side
    # effect; an unknown subject yields "" and is treated as normal.
    abnormal = subject_to_abnormal.get(key, "") # is a string of "True" or "False"
    cohort_path_to_save = abnormal_path_to_save if abnormal == "True" else normal_path_to_save

    # first save reference report
    string_to_save = _format_entry(key, value)
    pending_output[all_path_to_save/"reference.csv"].append(string_to_save)
    pending_output[cohort_path_to_save/"reference.csv"].append(string_to_save)

    # Model outputs are matched to the subject by substring, i.e. the subject
    # id must occur somewhere in the string form of the stored key.
    for ai_key, ai_report in image_path_to_model_outputs.items():
        if key in str(ai_key):
            for model_name in model_names:
                string_to_save = _format_entry(key, ai_report[model_name])
                pending_output[all_path_to_save/f"{model_name}.csv"].append(string_to_save)
                pending_output[cohort_path_to_save/f"{model_name}.csv"].append(string_to_save)

for output_file, entries in pending_output.items():
    # Create the cohort directory on first use rather than assuming it exists.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, "w") as f:
        f.writelines(entries)


In [6]:
# Debug aid: show the full abnormality mapping, then every stored model output
# whose key mentions subject 11924226.
print(subject_to_abnormal)
matching_outputs = [
    (ai_key, value)
    for ai_key, value in image_path_to_model_outputs.items()
    if "11924226" in str(ai_key)
]
for ai_key, value in matching_outputs:
    print(ai_key, value, sep="\n")

defaultdict(<class 'str'>, {'11924226': 'False', '18007398': 'True', '16957065': 'False', '19680874': 'True', '18767957': 'True', '11921090': 'True', '11644926': 'True', '11052935': 'True', '13947388': 'False', '15857729': 'True', '12536467': 'True', '16865871': 'False', '18437840': 'True', '14473057': 'True', '10183775': 'True', '12386201': 'False', '15452636': 'False', '18512911': 'True', '19079797': 'False', '10268877': 'True', '18038196': 'False', '12503812': 'True', '12998617': 'True', '11197890': 'False', '18113771': 'True', '12937037': 'True', '18552428': 'True', '16049879': 'True', '12085050': 'False', '17561996': 'False', '11717909': 'True', '16306599': 'False', '16313531': 'True', '13914812': 'True', '11500818': 'False'})
/vol/biodata/data/chest_xray/mimic-cxr-jpg/files/p11/p11924226/s58367071/fe5dd4a7-d88ab43b-fe20fb3b-aa6f0fe1-c9efd533.jpg
{'chexagent': 'The heart size is normal. The mediastinal and hilar contours are normal. The pulmonary vasculature is normal. The lungs a