In [18]:
import os
import pandas as pd
if os.path.basename(os.path.abspath(".")) == "notebooks":
    os.chdir("..")
from utils import main_setup, AttributeDict
from omegaconf import OmegaConf
from torch import autocast
from src.datasets import get_dataset
from utils import load_model_from_config, collate_batch
from src.visualization.utils import word_to_slice
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch
from src.preliminary_masks import preprocess_attention_maps
from src.visualization.utils import MIMIC_STRING_TO_ATTENTION
import numpy as np
import torchvision
from src.foreground_masks import GMMMaskSuggestor
from einops import repeat

# Forwarded to the test-dataset config below as its "phrase_grounding" entry.
phrase_grounding_mode = False
# Key into MODEL_MAP that selects the checkpoint to evaluate.
model_name = "frozen"

# NOTE(review): EXP_NAME ends in "learnable" while model_name is "frozen" — confirm this pairing is intended.
args = AttributeDict(EXP_PATH="src/experiments/default_cfg.py", EXP_NAME="confounder_notebook_learnable")
config = main_setup(args)
config.datasets.test["phrase_grounding"] = phrase_grounding_mode

# Checkpoint file for each supported model name.
MODEL_MAP = {"sdv2":"/vol/ideadata/ed52egek/diffusionmodels/latentdiffusion/512-base-ema.ckpt",
             "frozen":"/vol/ideadata/ed52egek/diffusionmodels/chest/miccai_models/frozen_30k.ckpt", "learnable":"/vol/ideadata/ed52egek/diffusionmodels/chest/miccai_models/learnable_60k.ckpt"}

config.ckpt = MODEL_MAP[model_name]
# Use the parent of the log dir produced by main_setup (drops its last path component).
config.log_dir = os.path.dirname(config.log_dir)
print(f"Saving to {config.log_dir}")

2023-06-14 07:14:35,401 | utils.py-main_setup-0289 | INFO | Log dir: /vol/ideadata/ed52egek/pycharm/chest-distillation/log/confounder_notebook_learnable/2023-06-14T07-14-35
2023-06-14 07:14:35,403 | log.py-log_experiment-0021 | INFO | Args: {'EXP_PATH': 'src/experiments/default_cfg.py', 'EXP_NAME': 'confounder_notebook_learnable'}
2023-06-14 07:14:35,404 | log.py-log_experiment-0023 | INFO | Exp Path: src/experiments/default_cfg.py
Saving to /vol/ideadata/ed52egek/pycharm/chest-distillation/log/confounder_notebook_learnable


In [3]:
# Remove the `limit_dataset` entry from the test-dataset config before the dataset is built
# (presumably so the whole test split is evaluated — TODO confirm).
delattr(config.datasets.test, "limit_dataset")
dataset = get_dataset(config, "test")
model_config = OmegaConf.load(f"{config.config_path}")
# Overrides applied to the loaded model config for this evaluation.
model_config["model"]["params"]["use_ema"] = False
model_config["model"]["params"]["unet_config"]["params"]["attention_save_mode"] = "cross"

In [4]:
# Instantiate the model described by model_config with the weights from config.ckpt.
model = load_model_from_config(model_config, f"{config.ckpt}")
# The dataset's precomputation step needs the loaded model.
dataset.load_precomputed(model)

Loading model from /vol/ideadata/ed52egek/diffusionmodels/chest/miccai_models/learnable_60k.ckpt
Global Step: 60000
2023-06-14 05:55:21,719 | instantiator.py-<module>-0021 | INFO | Created a temporary directory at /tmp/tmpvhkhkuxd
2023-06-14 05:55:21,723 | instantiator.py-_write-0076 | INFO | Writing /tmp/tmpvhkhkuxd/_remote_module_non_scriptable.py
No module 'xformers'. Proceeding without it.
LatentDiffusion: Running in eps-prediction mode
DiffusionWrapper has 865.91 M params.
2023-06-14 05:55:28,962 | ddpm.py-__init__-0143 | INFO | Unconditional guidance probability: 0.0
making attention of type 'vanilla' with 512 in_channels
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla' with 512 in_channels
2023-06-14 05:55:30,178 | factory.py-create_model-0152 | INFO | Loaded ViT-H-14 model config.
2023-06-14 05:55:38,102 | factory.py-create_model-0199 | INFO | Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
2023-06-14 05:55:53,070 | chest.py-

In [4]:
# Key of the sample entry that is used as the conditioning text.
cond_key = "label_text"
# Fetch the first sample once (instead of once per access) and show its
# finding label next to its conditioning text.
first_sample = dataset[0]
first_sample["finding_labels"], first_sample[cond_key]

('Pneumonia', 'Bibasilar opacities|Bibasilar opacities')

In [5]:

def samples_to_path(mask_dir, samples, j):
    """Build the ``.pt`` file path inside ``mask_dir`` for entry ``j`` of ``samples``.

    The file name is the plain concatenation of the entry's relative path, its
    finding label and its impression, where every space of the impression is
    replaced by an underscore.

    Args:
        mask_dir: directory the path is rooted in.
        samples: mapping with the keys ``"rel_path"``, ``"finding_labels"`` and
            ``"impression"``, each holding an indexable collection of strings.
        j: index of the entry to build the path for.

    Returns:
        The joined path with a ``.pt`` suffix.
    """
    rel_path = samples["rel_path"][j]
    finding = samples["finding_labels"][j]
    impression_slug = "_".join(samples["impression"][j].split(" "))
    file_stem = rel_path + finding + impression_slug
    return os.path.join(mask_dir, file_stem) + ".pt"


# NOTE(review): `idx` is not referenced by the visible cells — confirm whether it is still needed.
idx = 0
# Context manager wrapped around inference in the evaluation loop (torch autocast).
precision_scope = autocast

# visualization args
# Number of trailing entries along the first attention axis that are averaged per sample.
rev_diff_steps = 40

model = model.to("cuda")
# Directory for precomputed masks inside the experiment log dir.
mask_dir =  os.path.join(config.log_dir, "precomputed_masks")

In [6]:
def contrast_to_noise_ratio(ground_truth_img, prelim_mask_large):
    """Contrast-to-noise ratio of a predicted map with respect to a ground-truth mask.

    Both tensors are flattened and must hold the same number of elements.
    Elements whose ground-truth value equals ``1.0`` form the region of
    interest (ROI); all remaining elements form the background.

    Args:
        ground_truth_img: tensor marking the ROI with the value ``1.0``.
        prelim_mask_large: tensor of predicted scores.

    Returns:
        A 0-dim tensor ``(mean_roi - mean_background) / sqrt(var_roi + var_background)``.
        The variances use ``Tensor.var()`` with its default settings.
    """
    inside_roi = ground_truth_img.flatten() == 1.0
    scores = prelim_mask_large.flatten()
    roi_scores, background_scores = scores[inside_roi], scores[~inside_roi]

    signal_gap = roi_scores.mean() - background_scores.mean()
    summed_variance = roi_scores.var() + background_scores.var()
    return signal_gap / torch.sqrt(summed_variance)

In [82]:

# Applied to every normalised attention map before it is scored against the ground truth.
resize_to_imag_size = torchvision.transforms.Resize(512)
# NOTE(review): not referenced by the visible cells — confirm whether it is still needed.
mask_suggestor = GMMMaskSuggestor(config)

# Column-wise accumulators: one entry per (sample, word) and per (sample, token position).
results = {"rel_path":[], "word":[], "finding_labels":[], "cnr":[]}
results_positional = {"rel_path":[], "position":[], "finding_labels":[], "cnr":[]}


def _attention_map_cnr(attention_map, ground_truth_img):
    """Return the contrast-to-noise ratio of one attention map as a Python float.

    The map is min-max normalised, resized with ``resize_to_imag_size`` and
    scored against ``ground_truth_img`` by ``contrast_to_noise_ratio``.
    A plain float (instead of a tensor) keeps the result tables numeric, so
    they survive a CSV round trip without any string parsing.
    """
    prelim_mask = (attention_map - attention_map.min())/(attention_map.max() - attention_map.min())
    prelim_mask_large = resize_to_imag_size(prelim_mask.unsqueeze(dim=0)).squeeze(dim=0)
    return contrast_to_noise_ratio(ground_truth_img, prelim_mask_large).item()


with torch.no_grad(), precision_scope("cuda"), model.ema_scope():
    for sample in dataset:
        print(sample["label_text"])
        # Only the first "|"-separated phrase is used as conditioning text.
        label_text = [sample["label_text"].split("|")[0],]
        sample["impression"] = label_text
        sample["label_text"] = label_text
        # Kept inside the loop as in the original; presumably log_images may move
        # the text encoder — TODO confirm, otherwise this can be hoisted.
        model.cond_stage_model = model.cond_stage_model.to(model.device)
        images = model.log_images(sample, N=1, split="test", sample=False, inpaint=True,
                                  plot_progressive_rows=False, plot_diffusion_rows=False,
                                  use_ema_scope=False, cond_key=cond_key, mask=1.,
                                  save_attention=True)
        attention_maps = images.pop("attention")
        attention_images = preprocess_attention_maps(attention_maps, on_cpu=False)
        ground_truth_img = sample["bbox_img"].float()

        prompt_words = label_text[0].split(" ")
        token_lens = model.cond_stage_model.compute_word_len(prompt_words)
        # token_positions[k] is the index of the first token of word k (index 0 is <SOS>);
        # the final entry is the index right after the last word, which is used for <EOS>.
        token_positions = [1,] + list(np.cumsum(token_lens) + 1)
        words = ["<SOS>",] + prompt_words + ["<EOS>",]

        # Average the last `rev_diff_steps` entries of the first axis together with the second axis;
        # the remaining leading axis is indexed by token position below.
        attention = attention_images[0][-1 * rev_diff_steps:].mean(dim=(0,1))

        # Per-word CNR.
        for j, word in enumerate(words):
            if j == 0:
                token_index = 0
            elif j == (len(words) - 1):
                token_index = token_positions[-1]
            else:
                token_index = token_positions[j-1]
            # NOTE(review): only the first sub-token of a word is scored; if all sub-tokens
            # were meant to be averaged the slice should end at the next word's start — confirm.
            attention_map = attention[token_index:token_index+1].mean(dim=0)

            results["rel_path"].append(sample["rel_path"])
            results["word"].append(word)
            results["finding_labels"].append(sample["finding_labels"])
            results["cnr"].append(_attention_map_cnr(attention_map, ground_truth_img))

        # Per-token-position CNR, independent of which word sits at the position.
        for position, attention_map in enumerate(attention):
            results_positional["rel_path"].append(sample["rel_path"])
            results_positional["position"].append(position)
            results_positional["finding_labels"].append(sample["finding_labels"])
            results_positional["cnr"].append(_attention_map_cnr(attention_map, ground_truth_img))


Bibasilar opacities|Bibasilar opacities
Data shape for DDIM sampling is (1, 4, 64, 64), eta 0.0
Bilateral multifocal areas of consolidation|Bilateral multifocal areas of consolidation
Data shape for DDIM sampling is (1, 4, 64, 64), eta 0.0
Large right-sided pneumothorax
Data shape for DDIM sampling is (1, 4, 64, 64), eta 0.0
More dense consolidation at the right lung base raises possibility of superimposed infection
Data shape for DDIM sampling is (1, 4, 64, 64), eta 0.0
Large right pneumothorax
Data shape for DDIM sampling is (1, 4, 64, 64), eta 0.0
left basilar opacity
Data shape for DDIM sampling is (1, 4, 64, 64), eta 0.0
Persistent right middle and lower lobe opacities consistent with pneumonia
Data shape for DDIM sampling is (1, 4, 64, 64), eta 0.0
Small right pneumothorax
Data shape for DDIM sampling is (1, 4, 64, 64), eta 0.0
Left small-to-moderate apical pneumothorax
Data shape for DDIM sampling is (1, 4, 64, 64), eta 0.0
Small right apical pneumothorax
Data shape for DDIM sam

In [21]:
# Persist the per-word and per-position CNR tables for the evaluated model.
# index=False: the row index carries no information and would otherwise come
# back as an extra "Unnamed: 0" column when the CSV is read again.
res = pd.DataFrame(results)
res.to_csv(os.path.join(config.log_dir, f"{model_name}_results.csv"), index=False)
res_positional = pd.DataFrame(results_positional)
res_positional.to_csv(os.path.join(config.log_dir, f"{model_name}_results_positional.csv"), index=False)

NameError: name 'results' is not defined

In [22]:
# NOTE(review): pandas is already imported in the first cell; this repeated import is redundant.
import pandas as pd
# Reload the per-word CNR table for `model_name` from the log directory.
res = pd.read_csv(os.path.join(config.log_dir, f"{model_name}_results.csv"))

# Find the best-matching word in terms of localization for each disease, considering only words that appear more than 25 times in our test data

In [25]:
# Distinct disease labels found in the per-word results.
diseases = res["finding_labels"].unique()
#diseases = ["Pneumonia",]
def str_to_float(x):
    """Convert a CNR cell to a Python float.

    Accepts numbers (and any other non-string object that ``float()``
    understands) as well as strings. Strings may be plain numbers
    (``"0.25"``) or the printed form of a scalar tensor, e.g.
    ``"tensor(-1.330)"`` or ``"tensor(0.5000, dtype=torch.float16)"``.

    Args:
        x: value to convert.

    Returns:
        The value as ``float``; ``nan``/``inf`` are passed through.

    Raises:
        ValueError: if a string does not contain a parsable number.
    """
    if not isinstance(x, str):
        return float(x)
    text = x.strip()
    prefix = "tensor("
    # Remove the wrapper as an exact prefix/suffix. str.lstrip/rstrip treat their
    # argument as a set of characters and would also eat the leading "n" of "nan".
    if text.startswith(prefix) and text.endswith(")"):
        # Keep only the value; trailing fields such as dtype=/device= are dropped.
        text = text[len(prefix):-1].split(",", 1)[0]
    return float(text)

# Make sure the CNR column is numeric before averaging.
res["cnr"] = res["cnr"].map(str_to_float)

# One row per (disease, word) for every word that occurs more than 25 times within that disease.
frequent_word_rows = []
for disease in diseases:
    disease_rows = res[res["finding_labels"] == disease]
    occurrences = disease_rows["word"].value_counts()
    for word in occurrences[occurrences > 25].index:
        word_rows = disease_rows[disease_rows["word"] == word]
        frequent_word_rows.append((disease, word_rows["cnr"].mean(), word, len(word_rows)))

# Best-localising words first within each disease.
mean_cnr_results = pd.DataFrame(
    frequent_word_rows, columns=["disease", "cnr", "word", "occ"]
).sort_values(["disease", "cnr"], ascending=False)
mean_cnr_results.to_csv(os.path.join(config.log_dir, f"{model_name}_morethan25_results.csv"))
mean_cnr_results

Unnamed: 0,disease,cnr,word,occ
19,Pneumothorax,0.697748,small,66
16,Pneumothorax,0.653163,apical,115
15,Pneumothorax,0.461793,pneumothorax,218
14,Pneumothorax,0.428652,<EOS>,233
17,Pneumothorax,0.282838,right,108
...,...,...,...,...
29,Cardiomegaly,0.007381,is,165
24,Cardiomegaly,-0.398970,<SOS>,333
36,Atelectasis,1.117740,atelectasis,50
35,Atelectasis,0.855789,<EOS>,61


In [24]:

# Display the per-word CNR table.
res

Unnamed: 0.1,Unnamed: 0,rel_path,word,finding_labels,cnr
0,0,files/p10/p10233088/s54276838/675d792f-a3521e4...,<SOS>,Pneumonia,-0.9801
1,1,files/p10/p10233088/s54276838/675d792f-a3521e4...,Bibasilar,Pneumonia,0.5942
2,2,files/p10/p10233088/s54276838/675d792f-a3521e4...,opacities,Pneumonia,1.1392
3,3,files/p10/p10233088/s54276838/675d792f-a3521e4...,<EOS>,Pneumonia,1.4480
4,4,files/p10/p10123147/s50230934/5318d353-daae9c3...,<SOS>,Pneumonia,-0.8740
...,...,...,...,...,...
8678,8678,files/p13/p13859188/s52811469/0677ec27-6132843...,pleural,Pleural Effusion,1.1051
8679,8679,files/p13/p13859188/s52811469/0677ec27-6132843...,effusions,Pleural Effusion,0.5952
8680,8680,files/p13/p13859188/s52811469/0677ec27-6132843...,are,Pleural Effusion,-0.0784
8681,8681,files/p13/p13859188/s52811469/0677ec27-6132843...,presumed,Pleural Effusion,0.3457


## CNR of Top 10 words by occurrence

In [26]:

# One row per (disease, word) for the ten most frequent words of each disease.
top10_rows = []
for disease in diseases:
    disease_rows = res[res["finding_labels"] == disease]
    # value_counts() orders words by descending frequency, so the first ten
    # index entries are the most frequent ones.
    most_frequent_words = disease_rows["word"].value_counts().index[:10]
    for word in most_frequent_words:
        word_rows = disease_rows[disease_rows["word"] == word]
        top10_rows.append((disease, word_rows["cnr"].mean(), word, len(word_rows)))

# Rows stay in insertion order: diseases as listed, words by frequency.
mean_cnr_results_top10 = pd.DataFrame(top10_rows, columns=["disease", "cnr", "word", "occ"])
mean_cnr_results_top10.to_csv(os.path.join(config.log_dir, f"{model_name}_top10.csv"))
mean_cnr_results_top10


Unnamed: 0,disease,cnr,word,occ
0,Pneumonia,-0.879866,<SOS>,165
1,Pneumonia,0.993722,<EOS>,165
2,Pneumonia,-0.035232,the,76
3,Pneumonia,0.998477,right,53
4,Pneumonia,1.195247,pneumonia,53
...,...,...,...,...
75,Pleural Effusion,0.467270,right,37
76,Pleural Effusion,0.580746,effusions,35
77,Pleural Effusion,0.832277,left,31
78,Pleural Effusion,-0.097977,is,26


In [93]:
# Turn the per-position accumulator into a frame and make its CNR column numeric.
results_positional = pd.DataFrame(results_positional)
results_positional["cnr"] = results_positional["cnr"].map(str_to_float)
diseases = results_positional["finding_labels"].unique()

# One row per (disease, token position); positions without data yield a NaN mean and occ == 0.
# NOTE(review): 77 is presumably the text encoder's context length — confirm.
positional_rows = []
for disease in diseases:
    disease_rows = results_positional[results_positional["finding_labels"] == disease]
    for position in range(77):
        position_rows = disease_rows[disease_rows["position"] == position]
        positional_rows.append((disease, position_rows["cnr"].mean(), position, len(position_rows)))

# Best-localising positions first within each disease.
mean_cnr_results_positional = pd.DataFrame(
    positional_rows, columns=["disease", "cnr", "pos", "occ"]
).sort_values(["disease", "cnr"], ascending=False)
mean_cnr_results_positional.to_csv(os.path.join(config.log_dir, f"{model_name}_positional.csv"))
mean_cnr_results_positional

Unnamed: 0,disease,cnr,pos,occ
83,Pneumothorax,0.680958,6,6
79,Pneumothorax,0.664789,2,6
82,Pneumothorax,0.598770,5,6
84,Pneumothorax,0.570508,7,6
80,Pneumothorax,0.433629,3,6
...,...,...,...,...
33,Pneumonia,-0.334377,33,5
31,Pneumonia,-0.336890,31,5
30,Pneumonia,-0.337185,30,5
16,Pneumonia,-0.365959,16,5


In [80]:
# Load the preprocessed bounding-box metadata table.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
metadata = pd.read_csv("/vol/ideadata/ed52egek/data/mimic/jpg/physionet.org/files/mimic-cxr-jpg/2.0.0/mimic_sccxr_preprocessed.csv")
metadata

Unnamed: 0,dicom_id,category_name,label_text,paths,image_width,image_height,bboxxywh
0,675d792f-a3521e48-5eec8573-1e81d644-e60c34f8,Pneumonia,Bibasilar opacities|Bibasilar opacities,files/p10/p10233088/s54276838/675d792f-a3521e4...,1767,2021,196-1136-532-315|1009-1134-491-350
1,5318d353-daae9c3d-2ee8648e-32b65198-aeff801e,Pneumonia,Bilateral multifocal areas of consolidation|Bi...,files/p10/p10123147/s50230934/5318d353-daae9c3...,2539,3050,1510-1268-755-796|454-690-637-670
2,4decce85-c6ede74e-7a8bc81c-e81edee9-5ec17116,Pneumothorax,Large right-sided pneumothorax,files/p10/p10500801/s58577720/4decce85-c6ede74...,3056,2544,265-90-880-2198
3,08c206fa-15bc2f42-f8918f67-9efc66b9-c01f6feb,Pneumonia,More dense consolidation at the right lung bas...,files/p10/p10388470/s51417242/08c206fa-15bc2f4...,3056,2544,440-1316-758-332
4,00258b8f-48b301d2-0cdf26b5-240d3e63-5a92e789,Pneumothorax,Large right pneumothorax,files/p10/p10176514/s56875184/00258b8f-48b301d...,1630,2022,165-138-544-1482
...,...,...,...,...,...,...,...
1128,01c47909-29055dd7-7ccfb97f-a38194de-11a8c26d,Pleural Effusion,Small pleural effusions are present bilaterall...,files/p12/p12133670/s51759637/01c47909-29055dd...,3050,2539,272-1740-1130-634|1631-1894-1284-485
1129,adb67a2b-a1473b01-b1004ae8-e612d0c9-1c2b5e3b,Pleural Effusion,There is bilateral lower lobe volume loss and ...,files/p18/p18208545/s51034438/adb67a2b-a1473b0...,2544,3056,109-1296-929-765|1316-1331-949-611
1130,18e1246d-a9c790ca-6b40ea18-81b9ccc8-3469c113,Pleural Effusion,Small bilateral pleural effusions|Small bilate...,files/p15/p15445857/s50967054/18e1246d-a9c790c...,1950,1826,171-1141-789-460|1057-1198-760-435
1131,f7e3f8a3-7299389e-84a7eb76-0cb81c79-06875895,Pleural Effusion,Persistent moderate-to-large bilateral pleural...,files/p12/p12668169/s54438962/f7e3f8a3-7299389...,2539,3050,322-972-1076-1239|1755-907-748-1304


In [29]:
# Load the preprocessed MIMIC metadata table.
# NOTE(review): absolute, machine-specific path; `metadata_mimic` is not used by the visible cells.
metadata_mimic = pd.read_csv("/vol/ideadata/ed52egek/data/mimic/mimic_metadata_preprocessed.csv")
metadata_mimic

Unnamed: 0,dicom_id,subject_id,study_id,PerformedProcedureStepDescription,ViewPosition,Rows,Columns,StudyDate,StudyTime,ProcedureCodeSequence_CodeMeaning,...,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices,impression,Finding Label,Finding Labels
0,1a1fe7e3-cbac5d93-b339aeda-86bb86b5-4f31e82e,19999987.0,58971208.0,CHEST (PORTABLE AP),AP,3056.0,2544.0,21451103.0,50507.625,CHEST (PORTABLE AP),...,,,,,,,,Slight interval worsening of right lower lung ...,,Atelectasis
1,7ba273af-3d290f8d-e28d0ab4-484b7a86-7fc12b08,19999987.0,58621812.0,CHEST (PORTABLE AP),AP,3056.0,2544.0,21451102.0,202809.234,CHEST (PORTABLE AP),...,,,,,,,1.0,Appropriately positioned ET and NG tubes. Bib...,,Atelectasis|Support Devices
2,3fcd0406-9b111603-feae7033-96632b3a-111333e5,19999733.0,57132437.0,CHEST (PA AND LAT),PA,3056.0,2544.0,21520708.0,224550.171,CHEST (PA AND LAT),...,,1.0,,,,,,No acute cardiothoracic process.,,No Finding
3,428e2c18-5721d8f3-35a05001-36f3d080-9053b83c,19999733.0,57132437.0,CHEST (PA AND LAT),PA,3056.0,2544.0,21520708.0,224550.171,CHEST (PA AND LAT),...,,1.0,,,,,,No acute cardiothoracic process.,,No Finding
4,53e9b6d0-5d5317f5-f1a4c031-01d40558-fd14a425,19999376.0,57540554.0,CHEST (PORTABLE AP),AP,3056.0,2544.0,21450731.0,45417.656,CHEST (PORTABLE AP),...,,1.0,,,,,,No acute cardiopulmonary process. No evidence...,,No Finding
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186861,aaaf94b9-37d0427f-4d8c0285-e70028b4-e0b67870,,,,,,,,,,...,,,,,,,,,,
186862,dbb71182-0081bef5-cba5a036-c71c0f59-925942e4,,,,,,,,,,...,,,,,,,,,,
186863,01c47909-29055dd7-7ccfb97f-a38194de-11a8c26d,,,,,,,,,,...,,,,,,,,,,
186864,adb67a2b-a1473b01-b1004ae8-e612d0c9-1c2b5e3b,,,,,,,,,,...,,,,,,,,,,


In [51]:
# Read the free-text report that belongs to one example image.
dicom_id = "4decce85-c6ede74e-7a8bc81c-e81edee9-5ec17116"
matching_row = metadata[metadata["dicom_id"] == dicom_id].iloc[0]
# The report file is named after the image path without its last component.
report_stem = matching_row["paths"].rpartition("/")[0]
path = os.path.join("/vol/ideadata/ed52egek/data/mimic/physionet.org/files/mimic-cxr/2.0.0/reports", report_stem) + ".txt"
with open(path, "r") as fp:
    impression = fp.read()

impression

'                                 FINAL REPORT\n INDICATION:  Shortness of breath on the right side.\n \n COMPARISON:  Chest CTA ___ and chest radiograph ___.\n \n PA AND LATERAL VIEWS OF THE CHEST:  Large right-sided pneumothorax is\n demonstrated with leftward shift of mediastinal structures compatible with\n tension.  The left lung is hyperinflated with evidence of emphysema.  No\n pleural effusion or pulmonary vascular congestion is seen.   The heart size is\n normal.  The mediastinal contours are unremarkable.  Streaky linear opacities\n within the visualized left lung likely reflect bronchiectasis. There are\n multilevel degenerative changes in the thoracic spine. \n \n IMPRESSION:  Large right-sided pneumothorax with mild leftward shift of\n mediastinal structures indicative of tension.  Findings discussed with Dr.\n ___ by Dr. ___ ___ phone on ___ at 2:31 p.m.\n'

In [52]:
# Root directory of the free-text report files.
REPORTS_ROOT = "/vol/ideadata/ed52egek/data/mimic/physionet.org/files/mimic-cxr/2.0.0/reports"


def _report_path(image_rel_path):
    """Map an image's relative path to its report file: drop the last path component and append ``.txt``."""
    return os.path.join(REPORTS_ROOT, "/".join(image_rel_path.split("/")[:-1])) + ".txt"


# Full report text for every metadata row, in row order. Each row's own path is
# used directly instead of filtering the whole frame by dicom_id per row.
impressions = []
for image_rel_path in metadata["paths"]:
    with open(_report_path(image_rel_path), "r") as fp:
        impressions.append(fp.read())



In [72]:
def _extract_impression(report_text):
    """Return the text after the ``IMPRESSION:`` marker of a report.

    Line breaks are removed and surrounding whitespace is stripped. Reports
    in which the marker does not occur exactly once yield ``""``.
    """
    parts = report_text.split("IMPRESSION:")
    if len(parts) != 2:
        return ""
    return parts[-1].strip().replace("\n", "")


# One impression per report, aligned with `impressions`.
tidy_impressions = [_extract_impression(report_text) for report_text in impressions]

tidy_impressions[:4]


['Increased bibasilar opacities consistent with worsening multifocal pneumonia.',
 '',
 'Large right-sided pneumothorax with mild leftward shift of mediastinal structures indicative of tension.  Findings discussed with Dr. ___ by Dr. ___ ___ phone on ___ at 2:31 p.m.',
 'Moderate bilateral pleural effusions and associated atelectasis.  More dense consolidation at the right lung base raises possibility of superimposed infection.']

In [89]:
# Replace the short label phrases with the impression text parsed from the reports.
# NOTE(review): the recorded output also shows an `impression` column that no
# visible cell creates — confirm where it is added.
metadata["label_text"] = tidy_impressions
metadata.head()

oi


Unnamed: 0,dicom_id,category_name,label_text,paths,image_width,image_height,bboxxywh,impression
0,675d792f-a3521e48-5eec8573-1e81d644-e60c34f8,Pneumonia,Increased bibasilar opacities consistent with ...,files/p10/p10233088/s54276838/675d792f-a3521e4...,1767,2021,196-1136-532-315|1009-1134-491-350,Increased bibasilar opacities consistent with ...
1,5318d353-daae9c3d-2ee8648e-32b65198-aeff801e,Pneumonia,,files/p10/p10123147/s50230934/5318d353-daae9c3...,2539,3050,1510-1268-755-796|454-690-637-670,
2,4decce85-c6ede74e-7a8bc81c-e81edee9-5ec17116,Pneumothorax,Large right-sided pneumothorax with mild leftw...,files/p10/p10500801/s58577720/4decce85-c6ede74...,3056,2544,265-90-880-2198,Large right-sided pneumothorax with mild leftw...
3,08c206fa-15bc2f42-f8918f67-9efc66b9-c01f6feb,Pneumonia,Moderate bilateral pleural effusions and assoc...,files/p10/p10388470/s51417242/08c206fa-15bc2f4...,3056,2544,440-1316-758-332,Moderate bilateral pleural effusions and assoc...
4,00258b8f-48b301d2-0cdf26b5-240d3e63-5a92e789,Pneumothorax,Stable large right pneumothorax,files/p10/p10176514/s56875184/00258b8f-48b301d...,1630,2022,165-138-544-1482,Stable large right pneumothorax


In [92]:
# Keep only the rows for which an impression text was found.
has_impression = metadata["label_text"] != ""
metadata = metadata[has_impression]
metadata

Unnamed: 0,dicom_id,category_name,label_text,paths,image_width,image_height,bboxxywh,impression
0,675d792f-a3521e48-5eec8573-1e81d644-e60c34f8,Pneumonia,Increased bibasilar opacities consistent with ...,files/p10/p10233088/s54276838/675d792f-a3521e4...,1767,2021,196-1136-532-315|1009-1134-491-350,Increased bibasilar opacities consistent with ...
2,4decce85-c6ede74e-7a8bc81c-e81edee9-5ec17116,Pneumothorax,Large right-sided pneumothorax with mild leftw...,files/p10/p10500801/s58577720/4decce85-c6ede74...,3056,2544,265-90-880-2198,Large right-sided pneumothorax with mild leftw...
3,08c206fa-15bc2f42-f8918f67-9efc66b9-c01f6feb,Pneumonia,Moderate bilateral pleural effusions and assoc...,files/p10/p10388470/s51417242/08c206fa-15bc2f4...,3056,2544,440-1316-758-332,Moderate bilateral pleural effusions and assoc...
4,00258b8f-48b301d2-0cdf26b5-240d3e63-5a92e789,Pneumothorax,Stable large right pneumothorax,files/p10/p10176514/s56875184/00258b8f-48b301d...,1630,2022,165-138-544-1482,Stable large right pneumothorax
5,0bd6e8c7-8a005ad1-8975bd65-6f25528f-28f4a2c8,Pneumonia,Left basilar opacity potentially infection in ...,files/p10/p10446794/s51668285/0bd6e8c7-8a005ad...,3056,2544,2045-1704-748-384,Left basilar opacity potentially infection in ...
...,...,...,...,...,...,...,...,...
1123,e47abb05-dec27a0e-e5af4d59-1fd1d5a0-1c34655e,Pleural Effusion,AP chest compared to ___: Mild-to-moderate le...,files/p17/p17865089/s59717631/e47abb05-dec27a0...,3056,2544,99-1883-964-432,AP chest compared to ___: Mild-to-moderate le...
1124,a8c08cbf-15ac0dac-b76a40a0-dab826c7-18015767,Pleural Effusion,"Progression of postoperative healing, status p...",files/p19/p19991135/s50286241/a8c08cbf-15ac0da...,1736,2022,436-183-356-362|156-362-240-788,"Progression of postoperative healing, status p..."
1125,be319f71-2b1ab302-55580f5d-ffc6e9e0-9e90689a,Pleural Effusion,1. Nerve stimulating device is seen overlying...,files/p17/p17112432/s56998267/be319f71-2b1ab30...,2021,2021,338-528-227-270,1. Nerve stimulating device is seen overlying...
1130,18e1246d-a9c790ca-6b40ea18-81b9ccc8-3469c113,Pleural Effusion,1. Interval improvement in the patient's now ...,files/p15/p15445857/s50967054/18e1246d-a9c790c...,1950,1826,171-1141-789-460|1057-1198-760-435,1. Interval improvement in the patient's now ...


In [93]:
# Write the filtered metadata to a CSV in the current working directory.
metadata.to_csv("mcxr_with_impressions.csv")
# NOTE(review): looks like a leftover completion marker for debugging — confirm it can be removed.
print("oi")

oi


In [91]:
# Show the current working directory (relative paths above are resolved against it).
os.path.abspath(".")

'/vol/ideadata/ed52egek/pycharm/chest-distillation'