In [1]:
import cv2
import random
import json
import time

import torch
import numpy as np
from tqdm import tqdm

import clip
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
from segment_anything.utils.transforms import ResizeLongestSide 

from PIL import Image  
from torchvision import transforms
from torchvision.transforms.functional import to_pil_image

from sam_caf import hyper_params_tuning, get_crops, retrieve_relevant_crop, retrieve_relevant_crop_biomed, get_sam_prompts, sam_predicton, retrieve_relevant_crop_biomed_topk

In [2]:
#config
class DictToObject:
    """Expose the entries of a mapping as instance attributes.

    Each key of ``dict_obj`` becomes an attribute name and the matching value
    becomes that attribute's value, so ``obj.key`` reads what
    ``dict_obj["key"]`` holds.
    """

    def __init__(self, dict_obj):
        for attr_name, attr_value in dict_obj.items():
            setattr(self, attr_name, attr_value)

# Settings for the SAM + CLIP pipeline.
# NOTE(review): "source", "refine" and "pre_trained" hold the *strings*
# "True"/"False", not booleans, so a plain truthiness test on them is always
# true -- confirm how the consumers of `config` read these flags.
config_dict = dict(
    model_name="SAM",
    model_type="vit_h",
    source="False",
    refine="False",
    pre_trained="True",
    sam_ckpt="/data/aofei/LLM/SAM/sam_vit_h_4b8939.pth",
    clip_prompts="./clip_prompts/abd_seg.json",
)

config = DictToObject(config_dict)

# `prompt_mode` is handed to get_crops; `mode` is not read by the visible cells.
prompt_mode = "crops"
mode = "sam_clip"

def preprocess_image(image_path):
    """Read an image from disk and resize it to 256x256.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The array produced by ``cv2.resize`` for the loaded image, with a
        target size of (256, 256).

    Raises:
        OSError: If ``cv2.imread`` could not load ``image_path`` (missing or
            undecodable file).
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with the offending path rather than with an opaque error from
    # cv2.resize.
    if image is None:
        raise OSError(f"Could not read image: {image_path}")
    # NOTE(review): the array keeps whatever channel order cv2.imread yields
    # (BGR per the OpenCV docs) -- confirm the downstream consumers expect it.
    return cv2.resize(image, (256, 256))

In [3]:
import os
# os.environ["TRANSFORMERS_CACHE"]="/data/aofei/huggingface_cache/transformers"
# HF_HOME is set before open_clip is imported -- presumably so the hub cache
# location is picked up at import time; keep this ordering (TODO confirm).
os.environ["HF_HOME"]="/data/aofei/huggingface_cache/transformers"
from open_clip import create_model_from_pretrained, get_tokenizer # works on open-clip-torch>=2.23.0, timm>=0.9.8

# BiomedCLIP model and its preprocessing, plus the tokenizer for the same
# checkpoint; these are the ones passed to the crop retrieval in
# generate_segments.
biomed_clip_model, biomed_preprocess = create_model_from_pretrained('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224', device="cuda")
tokenizer = get_tokenizer('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')

# OpenAI CLIP ViT-L/14; `clip_model` and `preprocess` are not referenced by
# the visible cells.
clip_model, preprocess = clip.load("ViT-L/14", device="cuda")
sam_checkpoint = config.sam_ckpt

# Build the SAM variant named by config.model_type from the checkpoint and
# move it to the GPU.
sam = sam_model_registry[config.model_type](checkpoint=sam_checkpoint)
sam.to("cuda")
# Not referenced by the visible cells.
resize_transform = ResizeLongestSide(sam.image_encoder.img_size)

# Not referenced by the visible cells.
dice_scores = []
# `mask_generator` produces the candidate masks and `area` is the upper bound
# used to filter them in sam_generation.
mask_generator, area = hyper_params_tuning(sam)

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
100%|███████████████████████████████████████| 890M/890M [01:34<00:00, 9.88MiB/s]


In [4]:
def sam_generation(image_path):
    """Run the SAM mask generator on one image and crop the kept masks.

    The image is loaded through ``preprocess_image``. Only masks whose
    ``"area"`` is strictly below the module-level ``area`` threshold (obtained
    from hyper-parameter tuning) are kept, and those are passed to
    ``get_crops`` together with ``prompt_mode``.

    Returns:
        A ``(masks, img_crops)`` pair: the kept mask dicts and whatever
        ``get_crops`` returns for them.
    """
    resized = preprocess_image(image_path=image_path)
    with torch.no_grad():
        kept = []
        for candidate in mask_generator.generate(resized):
            # Area filtering against the tuned threshold.
            if candidate["area"] < area:
                kept.append(candidate)
        crops = get_crops(resized, kept, prompt_mode)

    return kept, crops

def filter_sam_results(masks, img_crops):
    """Drop masks that touch the image border or are too small.

    ``mask['bbox']`` is read as ``[x, y, width, height]``. A mask (and the
    crop at the same index in ``img_crops``) is discarded when:

    * its box starts at x == 0 or y == 0,
    * its width or height is 12 or less, or
    * its right or bottom edge (x + width, y + height) exceeds 253.

    NOTE(review): 253 presumably relates to the 256x256 resize done in
    preprocess_image -- confirm. The function also assumes ``img_crops[i]``
    belongs to ``masks[i]``; verify that get_crops never drops entries.

    Returns:
        A ``(new_masks, new_img_crops)`` pair of equally long lists.
    """
    kept_masks = []
    kept_crops = []
    for idx, entry in enumerate(masks):
        box = entry['bbox']
        starts_on_edge = box[0] == 0 or box[1] == 0
        if starts_on_edge:
            continue
        too_small = box[2] <= 12 or box[3] <= 12
        if too_small:
            continue
        bottom = box[1] + box[3]
        right = box[0] + box[2]
        if bottom > 253 or right > 253:
            continue
        kept_masks.append(entry)
        kept_crops.append(img_crops[idx])
    return kept_masks, kept_crops

def get_topk_similar(k, crop_scores):
    """Return the ``k`` highest scores as ``(index, score)`` tuples.

    The result is ordered from highest to lowest score; equal scores keep
    their original relative order.
    """
    ranked = list(enumerate(crop_scores))
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:k]

def get_compelete_contour(masks):
    """Return the largest bounding-box width among ``masks``.

    Args:
        masks: Iterable of mask dicts whose ``'bbox'`` entry holds the width
            at index 2.

    Returns:
        The maximum ``bbox[2]`` value found.

    Raises:
        IndexError: If ``masks`` is empty.
    """
    # need to consider the chest xray
    widths = [entry['bbox'][2] for entry in masks]
    if not widths:
        # Same exception type the former `sorted(...)[0]` lookup produced,
        # but with a message that names the actual problem.
        raise IndexError("get_compelete_contour() needs at least one mask")
    return max(widths)

def judge_inner_boxes(bboxes):
    """Placeholder: walks over ``bboxes`` without doing anything.

    The function is unfinished; it iterates the input once and implicitly
    returns ``None``.
    """
    for _box in bboxes:
        pass

In [5]:
# Per-image caches keyed by image path, so SAM runs at most once per image.
masks_image_dict = {}      # filtered masks, used by generate_segments
masks_all_image_dict = {}  # filtered masks, used by generate_all_segments
crops_image_dict = {}      # crops matching masks_image_dict
def generate_segments(query, image_path, topk=4):
    """Return the SAM segments of an image that best match a text query.

    Masks and crops are computed once per ``image_path`` (SAM generation
    followed by ``filter_sam_results``) and cached in ``masks_image_dict`` /
    ``crops_image_dict``. The crops are then ranked against ``query`` by
    ``retrieve_relevant_crop_biomed_topk``.

    Args:
        query: Text used as the single retrieval prompt.
        image_path: Path of the image to segment; also the cache key.
        topk: Value forwarded as ``topk`` to the retrieval call. Defaults to
            4, the value that was previously hard-coded.

    Returns:
        ``(bboxes, segs, indices)`` where ``bboxes`` and ``segs`` are the
        ``"bbox"`` and ``"segmentation"`` entries of the selected masks and
        ``indices`` is the retrieval result for the ``"query"`` prompt. All
        three are empty lists when the retrieval returns no indices.
    """
    if image_path in masks_image_dict:
        masks = masks_image_dict[image_path]
        img_crops = crops_image_dict[image_path]
    else:
        masks, img_crops = sam_generation(image_path=image_path)
        masks, img_crops = filter_sam_results(masks, img_crops)
        masks_image_dict[image_path] = masks
        crops_image_dict[image_path] = img_crops

    prompts = {"query": [query]}
    max_indices, _scores = retrieve_relevant_crop_biomed_topk(
        img_crops, prompts, biomed_clip_model, biomed_preprocess, config,
        tokenizer=tokenizer, topk=topk,
    )
    # TODO: define a set of rules on top of the plain top-k result:
    # - if there is no explicit organ to use as query, use the whole segmentation
    # - if smaller boxes lie inside a bigger box, use all of them but assign
    #   higher weights to the smaller inner boxes
    if max_indices is None:
        return [], [], []

    selected = max_indices["query"]
    bboxes = []
    segs = []
    for idx in selected:
        bboxes.append(masks[idx]["bbox"])
        segs.append(masks[idx]["segmentation"])
    return bboxes, segs, selected


def generate_all_segments(image_path):
    """Return every filtered SAM mask of an image, computing it at most once.

    Results are cached in ``masks_all_image_dict`` under ``image_path``; the
    crops produced alongside the masks are discarded.
    """
    if image_path not in masks_all_image_dict:
        all_masks, all_crops = sam_generation(image_path=image_path)
        all_masks, all_crops = filter_sam_results(all_masks, all_crops)
        masks_all_image_dict[image_path] = all_masks
    return masks_all_image_dict[image_path]

In [7]:
import json

# Load the raw training records; `data` and `all_train_data` are two names
# for the same loaded object.
with open(r"/data/aofei/hallucination/MIMIC_CXR/data_report/training_no_clean.json", "r") as f:
    all_train_data = json.load(f)
data = all_train_data

In [8]:
all_train_data[0]

{'id': 0,
 'image': 'p14/p14090353/s53605505/abd28b40-48759275-f983e673-5ca480ba-42208726.jpg',
 'conversations': [{'from': 'human',
   'value': '<image>\nAnalyze the chest X-ray and generate a concise medical report.'},
  {'from': 'gpt',
   'value': '                                 FINAL REPORT\n EXAMINATION:  CHEST (PORTABLE AP)\n \n INDICATION:  ___ yo M struck by vehicle, R SDH s/p evac crani, L epidural\n hematoma untreated s/p tib fx ex fix  // fever. r/o PNA\n \n COMPARISON:  ___\n \n IMPRESSION: \n \n As compared to the previous radiograph, no relevant change has occurred. The\n lung parenchyma is clear and the current image shows no evidence of pneumonia.\n A minimal atelectasis at the right hilus is less severe than on the previous\n exam.  No larger pleural effusions.  No pneumothorax.  Normal size of the\n cardiac silhouette.\n'}]}

In [9]:
# Alias only: no filtering or copying happens here, so both names refer to the same object.
all_train_data_en = all_train_data

In [10]:
import copy
# Deep-copy so the per-record keys added to train_data_seg later in the
# notebook do not leak into all_train_data_en.
train_data_seg = copy.deepcopy(all_train_data_en)
len(train_data_seg)

2000

In [11]:
len(set([i['image'] for i in train_data_seg]))

2000

In [None]:
# len(set([i['img_name'] for i in train_data]))

450

In [16]:
#failure case: 432-3 + 100
# Every image is retrieved against the same fixed text query.
question = "Medical image of lungs and chest X-ray."
query = question

# (record id, error repr) for every record whose segmentation raised.
failed_records = []
for data in tqdm(train_data_seg):
    image_path = os.path.join("/data/aofei/hallucination/MIMIC_CXR/sampled_files_train", data["image"])
    try:
        bbox, segs, max_indices = generate_segments(query, image_path)
    except Exception as exc:
        # Best effort: a failing image is skipped and its record is left
        # without 'bbox'/'mask' keys (later cells count those records).
        # Catching Exception rather than using a bare `except` lets
        # KeyboardInterrupt stop the run, and the error is recorded instead
        # of being silently dropped.
        failed_records.append((data.get("id"), repr(exc)))
        continue
    data['bbox'] = bbox
    data['mask'] = segs
    data["bbox_indices"] = max_indices

print(f"{len(failed_records)} of {len(train_data_seg)} records failed segmentation")
    

  3%|▎         | 59/2000 [02:20<1:31:32,  2.83s/it]

Skipping zero-sized bounding box.


  3%|▎         | 63/2000 [02:31<1:31:20,  2.83s/it]

Skipping zero-sized bounding box.


  4%|▎         | 70/2000 [02:51<1:31:06,  2.83s/it]

Skipping zero-sized bounding box.


  4%|▍         | 78/2000 [03:14<1:31:12,  2.85s/it]

Skipping zero-sized bounding box.


  4%|▍         | 79/2000 [03:17<1:31:01,  2.84s/it]

Skipping zero-sized bounding box.


  5%|▌         | 109/2000 [04:42<1:28:20,  2.80s/it]

Skipping zero-sized bounding box.


  6%|▋         | 129/2000 [05:39<1:29:19,  2.86s/it]

Skipping zero-sized bounding box.


  8%|▊         | 154/2000 [06:50<1:27:07,  2.83s/it]

Skipping zero-sized bounding box.


  8%|▊         | 155/2000 [06:53<1:26:03,  2.80s/it]

Skipping zero-sized bounding box.


  8%|▊         | 167/2000 [07:26<1:25:43,  2.81s/it]

Skipping zero-sized bounding box.


  9%|▉         | 180/2000 [08:03<1:26:40,  2.86s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 10%|▉         | 191/2000 [08:35<1:25:20,  2.83s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 10%|▉         | 197/2000 [08:52<1:24:46,  2.82s/it]

Skipping zero-sized bounding box.


 10%|▉         | 198/2000 [08:55<1:25:10,  2.84s/it]

Skipping zero-sized bounding box.


 10%|█         | 200/2000 [09:00<1:24:31,  2.82s/it]

Skipping zero-sized bounding box.


 10%|█         | 202/2000 [09:06<1:24:23,  2.82s/it]

Skipping zero-sized bounding box.


 11%|█▏        | 227/2000 [10:17<1:24:07,  2.85s/it]

Skipping zero-sized bounding box.


 12%|█▏        | 247/2000 [11:13<1:21:50,  2.80s/it]

Skipping zero-sized bounding box.


 13%|█▎        | 256/2000 [11:39<1:23:37,  2.88s/it]

Skipping zero-sized bounding box.


 13%|█▎        | 266/2000 [12:07<1:21:00,  2.80s/it]

Skipping zero-sized bounding box.


 15%|█▍        | 291/2000 [13:18<1:19:43,  2.80s/it]

Skipping zero-sized bounding box.


 15%|█▍        | 295/2000 [13:29<1:20:17,  2.83s/it]

Skipping zero-sized bounding box.


 15%|█▍        | 299/2000 [13:41<1:21:09,  2.86s/it]

Skipping zero-sized bounding box.


 15%|█▌        | 307/2000 [14:03<1:18:44,  2.79s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 16%|█▌        | 320/2000 [14:40<1:19:27,  2.84s/it]

Skipping zero-sized bounding box.


 16%|█▋        | 329/2000 [15:05<1:18:09,  2.81s/it]

Skipping zero-sized bounding box.


 17%|█▋        | 346/2000 [15:53<1:17:13,  2.80s/it]

Skipping zero-sized bounding box.


 18%|█▊        | 357/2000 [16:25<1:18:40,  2.87s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 18%|█▊        | 361/2000 [16:36<1:18:23,  2.87s/it]

Skipping zero-sized bounding box.


 18%|█▊        | 368/2000 [16:56<1:16:45,  2.82s/it]

Skipping zero-sized bounding box.


 18%|█▊        | 369/2000 [16:59<1:16:08,  2.80s/it]

Skipping zero-sized bounding box.


 19%|█▉        | 375/2000 [17:16<1:15:45,  2.80s/it]

Skipping zero-sized bounding box.


 19%|█▉        | 383/2000 [17:39<1:16:33,  2.84s/it]

Skipping zero-sized bounding box.


 20%|█▉        | 391/2000 [18:01<1:15:51,  2.83s/it]

Skipping zero-sized bounding box.


 20%|██        | 406/2000 [18:44<1:13:56,  2.78s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 22%|██▏       | 443/2000 [20:29<1:13:26,  2.83s/it]

Skipping zero-sized bounding box.


 23%|██▎       | 459/2000 [21:15<1:13:17,  2.85s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 23%|██▎       | 463/2000 [21:26<1:13:29,  2.87s/it]

Skipping zero-sized bounding box.


 24%|██▎       | 470/2000 [21:46<1:11:41,  2.81s/it]

Skipping zero-sized bounding box.


 24%|██▍       | 483/2000 [22:22<1:11:02,  2.81s/it]

Skipping zero-sized bounding box.


 26%|██▋       | 527/2000 [24:28<1:10:19,  2.86s/it]

Skipping zero-sized bounding box.


 27%|██▋       | 531/2000 [24:39<1:09:55,  2.86s/it]

Skipping zero-sized bounding box.


 27%|██▋       | 547/2000 [25:24<1:09:18,  2.86s/it]

Skipping zero-sized bounding box.


 28%|██▊       | 560/2000 [26:01<1:07:36,  2.82s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 29%|██▉       | 577/2000 [26:49<1:06:54,  2.82s/it]

Skipping zero-sized bounding box.


 29%|██▉       | 588/2000 [27:21<1:06:19,  2.82s/it]

Skipping zero-sized bounding box.


 29%|██▉       | 589/2000 [27:23<1:06:25,  2.82s/it]

Skipping zero-sized bounding box.


 30%|███       | 600/2000 [27:55<1:06:49,  2.86s/it]

Skipping zero-sized bounding box.


 31%|███       | 612/2000 [28:29<1:04:40,  2.80s/it]

Skipping zero-sized bounding box.


 31%|███▏      | 626/2000 [29:08<1:04:21,  2.81s/it]

Skipping zero-sized bounding box.


 32%|███▏      | 635/2000 [29:34<1:04:13,  2.82s/it]

Skipping zero-sized bounding box.


 33%|███▎      | 657/2000 [30:36<1:03:31,  2.84s/it]

Skipping zero-sized bounding box.


 34%|███▎      | 672/2000 [31:19<1:03:33,  2.87s/it]

Skipping zero-sized bounding box.


 35%|███▌      | 703/2000 [32:46<1:01:44,  2.86s/it]

Skipping zero-sized bounding box.


 35%|███▌      | 706/2000 [32:54<1:00:16,  2.79s/it]

Skipping zero-sized bounding box.


 36%|███▌      | 713/2000 [33:14<1:01:18,  2.86s/it]

Skipping zero-sized bounding box.


 36%|███▌      | 718/2000 [33:29<1:00:50,  2.85s/it]

Skipping zero-sized bounding box.


 37%|███▋      | 743/2000 [34:40<59:38,  2.85s/it]  

Skipping zero-sized bounding box.


 38%|███▊      | 755/2000 [35:14<57:46,  2.78s/it]

Skipping zero-sized bounding box.


 38%|███▊      | 756/2000 [35:17<57:42,  2.78s/it]

Skipping zero-sized bounding box.


 40%|████      | 808/2000 [37:44<55:37,  2.80s/it]

Skipping zero-sized bounding box.


 41%|████      | 812/2000 [37:55<56:04,  2.83s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 41%|████▏     | 827/2000 [38:38<55:22,  2.83s/it]

Skipping zero-sized bounding box.


 42%|████▏     | 832/2000 [38:52<55:30,  2.85s/it]

Skipping zero-sized bounding box.


 42%|████▏     | 839/2000 [39:12<55:22,  2.86s/it]

Skipping zero-sized bounding box.


 42%|████▏     | 840/2000 [39:15<54:52,  2.84s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 42%|████▏     | 843/2000 [39:23<54:08,  2.81s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 43%|████▎     | 852/2000 [39:49<53:57,  2.82s/it]

Skipping zero-sized bounding box.


 43%|████▎     | 853/2000 [39:51<53:44,  2.81s/it]

Skipping zero-sized bounding box.


 44%|████▎     | 873/2000 [40:48<53:51,  2.87s/it]

Skipping zero-sized bounding box.


 45%|████▍     | 896/2000 [41:53<51:34,  2.80s/it]

Skipping zero-sized bounding box.


 46%|████▌     | 917/2000 [42:53<51:30,  2.85s/it]

Skipping zero-sized bounding box.


 46%|████▌     | 924/2000 [43:13<50:29,  2.82s/it]

Skipping zero-sized bounding box.


 47%|████▋     | 940/2000 [43:58<51:00,  2.89s/it]

Skipping zero-sized bounding box.


 48%|████▊     | 957/2000 [44:47<49:32,  2.85s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 49%|████▉     | 984/2000 [46:04<48:10,  2.84s/it]

Skipping zero-sized bounding box.


 51%|█████     | 1011/2000 [47:20<46:53,  2.84s/it]

Skipping zero-sized bounding box.


 53%|█████▎    | 1055/2000 [49:25<44:54,  2.85s/it]

Skipping zero-sized bounding box.


 53%|█████▎    | 1056/2000 [49:28<44:33,  2.83s/it]

Skipping zero-sized bounding box.


 53%|█████▎    | 1065/2000 [49:53<43:51,  2.81s/it]

Skipping zero-sized bounding box.


 54%|█████▍    | 1084/2000 [50:47<43:00,  2.82s/it]

Skipping zero-sized bounding box.


 55%|█████▌    | 1103/2000 [51:41<41:47,  2.80s/it]

Skipping zero-sized bounding box.


 55%|█████▌    | 1108/2000 [51:55<41:51,  2.82s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 58%|█████▊    | 1154/2000 [54:06<39:55,  2.83s/it]

Skipping zero-sized bounding box.


 59%|█████▉    | 1183/2000 [55:29<38:30,  2.83s/it]

Skipping zero-sized bounding box.


 59%|█████▉    | 1188/2000 [55:43<37:54,  2.80s/it]

Skipping zero-sized bounding box.


 59%|█████▉    | 1189/2000 [55:46<38:02,  2.81s/it]

Skipping zero-sized bounding box.


 62%|██████▏   | 1234/2000 [57:54<36:36,  2.87s/it]

Skipping zero-sized bounding box.


 62%|██████▏   | 1239/2000 [58:08<36:19,  2.86s/it]

Skipping zero-sized bounding box.


 63%|██████▎   | 1253/2000 [58:48<34:50,  2.80s/it]

Skipping zero-sized bounding box.


 63%|██████▎   | 1256/2000 [58:56<35:30,  2.86s/it]

Skipping zero-sized bounding box.


 63%|██████▎   | 1265/2000 [59:22<34:38,  2.83s/it]

Skipping zero-sized bounding box.


 64%|██████▍   | 1276/2000 [59:53<34:37,  2.87s/it]

Skipping zero-sized bounding box.


 65%|██████▍   | 1291/2000 [1:00:36<33:43,  2.85s/it]

Skipping zero-sized bounding box.


 65%|██████▍   | 1297/2000 [1:00:53<33:50,  2.89s/it]

Skipping zero-sized bounding box.


 65%|██████▍   | 1299/2000 [1:00:59<33:09,  2.84s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 66%|██████▌   | 1315/2000 [1:01:44<32:00,  2.80s/it]

Skipping zero-sized bounding box.


 68%|██████▊   | 1354/2000 [1:03:35<30:17,  2.81s/it]

Skipping zero-sized bounding box.


 68%|██████▊   | 1365/2000 [1:04:06<29:43,  2.81s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 71%|███████   | 1424/2000 [1:06:53<27:07,  2.82s/it]

Skipping zero-sized bounding box.


 71%|███████▏  | 1426/2000 [1:06:59<27:13,  2.85s/it]

Skipping zero-sized bounding box.


 72%|███████▏  | 1436/2000 [1:07:28<26:34,  2.83s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 73%|███████▎  | 1454/2000 [1:08:19<25:35,  2.81s/it]

Skipping zero-sized bounding box.


 73%|███████▎  | 1458/2000 [1:08:30<25:36,  2.83s/it]

Skipping zero-sized bounding box.


 73%|███████▎  | 1469/2000 [1:09:02<25:08,  2.84s/it]

Skipping zero-sized bounding box.


 74%|███████▍  | 1482/2000 [1:09:39<24:13,  2.81s/it]

Skipping zero-sized bounding box.


 74%|███████▍  | 1488/2000 [1:09:56<24:09,  2.83s/it]

Skipping zero-sized bounding box.


 76%|███████▌  | 1523/2000 [1:11:35<22:31,  2.83s/it]

Skipping zero-sized bounding box.


 77%|███████▋  | 1537/2000 [1:12:15<22:03,  2.86s/it]

Skipping zero-sized bounding box.


 79%|███████▊  | 1572/2000 [1:13:54<19:58,  2.80s/it]

Skipping zero-sized bounding box.


 79%|███████▉  | 1575/2000 [1:14:03<20:13,  2.86s/it]

Skipping zero-sized bounding box.
Skipping zero-sized bounding box.


 79%|███████▉  | 1579/2000 [1:14:15<20:08,  2.87s/it]

Skipping zero-sized bounding box.


 82%|████████▏ | 1640/2000 [1:17:08<16:56,  2.82s/it]

Skipping zero-sized bounding box.


 83%|████████▎ | 1656/2000 [1:17:54<16:20,  2.85s/it]

Skipping zero-sized bounding box.


 84%|████████▍ | 1678/2000 [1:18:56<15:06,  2.82s/it]

Skipping zero-sized bounding box.


 87%|████████▋ | 1731/2000 [1:21:27<12:43,  2.84s/it]

Skipping zero-sized bounding box.


 88%|████████▊ | 1769/2000 [1:23:15<10:59,  2.86s/it]

Skipping zero-sized bounding box.


 89%|████████▉ | 1777/2000 [1:23:38<10:37,  2.86s/it]

Skipping zero-sized bounding box.


 91%|█████████ | 1823/2000 [1:25:49<08:19,  2.82s/it]

Skipping zero-sized bounding box.


 92%|█████████▏| 1836/2000 [1:26:26<07:49,  2.86s/it]

Skipping zero-sized bounding box.


 93%|█████████▎| 1860/2000 [1:27:35<06:43,  2.88s/it]

Skipping zero-sized bounding box.


 94%|█████████▍| 1876/2000 [1:28:21<05:52,  2.84s/it]

Skipping zero-sized bounding box.


 94%|█████████▍| 1881/2000 [1:28:35<05:37,  2.83s/it]

Skipping zero-sized bounding box.


 96%|█████████▌| 1921/2000 [1:30:29<03:43,  2.83s/it]

Skipping zero-sized bounding box.


 96%|█████████▋| 1929/2000 [1:30:52<03:22,  2.86s/it]

Skipping zero-sized bounding box.


 96%|█████████▋| 1930/2000 [1:30:55<03:19,  2.85s/it]

Skipping zero-sized bounding box.


 97%|█████████▋| 1938/2000 [1:31:17<02:56,  2.84s/it]

Skipping zero-sized bounding box.


 97%|█████████▋| 1946/2000 [1:31:40<02:38,  2.93s/it]

Skipping zero-sized bounding box.


 99%|█████████▉| 1977/2000 [1:33:09<01:05,  2.86s/it]

Skipping zero-sized bounding box.


100%|█████████▉| 1990/2000 [1:33:46<00:28,  2.87s/it]

Skipping zero-sized bounding box.


100%|█████████▉| 1997/2000 [1:34:06<00:08,  2.85s/it]

Skipping zero-sized bounding box.


100%|██████████| 2000/2000 [1:34:15<00:00,  2.83s/it]


In [17]:
# np.sum(train_data_seg[0]['mask'][0])
np.sum(train_data_seg[0]['mask'][1].astype(int))

7794

In [18]:
# Number of records that never received a "mask" key.
s = sum(1 for rec in train_data_seg if "mask" not in rec)
s

40

In [19]:
train_data_seg[3]

{'id': 3,
 'image': 'p10/p10460886/s54191375/42ef040a-6c585c2a-64809db6-dd09da58-ceea06c8.jpg',
 'conversations': [{'from': 'human',
   'value': '<image>\nAnalyze the chest X-ray and generate a concise medical report.'},
  {'from': 'gpt',
   'value': '                                 FINAL REPORT\n CHEST RADIOGRAPH\n \n INDICATION:  Shortness of breath, evaluation for pulmonary edema.\n \n COMPARISON:  ___.\n \n FINDINGS:  As compared to the previous radiograph, the patient is now in\n moderate-to-severe pulmonary edema.  Moderate cardiomegaly, signs of\n intravascular and interstitial fluid overload, and blunting of the left\n costophrenic sinus, likely caused by a small pleural effusion.  No evidence of\n pneumonia.\n \n At the time of dictation and observation, 10:03 a.m., the referring physician,\n ___. ___, was paged for notification.\n'}],
 'bbox': [[122, 74, 70, 86],
  [26, 89, 58, 72],
  [62, 71, 25, 13],
  [161, 138, 23, 14]],
 'mask': [array([[False, False, False, ..., False,

In [20]:
#training2
# Build the slimmed-down training records and, separately, the per-id mask
# lookup that is saved on its own.
new_train_data = []
segments_dict = {}
for rec in train_data_seg:
    # 'masks' stays an empty placeholder in every record; the actual masks
    # are collected in segments_dict under the stringified record id.
    slim = {
        'image': rec['image'],
        'id': rec['id'],
        'conversations': rec['conversations'],
        'bboxes': rec.get("bbox", []),
        'masks': [],
    }
    segments_dict[str(rec['id'])] = rec.get("mask", [])
    new_train_data.append(slim)

new_train_data[-6]

{'image': 'p11/p11184245/s51067929/16cc6792-105e73e2-c6581ee2-856ee331-ef9917e2.jpg',
 'id': 1994,
 'conversations': [{'from': 'human',
   'value': '<image>\nAnalyze the chest X-ray and generate a concise medical report.'},
  {'from': 'gpt',
   'value': '                                 FINAL REPORT\n CHEST RADIOGRAPH PERFORMED ON ___\n \n COMPARISON:  None.\n \n CLINICAL HISTORY:  Status post fall yesterday, dizziness, assess for\n pneumonia.\n \n FINDINGS:  PA and lateral views of the chest were obtained.  Lung volumes are\n low, though no focal consolidation, effusion, or pneumothorax is seen.  Heart\n and mediastinal contour appears normal.  Bony structures appear intact.  No\n definite bony fracture.\n \n IMPRESSION:  No acute findings in the chest.\n'}],
 'bboxes': [[3, 5, 84, 21],
  [38, 69, 80, 88],
  [95, 49, 24, 15],
  [160, 48, 69, 96]],
 'masks': []}

In [21]:
# Shallow-copy each record and keep at most its first four boxes; all other
# fields are shared with the source record.
new_train_data_top4 = [
    {**rec, "bboxes": rec["bboxes"][:4]}
    for rec in new_train_data
]

In [22]:
new_train_data_top4[99]

{'image': 'p10/p10775692/s54437438/c112b98d-29705e7c-3a9f8973-d3d54314-7206dd83.jpg',
 'id': 99,
 'conversations': [{'from': 'human',
   'value': '<image>\nAnalyze the chest X-ray and generate a concise medical report.'},
  {'from': 'gpt',
   'value': '                                 FINAL REPORT\n EXAMINATION:  CHEST (PA AND LAT)\n \n INDICATION:  ___ year old man with ICD  // evaluate for lead placement     \n evaluate for lead placement\n \n IMPRESSION: \n \n Compared to prior chest radiographs since ___, most recently ___.\n \n With the benefit of the lateral view it is now possible to see that the\n transvenous right atrial lead is oriented posteriorly to the back wall of the\n right atrium.  Transvenous right ventricular pacer defibrillator lead follows\n the expected course.  Both leads are continuous from the left pectoral\n generator.  No pneumothorax, pleural effusion, or mediastinal widening.  Heart\n is normal size in the lungs are clear aside from new platelike atelectasi

In [23]:
# segments_dict['0']
# Number of ids whose entry in segments_dict is empty.
ed = sum(1 for seg_list in segments_dict.values() if len(seg_list) == 0)
ed

40

In [24]:
# Save the masks to a compressed .npz file. Each entry of segments_dict is
# passed as a keyword argument, so every value is stored under its
# stringified record id; ids without masks are stored from an empty list.

np.savez_compressed("/data/aofei/hallucination/MIMIC_CXR/data_report/training_segments_top4.npz", **segments_dict)

In [25]:
len(new_train_data)

2000

In [29]:
# Load training.json (the earlier load cell read training_no_clean.json).
with open("/data/aofei/hallucination/MIMIC_CXR/data_report/training.json", "r") as f:
    clean_train_data = json.load(f)
# Display the first record together with the record count.
clean_train_data[0], len(clean_train_data)

({'id': 0,
  'image': 'p14/p14090353/s53605505/abd28b40-48759275-f983e673-5ca480ba-42208726.jpg',
  'conversations': [{'from': 'human',
    'value': '<image>\nAnalyze the chest X-ray and generate a concise medical report.'},
   {'from': 'gpt',
    'value': 'The lung parenchyma is clear with no evidence of pneumonia. Minimal atelectasis at the right hilus, no larger pleural effusions, and no pneumothorax are observed. Cardiac silhouette is of normal size.'}]},
 2000)

In [30]:
# Attach the top-4 boxes (and the 'masks' field) to the clean records by id.
# Index the top-4 records once so the merge is a single pass instead of a
# nested scan over both lists; for duplicate ids the last top-4 record wins,
# as it did with the nested loops.
top4_by_id = {rec["id"]: rec for rec in new_train_data_top4}
for clean_rec in clean_train_data:
    top4_rec = top4_by_id.get(clean_rec["id"])
    if top4_rec is not None:
        clean_rec["bboxes"] = top4_rec["bboxes"]
        clean_rec["masks"] = top4_rec["masks"]

In [31]:
clean_train_data[0]

{'id': 0,
 'image': 'p14/p14090353/s53605505/abd28b40-48759275-f983e673-5ca480ba-42208726.jpg',
 'conversations': [{'from': 'human',
   'value': '<image>\nAnalyze the chest X-ray and generate a concise medical report.'},
  {'from': 'gpt',
   'value': 'The lung parenchyma is clear with no evidence of pneumonia. Minimal atelectasis at the right hilus, no larger pleural effusions, and no pneumothorax are observed. Cardiac silhouette is of normal size.'}],
 'bboxes': [[6, 1, 85, 23],
  [144, 42, 72, 155],
  [229, 126, 18, 113],
  [152, 55, 63, 141]],
 'masks': []}

In [32]:
# Write the clean records, which the merge cell extended with 'bboxes' and
# 'masks', as indented JSON.
with open('/data/aofei/hallucination/MIMIC_CXR/data_report/training_masks_top4.json', 'w') as json_file:
    json.dump(clean_train_data, json_file, indent=4)