In [1]:
import csv
import io
import os
from pathlib import Path

import requests
import torch
from PIL import Image
from rich import print

# Setup cache for Hugging Face models.
# Keep this assignment ahead of the transformers import so the cache location
# is already in the environment when the library is loaded.
os.environ['HF_HOME'] = '/vol/biomedic3/bglocker/ugproj2324/nns20/CheXagent/.cache'
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig

In [3]:
def setup_model(
    model_name: str = "StanfordAIMI/CheXagent-8b",
    device: str = "cuda",
    dtype: torch.dtype = torch.float16,
) -> tuple:
    """Load the processor, model and generation config for one checkpoint.

    Calling ``setup_model()`` with no arguments behaves exactly as before:
    the CheXagent-8b checkpoint is loaded in float16 and moved to CUDA.

    Args:
        model_name: Checkpoint identifier passed to every ``from_pretrained``
            call, so the processor, config and weights always come from the
            same checkpoint.
        device: Device the model is moved to with ``.to(device)``.
        dtype: Passed as ``torch_dtype`` when loading the model weights.

    Returns:
        ``(processor, model, device, dtype, generation_config)``. The order
        matches the trailing positional parameters of ``generate`` so the
        tuple can be unpacked straight into it.
    """
    # trust_remote_code=True lets transformers execute the modelling code
    # shipped with the checkpoint; only use it with checkpoints you trust.
    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
    generation_config = GenerationConfig.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=dtype, trust_remote_code=True
    ).to(device)

    return processor, model, device, dtype, generation_config

def generate(images, prompt, processor, model, device, dtype, generation_config):
    """Ask the model one question about ``images`` and return the decoded text.

    The prompt is wrapped in the `` USER: <s>... ASSISTANT: <s>`` template,
    run through ``processor`` together with the images, and the processor
    output is moved with ``.to(device=..., dtype=...)`` before generation.
    Only the first sequence returned by ``model.generate`` is decoded, with
    special tokens skipped.
    """
    chat_text = f" USER: <s>{prompt} ASSISTANT: <s>"
    model_inputs = processor(images=images, text=chat_text, return_tensors="pt")
    model_inputs = model_inputs.to(device=device, dtype=dtype)

    generated = model.generate(**model_inputs, generation_config=generation_config)
    first_sequence = generated[0]
    return processor.tokenizer.decode(first_sequence, skip_special_tokens=True)


In [4]:
# Load the processor, model and generation settings once. The resulting tuple
# is unpacked into generate(...) as `*model_params` by the cells below.
model_params = setup_model() 




Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]



In [5]:
# prompt playground: send a single prompt about one test image to the model.
pathology = "pleural effusion"  # only referenced by the alternative prompts below
# Alternative prompts (uncomment exactly one to try it):
# prompt = f"Is there any {pathology} in the image?"
# prompt = f"Give the location of the {pathology} in the image in a bounding box format."
# prompt = f"Localise the region of {pathology} in the image in a bounding box format."
# prompt = f"Is the {pathology} on the left or the right in the image?"
prompt = "What pathologies are in the image?"
# prompt = "Describe the findings in the image"
image_id = "e0dc2e79105ad93532484e956ef8a71a"
train_png_dset_path = Path('/vol/biodata/data/chest_xray/VinDr-CXR/1.0.0_png_512/raw/train')

test_png_dset_path = Path('/vol/biodata/data/chest_xray/VinDr-CXR/1.0.0_png_512/raw/test')
image_path = test_png_dset_path / f"{image_id}.png"
# The context manager closes the file as soon as the RGB copy has been made.
with Image.open(image_path) as raw_image:
    image = raw_image.convert("RGB")

images = [image]
response = generate(images, prompt, *model_params)
print(response)
# Split the comma-separated reply into clean names: strip the whitespace
# around each entry and drop entries that are empty after stripping.
pathologies = [name.strip() for name in response.split(',') if name.strip()]
print(pathologies)


  [torch.tensor(pixel_values) for pixel_values in encoding_image_processor["pixel_values"]]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


### VinDr Dataset Generations

In [None]:
# VinDr-CXR paths, built from their shared root directories.
_VINDR_PNG_ROOT = Path('/vol/biodata/data/chest_xray/VinDr-CXR/1.0.0_png_512/raw')
_VINDR_DERIVED_ROOT = Path('/vol/biomedic3/bglocker/ugproj2324/nns20/datasets/VinDr-CXR')
_VINDR_REASONING_ROOT = _VINDR_DERIVED_ROOT / 'image_text_reasoning_datasets'

# Test split
test_png_dset_path = _VINDR_PNG_ROOT / 'test'
test_pathology_left_or_right = _VINDR_REASONING_ROOT / 'test_pathology_left_or_right'
test_all_left_or_right = _VINDR_REASONING_ROOT / 'test_all_left_or_right'
test_test_split = _VINDR_DERIVED_ROOT / 'test_set_three_splits' / 'VinDr_test_test_split.txt'

# Train split
train_png_dset_path = _VINDR_PNG_ROOT / 'train'
train_pathology_left_or_right = (
    _VINDR_REASONING_ROOT
    / 'train_pathology_left_or_right_unaninmous_agreement_random_radiologist'
)


In [14]:
# Choose which VinDr-CXR split the PNG directory and annotation path point at.
train = False
if train:
    pathology_path = train_pathology_left_or_right
    png_dset_path = train_png_dset_path
else:
    pathology_path = test_pathology_left_or_right
    png_dset_path = test_png_dset_path
# NOTE(review): `pathology_path` is not read in this cell, and the loop below
# always iterates `test_test_split` even when `train` is True. Confirm that is
# intended before running with train = True.

# Same question for every image, so build it once outside the loop.
prompt = "What pathologies are in the image?"

responses = []
seen_image_ids = set()
# Stays -1 when the split file has no lines, so the summary print below can
# never hit an undefined `index`.
index = -1
with open(test_test_split) as f:
    for index, line in enumerate(f):
        # The image id is the first comma-separated field of each line.
        image_id = line.split(',')[0].strip()
        # Skip blank lines (they would resolve to ".png") and ids that were
        # already processed.
        if not image_id or image_id in seen_image_ids:
            continue
        seen_image_ids.add(image_id)
        with Image.open(png_dset_path / f"{image_id}.png") as raw_image:
            image = raw_image.convert("RGB")

        response = generate([image], prompt, *model_params)
        # NOTE(review): the response is written verbatim; a comma or newline
        # inside it ends up in the row unchanged.
        responses.append(f"{image_id},{response}")

        # Optional follow-up experiment: localise each reported pathology.
        # pathologies = response.split(',')
        # for pathology in pathologies:
        #     prompt = f"Localize the {pathology}, is it on the RIGHT or the LEFT or the RIGHT AND LEFT of the given image?"
        #     response = generate([image], prompt, *model_params)
        #     responses.append(f"{image_id},{pathology},{response}")


# One "image_id,response" row per line, no trailing newline.
with open('vinDr_identify_pathologies_0.5', 'w') as f:
    f.write('\n'.join(responses))
print(f"Written up to index {index}")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for o

### CheXpert Dataset Generations

In [7]:
# CheXpert paths, both located under one dataset root.
_CHEXPERT_ROOT = Path('/vol/biomedic3/bglocker/ugproj2324/nns20/datasets/CheXpert')
small_test_root = _CHEXPERT_ROOT / 'small'
small_test_csv = _CHEXPERT_ROOT / 'test.csv'

In [18]:

# CheXpert specific: ask the model which pathologies it sees in every unique
# image listed in the test CSV and collect one "image_id,response" row each.

# Same question for every image, so build it once outside the loop.
prompt = "What pathologies are in the image?"

responses = []
seen_image_ids = set()
# Stays -1 when the CSV has no data rows, so the summary print below can
# never hit an undefined `index`.
index = -1
with open(small_test_csv) as f:
    f.readline()  # skip header
    for index, line in enumerate(f):
        # The image path (relative to small_test_root) is the first field.
        image_id = line.split(',')[0].strip()
        # Skip blank lines and paths that were already processed.
        if not image_id or image_id in seen_image_ids:
            continue
        seen_image_ids.add(image_id)
        with Image.open(small_test_root / image_id) as raw_image:
            image = raw_image.convert("RGB")

        response = generate([image], prompt, *model_params)
        # The output row uses the id without its first path component.
        relative_id = "/".join(image_id.split('/')[1:])
        # NOTE(review): the response is written verbatim; a comma or newline
        # inside it ends up in the row unchanged.
        responses.append(f"{relative_id},{response}")

        # Optional follow-up experiment: localise each reported pathology.
        # pathologies = response.split(',')
        # for pathology in pathologies:
        #     prompt = f"Localize the {pathology}, is it on the RIGHT or the LEFT or the RIGHT AND LEFT of the given image?"
        #     response = generate([image], prompt, *model_params)
        #     responses.append(f"{image_id},{pathology},{response}")


# One "image_id,response" row per line, no trailing newline.
with open('chexpert_identify_pathologies_0.5', 'w') as f:
    f.write('\n'.join(responses))
print(f"Written up to index {index}")

  [torch.tensor(pixel_values) for pixel_values in encoding_image_processor["pixel_values"]]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` t

### CheXbench Evaluations

In [8]:
# Image directory and task file read by the image-text reasoning cell.
openi_dataset_path = Path('/vol/biodata/data/chest_xray/OpenI') / 'NLMCXR_png'
image_text_reasoning_file = (
    Path('/vol/biomedic3/bglocker/ugproj2324/nns20/datasets/CheXbench')
    / 'image_text_reasoning_task'
)

In [23]:
# Two-option image-text reasoning: for every row of the task file, show the
# model the image plus the question and both options, and record its reply.
image_text_reasoning_responses = []
# newline='' is what the csv module expects so that it can handle line endings
# inside quoted fields itself.
with open(image_text_reasoning_file, newline='') as f:
    # csv.reader (rather than line.split(',')) keeps quoted fields that contain
    # commas in one piece and does not leave a trailing newline on the last
    # column.
    reader = csv.reader(f)
    next(reader, None)  # skip header
    for row in reader:
        if not row:
            continue  # blank line in the file
        # Column positions used by this task file: 2 = image file name,
        # 3 = question, 5 and 6 = the two answer options.
        image_id, question, option_1, option_2 = row[2], row[3], row[5], row[6]
        with Image.open(openi_dataset_path / f"{image_id}") as raw_image:
            image = raw_image.convert("RGB")

        # construct a prompt
        prompt = f"{question} Option 1:{option_1} Option 2: {option_2}."
        # Guided variant:
        # prompt = f"{question} Option 1:{option_1} Option 2: {option_2}. Answer with 1 for Option 1 and 2 for Option 2."
        response = generate([image], prompt, *model_params)
        image_text_reasoning_responses.append(f"{image_id},{response}")


  [torch.tensor(pixel_values) for pixel_values in encoding_image_processor["pixel_values"]]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` t

In [24]:
# Save the collected "image_id,response" rows, one per line, no trailing newline.
output_text = '\n'.join(image_text_reasoning_responses)
with open('openi_itr_unguided_responses', 'w') as out_file:
    out_file.write(output_text)

: 