In [1]:
import os
os.environ['HF_HOME'] = '/vol/biomedic3/bglocker/ugproj2324/nns20/cxr-agent/.hf_cache' ## THIS HAS TO BE BEFORE YOU IMPORT TRANSFORMERS

import transformers
import torch

from abc import ABC, abstractmethod
from pathlib import Path
from typing import Optional
from my_secrets import LLAMA3_INSTRUCT_ACCESS_TOKEN
from agent_utils import select_best_gpu

from pathology_detector import PathologyDetector, CheXagentVisionTransformerPathologyDetector
from pathology_sets import Pathologies

from phrase_grounder import PhraseGrounder, BioVilTPhraseGrounder

class GenerationEngine(ABC):
    @abstractmethod
    def generate_report(self, image_path: Path, prompt: Optional[str], output_dir: Optional[str]) -> str:
        pass


class Llama3Generation(GenerationEngine):

    def __init__(self):
        self.model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

        self.pipeline = transformers.pipeline(
            "text-generation",
            model=self.model_id,
            model_kwargs={"torch_dtype": torch.bfloat16},
            device_map= select_best_gpu() ,
            token=LLAMA3_INSTRUCT_ACCESS_TOKEN,
        )


    def generate_report(self, image_path: Path, prompt: Optional[str], pathology_detector: PathologyDetector, phrase_grounder: Optional[PhraseGrounder]) -> str:
        
        if pathology_detector is not None:
            pathology_confidences = pathology_detector.detect_pathologies(image_path, threshold = 0.5)
        else:
            return RuntimeError("Pathology detector not provided")
        
        

        #### PROMPT PIPELINE ###
        
        system_prompt = """You are a helpful assistant, specialising in radiology and interpreting Chest X-rays. Please answer concisely and in a professional manner."""

        user_prompt = f"""Using specialised pathology detection tools,
        you are given the following pathology detection results for a chest X-ray:
        {pathology_confidences}

        Please note the closer the value to 1, the more likely the pathology is present in the image. 
        Write up a findings section based on these observations"""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt },
        ]

        prompt = self.pipeline.tokenizer.apply_chat_template(
                messages, 
                tokenize=False, 
                add_generation_prompt=True
        )

        terminators = [
            self.pipeline.tokenizer.eos_token_id,
            self.pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
        ]

        outputs = self.pipeline(
            prompt,
            max_new_tokens=256,
            eos_token_id=terminators,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
        )

        print(outputs[0]["generated_text"][len(prompt):])
        return outputs[0]["generated_text"][len(prompt):]

In [2]:
# l3 = Llama3Generation()
# pathology_detector = CheXagentVisionTransformerPathologyDetector(pathologies=Pathologies.CHEXPERT)
phrase_grounder = BioVilTPhraseGrounder()

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'BertTokenizer'. 
The class this function is called from is 'CXRBertTokenizer'.
You are using a model of type bert to instantiate a model of type cxr-bert. This is not supported for all configurations of models and can yield errors.
  return self.fget.__get__(instance, owner)()
Some weights of the model checkpoint at microsoft/BiomedVLP-BioViL-T were not used when initializing CXRBertModel: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing CXRBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing CXRBertModel from the checkpoint of a model that you expect to 

Using downloaded and verified file: /tmp/biovil_t_image_model_proj_size_128.pt
GPU 0: NVIDIA TITAN RTX, Free memory: 22965 MB
GPU 1: NVIDIA TITAN RTX, Free memory: 24109 MB
Selecting GPU 1 with 24109 MB free memory, Device = cuda:1


In [5]:
vindr_image_id = "7a74f4e463b72f018838f26cf7aabdf2"
vindr_png_path = Path('/vol/biodata/data/chest_xray/VinDr-CXR/1.0.0_png_512/raw/test')
image_path = vindr_png_path / f"{vindr_image_id}.png"

phrase_grounder_output = phrase_grounder.get_pathology_lateral_position(image_path, "Aortic enlargement")
print(phrase_grounder_output)

{'left Aortic enlargement': 0.2655527722760835, 'right Aortic enlargement': 0.2631697580533245}


In [4]:
chexpert_test_csv_path = Path("/vol/biodata/data/chest_xray/CheXpert-v1.0-small/CheXpert-v1.0-small/test.csv")
chexpert_test_path = Path("/vol/biomedic3/bglocker/ugproj2324/nns20/datasets/CheXpert/small/")

with open(chexpert_test_csv_path, 'r') as f:
    lines = f.readlines()
    header = lines[0].split(",")[1:]
    # print(header)
    for i, line in enumerate(lines[1:]):
        if i % 1000 == 0:
            print(f"Collecting image {i}")

        image_path = line.split(",")[0]
        print(f"{image_path=}")
        image_path = chexpert_test_path / image_path

        l3.generate_report(image_path, prompt = None, pathology_detector=pathology_detector)
        break


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Collecting image 0
image_path='test/patient64741/study1/view1_frontal.jpg'
Findings:

The chest X-ray examination reveals the following abnormalities:

* Cardiomegaly is moderately likely, with a detection score of 0.59.
* Lung Opacity is highly likely, with a detection score of 0.86, suggesting the presence of a significant amount of abnormal lung tissue.
* Atelectasis is moderately likely, with a detection score of 0.58, indicating possible collapse or consolidation of lung tissue.
* Pleural Effusion is highly likely, with a detection score of 0.96, suggesting the presence of fluid in the pleural space.
* Support Devices are highly likely, with a detection score of 0.93, indicating the presence of external devices such as oxygen tubing or central lines.

Overall, the findings suggest the presence of significant lung pathology, including lung opacity and pleural effusion, with possible atelectasis and cardiomegaly. Further evaluation and imaging studies may be necessary to fully chara