In [1]:
%env CUDA_VISIBLE_DEVICES=2

env: CUDA_VISIBLE_DEVICES=2


In [9]:
from medvqa.datasets.iuxray import IUXRAY_REPORTS_MIN_JSON_PATH
from medvqa.utils.files import (
    get_checkpoint_folder_path,
    get_results_folder_path,
    load_json, load_pickle,
    save_pickle,
)
from nltk import sent_tokenize

def _load_iuxray_sentences():
    print(f'Loading iuxray reports from {IUXRAY_REPORTS_MIN_JSON_PATH}')
    reports = load_json(IUXRAY_REPORTS_MIN_JSON_PATH)
    sentences = set()
    for r in reports.values():
        findings = r['findings']
        impression = r['impression']
        for x in (findings, impression):
            if x:
                for s in sent_tokenize(x):
                    s = ' '.join(s.split()) # Remove extra spaces
                    sentences.add(s)
    sentences = list(sentences)
    sentences.sort(key=lambda x: (len(x), x)) # Sort by length and then alphabetically
    sentences = [s for s in sentences if any(c.isalpha() for c in s)] # Remove sentences without any alphabetic character
    print(f'Number of sentences: {len(sentences)}')
    print(f'Shortest sentence: {sentences[0]}')
    print(f'Longest sentence: {sentences[-1]}')
    return sentences

sentences = _load_iuxray_sentences()

print('Loading CheXpert labeler')
from medvqa.metrics.medical.chexpert import ChexpertLabeler
chexpert_labeler = ChexpertLabeler()
chexpert_labels = chexpert_labeler.get_labels(sentences, update_cache_on_disk=True)

print('Loading CheXbert labeler')
from medvqa.metrics.medical.chexbert import CheXbertLabeler
chexbert_labeler = CheXbertLabeler()
chexbert_labels = chexbert_labeler.get_labels(sentences, update_cache_on_disk=True)

print('Loading RadGraph labeler')
from medvqa.metrics.medical.radgraph import RadGraphLabeler
radgraph_labeler = RadGraphLabeler()
radgraph_labels = radgraph_labeler.get_labels(sentences, update_cache_on_disk=True)    

Loading iuxray reports from /mnt/workspace/iu-x-ray/dataset/reports/reports.min.json
Number of sentences: 8617
Shortest sentence: KUB.
Longest sentence: The infrahilar pulmonary markings appear slightly prominent bilaterally, which XXXX represents XXXX appearance for the patient but difficult to completely exclude some reactive airway/bronchitic changes in the absence of comparison radiographs.. No airspace consolidation or lobar atelectasis.
Loading CheXpert labeler
Loading CheXbert labeler


8617it [00:00, 18598.40it/s]


Loading RadGraph labeler


In [44]:
import numpy as np
from medvqa.utils.metrics import jaccard_between_dicts

i = 3001

relevant_sentences = []
if len(radgraph_labels[i]) > 0:
    for j in range(len(sentences)):
        if i != j:
            js = jaccard_between_dicts(radgraph_labels[i], radgraph_labels[j])
            if js >= 0.4 or ((np.all(chexpert_labels[i] == chexpert_labels[j]) or \
                np.all(chexbert_labels[i] == chexbert_labels[j])) and js >= 0.2):
                relevant_sentences.append(j)
len(relevant_sentences)

221

In [45]:
print(sentences[i])
print()
for j in range(min(40, len(relevant_sentences))):
    print(sentences[relevant_sentences[j]])

Minimal degenerative disease thoracic spine

Thoracic spine.
Degenerative spine.
DISH of the thoracic spine.
Degenerative changes spine.
Minimal thoracic spondylosis.
Degenerative changes the spine.
Degenerative change of the spine.
Degenerative changes in the spine.
Degenerative changes of the spine.
Scattered thoracic spine spurring.
Degenerative changes of the spine..
Degenerative changes thoracic spine.
Levoscoliosis of the thoracic spine.
Degenerative changes of the the spine.
Dextroscoliosis of the thoracic spine.
degenerative changes within the spine.
Degenerative changes are present spine.
Degenerative changes of thoracic spine.
Degenerative disease of thoracic spine.
Mild degenerative changes in the spine.
Mild degenerative changes of the spine.
Mild thoracic spine degenerative change
Degenerative changes noted in the spine.
Degenerative spurring of thoracic spine.
Mild degenerative change thoracic spine.
Mild thoracic spine degenerative change.
No XXXX fractures of the thorac

In [70]:
chexpert_labels.shape, chexbert_labels.shape

((8617, 14), (8617, 14))

In [72]:
radgraph_labels[1]

{(11, 652562049788717786): 1}

In [6]:
from medvqa.eval_fact_embedding_on_ranking import _load_mimiccxr_radiologist_annotations

In [7]:
sentences, labels = _load_mimiccxr_radiologist_annotations()

In [11]:
import numpy as np

In [37]:
def score(a, b):
    for x, y in zip(a, b):
        if x == -2:
            continue
        if x != y:
            return 0
    return 1

In [41]:
sorted_idxs[0]

0

In [42]:
labels[0]

array([ 1, -2, -2, -2, -2, -2, -2,  1, -2, -2, -2, -2, -2, -2], dtype=int8)

In [66]:
idx = 11
print(sentences[idx])
sorted_idxs = np.argsort([score(labels[idx], x) or score(x, labels[idx]) for x in labels])[::-1]
print()
for i in range(100):
    j = sorted_idxs[i]
    print(sentences[j],  np.mean(labels[j] == labels[idx]), score(labels[idx], labels[j]) or score(labels[j], labels[idx]))

recurrent right lower lobe patchy airspace opacity , suggestive of waxing and waning atelectasis .

band of atelectasis crossing the left hilus has been present for several days . 0.8571428571428571 1
lungs are clear except for minimal patchy atelectasis in the right infrahilar region . 0.8571428571428571 1
lungs are clear except for minimal lingular atelectasis or scar . 0.8571428571428571 1
left lung is clear except for minor linear atelectasis in the left mid and lower lung regions . 0.8571428571428571 1
lungs are clear except for linear atelectasis at the lung bases , left greater than right . 0.8571428571428571 1
aside from mild atelectasis at the right base , lungs are clear . 0.8571428571428571 1
aside from atelectasis in the right base the lungs are clear . 0.8571428571428571 1
aside from atelectasis , left lung is grossly clear . 0.8571428571428571 1
no substantial atelectasis except for the retrocardiac lung area . 0.8571428571428571 1
left lung is clear aside from minimal le

In [18]:
from importlib import reload
import medvqa

In [24]:
reload(medvqa.eval_fact_embedding_on_ranking)

<module 'medvqa.eval_fact_embedding_on_ranking' from '/home/pamessina/medvqa/medvqa/eval_fact_embedding_on_ranking.py'>

In [25]:
from medvqa.eval_fact_embedding_on_ranking import SentenceRanker

In [26]:
sr = SentenceRanker('mimiccxr_rad_annotations')

In [63]:
query_idx = 11
output = sr.rank_sentences(query_idx, model_name='microsoft/BiomedVLP-CXR-BERT-specialized',
                           checkpoint_folder_path='/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231002_152825_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)',
                           top_k=100)

100%|███████████████████████████████████████████████████████████████████████████| 1165/1165 [00:00<00:00, 401905.26it/s]


In [56]:
query_idx = 6
output = sr.rank_sentences(query_idx, model_name='emilyalsentzer/Bio_ClinicalBERT', average_token_embeddings=True,
                           top_k=100)

100%|███████████████████████████████████████████████████████████████████████████| 1165/1165 [00:00<00:00, 434111.95it/s]


In [64]:
print(sr.sentences[query_idx])
print()
output

recurrent right lower lobe patchy airspace opacity , suggestive of waxing and waning atelectasis .



(['recurrent right lower lobe patchy airspace opacity , suggestive of waxing and waning atelectasis .',
  'pneumonia or atelectasis at the right lung base .',
  'there is patchy opacity at the right base which could reflect compressive atelectasis .',
  'right lower lobe collapse is persistent .',
  'consolidation continues to progress in the right lower lobe .',
  'patchy opacity at the right base may represent partial lower lobe atelectasis , although pneumonia or aspiration cannot be entirely excluded .',
  'subtle opacification at the right lung base , which may represent atelectasis , however an early developing pneumonia is a consideration .',
  'subtle opacity at the base of the right lung could represent atelectasis however infection should be considered in the appropriate clinical setting .',
  'focal opacity at the right lung base has increased , either an atelectasis or consolidation .',
  'subtle patchy right base opacity could be due to aspiration , infection or atelectasi

# MIMIC-CXR Radiologist Annotations

#### microsoft/BiomedVLP-CXR-BERT-specialized

In [216]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=116,1396652957813721259).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 128)
100%|█████████████████████████████████████| 1165/1165 [00:00<00:00, 5315.63it/s]
Embeddings shape: (1165, 128)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each senten

#### microsoft/BiomedVLP-BioViL-T

In [217]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedVLP-BioViL-T" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedVLP-BioViL-T
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=104,2678825193822464594).pkl
len(self.cache["hashes"]) = 4451804
self.cache["embeddings"].shape = (4451804, 128)
100%|██████████████████████████████████████| 1165/1165 [00:04<00:00, 236.70it/s]
Embeddings shape: (1165, 128)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each sentence as quer

#### michiyasunaga/BioLinkBERT-large (cls)

In [218]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "michiyasunaga/BioLinkBERT-large" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: michiyasunaga/BioLinkBERT-large
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=107,40983794350539675).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 1024)
100%|█████████████████████████████████████| 1165/1165 [00:00<00:00, 1517.76it/s]
Embeddings shape: (1165, 1024)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each sentence as que

#### michiyasunaga/BioLinkBERT-large (tok)

In [219]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "michiyasunaga/BioLinkBERT-large" \
--batch_size 100 \
--average_token_embeddings

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: michiyasunaga/BioLinkBERT-large
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: True
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=132,1253761809285128054).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 1024)
100%|█████████████████████████████████████| 1165/1165 [00:00<00:00, 1489.36it/s]
Embeddings shape: (1165, 1024)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each sentence as qu

#### microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext (cls)

In [220]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=137,1948119357925567498).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 768)
100%|█████████████████████████████████████| 1165/1165 [00:00<00:00, 1819.72it/s]
Embeddings shape: (1165, 768)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking 

#### microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext (cls)

In [221]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext" \
--batch_size 100 \
--average_token_embeddings

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: True
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=162,1758876944325999998).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 768)
100%|█████████████████████████████████████| 1165/1165 [00:00<00:00, 1819.70it/s]
Embeddings shape: (1165, 768)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking t

#### emilyalsentzer/Bio_ClinicalBERT (cls)

In [222]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "emilyalsentzer/Bio_ClinicalBERT" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: emilyalsentzer/Bio_ClinicalBERT
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=107,1542209846559636901).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 768)
100%|█████████████████████████████████████| 1165/1165 [00:00<00:00, 1776.79it/s]
Embeddings shape: (1165, 768)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each sentence as que

#### emilyalsentzer/Bio_ClinicalBERT (tok)

In [223]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "emilyalsentzer/Bio_ClinicalBERT" \
--batch_size 100 \
--average_token_embeddings

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: emilyalsentzer/Bio_ClinicalBERT
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: True
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=132,412155249657523952).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 768)
100%|█████████████████████████████████████| 1165/1165 [00:00<00:00, 3205.03it/s]
Embeddings shape: (1165, 768)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each sentence as query

#### CheXbert

In [235]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "CheXbert" \
--device "cuda" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: CheXbert
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
100%|███████████████████████████████████████| 1165/1165 [00:13<00:00, 85.50it/s]
Embeddings shape: (1165, 768)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each sentence as query[0m
100%|█████████████████████████████████████| 1165/1165 [00:01<00:00, 1125.10it/s]
[1m[35mmean_AUC: 0.8698[0m
mean_relevant: 57.8080
count: 1156 / 1165 (99.23%)


#### CXR Fact Encoder (T)

In [224]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230927_194923_MIMIC-CXR(triplets-only)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230927_194923_MIMIC-CXR(triplets-only)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
checkpoint_names = ['checkpoint_40_ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9355.pt']
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=359,388447562913600091).pkl
len(self.cache["hashes"]) = 144285
self.ca

#### CXR Fact Encoder (T -> T+EC)

In [227]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231003_001641_MIMIC-CXR(triplets+entcont)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231003_001641_MIMIC-CXR(triplets+entcont)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
checkpoint_names = ['checkpoint_1_encc+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9444.pt']
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=366,4246634492375333681).pkl
len(self.cache["hashes"]) = 13485

#### CXR Fact Encoder (T -> T+C)

In [228]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_013825_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_013825_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
checkpoint_names = ['checkpoint_64_cacc+chf1+chf1+cscc+hscc+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9474.pt']
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=387,2492157991032224976).pkl
len(self.cac

In [4]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231004_155915_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231004_155915_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
checkpoint_names = ['checkpoint_29_cacc+chf1+chf1+cscc+hscc+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9495.pt']
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=387,1308360022390228203).pkl
len(self.cach

#### CXR Fact Encoder (T -> T+EC+NLI)

In [229]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231002_220207_MIMIC-CXR(triplets+entcon+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231002_220207_MIMIC-CXR(triplets+entcon+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
checkpoint_names = ['checkpoint_52_encc+nlcc+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9002.pt']
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=375,1493189425621868816).pkl
len(self.cache["hashes"]

#### CXR Fact Encoder (T -> T+R)

In [230]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_110825_MIMIC-CXR(triplets+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_110825_MIMIC-CXR(triplets+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
checkpoint_names = ['checkpoint_93_spss+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9415.pt']
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=368,1866338409828607810).pkl
len(self.cache["hashes"]) = 134

#### CXR Fact Encoder (T -> T+C+EC+NLI+R)

In [231]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231002_152825_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231002_152825_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
checkpoint_names = ['checkpoint_26_cacc+chf1+chf1+cscc+encc+hscc+nlcc+spss+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9386.pt']
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=423,1

In [3]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231004_174626_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231004_174626_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
checkpoint_names = ['checkpoint_50_cacc+chf1+chf1+cscc+encc+hscc+nlcc+spss+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9373.pt']
  0%|                                                  | 0/1165 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=423,3

#### CXR Fact Encoder (T -> T+SD)

In [2]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231010_210936_MIMIC-CXR(triplets+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231010_210936_MIMIC-CXR(triplets+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
checkpoint_names = ['checkpoint_166_sass+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9026.pt']
100%|███████████████████████████████████| 1165/1165 [00:00<00:00, 533387.64it/s]
Computing embeddings for 1165 new texts
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has be

#### CXR Fact Encoder (T -> T+C+EC+NLI+SD)

In [4]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "mimiccxr_radiologist_annotations" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231013_073812_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: mimiccxr_radiologist_annotations
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231013_073812_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
Loading mimiccxr sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/mimiccxr_sentences_and_relevant.pkl
len(sentences): 1165
len(relevant_sentences): 1165
checkpoint_names = ['checkpoint_1_cacc+chf1+chf1+cscc+encc+hscc+nlcc+sass+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9160.pt']
100%|███████████████████████████████████| 1165/1165 [00:00<00:00, 518612.20it/s]
Computing embeddings for 1165 new texts
Explicitly passing a `revision` is encouraged when loading a configuratio

# IU X-ray Automatic Labelers (chexpert + chexbert + radgraph)

#### microsoft/BiomedVLP-CXR-BERT-specialized

In [47]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray reports from /mnt/workspace/iu-x-ray/dataset/reports/reports.min.json
Number of sentences: 8617
Shortest sentence: KUB.
Longest sentence: The infrahilar pulmonary markings appear slightly prominent bilaterally, which XXXX represents XXXX appearance for the patient but difficult to completely exclude some reactive airway/bronchitic changes in the absence of comparison radiographs.. No airspace consolidation or lobar atelectasis.
Loading CheXpert labeler
Loading CheXbert labeler
8617it [00:00, 17841.73it/s]
Loading RadGraph labeler
100%|███████████████████████████████████████| 8617/8617 [03:07<00:00, 45.92it/s]
Saving iuxray sentences and relevant sentences to /mnt/data/pamessina/workspaces/medvqa-w

#### microsoft/BiomedVLP-BioViL-T

In [48]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedVLP-BioViL-T" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedVLP-BioViL-T
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=104,2678825193822464594).pkl
len(self.cache["hashes"]) = 4451804
self.cache["embeddings"].shape = (4451804, 128)
100%|█████████████████████████████████████| 8617/8617 [00:03<00:00, 2565.91it/s]
Embeddings shape: (8617, 128)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each sen

#### michiyasunaga/BioLinkBERT-large (cls)

In [49]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "michiyasunaga/BioLinkBERT-large" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: michiyasunaga/BioLinkBERT-large
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=107,40983794350539675).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 1024)
100%|████████████████████████████████████| 8617/8617 [00:00<00:00, 26704.83it/s]
Embeddings shape: (8617, 1024)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each se

#### michiyasunaga/BioLinkBERT-large (tok)

In [50]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "michiyasunaga/BioLinkBERT-large" \
--batch_size 100 \
--average_token_embeddings

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: michiyasunaga/BioLinkBERT-large
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: True
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=132,1253761809285128054).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 1024)
100%|████████████████████████████████████| 8617/8617 [00:00<00:00, 26889.74it/s]
Embeddings shape: (8617, 1024)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each s

#### microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext (cls)

In [51]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=137,1948119357925567498).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 768)
100%|████████████████████████████████████| 8617/8617 [00:00<00:00, 32819.15it/s]
Embeddings shape: (8617, 768)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embedding

#### microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext (tok)

In [52]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext" \
--batch_size 100 \
--average_token_embeddings

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: True
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=162,1758876944325999998).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 768)
100%|████████████████████████████████████| 8617/8617 [00:00<00:00, 33578.34it/s]
Embeddings shape: (8617, 768)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings

#### emilyalsentzer/Bio_ClinicalBERT (cls)

In [53]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "emilyalsentzer/Bio_ClinicalBERT" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: emilyalsentzer/Bio_ClinicalBERT
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=107,1542209846559636901).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 768)
100%|████████████████████████████████████| 8617/8617 [00:00<00:00, 33897.08it/s]
Embeddings shape: (8617, 768)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each se

#### emilyalsentzer/Bio_ClinicalBERT (tok)

In [54]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "emilyalsentzer/Bio_ClinicalBERT" \
--batch_size 100 \
--average_token_embeddings

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: emilyalsentzer/Bio_ClinicalBERT
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: True
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=132,412155249657523952).pkl
len(self.cache["hashes"]) = 144285
self.cache["embeddings"].shape = (144285, 768)
100%|████████████████████████████████████| 8617/8617 [00:00<00:00, 32540.98it/s]
Embeddings shape: (8617, 768)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each sent

#### CheXbert

In [55]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "CheXbert" \
--device "cuda"

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: CheXbert
   device: cuda
   batch_size: 32
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
100%|███████████████████████████████████████| 8617/8617 [01:34<00:00, 91.56it/s]
Embeddings shape: (8617, 768)
Normalizing embeddings (for cosine similarity)
[93m[1mEvaluating embeddings on ranking task with each sentence as query[0m
100%|██████████████████████████████████████| 8617/8617 [01:00<00:00, 143.33it/s]
[1m[35mmean_AUC: 0.9327[0m
mean_relevant: 63.7905
count: 7742 / 8617 (89.85%)


#### CXR Fact Encoder (T)

In [56]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230927_194923_MIMIC-CXR(triplets-only)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230927_194923_MIMIC-CXR(triplets-only)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
checkpoint_names = ['checkpoint_40_ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9355.pt']
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=359,388447562913600091).pkl
len(self.cache["hashes"]) = 1

#### CXR Fact Encoder (T -> T+EC)

In [57]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231003_001641_MIMIC-CXR(triplets+entcont)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231003_001641_MIMIC-CXR(triplets+entcont)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
checkpoint_names = ['checkpoint_1_encc+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9444.pt']
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=366,4246634492375333681).pkl
len(self.cache["hashe

#### CXR Fact Encoder (T -> T+C)

In [58]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_013825_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_013825_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
checkpoint_names = ['checkpoint_64_cacc+chf1+chf1+cscc+hscc+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9474.pt']
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=387,2492157991032224976).pkl


In [59]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231004_155915_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231004_155915_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
checkpoint_names = ['checkpoint_29_cacc+chf1+chf1+cscc+hscc+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9495.pt']
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=387,1308360022390228203).pkl


#### CXR Fact Encoder (T -> T+EC+NLI)

In [60]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231002_220207_MIMIC-CXR(triplets+entcon+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231002_220207_MIMIC-CXR(triplets+entcon+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
checkpoint_names = ['checkpoint_52_encc+nlcc+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9002.pt']
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=375,1493189425621868816).pkl
len(self.cac

#### CXR Fact Encoder (T -> T+R)

In [61]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_110825_MIMIC-CXR(triplets+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_110825_MIMIC-CXR(triplets+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
checkpoint_names = ['checkpoint_93_spss+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9415.pt']
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=368,1866338409828607810).pkl
len(self.cache["has

#### CXR Fact Encoder (T -> T+C+EC+NLI+R)


In [62]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231002_152825_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231002_152825_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
checkpoint_names = ['checkpoint_26_cacc+chf1+chf1+cscc+encc+hscc+nlcc+spss+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9386.pt']
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cac

In [63]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231004_174626_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231004_174626_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
checkpoint_names = ['checkpoint_50_cacc+chf1+chf1+cscc+encc+hscc+nlcc+spss+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9373.pt']
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cac

#### CXR Fact Encoder (T -> T+SD)


In [6]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231010_210936_MIMIC-CXR(triplets+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231010_210936_MIMIC-CXR(triplets+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
checkpoint_names = ['checkpoint_166_sass+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9026.pt']
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_cache(hash=372,2533955929466572136).pkl
len(self.cache

#### CXR Fact Encoder (T -> T+C+EC+NLI+SD)

In [7]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "iuxray_with_automatic_labelers" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231013_073812_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: iuxray_with_automatic_labelers
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231013_073812_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl
len(sentences): 8617
len(relevant_sentences): 8617
checkpoint_names = ['checkpoint_1_cacc+chf1+chf1+cscc+encc+hscc+nlcc+sass+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9160.pt']
  0%|                                                  | 0/8617 [00:00<?, ?it/s]Loading cached text embeddings from /mnt/workspace/pamessina/medvqa-workspace/cache/text_embeddings_c

In [8]:
from medvqa.eval_fact_embedding_on_ranking import SentenceRanker

sr = SentenceRanker('iuxray_with_automatic_labelers')

Loading iuxray sentences and relevant sentences from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/iuxray_sentences_and_relevant(thr1=0.4,thr2=0.2).pkl


In [14]:
query_idx = 4010
print(sr.sentences[query_idx])
print('num_relevant:', len(sr.relevant_sentences[query_idx]))
print()
for i, idx in enumerate(sr.relevant_sentences[query_idx]):
    if i == 30: break
    print(sr.sentences[idx])

Normal heart size and normal mediastinal contours.
num_relevant: 193

The cardio mediastinal silhouette is of normal size and contour.
Mediastinal silhouette is normal.
Heart is normal in size.
Heart and mediastinal contour normal.
Heart size is at upper limits normal.
Heart size is normal lungs are clear.
The heart is normal in size and contour.
Mediastinal silhouette and pulmonary vascularity within normal limits.
Heart size normal and lungs are clear
Mediastinal contour appears normal and pulmonary vascularity is within normal limits.
Mediastinal silhouette and pulmonary vascularity are stable and within normal limits.
Normal heart size, mediastinal and aortic contours.
Mediastinal contours appear normal 7.
Stable normal cardiac size and contour, normal mediastinal silhouette.
Normal cardiac contours.
Heart size and pulmonary XXXX appear normal.
The cardiac mediastinal silhouettes are normal.
Heart size and pulmonary vascularity normal.
The cardiac and mediastinal silhouettes are no

In [16]:
output = sr.rank_sentences(
    query_idx,
    model_name="microsoft/BiomedVLP-CXR-BERT-specialized",
#     checkpoint_folder_path="/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231002_220207_MIMIC-CXR(triplets+entcon+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)",
#     checkpoint_folder_path="/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_013825_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)",
#     checkpoint_folder_path="/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231002_152825_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)",
#     checkpoint_folder_path="/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231004_155915_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)",
    checkpoint_folder_path="/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231013_073812_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)",
#     checkpoint_folder_path="/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231010_210936_MIMIC-CXR(triplets+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/",
    top_k=30,
)
print(sr.sentences[query_idx])
print('num_relevant:', len(sr.relevant_sentences[query_idx]))
print('-'*100)
print()
for i in range(len(output[0])):
    print(output[0][i])
    print(output[1][i])
    print()

100%|███████████████████████████████████████████████████████████████████████████| 8617/8617 [00:00<00:00, 511091.09it/s]

Normal heart size and normal mediastinal contours.
num_relevant: 193
----------------------------------------------------------------------------------------------------

Normal heart size and normal mediastinal contours.
0

Normal heart size and mediastinal contours.
1

Normal heart size and mediastinal silhouette.
1

Normal heart and mediastinal contours.
1

Normal heart size and mediastinum.
1

Normal heart size and mediastinal contour.
1

Normal cardiac size and contour unremarkable mediastinal silhouette.
0

Normal heart size mediastinal contours.
1

Normal heart and mediastinum.
0

Heart and mediastinum normal.
0

Cardiac and mediastinal XXXX appear normal.
1

Normal cardiomediastinal silhouettes.
0

Normal cardiomediastinal contours.
1

Normal cardiac size and contour, unremarkable mediastinal silhouette.
0

Normal size and mediastinal contours.
1

Heart and mediastinal contours are normal.
1

Normal cardiomediastinal silhouette.
0

Heart and mediastinal contour normal.
1

Heart




# Chest ImaGenome Gold Dataset

#### microsoft/BiomedVLP-CXR-BERT-specialized

In [3]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38).pkl
Loaded gold_accuracy matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(gold_accuracy,2412,108).pkl
Loaded chest_imagenome_gold_contradiction

#### microsoft/BiomedVLP-BioViL-T

In [4]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "microsoft/BiomedVLP-BioViL-T" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-BioViL-T
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38).pkl
Loaded gold_accuracy matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(gold_accuracy,2412,108).pkl
Loaded chest_imagenome_gold_contradictions matrix fro

#### michiyasunaga/BioLinkBERT-large

In [5]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "michiyasunaga/BioLinkBERT-large" \
--average_token_embeddings \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: michiyasunaga/BioLinkBERT-large
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: True
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38).pkl
Loaded gold_accuracy matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(gold_accuracy,2412,108).pkl
Loaded chest_imagenome_gold_contradictions matrix f

#### microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext

In [6]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext" \
--average_token_embeddings \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: True
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38).pkl
Loaded gold_accuracy matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(gold_accuracy,2412,108).pkl
Loaded chest_imagenom

#### emilyalsentzer/Bio_ClinicalBERT

In [1]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "emilyalsentzer/Bio_ClinicalBERT" \
--average_token_embeddings \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: emilyalsentzer/Bio_ClinicalBERT
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: True
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38).pkl
Loaded gold_accuracy matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(gold_accuracy,2412,108).pkl
Loaded chest_imagenome_gold_contradictions matrix f

#### CheXbert

In [7]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "CheXbert" \
--device 'cuda' \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: CheXbert
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38).pkl
Loaded gold_accuracy matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(gold_accuracy,2412,108).pkl
Loaded chest_imagenome_gold_contradictions matrix from /mnt/workspace/pa

#### CXR Fact Encoder (T)

In [17]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230927_194923_MIMIC-CXR(triplets-only)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230927_194923_MIMIC-CXR(triplets-only)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38).pkl
Loa

#### CXR Fact Encoder (T+C)

In [18]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_013825_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_013825_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38).pkl


#### CXR Fact Encoder (T+R)

In [19]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_110825_MIMIC-CXR(triplets+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_110825_MIMIC-CXR(triplets+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38).pkl

#### CXR Fact Encoder (T+SD)

In [20]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231010_210936_MIMIC-CXR(triplets+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231010_210936_MIMIC-CXR(triplets+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38)

#### CXR Fact Encoder (T+NLI)

In [21]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_125012_MIMIC-CXR(triplets+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_125012_MIMIC-CXR(triplets+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38).pkl
Load

#### CXR Fact Encoder (T+EC)

In [22]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_080646_MIMIC-CXR(triplets+entcon)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_080646_MIMIC-CXR(triplets+entcon)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38).pkl
L

#### CXR Fact Encoder (T+NLI+EC)

In [23]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240128_173216_MIMIC-CXR(triplets+entcon+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240128_173216_MIMIC-CXR(triplets+entcon+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2412,38).p

#### CXR Fact Encoder (T+C+NLI+EC+R)

In [24]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_213433_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_213433_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,

#### CXR Fact Encoder (T+C+NLI+EC+SD)

In [25]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_151525_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_151525_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevan

#### CXR Fact Encoder (T+C+NLI+EC)

In [2]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240207_235928_MIMIC-CXR(triplets+classif+entcont+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240207_235928_MIMIC-CXR(triplets+classif+entcont+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gold_relevance,2412,70,2

#### CXR Fact Encoder (T+C+NLI+EC+ER+SD)

In [3]:
!python ../eval_fact_embedding_on_ranking.py \
--evaluation_mode "chest_imagenome_gold" \
--chest_imagenome_sentence2labels_gold_filepath \
"/mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl" \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240207_184445_MIMIC-CXR(triplets+classif+entcont+nli+radgraph+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

script's arguments:
   evaluation_mode: chest_imagenome_gold
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240207_184445_MIMIC-CXR(triplets+classif+entcont+nli+radgraph+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Loading sentence2labels_gold from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/chest_imagenome/phrases2labels_gold(num_obs=70,num_anat=38,num_phrases=2412).pkl
Number of sentences: 2412
label.shape: (2412, 108)
Loaded chest_imagenome_gold_relevance matrix from /mnt/workspace/pamessina/medvqa-workspace/cache/score_matrix(chest_imagenome_gol

# PadChest-GR + MS-CXR custom ranking evaluation

#### microsoft/BiomedVLP-CXR-BERT-specialized

In [55]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--batch_size 100 \
--save_worst_queries_for_inspection

[32m2025-05-25 09:30:31,642 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-25 09:30:31,644 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300
   save_worst_queries_for_inspection: True[0m
[32m2025-05-25 09:30:31,644 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_custom_eval_data(num_query_samples=300).pkl[0m
[32m2025-05-25 09:30:31,657 - INFO - __main__[0m - len(query_phrases): 30

#### microsoft/BiomedVLP-BioViL-T

In [5]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-BioViL-T" \
--batch_size 100

[32m2025-05-24 20:58:17,761 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 20:58:17,762 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-BioViL-T
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 20:58:17,762 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_custom_eval_data(num_query_samples=300).pkl[0m
[32m2025-05-24 20:58:17,776 - INFO - __main__[0m - len(query_phrases): 300[0m
[32m2025-05-24 20:58:17,777 - INFO - __main__[0m

#### michiyasunaga/BioLinkBERT-large

In [6]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "michiyasunaga/BioLinkBERT-large" \
--batch_size 100

[32m2025-05-24 20:59:41,120 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 20:59:41,121 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: michiyasunaga/BioLinkBERT-large
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 20:59:41,121 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_custom_eval_data(num_query_samples=300).pkl[0m
[32m2025-05-24 20:59:41,136 - INFO - __main__[0m - len(query_phrases): 300[0m
[32m2025-05-24 20:59:41,136 - INFO - __main__

#### microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext

In [7]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext" \
--batch_size 100

[32m2025-05-24 21:01:29,157 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:01:29,159 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:01:29,159 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_custom_eval_data(num_query_samples=300).pkl[0m
[32m2025-05-24 21:01:29,173 - INFO - __main__[0m - len(query_phrases): 300[0m
[32m2025-05-24 2

#### emilyalsentzer/Bio_ClinicalBERT

In [8]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "emilyalsentzer/Bio_ClinicalBERT" \
--batch_size 100

[32m2025-05-24 21:01:53,302 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:01:53,303 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: emilyalsentzer/Bio_ClinicalBERT
   device: GPU
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:01:53,303 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_custom_eval_data(num_query_samples=300).pkl[0m
[32m2025-05-24 21:01:53,318 - INFO - __main__[0m - len(query_phrases): 300[0m
[32m2025-05-24 21:01:53,318 - INFO - __main__

#### CheXbert

In [10]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "CheXbert" \
--batch_size 100

[32m2025-05-24 21:03:50,170 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:03:50,172 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: CheXbert
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: None
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:03:50,172 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_custom_eval_data(num_query_samples=300).pkl[0m
[32m2025-05-24 21:03:50,185 - INFO - __main__[0m - len(query_phrases): 300[0m
[32m2025-05-24 21:03:50,186 - INFO - __main__[0m - len(candidate_ph

#### CXR Fact Encoder (T)

In [11]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230927_194923_MIMIC-CXR(triplets-only)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

[32m2025-05-24 21:04:22,045 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:04:22,047 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230927_194923_MIMIC-CXR(triplets-only)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:04:22,047 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_custom

#### CXR Fact Encoder (T+C)

In [12]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_013825_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

[32m2025-05-24 21:05:44,415 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:05:44,416 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_013825_MIMIC-CXR(triplets+classif)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:05:44,417 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_cus

#### CXR Fact Encoder (T+R)

In [13]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_110825_MIMIC-CXR(triplets+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

[32m2025-05-24 21:06:59,120 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:06:59,121 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20230928_110825_MIMIC-CXR(triplets+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:06:59,121 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_cu

#### CXR Fact Encoder (T+SD)

In [14]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231010_210936_MIMIC-CXR(triplets+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/" \
--batch_size 100

[32m2025-05-24 21:08:16,449 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:08:16,451 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20231010_210936_MIMIC-CXR(triplets+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)/
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:08:16,451 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscx

#### CXR Fact Encoder (T+NLI)

In [15]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_125012_MIMIC-CXR(triplets+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

[32m2025-05-24 21:08:52,859 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:08:52,860 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_125012_MIMIC-CXR(triplets+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:08:52,860 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_custom_

#### CXR Fact Encoder (T+EC)

In [16]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_080646_MIMIC-CXR(triplets+entcon)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

[32m2025-05-24 21:12:28,290 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:12:28,291 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_080646_MIMIC-CXR(triplets+entcon)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:12:28,291 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_cust

#### CXR Fact Encoder (T+NLI+EC)

In [17]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240128_173216_MIMIC-CXR(triplets+entcon+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

[32m2025-05-24 21:13:10,145 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:13:10,147 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240128_173216_MIMIC-CXR(triplets+entcon+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:13:10,147 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_

#### CXR Fact Encoder (T+C+NLI+EC+R)

In [18]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_213433_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

[32m2025-05-24 21:14:11,743 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:14:11,744 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_213433_MIMIC-CXR(triplets+classif+entcont+nli+radgraph)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:14:11,745 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/

#### CXR Fact Encoder (T+C+NLI+EC+SD)

In [19]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_151525_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

[32m2025-05-24 21:15:39,554 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:15:39,556 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240129_151525_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:15:39,556 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cac

#### CXR Fact Encoder (T+C+NLI+EC)

In [20]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240207_235928_MIMIC-CXR(triplets+classif+entcont+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

[32m2025-05-24 21:16:44,058 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:16:44,060 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240207_235928_MIMIC-CXR(triplets+classif+entcont+nli)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:16:44,060 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_

#### CXR Fact Encoder (T+C+NLI+EC+ER+SD)

In [21]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240207_184445_MIMIC-CXR(triplets+classif+entcont+nli+radgraph+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100

[32m2025-05-24 21:17:20,280 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-24 21:17:20,281 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240207_184445_MIMIC-CXR(triplets+classif+entcont+nli+radgraph+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300[0m
[32m2025-05-24 21:17:20,281 - INFO - __main__[0m - Loading evaluation data from /mnt/data/pamessina/workspaces/medvqa-work

#### CXR Fact Encoder (T+C+NLI+EC+ER+SD) v2

In [34]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240629_084405_MIMIC-CXR(triplets+classif+entcont+nli+radgraph+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100 \
--save_worst_queries_for_inspection

[32m2025-05-25 08:59:36,728 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-05-25 08:59:36,730 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240629_084405_MIMIC-CXR(triplets+classif+entcont+nli+radgraph+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300
   save_worst_queries_for_inspection: True[0m
[32m2025-05-25 08:59:36,730 - INFO - __main__[0m - Loading evaluation data from

#### CXR Fact Encoder (T+C+NLI+EC+SD) v2

In [107]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20250602_122454_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100 \
--save_worst_queries_for_inspection

[32m2025-06-02 17:37:36,336 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-06-02 17:37:36,338 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20250602_122454_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300
   save_worst_queries_for_inspection: True[0m
[32m2025-06-02 17:37:36,338 - INFO - __main__[0m - Loading evaluation data from /mnt/dat

#### CXR Fact Encoder (T+C+NLI+EC+SD) v3

In [10]:
!python ../../eval_fact_embedding_on_ranking.py \
--evaluation_mode "padchest_gr_mscxr_custom" \
--revised_groundings_filepath \
"/mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl" \
--num_query_samples 300 \
--model_name "microsoft/BiomedVLP-CXR-BERT-specialized" \
--model_checkpoint_folder_path \
"/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20250610_213953_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)" \
--batch_size 100 \
--save_worst_queries_for_inspection

[32m2025-06-11 01:47:40,663 - INFO - root[0m - Logging configured (Color: True).[0m
[32m2025-06-11 01:47:40,665 - INFO - medvqa.utils.common[0m - 
script's arguments:
   evaluation_mode: padchest_gr_mscxr_custom
   model_name: microsoft/BiomedVLP-CXR-BERT-specialized
   device: cuda
   batch_size: 100
   num_workers: 4
   model_checkpoint_folder_path: /mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20250610_213953_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)
   distance_metric: cosine
   average_token_embeddings: False
   chest_imagenome_sentence2labels_gold_filepath: None
   revised_groundings_filepath: /mnt/data/pamessina_folder_backup_15_10_24/pamessina/medvqa-workspace/cache/chest_imagenome/integrated_llm_revised_phrase_groundings.pkl
   num_query_samples: 300
   save_worst_queries_for_inspection: True[0m
[32m2025-06-11 01:47:40,665 - INFO - __main__[0m - Loading evaluation data from /mnt/dat

In [19]:
!ls "/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20250610_213953_MIMIC-CXR(triplets+classif+entcont+nli+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)"

'checkpoint_154_cacc+chf1+chf1+cscc+encc+hscc+nlcc+sass+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)+ta8)=0.9070.pt'
 metadata.json
 metrics_logs.csv


In [12]:
from medvqa.utils.files_utils import load_pickle

tmp = load_pickle('/mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_custom_eval_data(num_query_samples=300).pkl')

In [13]:
tmp['metrics'].keys()

dict_keys(['bleu1', 'rouge_l', 'levenshtein', 'bertscore_f1', 'cxr_bert_cosine', 'chexbert_cosine', 'radgraph_f1'])

In [14]:
worst_queries = load_pickle('/mnt/data/pamessina/workspaces/medvqa-workspace/cache/padchest_gr_mscxr_worst_queries_for_inspection_20250611_014756.pkl')

In [15]:
worst_queries.keys()

dict_keys(['queries', 'auc_scores'])

In [16]:
for i in range(20):
    print(worst_queries['auc_scores'][i], worst_queries['queries'][i])

0.657677039222273 The aortic knob is deformed by a large mediastinal hematoma
0.6918567540492067 Fluid within the fissures
0.7161880824123329 No definite effusions
0.7367287026723689 Minimal basal apical blood flow redistribution unchanged
0.7616047851607978 Minimal residual linear opacity likely scar
0.7616736551976169 Right-sided cardiac contour obliterated by retrocardiac neo-esophagus
0.7665093199026197 Bullous changes in the right lung base
0.7696063011241314 Pulmonary hyperinflation due to emphysema
0.8062298187285715 Increased opacity projecting over the left first costochondral junction
0.8069886761043845 Left upper lobe opacity improving
0.8076023521820022 Multiple moderate to severe compression deformities in the mid to lower thoracic spine
0.8188245562414469 Patchy left mid lung opacities, infection vs aspiration
0.824721076347441 Nodule in the left mid-to-upper lung
0.8250068615138056 Mild pleural thickening at the left lung base is noted
0.8264953314110567 Minimal atelecta

In [29]:
import numpy as np
from typing import Union, List, Dict, Any

def print_top_k_sentences(
    evaluation_data: Dict[str, Any],
    query_index: int,
    metric_input: Union[str, List[str]],
    k: int = 5,
    weights: Union[List[float], None] = None
):
    """
    Prints the specified query and its top K candidate sentences based on a single
    metric or a weighted average of multiple metrics.

    Args:
        evaluation_data (dict): The dictionary returned by a function like
            `compute_padchest_gr_mscxr_custom_evaluation_data`.
            Expected keys: 'query_phrases', 'candidate_phrases', 'metrics'.
        query_index (int): The index of the query in `evaluation_data['query_phrases']`.
        metric_input (Union[str, List[str]]):
            - If a string, it's the name of the single metric to use for ranking
              (e.g., 'bleu1', 'cxr_bert_cosine').
            - If a list of strings, these are the names of metrics to combine.
              In this case, `weights` must also be provided.
        k (int): The number of top candidate sentences to print. Defaults to 5.
        weights (Union[List[float], None]):
            - If `metric_input` is a list of metric names, this must be a list
              of corresponding weights (floats) of the same length.
            - Ignored if `metric_input` is a single string. Defaults to None.
    """

    # --- Input Validation ---
    if not isinstance(evaluation_data, dict):
        print("Error: evaluation_data must be a dictionary.")
        return
    required_keys = ['query_phrases', 'candidate_phrases', 'metrics']
    if not all(key in evaluation_data for key in required_keys):
        print(f"Error: evaluation_data is missing one or more required keys: {required_keys}")
        return
    if not isinstance(evaluation_data['metrics'], dict):
        print("Error: evaluation_data['metrics'] must be a dictionary.")
        return

    num_queries = len(evaluation_data['query_phrases'])
    if not 0 <= query_index < num_queries:
        print(f"Error: query_index {query_index} is out of bounds. Must be between 0 and {num_queries - 1}.")
        return

    if not isinstance(k, int) or k <= 0:
        print(f"Error: k must be a positive integer. Got {k}.")
        return

    # --- Determine scoring mode and validate metrics/weights ---
    is_weighted_average = isinstance(metric_input, list)
    display_metric_name = ""

    if is_weighted_average:
        metric_names_list = metric_input
        if not isinstance(weights, list) or len(weights) != len(metric_names_list):
            print("Error: If metric_input is a list, 'weights' must be a list of the same length.")
            return
        if not all(isinstance(w, (int, float)) for w in weights):
            print("Error: All weights must be numeric (int or float).")
            return
        for m_name in metric_names_list:
            if m_name not in evaluation_data['metrics']:
                print(f"Error: Metric '{m_name}' in metric_input list not found in evaluation_data['metrics'].")
                available_metrics = list(evaluation_data['metrics'].keys())
                print(f"Available metrics are: {available_metrics}")
                return
        weight_strs = [f"{w:.2f}*{m}" for w, m in zip(weights, metric_names_list)]
        display_metric_name = f"Weighted Average ({', '.join(weight_strs)})"
    else: # Single metric mode
        single_metric_name = metric_input
        if not isinstance(single_metric_name, str):
            print("Error: metric_input must be a string or a list of strings.")
            return
        if single_metric_name not in evaluation_data['metrics']:
            print(f"Error: Metric '{single_metric_name}' not found in evaluation_data['metrics'].")
            available_metrics = list(evaluation_data['metrics'].keys())
            print(f"Available metrics are: {available_metrics}")
            return
        display_metric_name = single_metric_name

    # --- Extract Data ---
    try:
        query_phrase = evaluation_data['query_phrases'][query_index]
        candidate_phrases = evaluation_data['candidate_phrases']
        num_candidates = len(candidate_phrases)

        if num_candidates == 0:
            print("Warning: There are no candidate phrases to rank.")
            print(f"\nQuery (Index {query_index}): \"{query_phrase}\"")
            return

        # Calculate scores to be used for ranking
        if is_weighted_average:
            scores_to_sort = np.zeros(num_candidates, dtype=np.float32)
            for metric_name, weight in zip(metric_names_list, weights):
                metric_scores = evaluation_data['metrics'][metric_name][query_index, :]
                scores_to_sort += metric_scores * weight
        else: # Single metric
            scores_to_sort = evaluation_data['metrics'][single_metric_name][query_index, :]

    except IndexError:
        print(f"Error: Could not access scores for query_index {query_index} and metric(s).")
        # Further details could be printed here if needed
        return
    except Exception as e:
        print(f"An unexpected error occurred while extracting/calculating scores: {e}")
        return

    # --- Rank and Select Top K ---
    sorted_candidate_indices = np.argsort(scores_to_sort)[::-1]
    actual_k = min(k, num_candidates)

    # --- Print Results ---
    print(f"\n--- Top {actual_k} Sentences for Query (Index {query_index}) using Metric '{display_metric_name}' ---")
    print(f"Query: \"{query_phrase}\"")
    print("-" * (len(display_metric_name) + 40)) # Adjust line width

    if actual_k == 0:
        print("No candidates to display (k=0 or no candidates available).")
        return

    for i in range(actual_k):
        candidate_idx = sorted_candidate_indices[i]
        top_candidate_phrase = candidate_phrases[candidate_idx]
        # The score displayed is either the single metric score or the final weighted score
        top_score_value = scores_to_sort[candidate_idx]
        print(f"{i+1}. Score: {top_score_value:.4f} | Sentence: \"{top_candidate_phrase}\"")

    print("-" * (len(display_metric_name) + 40))

In [None]:
'bleu1', 'rouge_l', 'levenshtein', 'bertscore_f1', 'cxr_bert_cosine', 'chexbert_cosine', 'radgraph_f1'

In [28]:
tmp['query_phrases'][-30:]

['Stable right mid lung zone airspace opacity',
 'Stable vague opacity in the right mid lung, likely post-surgical',
 'Subcutaneous emphysema overlying the left supraclavicular soft tissues',
 'Subsegmental atelectasis of the left base',
 'Subsegmental right mid to lower lung atelectasis',
 'Substantial right pneumothorax, unchanged',
 'Subtle confluent opacity in the right lower lung field concerning for pneumonia',
 'Subtle increase in right base opacity',
 'The aorta is unfolded but unchanged',
 'The aortic knob is deformed by a large mediastinal hematoma',
 'The cardiac silhouette is minimally enlarged',
 'The extent of the known hiatal hernia is unchanged',
 'The right pleural effusion is persistent',
 'There continues to be mild pulmonary vascular congestion',
 'There is a healed left clavicle fracture',
 'There is a remnant opacity at the right lung base, potentially reflecting either atelectasis or pneumonia',
 'There is linear focus of atelectasis in the left lower lung',
 'Th

In [102]:
query = 'Pulmonary hyperinflation due to emphysema'
query_index = tmp['query_phrases'].index(query)
print_top_k_sentences(evaluation_data=tmp,
                      query_index=query_index,
#                       metric_input=['bleu1', 'levenshtein', 'rouge_l', 'bertscore_f1'],
#                       weights=[1, 1, 0.5, 0.5],
#                       metric_input=['bleu1', 'radgraph_f1', 'cxr_bert_cosine', 'chexbert_cosine', 'bertscore_f1'],
#                       weights=[1, 1, 1, 1, 1],
                      metric_input=['bleu1', 'rouge_l', 'levenshtein', 'bertscore_f1', 'cxr_bert_cosine', 'chexbert_cosine', 'radgraph_f1'],
                      weights=[1, 1, 1, 1, 1, 1, 1],
#                       metric_input=['bleu1', 'rouge_l', 'levenshtein', 'cxr_bert_cosine', 'chexbert_cosine', 'radgraph_f1'],
#                       weights=[1, 1, 1, 1, 1, 1, ],
#                       metric_input=['bleu1', 'rouge_l', 'levenshtein', 'cxr_bert_cosine', 'chexbert_cosine'],
#                       weights=[1, 1, 1, 1, 1],
#                       metric_input=['bertscore_f1'],
#                       weights=[1],
#                         metric_input=['bleu1', 'rouge_l', 'cxr_bert_cosine', 'chexbert_cosine'],
#                         weights=[1, 1, 1, 1],
#                         metric_input=['bleu1', 'levenshtein', 'cxr_bert_cosine', 'chexbert_cosine'],
#                         weights=[1, 1, 1, 1],
#                       metric_input=['bleu1', 'rouge_l'],
#                       weights=[1, 1],
#                       metric_input=['bleu1', 'cxr_bert_cosine'],
#                         weights=[1, 1],
                      k=30,                      
                     )


--- Top 30 Sentences for Query (Index 214) using Metric 'Weighted Average (1.00*bleu1, 1.00*rouge_l, 1.00*levenshtein, 1.00*bertscore_f1, 1.00*cxr_bert_cosine, 1.00*chexbert_cosine, 1.00*radgraph_f1)' ---
Query: "Pulmonary hyperinflation due to emphysema"
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1. Score: 4.3205 | Sentence: "Pulmonary hyperinflation"
2. Score: 4.2769 | Sentence: "Signs of pulmonary hyperinflation"
3. Score: 3.6026 | Sentence: "Signs of lung hyperinflation"
4. Score: 3.5638 | Sentence: "Signs of hyperinflation"
5. Score: 3.3879 | Sentence: "Compensatory hyperinflation of the right hemithorax"
6. Score: 3.3294 | Sentence: "Extensive signs of air trapping due to emphysema predominantly in the upper fields"
7. Score: 3.2568 | Sentence: "Lung parenchyma with signs of hyperinflation"
8. Score: 3.0011 | Sentence: "Right hemithorax with