In [1]:
import torch
from transformers import AutoModel
from transformers import AutoProcessor
import yaml
from src.dataset.mimic_dataset import MIMICDataset
from src.embeddings.text_embeddings import encode_text_findings
from src.embeddings.image_embeddings import encode_images_study

In [2]:
# Load the YAML configuration file.
# The encoding is pinned to UTF-8 so that parsing does not depend on the
# platform's default locale encoding.
with open("configs/configs.yaml", "r", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

In [3]:
# The model identifier, auth token and dataset root folder are all read from
# the `processor` section of the loaded configuration.
processor_cfg = config['processor']

processor = AutoProcessor.from_pretrained(
    processor_cfg['model'],
    token=processor_cfg['auth_token'],
)

# The dataset receives the processor's tokenizer.
dataset = MIMICDataset(
    processor_cfg['root_folder'],
    tokenizer=processor.tokenizer,
)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [4]:
# Fetch a single example (index 42) and report its main fields.
sample = dataset[42]

print(f"Patient: {sample['patient_id']}")
print(f"Study: {sample['study_id']}")
# The first dimension of `images` is reported as the image count.
print(f"N imagens: {sample['images'].shape[0]}")
print(f"Texto Findings: {sample['text']}")
print(f"Tokenized: {sample['tokenized_text'].input_ids.shape}")

Patient: p10001884
Study: s52060840
N imagens: 2
Texto Findings: No new focal consolidation is seen.  Mild right apical pleural thickening is
 seen.  No pleural effusion or pneumothorax is seen. The cardiac and
 mediastinal silhouettes are stable.  Small calcification projecting over the
 upper chest seen on the lateral view is present since at least ___, and may relate to aortic calcification.
Tokenized: torch.Size([1, 64])


In [7]:
# Run on the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained(
    config['processor']['model'],
    token=config['processor']['auth_token'],
).to(device)

# Generate the text and image embeddings for the selected sample.
e_text = encode_text_findings(sample, model, device)
# NOTE(review): the recorded run emits "trying to rescale already rescaled
# images" here — presumably the dataset already yields pixel values in [0, 1];
# confirm whether the image encoding path should pass `do_rescale=False`.
e_img = encode_images_study(sample, processor, model, device)

# Multimodal combination: weighted blend of the two embeddings, where `alpha`
# weights the text embedding and `1 - alpha` weights the image embedding.
alpha = 0.5
e_study = alpha * e_text + (1 - alpha) * e_img
# L2-normalize along the last dimension. `normalize` divides by
# max(norm, eps), so a zero-norm blend yields zeros instead of NaN; for any
# non-zero norm the result matches `e_study / e_study.norm(dim=-1, keepdim=True)`.
e_study = torch.nn.functional.normalize(e_study, p=2, dim=-1)

print("e_text:", e_text)
print("e_img:", e_img)
print("e_study:", e_study)


It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.


e_text: tensor([[-0.0085,  0.0241, -0.0242,  ..., -0.0251, -0.0344, -0.0447]])
e_img: tensor([[-0.0269,  0.0358,  0.0142,  ..., -0.0207,  0.0523,  0.0216]])
e_study: tensor([[-0.0278,  0.0468, -0.0078,  ..., -0.0358,  0.0140, -0.0181]])


In [None]:
import sys

# Make the project root importable. The membership check keeps this cell
# idempotent: re-running it no longer appends duplicate entries to sys.path.
# NOTE(review): this is a machine-specific absolute path — consider deriving it
# from a configurable location instead.
project_root = '/home/ia368/projetos/multimodal-rag-med'
if project_root not in sys.path:
    sys.path.append(project_root)

from scripts.preprocess_embeddings import main

# Run the preprocessing pipeline; `main` is unpacked into the embeddings data
# and the path the results were written to.
# NOTE(review): the recorded run used the CPU at roughly 115 s/it and was
# interrupted manually — expect this cell to be very slow without a GPU.
embeddings_data, output_path = main("configs/configs.yaml")

# The returned data is now available for use.
print(f"\n✓ Total de embeddings gerados: {len(embeddings_data['e_study'])}")
print(f"✓ Salvos em: {output_path}")

Carregando configuração...
Carregando processor e modelo...


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Usando device: cpu
Carregando dataset...
Processando 227835 amostras...


Processando embeddings:   0%|          | 0/7120 [00:00<?, ?it/s]It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.
Processando embeddings:   0%|          | 2/7120 [03:51<228:36:08, 115.62s/it]


KeyboardInterrupt: 

# Teste em batch

In [52]:
import pandas as pd

# Read the labeled test-set CSV and preview its first five rows.
labeled_csv = "../data/mimic_small/mimic-cxr-2.1.0-test-set-labeled.csv"
labeled = pd.read_csv(labeled_csv)

labeled.head(n=5)

Unnamed: 0,study_id,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Lesion,Airspace Opacity,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,58085167,,,,,,,,1.0,,,,,,
1,57798090,,1.0,1.0,,,0.0,,0.0,1.0,,,,,
2,50020371,1.0,,,,,,,,,,,,,
3,52225063,,,1.0,,,,,,1.0,,,,,
4,59039129,,,1.0,,1.0,-1.0,-1.0,,1.0,,1.0,,,
