In [1]:
import os, sys

# Project checkout location. Defaults to the original scratch path; set the
# DERM_VLMS_ROOT environment variable to run this notebook from a different
# location without editing the cell.
PROJECT_ROOT = os.environ.get('DERM_VLMS_ROOT', '/scratch/jq2uw/derm_vlms')
LLAVA_DERM_DIR = os.path.join(PROJECT_ROOT, 'llava_derm')

# Make the project-local `utils` module (imported below) resolvable.
# Guarded so that re-running the cell does not stack duplicate entries.
if LLAVA_DERM_DIR not in sys.path:
    sys.path.insert(0, LLAVA_DERM_DIR)

# Allocator configuration is exported BEFORE `import torch` so it is already
# in the environment when the CUDA caching allocator is initialised.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'

import torch
# Release any unused cached GPU memory (only matters when this cell is re-run
# in a live kernel that has already allocated on the GPU).
torch.cuda.empty_cache()

# Project-local helpers living in LLAVA_DERM_DIR; must come after the
# sys.path tweak above.
from utils import load_model, predict_image, parse_label

print('Loading model...')
# `model` and `processor` are reused by the inference cell further down.
model, processor = load_model()
print('Model loaded.')

  from .autonotebook import tqdm as notebook_tqdm
`torch_dtype` is deprecated! Use `dtype` instead!


Loading model...


Loading checkpoint shards: 100%|██████████████████████████████████████| 3/3 [00:06<00:00,  2.03s/it]
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Total params: 7,063,427,072
Model loaded.


In [2]:
import pandas as pd
from pathlib import Path

# Expose project-level modules (data_utils). Guarded, like the llava_derm
# path in the setup cell, so re-running this cell does not keep prepending
# duplicate entries to sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)
from data_utils import sample_lesions

# Directory layout, all derived from PROJECT_ROOT.
DATA_DIR = os.path.join(PROJECT_ROOT, 'data')
RESULTS_DIR = os.path.join(PROJECT_ROOT, 'results')
IMAGES_DIR = os.path.join(RESULTS_DIR, 'images')

# Full metadata table; the `y3` column holds the class label whose
# distribution is printed below.
df = pd.read_parquet(os.path.join(PROJECT_ROOT, 'data_share', 'midas_share.parquet'))
print(f'Loaded {len(df)} rows')
print(f'y3 distribution:\n{df["y3"].value_counts()}')

# Sampling configuration: fixed seed for reproducibility, N_PER_CLASS lesions
# drawn per class.
SEED = 42
N_PER_CLASS = 5
# NOTE(review): sample_lesions is defined in data_utils (not visible here);
# per its arguments and printed log it reads from DATA_DIR and writes the
# sampled images under IMAGES_DIR -- confirm against its implementation.
df_sample = sample_lesions(df, data_dir=DATA_DIR, output_dir=IMAGES_DIR, n_per_class=N_PER_CLASS, seed=SEED)
df_sample.head()

Loaded 3357 rows
y3 distribution:
y3
malignant    1391
benign       1322
other         644
Name: count, dtype: int64
Sampled 10 lesions (5 per class) -> 30 rows
Classes: {'benign': 15, 'malignant': 15}
Images saved to: /scratch/jq2uw/derm_vlms/results/images


Unnamed: 0,id,ground_truth,image_mode,image_path,original_image_name,lesion_id
0,1_photo,benign,photo,/scratch/jq2uw/derm_vlms/results/images/1_phot...,s-prd-667118134.jpg,534_left lower leg_no
1,1_dscope,benign,dscope,/scratch/jq2uw/derm_vlms/results/images/1_dsco...,s-prd-667118139.jpg,534_left lower leg_no
2,1_combined,benign,combined,/scratch/jq2uw/derm_vlms/results/images/1_comb...,s-prd-667118134.jpg; s-prd-667118139.jpg,534_left lower leg_no
3,2_photo,benign,photo,/scratch/jq2uw/derm_vlms/results/images/2_phot...,s-prd-505211076.jpg,215_central chest _no
4,2_dscope,benign,dscope,/scratch/jq2uw/derm_vlms/results/images/2_dsco...,s-prd-505211307.jpg,215_central chest _no


In [3]:
from PIL import Image
from tqdm import tqdm

# Three prompt variants are sent for every sampled image: description only,
# classification only, and both concatenated into a single prompt.
q_describe = "Describe the lesion in detail."
q_classify = "Is the lesion malignant or benign, or other?"
q_describe_classify = q_describe + " " + q_classify
results = []

for _, row in tqdm(df_sample.iterrows(), total=len(df_sample)):
    try:
        # Image.open() is lazy and keeps the underlying file open. The
        # context manager closes the handle as soon as convert() has
        # produced an independent in-memory RGB copy, so file descriptors
        # do not accumulate over the loop.
        with Image.open(row['image_path']) as raw_image:
            image = raw_image.convert('RGB')
    except Exception as e:
        # Deliberate best-effort: an unreadable image is reported and
        # skipped rather than aborting the whole run.
        print(f'[SKIP] {row["id"]}: {e}')
        continue

    # One model call per prompt, in the same order as the output columns.
    describe_answer = predict_image(model, processor, image, prompt=q_describe)
    classify_answer = predict_image(model, processor, image, prompt=q_classify)
    describe_classify_answer = predict_image(model, processor, image, prompt=q_describe_classify)

    # One record per image; turned into a DataFrame in the next cell.
    results.append({
        'id': row['id'],
        'ground_truth': row['ground_truth'],
        'image_mode': row['image_mode'],
        'describe': describe_answer,
        'classify': classify_answer,
        'describe_then_classify': describe_classify_answer,
        'original_image_name': row['original_image_name'],
        'lesion_id': row['lesion_id'],
    })

print(f'Collected {len(results)} predictions')

  0%|                                                                        | 0/30 [00:00<?, ?it/s]

100%|███████████████████████████████████████████████████████████████| 30/30 [02:09<00:00,  4.33s/it]

Collected 30 predictions





In [4]:
# Column order of the saved CSV / displayed table.
col_order = ['id', 'ground_truth', 'image_mode', 'describe', 'classify',
             'describe_then_classify', 'original_image_name', 'lesion_id']
# Passing `columns=` selects and orders the record keys in one step, and
# still yields an empty, correctly labelled frame when no predictions were
# collected (results == []); indexing an empty frame with col_order would
# raise KeyError instead.
results_df = pd.DataFrame(results, columns=col_order)

# Make sure the target directory exists before writing.
os.makedirs(RESULTS_DIR, exist_ok=True)
out_path = os.path.join(RESULTS_DIR, 'llava_derm_predictions_paired.csv')
results_df.to_csv(out_path, index=False)
print(f'Saved {len(results_df)} rows to {out_path}')

results_df

Saved 30 rows to /scratch/jq2uw/derm_vlms/results/llava_derm_predictions_paired.csv


Unnamed: 0,id,ground_truth,image_mode,describe,classify,describe_then_classify,original_image_name,lesion_id
0,1_photo,benign,photo,There is 0.55 chance that the lesion is Lichen...,The lesion is benign.,There is 0.33 chance that the lesion is Hemang...,s-prd-667118134.jpg,534_left lower leg_no
1,1_dscope,benign,dscope,There is 0.33 chance that the lesion is Psoria...,The possibility of the lesion being malignant ...,There is 0.55 chance that the lesion is Eczema...,s-prd-667118139.jpg,534_left lower leg_no
2,1_combined,benign,combined,There is 0.33 chance that the lesion is Eczema...,Other,There is 1.0 chance that the lesion is SCC/SCC...,s-prd-667118134.jpg; s-prd-667118139.jpg,534_left lower leg_no
3,2_photo,benign,photo,There is 0.33 chance that the lesion is Acne. ...,There is 0.55 chance that the lesion is Acne. ...,There is 0.33 chance that the lesion is Acne. ...,s-prd-505211076.jpg,215_central chest _no
4,2_dscope,benign,dscope,There is 0.55 chance that the lesion is Absces...,The possibility of the lesion being malignant ...,There is 0.55 chance that the lesion is Cyst. ...,s-prd-505211307.jpg,215_central chest _no
5,2_combined,benign,combined,There is 0.55 chance that the lesion is Absces...,Is the lesion inflicted or unintentional?,There is 0.33 chance that the lesion is Acne. ...,s-prd-505211076.jpg; s-prd-505211307.jpg,215_central chest _no
6,3_photo,benign,photo,There is 0.33 chance that the lesion is Tinea....,There is 0.33 chance that the lesion is Basal ...,There is 0.55 chance that the lesion is Basal ...,s-prd-529717709.jpg,262_left buttock_no
7,3_dscope,benign,dscope,There is 0.55 chance that the lesion is Tinea....,"Is the lesion present in the skin, subcutaneou...",There is 0.67 chance that the lesion is Tinea....,s-prd-529718156.jpg,262_left buttock_no
8,3_combined,benign,combined,There is 0.33 chance that the lesion is Basal ...,There is 0.55 chance that the lesion is Basal ...,There is 0.55 chance that the lesion is Basal ...,s-prd-529717709.jpg; s-prd-529718156.jpg,262_left buttock_no
9,4_photo,benign,photo,There is 0.55 chance that the lesion is Absces...,There is 0.55 chance that the lesion is Basal ...,There is 0.55 chance that the lesion is Basal ...,s-prd-649658541.jpg,476_mid vertex_no
