In [1]:
import os, sys

# Project locations (absolute paths on the scratch filesystem).
PROJECT_ROOT = '/scratch/jq2uw/derm_vlms'
DERMATO_LLAMA_DIR = os.path.join(PROJECT_ROOT, 'dermato_llama')

# Make the project-local modules importable. The insert is guarded so that
# re-running this cell does not keep prepending duplicate sys.path entries.
if DERMATO_LLAMA_DIR not in sys.path:
    sys.path.insert(0, DERMATO_LLAMA_DIR)

# Set before torch is imported so the value is already in the environment
# when the CUDA caching allocator is initialised.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'

import torch
torch.cuda.empty_cache()

# HF token for gated meta-llama model
# Guarded like the insert above so the cell stays idempotent on re-run.
# NOTE(review): PROJECT_ROOT is inserted at position 0 after DERMATO_LLAMA_DIR,
# so it takes precedence on sys.path; a `utils` module in PROJECT_ROOT would
# shadow the one in DERMATO_LLAMA_DIR -- confirm this is intended.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)
from tokens import HF_TOKEN

from utils import load_model, predict_image, parse_label

# load_model returns the (model, processor) pair used by predict_image below.
print('Loading model...')
model, processor = load_model(hf_token=HF_TOKEN)
print('Model loaded.')

  from .autonotebook import tqdm as notebook_tqdm
`torch_dtype` is deprecated! Use `dtype` instead!


Loading model...


Loading checkpoint shards: 100%|████████████████████████████████████████| 5/5 [00:04<00:00,  1.12it/s]


Trainable params: 0
Total params:     10,696,435,235
Model loaded.


In [2]:
import pandas as pd
from pathlib import Path

# Directory searched by file name when a stored image path does not exist.
DATA_DIR = Path(PROJECT_ROOT, 'data')

# Read the shared metadata table and report its size and the y3 label counts.
df = pd.read_parquet(Path(PROJECT_ROOT, 'data_share', 'midas_share.parquet'))
print(f'Loaded {len(df)} rows')
print(f'y3 distribution:\n{df["y3"].value_counts()}')

def resolve_img_path(p, data_dir=None):
    """Return a usable filesystem path for an image reference.

    Parameters
    ----------
    p : str or path-like
        Image path as stored in the metadata table.
    data_dir : str or path-like, optional
        Directory searched by file name when ``p`` itself is not an existing
        file. Defaults to the notebook-level ``DATA_DIR``.

    Returns
    -------
    str
        ``p`` (as a string) if it is an existing file; otherwise
        ``<data_dir>/<file name of p>`` if that file exists; otherwise ``p``
        unchanged, so callers can detect unresolved entries with
        ``os.path.isfile``.
    """
    p = str(p)
    if os.path.isfile(p):
        return p
    # The global is only read when no directory is passed in, which keeps the
    # function usable (and testable) outside this notebook's namespace.
    base_dir = DATA_DIR if data_dir is None else Path(data_dir)
    candidate = base_dir / Path(p).name
    if candidate.is_file():
        return str(candidate)
    return p

# Map every stored image path to a file on this machine. resolve_img_path
# returns the original string when nothing is found, so the isfile count
# below reports how many rows actually resolved.
df['image_path_resolved'] = df['image_path'].apply(resolve_img_path)
n_found = df['image_path_resolved'].apply(os.path.isfile).sum()
print(f'Resolved images: {n_found}/{len(df)} found')

SEED = 42
N_PER_CLASS = 5
# Stratified sample: N_PER_CLASS rows from each y3 class, each class drawn
# with the same seed. Iterating the groups and concatenating keeps the 'y3'
# column in the result and avoids DataFrameGroupBy.apply operating on the
# grouping column, which pandas has deprecated.
df_sample = pd.concat(
    [g.sample(n=N_PER_CLASS, random_state=SEED) for _, g in df.groupby('y3')]
).reset_index(drop=True)
print(f'\nStratified sample ({N_PER_CLASS} per class, seed={SEED}):')
print(df_sample['y3'].value_counts())
df_sample[['uid', 'y3', 'image_path_resolved']].head()

Loaded 3357 rows
y3 distribution:
y3
malignant    1391
benign       1322
other         644
Name: count, dtype: int64
Resolved images: 3357/3357 found

Stratified sample (5 per class, seed=42):
y3
benign       5
malignant    5
other        5
Name: count, dtype: int64


  df_sample = df.groupby('y3', group_keys=False).apply(


Unnamed: 0,uid,y3,image_path_resolved
0,1833,benign,/scratch/jq2uw/derm_vlms/data/s-prd-697891782.jpg
1,1191,benign,/scratch/jq2uw/derm_vlms/data/s-prd-593416010.jpg
2,610,benign,/scratch/jq2uw/derm_vlms/data/s-prd-639852881.jpg
3,1053,benign,/scratch/jq2uw/derm_vlms/data/s-prd-560547879.jpg
4,188,benign,/scratch/jq2uw/derm_vlms/data/s-prd-419238986.jpg


In [3]:
from PIL import Image
from tqdm import tqdm

# Each image is queried twice: once for a free-text description and once
# for a short classification answer.
q_describe = "Describe the lesion in detail. Is the lesion malignant or benign, or other?"
q_classify = "Is the lesion malignant or benign, or other?"
results = []

for _, row in tqdm(df_sample.iterrows(), total=len(df_sample)):
    uid = row['uid']
    try:
        # The context manager closes the underlying file even when decoding
        # fails part-way; convert() returns a separate in-memory image, so
        # `image` stays valid after the file is closed.
        with Image.open(row['image_path_resolved']) as img:
            image = img.convert('RGB')
    except Exception as e:
        # Best-effort: an unreadable image is reported and skipped rather
        # than aborting the whole run.
        print(f'[SKIP] uid={uid}: {e}')
        continue

    description = predict_image(model, processor, image, prompt=q_describe)
    classification = predict_image(model, processor, image, prompt=q_classify)

    # One record per successfully processed image; keyed by uid downstream.
    results.append({
        'uid': uid,
        'y3': row['y3'],
        'description': description,
        'classification': classification,
    })

print(f'Collected {len(results)} predictions')

100%|█████████████████████████████████████████████████████████████████| 15/15 [01:18<00:00,  5.21s/it]

Collected 15 predictions





In [4]:
# Pin the column set so that an empty `results` list (every image skipped)
# still yields a frame with a 'uid' column instead of a KeyError in set_index.
results_df = pd.DataFrame(
    results,
    columns=['uid', 'y3', 'description', 'classification'],
).set_index('uid')

# Normalise the raw classification text to a label with the project helper.
results_df['pred_label'] = results_df['classification'].apply(parse_label)

print('Predicted label distribution:')
# dropna=False: if parse_label can return a missing value for unparseable
# text (TODO confirm), those rows are counted instead of silently dropped.
# Output is unchanged when there are no missing labels.
print(results_df['pred_label'].value_counts(dropna=False))

# Persist the predictions next to the model code.
RESULTS_DIR = os.path.join(DERMATO_LLAMA_DIR, 'results')
os.makedirs(RESULTS_DIR, exist_ok=True)
out_path = os.path.join(RESULTS_DIR, 'dermato_llama_predictions.csv')
results_df.to_csv(out_path)
print(f'\nSaved to {out_path}')

results_df

Predicted label distribution:
pred_label
benign       10
malignant     5
Name: count, dtype: int64

Saved to /scratch/jq2uw/derm_vlms/dermato_llama/results/dermato_llama_predictions.csv


Unnamed: 0_level_0,y3,description,classification,pred_label
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1833,benign,"The image shows a skin lesion with a brown, so...",benign,benign
1191,benign,The image shows a skin lesion with a central d...,Benign,benign
610,benign,"The image shows a skin lesion with a pinkish, ...",Benign,benign
1053,benign,"The lesion is a small, round, brownish spot wi...",benign,benign
188,benign,"The lesion is a small, round, brown macule wit...",benign,benign
3050,malignant,"The lesion is a raised, dark, and crusted nodu...",Malignant,malignant
416,malignant,The lesion appears to be a skin growth with ir...,Malignant,malignant
3310,malignant,There are multiple lesions present. Some appea...,Malignant,malignant
2450,malignant,"The image shows an area of skin with a rough, ...",Benign,benign
969,malignant,"The lesion is a raised, circular, pinkish-red ...",Malignant,malignant
