In [1]:
import os, json, base64
import re
import pandas as pd

# Project root against which every relative path below is resolved. Set the
# DERM_VLMS_ROOT environment variable to run on another machine; when it is
# unset the original location is used.
PROJECT_ROOT = os.environ.get("DERM_VLMS_ROOT", "/scratch/jq2uw/derm_vlms")
os.chdir(PROJECT_ROOT)

# Display name -> CSV of that model's predictions. The name is what appears in
# the page's model dropdown and (sanitized) in the exported CSV filename.
VLMS = {
    "DermatoLlama": "results/dermato_llama_predictions_paired.csv",
    "LLaVA-Derm": "results/llava_derm_predictions_paired.csv",
    "SkinGPT-4": "results/skingpt4_predictions_paired.csv",
}
# Directory holding one "<id>.jpg" per image id found in the CSVs.
IMAGE_DIR = "results/images"
# Destination of the generated single-file HTML page.
OUTPUT_PATH = "results/interface2/index.html"

# Labels of the free-text survey inputs shown for every model response.
QUESTIONS = ["Q1", "Q2", "Q3"]

# --- load CSV data ---
# Columns copied into the page: the image id plus one response column per task.
RESPONSE_COLUMNS = ["id", "describe", "classify", "describe_then_classify"]

vlm_data = {}   # model name -> list of row dicts restricted to RESPONSE_COLUMNS
all_ids = set() # union of image ids across all models
for name, path in VLMS.items():
    # keep_default_na=False: by default pandas parses empty cells and literal
    # answers such as "None", "N/A" or "null" as NaN, which would then be
    # serialized into the page and displayed as "NaN" instead of the model's text.
    df = pd.read_csv(path, keep_default_na=False)
    vlm_data[name] = df[RESPONSE_COLUMNS].to_dict("records")
    all_ids.update(df["id"])

# --- encode images as base64 ---
def _jpeg_data_uri(path):
    """Read the file at `path` and return its bytes as a base64 JPEG data URI."""
    with open(path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode()
    return "data:image/jpeg;base64," + encoded


# Ids are visited in sorted order; each one maps to IMAGE_DIR/<id>.jpg.
images = {
    img_id: _jpeg_data_uri(os.path.join(IMAGE_DIR, f"{img_id}.jpg"))
    for img_id in sorted(all_ids)
}

print(f"Loaded {len(vlm_data)} VLMs, {len(images)} images")

Loaded 3 VLMs, 30 images


In [2]:
HTML_TEMPLATE = r"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Dermatology VLM Evaluation</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}
body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;background:#f0f2f5;color:#1a1a2e;min-height:100vh}
header{background:#fff;border-bottom:1px solid #e0e0e0;padding:14px 24px;display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:10px}
header h1{font-size:1.2rem;color:#1a1a2e}
.hctl{display:flex;align-items:center;gap:14px;flex-wrap:wrap}
.hctl select{padding:6px 12px;border:1px solid #ccc;border-radius:6px;font-size:.9rem}
.progress{font-size:.9rem;color:#555;font-weight:500}
.btn-exp{padding:6px 14px;background:#059669;color:#fff;border:none;border-radius:6px;cursor:pointer;font-size:.85rem}
.btn-exp:hover{background:#047857}

main{max-width:920px;margin:20px auto;padding:0 16px}

.img-sec{background:#fff;border-radius:12px;padding:20px;text-align:center;margin-bottom:18px;box-shadow:0 1px 3px rgba(0,0,0,.08)}
.img-sec img{max-width:100%;max-height:340px;border-radius:8px;object-fit:contain}
.img-id{margin-top:6px;font-size:.82rem;color:#888}

.task-sec{background:#fff;border-radius:12px;box-shadow:0 1px 3px rgba(0,0,0,.08);margin-bottom:18px;overflow:hidden}
.tabs{display:flex;border-bottom:2px solid #e5e7eb}
.tab{flex:1;padding:11px;text-align:center;background:#f9fafb;border:none;cursor:pointer;font-size:.9rem;font-weight:500;color:#6b7280;transition:all .2s}
.tab:hover{background:#f3f4f6}
.tab.active{background:#fff;color:#2563eb;border-bottom:2px solid #2563eb;margin-bottom:-2px}
.tab .dot{display:inline-block;width:7px;height:7px;border-radius:50%;margin-left:6px;background:#d1d5db;vertical-align:middle}
.tab .dot.done{background:#059669}

.tc{padding:20px 24px}
.field{margin-bottom:16px}
.field-label{font-size:.78rem;text-transform:uppercase;letter-spacing:.05em;color:#6b7280;font-weight:600;margin-bottom:4px}
.field-val{font-size:.93rem;line-height:1.5;color:#1e293b}
.field-val.prompt-val{font-style:italic;color:#374151}
.resp{background:#f8fafc;border:1px solid #e2e8f0;border-radius:8px;padding:14px;font-size:.91rem;line-height:1.6;color:#334155;max-height:200px;overflow-y:auto;white-space:pre-wrap}
.survey{display:flex;flex-direction:column;gap:10px}
.survey label{font-size:.85rem;font-weight:500;color:#374151;display:block}
.survey input{width:100%;padding:7px 10px;border:1px solid #d1d5db;border-radius:6px;font-size:.9rem;margin-top:3px}
.survey input:focus{outline:none;border-color:#2563eb;box-shadow:0 0 0 2px rgba(37,99,235,.15)}

.nav{display:flex;justify-content:center;align-items:center;gap:14px;padding-bottom:36px}
.nav button{padding:9px 26px;border-radius:8px;font-size:.93rem;cursor:pointer;font-weight:500;border:1px solid #d1d5db;background:#fff;color:#374151;transition:all .15s}
.nav button:hover:not(:disabled){background:#f3f4f6}
.nav button:disabled{opacity:.35;cursor:not-allowed}
.status{font-size:.8rem;color:#059669;font-weight:500;min-width:80px;text-align:center;opacity:0;transition:opacity .3s}
.status.show{opacity:1}
</style>
</head>
<body>

<header>
  <h1>Dermatology VLM Evaluation</h1>
  <div class="hctl">
    <label>Model: <select id="vlm-sel"></select></label>
    <span class="progress" id="prog"></span>
    <button class="btn-exp" id="btn-exp">Export All CSVs</button>
  </div>
</header>

<main>
  <div class="img-sec">
    <img id="img" alt="lesion" />
    <div class="img-id" id="img-id"></div>
  </div>

  <div class="task-sec">
    <div class="tabs" id="tabs"></div>
    <div class="tc">
      <div class="field">
        <div class="field-label">Prompt</div>
        <div class="field-val prompt-val" id="prompt"></div>
      </div>
      <div class="field">
        <div class="field-label">VLM Response</div>
        <div class="resp" id="resp"></div>
      </div>
      <div class="survey" id="survey"></div>
    </div>
  </div>

  <div class="nav">
    <button id="btn-prev">&laquo; Previous</button>
    <span class="status" id="status"></span>
    <button id="btn-next">Next &raquo;</button>
  </div>
</main>

<script>
// ========== DATA (injected) ==========
// The three double-underscore placeholders on the right-hand sides are replaced with
// JSON literals by the notebook's Python code before the page is written.
// VLM_DATA: model name -> array of rows {id, describe, classify, describe_then_classify}.
const VLM_DATA = __VLM_DATA__;
// IMAGES: image id -> "data:image/jpeg;base64,..." URI used as the image src.
const IMAGES  = __IMAGES__;
// QUESTIONS: labels of the free-text survey inputs shown under every response.
const QUESTIONS = __QUESTIONS__;

// ========== CONFIG ==========
// One entry per tab. `key` is the row field holding the model's response and the prefix
// of the saved-answer keys; `label` is the tab caption; `prompt` is the text shown in
// the Prompt field.
const TASKS = [
  {key:'describe',                label:'Describe',           prompt:'Describe the lesion in detail.'},
  {key:'classify',                label:'Classify',           prompt:'Is the lesion malignant or benign, or other?'},
  {key:'describe_then_classify',  label:'Describe & Classify',prompt:'Describe the lesion in detail. Is the lesion malignant or benign, or other?'}
];
// localStorage key under which all answers are persisted as one JSON string.
const SK = 'vlm_eval_responses';

// ========== STATE ==========
// Read previously saved answers from localStorage. Falls back to an empty object when
// storage is unavailable, the stored text is not valid JSON, or it does not decode to
// a plain object (e.g. "null"), so a corrupted entry cannot stop the page from loading.
function loadSaved(){
  try{
    const parsed = JSON.parse(localStorage.getItem(SK)||'{}');
    return (parsed && typeof parsed==='object' && !Array.isArray(parsed)) ? parsed : {};
  }catch(err){
    return {};
  }
}

// S.vlm:  name of the selected model.
// S.idx:  index of the current row within that model's rows.
// S.task: key of the active task tab.
// S.R:    saved answers, nested as R[model][imageId][task_qN].
const S = {
  vlm:  Object.keys(VLM_DATA)[0],
  idx:  0,
  task: 'describe',
  R:    loadSaved()
};

// ========== INIT DOM ==========
// Model dropdown: one option per model name.
const vlmSel = document.getElementById('vlm-sel');
for(const modelName of Object.keys(VLM_DATA)){
  const opt = document.createElement('option');
  opt.value = modelName;
  opt.textContent = modelName;
  vlmSel.appendChild(opt);
}

// Task tabs: one button per task, each with a completion dot. Clicking a tab first
// copies the visible answers into the state, then switches task and repaints.
const tabsEl = document.getElementById('tabs');
for(const task of TASKS){
  const tabBtn = document.createElement('button');
  tabBtn.className = 'tab';
  tabBtn.dataset.task = task.key;
  tabBtn.innerHTML = task.label + '<span class="dot"></span>';
  tabBtn.onclick = ()=>{ collect(); S.task=task.key; render(); };
  tabsEl.appendChild(tabBtn);
}

// Survey: one labelled text input per question, with ids q0, q1, ...
const surveyEl = document.getElementById('survey');
const qInputs = QUESTIONS.map((question,i)=>{
  const label = document.createElement('label');
  label.textContent = question;
  const input = document.createElement('input');
  input.type = 'text';
  input.id = 'q'+i;
  input.placeholder = 'Your response...';
  label.appendChild(input);
  surveyEl.appendChild(label);
  return input;
});

// ========== HELPERS ==========
// Row currently on screen for the selected model.
function row(){
  const rows = VLM_DATA[S.vlm];
  return rows[S.idx];
}

// Saved-answer key for question i (0-based) of a task, e.g. rkey('classify',0) -> 'classify_q1'.
function rkey(task,i){
  return `${task}_q${i+1}`;
}

// Saved answer for the current model/image, or '' when nothing is stored.
function getR(task,i){
  const saved = S.R[S.vlm]?.[row().id]?.[rkey(task,i)];
  return saved || '';
}

// Store answer v for the current model/image, creating the nested objects on demand.
function setR(task,i,v){
  const byImage = S.R[S.vlm] || (S.R[S.vlm] = {});
  const answers = byImage[row().id] || (byImage[row().id] = {});
  answers[rkey(task,i)] = v;
}

// Copy every survey input into the state under the active task.
function collect(){
  qInputs.forEach((input,i)=>{ setR(S.task,i,input.value); });
}

// Write all answers to localStorage as one JSON string.
function persist(){
  localStorage.setItem(SK, JSON.stringify(S.R));
}

// True when every question of `task` has a non-blank answer for the current image.
function taskDone(task){
  const id = row().id;
  const answers = S.R[S.vlm]?.[id];
  if(!answers) return false;
  for(let i=0; i<QUESTIONS.length; i++){
    const val = answers[rkey(task,i)];
    if(!(val && val.trim())) return false;
  }
  return true;
}

// ========== RENDER ==========
// Repaint the page from the state S: image, progress text, nav-button enablement,
// tab highlight and completion dots, prompt, model response and saved answers.
function render(){
  const byId = id=>document.getElementById(id);
  const current = row();
  const rows = VLM_DATA[S.vlm];
  const lastIdx = rows.length-1;

  byId('img').src = IMAGES[current.id];
  byId('img-id').textContent = current.id;
  byId('prog').textContent = 'Image '+(S.idx+1)+' of '+rows.length;
  byId('btn-prev').disabled = S.idx===0;
  byId('btn-next').disabled = S.idx===lastIdx;

  for(const tab of document.querySelectorAll('.tab')){
    const key = tab.dataset.task;
    tab.classList.toggle('active', key===S.task);
    tab.querySelector('.dot').classList.toggle('done', taskDone(key));
  }

  const activeTask = TASKS.find(t=>t.key===S.task);
  byId('prompt').textContent = activeTask.prompt;
  byId('resp').textContent = current[S.task];
  qInputs.forEach((input,i)=>{ input.value = getR(S.task,i); });
}

// ========== EVENTS ==========
// Flush the visible inputs into the state and save them.
function saveInputs(){ collect(); persist(); }

// Save, then move `delta` images forward/backward if that stays inside the row list.
function moveBy(delta){
  saveInputs();
  const target = S.idx + delta;
  if(target>=0 && target<VLM_DATA[S.vlm].length){
    S.idx = target;
    render();
  }
}

// Switching model saves the current answers and restarts at the first image and task.
vlmSel.onchange = e=>{
  saveInputs();
  S.vlm = e.target.value;
  S.idx = 0;
  S.task = 'describe';
  render();
};
document.getElementById('btn-prev').onclick = ()=>moveBy(-1);
document.getElementById('btn-next').onclick = ()=>moveBy(1);

// Autosave on every keystroke and flash the status badge for 1.5 s.
let saveTimer;
document.getElementById('survey').addEventListener('input', ()=>{
  saveInputs();
  const badge = document.getElementById('status');
  badge.textContent = '\u2713 Auto-saved';
  badge.classList.add('show');
  clearTimeout(saveTimer);
  saveTimer = setTimeout(()=>badge.classList.remove('show'),1500);
});

// Export: saves the visible inputs, then triggers one CSV download per model.
// Each CSV has an `id` column followed by one column per task/question pair, and one
// row per image of that model (unanswered cells are empty). Every value is quoted and
// embedded double quotes are doubled.
document.getElementById('btn-exp').onclick = ()=>{
  collect(); persist();
  const qCols=[]; TASKS.forEach(t=>QUESTIONS.forEach((_,i)=>qCols.push(rkey(t.key,i))));
  const quote = v=>'"'+String(v).replace(/"/g,'""')+'"';

  Object.keys(VLM_DATA).forEach(vlm=>{
    const rows=VLM_DATA[vlm], resp=S.R[vlm]||{};
    const lines=[['id',...qCols].join(',')];
    rows.forEach(r=>{
      const a=resp[r.id]||{};
      const vals=[r.id,...qCols.map(c=>a[c]||'')];
      lines.push(vals.map(quote).join(','));
    });
    const csv=lines.join('\n')+'\n';

    const blob=new Blob([csv],{type:'text/csv'});
    const url=URL.createObjectURL(blob);
    const link=document.createElement('a');
    link.href=url;
    link.download=vlm.replace(/[^a-zA-Z0-9_-]/g,'_')+'_responses.csv';
    document.body.appendChild(link); link.click(); document.body.removeChild(link);
    // Release the blob once the download has had time to start; without this every
    // export keeps its blobs alive until the page is closed.
    setTimeout(()=>URL.revokeObjectURL(url), 40000);
  });
};

// keyboard nav: Left/Right arrows act like the Previous/Next buttons.
document.addEventListener('keydown', e=>{
  // Form controls use the arrow keys themselves (caret movement in inputs, option
  // change in the model dropdown), so do not also flip images while one is focused.
  const tag = e.target.tagName;
  if(tag==='INPUT' || tag==='SELECT' || tag==='TEXTAREA') return;
  // Combinations such as Alt+Left are browser shortcuts, not image navigation.
  if(e.altKey || e.ctrlKey || e.metaKey) return;
  if(e.key==='ArrowLeft') document.getElementById('btn-prev').click();
  if(e.key==='ArrowRight') document.getElementById('btn-next').click();
});

// ========== START ==========
render();
</script>
</body>
</html>"""

# --- inject data and write ---
def _js_literal(obj):
    """Serialize `obj` to JSON that is safe to place inside an inline <script>.

    json.dumps leaves "<" untouched, so model text containing "</script>" or
    "<!--" would terminate or corrupt the script element. Writing "<" as its
    unicode escape is read back as the same character by the JavaScript parser
    but is inert to the HTML parser.
    """
    return json.dumps(obj).replace("<", "\\u003c")


# Placeholder name (without the surrounding double underscores) -> JS literal.
payloads = {
    "VLM_DATA": _js_literal(vlm_data),
    "IMAGES": _js_literal(images),
    "QUESTIONS": _js_literal(QUESTIONS),
}

os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

# Single pass over the template: injected data is never rescanned, so model text
# that happens to contain a placeholder token is left alone. A function is used
# as the replacement so backslashes in the payload are inserted verbatim.
html = re.sub(
    r"__(VLM_DATA|IMAGES|QUESTIONS)__",
    lambda match: payloads[match.group(1)],
    HTML_TEMPLATE,
)

# The page declares <meta charset="UTF-8">, so write it as UTF-8 regardless of
# the platform's default encoding.
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    f.write(html)

size_mb = os.path.getsize(OUTPUT_PATH) / 1024 / 1024
print(f"Generated {OUTPUT_PATH} ({size_mb:.1f} MB)")

Generated results/interface2/index.html (96.5 MB)
