In [None]:
!pip uninstall -y transformers peft tokenizers accelerate
!pip install transformers==4.38.2 peft==0.8.2 accelerate==0.27.2


In [None]:
!pip install --upgrade accelerate peft transformers datasets pillow huggingface_hub --quiet


In [None]:
!pip install --upgrade git+https://github.com/haotian-liu/LLaVA.git
!pip install transformers accelerate datasets safetensors peft bitsandbytes sentencepiece


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Public model
BASE_MODEL = "mosaicml/mpt-7b-instruct"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    torch_dtype="auto"
)

# Example prompt
prompt = "Explain quantum mechanics in simple terms."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


In [None]:
# ========================================================
# Full working MPT-7B-Instruct pipeline (no gated models)
# ========================================================

# Install dependencies (run once)
# !pip install transformers accelerate datasets peft safetensors bitsandbytes --upgrade

from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch

# -------------------------------
# Configuration
# -------------------------------
BASE_MODEL = "mosaicml/mpt-7b-instruct"  # public model, no HF token needed
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MAX_TOKENS = 200

# -------------------------------
# Load tokenizer
# -------------------------------
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# -------------------------------
# Load model
# -------------------------------
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    dtype=torch.float16 if torch.cuda.is_available() else torch.float32
)

# Ensure pad_token_id is set for generation
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

# -------------------------------
# Generation function
# -------------------------------
def generate_text(prompt: str, max_new_tokens: int = MAX_TOKENS):
    """Sample a continuation of ``prompt`` from the loaded causal LM.

    Args:
        prompt: Text the model should continue.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded sequence returned by ``generate`` (prompt followed by the
        continuation) with special tokens removed.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    generation_config = GenerationConfig(
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        # A hand-built GenerationConfig does not inherit the tokenizer's
        # special tokens, so pass them explicitly: without an EOS id the model
        # cannot stop early, and without a pad id generate() has to guess one.
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )
    outputs = model.generate(**inputs, generation_config=generation_config)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# -------------------------------
# Example usage
# -------------------------------
prompt = "Explain quantum mechanics in simple terms."
result = generate_text(prompt)
print(result)

# -------------------------------
# Optional: LoRA fine-tuning template
# -------------------------------
"""
from peft import LoraConfig, get_peft_model, TaskType

# LoRA configuration
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

# Wrap the model with LoRA
model = get_peft_model(model, lora_config)

# Training loop example
from datasets import load_dataset
dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./lora_mpt7b",
    per_device_train_batch_size=1,
    num_train_epochs=1,
    logging_steps=10,
    save_steps=100,
    fp16=True if torch.cuda.is_available() else False,
    push_to_hub=False,
)

def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets
)

trainer.train()
"""


In [None]:
!pip install -q transformers accelerate datasets peft safetensors diffusers sentencepiece huggingface_hub pillow


In [None]:
!pip install --upgrade pip
!pip install transformers==4.52.0 accelerate safetensors

In [None]:
!pip install -q transformers==4.52.0 datasets accelerate peft bitsandbytes huggingface_hub


In [None]:
import torch, json
from datasets import Dataset
from transformers import (
    AutoProcessor,
    AutoTokenizer,
    LlavaForConditionalGeneration,
    TrainingArguments,
    Trainer,
)
from peft import LoraConfig, get_peft_model
from huggingface_hub import login

# Do not paste the access token into the notebook: with no argument,
# login() asks for it interactively.
login()

In [None]:
MODEL_ID = "llava-hf/llava-1.5-7b-hf"

processor = AutoProcessor.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)

model = LlavaForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
)

model.gradient_checkpointing_enable()


In [None]:
# LLaVA-1.5 is a decoder-only model, so the adapter is declared as CAUSAL_LM
# (the encoder-decoder SEQ_2_SEQ_LM task type does not match this
# architecture and is not what the later training cells use).
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the base model so that only the LoRA matrices are trainable, then
# report how many parameters that leaves.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


In [None]:
with open("/kaggle/input/json-dataset/train (1).json") as f:
    raw = json.load(f)

def build_text(example):
    """Flatten one conversation record into an image reference plus transcript.

    Every turn contributes one line made of the upper-cased ``from`` field,
    a colon and the ``value`` text, terminated by a newline.

    Args:
        example: Mapping with ``conversations`` (list of turns) and ``image``.

    Returns:
        Dict with the untouched ``image`` entry and the joined ``text``.
    """
    transcript = "".join(
        f"{turn['from'].upper()}: {turn['value']}\n"
        for turn in example["conversations"]
    )
    return {"image": example["image"], "text": transcript}

dataset = Dataset.from_list([build_text(x) for x in raw])


In [None]:
def collate_fn(batch):
    """Collate dataset rows into one processor-encoded training batch.

    Args:
        batch: List of rows, each with an ``image`` file path and a ``text``
            transcript (the shape produced by ``build_text``).

    Returns:
        The processor output with an added ``labels`` tensor: a copy of
        ``input_ids`` in which padded positions are set to -100 so they are
        ignored by the loss.
    """
    # Local import: the import cell for this section does not bring in PIL.
    from PIL import Image

    # The dataset stores file paths, so the images have to be decoded here.
    images = []
    for row in batch:
        with Image.open(row["image"]) as img:
            images.append(img.convert("RGB"))
    texts = [row["text"] for row in batch]

    processed = processor(
        images=images,
        text=texts,
        padding=True,
        truncation=True,
        return_tensors="pt"
    )

    labels = processed["input_ids"].clone()
    attention_mask = processed.get("attention_mask")
    if attention_mask is not None:
        # Padding carries no training signal; exclude it from the loss.
        labels[attention_mask == 0] = -100
    processed["labels"] = labels
    return processed


In [None]:
args = TrainingArguments(
    output_dir="llava-xray-lora",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=2,
    fp16=True,

    logging_steps=10,
    save_steps=200,
    save_total_limit=2,

    push_to_hub=True,
    hub_model_id="mohit311/LLaVA-data-json",

    remove_unused_columns=False,  # REQUIRED FOR MULTIMODAL
)


In [None]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset,
    data_collator=collate_fn,
)

trainer.train()
trainer.push_to_hub()


In [None]:
!pip uninstall -y torch torchvision torchaudio transformers accelerate peft bitsandbytes


In [None]:
!pip install -U \
  transformers==4.40.2 \
  accelerate==0.27.2 \
  peft==0.10.0 \
  datasets \
  pillow \
  huggingface_hub


In [None]:
from huggingface_hub import login

# Do not paste the access token into the notebook: with no argument,
# login() asks for it interactively.
login()

In [None]:
import torch
import json
from PIL import Image
from datasets import Dataset
from transformers import (
    LlavaForConditionalGeneration,
    LlavaProcessor,
    TrainingArguments,
    Trainer
)
from peft import LoraConfig, get_peft_model
from huggingface_hub import login

# Do not paste the access token into the notebook: with no argument,
# login() asks for it interactively.
login()

In [None]:

MODEL_ID = "llava-hf/llava-1.5-7b-hf"

processor = LlavaProcessor.from_pretrained(MODEL_ID)

model = LlavaForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto"
)


In [None]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


In [None]:
with open("/kaggle/input/json-dataset/train (1).json") as f:
    raw_data = json.load(f)

dataset = Dataset.from_list(raw_data)


In [None]:
class LlavaDataCollator:
    """Build padded LLaVA training batches from raw dataset rows.

    Each row must provide an ``image`` file path and a ``conversations`` list
    of ``{"from": ..., "value": ...}`` turns.
    """

    def __init__(self, processor):
        # Callable that encodes images and text together.
        self.processor = processor

    def __call__(self, batch):
        """Encode ``batch`` and attach loss labels.

        Returns:
            The processor output plus ``labels``: a copy of ``input_ids`` with
            padded positions replaced by -100 so the loss skips them.
        """
        images = []
        texts = []

        for ex in batch:
            # Close the file handle as soon as the pixels have been copied.
            with Image.open(ex["image"]) as img:
                images.append(img.convert("RGB"))

            # One "<SPEAKER>: <text>" line per turn.
            texts.append(
                "".join(
                    f"{turn['from'].upper()}: {turn['value']}\n"
                    for turn in ex["conversations"]
                )
            )

        inputs = self.processor(
            images=images,
            text=texts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=1024
        )

        labels = inputs["input_ids"].clone()
        attention_mask = inputs.get("attention_mask")
        if attention_mask is not None:
            # Padding carries no training signal; exclude it from the loss.
            labels[attention_mask == 0] = -100
        inputs["labels"] = labels
        return inputs


In [None]:
training_args = TrainingArguments(
    output_dir="./llava-med",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    push_to_hub=True,
    hub_model_id="mohit311/LLaVA-data-json",
    hub_strategy="checkpoint",
    remove_unused_columns=False,
    report_to="none"
)


In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=LlavaDataCollator(processor),
)


In [None]:
trainer.train()
trainer.push_to_hub()


In [None]:
import json
import re

INPUT_JSON = "/kaggle/input/json-dataset/train (1).json"
OUTPUT_JSON = "/kaggle/working/train_clean.json"

def clean_text(t):
    """Remove speaker tags and squeeze long runs of newlines.

    Deletes every ``GPT:``, ``USER:`` and ``ASSISTANT:`` tag (any letter
    case), reduces three or more consecutive newlines to two, and trims
    surrounding whitespace.
    """
    untagged = re.sub(r"GPT:|USER:|ASSISTANT:", "", t.strip(), flags=re.I)
    squeezed = re.sub(r"\n{3,}", "\n\n", untagged)
    return squeezed.strip()

# Section headers are matched in any letter case, mirroring how the dataset
# writes them inconsistently.
_SECTION_HEADER = re.compile(r"(FINDINGS|IMPRESSION):", flags=re.I)


def _extract_sections(text):
    """Map each section name found in ``text`` to the body that follows it.

    A body runs from the end of its header to the start of the next header
    (or the end of ``text``), so a turn holding both sections is split instead
    of being copied whole into each of them.
    """
    headers = list(_SECTION_HEADER.finditer(text))
    sections = {}
    for idx, header in enumerate(headers):
        stop = headers[idx + 1].start() if idx + 1 < len(headers) else len(text)
        sections[header.group(1).upper()] = text[header.end():stop]
    return sections


clean_data = []

with open(INPUT_JSON) as f:
    data = json.load(f)

for ex in data:
    image = ex["image"]

    # Later turns override earlier ones when a section appears more than once.
    sections = {}
    for turn in ex["conversations"]:
        sections.update(_extract_sections(turn["value"]))

    findings = clean_text(sections.get("FINDINGS", ""))
    impression = clean_text(sections.get("IMPRESSION", ""))

    # Keep only samples that have non-empty text for both sections.
    if findings and impression:
        assistant = f"FINDINGS:\n{findings}\n\nIMPRESSION:\n{impression}\n"

        clean_data.append({
            "image": image,
            "conversations": [
                {
                    "from": "human",
                    "value": "<image>\nGenerate a chest X-ray report."
                },
                {
                    "from": "gpt",
                    "value": assistant
                }
            ]
        })

print(f"Clean samples: {len(clean_data)}")

with open(OUTPUT_JSON, "w") as f:
    json.dump(clean_data, f, indent=2)


In [None]:
BASE_MODEL = "llava-hf/llava-1.5-7b-hf"

model = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto"
)


In [None]:
from peft import PeftModel

model = PeftModel.from_pretrained(
    model,
    "mohit311/LLaVA-data-json",  # <-- NOT checkpoint-xxxx
    is_trainable=True
)


In [None]:
import json
import re

INPUT_JSON = "/kaggle/input/json-dataset/train (1).json"
OUTPUT_JSON = "/kaggle/working/train_clean.json"

def clean_text(t):
    """Normalise one report string.

    Speaker tags (``GPT:``, ``USER:``, ``ASSISTANT:``; case-insensitive) are
    dropped, runs of three or more newlines become a single blank line, and
    leading/trailing whitespace is removed.
    """
    text = t.strip()
    for pattern, replacement in (
        (r"GPT:|USER:|ASSISTANT:", ""),
        (r"\n{3,}", "\n\n"),
    ):
        # Only the speaker-tag pattern contains letters, so applying the
        # ignore-case flag to both substitutions changes nothing.
        text = re.sub(pattern, replacement, text, flags=re.I)
    return text.strip()

# Section headers are matched in any letter case, mirroring how the dataset
# writes them inconsistently.
_SECTION_HEADER = re.compile(r"(FINDINGS|IMPRESSION):", flags=re.I)


def _extract_sections(text):
    """Map each section name found in ``text`` to the body that follows it.

    A body runs from the end of its header to the start of the next header
    (or the end of ``text``), so a turn holding both sections is split instead
    of being copied whole into each of them.
    """
    headers = list(_SECTION_HEADER.finditer(text))
    sections = {}
    for idx, header in enumerate(headers):
        stop = headers[idx + 1].start() if idx + 1 < len(headers) else len(text)
        sections[header.group(1).upper()] = text[header.end():stop]
    return sections


clean_data = []

with open(INPUT_JSON) as f:
    data = json.load(f)

for ex in data:
    image = ex["image"]

    # Later turns override earlier ones when a section appears more than once.
    sections = {}
    for turn in ex["conversations"]:
        sections.update(_extract_sections(turn["value"]))

    findings = clean_text(sections.get("FINDINGS", ""))
    impression = clean_text(sections.get("IMPRESSION", ""))

    # Keep only samples that have non-empty text for both sections.
    if findings and impression:
        assistant = f"FINDINGS:\n{findings}\n\nIMPRESSION:\n{impression}\n"

        clean_data.append({
            "image": image,
            "conversations": [
                {
                    "from": "human",
                    "value": "<image>\nGenerate a chest X-ray report."
                },
                {
                    "from": "gpt",
                    "value": assistant
                }
            ]
        })

print(f"Clean samples: {len(clean_data)}")

with open(OUTPUT_JSON, "w") as f:
    json.dump(clean_data, f, indent=2)


In [None]:
from huggingface_hub import snapshot_download

LOCAL_CKPT_DIR = "./llava-med/checkpoint-5361"

snapshot_download(
    repo_id="mohit311/LLaVA-data-json",
    local_dir=LOCAL_CKPT_DIR,
    allow_patterns=[
        "last-checkpoint/*",
        "adapter_*",
        "optimizer.pt",
        "scheduler.pt",
        "trainer_state.json",
        "training_args.bin",
        "rng_state.pth",
    ],
    local_dir_use_symlinks=False,
)


In [None]:
!pip install -U transformers accelerate peft datasets pillow


In [None]:
import torch, json
from PIL import Image
from datasets import Dataset
from transformers import LlavaForConditionalGeneration, LlavaProcessor, TrainingArguments, Trainer
from peft import PeftModel
from huggingface_hub import login

# Do not paste the access token into the notebook: with no argument,
# login() asks for it interactively.
login()

BASE_MODEL = "llava-hf/llava-1.5-7b-hf"
ADAPTER_REPO = "mohit311/LLaVA-data-json"

# Processor
processor = LlavaProcessor.from_pretrained(BASE_MODEL)

# Base model
base_model = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Load LoRA adapter from HF
model = PeftModel.from_pretrained(
    base_model,
    ADAPTER_REPO,
    is_trainable=True
)

model.print_trainable_parameters()


In [None]:
class LlavaDataCollator:
    """Collate cleaned two-turn samples into LLaVA training batches.

    Each row must provide an ``image`` file path and a ``conversations`` list
    whose first entry is the user turn and second entry the assistant turn.
    """

    def __init__(self, processor):
        # Callable that encodes images and text together.
        self.processor = processor

    def __call__(self, batch):
        """Encode ``batch`` and attach loss labels.

        Returns:
            The processor output plus ``labels``: a copy of ``input_ids`` with
            padded positions replaced by -100 so the loss skips them.
        """
        images, texts = [], []

        for ex in batch:
            # Close the file handle as soon as the pixels have been copied.
            with Image.open(ex["image"]) as img:
                images.append(img.convert("RGB"))
            user = ex["conversations"][0]["value"]
            assistant = ex["conversations"][1]["value"]
            texts.append(f"USER: {user}\nASSISTANT: {assistant}")

        inputs = self.processor(
            images=images,
            text=texts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=768
        )

        labels = inputs["input_ids"].clone()
        attention_mask = inputs.get("attention_mask")
        if attention_mask is not None:
            # Padding carries no training signal; exclude it from the loss.
            labels[attention_mask == 0] = -100
        inputs["labels"] = labels
        return inputs


In [None]:
from huggingface_hub import snapshot_download

CKPT_ROOT = "./llava-med"
LOCAL_CKPT_DIR = f"{CKPT_ROOT}/checkpoint-6500"

# snapshot_download recreates the repository's folder layout underneath
# `local_dir`, so files matched by "checkpoint-6500/*" must be downloaded into
# the parent folder to end up in LOCAL_CKPT_DIR rather than in a nested
# "checkpoint-6500/checkpoint-6500" directory.
snapshot_download(
    repo_id="mohit311/LLaVA-data-json",
    local_dir=CKPT_ROOT,
    allow_patterns=[
        "checkpoint-6500/*",
    ]
)


In [None]:
import torch
from transformers import LlavaForConditionalGeneration, LlavaProcessor
from peft import LoraConfig, get_peft_model

BASE_MODEL = "llava-hf/llava-1.5-7b-hf"

processor = LlavaProcessor.from_pretrained(BASE_MODEL)

model = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto"
)

lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


In [None]:
import json
from datasets import Dataset

with open("/kaggle/input/train-clean/train_clean.json") as f:
    raw_data = json.load(f)

dataset = Dataset.from_list(raw_data)


In [None]:
from PIL import Image

class LlavaDataCollator:
    """Collate cleaned two-turn samples into LLaVA training batches.

    Each row must provide an ``image`` file path and a ``conversations`` list
    whose first entry is the user turn and second entry the assistant turn.
    """

    def __init__(self, processor):
        # Callable that encodes images and text together.
        self.processor = processor

    def __call__(self, batch):
        """Encode ``batch`` and attach loss labels.

        Returns:
            The processor output plus ``labels``: a copy of ``input_ids`` with
            padded positions replaced by -100 so the loss skips them.
        """
        images, texts = [], []

        for ex in batch:
            # Close the file handle as soon as the pixels have been copied.
            with Image.open(ex["image"]) as img:
                images.append(img.convert("RGB"))

            user = ex["conversations"][0]["value"]
            assistant = ex["conversations"][1]["value"]

            texts.append(f"USER: {user}\nASSISTANT: {assistant}")

        inputs = self.processor(
            images=images,
            text=texts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=1024
        )

        labels = inputs["input_ids"].clone()
        attention_mask = inputs.get("attention_mask")
        if attention_mask is not None:
            # Padding carries no training signal; exclude it from the loss.
            labels[attention_mask == 0] = -100
        inputs["labels"] = labels
        return inputs


In [None]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./llava-med",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=2,     # TOTAL epochs (not additional)
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    push_to_hub=True,
    hub_model_id="mohit311/LLaVA-data-json",
    hub_strategy="checkpoint",
    remove_unused_columns=False,
    report_to="none"
)


In [None]:
from huggingface_hub import snapshot_download

CKPT_ROOT = "./llava-med"
LOCAL_CKPT_DIR = f"{CKPT_ROOT}/last-checkpoint"

# snapshot_download recreates the repository's folder layout underneath
# `local_dir`, so files matched by "last-checkpoint/*" must be downloaded into
# the parent folder to end up in LOCAL_CKPT_DIR rather than in a nested
# "last-checkpoint/last-checkpoint" directory.
snapshot_download(
    repo_id="mohit311/LLaVA-data-json",
    local_dir=CKPT_ROOT,
    repo_type="model",
    allow_patterns=[
        "last-checkpoint/*"
    ]
)

print("Checkpoint downloaded to:", LOCAL_CKPT_DIR)


In [None]:
from huggingface_hub import snapshot_download

LOCAL_CKPT_DIR = "./llava-med/checkpoint-6500"

snapshot_download(
    repo_id="mohit311/LLaVA-data-json",
    local_dir=LOCAL_CKPT_DIR,
    allow_patterns=[
        "adapter_model.safetensors",
        "adapter_config.json",
        "optimizer.pt",
        "scheduler.pt",
        "trainer_state.json",
        "training_args.bin",
        "rng_state.pth"
    ],
    local_dir_use_symlinks=False
)


In [None]:
import torch
import json
from PIL import Image
from datasets import Dataset
from transformers import LlavaForConditionalGeneration, LlavaProcessor, TrainingArguments, Trainer
from peft import PeftModel
from huggingface_hub import login

# Do not paste the access token into the notebook: with no argument,
# login() asks for it interactively.
login()

BASE_MODEL = "llava-hf/llava-1.5-7b-hf"
CKPT_DIR = "./llava-med/checkpoint-6500"

processor = LlavaProcessor.from_pretrained(BASE_MODEL)

base_model = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto"
)

model = PeftModel.from_pretrained(
    base_model,
    CKPT_DIR,
    is_trainable=True
)

model.print_trainable_parameters()


In [None]:
with open("/kaggle/input/train-clean/train_clean.json") as f:
    raw_data = json.load(f)

dataset = Dataset.from_list(raw_data)


In [None]:
#HLRS
# =========================
# RESUME & FINISH TRAINING
# =========================

import torch, json
from PIL import Image
from datasets import Dataset
from transformers import (
    LlavaForConditionalGeneration,
    LlavaProcessor,
    TrainingArguments,
    Trainer
)
from peft import PeftModel
from huggingface_hub import login

# ---------------- LOGIN ----------------
# Do not paste the access token into the notebook: with no argument,
# login() asks for it interactively.
login()

BASE_MODEL = "llava-hf/llava-1.5-7b-hf"
LORA_REPO = "mohit311/LLaVA-data-json"

# ---------------- PROCESSOR ----------------
processor = LlavaProcessor.from_pretrained(BASE_MODEL)

# ---------------- BASE MODEL ----------------
model = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto"
)

# ---------------- LOAD TRAINED LoRA (step ~6500) ----------------
model = PeftModel.from_pretrained(
    model,
    LORA_REPO,
    is_trainable=True
)

model.print_trainable_parameters()

# ---------------- DATASET ----------------
with open("/kaggle/input/train-clean/train_clean.json") as f:
    raw_data = json.load(f)

dataset = Dataset.from_list(raw_data)

# ---------------- DATA COLLATOR ----------------
class LlavaDataCollator:
    """Collate cleaned two-turn samples into LLaVA training batches.

    Each row must provide an ``image`` file path and a ``conversations`` list
    whose first entry is the user turn and second entry the assistant turn.
    """

    def __init__(self, processor):
        # Callable that encodes images and text together.
        self.processor = processor

    def __call__(self, batch):
        """Encode ``batch`` and attach loss labels.

        Returns:
            The processor output plus ``labels``: a copy of ``input_ids`` with
            padded positions replaced by -100 so the loss skips them.
        """
        images, texts = [], []

        for ex in batch:
            # Close the file handle as soon as the pixels have been copied.
            with Image.open(ex["image"]) as img:
                images.append(img.convert("RGB"))
            text = f"USER: {ex['conversations'][0]['value']}\nASSISTANT: {ex['conversations'][1]['value']}"
            texts.append(text)

        inputs = self.processor(
            images=images,
            text=texts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=1024
        )

        labels = inputs["input_ids"].clone()
        attention_mask = inputs.get("attention_mask")
        if attention_mask is not None:
            # Padding carries no training signal; exclude it from the loss.
            labels[attention_mask == 0] = -100
        inputs["labels"] = labels
        return inputs

# ---------------- TRAINING ARGS ----------------
training_args = TrainingArguments(
    output_dir="./llava-med",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=0.7,   # FINISH REMAINING TRAINING
    learning_rate=1e-4,     # lower LR for stability
    fp16=True,
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    push_to_hub=True,
    hub_model_id=LORA_REPO,
    hub_strategy="end",
    remove_unused_columns=False,
    report_to="none"
)

# ---------------- TRAIN ----------------
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=LlavaDataCollator(processor),
)

trainer.train()
trainer.push_to_hub()


In [None]:
import torch
from PIL import Image
from transformers import LlavaForConditionalGeneration, LlavaProcessor
from peft import PeftModel
from huggingface_hub import login

# ---------------- LOGIN ----------------
# Do not paste the access token into the notebook: with no argument,
# login() asks for it interactively.
login()

# ---------------- MODEL IDS ----------------
BASE_MODEL = "llava-hf/llava-1.5-7b-hf"
LORA_REPO  = "mohit311/LLaVA-data-json"

# ---------------- DEVICE ----------------
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# ---------------- LOAD PROCESSOR ----------------
processor = LlavaProcessor.from_pretrained(BASE_MODEL)

# ---------------- LOAD BASE MODEL (SINGLE GPU) ----------------
base_model = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map={"": device}
).eval()

# ---------------- LOAD LoRA ADAPTER ----------------
model = PeftModel.from_pretrained(
    base_model,
    LORA_REPO,
    is_trainable=False
).eval()

# ---------------- LOAD IMAGE ----------------
image_path = "/kaggle/input/mimic-cxr/Main2/p1000/i1000.jpg"   # CHANGE THIS
image = Image.open(image_path).convert("RGB")

# ---------------- PROMPT (MUST CONTAIN <image>) ----------------
prompt = """<image>
You are a radiologist.

Write a chest X-ray report with exactly two sections.

FINDINGS:
Describe only what is visible on the image.

IMPRESSION:
Provide a concise clinical summary.

Do not repeat sections.
Do not add history.
"""

# ---------------- PROCESS INPUT ----------------
inputs = processor(
    images=image,
    text=prompt,
    return_tensors="pt"
).to(device)

# ---------------- GENERATE ----------------
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=False
    )

# ---------------- DECODE ----------------
output = processor.tokenizer.decode(
    output_ids[0],
    skip_special_tokens=True
)

print("===== MODEL OUTPUT =====")
print(output)


In [None]:
def clean_report(text):
    """Drop everything that precedes the first ``FINDINGS:`` marker.

    Args:
        text: Decoded model output.

    Returns:
        ``text`` starting at the first ``FINDINGS:`` occurrence, with
        surrounding whitespace stripped. When the marker is absent the whole
        text is returned stripped instead of raising.
    """
    marker = "FINDINGS:"
    start = text.find(marker)
    if start != -1:
        text = text[start:]
    return text.strip()

print(clean_report(output))


In [None]:
import torch
from PIL import Image
from transformers import LlavaForConditionalGeneration, LlavaProcessor
from peft import PeftModel

# ---------------- CONFIG ----------------
BASE_MODEL = "llava-hf/llava-1.5-7b-hf"
LORA_REPO = "mohit311/LLaVA-data-json"
IMAGE_PATH = "/kaggle/input/mimic-cxr/Main2/p10003/i10003.jpg"  # change this

DEVICE = "cuda:0"

# ---------------- LOAD PROCESSOR ----------------
processor = LlavaProcessor.from_pretrained(BASE_MODEL)

# ---------------- LOAD BASE MODEL (SINGLE GPU) ----------------
base_model = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map={"": 0}   # force single GPU
).eval()

# ---------------- LOAD LORA ADAPTER ----------------
model = PeftModel.from_pretrained(
    base_model,
    LORA_REPO,
    is_trainable=False
).eval()

# ---------------- LOAD IMAGE ----------------
image = Image.open(IMAGE_PATH).convert("RGB")

# ---------------- PROMPT (CRITICAL) ----------------
prompt = """<image>
You are an expert radiologist.

Generate a chest X-ray report using EXACTLY this format:

FINDINGS:
- Describe lungs, pleura, heart size, mediastinum, lines or devices if visible.

IMPRESSION:
- Provide a concise clinical summary (1-2 sentences).

Return exactly ONE report.
"""

# ---------------- PREPARE INPUTS ----------------
inputs = processor(
    text=prompt,
    images=image,
    return_tensors="pt"
)

inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

# ---------------- GENERATE ----------------
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=220,
        do_sample=False,      # important
        temperature=0.0,
        top_p=1.0,
        num_beams=1,
        repetition_penalty=1.1
    )

output = processor.decode(output_ids[0], skip_special_tokens=True)

print("===== MODEL OUTPUT =====\n")
print(output)


In [None]:
!pip install sqlalchemy --upgrade
!pip install gradio transformers peft torch pillow accelerate protobuf -q

In [None]:
import gradio as gr
import torch
from PIL import Image
from transformers import LlavaForConditionalGeneration, LlavaProcessor
from peft import PeftModel
import gc

# Global variables to cache model
model = None
processor = None
device = None

def load_model_once():
    """Populate the module-level ``model``, ``processor`` and ``device``.

    Does nothing when ``model`` has already been set, so the expensive load
    happens a single time per kernel.
    """
    global model, processor, device

    already_loaded = model is not None
    if already_loaded:
        return

    llava_id = "llava-hf/llava-1.5-7b-hf"

    print("Loading processor...")
    processor = LlavaProcessor.from_pretrained(llava_id)

    print("Loading base model...")
    # Half precision on GPU, full precision on CPU.
    if torch.cuda.is_available():
        device, dtype = "cuda", torch.float16
    else:
        device, dtype = "cpu", torch.float32

    base_model = LlavaForConditionalGeneration.from_pretrained(
        llava_id,
        torch_dtype=dtype,
        device_map="auto",
    )

    print("Loading LoRA adapter...")
    adapted = PeftModel.from_pretrained(
        base_model,
        "mohit311/LLaVA-data-json",
        is_trainable=False,
    )
    model = adapted.eval()

    print(f"Model loaded on {device.upper()}!")

def diagnose_image(image, question):
    """Generate diagnosis from image and question.

    Args:
        image: PIL image supplied by the UI, or ``None`` when nothing was
            uploaded.
        question: Free-text question about the image.

    Returns:
        The decoded newly generated tokens (prompt excluded), a short hint
        when an input is missing, or ``"Error: ..."`` when processing fails.
    """
    if image is None:
        return "Please upload an image first!"

    if not question or not question.strip():
        return "Please enter a question!"

    try:
        # Ensure image is RGB
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # NOTE(review): the training collators format samples as
        # "USER: ...\nASSISTANT: ..."; confirm whether this prompt should
        # follow the same template.
        prompt = f"<image>\n{question}"

        inputs = processor(
            text=prompt,
            images=image,
            return_tensors="pt"
        )

        # Move tensors to the device chosen at load time.
        inputs = {k: v.to(device) if hasattr(v, 'to') else v for k, v in inputs.items()}

        # Greedy decoding: sampling-only knobs (temperature, top_p) are
        # ignored when do_sample=False and only trigger warnings, so they are
        # not passed.
        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=220,
                do_sample=False,
                num_beams=1,
                repetition_penalty=1.1
            )

        # generate() returns prompt + continuation; keep the continuation only.
        input_length = inputs['input_ids'].shape[1]
        new_tokens = output_ids[0, input_length:]
        response = processor.decode(new_tokens, skip_special_tokens=True)

        return response.strip()

    except Exception as e:
        # Surface the failure in the UI text box instead of crashing the app.
        return f"Error: {str(e)}"

    finally:
        # Release memory after failed requests too, not only successful ones.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

# Load model on startup
load_model_once()

# Create Gradio interface
with gr.Blocks(title="Medical Image Diagnosis") as demo:
    gr.Markdown("# Medical Image Diagnosis Assistant")
    gr.Markdown("Upload a medical image and ask questions about it using AI analysis powered by LLaVA 1.5 7B with medical LoRA fine-tuning.")
    
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Upload Image")
            image_input = gr.Image(label="Medical Image", type="pil")
            
            gr.Markdown("### Ask Your Question")
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="e.g., What abnormalities do you see in this chest X-ray?",
                lines=3
            )
            
            submit_button = gr.Button("Analyze Image", variant="primary")
        
        with gr.Column():
            gr.Markdown("### AI Response")
            output_text = gr.Textbox(
                label="Diagnosis",
                lines=10,
                interactive=False
            )
    
    # Example questions
    gr.Markdown("### Example Questions")
    example_questions = [
        "What is shown in this medical image?",
        "What abnormalities can you identify?",
        "Describe the key findings in this image.",
        "What anatomical structures are visible?",
        "Are there any signs of disease or pathology?"
    ]
    
    gr.Examples(
        examples=[
            [None, q] for q in example_questions
        ],
        inputs=[image_input, question_input],
        label="Try these questions (add your own image)"
    )
    
    # Connect button
    submit_button.click(
        fn=diagnose_image,
        inputs=[image_input, question_input],
        outputs=output_text
    )
    
    gr.Markdown("---")
    gr.Markdown("""
    ### Technical Details
    - **Model**: LLaVA 1.5 7B
    - **Fine-tuning**: LoRA adapter (medical imaging)
    - **Device**: GPU (T4 on Kaggle / Colab)
    
    **Disclaimer**: For educational purposes only. Not a substitute for professional medical advice.
    """)

if __name__ == "__main__":
    demo.launch(share=True)  # share=True generates a public link


In [None]:
demo.launch(share=True)

In [None]:
!pip install -q evaluate rouge-score nltk


In [None]:
import torch
import evaluate
from PIL import Image
from transformers import LlavaForConditionalGeneration, LlavaProcessor
from peft import PeftModel
from datasets import load_dataset

# ---------------- CONFIG ----------------
BASE_MODEL = "llava-hf/llava-1.5-7b-hf"
LORA_REPO = "mohit311/LLaVA-data-json"
DATA_PATH = "/kaggle/input/train-clean/train_clean.json"
MAX_SAMPLES = 200   # start small (increase later)

device = "cuda" if torch.cuda.is_available() else "cpu"

# ---------------- LOAD METRICS ----------------
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")

# ---------------- LOAD MODEL ----------------
processor = LlavaProcessor.from_pretrained(BASE_MODEL)

base_model = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto"
)

model = PeftModel.from_pretrained(
    base_model,
    LORA_REPO
)

model.eval()

# ---------------- LOAD DATA ----------------
# NOTE(review): DATA_PATH is the same train_clean.json the training cells
# load, so these scores describe fit to seen data, not held-out quality.
dataset = load_dataset("json", data_files=DATA_PATH)["train"]
# Cap at the dataset size so a small file does not raise an index error.
dataset = dataset.select(range(min(MAX_SAMPLES, len(dataset))))

predictions = []
references = []

# ---------------- INFERENCE LOOP ----------------
for sample in dataset:
    # Close the file handle as soon as the pixels have been copied.
    with Image.open(sample["image"]) as img:
        image = img.convert("RGB")

    prompt = sample["conversations"][0]["value"]

    inputs = processor(
        images=image,
        text=prompt,
        return_tensors="pt"
    ).to(device)

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=False
        )

    # generate() returns prompt + continuation; scoring the prompt text as
    # part of the prediction would distort BLEU/ROUGE, so drop it.
    prompt_len = inputs["input_ids"].shape[1]
    pred = processor.decode(
        output_ids[0, prompt_len:],
        skip_special_tokens=True
    ).strip()

    gt = sample["conversations"][1]["value"]

    predictions.append(pred)
    references.append([gt])  # BLEU expects list of references

# ---------------- COMPUTE METRICS ----------------
bleu_score = bleu.compute(predictions=predictions, references=references)
rouge_score = rouge.compute(predictions=predictions, references=[r[0] for r in references])

print("===== EVALUATION RESULTS =====")
print("BLEU:", bleu_score)
print("ROUGE:", rouge_score)


In [None]:
import torch
import evaluate
from PIL import Image
from transformers import LlavaForConditionalGeneration, LlavaProcessor
from peft import PeftModel
from datasets import load_dataset
from tqdm import tqdm

# ================= CONFIG =================
BASE_MODEL = "llava-hf/llava-1.5-7b-hf"
LORA_REPO = "mohit311/LLaVA-data-json"
DATA_PATH = "/kaggle/input/train-clean/train_clean.json"

MAX_SAMPLES = 50       # upper bound on the number of evaluated rows
MAX_NEW_TOKENS = 64    # generation budget passed to generate()

device = "cuda:0"

# ================= METRICS =================
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")

# ================= PROCESSOR =================
processor = LlavaProcessor.from_pretrained(BASE_MODEL)

# ================= DATA =================
dataset = load_dataset("json", data_files=DATA_PATH)["train"]
# Cap at the dataset size so a MAX_SAMPLES larger than the file does not make
# select() fail on an out-of-range index.
dataset = dataset.select(range(min(MAX_SAMPLES, len(dataset))))

# ================= EVAL FUNCTION =================
def run_eval(model, name):
    """Generate an answer for every sample in `dataset` and score the answers.

    Uses the module-level `dataset`, `processor`, `device`, `MAX_NEW_TOKENS`,
    `bleu` and `rouge`.

    Args:
        model: Model to evaluate; it is switched to eval mode and its
            `generate()` is called once per sample.
        name: Label shown on the tqdm progress bar.

    Returns:
        Tuple `(bleu_result, rouge_result)` holding what `bleu.compute` and
        `rouge.compute` return for the collected predictions.
    """
    model.eval()
    preds, refs = [], []

    for sample in tqdm(dataset, desc=name):
        image = Image.open(sample["image"]).convert("RGB")
        prompt = sample["conversations"][0]["value"]
        gt = sample["conversations"][1]["value"]

        inputs = processor(
            images=image,
            text=prompt,
            return_tensors="pt"
        ).to(device)

        with torch.no_grad():
            out = model.generate(
                **inputs,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=False,
                use_cache=True
            )

        # generate() returns prompt tokens + continuation. Keep only the
        # continuation so the prompt text is not scored as model output.
        prompt_len = inputs["input_ids"].shape[1]
        pred = processor.decode(out[0][prompt_len:], skip_special_tokens=True).strip()

        preds.append(pred)
        refs.append([gt])  # BLEU takes a list of references per prediction

    return (
        bleu.compute(predictions=preds, references=refs),
        # ROUGE is given one flat reference string per prediction.
        rouge.compute(predictions=preds, references=[r[0] for r in refs])
    )

# ================= BASELINE =================
# Unadapted base checkpoint in fp16; device_map={"": 0} places the whole model
# on device index 0.
baseline = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map={"": 0}
)

baseline_bleu, baseline_rouge = run_eval(baseline, "Baseline")

# Drop the baseline reference and release cached GPU memory before the base
# weights are loaded a second time below.
del baseline
torch.cuda.empty_cache()

# ================= FINETUNED =================
# Fresh copy of the base checkpoint that the adapter is attached to.
base = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map={"": 0}
)

finetuned = PeftModel.from_pretrained(base, LORA_REPO)
finetuned_bleu, finetuned_rouge = run_eval(finetuned, "Finetuned")

# ================= RESULTS =================
print("\n===== BASELINE =====")
print("BLEU:", baseline_bleu)
print("ROUGE:", baseline_rouge)

print("\n===== FINETUNED =====")
print("BLEU:", finetuned_bleu)
print("ROUGE:", finetuned_rouge)

# Differences are finetuned minus baseline, read from the "bleu" and "rougeL"
# entries of the metric results.
print("\n===== DELTA =====")
print("Δ BLEU:", finetuned_bleu["bleu"] - baseline_bleu["bleu"])
print("Δ ROUGE-L:", finetuned_rouge["rougeL"] - baseline_rouge["rougeL"])


In [None]:
# ==============================
# Pathology-level F1 Evaluation
# Baseline vs Finetuned LLaVA
# ==============================

import torch
import re
from PIL import Image
from tqdm import tqdm
from transformers import LlavaForConditionalGeneration, LlavaProcessor
from peft import PeftModel
from datasets import load_dataset
from sklearn.metrics import precision_recall_fscore_support

# ---------------- CONFIG ----------------
BASE_MODEL = "llava-hf/llava-1.5-7b-hf"
LORA_REPO = "mohit311/LLaVA-data-json"
DATA_PATH = "/kaggle/input/train-clean/train_clean.json"

MAX_SAMPLES = 50        # upper bound on the number of evaluated rows
MAX_NEW_TOKENS = 256    # generation budget passed to generate()
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# ---------------- PATHOLOGY LIST ----------------
# Lower-case terms searched for in the lower-cased report text.
PATHOLOGIES = [
    "atelectasis", "consolidation", "pneumonia", "edema",
    "pleural effusion", "cardiomegaly", "pneumothorax",
    "lung opacity", "support devices"
]

# ---------------- LOAD PROCESSOR ----------------
processor = LlavaProcessor.from_pretrained(BASE_MODEL)

# ---------------- LOAD DATA ----------------
dataset = load_dataset("json", data_files=DATA_PATH)["train"]
# Cap at the dataset size so a MAX_SAMPLES larger than the file does not make
# select() fail on an out-of-range index.
dataset = dataset.select(range(min(MAX_SAMPLES, len(dataset))))

# ---------------- PATHOLOGY EXTRACTION ----------------
def extract_pathologies(text):
    """Return the set of PATHOLOGIES terms that appear in `text`.

    Matching is case-insensitive (the text is lower-cased first) and bounded by
    `\\b` on both sides. Only presence is tested: a negated mention such as
    "no pneumothorax" is still counted as a hit.

    Args:
        text: Report text to scan.

    Returns:
        Set containing every matching entry of PATHOLOGIES.
    """
    lowered = text.lower()
    return {
        p
        for p in PATHOLOGIES
        if re.search(rf"\b{re.escape(p)}\b", lowered)
    }

# ---------------- REPORT GENERATION ----------------
def generate_reports(model, name):
    """Generate one report per sample in `dataset`.

    Uses the module-level `dataset`, `processor`, `DEVICE` and
    `MAX_NEW_TOKENS`. The model's train/eval mode is not changed here.

    Args:
        model: Model whose `generate()` is called once per sample.
        name: Label shown on the tqdm progress bar.

    Returns:
        Tuple `(preds, refs)`: the generated texts and the matching
        ground-truth texts (`conversations[1]["value"]`), in dataset order.
    """
    preds, refs = [], []

    for sample in tqdm(dataset, desc=name):
        image = Image.open(sample["image"]).convert("RGB")
        prompt = sample["conversations"][0]["value"]
        gt = sample["conversations"][1]["value"]

        inputs = processor(
            images=image,
            text=prompt,
            return_tensors="pt"
        )
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

        with torch.no_grad():
            out = model.generate(
                **inputs,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=False
            )

        # generate() returns prompt tokens + continuation. Keep only the
        # continuation: pathology terms appearing in the prompt would otherwise
        # be counted as if the model had reported them.
        prompt_len = inputs["input_ids"].shape[1]
        pred = processor.decode(out[0][prompt_len:], skip_special_tokens=True).strip()

        preds.append(pred)
        refs.append(gt)

    return preds, refs

# ---------------- PATHOLOGY F1 ----------------
def pathology_f1(preds, refs):
    """Score predicted reports against references at pathology level.

    Every (report, pathology) pair contributes one binary label; all pairs are
    pooled into a single pair of label vectors before scoring, so the result is
    one pooled precision/recall/F1 rather than a per-pathology breakdown.

    Args:
        preds: Generated report texts.
        refs: Reference report texts, paired with `preds` by position (pairing
            stops at the shorter of the two).

    Returns:
        Dict with keys "Precision", "Recall" and "F1".
    """
    paired_sets = [
        (extract_pathologies(p), extract_pathologies(r))
        for p, r in zip(preds, refs)
    ]

    # Flatten report-major, pathology-minor so both vectors stay aligned.
    y_true = [
        int(path in r_set)
        for _, r_set in paired_sets
        for path in PATHOLOGIES
    ]
    y_pred = [
        int(path in p_set)
        for p_set, _ in paired_sets
        for path in PATHOLOGIES
    ]

    scores = precision_recall_fscore_support(
        y_true, y_pred, average="binary", zero_division=0
    )
    precision, recall, f1 = scores[0], scores[1], scores[2]

    return {
        "Precision": precision,
        "Recall": recall,
        "F1": f1
    }

# ================= BASELINE MODEL =================
# Unadapted base checkpoint in fp16, moved as a whole onto DEVICE.
baseline_model = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16
).to(DEVICE)

# generate_reports() does not switch modes itself, so eval mode is set here.
baseline_model.eval()

baseline_preds, baseline_refs = generate_reports(baseline_model, "Baseline")
baseline_metrics = pathology_f1(baseline_preds, baseline_refs)

# ---- FREE GPU AFTER BASELINE ----
# Drop the reference and release cached GPU memory before the base weights are
# loaded a second time below.
del baseline_model
torch.cuda.empty_cache()

# ================= FINETUNED MODEL =================
# Fresh copy of the base checkpoint that the adapter is attached to.
finetuned_base = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16
).to(DEVICE)

finetuned_model = PeftModel.from_pretrained(
    finetuned_base,
    LORA_REPO
)

finetuned_model.eval()

finetuned_preds, finetuned_refs = generate_reports(finetuned_model, "Finetuned")
finetuned_metrics = pathology_f1(finetuned_preds, finetuned_refs)

# ================= RESULTS =================
print("\n===== PATHOLOGY-LEVEL RESULTS =====\n")

print("Baseline:")
for k, v in baseline_metrics.items():
    print(f"{k}: {v:.4f}")

print("\nFinetuned:")
for k, v in finetuned_metrics.items():
    print(f"{k}: {v:.4f}")

# Differences are finetuned minus baseline for each pooled score.
print("\nDelta (Finetuned - Baseline):")
print(f"Δ Precision: {finetuned_metrics['Precision'] - baseline_metrics['Precision']:.4f}")
print(f"Δ Recall:    {finetuned_metrics['Recall'] - baseline_metrics['Recall']:.4f}")
print(f"Δ F1:        {finetuned_metrics['F1'] - baseline_metrics['F1']:.4f}")


In [None]:
import pandas as pd

# ============================
# INSERT YOUR FINAL SCORES
# ============================

# Scores are entered by hand, one dict of metric -> value per model.
results = {
    "Baseline": {
        "BLEU": 0.000008079,
        "ROUGE-L": 0.04207,
        "Pathology Precision": 0.0000,
        "Pathology Recall": 0.0000,
        "Pathology F1": 0.0000,
    },
    "Finetuned (Ours)": {
        "BLEU": 0.007122,
        "ROUGE-L": 0.15310,
        "Pathology Precision": 0.4425,
        "Pathology Recall": 0.3817,
        "Pathology F1": 0.4098,
    }
}

# ============================
# BUILD COMPARISON TABLE
# ============================

# After the transpose, rows are models and columns are metrics.
df = pd.DataFrame(results).T

# Reorder columns (paper-style)
df = df[
    ["BLEU", "ROUGE-L", "Pathology Precision", "Pathology Recall", "Pathology F1"]
]

# The difference between the two model rows is indexed by metric name, so it is
# appended as a ROW. Assigning it as a column would align it against the
# model-name index, match nothing, and produce a column made only of NaN.
df.loc["Δ (Ours - Baseline)"] = df.loc["Finetuned (Ours)"] - df.loc["Baseline"]

# Round for publication (note: 4 decimals turns the baseline BLEU into 0.0)
df = df.round(4)

print("\n===== PAPER-STYLE COMPARISON TABLE =====\n")
display(df)

# ============================
# OPTIONAL: EXPORT
# ============================

# CSV copy (for Excel / plotting), written next to the notebook.
df.to_csv("comparison_table.csv")

# LaTeX copy (for the paper): render to a string first, then write it out.
latex_options = {
    "bold_rows": True,
    "label": "tab:comparison",
    "caption": "Comparison of Baseline LLaVA and Fine-tuned Model on Chest X-ray Report Generation",
}
latex_table = df.to_latex(**latex_options)

with open("comparison_table.tex", "w") as tex_file:
    tex_file.write(latex_table)

print("\nSaved:")
for saved_line in (" - comparison_table.csv", " - comparison_table.tex"):
    print(saved_line)


In [None]:
import re
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, f1_score
from tqdm import tqdm

# =============================
# CONFIG
# =============================
# Terms searched for in report text; each one is scored separately.
PATHOLOGIES = [
    "cardiomegaly", "consolidation", "edema",
    "pleural effusion", "pneumonia", "pneumothorax",
]

# Bootstrap settings: number of resamples per interval and the seed applied to
# NumPy's global random generator.
RANDOM_SEED = 42
N_BOOTSTRAP = 1000
np.random.seed(RANDOM_SEED)

# =============================
# PATHOLOGY EXTRACTION
# =============================
def extract_labels(text):
    """Map every PATHOLOGIES term to 1 if it occurs in `text`, else 0.

    The text is lower-cased first and each term must sit between `\\b`
    boundaries. Terms are interpolated into the pattern as-is (not escaped).
    Only presence is tested, so negated mentions also yield 1.

    Args:
        text: Report text to scan.

    Returns:
        Dict keyed by pathology term with int values 0 or 1.
    """
    lowered = text.lower()
    return {
        p: 1 if re.search(rf"\b{p}\b", lowered) else 0
        for p in PATHOLOGIES
    }

# =============================
# METRIC COMPUTATION
# =============================
def compute_metrics(y_true, y_pred):
    """Compute accuracy, sensitivity, specificity and F1 for binary labels.

    A small epsilon is added to each denominator so that an empty class gives
    0 instead of a division by zero.

    Args:
        y_true: Ground-truth 0/1 labels.
        y_pred: Predicted 0/1 labels, aligned with `y_true`.

    Returns:
        Tuple `(accuracy, sensitivity, specificity, f1)`.
    """
    # With labels=[0, 1] the matrix rows are the true class and the columns the
    # predicted class: [[tn, fp], [fn, tp]].
    (tn, fp), (fn, tp) = confusion_matrix(y_true, y_pred, labels=[0,1])

    eps = 1e-8
    total = tp + tn + fp + fn

    accuracy = (tp + tn) / (total + eps)
    sensitivity = tp / (tp + fn + eps)
    specificity = tn / (tn + fp + eps)
    f1 = f1_score(y_true, y_pred, zero_division=0)

    return accuracy, sensitivity, specificity, f1

def bootstrap_ci(y_true, y_pred, metric_fn):
    """Estimate a 95% percentile-bootstrap interval for `metric_fn`.

    Draws N_BOOTSTRAP resamples (with replacement, same size as the input) from
    NumPy's global random generator and evaluates the metric on each.

    Args:
        y_true: Ground-truth labels; indexed with an integer index array.
        y_pred: Predicted labels, aligned with `y_true`.
        metric_fn: Callable `(y_true, y_pred) -> float`.

    Returns:
        Array with the 2.5th and 97.5th percentiles of the resampled scores.
    """
    size = len(y_true)

    def resampled_score():
        picks = np.random.choice(size, size, replace=True)
        return metric_fn(y_true[picks], y_pred[picks])

    samples = [resampled_score() for _ in range(N_BOOTSTRAP)]
    return np.percentile(samples, [2.5, 97.5])

# =============================
# MAIN EVALUATION FUNCTION
# =============================
def evaluate_model(predictions, references, model_name):
    """Build a per-pathology metric table for one model.

    For every entry of PATHOLOGIES the accuracy, sensitivity, specificity and
    F1 are computed together with a bootstrap interval, and each is formatted
    as "value (low, high)".

    Args:
        predictions: Generated report texts.
        references: Reference report texts, aligned with `predictions`.
        model_name: Name of the column that holds the formatted results.

    Returns:
        DataFrame with columns "Metric", "Pathology" and `model_name`, four
        rows per pathology in the order Accuracy, Sensitivity, Specificity,
        F1 score.
    """
    # Label every text once up front; extract_labels() already returns all
    # pathologies, so re-running it inside the pathology loop was repeated work.
    ref_labels = [extract_labels(r) for r in references]
    pred_labels = [extract_labels(p) for p in predictions]

    # Order matches the tuple returned by compute_metrics().
    metric_names = ("Accuracy", "Sensitivity", "Specificity", "F1 score")

    rows = []
    for pathology in PATHOLOGIES:
        y_true = np.array([labels[pathology] for labels in ref_labels])
        y_pred = np.array([labels[pathology] for labels in pred_labels])

        point_estimates = compute_metrics(y_true, y_pred)

        # One bootstrap per metric, in the same order as before, so the draws
        # from the global random generator (and the intervals) are unchanged.
        intervals = [
            bootstrap_ci(
                y_true,
                y_pred,
                lambda a, b, position=position: compute_metrics(a, b)[position],
            )
            for position in range(len(metric_names))
        ]

        for metric_name, value, interval in zip(metric_names, point_estimates, intervals):
            rows.append({
                "Metric": metric_name,
                "Pathology": pathology.title(),
                model_name: f"{value:.2f} ({interval[0]:.2f}, {interval[1]:.2f})"
            })

    return pd.DataFrame(rows)


# =============================
# ENSURE REFERENCES EXIST
# =============================
# baseline_preds, finetuned_preds and dataset are not created in this cell;
# they have to exist already in the kernel from an earlier cell.
assert len(baseline_preds) == len(finetuned_preds), "Prediction length mismatch"

# Rebuild the reference texts from the dataset (second conversation turn).
references = []
for sample in dataset:
    references.append(sample["conversations"][1]["value"])

# Guards against `dataset` having been replaced since the predictions were made.
assert len(references) == len(baseline_preds), "References do not match predictions"
# =============================
# RUN EVALUATION
# =============================
baseline_df = evaluate_model(baseline_preds, references, "Baseline")
finetuned_df = evaluate_model(finetuned_preds, references, "Finetuned (Ours)")

# Merge like Table 4
# Both frames share the (Metric, Pathology) keys, so this puts the two model
# columns side by side.
table4 = baseline_df.merge(
    finetuned_df,
    on=["Metric", "Pathology"],
    how="left"
)

print("\n===== TABLE 4 STYLE RESULTS =====\n")
display(table4)

# =============================
# EXPORT
# =============================
table4.to_csv("table4_pathology_results.csv", index=False)

# NOTE(review): the caption says "external test set" — confirm that matches the
# split currently held in `dataset` before using the table.
latex = table4.to_latex(
    index=False,
    caption="Model performance by pathology on external test set",
    label="tab:pathology_results"
)

with open("table4_pathology_results.tex", "w") as f:
    f.write(latex)

print("\nSaved:")
print(" - table4_pathology_results.csv")
print(" - table4_pathology_results.tex")


In [None]:
# ============================================================
# TABLE-4 STYLE PATHOLOGY EVALUATION (SINGLE GPU, STABLE)
# ============================================================

import torch
import json
import random
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from datasets import load_dataset
from transformers import LlavaForConditionalGeneration, LlavaProcessor
from peft import PeftModel
from sklearn.metrics import confusion_matrix
from statsmodels.stats.proportion import proportion_confint

# ---------------- CONFIG ----------------
BASE_MODEL = "llava-hf/llava-1.5-7b-hf"
LORA_REPO = "mohit311/LLaVA-data-json"
VAL_PATH = "/kaggle/input/json-dataset/val (1).json"

DEVICE = torch.device("cuda:0")   # <<< FORCE SINGLE GPU
SUBSET_SIZE = 300       # number of validation rows sampled for evaluation
MAX_NEW_TOKENS = 200    # generation budget passed to generate()
RANDOM_SEED = 42

# Terms looked up in the lower-cased report text by extract_labels().
PATHOLOGIES = [
    "cardiomegaly",
    "consolidation",
    "edema",
    "pleural effusion",
    "pneumonia",
    "pneumothorax",
]

# Seed both generators; the subset below is drawn from NumPy's global one.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.cuda.empty_cache()

# ---------------- LOAD PROCESSOR ----------------
processor = LlavaProcessor.from_pretrained(BASE_MODEL)

# ---------------- LOAD MODEL (NO AUTO DEVICE MAP) ----------------
# Loaded without a device map and then moved explicitly, so the whole model
# ends up on DEVICE.
base_model = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map=None,     # <<< CRITICAL
)

base_model.to(DEVICE)

# Attach the adapter stored in LORA_REPO to the base model.
model = PeftModel.from_pretrained(
    base_model,
    LORA_REPO
)

# Moved again after wrapping so the wrapped model is on DEVICE as well.
model.to(DEVICE)
model.eval()

# ---------------- LOAD & SUBSET DATA ----------------
dataset = load_dataset("json", data_files=VAL_PATH)["train"]

# Sampling without replacement raises when more rows are requested than exist,
# so cap the draw at the dataset size. When SUBSET_SIZE fits, the drawn indices
# are the same as before.
subset_size = min(SUBSET_SIZE, len(dataset))
indices = np.random.choice(len(dataset), subset_size, replace=False)
dataset = dataset.select(indices.tolist())

# ---------------- PATHOLOGY EXTRACTION ----------------
def extract_labels(text):
    """Map every PATHOLOGIES term to 1 if it occurs in `text`, else 0.

    Uses a plain substring test on the lower-cased text, so a term embedded in
    a longer word and negated mentions both count as present.

    Args:
        text: Report text to scan.

    Returns:
        Dict keyed by pathology term with int values 0 or 1.
    """
    lowered = text.lower()
    labels = {}
    for term in PATHOLOGIES:
        labels[term] = int(term in lowered)
    return labels

# ---------------- METRICS ----------------
def compute_metrics(y_true, y_pred):
    """Compute per-pathology metrics with Wilson confidence intervals.

    Args:
        y_true: Ground-truth 0/1 labels.
        y_pred: Predicted 0/1 labels, aligned with `y_true`.

    Returns:
        Dict mapping "Accuracy", "Sensitivity", "Specificity" and "F1 score"
        to `(value, (ci_low, ci_high))`. An interval that is not computed (F1,
        or a proportion whose denominator is zero) is `(nan, nan)` instead of
        `(0, 0)`, so the table does not show a made-up zero-width interval.
        Point estimates with a zero denominator remain 0.
    """
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0,1]).ravel()

    total = tp + tn + fp + fn
    positives = tp + fn
    negatives = tn + fp
    undefined_ci = (float("nan"), float("nan"))

    # max(..., 1) keeps the point estimates at 0 when a denominator is empty.
    acc = (tp + tn) / max(total, 1)
    sens = tp / max(positives, 1)
    spec = tn / max(negatives, 1)
    f1 = (2 * tp) / max((2 * tp + fp + fn), 1)

    acc_ci = proportion_confint(tp + tn, total, method="wilson") if total > 0 else undefined_ci
    sens_ci = proportion_confint(tp, positives, method="wilson") if positives > 0 else undefined_ci
    spec_ci = proportion_confint(tn, negatives, method="wilson") if negatives > 0 else undefined_ci

    return {
        "Accuracy": (acc, acc_ci),
        "Sensitivity": (sens, sens_ci),
        "Specificity": (spec, spec_ci),
        # F1 is not a simple proportion, so no Wilson interval is computed.
        "F1 score": (f1, undefined_ci),
    }

# ---------------- RUN EVALUATION ----------------
# One list of 0/1 labels per pathology, filled in dataset order.
y_true = {p: [] for p in PATHOLOGIES}
y_pred = {p: [] for p in PATHOLOGIES}

print(f"\nEvaluating OUR model on {SUBSET_SIZE} validation samples (single GPU)...\n")

for sample in tqdm(dataset, desc="Evaluating Ours"):
    image = Image.open(sample["image"]).convert("RGB")
    prompt = sample["conversations"][0]["value"]
    gt_text = sample["conversations"][1]["value"]

    inputs = processor(
        images=image,
        text=prompt,
        return_tensors="pt"
    )

    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False
        )

    # generate() returns prompt tokens + continuation. Keep only the
    # continuation: pathology terms appearing in the prompt would otherwise be
    # labelled as predicted by the model.
    prompt_len = inputs["input_ids"].shape[1]
    pred_text = processor.decode(out[0][prompt_len:], skip_special_tokens=True).strip()

    gt_labels = extract_labels(gt_text)
    pred_labels = extract_labels(pred_text)

    for p in PATHOLOGIES:
        y_true[p].append(gt_labels[p])
        y_pred[p].append(pred_labels[p])

# ---------------- BUILD TABLE 4 ----------------
# One row per (pathology, metric); compute_metrics() runs once per pathology.
# The two "(Paper)" columns are filled with a dash placeholder.
rows = [
    {
        "Metric": metric,
        "Pathology": pathology.title(),
        "CXR-LLaVA (Ours)": f"{val:.2f} ({ci[0]:.2f}, {ci[1]:.2f})",
        "GPT-4V (Paper)": "—",
        "Gemini-Pro (Paper)": "—"
    }
    for pathology in PATHOLOGIES
    for metric, (val, ci) in compute_metrics(y_true[pathology], y_pred[pathology]).items()
]

df = pd.DataFrame(rows)

# ---------------- OUTPUT ----------------
print("\n===== TABLE 4 STYLE RESULTS (VALIDATION SUBSET) =====\n")
print(df.to_string(index=False))

# Same table written as CSV and as LaTeX, both without the index column.
df.to_csv("table4_validation_subset_ours.csv", index=False)
df.to_latex("table4_validation_subset_ours.tex", index=False)

print("\nSaved:")
for saved_line in (
    " - table4_validation_subset_ours.csv",
    " - table4_validation_subset_ours.tex",
):
    print(saved_line)


In [None]:
# ============================================================
# TABLE-4 STYLE PATHOLOGY EVALUATION (SINGLE GPU, STABLE)
# ============================================================

import torch
import json
import random
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from datasets import load_dataset
from transformers import LlavaForConditionalGeneration, LlavaProcessor
from peft import PeftModel
from sklearn.metrics import confusion_matrix
from statsmodels.stats.proportion import proportion_confint

# ---------------- CONFIG ----------------
BASE_MODEL = "llava-hf/llava-1.5-7b-hf"
LORA_REPO = "mohit311/LLaVA-data-json"
VAL_PATH = "/kaggle/input/json-dataset/val (1).json"

DEVICE = torch.device("cuda:0")   # <<< FORCE SINGLE GPU
SUBSET_SIZE = 500       # number of validation rows sampled for evaluation
MAX_NEW_TOKENS = 400    # generation budget passed to generate()
RANDOM_SEED = 42

# Terms looked up in the lower-cased report text by extract_labels().
PATHOLOGIES = [
    "cardiomegaly",
    "consolidation",
    "edema",
    "pleural effusion",
    "pneumonia",
    "pneumothorax",
]

# Seed both generators; the subset below is drawn from NumPy's global one.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.cuda.empty_cache()

# ---------------- LOAD PROCESSOR ----------------
processor = LlavaProcessor.from_pretrained(BASE_MODEL)

# ---------------- LOAD MODEL (NO AUTO DEVICE MAP) ----------------
# Loaded without a device map and then moved explicitly, so the whole model
# ends up on DEVICE.
base_model = LlavaForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map=None,     # <<< CRITICAL
)

base_model.to(DEVICE)

# Attach the adapter stored in LORA_REPO to the base model.
model = PeftModel.from_pretrained(
    base_model,
    LORA_REPO
)

# Moved again after wrapping so the wrapped model is on DEVICE as well.
model.to(DEVICE)
model.eval()

# ---------------- LOAD & SUBSET DATA ----------------
dataset = load_dataset("json", data_files=VAL_PATH)["train"]

# Sampling without replacement raises when more rows are requested than exist,
# so cap the draw at the dataset size. When SUBSET_SIZE fits, the drawn indices
# are the same as before.
subset_size = min(SUBSET_SIZE, len(dataset))
indices = np.random.choice(len(dataset), subset_size, replace=False)
dataset = dataset.select(indices.tolist())

# ---------------- PATHOLOGY EXTRACTION ----------------
def extract_labels(text):
    """Map every PATHOLOGIES term to 1 if it occurs in `text`, else 0.

    Uses a plain substring test on the lower-cased text, so a term embedded in
    a longer word and negated mentions both count as present.

    Args:
        text: Report text to scan.

    Returns:
        Dict keyed by pathology term with int values 0 or 1.
    """
    lowered = text.lower()
    labels = {}
    for term in PATHOLOGIES:
        labels[term] = int(term in lowered)
    return labels

# ---------------- METRICS ----------------
def compute_metrics(y_true, y_pred):
    """Compute per-pathology metrics with Wilson confidence intervals.

    Args:
        y_true: Ground-truth 0/1 labels.
        y_pred: Predicted 0/1 labels, aligned with `y_true`.

    Returns:
        Dict mapping "Accuracy", "Sensitivity", "Specificity" and "F1 score"
        to `(value, (ci_low, ci_high))`. An interval that is not computed (F1,
        or a proportion whose denominator is zero) is `(nan, nan)` instead of
        `(0, 0)`, so the table does not show a made-up zero-width interval.
        Point estimates with a zero denominator remain 0.
    """
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0,1]).ravel()

    total = tp + tn + fp + fn
    positives = tp + fn
    negatives = tn + fp
    undefined_ci = (float("nan"), float("nan"))

    # max(..., 1) keeps the point estimates at 0 when a denominator is empty.
    acc = (tp + tn) / max(total, 1)
    sens = tp / max(positives, 1)
    spec = tn / max(negatives, 1)
    f1 = (2 * tp) / max((2 * tp + fp + fn), 1)

    acc_ci = proportion_confint(tp + tn, total, method="wilson") if total > 0 else undefined_ci
    sens_ci = proportion_confint(tp, positives, method="wilson") if positives > 0 else undefined_ci
    spec_ci = proportion_confint(tn, negatives, method="wilson") if negatives > 0 else undefined_ci

    return {
        "Accuracy": (acc, acc_ci),
        "Sensitivity": (sens, sens_ci),
        "Specificity": (spec, spec_ci),
        # F1 is not a simple proportion, so no Wilson interval is computed.
        "F1 score": (f1, undefined_ci),
    }

# ---------------- RUN EVALUATION ----------------
# One list of 0/1 labels per pathology, filled in dataset order.
y_true = {p: [] for p in PATHOLOGIES}
y_pred = {p: [] for p in PATHOLOGIES}

print(f"\nEvaluating OUR model on {SUBSET_SIZE} validation samples (single GPU)...\n")

for sample in tqdm(dataset, desc="Evaluating Ours"):
    image = Image.open(sample["image"]).convert("RGB")
    prompt = sample["conversations"][0]["value"]
    gt_text = sample["conversations"][1]["value"]

    inputs = processor(
        images=image,
        text=prompt,
        return_tensors="pt"
    )

    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False
        )

    # generate() returns prompt tokens + continuation. Keep only the
    # continuation: pathology terms appearing in the prompt would otherwise be
    # labelled as predicted by the model.
    prompt_len = inputs["input_ids"].shape[1]
    pred_text = processor.decode(out[0][prompt_len:], skip_special_tokens=True).strip()

    gt_labels = extract_labels(gt_text)
    pred_labels = extract_labels(pred_text)

    for p in PATHOLOGIES:
        y_true[p].append(gt_labels[p])
        y_pred[p].append(pred_labels[p])

# ---------------- BUILD TABLE 4 ----------------
# One row per (pathology, metric); compute_metrics() runs once per pathology.
# The two "(Paper)" columns are filled with a dash placeholder.
rows = [
    {
        "Metric": metric,
        "Pathology": pathology.title(),
        "CXR-LLaVA (Ours)": f"{val:.2f} ({ci[0]:.2f}, {ci[1]:.2f})",
        "GPT-4V (Paper)": "—",
        "Gemini-Pro (Paper)": "—"
    }
    for pathology in PATHOLOGIES
    for metric, (val, ci) in compute_metrics(y_true[pathology], y_pred[pathology]).items()
]

df = pd.DataFrame(rows)

# ---------------- OUTPUT ----------------
print("\n===== TABLE 4 STYLE RESULTS (VALIDATION SUBSET) =====\n")
print(df.to_string(index=False))

# Same table written as CSV and as LaTeX, both without the index column.
# These are the same file names the previous cell writes, so they are replaced.
df.to_csv("table4_validation_subset_ours.csv", index=False)
df.to_latex("table4_validation_subset_ours.tex", index=False)

print("\nSaved:")
for saved_line in (
    " - table4_validation_subset_ours.csv",
    " - table4_validation_subset_ours.tex",
):
    print(saved_line)
