# Qwen2-VL-7B ImageCLEF 2025 Fine-tuning
Use shared utilities to load, quantize, and fine-tune the model for ImageCLEF 2025 medical image captioning/concept detection.

In [None]:
import os
import sys

# Make the parent directory importable so the `src.*` imports in the next cell
# resolve. This assumes the kernel's working directory is the notebook folder
# directly under the repository root; adjust the path otherwise.
REPO_ROOT = os.path.abspath("..")  # or the repo root path
if REPO_ROOT not in sys.path:
    # Guard keeps the cell idempotent: re-running it must not pile up duplicate entries.
    sys.path.append(REPO_ROOT)

In [None]:
from src.config import ModelConfig, QuantizationConfig, FinetuneConfig
from src.models.loader import load_multimodal_model
from src.models.quantization import quantize_model
from src.training.finetune import fine_tune_model

  from .autonotebook import tqdm as notebook_tqdm


## Configure model + training
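Set `data_dir` (and, if your file name differs, `dataset_path`) to point at your ImageCLEF 2025 train split. The split must be a JSONL file with one record per line, each containing the fields `image_path`, `instruction`, and `output`.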

In [None]:
# Folder holding the ImageCLEF 2025 files; the relative path is resolved
# against the kernel's current working directory.
data_dir = os.path.abspath("../data/imageclef_2025")

# Training split handed to FinetuneConfig as `dataset_path`.
captioning_jsonl = os.path.join(data_dir, "captioning.jsonl")

# Module names handed to FinetuneConfig as `target_modules`.
target_module_names = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "up_proj",
    "down_proj",
    "gate_proj",
]

# Which checkpoint to load and how to place it.
model_cfg = ModelConfig(
    model_name_or_path="Qwen/Qwen2-VL-7B-Instruct",
    device_map="auto",
    torch_dtype="bfloat16",
    trust_remote_code=True,
    use_flash_attention_2=False,
)

# Quantization options: 4-bit flag on, "nf4" quant type, double quant on,
# "bfloat16" compute dtype.
quant_cfg = QuantizationConfig(
    load_in_4bit=True,
    bnb_compute_dtype="bfloat16",
    bnb_quant_type="nf4",
    use_double_quant=True,
)

# Fine-tuning run settings.
# NOTE(review): batch size 1 with 4 accumulation steps presumably gives an
# effective batch of 4 per device — confirm against fine_tune_model.
# `output_dir` is a relative path, so it depends on the kernel's working directory.
finetune_cfg = FinetuneConfig(
    dataset_path=captioning_jsonl,
    output_dir="outputs/qwen2vl-imageclef-2025",
    target_modules=target_module_names,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
)

In [None]:
# Load the base model and its processor from the two config objects built above.
# The recorded output of this cell includes "Some parameters are on the meta device
# because they were offloaded to the disk" — presumably the weights did not fit in
# device memory with device_map="auto"; verify before training.
# NOTE(review): quant_cfg is passed here and again to quantize_model in the next
# cell — confirm the model is not being quantized twice.
model, processor = load_multimodal_model(model_cfg, quant_cfg)

`torch_dtype` is deprecated! Use `dtype` instead!
Fetching 5 files: 100%|██████████| 5/5 [05:10<00:00, 62.20s/it] 
Loading checkpoint shards: 100%|██████████| 5/5 [00:05<00:00,  1.10s/it]
Some parameters are on the meta device because they were offloaded to the disk.
The image processor of type `Qwen2VLImageProcessor` is now loaded as a fast processor by default, even if the model checkpoint was saved with a slow processor. This is a breaking change and may produce slightly different outputs. To continue using the slow processor, instantiate this class with `use_fast=False`. Note that this behavior will be extended to all models in a future release.


In [None]:
# Apply quant_cfg to the loaded model; the fine-tuning cell consumes
# `quantized_model`, not `model`.
# NOTE(review): load_multimodal_model already received quant_cfg — confirm this
# second pass is required.
quantized_model = quantize_model(model, quant_cfg)

In [None]:
# Sanity check: show the configured captioning file and whether it is on disk.
captioning_path = finetune_cfg.dataset_path
print(f"Looking for captioning split at {captioning_path}")
print("Exists:", os.path.exists(captioning_path))


/Users/yashwanth/Documents/OMSCS/Deep_Learning/dl_project_fall_2025/data/imageclef_2025_train.jsonl False


In [None]:
from src.data.dataset import load_imageclef_2025_splits

# NOTE(review): the recorded run of this cell failed with an ImportError for
# `load_imageclef_2025_splits` — confirm src/data/dataset.py actually exports it.
try:
    splits = load_imageclef_2025_splits(data_dir)
    # Pick the task to train on.
    train_ds = splits["captioning"]  # or concept_detection/explainability
except FileNotFoundError as exc:
    # Re-raise with a message that tells the reader which files to provide.
    missing_data_msg = (
        f"Expected ImageCLEF files under {data_dir}. Place captioning.jsonl, concept_detection.jsonl, "
        f"explainability.jsonl, and the referenced images there."
    )
    raise FileNotFoundError(missing_data_msg) from exc
else:
    print(f"Loaded ImageCLEF splits from {data_dir}")


ImportError: cannot import name 'load_imageclef_2025_splits' from 'src.data.dataset' (/Users/yashwanth/Documents/OMSCS/Deep_Learning/dl_project_fall_2025/src/data/dataset.py)

In [None]:
# Fine-tune the quantized model on the captioning split.
# `train_ds` is only defined if the split-loading cell above ran successfully.
fine_tuned_model = fine_tune_model(quantized_model, processor, finetune_cfg, dataset=train_ds)


FileNotFoundError: Unable to find '/Users/yashwanth/Documents/OMSCS/Deep_Learning/dl_project_fall_2025/notebooks/data/imageclef_2025_train.jsonl'