# Qwen2-VL-7B ImageCLEF 2025 Fine-tuning
Use shared utilities to load, quantize, and fine-tune the model for ImageCLEF 2025 medical image captioning/concept detection.

In [None]:
from src.config import ModelConfig, QuantizationConfig, FinetuneConfig
from src.models.loader import load_multimodal_model
from src.models.quantization import quantize_model
from src.training.finetune import fine_tune_model


## Configure model + training
Set `dataset_path` to the location of your ImageCLEF 2025 train split. The file must be in JSONL format (one JSON object per line), and each record must contain the fields `image_path`, `instruction`, and `output`.

In [None]:
# Values that appear in more than one config are named once so they cannot
# drift apart when someone edits this cell.
base_checkpoint = "Qwen/Qwen2-VL-7B-Instruct"
compute_dtype = "bfloat16"

# Module names handed to the fine-tuning config. Presumably the attention
# projections followed by the MLP projections of the language model --
# TODO confirm against the checkpoint's actual module names.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["up_proj", "down_proj", "gate_proj"]

# How the base checkpoint is loaded.
model_cfg = ModelConfig(
    model_name_or_path=base_checkpoint,
    torch_dtype=compute_dtype,
    device_map="auto",
    trust_remote_code=True,
    use_flash_attention_2=False,
)

# 4-bit quantization settings; the compute dtype is kept equal to the dtype
# the model is loaded with.
quant_cfg = QuantizationConfig(
    load_in_4bit=True,
    bnb_quant_type="nf4",
    bnb_compute_dtype=compute_dtype,
    use_double_quant=True,
)

# Training run settings: where the data lives, where results go, which
# modules are targeted, and the epoch / batch schedule.
finetune_cfg = FinetuneConfig(
    dataset_path="data/imageclef_2025_train.jsonl",
    output_dir="outputs/qwen2vl-imageclef-2025",
    target_modules=attention_projections + mlp_projections,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
)


In [None]:
# Load the model together with its processor, driven by the model and
# quantization configs built in the configuration cell. The loader's result
# is unpacked into exactly two names, so it must return a pair.
model, processor = load_multimodal_model(model_cfg, quant_cfg)

In [None]:
# Pass the loaded model through the project's quantization helper.
# NOTE(review): quant_cfg (load_in_4bit=True) was already given to
# load_multimodal_model when the model was loaded -- confirm that
# quantize_model does not try to quantize an already-quantized model, or
# that one of the two steps is a no-op for this configuration.
quantized_model = quantize_model(model, quant_cfg)

In [None]:
# Fine-tune the quantized model using the processor and the training settings
# in finetune_cfg; the helper's return value is kept as fine_tuned_model.
# Presumably results are written under finetune_cfg.output_dir -- TODO confirm
# against src.training.finetune.
fine_tuned_model = fine_tune_model(
    quantized_model,
    processor,
    finetune_cfg,
)
