In [1]:
import torch
import json
from datasets import Dataset
from unsloth import FastVisionModel
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


# Carga del modelo y activación de LoRA

In [2]:
# Identifier of the base model handed to the Unsloth loader.
# NOTE(review): the original comment described this as a local path to a
# previously downloaded model, but the value is written as an "org/name"
# repo id -- confirm which one is intended.
model_id = "Qwen/Qwen2.5-VL-7B-Instruct"

# Loader options: no 4-bit quantization, Unsloth's gradient-checkpointing mode.
load_options = dict(
    load_in_4bit=False,
    use_gradient_checkpointing="unsloth",
)
model, tokenizer = FastVisionModel.from_pretrained(model_id, **load_options)

# LoRA adapter settings applied on top of the loaded model.
lora_config = dict(
    # Which parts of the network receive adapters: all four switches are on.
    finetune_vision_layers=True,
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    # Adapter rank and scaling, with dropout and bias terms disabled.
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    # Optional variants left off.
    use_rslora=False,
    loftq_config=None,
    # Fixed seed for the adapter setup.
    random_state=3407,
)
model = FastVisionModel.get_peft_model(model, **lora_config)


  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"{DEVICE_TYPE}:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.7.3: Fast Qwen2_5_Vl patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 4090. Num GPUs = 1. Max memory: 23.988 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.7.1+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.31.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: QLoRA and full finetuning all not selected. Switching to 16bit LoRA.


Loading checkpoint shards: 100%|██████████| 4/4 [00:11<00:00,  2.76s/it]
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


# Cargar y preparar el dataset

In [3]:
# Location of the fine-tuning JSON file.
# NOTE(review): this is a machine-specific absolute path; adjust it when
# running the notebook elsewhere.
ruta_dataset = "C:/Users/aaron/Desktop/Examenes/JSON_fine_tuning/subset_finetune.json"

# Read the whole file as UTF-8 text and parse it as JSON; the handle is
# closed as soon as the `with` block exits.
with open(ruta_dataset, mode="r", encoding="utf-8") as f:
    datos = json.loads(f.read())

def fusionar_prompts(datos):
    """Merge each example's prompt and answer into one plain-text record.

    Args:
        datos: Iterable of examples, each indexable by the keys
            "input_text" and "output_text".

    Returns:
        A list with one ``{"text": ...}`` dict per example, in input order.
        The text is the prompt, a newline, then the answer.

    Raises:
        KeyError: If a dict example lacks "input_text" or "output_text".
    """
    # Pull out the (prompt, answer) pairs lazily, then format them in one pass.
    pares = ((item["input_text"], item["output_text"]) for item in datos)
    return [{"text": f"{prompt}\n{respuesta}"} for prompt, respuesta in pares]

# Build the Hugging Face dataset: merge every raw example into a
# {"text": ...} record, then wrap the resulting list in a Dataset.
ejemplos_formateados = fusionar_prompts(datos)
dataset = Dataset.from_list(ejemplos_formateados)

# Configuración del fine-tuning

### Verifica el pad_token_id del tokenizer antes del entrenamiento

In [4]:
# Make sure a padding token exists before training: when the tokenizer
# defines none, reuse its EOS token as the pad token and mirror the
# resulting id into the model config. Nothing changes if a pad token is
# already set.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id

### Valida el dataset antes de pasarlo al trainer

In [5]:
# Report every record whose "text" field is empty or is not a string.
# This is a diagnostic pass only: offending records are printed, not
# removed, and the loop never aborts.
for i, ejemplo in enumerate(dataset):
    texto = ejemplo["text"]
    # Truthiness is checked first, then the type, as in the original condition.
    es_valido = bool(texto) and isinstance(texto, str)
    if not es_valido:
        print(f"❌ Ejemplo inválido en índice {i}: {ejemplo}")

### Entrenamiento

In [6]:
# Training hyperparameters.
# One sample per device with 4 gradient-accumulation steps; the run length
# is capped by max_steps=100.
training_config = dict(
    output_dir="../outputs_qwen2vl_finetuned",
    # Batch size and schedule
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=100,
    # Optimizer
    learning_rate=2e-4,
    weight_decay=0.01,
    # Mixed precision: bf16 when the helper reports support, fp16 otherwise
    fp16=not is_bfloat16_supported(),
    bf16=is_bfloat16_supported(),
    logging_steps=2,
    optim="adamw_8bit",
    lr_scheduler_type="linear",
    # Reproducibility and reporting
    seed=3407,
    report_to="none",
)
training_args = TrainingArguments(**training_config)

# Supervised fine-tuning trainer: reads the "text" column of the dataset,
# with packing disabled and a sequence-length setting of 2048.
trainer_config = dict(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    packing=False,
    dataset_num_proc=2,
    args=training_args,
)
trainer = SFTTrainer(**trainer_config)

# Launch training and keep the value returned by train() for later inspection.
trainer_stats = trainer.train()

Unsloth: Tokenizing ["text"]: 100%|██████████| 102/102 [00:00<00:00, 5074.78 examples/s]
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 102 | Num Epochs = 4 | Total steps = 100
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 4 x 1) = 4
 "-____-"     Trainable parameters = 51,521,536 of 8,343,688,192 (0.62% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
2,2.6385
4,2.2742
6,1.9497
8,2.0312
10,1.8336
12,1.8144
14,1.6895
16,1.6095
18,1.5338
20,1.4291


Unsloth: Will smartly offload gradients to save VRAM!


In [7]:
# Save the fine-tuned model and the tokenizer side by side in one directory.
# NOTE(review): the original comment says the LoRA adapters are what gets
# saved here; whether the base weights are also written is not visible in
# this notebook -- confirm.
ruta_salida = "models/modelo_finetuneado_qwen2vl"
model.save_pretrained(ruta_salida)
tokenizer.save_pretrained(ruta_salida)

[]