# 4.0 - Finetune Qwen3 0.6B on Orange QA train data

In [1]:
import os
import json
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

# --- CONFIGURATION ---
MODEL_ID = "Qwen/Qwen3-0.6B"
PEFT_SUFFIX = "LoRA_qkvo"

OUTPUT_DIR = os.path.join(os.getcwd(), '..', 'models', f"orange_qa_finetuned_{MODEL_ID.split('/')[-1]}_{PEFT_SUFFIX}")
DATA_FILE = os.path.join(os.getcwd(), '..', 'data', 'train_test_dataset', 'orange_qa_train.jsonl')

# 1. Load Tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token # Fix: Qwen has no default pad token

dataset = load_dataset("json", data_files=DATA_FILE, split="train")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    trust_remote_code=True,
    dtype=torch.float16,
)

peft_config = LoraConfig(
    r=4,        # Rank (Higher = more parameters to train, smarter but slower)
    lora_alpha=8,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], #, "gate_proj", "down_proj", "up_proj"],
    use_dora=False, # <--- This enables DoRA (Better learning than standard LoRA)
)

model_dora = get_peft_model(model, peft_config)

for name, module in model_dora.named_modules():
    if 'proj' in name:
        print(name)

'NoneType' object has no attribute 'cadam32bit_grad_fp32'
base_model.model.model.layers.0.self_attn.q_proj
base_model.model.model.layers.0.self_attn.q_proj.base_layer
base_model.model.model.layers.0.self_attn.q_proj.lora_dropout
base_model.model.model.layers.0.self_attn.q_proj.lora_dropout.default
base_model.model.model.layers.0.self_attn.q_proj.lora_A
base_model.model.model.layers.0.self_attn.q_proj.lora_A.default
base_model.model.model.layers.0.self_attn.q_proj.lora_B
base_model.model.model.layers.0.self_attn.q_proj.lora_B.default
base_model.model.model.layers.0.self_attn.q_proj.lora_embedding_A
base_model.model.model.layers.0.self_attn.q_proj.lora_embedding_B
base_model.model.model.layers.0.self_attn.q_proj.lora_magnitude_vector
base_model.model.model.layers.0.self_attn.k_proj
base_model.model.model.layers.0.self_attn.k_proj.base_layer
base_model.model.model.layers.0.self_attn.k_proj.lora_dropout
base_model.model.model.layers.0.self_attn.k_proj.lora_dropout.default
base_model.model.

  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [3]:
# 6. Training Arguments
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=1,          # How many times to read the docs
    per_device_train_batch_size=4, 
    gradient_accumulation_steps=2,
    learning_rate=1e-4,
    fp16=True,                   # Use mixed precision
    logging_steps=2,
    optim="adamw_torch",   
    save_strategy="epoch",       # Save a checkpoint every epoch
)

# 7. Initialize Trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    args=training_args,
    processing_class=tokenizer,
)

# 8. Train & Save
print("Starting training...")
trainer.train()

print(f"Saving model to {OUTPUT_DIR}...")
trainer.model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print("Done!")

The model is already on multiple devices. Skipping the move to device specified in `args`.
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151645}.


Starting training...


Step,Training Loss
2,4.2019
4,3.7235
6,3.7224
8,3.396
10,3.3001
12,2.9328
14,2.7704
16,2.6898
18,2.69
20,2.4781


Saving model to /Users/martin/Documents/FRI/Workshops/LoRA-tutorial/notebooks/../models/orange_qa_finetuned_Qwen3-0.6B_LoRA_qkvo...
Done!
