In [1]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 
dtype = None 
load_in_4bit = True 


model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen2.5-14B",
    max_seq_length = max_seq_length,
    dtype = torch.bfloat16,
    load_in_4bit = load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2024.12.2: Fast Qwen2 patching. Transformers:4.47.0.
   \\   /|    GPU: NVIDIA A100-SXM4-80GB. Max memory: 79.252 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.83s/it]


In [2]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0.01, 
    bias = "none",    
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False, 
    loftq_config = None,
)

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.01.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2024.12.2 patched 48 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [3]:
import json
from datasets import Dataset
def prepare_dataset():
    
    
    with open("/home/ltnga/ITDSIU21079/training/training_data.json", 'r', encoding='utf-8') as f:
        raw_data = json.load(f)
    
    processed_data = []
    
    for example in raw_data:
        # Format inputs - simulate RAG system prompts
        if example["question_type"] == "violation_type":
            system_prompt = "Dựa trên tài liệu đã trích xuất, hãy phân tích và trả lời câu hỏi."
        else:
            system_prompt = "Dựa trên tài liệu đã trích xuất, hãy phân tích và trả lời câu hỏi về thông tin luật giao thông."
            
        input_text = f"{system_prompt}\n\nCâu hỏi: {example['formatted_query']}\n\nTài liệu: {example['retrieved_context']}"
        
        if example["expected_decision"] == "Đã đủ thông tin":
            # If information is sufficient, train model to generate final answer
            output_text = json.dumps({
                "analysis": example["expected_analysis"],
                "decision": "Đã đủ thông tin",
                "next_query": "",
                "final_answer": example["expected_final_answer"]
            }, ensure_ascii=False)
        else:
            # If information is insufficient, train model to ask follow-up
            output_text = json.dumps({
                "analysis": example["expected_analysis"],
                "decision": "Cần thêm thông tin",
                "next_query": example["expected_next_query"],
                "final_answer": ""
            }, ensure_ascii=False)
        
        # Full text for training
        full_text = f"{input_text}\n\n{output_text}"
        
        processed_data.append({
            "text": full_text,
            "input": input_text,
            "output": output_text
        })
    
    # Create Hugging Face dataset
    dataset = Dataset.from_list(processed_data)
    return dataset

In [4]:
def tokenize_function(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=max_seq_length,
        padding="max_length"
    )

In [5]:
dataset = prepare_dataset()
tokenized_dataset = dataset.map(
        tokenize_function,
        batched=True,
        num_proc=4,
        remove_columns=["text", "input", "output"]
    )
tokenized_dataset


Map (num_proc=4): 100%|██████████| 51/51 [00:00<00:00, 212.20 examples/s]


Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 51
})

In [6]:
from transformers import (
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,    
)
from unsloth import is_bfloat16_supported
from trl import SFTTrainer

OUTPUT_DIR = "trained_autorag_model"
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.1
LEARNING_RATE = 2e-4
BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = 4
NUM_EPOCHS = 3

training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        learning_rate=LEARNING_RATE,
        num_train_epochs=NUM_EPOCHS,
        max_steps=100,
        weight_decay=0.01,
        logging_dir=f"{OUTPUT_DIR}/logs",
        logging_steps=10,
        evaluation_strategy="steps",
        eval_steps=100,
        save_strategy="steps",
        save_steps=100,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        load_best_model_at_end=True,
        save_total_limit=3,
        remove_unused_columns=False
    )



In [8]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = tokenized_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 3, # Set this for 1 full training run.
        max_steps = 1000,
        learning_rate = 2e-2,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

In [None]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 51 | Num Epochs = 334
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 1,000
 "-____-"     Number of trainable parameters = 68,812,800


Step,Training Loss
10,62.1066
20,57.8245
30,40.6527
40,25.7457
50,26.7831
60,22.7643
70,22.189
80,19.4718
90,19.4149
100,17.249
