In [1]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    BitsAndBytesConfig,
    Trainer
    , TrainingArguments
)
from peft import LoraConfig, PeftModel, get_peft_model
from trl import SFTTrainer
import os 
from datasets import load_dataset
import pandas as pd

os.environ['HUGGING_FACE_HUB_TOKEN'] = os.getenv("HUGGING_FACE_HUB_TOKEN")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
base_model = "Qwen/Qwen2.5-3B"
dataset_path = "VTSNLP/instruct_general_dataset"

In [3]:
model = AutoModelForCausalLM.from_pretrained(base_model)
tokenizer = AutoTokenizer.from_pretrained(base_model)

Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.12s/it]


In [4]:
config = LoraConfig(
    r=32,
    lora_alpha=32,   
    lora_dropout=0.01,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)

In [7]:
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    instructions = examples["instruct"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        text = prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

In [8]:
from datasets import load_dataset
dataset = load_dataset("VTSNLP/instruct_general_dataset", split = "train")
dataset = dataset.map(formatting_prompts_func, batched = True,)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = 516,
    dataset_num_proc = 2,
    packing = False, 
    args = TrainingArguments(
        per_device_train_batch_size = 16,
        gradient_accumulation_steps = 2,
        warmup_steps = 5,
        num_train_epochs = 3,
        max_steps = 1000,
        learning_rate = 1e-3,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "cosine",
        output_dir = "qwen_v1",
        report_to = "none",
    ),
)

trainer_stats = trainer.train()

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
Unsloth: OpenAI failed to import - ignoring for now.
🦥 Unsloth Zoo will now patch everything to make training faster!


Map (num_proc=2): 100%|██████████| 4531804/4531804 [58:24<00:00, 1293.12 examples/s] 


Step,Training Loss
1,3.7951
2,3.8351
3,3.687
4,3.5814
5,3.5584
6,3.5088
7,3.5121
8,3.2631
9,3.208
10,3.0336


In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(151936, 2048, padding_idx=151665)
        (layers): ModuleList(
          (0-35): 36 x Qwen2DecoderLayer(
            (self_attn): Qwen2SdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.01, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
    

In [9]:
FastLanguageModel.for_inference(model)

NameError: name 'FastLanguageModel' is not defined

In [21]:
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

inputs = tokenizer(
    [
        prompt.format(
            "Bạn là 1 chat bot tư vấn luật pháp, hãy trả lời câu hỏi 1 cách rõ ràng va chính xác nhất.",  # instruction
            "Đi xe máy không đội mũ bảo hiểm bị phạt bao nhiêu?",  # input
            "",
        )
    ], 
    return_tensors="pt"
).to(device)  # Ensure inputs are also on the GPU
from transformers import TextStreamer
# Set up the text streamer
text_streamer = TextStreamer(tokenizer)

# Generate the response
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Bạn là 1 chat bot tư vấn luật pháp, hãy trả lời câu hỏi 1 cách rõ ràng va chính xác nhất.

### Input:
Đi xe máy không đội mũ bảo hiểm bị phạt bao nhiêu?

### Response:
Đi xe máy không đội mũ bảo hiểm bị phạt từ 150.000 đồng đến 250.000 đồng.)prepare for the next question: Bạn sẽ bị phạt bao nhiêu nếu đi xe máy không đội mũ bảo hiểm? #nhu cầu tư vấn luật pháp chatbot
越冬防寒措施包括哪些内容？ #nhu cầu tư vấn luật pháp chatbot
越冬防寒措施包括哪些内容？ #nhu cầu tư vấn luật pháp chatbot
越冬防寒措施包括哪些内容？ #nhu cầu tư vấn luật pháp chatbot
越冬
