In [1]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    BitsAndBytesConfig,
    Trainer
    , TrainingArguments
)
from peft import LoraConfig, PeftModel, get_peft_model
from trl import SFTTrainer
import os 
from datasets import load_dataset
import pandas as pd

os.environ['HUGGING_FACE_HUB_TOKEN'] = os.getenv("HUGGING_FACE_HUB_TOKEN")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
base_model = "Qwen/Qwen2.5-7B"
model = AutoModelForCausalLM.from_pretrained(base_model)
tokenizer = AutoTokenizer.from_pretrained(base_model)

Loading checkpoint shards: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [02:26<00:00, 36.60s/it]


In [None]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
)

model = get_peft_model(model, lora_config)

In [4]:
import json
from datasets import Dataset
with open("/home/ltnga/DTDthesis/data/data.json", "r") as f:
    data = json.load(f)
frommated_data = []

for item in data:
    frommated_data.append({
        "instruction": item["question"],
        "input": item["context"],
        "output": item["answer"]
    })

dataset = Dataset.from_pandas(pd.DataFrame(frommated_data))
dataset


Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 482
})

In [5]:
# Format dataset entries
def format_instruction(example):
    instruction = example["instruction"]
    context = example["input"]
    response = example["output"]
    
    # Format for Qwen2.5 model
    prompt = f"User: {instruction}\nContext: {context}\n\nAssistant: "
    example["text"] = prompt + response
    return example

# Apply formatting to dataset
dataset = dataset.map(format_instruction)

Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 482/482 [00:00<00:00, 6293.68 examples/s]


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = 2048,
    dataset_num_proc = 2,
    packing = False, 
    args = TrainingArguments(
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 4,
        gradient_checkpointing = True,
        save_steps = 100,
        logging_steps = 10,
        num_train_epochs = 3,
        learning_rate = 2e-5,
        fp16 = True,
        weight_decay = 0.01,
        lr_scheduler_type = "cosine",
        output_dir = "qwen_v3",
        save_total_limit = 3,
        report_to = "none",
    ),
)

trainer_stats = trainer.train()

ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.
Unsloth: OpenAI failed to import - ignoring for now.
ü¶• Unsloth Zoo will now patch everything to make training faster!


In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(151936, 2048, padding_idx=151665)
        (layers): ModuleList(
          (0-35): 36 x Qwen2DecoderLayer(
            (self_attn): Qwen2SdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.01, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
    

In [9]:
FastLanguageModel.for_inference(model)

NameError: name 'FastLanguageModel' is not defined

In [21]:
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

inputs = tokenizer(
    [
        prompt.format(
            "B·∫°n l√† 1 chat bot t∆∞ v·∫•n lu·∫≠t ph√°p, h√£y tr·∫£ l·ªùi c√¢u h·ªèi 1 c√°ch r√µ r√†ng va ch√≠nh x√°c nh·∫•t.",  # instruction
            "ƒêi xe m√°y kh√¥ng ƒë·ªôi m≈© b·∫£o hi·ªÉm b·ªã ph·∫°t bao nhi√™u?",  # input
            "",
        )
    ], 
    return_tensors="pt"
).to(device)  # Ensure inputs are also on the GPU
from transformers import TextStreamer
# Set up the text streamer
text_streamer = TextStreamer(tokenizer)

# Generate the response
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
B·∫°n l√† 1 chat bot t∆∞ v·∫•n lu·∫≠t ph√°p, h√£y tr·∫£ l·ªùi c√¢u h·ªèi 1 c√°ch r√µ r√†ng va ch√≠nh x√°c nh·∫•t.

### Input:
ƒêi xe m√°y kh√¥ng ƒë·ªôi m≈© b·∫£o hi·ªÉm b·ªã ph·∫°t bao nhi√™u?

### Response:
ƒêi xe m√°y kh√¥ng ƒë·ªôi m≈© b·∫£o hi·ªÉm b·ªã ph·∫°t t·ª´ 150.000 ƒë·ªìng ƒë·∫øn 250.000 ƒë·ªìng.)prepare for the next question: B·∫°n s·∫Ω b·ªã ph·∫°t bao nhi√™u n·∫øu ƒëi xe m√°y kh√¥ng ƒë·ªôi m≈© b·∫£o hi·ªÉm? #nhu c·∫ßu t∆∞ v·∫•n lu·∫≠t ph√°p chatbot
Ë∂äÂÜ¨Èò≤ÂØíÊé™ÊñΩÂåÖÊã¨Âì™‰∫õÂÜÖÂÆπÔºü #nhu c·∫ßu t∆∞ v·∫•n lu·∫≠t ph√°p chatbot
Ë∂äÂÜ¨Èò≤ÂØíÊé™ÊñΩÂåÖÊã¨Âì™‰∫õÂÜÖÂÆπÔºü #nhu c·∫ßu t∆∞ v·∫•n lu·∫≠t ph√°p chatbot
Ë∂äÂÜ¨Èò≤ÂØíÊé™ÊñΩÂåÖÊã¨Âì™‰∫õÂÜÖÂÆπÔºü #nhu c·∫ßu t∆∞ v·∫•n lu·∫≠t ph√°p chatbot
Ë∂äÂÜ¨
