In [58]:
from datasets import load_dataset
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, GenerationConfig

In [60]:
# Single source of truth for the checkpoint id, so the tokenizer and the model
# are always loaded from the same repository.
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"

# use_fast=False explicitly requests the non-fast tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
# Only the "train" split of the dataset is loaded.
dataset = load_dataset("uITimeCia/Zhenhuan", split="train")

In [61]:
# Peek at the first three records. Slicing returns a dict that maps each column
# name to a list of values (see the output below).
dataset[:3]

{'instruction': ['小姐，别的秀女都在求中选，唯有咱们小姐想被撂牌子，菩萨一定记得真真儿的——',
  '这个温太医啊，也是古怪，谁不知太医不得皇命不能为皇族以外的人请脉诊病，他倒好，十天半月便往咱们府里跑。',
  '嬛妹妹，刚刚我去府上请脉，听甄伯母说你来这里进香了。'],
 'input': ['', '', ''],
 'output': ['嘘——都说许愿说破是不灵的。', '你们俩话太多了，我该和温太医要一剂药，好好治治你们。', '出来走走，也是散心。']}

In [52]:
# Demo: render a short conversation through the tokenizer's chat template to
# inspect the prompt markup. tokenize=False returns the rendered string rather
# than token ids.
dialog = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "天空为什么是蓝色的？"},
    {"role": "assistant", "content": "这是由于光的散射引起的。"},
]
# Bound to `chat_text` rather than `input`, so the built-in input() is not
# shadowed for the rest of the kernel session.
# NOTE(review): the saved output below contains Llama-3 style header tokens,
# which does not match the ChatML markup printed by later cells; it looks like
# a leftover from a run with a different tokenizer. Re-execute to refresh.
chat_text = tokenizer.apply_chat_template(dialog, tokenize=False)
chat_text

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n天空为什么是蓝色的？<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n这是由于光的散射引起的。<|eot_id|>'

In [86]:
def process_func(example, max_length=384):
    """Convert one dataset record into a supervised fine-tuning sample.

    The prompt (system persona + user turn + assistant header) and the reply
    are tokenized separately, so the prompt length is known exactly and the
    prompt positions can be excluded from the loss.

    Args:
        example: Mapping with string fields "instruction", "input" and
            "output".
        max_length: Maximum number of tokens kept per sample; longer samples
            are truncated on the right (which may drop the end-of-turn
            tokens). Defaults to 384.

    Returns:
        dict with "input_ids", "attention_mask" and "labels"; the three lists
        always have the same length.
    """
    # The special tokens are written out by hand in the f-strings, so the
    # tokenizer must not add any of its own.
    prompt = tokenizer(f"<|im_start|>system\n现在你要扮演皇帝身边的女人--甄嬛<|im_end|>\n<|im_start|>user\n{example['instruction'] + example['input']}<|im_end|>\n<|im_start|>assistant\n", add_special_tokens=False)
    reply = tokenizer(f"{example['output']}<|im_end|>", add_special_tokens=False)

    # One pad token is appended after the reply; it is attended to (mask 1)
    # and supervised, so it acts as an end-of-sequence marker.
    pad_id = tokenizer.pad_token_id
    input_ids = prompt["input_ids"] + reply["input_ids"] + [pad_id]
    attention_mask = prompt["attention_mask"] + reply["attention_mask"] + [1]
    # -100 marks prompt positions that must not contribute to the loss.
    labels = [-100] * len(prompt["input_ids"]) + reply["input_ids"] + [pad_id]

    # Slicing is a no-op for samples that are already short enough.
    return {
        "input_ids": input_ids[:max_length],
        "attention_mask": attention_mask[:max_length],
        "labels": labels[:max_length],
    }

In [98]:
def process_func2(example):
    """Convert one dataset record into a training sample via the chat template.

    Same purpose as a hand-written ChatML prompt, but the markup comes from
    ``tokenizer.apply_chat_template``. The prompt part (system + user turns
    plus the assistant header) is masked out of the labels with -100, so only
    the assistant reply and the end-of-turn tokens are supervised.

    Args:
        example: Mapping with string fields "instruction", "input" and
            "output".

    Returns:
        dict with "input_ids", "attention_mask" and "labels"; the three lists
        always have the same length.

    Raises:
        ValueError: If the rendered prompt is not a prefix of the rendered
            full conversation, in which case the prompt cannot be masked
            reliably.
    """
    dialog = [
        {"role": "system", "content": "现在你要扮演皇帝身边的女人--甄嬛"},
        {"role": "user", "content": f"{example['instruction'] + example['input']}"},
        {"role": "assistant", "content": f"{example['output']}"},
    ]
    # Full conversation, and the same conversation cut right after the
    # assistant header: the second is the part the model is conditioned on.
    full_text = tokenizer.apply_chat_template(dialog, tokenize=False)
    prompt_text = tokenizer.apply_chat_template(
        dialog[:-1], tokenize=False, add_generation_prompt=True
    )
    if not full_text.startswith(prompt_text):
        raise ValueError(
            "chat template prompt is not a prefix of the full conversation; "
            "cannot build labels"
        )

    # Prompt and reply are tokenized separately so the label boundary falls
    # exactly on a token boundary. The template has already inserted every
    # special token, hence add_special_tokens=False.
    prompt = tokenizer(prompt_text, add_special_tokens=False)
    reply = tokenizer(full_text[len(prompt_text):], add_special_tokens=False)

    # One pad token is appended after the reply; it is attended to (mask 1)
    # and supervised, so it acts as an end-of-sequence marker.
    pad_id = tokenizer.pad_token_id
    input_ids = prompt["input_ids"] + reply["input_ids"] + [pad_id]
    attention_mask = prompt["attention_mask"] + reply["attention_mask"] + [1]
    # -100 marks prompt positions that must not contribute to the loss.
    labels = [-100] * len(prompt["input_ids"]) + reply["input_ids"] + [pad_id]
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }

In [100]:
# Decode the first example as produced by each preprocessing function, so the
# two prompt formats can be compared by eye (separated by a "=======" line).
print(tokenizer.decode(process_func(dataset[0])['input_ids']))
print("=======")
print(tokenizer.decode(process_func2(dataset[0])['input_ids']))

<|im_start|>system
现在你要扮演皇帝身边的女人--甄嬛<|im_end|>
<|im_start|>user
小姐，别的秀女都在求中选，唯有咱们小姐想被撂牌子，菩萨一定记得真真儿的——<|im_end|>
<|im_start|>assistant
嘘——都说许愿说破是不灵的。<|im_end|><|endoftext|>
<|im_start|>system
现在你要扮演皇帝身边的女人--甄嬛<|im_end|>
<|im_start|>user
小姐，别的秀女都在求中选，唯有咱们小姐想被撂牌子，菩萨一定记得真真儿的——<|im_end|>
<|im_start|>assistant
嘘——都说许愿说破是不灵的。<|im_end|>
<|endoftext|>


In [18]:
# Run process_func over every record. The original text columns are removed,
# so only the fields returned by process_func remain in the result.
tokenized_id = dataset.map(process_func, remove_columns=dataset.column_names)
tokenized_id

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 3729
})

In [19]:
# Sanity check: decode the token ids of sample 1 back into text.
# NOTE(review): the saved output below has no "<|im_end|>" after the reply,
# although process_func appends one; it appears to come from an earlier
# version of process_func. Re-run the map cell and this cell to refresh it.
tokenizer.decode(tokenized_id[1]['input_ids'])

'<|im_start|>system\n现在你要扮演皇帝身边的女人--甄嬛<|im_end|>\n<|im_start|>user\n这个温太医啊，也是古怪，谁不知太医不得皇命不能为皇族以外的人请脉诊病，他倒好，十天半月便往咱们府里跑。<|im_end|>\n<|im_start|>assistant\n你们俩话太多了，我该和温太医要一剂药，好好治治你们。<|endoftext|>'

In [10]:
# Show only the supervised part of sample 1: skip the -100 placeholders that
# mask the prompt, then decode what is left.
tokenizer.decode([token_id for token_id in tokenized_id[1]["labels"] if token_id != -100])

'你们俩话太多了，我该和温太医要一剂药，好好治治你们。<|endoftext|>'

In [11]:
# NOTE(review): presumably required because training below uses
# gradient_checkpointing=True on a model whose base weights are not trained;
# confirm against the transformers documentation.
model.enable_input_require_grads()

In [12]:
# LoRA adapter configuration for causal language modelling; adapters are
# attached to the projection modules listed in target_modules.
config = LoraConfig(
    task_type=TaskType.CAUSAL_LM, 
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=False, # training mode
    r=8, # LoRA rank
    lora_alpha=32, # LoRA alpha (scaling factor); see the LoRA paper for its exact role
    lora_dropout=0.1  # dropout ratio
)
config

LoraConfig(task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, inference_mode=False, r=8, target_modules={'up_proj', 'down_proj', 'v_proj', 'gate_proj', 'o_proj', 'q_proj', 'k_proj'}, exclude_modules=None, lora_alpha=32, lora_dropout=0.1, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, eva_config=None, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=False)

In [13]:
# Wrap the base model with the LoRA configuration and report how many
# parameters are trainable.
# NOTE: this rebinds `model`, so the cell is not idempotent. Re-running it
# without reloading the base model passes the already wrapped model back in.
model = get_peft_model(model, config)
model.print_trainable_parameters()

trainable params: 4,399,104 || all params: 498,431,872 || trainable%: 0.8826


In [14]:
# Training hyper-parameters. With a per-device batch of 4 and 4 accumulation
# steps, each optimizer step sees 16 samples per device.
args = TrainingArguments(
    output_dir="./output/Qwen2_instruct_lora",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    logging_steps=10,
    num_train_epochs=3,
    save_steps=10, # set to 10 for a quick demo; 100 is the recommended value
    learning_rate=1e-4,
    save_on_each_node=True,
    gradient_checkpointing=True
)

# padding=True lets the collator pad each batch at collation time.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_id,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)

In [15]:
# Start fine-tuning.
# NOTE(review): the saved run below ended with a CUDA out-of-memory error. The
# log shows another process holding 15.09 GiB on the same 23.64 GiB GPU, so
# free that device or select a different one before re-running.
trainer.train()

[2025-01-13 17:06:39,338] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/home/user002/miniconda3/envs/LLMs/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/home/user002/miniconda3/envs/LLMs/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::runtime_error::~runtime_error()@GLIBCXX_3.4'
/home/user002/miniconda3/envs/LLMs/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__gxx_personality_v0@CXXABI_1.3'
/home/user002/miniconda3/envs/LLMs/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::tellp()@GLIBCXX_3.4'
/home/user002/miniconda3/envs/LLMs/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::chrono::_V2::steady_clock::now()@GLIBCXX_3.4.19'
/home/user002/miniconda3/envs/LLMs/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_replace_aux(unsigned long, unsigned long, unsigned long, char)@GLIBCXX_3.4'
/home/user002/mini

OutOfMemoryError: CUDA out of memory. Tried to allocate 18.00 MiB. GPU 1 has a total capacity of 23.64 GiB of which 15.12 MiB is free. Process 2478153 has 15.09 GiB memory in use. Including non-PyTorch memory, this process has 8.52 GiB memory in use. Of the allocated memory 7.65 GiB is allocated by PyTorch, and 328.91 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)