# 基于Qwen2.5的LoRA微调

参考以下教程实现

[Qwen2.5-7B-Instruct Lora 微调](https://github.com/datawhalechina/self-llm?tab=readme-ov-file)

## Step1 导入相关包

In [1]:
import os
os.environ["HF_HOME"] = '/root/autodl-tmp/huggingface'
os.environ["TRANSFORMERS_CACHE"] = '/root/autodl-tmp/huggingface'

In [2]:
import subprocess
import os

result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
output = result.stdout
for line in output.splitlines():
    if '=' in line:
        var, value = line.split('=', 1)
        os.environ[var] = value

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer



## Step2 加载数据集

In [4]:
import json

from datasets import Dataset

# 数据集路径
input_file = "./dataset/chat_trans.jsonl"


def load_jsonl_data(file_path):
    json_data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                json_data.append(json.loads(line.strip()))
            except json.JSONDecodeError:
                continue
    return json_data



# 将数据加载为 Dataset 格式
data = load_jsonl_data(input_file)
dataset = Dataset.from_list(data)
dataset

Dataset({
    features: ['question', 'answer'],
    num_rows: 12822
})

## Step3 数据预处理

In [5]:
# 这里使用的是AUTODL上的镜像，直接加载镜像里的模型
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct", use_fast=False, trust_remote_code=True)
#tokenizer = AutoTokenizer.from_pretrained('/root/autodl-tmp/qwen/Qwen2.5-7B-Instruct/', use_fast=False, trust_remote_code=True)

In [6]:
def process_func(example):
    MAX_LENGTH = 384
    input_ids_batch, attention_mask_batch, labels_batch = [], [], []

    for idx, question in enumerate(example['question']):
        instruction = tokenizer(f"<|im_start|>system\n宜家你系讲信宜话个智能助手--阿信<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n",
                                add_special_tokens=False)
        response = tokenizer(f"{example['answer'][idx]}", add_special_tokens=False)
        input_ids = instruction["input_ids"] + response["input_ids"] + [tokenizer.pad_token_id]
        attention_mask = instruction["attention_mask"] + response["attention_mask"] + [1]
        labels = [-100] * len(instruction["input_ids"]) + response["input_ids"] + [tokenizer.pad_token_id]

        # 截断操作
        if len(input_ids) > MAX_LENGTH:
            input_ids = input_ids[:MAX_LENGTH]
            attention_mask = attention_mask[:MAX_LENGTH]
            labels = labels[:MAX_LENGTH]

        # 添加到批量列表
        input_ids_batch.append(input_ids)
        attention_mask_batch.append(attention_mask)
        labels_batch.append(labels)

    return {
        "input_ids": input_ids_batch,
        "attention_mask": attention_mask_batch,
        "labels": labels_batch
    }

In [7]:
tokenized_dataset = dataset.map(process_func, batched=True)

Map:   0%|          | 0/12822 [00:00<?, ? examples/s]

In [8]:
# 半精度加载模型，减少内存使用
# model = AutoModelForCausalLM.from_pretrained('/root/autodl-tmp/qwen/Qwen2.5-7B-Instruct/', device_map="auto", torch_dtype=torch.bfloat16)
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-7B-Instruct", device_map="auto", torch_dtype=torch.bfloat16)

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [9]:
model.enable_input_require_grads()

In [10]:
model.dtype

torch.bfloat16

## Step5 定义lora参数

In [11]:
from peft import LoraConfig, TaskType

config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=False,  # 训练模式
    r=8,  # Lora 秩
    lora_alpha=32,  # Lora alaph，具体作用参见 Lora 原理
    lora_dropout=0.1  # Dropout 比例
)

In [12]:
from peft import get_peft_model

# # 应用 LoRA 配置到模型
model = get_peft_model(model, config)
config

LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', revision=None, task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, inference_mode=False, r=8, target_modules={'v_proj', 'q_proj', 'up_proj', 'k_proj', 'gate_proj', 'o_proj', 'down_proj'}, lora_alpha=32, lora_dropout=0.1, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, use_dora=False, layer_replication=None)

In [13]:
# 查看训练参数
model.print_trainable_parameters()

trainable params: 20,185,088 || all params: 7,635,801,600 || trainable%: 0.26434798934534914


## Step6 配置训练参数

In [14]:
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="./output/Qwen2.5_instruct_lora",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    logging_steps=10,
    num_train_epochs=3,
    save_steps=100,
    learning_rate=1e-4,
    save_on_each_node=True,
    gradient_checkpointing=True
)

## Step7 创建Trainer

In [15]:
from transformers import DataCollatorForSeq2Seq, Trainer

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)

## Step8 训练模型

In [16]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss
10,3.3219
20,2.2312
30,1.8905
40,1.6969
50,1.6502
60,1.6118
70,1.5794
80,1.4449
90,1.4482
100,1.4366


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enab

TrainOutput(global_step=2403, training_loss=0.9425098227303672, metrics={'train_runtime': 2382.5559, 'train_samples_per_second': 16.145, 'train_steps_per_second': 1.009, 'total_flos': 1.450465414315868e+17, 'train_loss': 0.9425098227303672, 'epoch': 2.9981285090455394})

## Step9 评估模型

In [1]:
import os
os.environ["HF_HOME"] = '/root/autodl-tmp/huggingface'
os.environ["TRANSFORMERS_CACHE"] = '/root/autodl-tmp/huggingface'
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel

mode_path = 'Qwen/Qwen2.5-7B-Instruct'
lora_path = './output/Qwen2.5_instruct_lora/checkpoint-2403'  # 这里改称你的 lora 输出对应 checkpoint 地址

# 加载tokenizer
tokenizer = AutoTokenizer.from_pretrained(mode_path, trust_remote_code=True)

# 加载模型
model = AutoModelForCausalLM.from_pretrained(mode_path, device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True).eval()

# 加载lora权重
model = PeftModel.from_pretrained(model, model_id=lora_path)




Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [2]:
#prompt = "信宜系嘘嘘？介绍下信宜，讲下他有咩特产"
prompt = "你系咩人？"
inputs = tokenizer.apply_chat_template([{"role": "user", "content": "宜家你系讲信宜话个助手--阿信"}, {"role": "user", "content": prompt}],
                                       add_generation_prompt=True,
                                       tokenize=True,
                                       return_tensors="pt",
                                       return_dict=True
                                       ).to('cuda:0')

gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
with torch.no_grad():
    outputs = model.generate(**inputs, **gen_kwargs)
    outputs = outputs[:, inputs['input_ids'].shape[1]:]
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

我系识讲信宜话葛人工智能助手阿信。你可以同我讲信宜话，亦可以同我讲普通话。不过我训练数据矛几好，效果矛好矛叼我。


## Step10 保存模型