# 下载模型

In [13]:
from modelscope import snapshot_download
model_dir = snapshot_download('qwen/Qwen2.5-1.5B-Instruct', cache_dir='/root/llms', revision='master')

Downloading Model to directory: /root/llms/hub/qwen/Qwen2.5-1.5B-Instruct




Downloading [config.json]:   0%|          | 0.00/660 [00:00<?, ?B/s]

Downloading [configuration.json]:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading [generation_config.json]:   0%|          | 0.00/242 [00:00<?, ?B/s]

Downloading [LICENSE]:   0%|          | 0.00/11.1k [00:00<?, ?B/s]

Downloading [merges.txt]:   0%|          | 0.00/1.59M [00:00<?, ?B/s]

Downloading [model.safetensors]:   0%|          | 0.00/2.88G [00:00<?, ?B/s]

Downloading [README.md]:   0%|          | 0.00/4.79k [00:00<?, ?B/s]

Downloading [tokenizer.json]:   0%|          | 0.00/6.71M [00:00<?, ?B/s]

Downloading [tokenizer_config.json]:   0%|          | 0.00/7.13k [00:00<?, ?B/s]

Downloading [vocab.json]:   0%|          | 0.00/2.65M [00:00<?, ?B/s]

2024-12-01 23:51:29,269 - modelscope - INFO - Creating symbolic link /root/llms/hub/qwen/Qwen2___5-1___5B-Instruct -> /root/llms/hub/qwen/Qwen2.5-1.5B-Instruct.


# 导入模型

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "/root/llms/qwen/Qwen2___5-1___5B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

2024-12-01 23:52:23.110023: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# 导入并适配数据集

### 导入数据集

In [2]:
from datasets import load_dataset

data_id="dataset/huanhuan.json"
dataset = load_dataset("json", data_files=data_id)
print(dataset["train"])

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 1500
})


In [3]:
# tokenize_func 函数
def tokenize_func(example, tokenizer, ignore_label_id=-100):
    """
    对单个数据样本进行tokenize处理。

    参数:
    example (dict): 包含'content'和'summary'键的字典，代表训练数据的一个样本。
    tokenizer (transformers.PreTrainedTokenizer): 用于tokenize文本的tokenizer。
    ignore_label_id (int, optional): 在label中用于填充的忽略ID，默认为-100。

    返回:
    dict: 包含'tokenized_input_ids'和'labels'的字典，用于模型训练。
    """
    prompt_text = ''                          # 所有数据前的指令文本
    max_input_length = 512                    # 输入的最大长度
    max_output_length = 1536                  # 输出的最大长度

    # 构建问题文本
    question = prompt_text + example['instruction']
    if example.get('input', None) and example['input'].strip():
        question += f'\n{example["input"]}'

    # 构建答案文本
    answer = example['output']

    # 对问题和答案文本进行tokenize处理
    q_ids = tokenizer.encode(text=question, add_special_tokens=False)
    a_ids = tokenizer.encode(text=answer, add_special_tokens=False)

    # 如果tokenize后的长度超过最大长度限制，则进行截断
    if len(q_ids) > max_input_length - 2:  # 保留空间给gmask和bos标记
        q_ids = q_ids[:max_input_length - 2]
    if len(a_ids) > max_output_length - 1:  # 保留空间给eos标记
        a_ids = a_ids[:max_output_length - 1]

    # 构建模型的输入格式
    input_ids = tokenizer.build_inputs_with_special_tokens(q_ids, a_ids)
    question_length = len(q_ids) + 2  # 加上gmask和bos标记

    # 构建标签，对于问题部分的输入使用ignore_label_id进行填充
    labels = [ignore_label_id] * question_length + input_ids[question_length:]

    return {'input_ids': input_ids, 'labels': labels}

### 调整数据集

In [4]:
# 获取 'train' 部分的列名
column_names = dataset['train'].column_names  

# 使用lambda函数调用tokenize_func函数，并传入example和tokenizer作为参数
tokenized_dataset = dataset['train'].map(
    lambda example: tokenize_func(example, tokenizer),
    batched=False,  # 不按批次处理
    remove_columns=column_names  # 移除特定列（column_names中指定的列）
)

Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

In [5]:
from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    label_pad_token_id=-100,
    pad_to_multiple_of=None,
    padding=True
)

# 配置LoRA

In [6]:
from peft.utils import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING

target_modules = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING['qwen2']

In [7]:
# 从peft库导入LoraConfig和get_peft_model函数
from peft import LoraConfig, get_peft_model, TaskType

# 创建一个LoraConfig对象，用于设置LoRA（Low-Rank Adaptation）的配置参数
config = LoraConfig(
    r=8,  # LoRA的秩，影响LoRA矩阵的大小
    lora_alpha=32,  # LoRA适应的比例因子
    # 指定需要训练的模型层的名字，不同模型对应层的名字不同
    # target_modules=["query_key_value"],
    target_modules=target_modules,
    lora_dropout=0.05,  # 在LoRA模块中使用的dropout率
    bias="none",  # 设置bias的使用方式，这里没有使用bias
    # task_type="CAUSAL_LM"  # 任务类型，这里设置为因果(自回归）语言模型
    task_type=TaskType.CAUSAL_LM
)

# 使用get_peft_model函数和给定的配置来获取一个PEFT模型
model = get_peft_model(model, config)

# 打印出模型中可训练的参数
model.print_trainable_parameters()

trainable params: 1,089,536 || all params: 1,544,803,840 || trainable%: 0.0705


### 配置LoRA 超参数

In [8]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
        output_dir="./output/Qwen2.5_instruct_lora",  # 指定模型输出和保存的目录
        per_device_train_batch_size=4,  # 每个设备上的训练批量大小
        learning_rate=2e-4,  # 学习率
        fp16=True,  # 启用混合精度训练，可以提高训练速度，同时减少内存使用
        logging_steps=20,  # 指定日志记录的步长，用于跟踪训练进度
        save_strategy="steps",   # 模型保存策略
        save_steps=50,   # 模型保存步数
        # max_steps=50, # 最大训练步长
        num_train_epochs=1  # 训练的总轮数
    )

### 开始训练

In [9]:
trainer = Trainer(
    model=model,  # 指定训练时使用的模型
    train_dataset=tokenized_dataset,  # 指定训练数据集
    args=training_args,
    data_collator=data_collator,
)

model.use_cache = False
# trainer.train() 
trainer.train()


[2024-12-01 23:52:54,050] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)


df: /root/.triton/autotune: 没有那个文件或目录


Step,Training Loss
20,3.8604
40,3.6016
60,3.4049
80,3.5706
100,3.4026
120,3.4396
140,3.2994
160,3.6075
180,3.5264
200,3.4507


TrainOutput(global_step=375, training_loss=3.487805185953776, metrics={'train_runtime': 144.1631, 'train_samples_per_second': 10.405, 'train_steps_per_second': 2.601, 'total_flos': 823808942137344.0, 'train_loss': 3.487805185953776, 'epoch': 1.0})

### 保存LoRA结果

In [None]:
# lora_model_path = "lora/chatglm3-6b-int8"
# trainer.model.save_pretrained(lora_model_path )

# 测试LoRA结果

In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel

mode_path = '/root/llms/qwen/Qwen2___5-1___5B-Instruct/'
lora_path = 'output/Qwen2.5_instruct_lora/checkpoint-375' # 这里改称你的 lora 输出对应 checkpoint 地址

# 加载tokenizer
tokenizer = AutoTokenizer.from_pretrained(mode_path, trust_remote_code=True)

# 加载模型
model = AutoModelForCausalLM.from_pretrained(mode_path, device_map="auto",torch_dtype=torch.bfloat16, trust_remote_code=True).eval()

# 加载lora权重
model = PeftModel.from_pretrained(model, model_id=lora_path)

prompt = "皇上是谁"
inputs = tokenizer.apply_chat_template([{"role": "user", "content": "现在你要扮演皇帝身边的女人--甄嬛"},{"role": "user", "content": prompt}],
                                       add_generation_prompt=True,
                                       tokenize=True,
                                       return_tensors="pt",
                                       return_dict=True
                                       ).to('cuda')


gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
with torch.no_grad():
    outputs = model.generate(**inputs, **gen_kwargs)
    outputs = outputs[:, inputs['input_ids'].shape[1]:]
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

我是Qwen，一个AI助手。如果您有任何问题或需要帮助，请随时告诉我。


# 合并LoRA结果

In [1]:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch 

model_path="/root/work/chatglm3-6b"
peft_model_path="./lora/chatglm3-6b-int8"
save_path = "chatglm3-6b-lora"

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, low_cpu_mem_usage=True, torch_dtype=torch.float16, device_map="auto")
model = PeftModel.from_pretrained(model, peft_model_path)
model = model.merge_and_unload()

tokenizer.save_pretrained(save_path)
model.save_pretrained(save_path)

2024-12-01 23:59:36.495648: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


OSError: Incorrect path_or_model_id: '/root/work/chatglm3-6b'. Please provide either the path to a local folder or the repo_id of a model on the Hub.