安装依赖

# 新段落

In [31]:

!pip install peft==0.14.0
!pip install -U datasets==3.3.2



加载模型和tokenizer

In [32]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "bigscience/bloomz-560m"
#model_name="bigscience/bloom-1b1"

tokenizer = AutoTokenizer.from_pretrained(model_name)
foundation_model = AutoModelForCausalLM.from_pretrained(model_name)

定义模型的输出函数

In [33]:
# 一个简单的推理函数
def get_outputs(model, inputs, max_new_tokens=100):
    outputs = model.generate(
        input_ids=inputs["input_ids"].to(model.device),
        attention_mask=inputs["attention_mask"].to(model.device),
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.5, # 避免模型复读，默认值为1.0
        eos_token_id=tokenizer.eos_token_id
    )
    return outputs

查看模型的输出

In [34]:
# 测试一下这个推理函数
input_sentences = tokenizer("I love this movie because", return_tensors="pt")
foundational_outputs_sentence = get_outputs(foundation_model, input_sentences, max_new_tokens=50)

print(tokenizer.batch_decode(foundational_outputs_sentence, skip_special_tokens=True))

['I love this movie because it is so funny and I am sure that my friends will enjoy too']


准备数据集

In [35]:
from datasets import load_dataset

dataset = "tatsu-lab/alpaca"
raw_data = load_dataset(dataset)

# —— 1. 构造 prompt 的函数 ——
def build_prompt(instr: str, inp: str) -> str:
    instr = instr.strip()
    inp = inp.strip()
    prompt = (
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. Write a response that appropriately completes the request.\n\n"
        f"### Instruction:\n{instr}"
    )
    if inp:
        prompt += f"\n\n### Input:\n{inp}"
    prompt += "\n\n### Response:\n"
    return prompt

# —— 2. batched-tokenize + 拼接 + mask labels ——
def tokenize_and_mask(batch):
    instrs = batch["instruction"]
    inps   = batch.get("input", [""] * len(instrs))
    outs   = batch["output"]

    prompts = [build_prompt(i, j) for i, j in zip(instrs, inps)]

    input_ids_batch = []
    labels_batch = []

    for p, o in zip(prompts, outs):
        p_ids = tokenizer(p, truncation=True, max_length=512).input_ids
        o_ids = tokenizer(o.strip(), truncation=True, max_length=512).input_ids

        # 拼接
        input_ids = p_ids + o_ids
        labels    = [-100] * len(p_ids) + o_ids

        # 截断到 max_length
        input_ids = input_ids[:512]
        labels    = labels[:512]

        input_ids_batch.append(input_ids)
        labels_batch.append(labels)

    # padding 到统一长度
    # input_ids_padded = tokenizer.pad({"input_ids": input_ids_batch}, padding="max_length", max_length=512, return_tensors="pt")["input_ids"]
    # labels_padded    = tokenizer.pad({"input_ids": labels_batch},    padding="max_length", max_length=512, return_tensors="pt")["labels"]

        input_ids = input_ids[:512]
        labels    = labels[:512]

        input_ids_tensor = [torch.tensor(x, dtype=torch.long) for x in input_ids_batch]
        labels_tensor    = [torch.tensor(x, dtype=torch.long) for x in labels_batch]

    return {"input_ids": input_ids_tensor, "labels": labels_tensor}



# —— 3. 构造 train_dataset ——
train_sample = raw_data["train"].select(range(1024)).map(
    tokenize_and_mask,
    batched=True,
    remove_columns=["instruction", "input", "output"]
)

display(train_sample)


Map:   0%|          | 0/1024 [00:00<?, ? examples/s]

Dataset({
    features: ['text', 'input_ids', 'labels'],
    num_rows: 1024
})

可以简单看看数据集的一个小样本示例

In [36]:
print(train_sample[:1])

{'text': ['Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nGive three tips for staying healthy.\n\n### Response:\n1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.'], 'input_ids': [[111757, 632, 660, 54103, 861, 63808, 267, 20165, 15, 201573, 1002, 660, 9437, 861, 26146, 13996, 10783, 17, 66828, 267, 12427, 861, 156788, 115739, 368, 8821, 6149, 105311, 182924, 29, 189, 119158, 8603, 63211, 613, 135576, 70349, 6149, 105311, 66673, 29, 189, 20, 17, 40, 278, 267, 123002, 46949, 530, 5219, 11097, 427, 13756, 95063, 461, 56326, 530, 140550, 10209, 21, 17, 230539, 107237, 427, 11874, 2632, 12364, 16153, 530, 16045, 10209, 22, 17, 12018, 17123, 35237, 530, 46944, 267, 37552, 35237, 74868, 17]], 'labels': [[-100, -100, -100, -100, -100, -100, -100, -100, -100, -10

微调与训练

首先加载Lora所需要用到的参数 `Config`

In [37]:
import peft
from peft import LoraConfig, get_peft_model, PeftModel

lora_config = LoraConfig(
    r=4, #As bigger the R bigger the parameters to train.
    lora_alpha=1, # a scaling factor that adjusts the magnitude of the weight matrix. Usually set to 1
    target_modules=["query_key_value"], #You can obtain a list of target modules in the URL above.
    lora_dropout=0.05, #Helps to avoid Overfitting.
    bias="lora_only", # this specifies if the bias parameter should be trained.
    task_type="CAUSAL_LM"
)

使用上述 `lora_config` 包装模型

In [38]:
peft_model = get_peft_model(foundation_model, lora_config)
print(peft_model.print_trainable_parameters())

trainable params: 466,944 || all params: 559,607,808 || trainable%: 0.0834
None


创建工作区用于保存模型

In [39]:
import os
working_dir = './'

output_directory = os.path.join(working_dir, "peft_lab_outputs")

设定训练参数

In [40]:
import transformers
from transformers import TrainingArguments, Trainer
import torch

tensor_load_dir="./tenboard"
training_args = TrainingArguments(
    output_dir=output_directory,
    logging_dir=tensor_load_dir,  # 新增日志目录
    logging_steps=8,      # 每10步记录一次
    report_to="tensorboard", # 使用TensorBoard
    per_device_train_batch_size=1,   # 或者 2
    gradient_accumulation_steps=8,
    # auto_find_batch_size=True, # Find a correct bvatch size that fits the size of Data.
    learning_rate= 3e-2, # Higher learning rate than full fine-tuning.
    num_train_epochs=2,
    use_cpu=False
)
torch.cuda.empty_cache()

启动训练

In [41]:
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_sample,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
8,3.5199
16,3.8962
24,3.2422
32,2.5381
40,2.2016
48,2.0716
56,2.0184
64,1.9041
72,2.1423
80,1.9511


TrainOutput(global_step=256, training_loss=1.9037017412483692, metrics={'train_runtime': 234.7266, 'train_samples_per_second': 8.725, 'train_steps_per_second': 1.091, 'total_flos': 394171330019328.0, 'train_loss': 1.9037017412483692, 'epoch': 2.0})

保存模型

In [42]:
peft_model_path = os.path.join(output_directory, f"lora_model")

trainer.model.save_pretrained(peft_model_path)

加载模型并推理

In [43]:
loaded_model = PeftModel.from_pretrained(foundation_model, peft_model_path, is_trainable=False)
input_sentences = tokenizer("I love this movie because", return_tensors="pt")
foundational_outputs_sentence = get_outputs(loaded_model, input_sentences, max_new_tokens=100)

print(tokenizer.batch_decode(foundational_outputs_sentence, skip_special_tokens=True))

['I love this movie because it is a story of passion, adventure and determination. It has an unexpected ending that leaves you wanting more than ever to know what will happen next or even better yet?"\n\nThe film was nominated for the Academy Award in both categories.\n\nIt received positive reviews from critics who loved its climax as well like others have already done before. \n\nIn addition there were some very strong points which made up their day:\nA sense  - The action-packed drama had no shortage; each scene provided plenty']
