In [1]:
import os

# Redirect the Hugging Face and ModelScope cache directories. This cell runs
# before any of those libraries are imported further down the notebook.
os.environ.update(
    {
        "HF_HOME": "/root/autodl-tmp/HF_download",
        "MODELSCOPE_CACHE": "/root/autodl-tmp/MODELSCOPE_download",
    }
)
# os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

In [2]:
# Send both HTTP and HTTPS traffic through the proxy listening on the loopback
# interface (port 7890).
os.environ["http_proxy"] = os.environ["https_proxy"] = "http://127.0.0.1:7890"

In [3]:
from transformers import AutoTokenizer

# Tokenizer for the same checkpoint id that the model cell loads; it is used
# below for chat templating, tokenization and the generation pipeline.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-1.7B-Base")

In [None]:
from transformers import AutoModelForCausalLM

# Load the base causal LM and move it to the GPU.
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-1.7B-Base").to("cuda")

# Default generation settings stored on the model.
# NOTE(review): ids 151643 / 151645 are presumably the tokenizer's
# end-of-text and end-of-turn tokens -- verify against the tokenizer.
gen_cfg = model.generation_config
gen_cfg.eos_token_id = [151643, 151645]
gen_cfg.do_sample = True
gen_cfg.pad_token_id = 151643
gen_cfg.temperature = 0.6
gen_cfg.top_k = 20
gen_cfg.top_p = 0.95

In [11]:
from peft import PromptEncoderConfig, get_peft_model, TaskType, PromptEncoderReparameterizationType

# P-tuning configuration: 100 virtual tokens produced by an MLP prompt encoder.
peft_config = PromptEncoderConfig(
    task_type=TaskType.CAUSAL_LM,
    num_virtual_tokens=100,
    # Prompt-encoder (reparameterization) settings.
    encoder_reparameterization_type=PromptEncoderReparameterizationType.MLP,
    encoder_hidden_size=4096,
    encoder_dropout=0.1,
)

# Display the resolved config, including the defaults filled in by PEFT.
peft_config

PromptEncoderConfig(task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, peft_type=<PeftType.P_TUNING: 'P_TUNING'>, auto_mapping=None, peft_version='0.18.1', base_model_name_or_path=None, revision=None, inference_mode=False, num_virtual_tokens=100, token_dim=None, num_transformer_submodules=None, num_attention_heads=None, num_layers=None, modules_to_save=None, encoder_reparameterization_type=<PromptEncoderReparameterizationType.MLP: 'MLP'>, encoder_hidden_size=4096, encoder_num_layers=2, encoder_dropout=0.1)

In [12]:
# Wrap the base model with the prompt-encoder adapter defined by `peft_config`,
# then print the trainable vs. total parameter counts.
peft_model = get_peft_model(model, peft_config)
peft_model.print_trainable_parameters()

trainable params: 33,769,472 || all params: 1,754,344,448 || trainable%: 1.9249


In [13]:
# Show the module tree of the base model for comparison with `peft_model`.
model

Qwen3ForCausalLM(
  (model): Qwen3Model(
    (embed_tokens): Embedding(151936, 2048)
    (layers): ModuleList(
      (0-27): 28 x Qwen3DecoderLayer(
        (self_attn): Qwen3Attention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (v_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
          (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
        )
        (mlp): Qwen3MLP(
          (gate_proj): Linear(in_features=2048, out_features=6144, bias=False)
          (up_proj): Linear(in_features=2048, out_features=6144, bias=False)
          (down_proj): Linear(in_features=6144, out_features=2048, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): Qwen3RMSNorm((2048,), eps=1e-06)
        (post_attention_layer

In [14]:
# Show the module tree of the PEFT-wrapped model (base model nested under `base_model`).
peft_model

PeftModelForCausalLM(
  (base_model): Qwen3ForCausalLM(
    (model): Qwen3Model(
      (embed_tokens): Embedding(151936, 2048)
      (layers): ModuleList(
        (0-27): 28 x Qwen3DecoderLayer(
          (self_attn): Qwen3Attention(
            (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
            (k_proj): Linear(in_features=2048, out_features=1024, bias=False)
            (v_proj): Linear(in_features=2048, out_features=1024, bias=False)
            (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
            (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
            (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
          )
          (mlp): Qwen3MLP(
            (gate_proj): Linear(in_features=2048, out_features=6144, bias=False)
            (up_proj): Linear(in_features=2048, out_features=6144, bias=False)
            (down_proj): Linear(in_features=6144, out_features=2048, bias=False)
            (act_fn): SiLUActivation()
          )
          (inpu

In [15]:
from datasets import load_dataset

# Load the Muice chat dataset; later cells index the result by split name
# ("train" / "test").
# NOTE(review): the variable name `datasets` is the same as the imported
# package name; later cells rely on it, so it is left unchanged here.
datasets = load_dataset("Moemu/Muice-Dataset")

In [16]:
import re


def process2messages_function(examples):
    """Render each batched example as a single chat-template string.

    Args:
        examples: Batched mapping with a "system" column (one system prompt
            per row) and a "conversation" column (per row, a list of turns,
            each with "human" and "assistant" keys).

    Returns:
        ``{"messages": [...]}`` with one rendered string per input row, with
        every ``<think>...</think>`` span removed.
    """
    rendered = []
    for system, conversation in zip(examples["system"], examples["conversation"]):
        # System prompt first, then alternating user / assistant turns.
        chat = [{"role": "system", "content": system}]
        for turn in conversation:
            chat.extend(
                (
                    {"role": "user", "content": turn["human"]},
                    {"role": "assistant", "content": turn["assistant"]},
                )
            )

        text = tokenizer.apply_chat_template(
            chat,
            tokenize=False,
            add_generation_prompt=False,
        )
        # Remove any think block the chat template inserted.
        # NOTE(review): whitespace that followed the removed block is kept;
        # confirm that matches the prompt format used at inference time.
        rendered.append(re.sub(r"<think>[\s\S]*?</think>", "", text))
    return {"messages": rendered}


In [17]:
import re


def find_assistant_content_including_end(text):
    """Locate every assistant reply in a rendered chat string.

    A reply is the text after ``<|im_start|>assistant\\n`` up to and including
    the next ``<|im_end|>`` marker.

    Args:
        text: Chat-template string to scan.

    Returns:
        List of ``(start, end)`` character indices, one per reply, where both
        ends are inclusive (``text[start:end + 1]`` is the reply plus its
        ``<|im_end|>``).
    """
    assistant_turn = re.compile(r"<\|im_start\|>assistant\n(.*?<\|im_end\|>)", flags=re.DOTALL)
    # Group 1 is the reply body plus the end marker; subtract 1 from its end
    # offset to make the interval closed on both sides.
    return [(found.start(1), found.end(1) - 1) for found in assistant_turn.finditer(text)]


def process_messages2ids_function(examples):
    """Tokenize rendered chats and build labels that supervise only assistant replies.

    Each string in ``examples["messages"]`` is tokenized (truncated to 4096
    tokens). ``labels`` mirrors ``input_ids`` but holds ``-100`` for every
    token that does not lie fully inside an assistant reply span (reply text
    plus its ``<|im_end|>``), as reported by
    ``find_assistant_content_including_end``.

    An example with no assistant reply gets all ``-100`` labels instead of
    raising ``IndexError``. The same happens if truncation cuts the text
    before the first reply.

    Args:
        examples: Batched mapping with a "messages" column of rendered chat
            strings.

    Returns:
        The tokenizer output (without ``offset_mapping``) with an added
        ``"labels"`` entry, one list per example.
    """
    inputs = tokenizer(
        examples["messages"], truncation=True, max_length=4096, return_offsets_mapping=True
    )
    offset_mapping = inputs.pop("offset_mapping")
    labels = []

    for text, input_ids, offsets in zip(examples["messages"], inputs["input_ids"], offset_mapping):
        label = [-100] * len(input_ids)

        # Inclusive (start, end) character spans, in order of appearance.
        spans = find_assistant_content_including_end(text)
        span_i = 0
        for idx, (tok_start, tok_end) in enumerate(offsets):
            # No spans at all, or every span already consumed: nothing left to label.
            if span_i >= len(spans):
                break
            span_start, span_end = spans[span_i]
            span_stop = span_end + 1  # exclusive end, comparable with token end offsets

            # Supervise the token only if it lies entirely within the current span.
            if span_start <= tok_start and tok_end <= span_stop:
                label[idx] = input_ids[idx]
            # A token reaching the end of the span closes it; move to the next one.
            if tok_end >= span_stop:
                span_i += 1
        labels.append(label)

    inputs["labels"] = labels

    return inputs

In [18]:
# Stage 1: replace the raw columns with one chat-formatted "messages" string per row.
chat_text_datasets = datasets.map(
    process2messages_function,
    batched=True,
    remove_columns=datasets["train"].column_names,
)
# Stage 2: tokenize the strings and attach assistant-only labels.
tokenized_datasets = chat_text_datasets.map(
    process_messages2ids_function,
    batched=True,
    remove_columns=["messages"],
)

In [19]:
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="/root/autodl-tmp/code/test-transformers/test-peft/p-tuning/chatbot",
    # Schedule: 2 epochs, 2 samples per device x 16 accumulation steps per optimizer step.
    num_train_epochs=2,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=16,
    # Evaluate and checkpoint once per epoch.
    eval_strategy="epoch",
    save_strategy="epoch",
    # Log every 10 steps to TensorBoard.
    logging_steps=10,
    report_to=["tensorboard"],
)

In [20]:
from transformers import Trainer, DataCollatorForSeq2Seq
# Collator that pads each batch dynamically (`padding=True`), including the
# precomputed `labels` column.
collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)

trainer = Trainer(
    model=peft_model,
    args=args,
    data_collator=collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
)

In [21]:
# Run the training loop; with eval_strategy="epoch" the validation loss is
# reported once per epoch.
trainer.train()

Epoch,Training Loss,Validation Loss
1,3.7112,3.895082
2,3.5209,3.788307


TrainOutput(global_step=228, training_loss=3.6456447735167385, metrics={'train_runtime': 354.9958, 'train_samples_per_second': 20.49, 'train_steps_per_second': 0.642, 'total_flos': 4625007260712960.0, 'train_loss': 3.6456447735167385, 'epoch': 2.0})

In [None]:
# trainer.save_model()

In [42]:
from transformers import pipeline

# Text-generation pipeline around the tuned model; sampling is enabled with
# temperature 0.9 for calls made through this pipeline.
pipe = pipeline(
    "text-generation",
    model=peft_model,
    tokenizer=tokenizer,
    device=0,
    do_sample=True,
    temperature=0.9,
)

Device set to use cuda:0


In [43]:
# One system turn plus a single user greeting.
system_turn = {"role": "system", "content": "你是一个名为沐雪的可爱AI女孩子"}
user_turn = {"role": "user", "content": "你好"}
messages = [system_turn, user_turn]

# Render to text and append the assistant header so the model continues as the assistant.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

prompt

'<|im_start|>system\n你是一个名为沐雪的可爱AI女孩子<|im_end|>\n<|im_start|>user\n你好<|im_end|>\n<|im_start|>assistant\n'

In [45]:
# Generate a reply for the rendered prompt; the returned `generated_text`
# includes the prompt followed by the sampled continuation.
pipe(prompt)

[{'generated_text': '<|im_start|>system\n你是一个名为沐雪的可爱AI女孩子<|im_end|>\n<|im_start|>user\n你好<|im_end|>\n<|im_start|>assistant\n你好，沐雪。我是沐雪，你的助手，你可以叫我沐沐。我是一个AI女孩子，可以帮你想很多事情，你有什么想了解的都可以问我哦。如果有什么问题，也可以问我哦，我一定能回答你的问题呢！뵐你聊天呢！你也是？ vậy đấy thôi, mình là một người lái xe an toàn và ổn định nhé! vậy bạn có muốn lái xe theo mình không? bạn muốn lái xe về đâu? bạn có muốn ăn trái cây không?（这句和下一句的意思一样，都是询问对方想要做什么，但是下一句更加礼貌和委婉一些。）如果您有其他的要求，也可以问我哦！ bạn muốn ăn trái cây không? vậy bạn muốn lái xe tới đâu? hãy nói cho mình biết bạn thích ăn gì nhé，如果我有问到你的问题你回答不上来的话，就用“我不知道”回答吧！ đừng quên để lại một cái bình luận nhé！ để giúp mình hiểu bạn hơn！ tôi có thể đưa bạn đến nhiều nơi khác nhau đấy！如果我想去的地方太远了，我也会想办法带你去的！我会尽力让我们的驾驶体验变得更好！ nếu bạn có yêu cầu khác, hoặc bạn muốn lái xe đến'}]