In [None]:
import os

# Point the Hugging Face and ModelScope caches at the data disk. This is set
# here, ahead of the transformers/datasets imports in the later cells.
os.environ.update(
    {
        "HF_HOME": "/root/autodl-tmp/HF_download",
        "MODELSCOPE_CACHE": "/root/autodl-tmp/MODELSCOPE_download",
    }
)
# Optional Hugging Face Hub mirror endpoint (currently disabled):
# os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

In [None]:
# Route both HTTP and HTTPS traffic through the local proxy on port 7890.
os.environ.update(dict.fromkeys(("http_proxy", "https_proxy"), "http://127.0.0.1:7890"))

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer published with the Qwen/Qwen3-1.7B checkpoint; it is used
# below for chat-template rendering, tokenization and batch padding.
# NOTE(review): trust_remote_code=True allows the hub repository to run its own
# Python code at load time -- confirm this checkpoint actually requires it.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-1.7B", trust_remote_code=True)

In [None]:
from transformers import AutoModelForCausalLM

# Load the base causal language model first ...
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-1.7B", trust_remote_code=True)
# ... then move its weights onto the GPU as a separate, explicit step.
model = model.to("cuda")

In [None]:
from peft import LoraConfig, get_peft_model, TaskType

# LoRA adapter settings for causal-LM fine-tuning.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    # By default only q_proj and v_proj would be targeted; this list instead
    # names every attention projection and every MLP projection.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    # Rank and alpha are set to the same value (32).
    r=32,
    lora_alpha=32
)

# Show the resulting configuration as the cell output.
peft_config

In [None]:
# Wrap the base model with the LoRA adapters described by peft_config.
peft_model = get_peft_model(model, peft_config)
# Print the trainable-parameter summary for the wrapped model.
peft_model.print_trainable_parameters()

In [None]:
# Display the LoRA config again, this time after it has been passed to
# get_peft_model, so it can be compared with the earlier output.
peft_config

In [None]:
from datasets import load_dataset

# Fetch the Moemu/Muice-Dataset conversation dataset. Later cells read its
# "system" and "conversation" columns and its "train" and "test" splits.
# NOTE: the variable shares its name with the `datasets` package; only
# `load_dataset` is imported from it here, so no module name is shadowed.
datasets = load_dataset("Moemu/Muice-Dataset")

In [None]:
import re


def process2messages_function(examples):
    """Render a batch of conversations into chat-template training strings.

    Intended for ``Dataset.map(..., batched=True)``.

    Args:
        examples: Batch dict with two parallel lists:
            ``"system"`` -- one system prompt per sample;
            ``"conversation"`` -- per sample, a list of turns, each a dict with
            ``"human"`` and ``"assistant"`` text.

    Returns:
        ``{"messages": [...]}`` with one rendered string per sample. Any
        ``<think>...</think>`` block inserted by the chat template is removed.
    """
    # The chat template may wrap an assistant turn as
    # "<think>\n\n</think>\n\n" + content. Removing only the tag pair would
    # leave the blank lines behind, so that turn would start with "\n\n" while
    # the other turns (and the inference prompt) do not. The trailing ``\n*``
    # removes those newlines together with the block.
    think_block = re.compile(r"<think>[\s\S]*?</think>\n*")

    messages = []
    for system, conversation in zip(examples["system"], examples["conversation"]):
        chat = [{"role": "system", "content": system}]
        for turn in conversation:
            chat.append({"role": "user", "content": turn["human"]})
            chat.append({"role": "assistant", "content": turn["assistant"]})

        rendered = tokenizer.apply_chat_template(
            chat,
            tokenize=False,
            # Training text must end with the real assistant reply, not an
            # open assistant header.
            add_generation_prompt=False,
        )
        messages.append(think_block.sub("", rendered))
    return {"messages": messages}


In [None]:
import re


def find_assistant_content_including_end(text):
    """Locate every assistant reply inside a ChatML-formatted string.

    A reply starts right after ``<|im_start|>assistant\\n`` and runs through the
    closing ``<|im_end|>`` marker (the marker itself is part of the span).

    Args:
        text: The rendered conversation.

    Returns:
        A list of ``(start, end)`` character indices, one per reply, in order
        of appearance. Both ends are inclusive, i.e. ``text[start:end + 1]`` is
        the reply text followed by ``<|im_end|>``.
    """
    reply_pattern = re.compile(r"<\|im_start\|>assistant\n(.*?<\|im_end\|>)", flags=re.DOTALL)
    # Group 1 is the reply body plus the end marker; subtract 1 from its end
    # offset to turn the half-open regex span into a closed interval.
    return [(hit.start(1), hit.end(1) - 1) for hit in reply_pattern.finditer(text)]


def process_messages2ids_function(examples):
    """Tokenize rendered conversations and build assistant-only labels.

    Intended for ``Dataset.map(..., batched=True)``. Every token that lies
    entirely inside an assistant reply (including its closing ``<|im_end|>``)
    keeps its token id as the label; every other position gets ``-100``.

    Args:
        examples: Batch dict whose ``"messages"`` entry is a list of rendered
            conversation strings.

    Returns:
        The tokenizer output (without ``offset_mapping``) with an added
        ``"labels"`` list, one label sequence per sample and the same length
        as the matching ``input_ids``.
    """
    inputs = tokenizer(
        examples["messages"], truncation=True, max_length=4096, return_offsets_mapping=True
    )
    # Offsets are only needed to build the labels; remove them from the output.
    offset_mapping = inputs.pop("offset_mapping")
    labels = []

    for text, input_ids, offsets in zip(examples["messages"], inputs["input_ids"], offset_mapping):
        label = [-100] * len(input_ids)

        # Inclusive (start, end) character spans of the assistant replies, in order.
        spans = find_assistant_content_including_end(text)
        span_i = 0
        for idx, (tok_start, tok_end) in enumerate(offsets):
            # All spans consumed -- or none found at all. Checking here rather
            # than indexing first means a sample without any assistant reply
            # ends up fully masked instead of raising IndexError.
            if span_i == len(spans):
                break
            span_start, span_last = spans[span_i]
            span_stop = span_last + 1  # exclusive end, comparable to tok_end

            if span_start <= tok_start and tok_end <= span_stop:
                label[idx] = input_ids[idx]
            # This token reaches the end of the current reply: move to the next one.
            if tok_end >= span_stop:
                span_i += 1
        labels.append(label)

    inputs["labels"] = labels

    return inputs

In [None]:
# Step 1: render every conversation to chat-template text and drop the raw columns.
message_datasets = datasets.map(
    process2messages_function,
    batched=True,
    remove_columns=datasets["train"].column_names,
)
# Step 2: tokenize that text, build the labels and drop the intermediate text column.
tokenized_datasets = message_datasets.map(
    process_messages2ids_function,
    batched=True,
    remove_columns=["messages"],
)

In [None]:
from transformers import TrainingArguments, SchedulerType

# Hyperparameters plus the logging / evaluation / checkpoint schedule for the LoRA run.
args = TrainingArguments(
    output_dir="/root/autodl-tmp/code/test-transformers/test-peft/lora/chatbot",
    # 4 samples per device x 8 accumulation steps = 32 samples per optimizer step per device.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    logging_steps=10,
    num_train_epochs=5,
    # Checkpointing and evaluation share the same 50-step cadence, so each saved
    # checkpoint has an evaluation result for load_best_model_at_end to compare.
    save_strategy="steps",
    eval_strategy="steps",
    save_steps=50,
    eval_steps=50,
    report_to=["tensorboard"],
    # Cosine learning-rate schedule with 50 warmup steps.
    learning_rate=1e-4,
    lr_scheduler_type=SchedulerType.COSINE,
    warmup_steps=50,
    load_best_model_at_end=True,
    logging_first_step=True
)

In [None]:
from transformers import Trainer, DataCollatorForSeq2Seq
# Batch collator: pads each batch dynamically using the tokenizer. The "labels"
# column is padded as well (see the DataCollatorForSeq2Seq documentation).
collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)

# Train the LoRA-wrapped model on the "train" split and evaluate on "test".
trainer = Trainer(
    model=peft_model,
    args=args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=collator,
)

In [None]:
# Run the fine-tuning loop configured in `args` (logging, evaluation and
# checkpointing happen on the step cadence set there).
trainer.train()

In [None]:
# trainer.save_model()

In [None]:
from transformers import pipeline

# Alternative pipeline with explicit sampling settings, kept for reference:
# pipe = pipeline("text-generation", model=peft_model, tokenizer=tokenizer, device=0, do_sample=True, temperature=0.9, repetition_penalty=1.2, max_new_tokens=150)
# Active pipeline: the fine-tuned PEFT model with default generation settings on device 0.
pipe = pipeline("text-generation", model=peft_model, tokenizer=tokenizer, device=0)

In [None]:
# A minimal test conversation: persona in the system slot, a greeting from the user.
messages = [
    dict(role="system", content="你是一个名为沐雪的可爱AI女孩子"),
    dict(role="user", content="你好"),
]

# Render to plain text. add_generation_prompt=True is the opposite of the
# training-time rendering, which used add_generation_prompt=False.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Show the rendered prompt as the cell output.
prompt

In [None]:
# Generate a reply for the rendered prompt with the fine-tuned model and
# display the pipeline's output.
pipe(prompt)