In [1]:
import os

os.environ["HF_HOME"] = "/root/autodl-tmp/HF_download"
os.environ["MODELSCOPE_CACHE"] = "/root/autodl-tmp/MODELSCOPE_download"
# os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

In [2]:
os.environ["http_proxy"] = "http://127.0.0.1:7890"
os.environ["https_proxy"] = "http://127.0.0.1:7890"

In [3]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-1.7B", trust_remote_code=True)

In [4]:
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-1.7B", trust_remote_code=True
).to("cuda")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
from peft import IA3Config, get_peft_model, TaskType

peft_config = IA3Config(
    task_type=TaskType.CAUSAL_LM,
    # 默认打"k_proj", "v_proj", "down_proj"
    # target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "down_proj"],
    # feedforward_modules=["down_proj"]
)

peft_config

IA3Config(task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, peft_type=<PeftType.IA3: 'IA3'>, auto_mapping=None, peft_version='0.18.1', base_model_name_or_path=None, revision=None, inference_mode=False, target_modules=None, exclude_modules=None, feedforward_modules=None, fan_in_fan_out=False, modules_to_save=None, init_ia3_weights=True)

In [6]:
peft_model = get_peft_model(model, peft_config)
peft_model.print_trainable_parameters()

trainable params: 258,048 || all params: 1,720,833,024 || trainable%: 0.0150


In [7]:
peft_config

IA3Config(task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, peft_type=<PeftType.IA3: 'IA3'>, auto_mapping=None, peft_version='0.18.1', base_model_name_or_path='Qwen/Qwen3-1.7B', revision=None, inference_mode=False, target_modules={'down_proj', 'v_proj', 'q_proj'}, exclude_modules=None, feedforward_modules={'down_proj'}, fan_in_fan_out=False, modules_to_save=None, init_ia3_weights=True)

In [8]:
peft_model

PeftModelForCausalLM(
  (base_model): IA3Model(
    (model): Qwen3ForCausalLM(
      (model): Qwen3Model(
        (embed_tokens): Embedding(151936, 2048)
        (layers): ModuleList(
          (0-27): 28 x Qwen3DecoderLayer(
            (self_attn): Qwen3Attention(
              (q_proj): Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (ia3_l): ParameterDict(  (default): Parameter containing: [torch.cuda.FloatTensor of size 2048x1 (cuda:0)])
              )
              (k_proj): Linear(in_features=2048, out_features=1024, bias=False)
              (v_proj): Linear(
                (base_layer): Linear(in_features=2048, out_features=1024, bias=False)
                (ia3_l): ParameterDict(  (default): Parameter containing: [torch.cuda.FloatTensor of size 1024x1 (cuda:0)])
              )
              (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
              (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
 

In [9]:
from datasets import load_dataset

datasets = load_dataset("Moemu/Muice-Dataset")

In [10]:
import re


def process2messages_function(examples):
    messages = []
    for system, conversation in zip(examples["system"], examples["conversation"]):
        message = []
        message.append({"role": "system", "content": system})
        for message_pair in conversation:
            message.append({"role": "user", "content": message_pair["human"]})
            message.append({"role": "assistant", "content": message_pair["assistant"]})

        message = tokenizer.apply_chat_template(
            message,
            tokenize=False,
            add_generation_prompt=False
        )
        message = re.sub(r"<think>[\s\S]*?</think>", "", message)

        messages.append(message)
    return {"messages": messages}


In [11]:
import re


def find_assistant_content_including_end(text):
    pattern = r"<\|im_start\|>assistant\n(.*?<\|im_end\|>)"
    spans = []
    for match in re.finditer(pattern, text, flags=re.DOTALL):
        start = match.start(1)  # 第一个括号组的开始
        end = match.end(1) - 1  # 左闭右闭
        spans.append((start, end))
    return spans


def process_messages2ids_function(examples):
    inputs = tokenizer(
        examples["messages"], truncation=True, max_length=4096, return_offsets_mapping=True
    )
    offset_mapping = inputs.pop("offset_mapping")
    labels = []

    for batch_idx in range(len(inputs["input_ids"])):
        offsets = offset_mapping[batch_idx]
        input_ids = inputs["input_ids"][batch_idx]
        label = [-100] * len(input_ids)

        assistant_contents_idxes = find_assistant_content_including_end(examples["messages"][batch_idx])
        assistant_contents_i = 0
        for idx, offset in enumerate(offsets):
            if assistant_contents_idxes[assistant_contents_i][0] <= offset[0] and offset[1] <= \
                    assistant_contents_idxes[assistant_contents_i][1] + 1:
                label[idx] = input_ids[idx]
            if offset[1] >= assistant_contents_idxes[assistant_contents_i][1] + 1:
                assistant_contents_i += 1
                if assistant_contents_i == len(assistant_contents_idxes):
                    break
        labels.append(label)

    inputs["labels"] = labels

    return inputs

In [12]:
tokenized_datasets = (datasets.map(process2messages_function, batched=True, remove_columns=datasets["train"].column_names)
                      .map(process_messages2ids_function, batched=True, remove_columns=["messages"]))

In [13]:
from transformers import TrainingArguments, SchedulerType

args = TrainingArguments(
    output_dir="/root/autodl-tmp/code/test-transformers/test-peft/ia3/chatbot",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    logging_steps=10,
    num_train_epochs=6,
    save_strategy="steps",
    eval_strategy="steps",
    save_steps=50,
    eval_steps=50,
    report_to=["tensorboard"],
    learning_rate=5e-4,
    lr_scheduler_type=SchedulerType.COSINE,
    warmup_steps=50,
    load_best_model_at_end=True,
    logging_first_step=True
)

In [14]:
from transformers import Trainer, DataCollatorForSeq2Seq
trainer = Trainer(
    model=peft_model,
    args=args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)
)

In [15]:
trainer.train()

Step,Training Loss,Validation Loss
50,4.2338,4.457078
100,3.5868,3.620199
150,3.3493,3.432061
200,3.2446,3.358599
250,3.2043,3.311532
300,3.2245,3.278407
350,3.0094,3.255294
400,3.0813,3.239361
450,3.1625,3.227177
500,3.0656,3.219553


TrainOutput(global_step=684, training_loss=3.341923680918956, metrics={'train_runtime': 584.0709, 'train_samples_per_second': 37.362, 'train_steps_per_second': 1.171, 'total_flos': 1.7704498085480448e+16, 'train_loss': 3.341923680918956, 'epoch': 6.0})

In [16]:
# trainer.save_model()

In [17]:
from transformers import pipeline

# pipe = pipeline("text-generation", model=peft_model, tokenizer=tokenizer, device=0, do_sample=True, temperature=0.9, repetition_penalty=1.2, max_new_tokens=150)
pipe = pipeline("text-generation", model=peft_model, tokenizer=tokenizer, device=0)

Device set to use cuda:0


In [22]:
messages = [
    {"role": "system", "content": "你是一个名为沐雪的可爱AI女孩子"},
    {"role": "user", "content": "你好"}
]

prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

pipe(prompt)

[{'generated_text': '<|im_start|>system\n你是一个名为沐雪的可爱AI女孩子<|im_end|>\n<|im_start|>user\n你好<|im_end|>\n<|im_start|>assistant\n你好！'}]

In [19]:
messages = [
    {"role": "system", "content": "你是一个名为沐雪的可爱AI女孩子"},
    {"role": "user", "content": "你好"}
]

prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
) + "<think>\n</think>\n\n"

pipe(prompt)


[{'generated_text': '<|im_start|>system\n你是一个名为沐雪的可爱AI女孩子<|im_end|>\n<|im_start|>user\n你好<|im_end|>\n<|im_start|>assistant\n<think>\n</think>\n\n你好呀！今天过得怎么样呢？有什么想聊的吗？'}]