In [None]:
%%capture
# Install Unsloth and the packages it needs; %%capture hides the installer output.
import os
# Colab is detected by looking for "COLAB_" in the concatenated environment-variable names.
if "COLAB_" not in "".join(os.environ.keys()):
  !pip install unsloth
else:
  # Do this only in Colab notebooks! Otherwise use pip install unsloth
  # NOTE(review): --no-deps (with xformers pinned to 0.0.29.post3) presumably keeps pip from
  # replacing packages that Colab already ships — confirm before changing the pins.
  !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
  !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
  !pip install --no-deps unsloth

In [None]:
# Load Unsloth
from unsloth import FastLanguageModel
import torch

# Model parameters
max_seq_length = 2048 # Maximum sequence length; Unsloth supports RoPE scaling internally
dtype = None # Auto-detect the dtype: Float16 on Tesla T4 / V100, Bfloat16 on Ampere+ GPUs
load_in_4bit = True # Use 4-bit quantization to reduce memory usage; set to False to turn quantization off

# Load the pretrained model and its tokenizer through FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
  model_name = "unsloth/Qwen2.5-7B",
  max_seq_length = max_seq_length,      # maximum sequence length defined above
  dtype = dtype,               # dtype defined above
  load_in_4bit = load_in_4bit,        # whether to use 4-bit quantization
)

In [None]:
# Attach LoRA adapters: the base parameters stay frozen and only about 1%-10% of the weights are fine-tuned
model = FastLanguageModel.get_peft_model(
  model,                          # base model loaded earlier
  r = 16,                         # LoRA rank; larger values make the model more adaptable. Suggested values: 8, 16, 32, 64, 128
                                # This parameter determines the complexity of the LoRA adapter
  target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj",],
                                        # Modules that receive LoRA adapters
                                        # The attention projection layers and the MLP layers are the fine-tuning targets here
                                        # q_proj/k_proj/v_proj/o_proj: attention query/key/value/output projections
                                        # gate_proj/up_proj/down_proj: MLP gate/up/down projections
  lora_alpha = 16,                # LoRA scaling factor, usually equal to or larger than r; controls how strongly the LoRA update affects the original weights
  lora_dropout = 0,               # LoRA dropout rate; 0 disables dropout and is the setting with the best performance optimizations
  bias = "none",                  # whether bias terms are trained; "none" trains no biases and is the setting with the best performance optimizations
  use_gradient_checkpointing = "unsloth",      # gradient checkpointing: True or "unsloth"; "unsloth" is the optimized variant, more efficient than standard checkpointing
  random_state = 42,            # random seed, for reproducible experiments
  use_rslora = False,             # whether to use Rank-Stabilized LoRA; off by default, enabling it can improve training stability
  loftq_config = None,            # LoftQ configuration for low-bit quantized training; off by default
)

这里使用 dair-ai/emotion 数据集，并做一下数据预处理：把每条样本格式化为带指令的训练提示（prompt），供后续微调使用。

Hugging Face 数据集地址：https://huggingface.co/datasets/dair-ai/emotion

In [None]:
from datasets import load_dataset

# Load the train and validation splits of the dair-ai/emotion dataset.
train_dataset = load_dataset("dair-ai/emotion", split="train")
eval_dataset = load_dataset("dair-ai/emotion", split="validation")

# Integer class id -> emotion word; the id of each word is its position in this tuple.
emo_dict = dict(enumerate((
  "sadness",
  "joy",
  "love",
  "anger",
  "fear",
  "surprise",
)))

# Instruction-style training template. The first {} receives the input sentence and the
# second {} receives the target emotion word.
# NOTE(review): the instruction says "five types" but lists six labels. The wording is part of
# the training data, so any prompt used at inference time has to reproduce it exactly.
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
There are five types: anger, fear, joy, love, sadness, and surprise. You need to determine which category the emotion of input belongs to and return.

### Input:
{}

### Response:
{}"""

# End-of-sequence token text from the tokenizer; it is appended to every training example.
EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    """Render a batch of dataset rows into full training prompts.

    Args:
        examples: Batch mapping with a "text" list (sentences) and a "label" list
            (integer class ids that are keys of ``emo_dict``).

    Returns:
        A dict whose "text" entry holds one string per row: the filled-in ``prompt``
        template followed by ``EOS_TOKEN``.
    """
    rendered = [
        prompt.format(sentence, emo_dict[label_id]) + EOS_TOKEN
        for sentence, label_id in zip(examples["text"], examples["label"])
    ]
    return {"text": rendered}


# Apply the formatter batch-wise to both splits; the "text" values it returns overwrite the
# column of the same name, so "text" now holds complete prompts instead of raw sentences.
train_dataset = train_dataset.map(formatting_prompts_func, batched=True)
eval_dataset = eval_dataset.map(formatting_prompts_func, batched=True)


# empathetic_prompt = """
# ### Instruction:
# Recognize the emotion from the input. Your should follow the Response format, including a word describing the user's emotion and a reply(No more than 30 words).
# your return must have the fomula like this: 【happy】I guess you have the nice weekend.
# ### Input:
# {}
# ### Response:
# 【{}】{}
# """
# def format_empathetic_dialogues(examples):
#   texts = []
#   for conversation, emotion in zip(examples["conversations"], examples["emotion"]):
#     if conversation[-1]["role"] == "user":
#       continue
#     for i in range(0, len(conversation) - 1, 2):
#       if i+1 < len(conversation) and conversation[i]["role"] == "user" and conversation[i+1]["role"] == "assistant":
#         user_message = conversation[i]["content"]
#         assistant_message = conversation[i+1]["content"]
#         formatted_text = empathetic_prompt.format(
#             user_message,
#             emotion,
#             assistant_message
#         ) + EOS_TOKEN
#         texts.append(formatted_text)

#   return {"text": texts}

# train_dataset = train_dataset.map(format_empathetic_dialogues, batched=True, remove_columns=train_dataset.column_names)
# eval_dataset = eval_dataset.map(format_empathetic_dialogues, batched=True, remove_columns=eval_dataset.column_names)


In [None]:
# Sanity check: print the first formatted example of the train split, then of the eval split.
for split in (train_dataset, eval_dataset):
    print(split[:1]["text"])

In [None]:
from trl import SFTConfig, SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Build the supervised fine-tuning trainer on the "text" column of both splits.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = True,  # NOTE(review): presumably concatenates several short examples per sequence — confirm against the TRL docs
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        # per_device_eval_batch_size = 8,
        gradient_accumulation_steps = 4,  # 2 x 4 = 8 sequences per optimizer step on each device
        warmup_steps = 10,
        num_train_epochs = 1,
        eval_strategy = "steps",
        eval_steps = 10,  # evaluate every 10 steps
        max_steps = 60,  # NOTE(review): per the transformers docs a positive max_steps takes precedence over num_train_epochs — confirm which one is intended
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),  # fall back to fp16 when bf16 is unavailable
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,  # log the training loss at every step
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "wandb",  # NOTE(review): presumably needs wandb installed and a logged-in account — confirm, or switch to "none"
    ),
    # args = SFTConfig(packing=True),
)

In [None]:

# Run fine-tuning; the value returned by train() is kept for inspection afterwards.
trainer_stats = trainer.train()

In [None]:
# state = trainer.evaluate()
# state

In [None]:
import matplotlib.pyplot as plt

# A bare expression is only rendered when it is the last statement of a cell, so the
# training summary is printed explicitly instead.
print(trainer_stats)

logs = trainer.state.log_history

# Split the log history into training entries (have "loss" but no "eval_loss") and
# evaluation entries (have "eval_loss"); entries with neither key are ignored.
train_entries = [entry for entry in logs if "loss" in entry and "eval_loss" not in entry]
eval_entries = [entry for entry in logs if "eval_loss" in entry]

training_loss = [entry["loss"] for entry in train_entries]
train_steps = [entry["step"] for entry in train_entries]
val_loss = [entry["eval_loss"] for entry in eval_entries]
val_steps = [entry["step"] for entry in eval_entries]

plt.figure(figsize=(10, 6))
plt.plot(train_steps, training_loss, label="Train Loss", marker="o")
plt.plot(val_steps, val_loss, label="Validation Loss", marker="x")
plt.title('Cross-Entropy Loss Over Time')
plt.xlabel('Steps')
plt.ylabel('Cross-Entropy Loss')
plt.legend()  # without this call the label= arguments above are never shown
plt.grid(True)
# Save before show(): the figure is written to disk while it is still the current figure.
plt.savefig('training_and_evaluate_loss.png')
plt.show()

In [None]:
# Save the LoRA adapter and the tokenizer into the "lora_model" directory
model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")

In [None]:
# Archive the saved "lora_model" directory recursively into lora_model.zip.
!zip -r lora_model.zip lora_model

In [None]:
# Reload the model and tokenizer from the saved "lora_model" directory and pass the model to
# FastLanguageModel.for_inference. The `if True:` guard is a manual on/off switch for this cell.
if True:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "lora_model",
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit
    )
    FastLanguageModel.for_inference(model)

In [None]:
# Emotion-recognition smoke test.
# The template has to match, character for character, the one the training examples were
# built from ("There are five types ..."); the previous wording here started with
# "Then there are ...", so the fine-tuned model was queried with a prompt it never saw.
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
There are five types: anger, fear, joy, love, sadness, and surprise. You need to determine which category the emotion of input belongs to and return.

### Input:
{}

### Response:
{}"""

inputs = tokenizer(
[
    prompt.format(
        "He actually stole my business. It really pissed me off！！！！",
        "", # response slot left empty so the model generates the label
    )
], return_tensors = "pt").to("cuda")

# Stream the generated tokens to the cell output as they are produced.
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

In [None]:
# Conversation-ability test: checks that the model can still answer a general chat request.
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are a gentle and lovely voice assistant, you need to give responses based on input.
### Input:
{}

### Response:
{}"""

# Alternative probe kept for reference:
# "He actually stole my business. It really pissed me off！！！！"
user_message = "so,what do you like to do in weekend?"
filled_prompt = prompt.format(user_message, "")  # empty response slot for the model to complete
inputs = tokenizer([filled_prompt], return_tensors = "pt").to("cuda")

# Stream the generated tokens to the cell output as they are produced.
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(max_new_tokens = 128, streamer = text_streamer, **inputs)

In [None]:
# Extra test of the chunking feature: ask the model to insert sound-effect markers into a sentence.
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are a master of imitation. You need to insert some of the following sound effects【uh,(long-break),um,(laugh),(cough),(lip-smacking),(sigh)】 into the context to make it sound more in line with the input emotion.
EXAMPLE:context:You are intresting! emotion:joy --->  (laugh)You are intresting!
### Input:
context:{} emotion:{}

### Response:
{}"""

context_text = "Today is so hard. I really don't want to go to work at all."
emotion_tag = "sad"
filled_prompt = prompt.format(context_text, emotion_tag, "")  # empty response slot
inputs = tokenizer([filled_prompt], return_tensors = "pt").to("cuda")

# Stream the generated tokens to the cell output as they are produced.
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(max_new_tokens = 128, streamer = text_streamer, **inputs)

In [None]:
# Evaluation on the test split
from datasets import load_dataset

# NOTE: this rebinds eval_dataset, which was first loaded from the validation split; from
# here on it holds the unformatted test split.
eval_dataset = load_dataset("dair-ai/emotion", split="test")

def text_dunc(input,emotion):
  """Classify one sentence with the fine-tuned model and compare it with the gold label.

  Uses the module-level ``tokenizer`` and ``model``.

  Args:
    input: Sentence to classify.
    emotion: Expected emotion word, e.g. "joy".

  Returns:
    True when the generated text, stripped of surrounding whitespace, equals
    ``emotion`` exactly; otherwise False.
  """
  # The template is assembled from explicit line pieces rather than an indented
  # triple-quoted literal: inside a function body such a literal keeps the source
  # indentation, which put two spaces in front of every line. The wording ("There are
  # five types ...") is also kept identical to the prompt the training examples use, so
  # the model is evaluated on the format it was fine-tuned on.
  prompt = (
      "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n"
      "\n"
      "### Instruction:\n"
      "There are five types: anger, fear, joy, love, sadness, and surprise. You need to determine which category the emotion of input belongs to and return.\n"
      "\n"
      "### Input:\n"
      "{}\n"
      "\n"
      "### Response:\n"
      "{}"
  )
  inputs = tokenizer(
  [
      prompt.format(
          input,
          "", # response slot left empty so the model generates the label
      )
  ], return_tensors = "pt").to("cuda")
  outputs = model.generate(**inputs, max_new_tokens=128)
  # generate() returns prompt + continuation; keep only the newly generated tokens.
  input_length = inputs["input_ids"].shape[1]
  generated_tokens = outputs[0][input_length:]
  generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
  return generated_text.strip() == emotion

# Integer class id -> emotion word, used to turn the numeric test labels into text.
_emotion_names = ["sadness", "joy", "love", "anger", "fear", "surprise"]
emo_dict = {class_id: name for class_id, name in enumerate(_emotion_names)}
# Run the classifier over every test example and tally exact-match hits and misses.
correct = error = all_example = 0
for data in eval_dataset:
  all_example += 1
  expected_word = emo_dict[data["label"]]
  is_hit = text_dunc(data["text"], expected_word)
  correct += 1 if is_hit else 0
  error += 0 if is_hit else 1

print(f"Accuracy:{correct/all_example:.2f}")

In [None]:
# GGUF export. Every call is switched on or off by its own `if True:` / `if False:` guard;
# only the local q4_k_m save is enabled.
# NOTE(review): the push_to_hub_gguf calls pass an empty token and placeholder-looking repo
# names — presumably these must be filled in (ideally read from the environment, not
# hardcoded) before a guard is flipped to True.

# 8bit Q8_0
if False: model.save_pretrained_gguf("model_qwen2.5_7b_Q8_0", tokenizer,)
if False: model.push_to_hub_gguf("hf/model", tokenizer, token = "")

# 16bit GGUF
if False: model.save_pretrained_gguf("model_qwen2.5_7b_16bit", tokenizer, quantization_method = "f16")
if False: model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "f16", token = "")

# q4_k_m GGUF
if True: model.save_pretrained_gguf("model_qwen2.5_7b_q4_k_m", tokenizer, quantization_method = "q4_k_m")
if False: model.push_to_hub_gguf("", tokenizer, quantization_method = "q4_k_m", token = "")

# multiple GGUF options: a list of quantization methods is passed in a single call
if False:
    model.push_to_hub_gguf(
        "hf/model",
        tokenizer,
        quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
        token = "",
    )