In [None]:
# pre deal dataset
from datasets import load_dataset
import json

# 1. 加载 BANKING77 数据集（训练集和测试集）
dataset = load_dataset("banking77")
train_ds = dataset["train"]
test_ds = dataset["test"]

# 2. 定义映射表：原始意图名称 -> Agent 类型
intent_to_agent = {
    "activate_my_card": "common",       "age_limit": "common",
    "apple_pay_or_google_pay": "option", "atm_support": "common",
    "automatic_top_up": "common",
    "balance_not_updated_after_bank_transfer": "common",
    "balance_not_updated_after_cheque_or_cash_deposit": "cash",
    "beneficiary_not_allowed": "common", "cancel_transfer": "common",
    "card_about_to_expire": "common",   "card_acceptance": "common",
    "card_arrival": "common",          "card_delivery_estimate": "common",
    "card_linking": "common",          "card_not_working": "common",
    "card_payment_fee_charged": "common",
    "card_payment_not_recognised": "common",
    "card_payment_wrong_exchange_rate": "cash",
    "card_swallowed": "common",        "cash_withdrawal_charge": "cash",
    "cash_withdrawal_not_recognised": "cash",
    "change_pin": "common",            "compromised_card": "common",
    "contactless_not_working": "option","country_support": "common",
    "declined_card_payment": "common","declined_cash_withdrawal": "cash",
    "declined_transfer": "common",     "direct_debit_payment_not_recognised": "common",
    "disposable_card_limits": "common","edit_personal_details": "common",
    "exchange_charge": "cash",         "exchange_rate": "equity",
    "exchange_via_app": "em",          "extra_charge_on_statement": "common",
    "failed_transfer": "common",       "fiat_currency_support": "cash",
    "get_disposable_virtual_card": "em","get_physical_card": "common",
    "getting_spare_card": "common",    "getting_virtual_card": "em",
    "lost_or_stolen_card": "common",   "lost_or_stolen_phone": "common",
    "order_physical_card": "common",   "passcode_forgotten": "common",
    "pending_card_payment": "common",  "pending_cash_withdrawal": "cash",
    "pending_top_up": "common",        "pending_transfer": "common",
    "pin_blocked": "common",           "receiving_money": "common",
    "Refund_not_showing_up": "common", "request_refund": "common",
    "reverted_card_payment": "common", "supported_cards_and_currencies": "equity","reverted_card_payment?": "common",
    "terminate_account": "common",     "top_up_by_bank_transfer_charge": "common",
    "top_up_by_card_charge": "common", "top_up_by_cash_or_cheque": "cash",
    "top_up_failed": "common",         "top_up_limits": "common",
    "top_up_reverted": "common",       "topping_up_by_card": "common",
    "transaction_charged_twice": "common",
    "transfer_fee_charged": "common",  "transfer_into_account": "common",
    "transfer_not_received_by_recipient": "common", "transfer_timing": "common",
    "unable_to_verify_identity": "common", "verify_my_identity": "common",
    "verify_source_of_funds": "common","verify_top_up": "common",
    "virtual_card_not_working": "em",   "visa_or_mastercard": "option",
    "why_verify_identity": "common",   "wrong_amount_of_cash_received": "cash",
    "wrong_exchange_rate_for_cash_withdrawal": "cash"
}

# 3. 将数据集中每条样本映射为对话格式：用户提问 + 提示 + 机器人类型
def convert_to_agent_example(ex):
    intent_name = train_ds.features["label"].int2str(ex["label"])  # 将数字标签转为名称
    agent = intent_to_agent[intent_name]  # 查表得到 Agent 类型
    user_text = ex["text"] + "\nPlease answer using exactly one of the following types: equity, option, cash, em, common."
    # 构建符合 Llama 对话微调格式的 JSON 结构
    return {"conversations": [
        {"role": "user",    "content": user_text},
        {"role": "assistant","content": agent}
    ]}

# 对训练集和测试集分别处理
train_conv = train_ds.map(lambda ex: convert_to_agent_example(ex))
test_conv  = test_ds.map(lambda ex: convert_to_agent_example(ex))


with open("banking77_agent_train.jsonl", "w", encoding="utf-8") as ftr:
    for ex in train_conv:
        ftr.write(json.dumps({"conversations": ex["conversations"]}, ensure_ascii=False) + "\n")

# 保存测试集
with open("banking77_agent_test.jsonl", "w", encoding="utf-8") as fte:
    for ex in test_conv:
        fte.write(json.dumps({"conversations": ex["conversations"]}, ensure_ascii=False) + "\n")

In [3]:
import torch

print("PyTorch CUDA 版本:", torch.version.cuda)  # 应显示 12.4
print("CUDA 是否可用:", torch.cuda.is_available())

PyTorch CUDA 版本: 12.4
CUDA 是否可用: True


In [4]:
import os

os.environ['HF_DATASETS_CACHE'] = '/home/roy/models/datasets_cache'
os.environ['HF_HOME'] = '/home/roy/models'

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorForSeq2Seq, TrainingArguments
from datasets import load_dataset
from unsloth.chat_templates import standardize_sharegpt, train_on_responses_only

train_data = load_dataset("json", data_files={"train": "banking77_agent_train.jsonl"})["train"]

tokenizer = AutoTokenizer.from_pretrained("/home/roy/models/models--unsloth--Llama-3.2-3B-Instruct")
# 3. 加载处理好的对话训练数据
train_data = load_dataset("json", data_files={"train": "banking77_agent_train.jsonl"})["train"]
# 使用 unsloth 的 sharegpt 标准化（生成 'conversations' 字段为统一格式）
train_data = standardize_sharegpt(train_data)

# 4. 格式化输入文本：将对话转换为纯文本序列
def format_for_training(batch):
    texts = []
    for convo in batch["conversations"]:
        # unsloth 的 apply_chat_template 将对话转换为带有分隔标记的输入文本
        text = tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
        texts.append(text)
    return {"text": texts}

train_data = train_data.map(format_for_training, batched=True)


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth.chat_templates import standardize_sharegpt, train_on_responses_only


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [7]:
train_data[0]

{'conversations': [{'content': 'I am still waiting on my card?\nPlease answer using exactly one of the following types: equity, option, cash, em, common.',
   'role': 'user'},
  {'content': 'common', 'role': 'assistant'}],
 'text': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 12 Jul 2025\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nI am still waiting on my card?\nPlease answer using exactly one of the following types: equity, option, cash, em, common.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\ncommon<|eot_id|>'}

In [8]:
train_data[10]

{'conversations': [{'content': 'Why has my new card still not come?\nPlease answer using exactly one of the following types: equity, option, cash, em, common.',
   'role': 'user'},
  {'content': 'common', 'role': 'assistant'}],
 'text': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 12 Jul 2025\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhy has my new card still not come?\nPlease answer using exactly one of the following types: equity, option, cash, em, common.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\ncommon<|eot_id|>'}

In [10]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorForSeq2Seq, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
from unsloth.chat_templates import standardize_sharegpt, train_on_responses_only
import os

# 1. 加载预训练的 Llama-3.2-3B-Instruct（8-bit 量化以节省显存）
model = AutoModelForCausalLM.from_pretrained(
    "/home/roy/models/models--unsloth--Llama-3.2-3B-Instruct",
    load_in_4bit=True,
    device_map="auto"
)

# 2. 配置 LoRA
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]
)
model = get_peft_model(model, lora_config)  # 将 LoRA 层注入模型


# 5. 配置 SFTTrainer（监督微调）
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_data,
    dataset_text_field="text",
    max_seq_length=512,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        fp16=False,
        bf16=True,
        optim="adamw_8bit",
        max_steps=100,           # 训练步数根据数据量酌情设置
        output_dir="llama3_finetune"
    )
)

# 6. 仅在回答部分进行训练
trainer = train_on_responses_only(
    trainer,
    instruction_part="<|start_header_id|>user<|end_header_id|>\n\n",
    response_part="<|start_header_id|>assistant<|end_header_id|>\n\n",
)
trainer.train()


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:04<00:00,  2.08s/it]


Unsloth: We found double BOS tokens - we shall remove one automatically.


Unsloth: Tokenizing ["text"] (num_proc=28): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10003/10003 [00:03<00:00, 3075.96 examples/s]
Map (num_proc=28): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10003/10003 [00:00<00:00, 42398.26 examples/s]


Step,Training Loss


TrainOutput(global_step=100, training_loss=0.3226005935668945, metrics={'train_runtime': 61.1685, 'train_samples_per_second': 13.079, 'train_steps_per_second': 1.635, 'total_flos': 1008007295041536.0, 'train_loss': 0.3226005935668945})

In [11]:
# 保存整个 PEFT 模型（LoRA 层）
trainer.model.save_pretrained("./llama3-lora")

# 同时保存 tokenizer（必要）
tokenizer.save_pretrained("./llama3-lora")

('./llama3-lora/tokenizer_config.json',
 './llama3-lora/special_tokens_map.json',
 './llama3-lora/tokenizer.json')

In [14]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

base_model = AutoModelForCausalLM.from_pretrained(
    "/home/roy/models/models--unsloth--Llama-3.2-3B-Instruct",
    load_in_4bit=True,
    device_map="auto"
)

traind_tokenizer = AutoTokenizer.from_pretrained("./llama3-lora")

# Step 3: 加载 LoRA adapter 权重
traind_model = PeftModel.from_pretrained(base_model, "./llama3-lora")
# 将模型切换到 评估模式（evaluation mode）
traind_model.eval()

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.00it/s]


PeftModel(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 3072, padding_idx=128004)
        (layers): ModuleList(
          (0-27): 28 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_p

In [19]:
test_ds = load_dataset("json", data_files={"train": "banking77_agent_test.jsonl"})['train']

In [22]:
test_ds[0]

{'conversations': [{'content': 'How do I locate my card?\nPlease answer using exactly one of the following types: equity, option, cash, em, common.',
   'role': 'user'},
  {'content': 'common', 'role': 'assistant'}]}

In [38]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset

def predict_agent_type(user_input):
    # 构造消息
    messages = [{"role": "user", "content": user_input}]
    
    # 使用 tokenizer 处理输入，返回 PyTorch 张量
    encodings = traind_tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    )
    
    # 直接使用 encodings 作为 input_ids，并移动到模型设备
    input_ids = encodings.to(traind_model.device)
    
    # 生成输出
    with torch.no_grad():
        outputs = traind_model.generate(
            input_ids=input_ids,
            max_new_tokens=10,
            do_sample=False,
            temperature=1.0
        )
    
    # 解码输出
    reply = traind_tokenizer.decode(outputs[0], skip_special_tokens=True)

    return reply.strip().split('assistant')[1].strip()

# 假设 traind_model 和 traind_tokenizer 已正确加载
# 示例加载方式（需根据实际情况调整）：
# traind_tokenizer = AutoTokenizer.from_pretrained("path/to/model")
# traind_model = AutoModelForCausalLM.from_pretrained("path/to/model")
# traind_model.eval()

# 遍历测试集并评估准确率
correct = 0
total = 0
wrong_samples = []

for ex in test_ds:
    user_input = ex["conversations"][0]["content"]  # 用户输入
    true_label = ex["conversations"][1]["content"]  # 真实标签

    pred_label = predict_agent_type(user_input)
    total += 1
    if pred_label.lower() == true_label.lower():
        correct += 1
    else:
        wrong_samples.append((user_input, true_label, pred_label))

# 打印准确率与错误样例
accuracy = correct / total
print(f"\n✅ Accuracy: {accuracy:.2%}")
print("🔍 前5个预测错误样例：")
for text, true, pred in wrong_samples[:5]:
    print(f"\n📝 输入: {text}\n✅ 真实: {true}\n❌ 预测: {pred}")


✅ Accuracy: 86.95%
🔍 前5个预测错误样例：

📝 输入: How often do your exchange rates change
Please answer using exactly one of the following types: equity, option, cash, em, common.
✅ 真实: equity
❌ 预测: common

📝 输入: what are exchange rates
Please answer using exactly one of the following types: equity, option, cash, em, common.
✅ 真实: equity
❌ 预测: cash

📝 输入: What are the most current exchange rates?
Please answer using exactly one of the following types: equity, option, cash, em, common.
✅ 真实: equity
❌ 预测: common

📝 输入: Can you explain your exchange rate policy to me?
Please answer using exactly one of the following types: equity, option, cash, em, common.
✅ 真实: equity
❌ 预测: common

📝 输入: Is it a good time to exchange?
Please answer using exactly one of the following types: equity, option, cash, em, common.
✅ 真实: equity
❌ 预测: common


'system'

In [37]:
def predict_agent_type(user_input):
    # 构造消息
    messages = [{"role": "user", "content": user_input}]
    
    # 使用 tokenizer 处理输入，返回 PyTorch 张量
    encodings = traind_tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    )
    
    # 直接使用 encodings 作为 input_ids，并移动到模型设备
    input_ids = encodings.to(traind_model.device)
    
    # 生成输出
    with torch.no_grad():
        outputs = traind_model.generate(
            input_ids=input_ids,
            max_new_tokens=10,
            do_sample=False,
            temperature=1.0
        )
    
    # 解码输出
    reply = traind_tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(reply)
    # 返回第一个词作为预测结果
    return reply.strip().split('assistant')[1].strip()

user_input = 'I am still waiting on my card? Please answer using exactly one of the following types: equity, option, cash, em, common.'
predict_agent_type(user_input)

system

Cutting Knowledge Date: December 2023
Today Date: 12 Jul 2025

user

I am still waiting on my card? Please answer using exactly one of the following types: equity, option, cash, em, common.assistant

common


'common'