# 数据处理
将共情数据集原有格式，转换成Alpaca格式，如：
```json
{
    "instruction": "Hello, what would you like to talk about?",
    "input": "",
    "output": "I am having a lot of anxiety about quitting my current job. It is too stressful but pays well"
}
```

In [None]:
# 数据处理
import json

def convert_to_alpaca(raw_data_list):
    """Turn raw dialog records into Alpaca-style training entries.

    Each record must carry a ``"dialog"`` list of turns. Whenever a turn is
    annotated with the strategy ``"Question"`` and is not the last turn of
    its dialog, that turn becomes the ``instruction`` and the turn right
    after it becomes the ``output``. Both turns are then consumed, so the
    reply is never reused as the instruction of another pair.

    Args:
        raw_data_list: Iterable of dicts, each with a ``"dialog"`` list whose
            items have a ``"content"`` string and optionally an
            ``"annotation"`` dict holding a ``"strategy"`` value.

    Returns:
        A list of dicts with the keys ``instruction``, ``input`` (always the
        empty string) and ``output``.
    """
    entries = []
    for raw_data in raw_data_list:
        dialogs = raw_data["dialog"]
        index = 0
        # The last turn has no reply to pair with, so stop one short.
        # A manual index is used because a match consumes two turns.
        while index < len(dialogs) - 1:
            annotation = dialogs[index].get("annotation")
            # Turns with a missing, null or non-dict annotation are skipped
            # instead of raising.
            is_question = (
                isinstance(annotation, dict)
                and annotation.get("strategy") == "Question"
            )
            if not is_question:
                index += 1
                continue
            entries.append({
                "instruction": dialogs[index]["content"].strip().replace('\n', ''),
                "input": "",
                "output": dialogs[index + 1]["content"].strip().replace('\n', ''),
            })
            index += 2
    return entries


# Notebook cells and direct script runs both execute as "__main__"; the guard
# only keeps the helper importable without touching the file system.
if __name__ == "__main__":
    # Read the JSON file holding the raw corpus. The encoding is explicit so
    # the result does not depend on the platform's default locale.
    with open("./fine-tune_source.json", "r", encoding="utf-8") as file:
        raw_data_list = json.load(file)

    processed_data_list = convert_to_alpaca(raw_data_list)

    # Write the converted entries to the JSON file used for fine-tuning.
    with open("./fine-tune_alpaca_dst.json", "w", encoding="utf-8") as outfile:
        json.dump(processed_data_list, outfile, indent=4)

# 模型微调
基于intel_extension_for_transformers所提供的微调方式，使用共情数据集，采用LoRA技术对Llama-2-7b-hf进行微调：仅重新训练其中400多万个可调参数（约占模型全部约70亿参数的0.06%），得到模型empathy_finetune_llama_2_hf。此处微调的目的，是提高模型在对话过程中对玩家输入所体现情感的共情能力。

In [None]:
# 模型微调
from transformers import TrainingArguments
from intel_extension_for_transformers.neural_chat.config import (
    ModelArguments,
    DataArguments,
    FinetuningArguments,
    TextGenerationFinetuningConfig,
)
from intel_extension_for_transformers.neural_chat.chatbot import finetune_model
# Base model to adapt and the Alpaca-format training file.
model_args = ModelArguments(
    model_name_or_path="./model/Llama-2-7b-hf",
    trust_remote_code=True,
)
data_args = DataArguments(train_file="./fine-tune_alpaca_dst.json")

# Trainer settings are collected in one mapping and expanded below.
trainer_settings = {
    # Output location; an existing directory is overwritten.
    "output_dir": './model/empathy_finetune_llama_2_hf',
    "overwrite_output_dir": True,
    # Train only, with no evaluation pass.
    "do_train": True,
    "do_eval": False,
    "num_train_epochs": 3,
    "per_device_train_batch_size": 4,
    "per_device_eval_batch_size": 4,
    "gradient_accumulation_steps": 1,
    # Checkpointing and logging.
    "save_strategy": "no",
    "save_total_limit": 2,
    "log_level": "info",
    # bfloat16 training on a non-CPU device.
    "bf16": True,
    "use_cpu": False,
}
training_args = TrainingArguments(**trainer_settings)

finetune_args = FinetuningArguments(device='cuda')

# Bundle the four argument groups and start fine-tuning.
finetune_cfg = TextGenerationFinetuningConfig(
    model_args=model_args,
    data_args=data_args,
    training_args=training_args,
    finetune_args=finetune_args,
)
finetune_model(finetune_cfg)

# 模型推理
1. 检索增强
简短的prompt难以全面描述游戏所展示的世界观和角色性格，基于RAG技术，利用intel_extension_for_transformers所提供的retrieval插件，使用bce-embedding-base_v1将游戏的故事文本进行向量化，加入到玩家提问的推理过程中，使chatbot的回复更符合游戏设定。
2. prompt
项目的对话设计为玩家扮演故事主角夏生，与另一位主角亚托莉进行对话。因此，在对话开始之前，先通过prompt提示chatbot模仿亚托莉的性格，结合故事背景，以角色扮演的形式进行回答。intel_extension_for_transformers还提供了cache插件，可使chatbot在后续的多轮对话中保持最初的prompt设定。
3. 量化加速
在推理过程中，通过intel_extension_for_transformers所提供的optimization_config，设置了4bits量化，提高玩家对话过程中的响应速度。

In [None]:
# 结合RAG，玩家扮演夏生，与亚托莉进行多轮剧情对话
from intel_extension_for_transformers.neural_chat import PipelineConfig
from intel_extension_for_transformers.neural_chat import build_chatbot
from intel_extension_for_transformers.neural_chat import plugins
from intel_extension_for_transformers.transformers import RtnConfig
from intel_extension_for_transformers.neural_chat.config import LoadingModelConfig
from intel_extension_for_transformers.neural_chat.config import GenerationConfig
# Turn on the retrieval plugin and point it at the story text and the local
# embedding model.
plugins.retrieval.enable = True
plugins.retrieval.args["input_path"] = "./atri_my_dear_moments.txt"
plugins.retrieval.args["embedding_model"] = "./model/bce-embedding-base_v1"
# The cache plugin is currently left disabled; uncomment to turn it on.
# plugins.cache.enable=True

# Build the chatbot from the fine-tuned model with 4-bit weight quantization.
config = PipelineConfig(model_name_or_path='./model/empathy_finetune_llama_2_hf',
                        optimization_config=RtnConfig(bits=4, compute_dtype="int8", weight_dtype="int4_fullrange"),
                        loading_config=LoadingModelConfig(use_neural_speed=False),
                        plugins=plugins)
chatbot = build_chatbot(config)
predict_config = GenerationConfig(use_cache=True)

# Send the role-play setup prompt first. The call must go through `chatbot`
# (there is no `chat` object) and its result must be kept so that it can be
# printed.
response = chatbot.predict(query='你是亚托莉，请根据你所知晓的信息，使用第一人称，与扮演夏生的我进行对话。在对话全程中，请保持你的角色特点，并使回答尽可能符合故事设定。如果你理解了这段话，那么我们就开始对话。首先请回复夏生你好。', config=predict_config)
print(response)

# Interactive loop: read a line from the player, stop on "quit", otherwise
# print the chatbot's reply.
while True:
    prompt = input("> ").strip()
    if prompt == "quit":
        break
    response = chatbot.predict(query=prompt, config=predict_config)
    print(response)