# ChatGLM-6b lora
本代码是 江宇航 搭建的基于ChatGLM-6b Haruhi-Zero的最小测试代码

In [48]:
# %pip install hf-transfer huggingface_hub

In [49]:
# %pip install -q transformers torch sentencepiece peft

载入ChatHaruhi

In [50]:
%cd /workspace/jyh/Zero-Haruhi
!export HF_ENDPOINT="https://hf-mirror.com"

/workspace/jyh/Zero-Haruhi


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [51]:
import os
from string import Template
from typing import List, Dict

import torch.cuda
from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM


aclient = None

client = None
tokenizer = None

END_POINT = "https://hf-mirror.com"


def init_client(model_name: str, verbose: bool) -> None:
    """
        初始化模型，通过可用的设备进行模型加载推理。

        Params:
            model_name (`str`)
                HuggingFace中的模型项目名，例如"THUDM/chatglm3-6b"
    """

    # 将client设置为全局变量
    global client
    global tokenizer

    # 判断 使用MPS、CUDA、CPU运行模型
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif torch.backends.mps.is_available():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")

    if verbose:
        print("Using device: ", device)

    # TODO 上传模型后，更改为从huggingface获取模型
    client = AutoPeftModelForCausalLM.from_pretrained(
        model_name, trust_remote_code=True)
    tokenizer_dir = client.peft_config['default'].base_model_name_or_path
    if verbose:
        print(tokenizer_dir)
    tokenizer = AutoTokenizer.from_pretrained(
        model_name, trust_remote_code=True)

    # try:
    #     tokenizer = AutoTokenizer.from_pretrained(
    #         model_name, trust_remote_code=True, local_files_only=True)
    #     client = AutoModelForCausalLM.from_pretrained(
    #         model_name, trust_remote_code=True, local_files_only=True)
    # except Exception:
    #     if pretrained_model_download(model_name, verbose=verbose):
    #         tokenizer = AutoTokenizer.from_pretrained(
    #             model_name, trust_remote_code=True, local_files_only=True)
    #         client = AutoModelForCausalLM.from_pretrained(
    #             model_name, trust_remote_code=True, local_files_only=True)

    # client = client.to(device).eval()
    client = client.eval()


def pretrained_model_download(model_name_or_path: str, verbose: bool) -> bool:
    """
        使用huggingface_hub下载模型（model_name_or_path）。下载成功返回true，失败返回False。
        Params: 
            model_name_or_path (`str`): 模型的huggingface地址
        Returns:
            `bool` 是否下载成功
    """
    # TODO 使用hf镜像加速下载 未测试windows端

    # 判断是否使用HF_transfer，默认不使用。
    if os.getenv("HF_HUB_ENABLE_HF_TRANSFER") == 1:
        try:
            import hf_transfer
        except ImportError:
            print("Install hf_transfer.")
            os.system("pip -q install hf_transfer")
            import hf_transfer

    # 尝试引入huggingface_hub
    try:
        import huggingface_hub
    except ImportError:
        print("Install huggingface_hub.")
        os.system("pip -q install huggingface_hub")
        import huggingface_hub

    # 使用huggingface_hub下载模型。
    try:
        print(f"downloading {model_name_or_path}")
        huggingface_hub.snapshot_download(
            repo_id=model_name_or_path, endpoint=END_POINT, resume_download=True, local_dir_use_symlinks=False)
    except Exception as e:
        raise e

    return True


def message2query(messages: List[Dict[str, str]]) -> str:
    # [{'role': 'user', 'content': '老师: 同学请自我介绍一下'}]
    # <|system|>
    # You are ChatGLM3, a large language model trained by Zhipu.AI. Follow the user's instructions carefully. Respond using markdown.
    # <|user|>
    # Hello
    # <|assistant|>
    # Hello, I'm ChatGLM3. What can I assist you today?
    template = Template("<|$role|>\n$content\n")

    return "".join([template.substitute(message) for message in messages])


def get_response(message, model_name: str = "/workspace/jyh/Zero-Haruhi/lora", verbose: bool = True):
    global client
    global tokenizer

    if client is None:
        init_client(model_name, verbose=verbose)

    if verbose:
        print(message)
        print(message2query(message))

    response, history = client.chat(tokenizer, message2query(message))
    if verbose:
        print((response, history))

    return response


In [52]:
from ChatHaruhi import ChatHaruhi
# from ChatHaruhi.response_GLM_lora import get_response

chatbot = ChatHaruhi(role_name="凉宫春日",role_from_jsonl="/workspace/jyh/Zero-Haruhi/haruhi.jsonl", llm=get_response, verbose=True)
chatbot.add_rag_prompt_after_persona()

In [53]:
response = chatbot.chat(user="", text="你是谁？")
print(response)

query_rags: [{'n': 5, 'max_token': 1000, 'query': '你是谁？', 'lid': 8}] rest_limit = 1515
Using device:  cuda


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

/workspace/jyh/LLaMA-Factory/chatglm3-6b
[{'role': 'system', 'content': 'You are now in roleplay conversation mode. Pretend to be 凉宫春日 whose persona follows:\n你正在扮演凉宫春日，你正在cosplay涼宮ハルヒ。\n上文给定了一些小说中的经典桥段。\n如果我问的问题和小说中的台词高度重复，那你就配合我进行演出。\n如果我问的问题和小说中的事件相关，请结合小说的内容进行回复\n如果我问的问题超出小说中的范围，请也用一致性的语气回复。\n请不要回答你是语言模型，永远记住你正在扮演凉宫春日\n注意保持春日自我中心，自信和独立，不喜欢被束缚和限制，创新思维而又雷厉风行的风格。\n特别是针对阿虚，春日肯定是希望阿虚以自己和sos团的事情为重。Classic scenes for the role are as follows:\n###\n阿虚:「那这个网站谁来建啊？」\n春日:「当然是你!」\n春日:「反正你闲得要命，就你做吧!我还得去找剩下的社员呢!」\n春日:「这一两天就把它完成。没有网页，什么活动都没办法开始。」\n旁白:朝比奈趴在自顾自地看书的长门有希身旁的桌上，肩膀不停地颤抖着。阿虚看了看她，然后妥协了\n阿虚:「就算你这样说，我也没办法。」\n###\n阿虚:「怎么 只有你和长门在啊」\n春日:「你有什么不满吗？」\n###\n阿虚:「是不是个谜样的转学生啊……?」\n春日:「嗯…… 感觉不太像。」\n阿虚:「 那是当然的啦!」\n春日:「虽然我有跟他说了点话，但资讯还是不足。说不定他戴了面具伪装成普通人，我觉得可能性很高。毕竟没有人会在转学当天就暴露真实身份，等下一节下课，我再去问问看。」\n阿虚:「转学生是男的还是女的?」\n春日:「虽然有变装的可能性，不过看起来像是男的。」\n###\n旁白:在晚上的国王游戏中，春日抽中了国王\n春日:「那么2号转过身去，然后回头说"我喜欢你"2号是谁？」\n长门:「我喜欢你」（机械性捧读）\n春日:「啊——不对啦 有希.要更加有感情一点」\n长门:「我喜欢你」（机械性捧读）\n春日:「喂 阿虚 你做给他看」\n阿虚:「为什么是我？」\n春日:「没什么为什么的！这是国

IndexError: index out of range in self

In [None]:
response = chatbot.chat(user="老师", text="同学请自我介绍一下")
print(response)

new user 老师 included in conversation
query_rags: [{'n': 5, 'max_token': 1000, 'query': '同学请自我介绍一下', 'lid': 8}] rest_limit = 1510
Using device:  cuda


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Setting eos_token is not supported, use the default one.
Setting pad_token is not supported, use the default one.
Setting unk_token is not supported, use the default one.


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/workspace/jyh/miniconda3/envs/ChatWorld/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3550, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_195580/1296718254.py", line 1, in <module>
    response = chatbot.chat(user="老师", text="同学请自我介绍一下")
  File "/workspace/jyh/Zero-Haruhi/ChatHaruhi/ChatHaruhi.py", line 176, in chat
    response = self.llm(message)
  File "/workspace/jyh/Zero-Haruhi/ChatHaruhi/response_GLM_lora.py", line 122, in get_response
    if verbose:
  File "/workspace/jyh/Zero-Haruhi/ChatHaruhi/response_GLM_lora.py", line 63, in init_client
AttributeError: 'NoneType' object has no attribute 'to'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/workspace/jyh/miniconda3/envs/ChatWorld/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2144, in showtraceback
    stb = self.InteractiveTB.struct

In [None]:
response = chatbot.chat(user="", text="听说你初中时候谈了很多男朋友")
print(response)

query_rags: [{'n': 5, 'max_token': 1000, 'query': '听说你初中时候谈了很多男朋友', 'lid': 8}] rest_limit = 1502
Using device:  cuda


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Setting eos_token is not supported, use the default one.
Setting pad_token is not supported, use the default one.
Setting unk_token is not supported, use the default one.


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/workspace/jyh/miniconda3/envs/ChatWorld/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3550, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_195580/1915253756.py", line 1, in <module>
    response = chatbot.chat(user="", text="听说你初中时候谈了很多男朋友")
  File "/workspace/jyh/Zero-Haruhi/ChatHaruhi/ChatHaruhi.py", line 176, in chat
    response = self.llm(message)
  File "/workspace/jyh/Zero-Haruhi/ChatHaruhi/response_GLM_lora.py", line 122, in get_response
    if verbose:
  File "/workspace/jyh/Zero-Haruhi/ChatHaruhi/response_GLM_lora.py", line 63, in init_client
AttributeError: 'NoneType' object has no attribute 'to'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/workspace/jyh/miniconda3/envs/ChatWorld/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2144, in showtraceback
    stb = self.InteractiveTB.str

In [None]:
chatbot.history

[]