In [2]:
# Path to the local LLM checkpoint.
model_path = './IEITYuan/Yuan2-2B-Mars-hf'
# model_path = './qwen/Qwen1___5-4B-Chat-GGUF'
# Path to the local embedding (vector) model.
embedding_model_path = './AI-ModelScope/bge-large-zh-v1___5'



In [3]:
import os
import re
from typing import Any, Dict, List, Mapping, Optional

import streamlit as st
import torch
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.llms.base import LLM
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, CSVLoader
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM
# 定义源大模型类
class Yuan2_LLM(LLM):
    """
    LangChain ``LLM`` wrapper around a locally stored Yuan2 causal language model.

    The tokenizer and the model are loaded once in ``__init__``; ``_call`` runs
    greedy generation for a single prompt and returns the decoded reply.
    """
    # Both attributes are populated in __init__.
    tokenizer: AutoTokenizer = None
    model: AutoModelForCausalLM = None

    def __init__(self, mode_path :str):
        """
        Load the tokenizer and the model weights.

        Args:
            mode_path: Location handed to ``from_pretrained`` for both the
                tokenizer and the model.
        """
        super().__init__()

        # Load the pretrained tokenizer and register the extra special tokens.
        print("Creat tokenizer...")
        self.tokenizer = AutoTokenizer.from_pretrained(mode_path, add_eos_token=False, add_bos_token=False, eos_token='<eod>')
        self.tokenizer.add_tokens(['<sep>', '<pad>', '<mask>', '<predict>', '<FIM_SUFFIX>', '<FIM_PREFIX>', '<FIM_MIDDLE>','<commit_before>','<commit_msg>','<commit_after>','<jupyter_start>','<jupyter_text>','<jupyter_code>','<jupyter_output>','<empty_output>'], special_tokens=True)

        print("Creat model...")
        # Use the GPU when one is available instead of failing on CPU-only hosts.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = AutoModelForCausalLM.from_pretrained(mode_path, torch_dtype=torch.bfloat16, trust_remote_code=True).to(device)

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """
        Generate a reply for ``prompt``.

        Args:
            prompt: Text sent to the model; it is stripped and terminated
                with ``<sep>`` before tokenisation.
            stop: Optional stop sequences; the reply is cut before the first
                occurrence of any of them.
            run_manager: Accepted for interface compatibility; unused.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The decoded text after the last ``<sep>`` and before ``<eod>``.
        """
        prompt = prompt.strip() + "<sep>"
        # Keep the input ids on whatever device the model was placed on.
        inputs = self.tokenizer(prompt, return_tensors="pt")["input_ids"].to(self.model.device)
        outputs = self.model.generate(inputs, do_sample=False, max_length=4096)
        output = self.tokenizer.decode(outputs[0])
        response = output.split("<sep>")[-1].split("<eod>")[0]

        # Honour caller-supplied stop sequences (previously ignored).
        if stop:
            for marker in stop:
                if marker:
                    response = response.split(marker)[0]

        return response

    @property
    def _llm_type(self) -> str:
        """Identifier string for this LLM implementation."""
        return "Yuan2_LLM"

# Build the embedding model (the LLM is no longer created in this function).
@st.cache_resource
def get_models():
    """
    Create the HuggingFace embedding model located at ``embedding_model_path``.

    Returns:
        The ``HuggingFaceEmbeddings`` instance only; LLM creation is
        commented out below.
    """
    # llm = Yuan2_LLM(model_path)

    return HuggingFaceEmbeddings(
        model_name=embedding_model_path,
        model_kwargs={'device': 'cpu'},
        # Set True to compute cosine similarity.
        encode_kwargs={'normalize_embeddings': True},
    )

In [None]:
class ZhipuAILLM(LLM):
    """
    LangChain ``LLM`` wrapper that sends a single-turn prompt to the ZhipuAI chat API.

    The API key is read from the ``api_key`` field or, when that is empty,
    from the ``ZHIPUAI_API_KEY`` environment variable at call time. No key is
    embedded in the source.
    """
    # Model name sent with every request (glm-4 by default).
    model: str = "glm-4"
    # Sampling temperature.
    temperature: float = 0.1
    # API key; intentionally empty by default so no credential lives in the notebook.
    api_key: str = ""
    # Forwarded to the API as ``max_tokens``.
    max_tokens: int = 2048

    def _call(self, prompt: str, stop: Optional[List[str]] = None,
              run_manager: Optional[CallbackManagerForLLMRun] = None,
              **kwargs: Any):
        """
        Send ``prompt`` as one user message and return the first choice's content.

        Args:
            prompt: The user prompt.
            stop: Accepted for interface compatibility; not forwarded.
            run_manager: Accepted for interface compatibility; unused.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The content of the first returned choice, or the string
            ``"generate answer error"`` when the response has no choices.

        Raises:
            ValueError: If no API key is configured.
        """
        # Imported lazily so the rest of the notebook does not require the SDK.
        from zhipuai import ZhipuAI

        api_key = self.api_key or os.environ.get("ZHIPUAI_API_KEY", "")
        if not api_key:
            raise ValueError(
                "ZhipuAI API key is not set; pass api_key=... or set ZHIPUAI_API_KEY"
            )

        client = ZhipuAI(api_key=api_key)

        # Single-turn conversation: one user message carrying the prompt.
        messages = [{"role": "user", "content": prompt}]
        response = client.chat.completions.create(
            model=self.model,  # use the configured model instead of a literal
            messages=messages,
            temperature=self.temperature,
            max_tokens=self.max_tokens
        )

        if len(response.choices) > 0:
            return response.choices[0].message.content
        return "generate answer error"

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Default parameters used when calling the ZhipuAI API."""
        return {"temperature": self.temperature}

    @property
    def _llm_type(self) -> str:
        """Identifier string for this LLM implementation."""
        return "Zhipu"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters (model name plus default params)."""
        return {"model": self.model, **self._default_params}

In [33]:
# Load the embedding model (get_models returns only the embeddings object).
embeddings = get_models()
# Convert the CSV file (and the PDF) into persisted vector stores.
# Root directory under which both Chroma collections are written.
persist_directory = "./vector_db"
csvloader = CSVLoader(file_path="./chioceproblem.csv", encoding="utf-8", csv_args={'delimiter': ',', 'quotechar': '"'})
pdfloader = PyPDFLoader("./中国近现代史纲要：2023 年版(1).pdf")
# Placeholder value; overwritten by pdfloader.load() just below.
pdf = []
pdf = pdfloader.load()
# Chunk size handed to the text splitter.
CHUNK_SIZE = 500
# Overlap length between adjacent text chunks in the knowledge base.
OVERLAP_SIZE = 50
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=OVERLAP_SIZE
)
split_pdf = text_splitter.split_documents(pdf)
# Show how many chunks the PDF was split into.
print(len(split_pdf))
# NOTE(review): the name `csv` would shadow the stdlib `csv` module if it were imported.
csv = csvloader.load()
# Vector store built from the CSV documents (not split).
choiceproblem_vectordb = Chroma.from_documents(
    documents=csv,
    embedding=embeddings,
    persist_directory=persist_directory + "/choiceproblem"
)
choiceproblem_vectordb.persist()
# Vector store built from the split PDF chunks.
analysis_vectordb = Chroma.from_documents(
    documents=split_pdf,
    embedding=embeddings,
    persist_directory=persist_directory + "/pdf"
)
analysis_vectordb.persist()

Creat tokenizer...
Creat model...
860


In [52]:
from langchain.chains.sequential import SequentialChain

# QA prompt: asks the model to answer {query} using the problem text in
# {context} as background knowledge, briefly and without making things up.
template = """使用以下背景知识来回答最后的问题。不要试图编造答案。尽量简明扼要地回答。
背景知识：现在有这样一道题目：{context}，
问题：{query}"""


# Fill-in prompt: {question} is a text whose first line is the problem (with a
# bracketed blank), followed by options A-D and a final answer line; the model
# is told to replace the bracket with the answer and output nothing else.
template1 = """给你一个任务：给定一段文本，这段文本中第一行是题目，题目中会有括号，后面四行为A，B，C，D四个选项，最后一行是题目的答案。你需要做的是：将题目中的括号以及括号内的内容替换为答案并输出。
不需要你检查指出题目的对错，不要输出除了以上任务之外的任何回答，否则地球会爆炸！
这是你需要处理的文本：{question}
"""

# 定义ChatBot类
class ChatBot:
    """
    Looks up the stored choice problem closest to a query and passes its
    text through the ``template1`` prompt chain.
    """

    def __init__(self, llm, embeddings, choiceproblem_vectordb, analysis_vectordb):
        """Store the embeddings and vector stores and build the prompt chain."""
        self.prompt1 = PromptTemplate(template=template1, input_variables=["question"])
        self.chain1 = LLMChain(prompt=self.prompt1, llm=llm)
        self.embeddings = embeddings
        self.choiceproblem_vectordb = choiceproblem_vectordb
        self.analysis_vectordb = analysis_vectordb

    def run(self, query):
        """
        Retrieve the best-matching problem for ``query`` and run the chain on it.

        Returns:
            Whatever ``self.chain1`` returns for the retrieved problem text.
        """
        hits = self.choiceproblem_vectordb.max_marginal_relevance_search(query, k=1, fetch_k=1)
        # Each retrieved document contributes its text followed by a newline.
        question = "".join(hit.page_content + '\n' for hit in hits)
        # Generate the reply.
        return self.chain1(question)

In [96]:
import torch
# Download a model snapshot into the current directory.
# NOTE(review): the original comment called this the "vector model" download,
# but the id below is a Qwen chat GGUF checkpoint — confirm which was intended.
from modelscope import snapshot_download
model_dir = snapshot_download('qwen/Qwen1.5-4B-Chat-GGUF', cache_dir='./')

In [53]:
# Sample query: a problem statement with a bracketed blank.
query = "党在新时代的强军目标是建设一支(   )的人民军队"

# Initialise the ChatBot.
# NOTE(review): `llm` is not assigned in any cell visible here (its creation in
# get_models is commented out); it must be defined before this cell runs on a
# fresh kernel — confirm which LLM class is intended.
chatbot = ChatBot(llm, embeddings, choiceproblem_vectordb, analysis_vectordb)

response = chatbot.run(query)
# The result is indexed by 'question' and 'text'; presumably the chain input
# and the LLM output respectively — verify against the LLMChain return value.
print(response['question'])
print("--------")
print(response['text'])

问题: 235、党在新时代的强军目标是建设一支(   )的人民军队。
A: 听党指挥，能打能杀，坚定执着
B: 作风优良，敢打硬仗，不怕吃苦
C: 强大后勤保障
D: 听党指挥、能打胜仗、作风优良
答案: 听党指挥、能打胜仗、作风优良

--------
 问题: 235、党在新时代的强军目标是建设一支(   )的人民军队。
A: 听党指挥，能打能杀，坚定执着
B: 作风优良，敢打硬仗，不怕吃苦
C: 强大后勤保障
D: 听党指挥、能打胜仗、作风优良
答案: 听党指挥、能打胜仗、作风优良
