In [1]:
from langchain.llms.base import LLM
from typing import Any, List, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, LlamaTokenizerFast
import torch
from langchain.document_loaders import UnstructuredFileLoader
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from tqdm import tqdm
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1"

  from .autonotebook import tqdm as notebook_tqdm


In [29]:
class Qwen2_LLM(LLM):
    # 基于本地 Qwen2 自定义 LLM 类
    tokenizer: AutoTokenizer = None
    model: AutoModelForCausalLM = None
        
    def __init__(self, mode_name_or_path :str):

        super().__init__()
        print("正在从本地加载模型...")
        self.tokenizer = AutoTokenizer.from_pretrained(mode_name_or_path, use_fast=False)
        self.model = AutoModelForCausalLM.from_pretrained(mode_name_or_path, torch_dtype=torch.bfloat16, device_map="auto")
        self.model.generation_config = GenerationConfig.from_pretrained(mode_name_or_path)
        print("完成本地模型的加载")
        
    def _call(self, prompt : str, gene_multi_query:bool=False, stop: Optional[List[str]] = None,
                run_manager: Optional[CallbackManagerForLLMRun] = None,
                **kwargs: Any):
        if gene_multi_query:
        # messages = [{"role": "user", "content": prompt }]
            messages = [
                {"role": "system", "content": "你是一个有用的助手， 可根据单个输入查询生成多个搜索查询。"},
                {"role": "user", "content": f"根据这个提示： {prompt}生成多个搜索查询"},
                {"role": "user", "content": "输出四个查询:"}
            ]
        else:
            messages = [{"role": "user", "content": prompt }]
        input_ids = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        model_inputs = self.tokenizer([input_ids], return_tensors="pt").to('cuda')
        generated_ids = self.model.generate(model_inputs.input_ids,max_new_tokens=768)
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return response
    @property
    def _llm_type(self) -> str:
        return "Qwen2_LLM"
    

In [30]:
# from LLM import Qwen2_LLM
llm = Qwen2_LLM(mode_name_or_path = "/root/autodl-tmp/langchainqwen14b/model_local/qwen/Qwen1.5-7B-Chat")
response = llm("ZUF-77-14-002 短信业务具体是什么？", gene_multi_query=True)
generated_queries = response.strip().split("\n")
print(generated_queries)


正在从本地加载模型...


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.07it/s]


完成本地模型的加载
['1. "ZUF-77-14-002 短信服务介绍"', '2. "关于号码ZUF-77-14-002的短信套餐详情"', '3. "ZUF-77-14-002短信功能解析"', '4. "如何订阅或使用ZUF-77-14-002的短信服务"']


In [19]:
import random
# Mock function to simulate vector search, returning random scores
def vector_search(query, all_documents):
    available_docs = list(all_documents.keys())
    random.shuffle(available_docs)
    selected_docs = available_docs[:random.randint(2, 5)]
    scores = {doc: round(random.uniform(0.7, 0.9), 2) for doc in selected_docs}
    return {doc: score for doc, score in sorted(scores.items(), key=lambda x: x[1], reverse=True)}


In [21]:
# Dummy function to simulate generative output
def generate_output(reranked_results, queries):
    return f"Final output based on {queries} and reranked documents: {list(reranked_results.keys())}"


In [31]:

# 加载开源词向量模型
embeddings = HuggingFaceEmbeddings(model_name="./embedding_model")
vectordb = Chroma(persist_directory='./data_base/vector_db/chroma', 
                   embedding_function=embeddings)

In [33]:
from langchain.llms.base import LLM
from typing import Any, List, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, LlamaTokenizerFast
import torch
from langchain.document_loaders import UnstructuredFileLoader
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from tqdm import tqdm
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1"

In [34]:
from langchain.prompts import PromptTemplate
# 我们所构造的 Prompt 模板
template = """使用以下上下文来回答最后的问题。如果你不知道答案，就说你不知道，不要试图编造答案。尽量使答案简明扼要。总是在回答的最后说“谢谢你的提问！”。
{context}
问题: {question}
有用的回答:"""

# 调用 LangChain 的方法来实例化一个 Template 对象，该对象包含了 context 和 question 两个变量，在实际调用时，这两个变量会被检索到的文档片段和用户提问填充
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context","question"],
                                 template=template)

In [35]:
qa_chain = RetrievalQA.from_chain_type(llm,
                                       retriever=vectordb.as_retriever(),
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt":QA_CHAIN_PROMPT})



In [40]:
original_query = "ZUF-77-14-002 短信业务具体是什么？"
response = llm(original_query, gene_multi_query=True)
generated_queries = response.strip().split("\n")


all_results = {}
for query in generated_queries:
    search_results = qa_chain({"query": query})
    all_results[query] = search_results

# print(all_results)

In [45]:
for query, doc_scores in all_results.items():
    for rank, doc in enumerate(sorted(doc_scores.items(), key=lambda x: x[1], reverse=True)):
     

TypeError: '<' not supported between instances of 'str' and 'list'

In [42]:
# Reciprocal Rank Fusion algorithm
def reciprocal_rank_fusion(search_results_dict, k=60):
    fused_scores = {}
    print("Initial individual search result ranks:")
    for query, doc_scores in search_results_dict.items():
        print(f"For query '{query}': {doc_scores}")
        
    for query, doc_scores in search_results_dict.items():
        for rank, (doc, score) in enumerate(sorted(doc_scores.items(), key=lambda x: x[1], reverse=True)):
            if doc not in fused_scores:
                fused_scores[doc] = 0
            previous_score = fused_scores[doc]
            fused_scores[doc] += 1 / (rank + k)
            print(f"Updating score for {doc} from {previous_score} to {fused_scores[doc]} based on rank {rank} in query '{query}'")

    reranked_results = {doc: score for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)}
    print("Final reranked results:", reranked_results)
    return reranked_results

In [41]:
reranked_results = reciprocal_rank_fusion(all_results)


Initial individual search result ranks:
For query '1. "ZUF-77-14-002 短信服务介绍"': {'query': '1. "ZUF-77-14-002 短信服务介绍"', 'result': '"ZUF-77-14-002描述了短信业务的实现原理和功能特性，但具体详细信息没有提供。"', 'source_documents': [Document(page_content='ZUF\n\n78\n\n12\n\n006 eSRVCC', metadata={'source': './ops/zedx2txt/umac/ZUF-78-12 语音和短消息/142-ZUF-78-12-006 eSRVCC.txt'}), Document(page_content='ZUF-79-19-013 NL1/NLs\n\n描述\n\n实现原理\n\n遵循标准\n\nZUF-79-19-014 NL2/NLg\n\n描述\n\n实现原理\n\n遵循标准\n\nZUF-79-19-015 N17\n\n描述\n\n实现原理\n\n遵循标准\n\n缩略语\n\nAMF\n\nEBI\n\nEIR\n\nGMLC\n\nLMF\n\nLPP\n\nMME\n\nNSSF\n\nPCF\n\nSMF\n\nSMSF\n\nTA\n\nUDM\n\nUE', metadata={'source': './ops/zedx2txt/umac/ZUF-79-19 接口/index.txt'}), Document(page_content="ZUF\n\n76\n\n07 信令\n\n子主题：\n\nZUF\n\n76\n\n07\n\n001 M3UA\n\nZUF\n\n76\n\n07\n\n002 SCCP\n\nZUF\n\n76\n\n07\n\n003 MAP\n\nZUF\n\n76\n\n07\n\n004 TCAP\n\nZUF\n\n76\n\n07\n\n005 RANAP\n\nZUF\n\n76\n\n07\n\n006 BSSGP\n\nZUF\n\n76\n\n07\n\n007 BSSAP PLUS\n\nZUF\n\n76\n\n07\n\n008 GTP\n\nZUF\n\n76\n\n07\

TypeError: '<' not supported between instances of 'str' and 'list'

In [None]:

final_output = generate_output(reranked_results, generated_queries)

print(final_output)

#### 下面是新的开始

In [46]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser

generate_queries = (
    prompt_perspectives 
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [48]:
retriever = vectordb.as_retriever()

In [50]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

# Retrieve
question = "What is task decomposition for LLM agents?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})
len(docs)

ValidationError: 1 validation error for Generation
text
  str type expected (type=type_error.str)

: 