# 本地知识问答系统

## 设置openai api key

In [1]:
import logging
import sys
import os
from dotenv import load_dotenv, find_dotenv
# Locate a .env file with find_dotenv() and load it with load_dotenv(); the
# return value is not needed, so it is bound to the throwaway name `_`.
_ = load_dotenv(find_dotenv())

# The API key must already be present in the process environment (for example
# via the .env file loaded earlier).  Writing the result of os.getenv() straight
# back into os.environ fails with "TypeError: str expected, not NoneType" when
# the key is missing, so report the real problem instead.
if os.getenv("OPENAI_API_KEY") is None:
    logging.warning(
        "OPENAI_API_KEY is not set; add it to your environment or .env file "
        "before calling any OpenAI-backed component."
    )

# Route OpenAI traffic through the proxy endpoint.  The lower-case `base_url`
# variable is kept for backward compatibility; OPENAI_API_BASE (LangChain's
# OpenAI wrappers) and OPENAI_BASE_URL (openai SDK v1+) are the names that the
# client libraries document for overriding the endpoint.
proxy_base_url = "https://api.fe8.cn/v1"
os.environ['base_url'] = proxy_base_url
os.environ["OPENAI_API_BASE"] = proxy_base_url
os.environ["OPENAI_BASE_URL"] = proxy_base_url


## 加载并拆分数据

In [None]:
## load the PDF using pypdf
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the PDF from disk into LangChain documents.
loader = PyPDFLoader('/book.pdf')
data = loader.load()

# A financial report is far too large to process in one piece, so it is cut
# into chunks.  RecursiveCharacterTextSplitter is the splitter recommended for
# generic text: it is parameterized by a list of separator characters and tries
# them in order until the chunks are small enough.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # at most 1000 characters per chunk
    chunk_overlap=0,  # neighbouring chunks share no characters
)
texts = text_splitter.split_documents(data)

# Show the first chunk as the cell output.
texts[0]

## 文本保存数据库

In [3]:
# import Chroma and OpenAIEmbeddings
from langchain.vectorstores import Chroma
# from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings

# Initialize the OpenAI embedding model.
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

# Create an in-memory Chroma embedding database from the chunks.
#
# Chroma.from_documents() derives the metadata list from the documents
# themselves, so an additional `metadatas=` argument is either dropped or
# rejected as a duplicate keyword (depending on the LangChain version).  To
# actually attach the chunk index as "source", pass texts and metadata
# explicitly through from_texts(), keeping whatever metadata the loader already
# recorded on each chunk.
chunk_texts = [doc.page_content for doc in texts]
chunk_metadatas = [
    {**doc.metadata, "source": str(i)} for i, doc in enumerate(texts)
]
docsearch = Chroma.from_texts(chunk_texts, embeddings, metadatas=chunk_metadatas)

Using embedded DuckDB without persistence: data will be transient


## 文本问答

In [5]:
# Look up the stored chunks that are most similar to the question.
query = "What is the operating income?"
docs = docsearch.similarity_search(query=query)

## 上述方法总结
上述方法工作流程
- 上述方法将输入的文本按照1000个字符进行切分
- 将切分后的文本Embedding后保存到数据库
- 通过输入的问题，将问题Embedding后与数据库中的文本进行相似度计算，找到最相似的文本

上述方法存在的问题
- 每个文本块固定为1000个字符，且文本块之间没有重叠，容易把一段完整的语义从中间截断，产生语义不完整的文本块，导致无法很好地回答问题
- 上述方法中chromadb是临时数据库，如果后续需要则需要建立长期数据库
- Embedding模型选用的是openai的Embedding模型，可以使用开源的Embedding模型进行替换，降低成本
- 检索的方式仅使用了向量检索，检索的效果可能不是最好的

## Langchain提供了四种预先构建的问答Chain，具体如下：

- 问答：load_qa_chain
- 有来源问答：load_qa_with_sources_chain
- 检索问题答案：RetrievalQA
- 资源检索问答：RetrievalQAWithSourcesChain

In [None]:
## importing necessary framework
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains import RetrievalQA
from langchain.chains import RetrievalQAWithSourcesChain

from langchain.chat_models import ChatOpenAI

1. 问答

In [None]:
# Answer the question with an LLM, using the retrieved documents as input.
qa_llm = ChatOpenAI(temperature=0.2, model_name='gpt-3.5-turbo')
chain = load_qa_chain(qa_llm, chain_type="stuff")

query = "What is the operating income?"
chain.run(input_documents=docs, question=query)

2. 有来源问答

In [None]:
# Same question as before, but through the chain variant that reports sources.
sources_llm = ChatOpenAI(temperature=0.2, model_name='gpt-3.5-turbo')
chain = load_qa_with_sources_chain(sources_llm, chain_type="stuff")

query = "What is the operating income?"
chain_inputs = {"input_documents": docs, "question": query}
chain(chain_inputs, return_only_outputs=True)

3. 检索问题答案

In [None]:

# Let the chain fetch its own context through a retriever over the vector store
# instead of being handed pre-selected documents.
retrieval_llm = ChatOpenAI(temperature=0.2, model_name='gpt-3.5-turbo')
qa = RetrievalQA.from_chain_type(
    llm=retrieval_llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
)

query = "What is the operating income?"
qa.run(query)

4. 资源检索问答

In [None]:
# Retriever-backed question answering that also reports sources.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    ChatOpenAI(temperature=0.2, model_name='gpt-3.5-turbo'),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
)
question_payload = {"question": "What is the operating income?"}
chain(question_payload, return_only_outputs=True)


# LlamaIndex
- LlamaIndex是另一种将LLM与用户数据连接起来、用于响应查询的方式（类似于Langchain的方式），其步骤如下：
    - 加载文档（手动或通过数据加载程序）
    - 将文档解析为节点
    - 构造索引（从节点或文档）
    - [可选，高级]在其他索引之上构建索引
    - 查询索引

In [7]:
import logging
import sys

## setup your OpenAI Key
# import os
# os.environ["OPENAI_API_KEY"] = "<YOUR_OPENAI_API_KEY>"
# Enable DEBUG-level logging on stdout to see what happens underneath.
#
# basicConfig() only installs its stdout handler when the root logger has no
# handlers yet, so the level is also set explicitly.  A fallback stdout handler
# is added only when none is attached: adding one unconditionally would print
# every record twice and stack one more handler each time this cell is re-run.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
root_logger = logging.getLogger()
root_logger.setLevel(logging.DEBUG)
if not any(
    isinstance(handler, logging.StreamHandler)
    and getattr(handler, "stream", None) is sys.stdout
    for handler in root_logger.handlers
):
    root_logger.addHandler(logging.StreamHandler(stream=sys.stdout))

## LlamaIndex的核心是索引（Index），索引有多种类型：
- 列表索引
- 矢量存储索引
- 树索引
- 关键字表索引
- 图形索引
- SQL索引。

每种索引都有其独特的用途。好处是，您可以将索引堆叠在其他索引之上，这样做将使您的应用程序更强大，能够更好地理解您的文档上下文和应用程序需求。

## 加载文档

In [None]:
from llama_index import GPTVectorStoreIndex
from llama_index import download_loader

# Obtain the UnstructuredReader loader class; it is used to read the PDF file.
UnstructuredReader = download_loader(
    'UnstructuredReader',
    refresh_cache=True,
)
loader = UnstructuredReader()

# Load the report with split_documents=False.
data = loader.load_data(
    f'../notebooks/documents/_10-Q-Q1-2022-(As-Filed).pdf',
    split_documents=False,
)

## Document表示数据源的轻量级容器。可以选择下面两步骤之一：

将Document对象直接输入索引

首先，将文档转换为Node对象

       同样，本系列的目的是帮助您尽快构建第一个应用程序，因此我将直接讨论索引构建。我将在未来的一篇文章中介绍LLamaIndex的所有方面。

索引构建与查询

       我们现在可以在这些Document对象上建立一个索引。最简单的高级抽象是在索引初始化期间加载Document对象。

In [None]:
# Build a vector-store index directly from the loaded Document objects.
index = GPTVectorStoreIndex.from_documents(data)

# Ask a question through the index's query engine and print the answer.
query_engine = index.as_query_engine()
question = "What is the operating income?"
response = query_engine.query(question)
print(response)

根据您使用的索引类型，LlamaIndex可能会在构建索引时调用LLM。GPTVectorStoreIndex在构建时不会调用LLM，但GPTTreeIndex会调用。

## 自定义LLM
默认情况下，LlamaIndex使用OpenAI的text-davinci-003模型。在构造索引时，您可以选择使用另一个LLM。

In [None]:
from llama_index import LLMPredictor, PromptHelper, ServiceContext
from langchain.chat_models import ChatOpenAI

# Define the LLM that LlamaIndex should use instead of its default.
llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.2,model_name='gpt-3.5-turbo'))

# Define the prompt helper.
# maximum input size
max_input_size = 4096
# number of output tokens
num_output = 256
# maximum chunk overlap
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

# Bundle the custom LLM and prompt settings and rebuild the index with them.
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
# The loaded documents are held in `data`; no variable named `documents` is
# defined in this notebook, so referencing it would raise NameError.
index = GPTVectorStoreIndex.from_documents(
    data,
    service_context=service_context
)

# Ask the same question against the rebuilt index and print the answer.
query_engine = index.as_query_engine()
response = query_engine.query("What is the operating income?")
print(response)

## 完整示例

In [None]:

# do imports
from langchain.agents import Tool
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent

from llama_index.langchain_helpers.agents import LlamaToolkit, create_llama_chat_agent, IndexToolConfig
# Expose the index's query engine to the agent as a tool.
query_engine = index.as_query_engine()
tool_config = IndexToolConfig(
    query_engine=query_engine,
    name="Financial Report",
    description="useful for when you want to answer queries about the Apple financial report",
    tool_kwargs={"return_direct": True},
)
toolkit = LlamaToolkit(
    index_configs=[tool_config]
)

# Conversation memory stored under the "chat_history" key, plus the chat model
# that drives the agent.
memory = ConversationBufferMemory(memory_key="chat_history")
llm = ChatOpenAI(temperature=0.2, model_name='gpt-3.5-turbo')
agent_chain = create_llama_chat_agent(
    toolkit,
    llm,
    memory=memory,
    verbose=True
)

# Interactive chat loop.  It ends when the user types "exit" or "quit", or when
# input is closed or interrupted (EOF / Ctrl-C), instead of running forever and
# dying with a traceback.
exit_commands = {"exit", "quit"}
while True:
    try:
        text_input = input("User: ")
    except (EOFError, KeyboardInterrupt):
        print()
        break
    if text_input.strip().lower() in exit_commands:
        break
    response = agent_chain.run(input=text_input)
    print(f'Agent: {response}')