In [1]:
!pip install langchain weaviate-client

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting langchain
  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/ff/39/62b8d2eb468a3b924c6bd1e411ef969c340130f4833abee35c15f0df421b/langchain-0.2.1-py3-none-any.whl (973 kB)
Collecting weaviate-client
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/5f/54/f450f3b25d847c4f9d6cbb89b2627e7febe05f4d09e54de58db9334af6b3/weaviate_client-4.6.3-py3-none-any.whl (324 kB)
     -------------------------------------- 324.9/324.9 kB 2.0 MB/s eta 0:00:00
Collecting SQLAlchemy<3,>=1.4
  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/74/9a/eec023807ae78e83342567303916b34a348d9d40703e7cef5dfb1e3635b6/SQLAlchemy-2.0.30-cp311-cp311-win_amd64.whl (2.1 MB)
Collecting aiohttp<4.0.0,>=3.8.3
  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/a4/69/0d415c6d8450842652ce01b29f43416a0f30122b75899de01485623c7850/aiohttp-3.9.5-cp311-cp311-win_amd64.whl (370 kB)
Collecting langchain-core<0.3.0,>=0.2.0
  Using cached ht


[notice] A new release of pip available: 22.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


### RAG需要从向量数据库检索上下文然后输入LLM进行生成，因此需要提前将文本数据向量化并存储到向量数据库。主要步骤如下：
1. 准备文本资料
2. 将文本分块
3. 生成嵌入向量，并将文本块存储到向量数据库

In [2]:
from langchain_community.document_loaders import TextLoader # 文本加载器
from langchain.text_splitter import CharacterTextSplitter # 文本分块器
from langchain_community.embeddings import OllamaEmbeddings # Ollama向量嵌入器
import weaviate # 向量数据库
from weaviate.embedded import EmbeddedOptions # 向量嵌入选项
from langchain.prompts import ChatPromptTemplate # 聊天提示模板
from langchain_community.chat_models import ChatOllama # ChatOllma聊天模型
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser # 输出解析器
from langchain_community.vectorstores import Weaviate # 向量数据库
import requests

ModuleNotFoundError: No module named 'langchain_community'

### 下载&加载语料
这里使用拜登总统2022年的国情咨文作为示例，文件链接为 https://raw.githubusercontent.com/langchain-ai/langchain/master/docs/docs/modules/state_of_the_union.txt 。LangChain提供了多种文档加载器，这里使用TextLoader即可。

In [None]:
# Download the sample corpus.
url = "https://raw.githubusercontent.com/langchain-ai/langchain/master/docs/docs/modules/state_of_the_union.txt"
res = requests.get(url, timeout=30)  # bound the wait instead of hanging on a stalled connection
res.raise_for_status()  # stop here rather than saving an HTTP error page as the corpus
# Write and read with an explicit UTF-8 encoding: the platform default
# (for example a legacy code page on Windows) may not be able to encode the text.
with open("state_of_the_union.txt", "w", encoding="utf-8") as f:
    f.write(res.text)
# Load the saved file into LangChain documents.
loader = TextLoader('./state_of_the_union.txt', encoding="utf-8")
documents = loader.load()

由于原始文档过大，超出了LLM的上下文窗口，需要将其分块才能让LLM识别。LangChain 提供了许多内置的文本分块工具，这里用CharacterTextSplitter作为示例：

In [None]:
# Configure the splitter with a chunk size of 500 and an overlap of 50,
# then split the loaded documents into chunks.
text_splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
chunks = text_splitter.split_documents(documents)

嵌入以及存储到向量数据库
为了能够对语料分块进行检索，需要为每个文本块生成嵌入向量，然后将文本块和对应的向量一起存储。这里使用Ollama运行的llama3模型生成向量，并存储到Weaviate向量数据库。

In [None]:
# Create a Weaviate client configured with embedded options.
client = weaviate.Client(embedded_options=EmbeddedOptions())
print("store vector")
# Embed every chunk with the llama3 model served by Ollama and store
# the chunks together with their vectors through the client above.
vectorstore = Weaviate.from_documents(
    documents=chunks,
    embedding=OllamaEmbeddings(model="llama3"),
    client=client,
    by_text=False,
)

检索 & 增强
向量数据库加载数据后，可以作为检索器，通过用户查询和嵌入向量之间的语义相似性获取数据，然后使用一个固定的聊天模板即可。

In [None]:
# 检索器
retriever = vectorstore.as_retriever()
# LLM提示模板
template = """You are an assistant for question-answering tasks.
   Use the following pieces of retrieved context to answer the question.
   If you don't know the answer, just say that you don't know.
   Use three sentences maximum and keep the answer concise.
   Question: {question}
   Context: {context}
   Answer:
   """
prompt = ChatPromptTemplate.from_template(template)

生成
最后，将检索器、聊天模板以及LLM组合成RAG链就可以了。

In [None]:
def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: iterable of documents returned by the retriever; each one
            must expose a ``page_content`` attribute.

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# temperature=0 keeps the answer focused on the retrieved context; the previous
# value of 10 is far outside the useful sampling range and yields incoherent text.
llm = ChatOllama(model="llama3", temperature=0)
rag_chain = (
    # Context: retrieved documents rendered as plain text (not Document reprs);
    # question: the user query passed through unchanged.
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
# Run the query through the chain and print the generated answer.
query = "What did the president mainly say?"
print(rag_chain.invoke(query))

可以看到效果还是像那么回事的：LLM利用输入语料的内容，回答了一些关于新冠疫情以及工作、社区等方面的内容。

langchain支持多种LLM，有需要的读者可以尝试下使用OpenAI提供的LLM。
读者可以根据需要替换输入语料，构建自己的私有知识检索库。

本文所有代码如下：

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
import weaviate
from weaviate.embedded import EmbeddedOptions
from langchain.prompts import ChatPromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_community.vectorstores import Weaviate
import requests
# Download the sample corpus.
url = "https://raw.githubusercontent.com/langchain-ai/langchain/master/docs/docs/modules/state_of_the_union.txt"
res = requests.get(url, timeout=30)  # bound the wait instead of hanging on a stalled connection
res.raise_for_status()  # stop here rather than saving an HTTP error page as the corpus
# Write and read with an explicit UTF-8 encoding: the platform default
# (for example a legacy code page on Windows) may not be able to encode the text.
with open("state_of_the_union.txt", "w", encoding="utf-8") as f:
    f.write(res.text)
# Load the saved file into LangChain documents.
loader = TextLoader('./state_of_the_union.txt', encoding="utf-8")
documents = loader.load()
# Split the documents into chunks.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)
# Initialise the vector database and embed the chunks into it.
client = weaviate.Client(
    embedded_options=EmbeddedOptions()
)
vectorstore = Weaviate.from_documents(
    client=client,
    documents=chunks,
    embedding=OllamaEmbeddings(model="llama3"),
    by_text=False
)
# Expose the vector store as a retriever.
retriever = vectorstore.as_retriever()
# Prompt template for the LLM; {question} and {context} are filled in by the chain.
template = """You are an assistant for question-answering tasks.
   Use the following pieces of retrieved context to answer the question.
   If you don't know the answer, just say that you don't know.
   Use three sentences maximum and keep the answer concise.
   Question: {question}
   Context: {context}
   Answer:
   """
prompt = ChatPromptTemplate.from_template(template)


def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: iterable of documents returned by the retriever; each one
            must expose a ``page_content`` attribute.

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# temperature=0 keeps the answer focused on the retrieved context; the previous
# value of 10 is far outside the useful sampling range and yields incoherent text.
llm = ChatOllama(model="llama3", temperature=0)
rag_chain = (
    # Context: retrieved documents rendered as plain text (not Document reprs);
    # question: the user query passed through unchanged.
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
# Run the query through the chain and print the generated answer.
query = "What did the president mainly say?"
print(rag_chain.invoke(query))