# 矢量存储和检索器

需要使用到的包：
- langchain
- langchain-chroma
- langchain-openai

## LangSmith

In [None]:
"""
export LANGCHAIN_TRACING_V2="true"
export LANGCHAIN_API_KEY="..."
"""

import getpass
import os

# Turn on LangSmith tracing for the runs made in this notebook.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Ask for the API key only when it is not already present in the environment,
# so a key exported in the shell is respected and re-running this cell does
# not prompt again.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API key: ")

## 文档（Documents）
- page_content：表示文档的内容
- metadata：包含元数据的字典

In [None]:
from langchain_core.documents import Document

# Sample corpus: one Document per (text, source) pair. The text becomes
# page_content and the source label is stored under metadata["source"].
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in [
        (
            "Dogs are great companions, known for their loyalty and friendliness.",
            "mammal-pets-doc",
        ),
        (
            "Cats are independent pets that often enjoy their own space.",
            "mammal-pets-doc",
        ),
        (
            "Goldfish are popular pets for beginners, requiring relatively simple care.",
            "fish-pets-doc",
        ),
        (
            "Parrots are intelligent birds capable of mimicking human speech.",
            "bird-pets-doc",
        ),
        (
            "Rabbits are social animals that need plenty of space to hop around.",
            "mammal-pets-doc",
        ),
    ]
]

## 矢量存储（vector stores）
以下是使用 Chroma 作为矢量存储的示例：

In [None]:
import getpass
import os

from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# OpenAIEmbeddings needs OPENAI_API_KEY, but the notebook only asks for it in
# the later RAG section. Prompt here if it is still missing so this cell works
# on a fresh kernel with "Run All".
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Build a Chroma vector store from the sample documents, using OpenAI
# embeddings.
vectorstore = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings(),
)

字符串相似查询

In [None]:
# Query the store with a plain string; the cell displays the matching documents.
vectorstore.similarity_search(query="cat")

异步查询

In [None]:
await vectorstore.asimilarity_search("cat")

返回带分数的查询结果

In [None]:
# Scores are provider-specific. With Chroma the score is a distance, so it
# should move in the opposite direction to similarity.
vectorstore.similarity_search_with_score(query="cat")

向量查询

In [None]:
# Embed the query text ourselves, then search with the resulting vector
# instead of the raw string.
embedding = OpenAIEmbeddings().embed_query(text="cat")
vectorstore.similarity_search_by_vector(embedding=embedding)

## 检索器（Retrievers）

In [None]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

# Wrap the store's similarity_search method as a runnable and bind k=1 so
# that each query yields only its top result.
retriever = (
    RunnableLambda(vectorstore.similarity_search)
    .bind(k=1)
)

# Run both queries through the retriever in one batch call.
retriever.batch(["cat", "shark"])

In [None]:
# Build the retriever through the vector store's own as_retriever helper,
# configured for similarity search with a single result per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs=dict(k=1))
retriever.batch(["cat", "shark"])

## RAG

In [None]:
!pip install -qU langchain-openai

In [None]:
import getpass
import os

from langchain_openai import ChatOpenAI

# Earlier cells already need OPENAI_API_KEY (for the embeddings), so only
# prompt when it is missing instead of overwriting a key that is already set.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Chat model used as the final step of the RAG chain.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Prompt template with two placeholders: {question} and {context}.
message = """
Answer this question using the provided context only.

{question}

Context:
{context}
"""

# Single human message built from the template above.
prompt = ChatPromptTemplate.from_messages([("human", message)])

# The input goes to the retriever (filling "context") and is passed through
# unchanged (filling "question"); the filled prompt is then sent to the LLM.
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
)

In [None]:
# Run the question through the whole chain and print only the reply's content.
response = rag_chain.invoke(input="tell me about cats")
print(response.content)