# Multi-Vector Retrievers

In [1]:
from langchain.storage import InMemoryByteStore
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Source files that make up the corpus.
loaders = [TextLoader(path) for path in ("data/langchain.md", "data/langchain2.md")]

# Load every file, then cut the combined result into large "parent" chunks
# (up to 10000 characters each). `docs` holds those parent chunks from here on.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000)
docs = text_splitter.split_documents(
    [loaded for loader in loaders for loaded in loader.load()]
)

# The vectorstore to use to index the child chunks.
# NOTE(review): presumably requires a running Ollama server with this
# embedding model pulled — confirm before running on a fresh machine.
embeddings = OllamaEmbeddings(model='snowflake-arctic-embed:33m')
vectorstore = Chroma(collection_name="full_documents", embedding_function=embeddings)

## Smaller chunks

In [2]:
import uuid

from langchain.retrievers.multi_vector import MultiVectorRetriever

# Metadata key under which each indexed chunk records its parent's id.
id_key = "doc_id"

# One random UUID string per parent document, in the same order as `docs`.
doc_ids = [str(uuid.uuid4()) for _ in docs]

# The storage layer for the parent documents (kept in memory only).
store = InMemoryByteStore()

# The retriever starts empty: nothing has been added to the vectorstore or
# to the byte store yet.
retriever = MultiVectorRetriever(vectorstore=vectorstore, byte_store=store, id_key=id_key)

In [3]:
# Splitter that cuts each parent document into small (up to 400 character) children.
child_text_splitter = RecursiveCharacterTextSplitter(chunk_size=400)

# Split every parent and stamp each resulting child with its parent's id so a
# vectorstore hit can be traced back to the parent it came from.
sub_docs = []
for parent_id, parent_doc in zip(doc_ids, docs):
    children = child_text_splitter.split_documents([parent_doc])
    for child in children:
        child.metadata[id_key] = parent_id
    sub_docs += children

In [4]:
# Embed and index the small child chunks.
retriever.vectorstore.add_documents(sub_docs)

# Store each parent document under its id in the retriever's docstore.
parent_pairs = list(zip(doc_ids, docs))
retriever.docstore.mset(parent_pairs)

In [5]:
# Query the vectorstore directly: the best hit is a small child chunk whose
# metadata carries the parent's doc_id.
child_hits = retriever.vectorstore.similarity_search("LangChain")
child_hits[0]

Document(metadata={'doc_id': 'a6465e67-d932-4bb5-ab5a-e3233e5321d6', 'source': 'data/langchain.md'}, page_content='*   Provides support for [async programming](https://python.langchain.com/docs/concepts/async/), [efficient batching](https://python.langchain.com/docs/concepts/runnables/#optimized-parallel-execution-batch), [a rich streaming API](https://python.langchain.com/docs/concepts/streaming/).')

In [None]:
# NOTE(review): this cell repeats the same vectorstore query as the cell
# before it and shows the same result — candidate for removal.
repeat_hits = retriever.vectorstore.similarity_search("LangChain")
repeat_hits[0]

Document(metadata={'doc_id': 'a6465e67-d932-4bb5-ab5a-e3233e5321d6', 'source': 'data/langchain.md'}, page_content='*   Provides support for [async programming](https://python.langchain.com/docs/concepts/async/), [efficient batching](https://python.langchain.com/docs/concepts/runnables/#optimized-parallel-execution-batch), [a rich streaming API](https://python.langchain.com/docs/concepts/streaming/).')

In [None]:
# NOTE(review): another verbatim repeat of the earlier vectorstore query;
# it adds no new information — candidate for removal.
query = "LangChain"
retriever.vectorstore.similarity_search(query)[0]

Document(metadata={'doc_id': 'a6465e67-d932-4bb5-ab5a-e3233e5321d6', 'source': 'data/langchain.md'}, page_content='*   Provides support for [async programming](https://python.langchain.com/docs/concepts/async/), [efficient batching](https://python.langchain.com/docs/concepts/runnables/#optimized-parallel-execution-batch), [a rich streaming API](https://python.langchain.com/docs/concepts/streaming/).')

In [6]:
# Going through the retriever instead of the raw vectorstore: the length shown
# is that of the first returned document's full text.
parent_hits = retriever.invoke("LangChain")
len(parent_hits[0].page_content)



9488

In [7]:
from langchain.retrievers.multi_vector import SearchType

# Switch the retriever's vectorstore lookup to SearchType.mmr.
# This mutates the existing retriever object, so the setting stays in effect
# for any later call on it.
retriever.search_type = SearchType.mmr

mmr_hits = retriever.invoke("LangChain")
len(mmr_hits[0].page_content)

9488

## Associating summaries with a document for retrieval

In [8]:
import getpass
import os
from langchain_openai import ChatOpenAI
from langchain_ollama import ChatOllama
# Chat model used to write the summaries below; a local Ollama model is used.
# Hosted alternative: llm = ChatOpenAI(model="gpt-4o-mini")
llm = ChatOllama(model="llama3.2:1b")



In [9]:
import uuid

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

def _page_text(doc):
    """Return the document's text, which fills the prompt's `doc` variable."""
    return doc.page_content


# Summarisation pipeline: document -> prompt variables -> prompt -> LLM -> plain string.
summary_prompt = ChatPromptTemplate.from_template("Summarize the following document:\n\n{doc}")
chain = {"doc": _page_text} | summary_prompt | llm | StrOutputParser()



In [10]:
import uuid

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# NOTE(review): this cell rebuilds exactly the same summarisation chain as the
# cell before it (and repeats its imports) — candidate for removal.
doc_to_prompt_vars = {"doc": lambda x: x.page_content}
summarize_prompt = ChatPromptTemplate.from_template("Summarize the following document:\n\n{doc}")
chain = doc_to_prompt_vars | summarize_prompt | llm | StrOutputParser()

In [11]:
# Summarise every parent document, running at most five chain calls at once.
batch_config = {"max_concurrency": 5}
summaries = chain.batch(docs, batch_config)


In [12]:
# The vectorstore that indexes the LLM-written summaries (one per parent document).
# It uses the same local Ollama embedding model as the "full_documents"
# collection, so the notebook does not additionally need an OpenAI API key
# (none is configured anywhere in this notebook).
vectorstore = Chroma(
    collection_name="summaries",
    embedding_function=OllamaEmbeddings(model='snowflake-arctic-embed:33m'),
)
# The storage layer for the parent documents
store = InMemoryByteStore()
id_key = "doc_id"
# The retriever (empty to start). This rebinds `retriever`, replacing the
# chunk-based retriever built earlier.
retriever = MultiVectorRetriever(
    vectorstore=vectorstore,
    byte_store=store,
    id_key=id_key,
)
# Fresh ids for this retriever, one per parent document.
doc_ids = [str(uuid.uuid4()) for _ in docs]

# Guard against stale kernel state: summaries must line up one-to-one with docs.
assert len(summaries) == len(docs), "summaries and docs are out of sync; re-run the summarisation cell"

# Each summary becomes a small searchable document tagged with its parent's id.
summary_docs = [
    Document(page_content=summary, metadata={id_key: doc_id})
    for doc_id, summary in zip(doc_ids, summaries)
]

# Index the summaries, and store the full parents under the same ids.
retriever.vectorstore.add_documents(summary_docs)
retriever.docstore.mset(list(zip(doc_ids, docs)))

In [13]:
# # We can also add the original chunks to the vectorstore if we so want
# for i, doc in enumerate(docs):
#     doc.metadata[id_key] = doc_ids[i]
# retriever.vectorstore.add_documents(docs)

In [14]:
# Query the summaries collection directly; the hits are summary documents.
# Note that this rebinds `sub_docs`, which earlier held the child chunks.
query = "LangChain"
sub_docs = retriever.vectorstore.similarity_search(query)

top_summary = sub_docs[0]
print(top_summary)

page_content='The document discusses LangChain, a library for interacting with large language models (LLMs). It provides an overview of how LLMs work, including their capabilities and limitations. The document also explains the different types of LLMs available, such as chat models, and how they differ from traditional LLMs.

Some key points discussed in the document include:

*   **Interface**: LangChain chat models implement the `BaseChatModel` interface, which allows for standard streaming, async programming, optimized batching, and more.
*   **Standard parameters**: Many chat models have standardized parameters that can be used to configure the model, such as temperature, maximum token count, and max wait time.
*   **Message formats**: LangChain supports two message formats: LangChain's own message format and OpenAI's message format.
*   **Key methods**: The document explains the key methods of a chat model, including `invoke`, `stream`, `batch`, `bind_tools`, and `with_structured_

In [15]:
# Through the retriever, the same query returns documents from the docstore;
# show the length of the first one's text.
retrieved_docs = retriever.invoke("LangChain")

first_retrieved = retrieved_docs[0]
len(first_retrieved.page_content)

9488