# Multi-Vector Retriever

In [1]:
from langchain.storage import InMemoryByteStore
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Build one loader per markdown source and gather everything they load
# into a single flat list.
loaders = [
    TextLoader(path)
    for path in ("data/langchain.md", "data/langchain2.md")
]
docs = [loaded for loader in loaders for loaded in loader.load()]

# Re-split the loaded documents into the parent chunks (chunk_size=10000).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000)
docs = text_splitter.split_documents(docs)

# The vectorstore used to index the child chunks.
vectorstore = Chroma(
    collection_name="full_documents",
    embedding_function=OllamaEmbeddings(model="snowflake-arctic-embed:33m"),
)

## Smaller chunks

In [2]:
import uuid

from langchain.retrievers.multi_vector import MultiVectorRetriever

# One fresh UUID string per parent document.
doc_ids = [str(uuid.uuid4()) for _ in docs]

# Metadata key under which the parent id is recorded.
id_key = "doc_id"

# In-memory byte store handed to the retriever for the parent documents.
store = InMemoryByteStore()

# The retriever starts out empty; nothing has been added to either store yet.
retriever = MultiVectorRetriever(
    id_key=id_key,
    byte_store=store,
    vectorstore=vectorstore,
)

In [3]:
# Splitter that produces the smaller child chunks (chunk_size=400).
child_text_splitter = RecursiveCharacterTextSplitter(chunk_size=400)

# Split every parent document and stamp each resulting child chunk with
# the id of the parent it came from.
sub_docs = []
for parent_id, parent_doc in zip(doc_ids, docs):
    children = child_text_splitter.split_documents([parent_doc])
    for child in children:
        child.metadata[id_key] = parent_id
    sub_docs.extend(children)

In [4]:
# Add the child chunks to the vector store.
retriever.vectorstore.add_documents(sub_docs)

# Store each parent document in the docstore under its id.
parent_entries = list(zip(doc_ids, docs))
retriever.docstore.mset(parent_entries)

In [5]:
# Search the vector store directly (bypassing the retriever) and show the
# first hit.
hits = retriever.vectorstore.similarity_search("LangChain")
hits[0]

Document(metadata={'doc_id': '1cbe455e-758c-4998-9aab-9476345b2909', 'source': 'data/langchain.md'}, page_content='LangChain provides a consistent interface for working with chat models from different providers while offering additional features for monitoring, debugging, and optimizing the performance of applications that use LLMs.')

In [6]:
# Search the vector store directly (bypassing the retriever) and show the
# first hit.
hits = retriever.vectorstore.similarity_search("LangChain")
hits[0]

Document(metadata={'doc_id': '1cbe455e-758c-4998-9aab-9476345b2909', 'source': 'data/langchain.md'}, page_content='LangChain provides a consistent interface for working with chat models from different providers while offering additional features for monitoring, debugging, and optimizing the performance of applications that use LLMs.')

In [7]:
# Search the vector store directly (bypassing the retriever) and show the
# first hit.
hits = retriever.vectorstore.similarity_search("LangChain")
hits[0]

Document(metadata={'doc_id': '1cbe455e-758c-4998-9aab-9476345b2909', 'source': 'data/langchain.md'}, page_content='LangChain provides a consistent interface for working with chat models from different providers while offering additional features for monitoring, debugging, and optimizing the performance of applications that use LLMs.')

In [8]:
# Query through the retriever and report the text length of the first
# document it returns.
first_doc = retriever.invoke("LangChain")[0]
len(first_doc.page_content)



9488

In [9]:
from langchain.retrievers.multi_vector import SearchType

# Switch the retriever's search type to SearchType.mmr, then repeat the
# length check on the first returned document.
retriever.search_type = SearchType.mmr

mmr_first_doc = retriever.invoke("LangChain")[0]
len(mmr_first_doc.page_content)

9488

## Associating summaries with a document for retrieval

In [10]:
import getpass
import os
from langchain_openai import ChatOpenAI
from langchain_ollama import ChatOllama

# Chat model used for summarization: an Ollama-served model. The OpenAI
# alternative is kept commented out for reference.
# llm = ChatOpenAI(model="gpt-4o-mini")
llm = ChatOllama(model="llama3.2:1b")



In [11]:
import uuid

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Summarization pipeline: take a document's text, insert it into the
# prompt, run the chat model, and parse the reply into a plain string.
summary_prompt = ChatPromptTemplate.from_template(
    "Summarize the following document:\n\n{doc}"
)
chain = (
    {"doc": lambda document: document.page_content}
    | summary_prompt
    | llm
    | StrOutputParser()
)



In [12]:
import uuid

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Summarization pipeline: take a document's text, insert it into the
# prompt, run the chat model, and parse the reply into a plain string.
summary_prompt = ChatPromptTemplate.from_template(
    "Summarize the following document:\n\n{doc}"
)
chain = (
    {"doc": lambda document: document.page_content}
    | summary_prompt
    | llm
    | StrOutputParser()
)

In [13]:
# Summarize every document in one batch call, with max_concurrency set to 5.
batch_config = {"max_concurrency": 5}
summaries = chain.batch(docs, config=batch_config)


In [14]:
# The vectorstore used to index the generated summaries. It uses the same
# Ollama embedding model as the "full_documents" collection, so the whole
# notebook runs against one embedding backend and needs no OpenAI API key
# (none is configured anywhere in this notebook).
vectorstore = Chroma(
    collection_name="summaries",
    embedding_function=OllamaEmbeddings(model="snowflake-arctic-embed:33m"),
)
# The storage layer for the parent documents
store = InMemoryByteStore()
# Metadata key that ties each summary to its parent document's id
id_key = "doc_id"
# The retriever (empty to start)
retriever = MultiVectorRetriever(
    vectorstore=vectorstore,
    byte_store=store,
    id_key=id_key,
)
doc_ids = [str(uuid.uuid4()) for _ in docs]

# One summary Document per parent, tagged with that parent's id. The
# summaries come from chain.batch(docs), so they line up with doc_ids.
summary_docs = [
    Document(page_content=summary, metadata={id_key: doc_id})
    for doc_id, summary in zip(doc_ids, summaries)
]

# Summaries go into the vector store; the full parent documents go into
# the docstore under the same ids.
retriever.vectorstore.add_documents(summary_docs)
retriever.docstore.mset(list(zip(doc_ids, docs)))

In [15]:
# # Optionally, the original chunks can also be added to the vectorstore:
# for i, doc in enumerate(docs):
#     doc.metadata[id_key] = doc_ids[i]
# retriever.vectorstore.add_documents(docs)

In [16]:
# Search the summary vector store directly and print the first hit.
sub_docs = retriever.vectorstore.similarity_search("LangChain")

top_summary = sub_docs[0]
print(top_summary)

page_content='The document provides an overview of LangChain, a platform for developing and deploying language models (LLMs) without requiring task-specific fine-tuning. It explains how modern LLMs are accessed through a chat model interface, which takes messages as input and returns responses as output.

Key features of LangChain include:

*   A chat model interface that can handle multiple requests concurrently
*   Support for [structured output](https://python.langchain.com/docs/concepts/structured_outputs/) for models that natively support this format
*   Standard parameters for configuring the model, including temperature, maximum token count, and response waiting time

The document also provides information on standard methods in LangChain, such as:

*   `invoke`: The primary method for interacting with a chat model
*   `stream`: A method that allows streaming the output of a chat model
*   `batch`: A method for batching multiple requests together for more efficient processing
* 

In [17]:
# Query through the retriever and report the text length of the first
# document it returns.
retrieved_docs = retriever.invoke("LangChain")

first_retrieved = retrieved_docs[0]
len(first_retrieved.page_content)

9488