# Auto Merging Retriever
This retriever excels at navigating hierarchical index structures. It merges chunks of information to provide contextual retrieval, ensuring that the retrieved information is relevant to the user's query within the broader context of the data source.

Pros:



- Contextual Retrieval:  By navigating hierarchical index structures and merging chunks, this retriever facilitates retrieving information within its relevant context, potentially leading to more accurate and comprehensive results.


Cons:



- Potential for Over-Merging: Merging chunks inappropriately might pull in irrelevant information or blur the boundaries between distinct ideas. (Note: this caveat is not drawn from the referenced sources, so verify it independently.)

In [None]:
from llama_index.core import Settings
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama

# The embedding model and the LLM are both reached through the same local
# Ollama address, so it is spelled out once here.
_OLLAMA_BASE_URL = "http://localhost:11434"

ollama_embedding = OllamaEmbedding(
    model_name="mxbai-embed-large",
    base_url=_OLLAMA_BASE_URL,
)
ollama = Ollama(
    model="llama3.2:3b-instruct-fp16",
    base_url=_OLLAMA_BASE_URL,
)

# Assign both models on the shared Settings object.
Settings.llm = ollama
Settings.embed_model = ollama_embedding

In [None]:
from pathlib import Path

from llama_index.readers.file import PDFReader
from llama_index.readers.file import PyMuPDFReader

In [None]:
# Read the source material with the PyMuPDF-backed reader.
# NOTE(review): "documents" has no .pdf suffix and looks like a directory name;
# confirm the reader accepts this path (it may expect a single PDF file).
loader = PyMuPDFReader()
docs0 = loader.load(file_path=Path("documents"))

In [None]:
from llama_index.core import Document

# Stitch the content of every loaded document together, separated by blank
# lines, and wrap the result in a single Document.
loaded_texts = (loaded.get_content() for loaded in docs0)
doc_text = "\n\n".join(loaded_texts)
docs = [Document(text=doc_text)]

In [None]:
from llama_index.core.node_parser import (
    HierarchicalNodeParser,
    SentenceSplitter,
)
# Parse the merged document with the hierarchical parser's default settings.
node_parser = HierarchicalNodeParser.from_defaults()
nodes = node_parser.get_nodes_from_documents(docs)
# Total number of nodes the parser produced.
len(nodes)

In [None]:
from llama_index.core.node_parser import get_leaf_nodes, get_root_nodes
# Pull the two ends of the hierarchy out of the full node list.
leaf_nodes = get_leaf_nodes(nodes)
root_nodes = get_root_nodes(nodes)
# A notebook cell echoes only its final expression, so a bare len() placed
# mid-cell is silently discarded; report both counts together instead.
len(leaf_nodes), len(root_nodes)

In [None]:
# define storage context
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core import StorageContext
from llama_index.llms.openai import OpenAI

docstore = SimpleDocumentStore()

# Insert every parsed node (the full `nodes` list, not only the leaves) into
# the docstore.
docstore.add_documents(nodes)

# Define the storage context around that docstore (will include a vector store
# by default too); the same context is later handed to the index and retriever.
storage_context = StorageContext.from_defaults(docstore=docstore)


In [None]:
## Build the vector index over the leaf nodes only
from llama_index.core import VectorStoreIndex

# Only `leaf_nodes` are indexed here; the storage context passed alongside
# holds the docstore that was filled with the full node list.
base_index = VectorStoreIndex(
    leaf_nodes,
    storage_context=storage_context,
)

In [None]:
from llama_index.core.retrievers import AutoMergingRetriever


# Plain similarity retriever over the index, returning the top 6 matches.
# It is kept under its own name so it can also be queried directly.
base_retriever = base_index.as_retriever(similarity_top_k=6)

# Auto-merging retriever layered on the plain one; it is given the storage
# context as well and runs in verbose mode.
retriever = AutoMergingRetriever(
    vector_retriever=base_retriever,
    storage_context=storage_context,
    verbose=True,
)

In [None]:
# Adjacent string literals are concatenated with no separator, so each
# question carries its own trailing space; without it the prompt reads
# "What is a stack?What is a queue?...".
query_str = (
    "What is a stack? "
    "What is a queue? "
    "What is the difference between stack and queue?"
)

# NOTE: this rebinds `nodes` (previously the hierarchical parser's output) to
# the auto-merging retriever's results for the query.
nodes = retriever.retrieve(query_str)
# Same query through the plain vector retriever, kept for comparison.
base_nodes = base_retriever.retrieve(query_str)

In [None]:
# A notebook cell echoes only its final expression, so two separate len()
# lines would show just the second count. Emit both as one tuple:
# (auto-merging retriever results, plain retriever results).
len(nodes), len(base_nodes)


In [None]:
from llama_index.core.response.notebook_utils import display_source_node

# Render each retrieved node; source_length is set high, presumably so the
# node text is shown in full rather than cut short.
for node in nodes:
    display_source_node(node, source_length=10_000)

In [None]:
from llama_index.core.query_engine import RetrieverQueryEngine
# One query engine per retriever so the two answers can be compared.
query_engine = RetrieverQueryEngine.from_args(retriever)
base_query_engine = RetrieverQueryEngine.from_args(base_retriever)

# Answer the query through the auto-merging retriever and print the result.
response = query_engine.query(query_str)
print(response)


In [None]:
# Answer the same query through the plain-retriever engine and print the result.
base_response = base_query_engine.query(query_str)
print(base_response)