**Load the libraries**

In [1]:
from pyprojroot import here
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index.node_parser import get_leaf_nodes, HierarchicalNodeParser
from llama_index import (load_index_from_storage,
                         set_global_service_context,
                         ServiceContext,
                         StorageContext,
                         SimpleDirectoryReader,
                         VectorStoreIndex)
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.retrievers import AutoMergingRetriever

import os
from dotenv import load_dotenv, find_dotenv
# Load the .env file located by find_dotenv() into the process environment
# (presumably where the settings read later via os.getenv are defined).
# The return value is bound to `_` so it is not echoed as the cell's output.
_ = load_dotenv(find_dotenv())

**Load the GPT model and the embedding model from AzureOpenAI**

In [2]:
# Connection settings shared by the chat model and the embedding model;
# every value is read from an environment variable.
_azure_connection = dict(
    api_key=os.getenv("OPENAI_API_KEY"),
    azure_endpoint=os.getenv("OPENAI_API_BASE"),
    api_version=os.getenv("OPENAI_API_VERSION"),
)

# Chat model.
# NOTE(review): both `engine` and `deployment_name` are passed here -- confirm
# which of the two the installed llama_index version uses as the deployment.
llm = AzureOpenAI(
    model="gpt-35-turbo-16k",
    engine="gpt-35-turbo-16k",
    deployment_name=os.getenv("gpt_deployment_name"),
    **_azure_connection,
)

# Embedding model.
embed_model = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name=os.getenv("embed_deployment_name"),
    **_azure_connection,
)

# NOTE: to use an open-source embedding model instead, uncomment the next line.
# embed_model = "local:BAAI/bge-small-en-v1.5"

**Set the service context**

In [3]:
# Bundle the LLM and the embedding model into one service context, then
# register that context globally via set_global_service_context.
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
set_global_service_context(service_context)

**Load the documents**

In [4]:
# Collect the files to ingest from data/docs.
# os.listdir returns entries in arbitrary order and also lists sub-directories
# (e.g. .ipynb_checkpoints), so the names are sorted for a reproducible
# document order and anything that is not a regular file is skipped.
docs_dir = here("data/docs")
input_files = [
    here(f"data/docs/{name}")
    for name in sorted(os.listdir(docs_dir))
    if os.path.isfile(os.path.join(docs_dir, name))
]

documents = SimpleDirectoryReader(input_files=input_files).load_data()

**Process documents and prepare the index:**

Processing steps for `Auto-merging retrieval`:

* We create the parent node and the child nodes. Chunk sizes in this code are [2048, 512, 128]

Functions:
- build_automerging_index
- get_automerging_query_engine

In [5]:
# Arguments
# Directory the auto-merging index is persisted to and reloaded from.
save_dir = here("data/indexes/merging_index")
# Re-ranker model name (same value get_automerging_query_engine uses).
rerank_model = "BAAI/bge-reranker-base"
# Plain integers: a trailing comma after the value would turn each of these
# into a one-element tuple, e.g. (12,), instead of an int.
similarity_top_k = 12
rerank_top_n = 2

In [6]:
def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir=save_dir,
    chunk_sizes=None,
):
    """Build the auto-merging vector index, or reload it if already persisted.

    If ``save_dir`` exists, the index is loaded from it and ``documents`` and
    ``chunk_sizes`` are not used. Otherwise the documents are split into a
    hierarchy of nodes, every node is added to the docstore, only the leaf
    nodes are indexed, and the result is persisted to ``save_dir``.

    Args:
        documents: Documents to parse when a new index has to be built.
        llm: LLM placed in the service context attached to the index.
        embed_model: Embedding model placed in the same service context.
        save_dir: Directory the index is persisted to / loaded from.
        chunk_sizes: Chunk sizes handed to ``HierarchicalNodeParser``;
            falls back to ``[2048, 512, 128]`` when None or empty.

    Returns:
        The built or reloaded index.
    """
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    # Reload path: return early so the documents are not re-parsed into nodes
    # that would only be thrown away.
    if os.path.exists(save_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )

    # Build path: parse the documents into hierarchical nodes.
    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    # All nodes go into the docstore, but only the leaves are indexed.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes, storage_context=storage_context, service_context=merging_context
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index

# Build the index, or reload it when `save_dir` already exists. A freshly
# built index is persisted to `save_dir` by the function itself; it can also
# be persisted manually, as the following cell shows.
automerging_index = build_automerging_index(
    documents=documents,
    llm=llm,
    embed_model=embed_model,
    save_dir=save_dir,
)

**You can save the index separately if you wish**

In [21]:
# Persist the index's storage context to `save_dir`.
automerging_index.storage_context.persist(persist_dir=save_dir)

**Load the index separately**

In [7]:
# Create a storage context backed by the files persisted in `save_dir`.
storage_context = StorageContext.from_defaults(persist_dir=save_dir)

# Reload the index from that storage context. No service_context is passed
# here (presumably the globally registered one applies -- confirm).
automerging_index = load_index_from_storage(storage_context=storage_context)

**Build the query engine**

In [8]:
def get_automerging_query_engine(
    automerging_index,
    similarity_top_k: int = 12,
    rerank_top_n: int = 2,
    rerank_model: str = "BAAI/bge-reranker-base",
):
    """Create a query engine that retrieves, auto-merges and re-ranks nodes.

    Args:
        automerging_index: Index providing the base retriever and the storage
            context handed to ``AutoMergingRetriever``.
        similarity_top_k: ``similarity_top_k`` passed to the base retriever.
        rerank_top_n: ``top_n`` passed to ``SentenceTransformerRerank``.
        rerank_model: Model name passed to ``SentenceTransformerRerank``.
            Defaults to the value that used to be hard-coded, so existing
            callers are unaffected.

    Returns:
        A ``RetrieverQueryEngine`` built from the auto-merging retriever with
        the re-ranker as its only node post-processor.
    """
    base_retriever = automerging_index.as_retriever(
        similarity_top_k=similarity_top_k
    )
    # The auto-merging retriever wraps the base retriever and is given the
    # index's storage context.
    retriever = AutoMergingRetriever(
        base_retriever, automerging_index.storage_context, verbose=True
    )
    rerank = SentenceTransformerRerank(top_n=rerank_top_n, model=rerank_model)
    return RetrieverQueryEngine.from_args(retriever, node_postprocessors=[rerank])

# Create the query engine with the function's default retrieval settings.
automerging_query_engine = get_automerging_query_engine(automerging_index=automerging_index)

**Test with a query**

In [9]:
# Send one test question through the query engine and print the answer
# (print() applies str() to the response object itself).
response = automerging_query_engine.query("Explain is the architecture of vision transformer model")
print(response)

The architecture of the vision transformer model combines the Transformer architecture with convolutional networks. In computer vision tasks, attention is either used alongside convolutional networks or replaces specific components of convolutional networks while maintaining their overall structure. The vision transformer model achieves impressive results compared to state-of-the-art convolutional networks, while requiring fewer computational resources to train.
