# **Experimenting with Building Production-Ready LLM Apps With LlamaIndex: Recursive Document Agents for Dynamic Retrieval**

#### Combining recursive retrieval and document agents for advanced retrieval

## **Importing the Libraries**

In [1]:
from llama_index import (
    VectorStoreIndex,
    ListIndex,
    SummaryIndex,
    SimpleKeywordTableIndex,
    SimpleDirectoryReader,
    ServiceContext,
)

from llama_index.schema import IndexNode
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.llms import OpenAI
from llama_index.agent import OpenAIAgent
from llama_index.retrievers import RecursiveRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.response_synthesizers import get_response_synthesizer

import os
import openai
from dotenv import load_dotenv, find_dotenv
# Read the local .env file so that its variables are available via the environment.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Pass the key on to the openai client; this is None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

## **Loading the Documents**

In [2]:
# Titles of the course PDFs to load; each one is used as the file name
# (without the ".pdf" extension) under ../data/ and as the key for that document.
titles = [
    "GEED-10013-LIFE-AND-WORKS-OF-RIZAL_(1)",
    "UNIT 2_GEED-10013-LIFE-AND-WORKS-OF-RIZAL_2nd Sem.",
    "UNIT 3 GEED-10013-LIFE-AND-WORKS-OF-RIZAL (1)",
]

In [3]:
# Load each PDF once and keep the result keyed by its title, so later cells
# can look up the loaded content of a specific document.
dict_documents = {}

for title in titles:
    pdf_path = f"../data/{title}.pdf"
    reader = SimpleDirectoryReader(input_files=[pdf_path])
    dict_documents[title] = reader.load_data()

# Report how many titles were loaded (one dictionary entry per title).
print(f"loaded documents with {len(dict_documents)} documents")

loaded documents with 3 documents


## **Creating Document Agents**

In [4]:
# Shared service context for every index and query engine built below.
# `chunk_size` is the supported keyword; `chunk_size_limit` is its deprecated
# spelling and only triggers a deprecation warning before being mapped to it.
service_context = ServiceContext.from_defaults(chunk_size=512)

In [14]:
# Build agents dictionary: one OpenAIAgent per document title.
# Each agent gets two tools over its own document:
#   - "vector_tool":  query engine on a VectorStoreIndex, for specific lookups
#   - "summary_tool": query engine on a SummaryIndex, for summarization questions
agents = {}

# The function-calling LLM does not depend on the document, so it is created
# once here and shared by every agent instead of being rebuilt per iteration.
function_llm = OpenAI(model="gpt-4")

for title in titles:
    title_documents = dict_documents[title]

    # build vector index
    vector_index = VectorStoreIndex.from_documents(
        title_documents, service_context=service_context
    )

    # build summary index (SummaryIndex is the current name of the legacy ListIndex alias)
    summary_index = SummaryIndex.from_documents(
        title_documents, service_context=service_context
    )

    # define query engines
    vector_query_engine = vector_index.as_query_engine()
    summary_query_engine = summary_index.as_query_engine()

    # define tools; the descriptions are what the agent sees when choosing a tool
    query_engine_tools = [
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name="vector_tool",
                description=f"Useful for retrieving specific context related to {title}",
            ),
        ),
        QueryEngineTool(
            query_engine=summary_query_engine,
            metadata=ToolMetadata(
                name="summary_tool",
                description=f"Useful for summarization questions related to {title}",
            ),
        ),
    ]

    # build agent and register it under the document title
    agent = OpenAIAgent.from_tools(
        query_engine_tools,
        llm=function_llm,
        verbose=True,
    )

    agents[title] = agent

## **Creating Index Nodes**

In [16]:
# Build one IndexNode per document title. The node text is a short routing
# summary, and index_id is the title, i.e. the same key under which the
# document's agent is stored in `agents`.
nodes = [
    IndexNode(
        text=(
            f"This content contains details about {title}. "
            f"Use this index if you need to lookup specific facts about {title}.\n"
            "Do not use this index if you want to query multiple documents."
        ),
        index_id=title,
    )
    for title in titles
]

## **Defining the Recursive Retriever and Query Engine**

In [17]:
# Response synthesizer used by the final query engine.
response_synthesizer = get_response_synthesizer(response_mode="compact")

# Top-level retriever over the per-document index nodes; similarity_top_k=1
# means only the single best-matching node is returned.
vector_retriever = VectorStoreIndex(nodes).as_retriever(similarity_top_k=1)

# Recursive retriever rooted at the "vector" retriever.
# note: the `agents` dict can be passed as `query_engine_dict` since every
# agent can be used as a query engine
recursive_retriever = RecursiveRetriever(
    "vector",
    retriever_dict={"vector": vector_retriever},
    query_engine_dict=agents,
    verbose=True,
)

# Query engine that ties the recursive retriever to the response synthesizer.
query_engine = RetrieverQueryEngine.from_args(
    recursive_retriever,
    service_context=service_context,
    response_synthesizer=response_synthesizer,
)

GPT-3.5 Turbo

In [13]:
# Send the question through the recursive query engine and print the answer.
question = "Act like Dr. Jose Rizal and explain what is Rizal Law or Republic Act No. 1425, that is all about you, in tagalog."
response = query_engine.query(question)
print(response)


[36;1m[1;3mRetrieving with query id None: Act like Dr. Jose Rizal and explain what is Rizal Law or Republic Act No. 1425, that is all about you, in tagalog.
[0m[38;5;200m[1;3mRetrieved node with id, entering: UNIT 3 GEED-10013-LIFE-AND-WORKS-OF-RIZAL (1)
[0m[36;1m[1;3mRetrieving with query id UNIT 3 GEED-10013-LIFE-AND-WORKS-OF-RIZAL (1): Act like Dr. Jose Rizal and explain what is Rizal Law or Republic Act No. 1425, that is all about you, in tagalog.
[0m[32;1m[1;3mGot response: Ako si Dr. Jose Rizal at ipapaliwanag ko sa iyo ang Rizal Law o Republic Act No. 1425 sa Tagalog.

Ang Rizal Law o Batas Rizal ay isang batas na ipinasa noong 1956 na naglalayong bigyang-pansin at pag-aralan ang mga akda, buhay, at mga kontribusyon ni Dr. Jose Rizal, ang ating pambansang bayani. Layunin ng batas na ito na palawakin ang kaalaman ng mga Pilipino tungkol sa buhay at mga gawa ni Rizal, at ang kanyang mga paniniwala at prinsipyo.

Sa pamamagitan ng Rizal Law, ang mga paaralan sa Pilipinas

GPT-4

In [18]:
# Same question as the previous run, issued against the current query engine.
question = "Act like Dr. Jose Rizal and explain what is Rizal Law or Republic Act No. 1425, that is all about you, in tagalog."
response = query_engine.query(question)
print(response)

[36;1m[1;3mRetrieving with query id None: Act like Dr. Jose Rizal and explain what is Rizal Law or Republic Act No. 1425, that is all about you, in tagalog.
[0m[38;5;200m[1;3mRetrieved node with id, entering: UNIT 3 GEED-10013-LIFE-AND-WORKS-OF-RIZAL (1)
[0m[36;1m[1;3mRetrieving with query id UNIT 3 GEED-10013-LIFE-AND-WORKS-OF-RIZAL (1): Act like Dr. Jose Rizal and explain what is Rizal Law or Republic Act No. 1425, that is all about you, in tagalog.
[0m[32;1m[1;3mGot response: Bilang si Dr. Jose Rizal, ako ay nagagalak na ipaliwanag ang batas na may kaugnayan sa aking buhay at mga gawa, ang Rizal Law o Republic Act No. 1425.

Ang Rizal Law o Republic Act No. 1425 ay isang batas na ipinasa noong Hunyo 12, 1956. Layunin ng batas na ito na ituro ang aking mga nobela, ang "Noli Me Tangere" at "El Filibusterismo", sa lahat ng paaralan sa buong bansa, mula sa elementarya hanggang kolehiyo. Ito ay upang maipakita at maipaintindi sa mga mag-aaral ang mga aral na makukuha mula sa a

In [1]:
# Second document set: titles of the DevOps self-service PDFs.
# NOTE: this rebinds `titles`, replacing the list defined earlier in the notebook.
titles = [
    "DevOps Self-Service Pipeline Architecture and Its 3–2–1 Rule",
    "DevOps Self-Service Centric Terraform Project Structure",
    "DevOps Self-Service Centric Pipeline Security and Guardrails",
]

# Load each PDF named in `titles` from ../data/ and keep the result keyed by title.
documents = {}
for title in titles:
    pdf_path = f"../data/{title}.pdf"
    reader = SimpleDirectoryReader(input_files=[pdf_path])
    documents[title] = reader.load_data()

# Report how many titles were loaded (one dictionary entry per title).
print(f"loaded documents with {len(documents)} documents")

hello world


In [None]:
# Build agents dictionary: one OpenAIAgent per document title in `documents`.
# Each agent gets two tools over its own document:
#   - "vector_tool":  query engine on a VectorStoreIndex, for specific lookups
#   - "summary_tool": query engine on a SummaryIndex, for summarization questions
agents = {}

# The function-calling LLM does not depend on the document, so it is created
# once here and shared by every agent instead of being rebuilt per iteration.
function_llm = OpenAI(model="gpt-3.5-turbo-0613")

for title in titles:
    title_documents = documents[title]

    # build vector index
    vector_index = VectorStoreIndex.from_documents(
        title_documents, service_context=service_context
    )

    # build summary index (SummaryIndex is the current name of the legacy ListIndex alias)
    summary_index = SummaryIndex.from_documents(
        title_documents, service_context=service_context
    )

    # define query engines
    vector_query_engine = vector_index.as_query_engine()
    summary_query_engine = summary_index.as_query_engine()

    # define tools; the descriptions are what the agent sees when choosing a tool
    query_engine_tools = [
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name="vector_tool",
                description=f"Useful for retrieving specific context related to {title}",
            ),
        ),
        QueryEngineTool(
            query_engine=summary_query_engine,
            metadata=ToolMetadata(
                name="summary_tool",
                description=f"Useful for summarization questions related to {title}",
            ),
        ),
    ]

    # build agent and register it under the document title
    agent = OpenAIAgent.from_tools(
        query_engine_tools,
        llm=function_llm,
        verbose=True,
    )

    agents[title] = agent