In this notebook we will look into Building RAG when you have a large number of documents using DocumentAgents concept with ReAct Agent.

In [1]:
%pip install -q llama-index
%pip install -q llama-index-llms-groq
%pip install -q llama-index-embeddings-huggingface

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.5/1.6 MB[0m [31m17.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.0/189.0 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m43.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.5/49.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h

Set Logging

In [2]:
# NOTE: This is ONLY necessary in jupyter notebook.
# Details: Jupyter runs an event-loop behind the scenes.
#          This results in nested event-loops when we start an event-loop to make async queries.
#          This is normally not allowed, we use nest_asyncio to allow it for convenience.
import nest_asyncio

nest_asyncio.apply()

import logging
import sys

# Set up the root logger
logger = logging.getLogger()
logger.setLevel(logging.INFO)  # Set logger level to INFO

# Clear out any existing handlers
logger.handlers = []

# Set up the StreamHandler to output to sys.stdout (Colab's output)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)  # Set handler level to INFO

# Add the handler to the logger
logger.addHandler(handler)

from IPython.display import display, HTML

Setup and Imports

In [3]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage
)
from llama_index.core.settings import Settings
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding


NumExpr defaulting to 2 threads.


In [4]:
from google.colab import userdata

llm = Groq(model="llama3-groq-70b-8192-tool-use-preview", api_key=userdata.get('GROQ_API_KEY'))
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

2 prompts are loaded, with the keys: ['query', 'text']


In [5]:
from llama_index.core import Settings
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 512

Download Documents

use Wikipedia pages of Toronto, Seattle, Chicago, Boston, Houston cities and build RAG pipeline.

In [6]:
wiki_titles = ["Toronto", "Seattle", "Chicago", "Boston", "Houston"]

from pathlib import Path

import requests

for title in wiki_titles:
    response = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "extracts",
            # 'exintro': True,
            "explaintext": True,
        },
    ).json()
    page = next(iter(response["query"]["pages"].values()))
    wiki_text = page["extract"]

    data_path = Path("data")
    if not data_path.exists():
        Path.mkdir(data_path)

    with open(data_path / f"{title}.txt", "w") as fp:
        fp.write(wiki_text)


Load Document

In [8]:
# Load all wiki documents

from llama_index.core import SimpleDirectoryReader

city_docs = {}
for wiki_title in wiki_titles:
    city_docs[wiki_title] = SimpleDirectoryReader(
        input_files=[f"data/{wiki_title}.txt"]
    ).load_data()

Build ReAct Agent for each city

In [9]:
from llama_index.core.agent import ReActAgent
from llama_index.core import VectorStoreIndex, SummaryIndex
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# Build agents dictionary
agents = {}

for wiki_title in wiki_titles:
    # build vector index
    vector_index = VectorStoreIndex.from_documents(
        city_docs[wiki_title],
    )
    # build summary index
    summary_index = SummaryIndex.from_documents(
        city_docs[wiki_title],
    )
    # define query engines
    vector_query_engine = vector_index.as_query_engine()
    summary_query_engine = summary_index.as_query_engine()

    # define tools
    query_engine_tools = [
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name="vector_tool",
                description=(
                    f"Useful for retrieving specific context from {wiki_title}"
                ),
            ),
        ),
        QueryEngineTool(
            query_engine=summary_query_engine,
            metadata=ToolMetadata(
                name="summary_tool",
                description=(
                    "Useful for summarization questions related to"
                    f" {wiki_title}"
                ),
            ),
        ),
    ]

    # build agent
    agent = ReActAgent.from_tools(
        query_engine_tools,
        llm=llm,
        verbose=True,
    )

    agents[wiki_title] = agent


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Define IndexNode for each of these Agents

In [10]:
from llama_index.core.schema import IndexNode

# define top-level nodes
objects = []
for wiki_title in wiki_titles:
    # define index node that links to these agents
    wiki_summary = (
        f"This content contains Wikipedia articles about {wiki_title}. Use"
        " this index if you need to lookup specific facts about"
        f" {wiki_title}.\nDo not use this index if you want to analyze"
        " multiple cities."
    )
    node = IndexNode(
        text=wiki_summary, index_id=wiki_title, obj=agents[wiki_title]
    )
    objects.append(node)


Degine Top-level Retriever to choose an Agent

In [11]:
vector_index = VectorStoreIndex(
    objects=objects,
)
query_engine = vector_index.as_query_engine(similarity_top_k=1, verbose=True)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Rate Limit

Groq free tier has a rate limit so sometimes we will encounter too many requests errors. When the query engine waits a few seconds and try again, normally the query will go through

Test Queries

Should choose a vector tool/summary took for a specific agent based on the query.

In [12]:
# Helper function for print
def print_response(response):
    display(HTML(f'<p style="font-size:20px">{response.response}</p>'))

In [13]:
# should use Toronto agent -> vector tool
response = query_engine.query("What is the population of Toronto?")


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[1;3;38;2;11;159;203mRetrieval entering Toronto: ReActAgent
[0m[1;3;38;2;237;90;200mRetrieving from object ReActAgent with query What is the population of Toronto?
[0m> Running step 574c2163-1049-4ba5-b176-74f20619ae5b. Step input: What is the population of Toronto?
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: vector_tool
Action Input: {'input': 'What is the population of Toronto?'}
[0m

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;34mObservation: The population of Toronto is 2,794,356 in 2021.
[0m> Running step 829edfe2-def9-421b-948a-73e9fb35c1de. Step input: None
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;5;200mThought: (Implicit) I can answer without any more tools!
Answer: Answer: The population of Toronto is 2,794,356 in 2021.
[0mHTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In [14]:
print_response(response)

In [15]:
# should use Houston agent -> vector tool
response = query_engine.query("Who and when was Houston founded?")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[1;3;38;2;11;159;203mRetrieval entering Houston: ReActAgent
[0m[1;3;38;2;237;90;200mRetrieving from object ReActAgent with query Who and when was Houston founded?
[0m> Running step a5bdbf8f-e534-4ebd-930b-e54c1fb38f34. Step input: Who and when was Houston founded?
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: vector_tool
Action Input: {'input': 'Who founded Houston?'}
[0m

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;34mObservation: The Allen brothers founded Houston.
[0m> Running step 94a4ffc2-dbf3-410b-800d-10508e58a1ac. Step input: None
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: vector_tool
Action Input: {'input': 'When was Houston founded?'}
[0m

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;34mObservation: Houston was founded in 1836.
[0m> Running step 0e2e524f-2485-4cf3-90f0-7b0158eb325a. Step input: None
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer
Answer: Houston was founded by the Allen brothers in 1836.
[0mHTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In [16]:
print_response(response)

In [17]:
# should use Boston agent -> summary tool
response = query_engine.query("Summarize about the sports teams in Boston")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[1;3;38;2;11;159;203mRetrieval entering Boston: ReActAgent
[0m[1;3;38;2;237;90;200mRetrieving from object ReActAgent with query Summarize about the sports teams in Boston
[0m> Running step 67f5707d-bbe1-42da-83cd-2a5254dbca93. Step input: Summarize about the sports teams in Boston
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: summary_tool
Action Input: {'input': 'sports teams in Boston'}
[0mHTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP R

In [18]:
print_response(response)

In [19]:
# should use Seattle agent -> summary tool
response = query_engine.query(
    "Give me a summary on all the positive aspects of Chicago"
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[1;3;38;2;11;159;203mRetrieval entering Chicago: ReActAgent
[0m[1;3;38;2;237;90;200mRetrieving from object ReActAgent with query Give me a summary on all the positive aspects of Chicago
[0m> Running step 8b443c0d-0b64-4246-9d1c-58e7b9020632. Step input: Give me a summary on all the positive aspects of Chicago
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: summary_tool
Action Input: {'input': 'Give me a summary on all the positive aspects of Chicago'}
[0mHTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
Retrying request to /chat/completions in 5.000000 seconds
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
Retrying request to /chat

In [20]:
print_response(response)

Tell me about the similarities of lives in Chicago and Toronto

In [21]:
response = query_engine.query(
    "Tell me about the similarities of lives in Chicago and Toronto"
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[1;3;38;2;11;159;203mRetrieval entering Toronto: ReActAgent
[0m[1;3;38;2;237;90;200mRetrieving from object ReActAgent with query Tell me about the similarities of lives in Chicago and Toronto
[0m> Running step 66ad5a2d-7c81-4723-a26f-8e4192d249dd. Step input: Tell me about the similarities of lives in Chicago and Toronto
HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: vector_tool
Action Input: {'input': 'Chicago and Toronto'}
[0m

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;34mObservation: Chicago and Toronto are two major cities in North America, with Chicago being located in the United States and Toronto in Canada. Both cities are known for their diverse populations, cultural attractions, and economic strengths. Chicago is a prominent centre for finance, commerce, industry, technology, telecommunications, and transportation, while Toronto is a hub for technology, design, financial services, life sciences, education, arts, fashion, aerospace, environmental innovation, food services, and tourism. Both cities are popular tourist destinations, with Chicago attracting visitors to its iconic landmarks such as the Cloud Gate sculpture and the Art Institute of Chicago, and Toronto drawing tourists to its CN Tower, museums, galleries, festivals, and public events.
[0m> Running step 9175f979-4a2b-494e-bb5a-b3fe52b7a2ab. Step input: None
HTTP Request: POST https://api.groq.

In [22]:
print_response(response)

Tell me how many shows has Shen Yun performed in 2023?

In [25]:
from pathlib import Path

import requests

response = requests.get(
    "https://web.archive.org/web/20230401000000*/https://www.shenyun.com/tickets",
    params={
        "action": "query",
        "format": "json",
        # "titles": title,
        "prop": "extracts",
        # 'exintro': True,
        "explaintext": True,
    },
).json()

page = response["query"]["pages"].values()
text = page["extract"]

data_path = Path("data")
if not data_path.exists():
    Path.mkdir(data_path)

with open(data_path / f"{title}.txt", "w") as fp:
    fp.write(wiki_text)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# Load all wiki documents

from llama_index.core import SimpleDirectoryReader

shenyun_docs = SimpleDirectoryReader(
    input_files=[f"data/"]
)
for wiki_title in wiki_titles:
    city_docs[wiki_title] = SimpleDirectoryReader(
        input_files=[f"data/{wiki_title}.txt"]
    ).load_data()


In [None]:
from llama_index.core.agent import ReActAgent
from llama_index.core import VectorStoreIndex, SummaryIndex
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# Build agents dictionary
agents = {}

# build vector index
vector_index = VectorStoreIndex.from_documents(
    city_docs[wiki_title],
)
# build summary index
summary_index = SummaryIndex.from_documents(
    city_docs[wiki_title],
)
# define query engines
vector_query_engine = vector_index.as_query_engine()
summary_query_engine = summary_index.as_query_engine()

# define tools
query_engine_tools = [
    QueryEngineTool(
        query_engine=vector_query_engine,
        metadata=ToolMetadata(
            name="vector_tool",
            description=(
                f"Useful for retrieving specific context from {wiki_title}"
            ),
        ),
    ),
    QueryEngineTool(
        query_engine=summary_query_engine,
        metadata=ToolMetadata(
            name="summary_tool",
            description=(
                "Useful for summarization questions related to"
                f" {wiki_title}"
            ),
        ),
    ),
]

# build agent
agent = ReActAgent.from_tools(
    query_engine_tools,
    llm=llm,
    verbose=True,
)

agents[wiki_title] = agent
