In [6]:
import os
from dotenv import load_dotenv
load_dotenv()
from llama_index.core import (
    VectorStoreIndex,
    SimpleKeywordTableIndex,
    SimpleDirectoryReader,
)
from llama_index.core import SummaryIndex
from llama_index.core.schema import IndexNode
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.llms.openai import OpenAI
from llama_index.core.callbacks import CallbackManager

In [8]:
# Read every file found under ./documents into llama_index Document objects.
# The recorded output of this notebook shows PDF pages, each carrying
# `file_name` and `page_label` metadata (presumably one Document per page --
# confirm against the reader's behaviour for other file types).
documents = SimpleDirectoryReader(input_dir="./documents").load_data()

In [10]:
import os
import warnings


def find_openai_api_key(env):
    """Return the OpenAI API key stored in ``env``, or ``None`` if it is absent.

    Args:
        env: Mapping of environment-variable names to values, e.g.
            ``os.environ`` or a plain ``dict``.

    Returns:
        The value of ``OPENAI_API_KEY`` with surrounding whitespace removed,
        or ``None`` when the variable is missing or blank.
    """
    key = (env.get("OPENAI_API_KEY") or "").strip()
    return key or None


# SECURITY: a literal secret key used to be assigned to the environment here.
# Secrets must never live in a notebook; the previously embedded key has to be
# treated as compromised and revoked. The key is now expected to come from the
# process environment or from the .env file read by load_dotenv() in the
# first cell.
if find_openai_api_key(os.environ) is None:
    warnings.warn(
        "OPENAI_API_KEY is not set; export it or add it to a .env file "
        "before running the cells that call OpenAI.",
        stacklevel=2,
    )

In [9]:
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Global llama_index defaults: the chat model (temperature 0) and the
# embedding model assigned to the shared Settings object.
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")


[Document(id_='8d05d346-5b73-4696-b790-3d55c8ac0bef', embedding=None, metadata={'page_label': '1', 'file_name': 'DS3-assessment-Swiss-Dormant-Assets.pdf', 'file_path': 'c:\\Users\\Achu - Dhamu\\Llama\\documents\\DS3-assessment-Swiss-Dormant-Assets.pdf', 'file_type': 'application/pdf', 'file_size': 121517, 'creation_date': '2024-03-24', 'last_modified_date': '2024-03-23'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='December  2015 \nDormant assets   \nA factsheet of the  Swiss Bankers Association ', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 Document(id_='b1cea640-031d-461d-9521-14e1dfbe04cd', embedding=None, metadata={'page_label': '2', 'file_name

In [11]:
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# NOTE(review): this cell assigns the same values as the earlier Settings
# cell; presumably it was re-run after the API key cell. One of the two
# cells can likely be removed -- confirm before deleting.
Settings.llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
)
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
)

In [16]:
from llama_index.agent.openai import OpenAIAgent
from llama_index.core import load_index_from_storage, StorageContext
from llama_index.core.node_parser import SentenceSplitter
import os

# Splits each Document into nodes for indexing.
node_parser = SentenceSplitter()

# The function-calling LLM is identical for every per-document agent, so it is
# created once here instead of once per loop iteration.
function_llm = OpenAI(model="gpt-4")

# Build agents dictionary
# agents:        doc_title -> OpenAIAgent with a vector tool and a summary tool
# query_engines: doc_title -> plain vector query engine (similarity_top_k=2)
agents = {}
query_engines = {}

for idx, doc_content in enumerate(documents):
    # Titles are positional: Document_1, Document_2, ...
    doc_title = f"Document_{idx + 1}"
    # NOTE(review): the on-disk cache is keyed only by position, so it can go
    # stale if the contents or ordering of `documents` change; delete ./data
    # in that case.
    persist_dir = f"./data/{doc_title}"

    # The parser expects a list of documents, hence the one-element list.
    nodes = node_parser.get_nodes_from_documents([doc_content])

    if os.path.exists(persist_dir):
        # Reuse the vector index persisted by an earlier run.
        vector_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=persist_dir),
        )
    else:
        # Build the vector index and persist it for later runs.
        vector_index = VectorStoreIndex(nodes)
        vector_index.storage_context.persist(persist_dir=persist_dir)

    # The summary index is rebuilt from the nodes on every run (not persisted).
    summary_index = SummaryIndex(nodes)

    # Define query engines
    vector_query_engine = vector_index.as_query_engine()
    summary_query_engine = summary_index.as_query_engine()

    # Define tools
    query_engine_tools = [
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name="vector_tool",
                description=(
                    f"Useful for questions related to specific aspects of {doc_title}."
                ),
            ),
        ),
        QueryEngineTool(
            query_engine=summary_query_engine,
            metadata=ToolMetadata(
                name="summary_tool",
                description=(
                    f"Useful for any requests that require a holistic summary of EVERYTHING about {doc_title}."
                ),
            ),
        ),
    ]

    # Build agent
    agent = OpenAIAgent.from_tools(
        query_engine_tools,
        llm=function_llm,
        verbose=True,
        system_prompt=f"""\
You are a specialized agent designed to answer queries about {doc_title}.
You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
""",
    )

    agents[doc_title] = agent
    query_engines[doc_title] = vector_index.as_query_engine(
        similarity_top_k=2
    )


In [18]:
# Wrap every per-document agent in a tool so a top-level agent can route to it.
all_tools = []
for idx, doc_content in enumerate(documents):
    # Must match the keys used when the `agents` dictionary was built.
    doc_title = f"Document_{idx + 1}"
    doc_summary = (
        f"This content contains documents related to {doc_title}. Use"
        f" this tool if you want to answer any questions about {doc_title}.\n"
    )

    # Descriptions that differ only by "Document_N" give a tool retriever
    # nothing semantic to match a question against. Append the source file
    # name (and page label, when present) from the document metadata so each
    # description says where the content came from.
    doc_metadata = getattr(doc_content, "metadata", None) or {}
    file_name = doc_metadata.get("file_name")
    page_label = doc_metadata.get("page_label")
    if file_name:
        if page_label:
            source = f"page {page_label} of {file_name}"
        else:
            source = f"{file_name}"
        doc_summary += f"{doc_title} was loaded from {source}.\n"

    doc_tool = QueryEngineTool(
        query_engine=agents[doc_title],
        metadata=ToolMetadata(
            name=doc_title,
            description=doc_summary,
        ),
    )
    all_tools.append(doc_tool)


In [19]:
# define an "object" index and retriever over these tools
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex, SimpleToolNodeMapping

# Index the document tools themselves so that the relevant tools can be
# retrieved for a given query.
tool_mapping = SimpleToolNodeMapping.from_objects(all_tools)

# The mapping and index class are passed by keyword: a positional third
# argument depends on the parameter order of `ObjectIndex.from_objects` in the
# installed llama_index version, whereas keywords bind to the intended
# parameters regardless of that order.
obj_index = ObjectIndex.from_objects(
    all_tools,
    object_mapping=tool_mapping,
    index_cls=VectorStoreIndex,
)

In [21]:
from llama_index.agent.openai_legacy import FnRetrieverOpenAIAgent

# Top-level agent: for each query it retrieves up to three document tools
# from the object index and answers by calling them.
tool_retriever = obj_index.as_retriever(similarity_top_k=3)
top_agent = FnRetrieverOpenAIAgent.from_retriever(
    tool_retriever,
    verbose=True,
    system_prompt=""" \
You are an agent designed to answer queries about a set of given documents.
Please always use the tools provided to answer a question. Do not rely on prior knowledge.\

""",
)

In [23]:
# Baseline for comparison: a single vector index over the nodes of every
# document. Each document is parsed on its own (one-element list per call)
# and the resulting nodes are concatenated in document order.
all_nodes = [
    node
    for doc_content in documents
    for node in node_parser.get_nodes_from_documents([doc_content])
]

# One flat index and a top-4 similarity query engine over all nodes.
base_index = VectorStoreIndex(all_nodes)
base_query_engine = base_index.as_query_engine(similarity_top_k=4)

In [25]:

# Baseline: ask the flat vector index directly, without any agent routing.
question = "what are the differences between the Swiss and UK acts?"
response = base_query_engine.query(question)
print(response)

The differences between the Swiss and UK acts are related to the treatment of dormant assets. In Switzerland, assets without contact are considered dormant after a specific period and are published on a designated website if their value exceeds a certain threshold. If no contact is made by entitled claimants within a year of publication, the assets are transferred to the federal government. In the UK, the Dormant Bank and Building Society Accounts Act 2008 focuses on dormant accounts in banks and building societies, with provisions for reclaiming such assets.


In [29]:
# Same question as the baseline, this time routed through the top-level agent.
question = "what are the differences between the Swiss and UK acts?"
response = top_agent.query(question)
print(response)

STARTING TURN 1
---------------

=== Calling Function ===
Calling function: Document_21 with args: {"input": "Swiss acts"}
Added user message to memory: Swiss acts
=== Calling Function ===
Calling function: vector_tool with args: {
  "input": "Swiss acts"
}
Got output: Swiss acts refer to the laws, regulations, or actions that are specific to Switzerland.

Got output: The term "Swiss acts" in Document_21 refers to laws, regulations, or actions that are specific to Switzerland.

=== Calling Function ===
Calling function: Document_22 with args: {"input": "UK acts"}
Added user message to memory: UK acts
=== Calling Function ===
Calling function: vector_tool with args: {
  "input": "UK acts"
}
Got output: The UK acts are legislative measures or laws enacted by the United Kingdom government.

Got output: The UK acts referred to in Document_22 are legislative measures or laws enacted by the United Kingdom government. These laws are created to regulate, authorize, sanction, grant, declare or 