### Data Downloading and File Setup

In [2]:
import os
import requests

In [2]:
def download_file(url, output_path, timeout=60):
    """
    Download a single URL to a local file.

    :param url: Address of the resource to fetch.
    :param output_path: Destination file path; the file is written in binary mode.
    :param timeout: Seconds to wait for the server before giving up.
    :raises requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Without this check an HTTP error page would be silently saved as the data file.
    response.raise_for_status()
    with open(output_path, 'wb') as file:
        file.write(response.content)


os.makedirs('Data/paul_graham/', exist_ok=True)
os.makedirs('Data/10k/', exist_ok=True)

# Paul Graham essay (plain text)
url = 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt'
output_path = 'Data/paul_graham/paul_graham_essay.txt'
download_file(url, output_path)
print(f"Downloaded the file to: {output_path}")

# 2023 10-K filings: source URL -> local file name under Data/10k/
files = {
    'https://d18rn0p25nwr6d.cloudfront.net/CIK-0001324424/1d1b7ef4-fd87-4efa-a8fd-f728746142d1.pdf': 'expedia_10k_2023.pdf',
    'https://d18rn0p25nwr6d.cloudfront.net/CIK-0001075531/31c876e5-f44f-4645-8757-e2b828c23357.pdf': 'booking_10k_2023.pdf',
    'https://d18rn0p25nwr6d.cloudfront.net/CIK-0001543151/6fabd79a-baa9-4b08-84fe-deab4ef8415f.pdf': 'uber_10k_2023.pdf',
    'https://d18rn0p25nwr6d.cloudfront.net/CIK-0001759509/d576a7f4-780c-4f39-86a6-aa54b03fa2ec.pdf': 'lyft_10k_2023.pdf'
}

for url, filename in files.items():
    output_path = f'Data/10k/{filename}'
    download_file(url, output_path)
    print(f"Downloaded {filename} to {output_path}")

Downloaded the file to: Data/paul_graham/paul_graham_essay.txt
Downloaded expedia_10k_2023.pdf to Data/10k/expedia_10k_2023.pdf
Downloaded booking_10k_2023.pdf to Data/10k/booking_10k_2023.pdf
Downloaded uber_10k_2023.pdf to Data/10k/uber_10k_2023.pdf
Downloaded lyft_10k_2023.pdf to Data/10k/lyft_10k_2023.pdf


### OpenAI

In [3]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

import getpass

# Do not overwrite a valid key with a placeholder, and never commit a real one:
# reuse the key from the environment when present, otherwise prompt without echo.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Global LlamaIndex defaults used by every index and query engine created below.
Settings.llm = OpenAI(model='gpt-4o-mini', temperature=0.2)
Settings.embed_model = OpenAIEmbedding(model='text-embedding-3-small')
Settings.chunk_size = 1024

### Indexing, Saving, and Loading 

- `SimpleDirectoryReader`: Reads the content of files from given paths.
- `VectorStoreIndex.from_documents`: Creates an index from the loaded documents for querying.
- `StorageContext.persist`: Saves the indices to a specified directory.
- `load_index_from_storage`: Reloads indices from storage for use.

In [4]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core import StorageContext, load_index_from_storage
import os

def index_and_load_data(paul_graham_path, tenk_paths, storage_dir):
    """
    Build vector indices for the essay and each 10k filing, persist them, and
    return the copies reloaded from disk.

    :param paul_graham_path: Path to the Paul Graham essay file.
    :param tenk_paths: Dictionary mapping a filing name to its PDF path.
    :param storage_dir: Directory under which each index gets its own
        ``<name>_index`` sub-directory.
    :return: Tuple of (essay index, dict of filing name -> index), both
        loaded back from ``storage_dir``.
    """
    def _build_index(file_path):
        # Read one file and embed it into a fresh vector index.
        docs = SimpleDirectoryReader(input_files=[file_path]).load_data()
        return VectorStoreIndex.from_documents(docs)

    def _reload_index(persist_dir):
        # Reconstruct an index from a previously persisted directory.
        return load_index_from_storage(StorageContext.from_defaults(persist_dir=persist_dir))

    essay_dir = f'{storage_dir}/paul_graham_index'

    # --- Build ---
    essay_index = _build_index(paul_graham_path)
    print("Indexed Paul Graham essay.")

    built_indices = {}
    for filing_name, filing_path in tenk_paths.items():
        built_indices[filing_name] = _build_index(filing_path)
        print(f"Indexed {filing_name}.")

    # --- Persist ---
    os.makedirs(storage_dir, exist_ok=True)
    essay_index.storage_context.persist(persist_dir=essay_dir)
    print("Indexed and saved Paul Graham essay.")

    for filing_name, built in built_indices.items():
        built.storage_context.persist(persist_dir=f'{storage_dir}/{filing_name}_index')
        print(f"Saved {filing_name} index to disk.")

    # --- Reload ---
    essay_index = _reload_index(essay_dir)
    print("Loaded Paul Graham essay index.")

    reloaded_indices = {
        filing_name: _reload_index(f'{storage_dir}/{filing_name}_index')
        for filing_name in tenk_paths
    }
    print("Loaded all 10k indices.")

    return essay_index, reloaded_indices

In [5]:
# Locations of the downloaded inputs and of the persisted indices
paul_graham_path = 'Data/paul_graham/paul_graham_essay.txt'
storage_dir = 'Data/storage'

# Filing name -> PDF path; every filing lives at Data/10k/<name>.pdf
tenk_paths = {
    filing: f'Data/10k/{filing}.pdf'
    for filing in (
        'expedia_10k_2023',
        'booking_10k_2023',
        'uber_10k_2023',
        'lyft_10k_2023',
    )
}

In [6]:
# Build, persist and reload all indices; the essay index and the per-filing
# index dict returned here are used by every tool created below.
paul_graham_index, tenk_indices = index_and_load_data(paul_graham_path, tenk_paths, storage_dir)

Indexed Paul Graham essay.
Indexed expedia_10k_2023.
Indexed booking_10k_2023.
Indexed uber_10k_2023.
Indexed lyft_10k_2023.
Indexed and saved Paul Graham essay.
Saved expedia_10k_2023 index to disk.
Saved booking_10k_2023 index to disk.
Saved uber_10k_2023 index to disk.
Saved lyft_10k_2023 index to disk.
Loaded Paul Graham essay index.
Loaded all 10k indices.


In [62]:
# Inspect the filing-name -> VectorStoreIndex mapping
tenk_indices

{'expedia_10k_2023': <llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x75c707ddedb0>,
 'booking_10k_2023': <llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x75c7084420f0>,
 'uber_10k_2023': <llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x75c707806060>,
 'lyft_10k_2023': <llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x75c7053d9f40>}

### Creating Tools and Router Engines for Query Handling

- `QueryEngineTool`: Configures each tool with its corresponding query engine and metadata.
- `RouterQueryEngine`: Manages multiple tools, directing queries to the appropriate tool based on context.
- `LLMSingleSelector`: Automatically selects the right tool for the task based on the nature of the query.

In [7]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

In [9]:
# Question-answering tool: the essay index's default query engine plus the
# name/description metadata that selectors and agents use to pick a tool
essay_qa_engine = paul_graham_index.as_query_engine()
essay_qa_metadata = ToolMetadata(
    name="paul_graham_qa",
    description="A question-answering tool that handles queries related to Paul Graham's essay, providing precise and relevant answers.",
)
paul_graham_qa_tool = QueryEngineTool(
    query_engine=essay_qa_engine,
    metadata=essay_qa_metadata,
)
print("Created QA tool for Paul Graham essay.")

Created QA tool for Paul Graham essay.


In [10]:
# Summarization tool: same essay index, but queried with the
# "tree_summarize" response mode instead of the default
essay_summary_engine = paul_graham_index.as_query_engine(response_mode="tree_summarize")
essay_summary_metadata = ToolMetadata(
    name="paul_graham_summarization",
    description="A summarization tool that provides concise summaries of Paul Graham's essay.",
)
paul_graham_summarization_tool = QueryEngineTool(
    query_engine=essay_summary_engine,
    metadata=essay_summary_metadata,
)
print("Created Summarization tool for Paul Graham essay.")

Created Summarization tool for Paul Graham essay.


In [11]:
# Both essay tools, grouped so they can be combined with the 10k tools later
tools_pg = [paul_graham_qa_tool, paul_graham_summarization_tool]

In [12]:
# One query-engine tool per 10k filing; each retrieves the top 3 most similar chunks
tools_tenk = []
for filing_name, filing_index in tenk_indices.items():
    filing_engine = filing_index.as_query_engine(similarity_top_k=3)
    filing_metadata = ToolMetadata(
        name=filing_name,
        description=f"Provides information from {filing_name.replace('_', ' ')} filing for the year 2023.",
    )
    tools_tenk.append(
        QueryEngineTool(query_engine=filing_engine, metadata=filing_metadata)
    )
    print(f"Created tool for {filing_name}.")

Created tool for expedia_10k_2023.
Created tool for booking_10k_2023.
Created tool for uber_10k_2023.
Created tool for lyft_10k_2023.


In [13]:
# Router over the two essay tools: an LLM-based single selector decides
# whether a query goes to the QA tool or the summarization tool
pg_selector = LLMSingleSelector.from_defaults()
pg_engine = RouterQueryEngine(
    selector=pg_selector,
    query_engine_tools=[paul_graham_qa_tool, paul_graham_summarization_tool],
)

In [14]:
# Router over the per-company filing tools: an LLM-based single selector
# picks exactly one filing tool for each query
tenk_selector = LLMSingleSelector.from_defaults()
tenk_engine = RouterQueryEngine(
    selector=tenk_selector,
    query_engine_tools=tools_tenk,
)

### Executing Queries and Retrieving Responses

- **`tenk_engine.query()`**: Executes the query against the configured 10k filing tools to fetch relevant financial data.
- **`pg_engine.query()`**: Executes the query against the Paul Graham essay tools to either summarize or extract specific information.
- **Formatted Output**: The responses are printed in a readable format, clearly labeled to differentiate between the different queries.

In [15]:
# The four filing questions share one template; only the company name varies
_income_statement_question = "What is the income statement of {} for year 2023?"

expedia_query = _income_statement_question.format("Expedia")
booking_query = _income_statement_question.format("booking.com")
uber_query = _income_statement_question.format("Uber")
lyft_query = _income_statement_question.format("Lyft")

# Essay questions: one summary request, one targeted question
paul_graham_summary_query = "Summarize the information contained in the Paul Graham essay."
paul_graham_yc_query = "What did Paul Graham do at Y Combinator from the Paul Graham essay?"

In [16]:
# Route the Expedia question through the 10k router; the selector chooses the filing tool
expedia_response = tenk_engine.query(expedia_query)
print(f"Expedia Response:\n{expedia_response}\n")

Expedia Response:
For the year ended December 31, 2023, the income statement of Expedia Group, Inc. is as follows:

- Revenue: $12,839 million
- Costs and expenses:
  - Cost of revenue (exclusive of depreciation and amortization): $1,573 million
  - Selling and marketing - direct: $6,107 million
  - Selling and marketing - indirect: $756 million
  - Technology and content: $1,358 million
  - General and administrative: $771 million
  - Depreciation and amortization: $807 million
  - Impairment of goodwill: $297 million
  - Intangible and other long-term asset impairment: $129 million
  - Legal reserves, occupancy tax and other: $8 million
  - Restructuring and related reorganization charges: $0 million

- Operating income: $1,033 million

- Other income (expense):
  - Interest income: $207 million
  - Interest expense: $(245) million
  - Gain (loss) on debt extinguishment, net: $0 million
  - Gain on sale of business, net: $25 million
  - Other, net: $(2) million

- Total other expense

In [17]:
# Summary request, routed through the essay router
paul_graham_summary_response = pg_engine.query(paul_graham_summary_query)
print(f"Paul Graham Essay Summary:\n{paul_graham_summary_response}\n")
# Visual separator between the two answers
print('---'*50)
# Targeted question about Y Combinator, routed through the same engine
paul_graham_yc_response = pg_engine.query(paul_graham_yc_query)
print(f"Paul Graham at Y Combinator Response:\n{paul_graham_yc_response}\n")


Paul Graham Essay Summary:
The essay reflects on Paul Graham's journey in the tech and startup world, highlighting his experiences with Lisp programming, the founding of Y Combinator, and the evolution of publishing in the digital age. Graham discusses the significance of creating a new dialect of Lisp called Arc and his realization of the potential for online essays to reach a broad audience, contrasting it with the restrictive nature of print publishing. He emphasizes the value of pursuing work that may lack prestige, suggesting that such pursuits often lead to genuine discoveries and align with pure motives. The essay also recounts his personal life, including his relationship with Jessica Livingston and the formation of Y Combinator, driven by their shared vision for a new approach to venture capital. Graham reflects on the early days of Y Combinator, noting their lack of prior experience in angel investing, which allowed them to innovate and implement their ideas effectively. Thro

### Creating and Using OpenAIAgent

- **`OpenAIAgent.from_tools()`**: This function initializes the agent with the specified tools, allowing it to access a wide range of data sources and perform various tasks.
- **`agent.chat()`**: Used to interact with the agent and ask questions or make requests, leveraging the capabilities of the integrated tools.
- **`verbose=True`**: Enables detailed logging of the query handling process, making it easier to debug and understand the agent's actions.

In [19]:
from llama_index.agent.openai import OpenAIAgent

In [20]:
# Agent with access to every essay and 10k tool; verbose=True logs each tool call
agent = OpenAIAgent.from_tools(tools=tools_pg + tools_tenk, verbose=True)
print("OpenAIAgent created and ready to handle queries.")

OpenAIAgent created and ready to handle queries.


In [21]:
# Ask the agent directly; it decides which tool to call
response = agent.chat("What is the revenue growth of Expedia in 2023?")
print(f"Response:\n{response}")

Added user message to memory: What is the revenue growth of Expedia in 2023?
=== Calling Function ===
Calling function: expedia_10k_2023 with args: {"input":"revenue growth 2023"}
Got output: In 2023, total revenue reached $12,839 million, reflecting an increase of 10% compared to $11,667 million in 2022. This growth was driven by various business models, with the merchant model generating $8,818 million, up from $7,762 million in the previous year, and the agency model increasing to $3,075 million from $2,994 million. Additionally, advertising, media, and other revenues rose to $946 million from $911 million. The lodging service type saw significant growth, reaching $10,264 million compared to $8,905 million in 2022. Overall, the revenue growth in 2023 was robust across multiple segments and service types.

Response:
In 2023, Expedia's total revenue reached $12,839 million, reflecting a 10% increase compared to $11,667 million in 2022. This growth was driven by various business models

### Creating an Assistant Agent with Combined Instructions

- **`OpenAIAssistantAgent.from_new()`**: Creates a new assistant agent with specific instructions and tools, designed to handle complex queries and provide detailed responses.
- **`tools_pg + tools_tenk`**: Combines the tools for querying both the essay and financial filings, enhancing the agent’s versatility.

In [25]:
from llama_index.agent.openai import OpenAIAssistantAgent

instructions = (
    "You are an assistant that can provide answers to questions from 10k Filings, "
    "and also answer questions and summarize the Paul Graham essay. Choose the best approach "
    "based on the question asked and provide concise, relevant responses using the available tools."
)

def assistant_agent(tools, instructions=instructions):
    """
    Create an OpenAIAssistantAgent named "RAG Agent" wired to the given tools.

    The system instructions are bound as a default argument when this cell
    runs. A later cell reassigns the notebook-level ``instructions`` variable
    to a different persona; reading the global at call time would silently
    pick that up, so the value is captured here instead.

    :param tools: List of tools the agent may call.
    :param instructions: System instructions for the assistant. Defaults to
        the RAG instructions defined directly above this function.
    :return: The newly created OpenAIAssistantAgent.
    """
    agent = OpenAIAssistantAgent.from_new(
        name="RAG Agent",
        instructions=instructions,
        tools=tools,
        verbose=True,
        run_retrieve_sleep_time=1.0,
    )

    print("OpenAIAssistantAgent created with all provided tools.")
    return agent

In [26]:
# Assistant agent over the combined essay + 10k tool set
rag_agent = assistant_agent(tools_pg + tools_tenk)

OpenAIAssistantAgent created with all provided tools.


In [27]:
# Essay question
response = rag_agent.chat("What are some personal anecdotes from Paul Graham's essay?")
print(f"Response:\n{response}\n")

=== Calling Function ===
Calling function: paul_graham_qa with args: {"input":"Can you share some personal anecdotes from the essay?"}
Got output: The essay includes several personal anecdotes that illustrate the author's journey and experiences. One notable moment is when the author reflects on the surprise of finding an audience after giving a talk at a Lisp conference, which unexpectedly garnered 30,000 page views after being shared on Slashdot. This experience highlighted the newfound accessibility of publishing online, contrasting it with the restrictive print era where only a select few could reach readers.

Another anecdote involves the author's realization of the potential for writing essays online, which led to a commitment to continue writing despite the medium's initial lack of prestige. The author acknowledges that working on projects perceived as unprestigious, like Lisp and online essays, often leads to genuine discoveries and aligns with authentic motives.

Additionally,

In [28]:
# Filing question put to the same assistant agent
response = rag_agent.chat("What is the revenue growth of Expedia in 2023?")
print(f"Response:\n{response}")

=== Calling Function ===
Calling function: expedia_10k_2023 with args: {"input":"What is the revenue growth of Expedia in 2023?"}
Got output: Expedia's total revenue in 2023 was $12,839 million, representing a growth of 10% compared to 2022.
Response:
In 2023, Expedia reported a total revenue of $12,839 million, experiencing a revenue growth of 10% compared to the previous year.


###  Using SubQuestionQueryEngine for Complex Financial Analysis and Query Planning

- **`SubQuestionQueryEngine.from_defaults()`**: Creates a query engine capable of decomposing complex questions into simpler, answerable sub-queries.

In [29]:
import nest_asyncio
from llama_index.core.query_engine import SubQuestionQueryEngine
# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably required because the notebook kernel already runs a
# loop and the sub-question engine below is created with use_async=True — confirm.
nest_asyncio.apply()

In [31]:
# Two-company comparison question. The trailing backslash joins the two text
# lines, so the string is one sentence wrapped in a leading and trailing newline.
sub_question_query = """
Compare and contrast the revenue and expenses of Expedia and Booking.com \
for 2023 and provide an in-depth analysis.
"""

In [33]:
# Sub-question engine over every essay and 10k tool; sub-questions run asynchronously
tools = tools_pg + tools_tenk
query_planning_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=tools, use_async=True)

In [35]:
# Run the comparison question through the sub-question engine
response = query_planning_engine.query(sub_question_query)
print(f"SubQuestionQueryEngine Response:\n{response}\n")

Generated 8 sub questions.
[1;3;38;2;237;90;200m[expedia_10k_2023] Q: What is the total revenue reported by Expedia in their 10K filing for 2023?
[0m[1;3;38;2;90;149;237m[expedia_10k_2023] Q: What are the total expenses reported by Expedia in their 10K filing for 2023?
[0m[1;3;38;2;11;159;203m[booking_10k_2023] Q: What is the total revenue reported by Booking.com in their 10K filing for 2023?
[0m[1;3;38;2;155;135;227m[booking_10k_2023] Q: What are the total expenses reported by Booking.com in their 10K filing for 2023?
[0m[1;3;38;2;237;90;200m[expedia_10k_2023] Q: How do the revenue figures of Expedia and Booking.com for 2023 compare?
[0m[1;3;38;2;90;149;237m[booking_10k_2023] Q: How do the expense figures of Expedia and Booking.com for 2023 compare?
[0m[1;3;38;2;11;159;203m[expedia_10k_2023] Q: What are the key factors influencing the revenue and expenses of Expedia in 2023?
[0m[1;3;38;2;155;135;227m[booking_10k_2023] Q: What are the key factors influencing the revenue 

In [37]:
# Expose the whole sub-question engine as one tool an agent can call
planner_tool_description = (
    "This tool breaks down complex queries into sub-questions for each relevant data source, "
    "then gathers all the intermediate responses and synthesizes a final response."
)
planner_tool_metadata = ToolMetadata(
    name="Filing_10k_query_planner",
    description=planner_tool_description,
)
tools_tenk2 = QueryEngineTool(
    query_engine=query_planning_engine,
    metadata=planner_tool_metadata,
)
print("Filing_10k_query_planner tool created successfully.")

Filing_10k_query_planner tool created successfully.


In [38]:
# Persona prompt for the planner-backed assistant (string keeps its leading
# and trailing newline)
instructions = (
    "\n"
    "You are a veteran stock market investor who is an expert in analyzing companies' annual 10k filings.\n"
    "You will answer questions in the persona of a veteran stock market investor.\n"
)

# Assistant agent whose only tool is the sub-question query planner
planner_agent_options = dict(
    name="10K Filing Query Planner",
    instructions=instructions,
    tools=[tools_tenk2],
    verbose=True,
    run_retrieve_sleep_time=1.0,
)
agent = OpenAIAssistantAgent.from_new(**planner_agent_options)

In [39]:
# Same comparison question, now answered by the planner-backed assistant agent
response = agent.chat(sub_question_query)
print(f"Assistant Agent Response:\n{response}")

=== Calling Function ===
Calling function: Filing_10k_query_planner with args: {"input": "What are the revenues of Expedia for 2023?"}
Generated 3 sub questions.
[1;3;38;2;237;90;200m[expedia_10k_2023] Q: What is the total revenue reported by Expedia in their 2023 10-K filing?
[0m[1;3;38;2;90;149;237m[expedia_10k_2023] Q: What are the revenue breakdowns by segment for Expedia in 2023?
[0m[1;3;38;2;11;159;203m[expedia_10k_2023] Q: How does Expedia's 2023 revenue compare to previous years?
[0m[1;3;38;2;237;90;200m[expedia_10k_2023] A: The total revenue reported by Expedia in their 2023 10-K filing is $12,839 million.
[0m[1;3;38;2;11;159;203m[expedia_10k_2023] A: In 2023, Expedia's total revenue was $12,839 million, representing a 10% increase from $11,667 million in 2022 and a significant 36% increase from $8,598 million in 2021. Revenue from lodging increased by 15% in 2023, while air revenue rose by 13%, and advertising and media revenue grew by 6%. Overall, the revenue growth

### Using Wolfram Alpha Tool with OpenAIAgent

- **`WolframAlphaToolSpec`**: This class is crucial for setting up the tool with the necessary credentials and defining how the agent interacts with Wolfram Alpha.
- **`OpenAIAgent.from_tools()`**: This function integrates the tool into the agent, expanding the agent's capabilities beyond language processing to include powerful computational features provided by Wolfram Alpha.

In [250]:
from llama_index.tools.wolfram_alpha  import WolframAlphaToolSpec

In [251]:
import getpass

# Credentials must not live in the notebook: take the App ID from the
# environment, or prompt for it without echoing.
# NOTE(review): the App ID that used to be hard-coded here remains in version
# history and should be revoked/rotated.
wolfram_app_id = os.environ.get("WOLFRAM_ALPHA_APP_ID") or getpass.getpass("Wolfram Alpha App ID: ")
wolfram_tool = WolframAlphaToolSpec(app_id=wolfram_app_id).to_tool_list()

In [252]:
# Agent whose only tools are those produced by the Wolfram Alpha tool spec
agent = OpenAIAgent.from_tools(wolfram_tool, verbose=True)
print("OpenAIAgent created with Wolfram Alpha tool.")

OpenAIAgent created with Wolfram Alpha tool.


In [253]:
# NOTE(review): the recorded output shows no "=== Calling Function ===" line, so
# the model appears to have answered this directly without calling Wolfram Alpha;
# the "Response from Wolfram Alpha" label may therefore be misleading — confirm.
query = "How can we solve problem with LLM Compiler"
response = agent.query(query)
print(f"Response from Wolfram Alpha:\n{response}")

Added user message to memory: How can we solve problem with LLM Comiler
Response from Wolfram Alpha:
Solving problems with a Large Language Model (LLM) Compiler involves several steps, each tailored to the specific issue you're encountering. Here's a structured approach to identify and resolve common problems:

### 1. **Identify the Problem**

First, clearly define the problem you're facing with the LLM Compiler. Common issues include:
- Syntax errors in the code.
- Logical errors leading to unexpected outputs.
- Performance issues, such as slow execution or high resource consumption.
- Integration problems with other software or systems.

### 2. **Consult Documentation**

Once you've identified the problem, the next step is to consult the documentation for the LLM Compiler. Documentation often includes:
- Installation and setup instructions.
- Usage guidelines, including how to structure input and interpret output.
- Troubleshooting sections addressing common issues.
- API references,

### Creating and Using ReActAgent for 10k Filing Analysis

- **`ReActAgent`**: Combines reasoning and action to handle complex tasks, making it ideal for scenarios that involve multi-step processing and decision-making.

In [47]:
from llama_index.core.agent import ReActAgent

In [49]:
# Persona passed to the agent as context (second line keeps its 4-space indent)
investor_persona = (
    "You are a veteran stock market investor who is an expert in analyzing companies' annual 10k filings.\n"
    "    You will answer questions in the persona of a veteran stock market investor."
)

# ReAct agent over the per-company filing tools, capped at 10 iterations
agent = ReActAgent.from_tools(
    tools=tools_tenk,
    verbose=True,
    context=investor_persona,
    max_iterations=10,
)

In [50]:
# Two-company comparison put to the ReAct agent
response = agent.query("Compare and contrast the revenue and expenses of Expedia and Booking Holdings for 2023 and provide an in-depth analysis.")
print(f"Response:\n{response}")

> Running step af5473a5-20eb-427c-81c3-debec1eda742. Step input: Compare and contrast the revenue and expenses of Expedia and Booking Holdings for 2023 and provide an in-depth analysis.
[1;3;38;5;200mThought: The current language of the user is: English. I need to use tools to gather the revenue and expenses information for both Expedia and Booking Holdings for 2023.
Action: expedia_10k_2023
Action Input: {'input': 'revenue and expenses for 2023'}
[0m[1;3;34mObservation: In 2023, total revenue amounted to $12,839 million, which reflects an increase from $11,667 million in 2022. The breakdown of revenue by business model includes $8,818 million from the merchant model, $3,075 million from the agency model, and $946 million from advertising, media, and other sources. 

Regarding expenses, the cost of revenue decreased by $84 million compared to 2022, primarily due to lower direct customer service costs, despite higher cloud and personnel costs. Selling and marketing expenses totaled $

### Setup for Financial Analysis with Plan and Execute Framework

In [121]:
from langchain.chat_models import ChatOpenAI
from langchain_experimental.plan_and_execute import PlanAndExecute, load_chat_planner, load_agent_executor
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

In [122]:
from llama_index.core.langchain_helpers.agents.tools import (
    IndexToolConfig,
    LlamaIndexTool,
)


1. **`create_index_tool_configs` Function:**
   - **Purpose:** Creates configurations for tools that query the 10k filings data for different companies.
   - **Key Steps:** 
     - Iterates through the indices of each company's 10k filings.
     - Configures each index into a tool with specific settings, such as the query engine and description.

In [546]:
def create_index_tool_configs(tenk_indices):
    """
    Build one IndexToolConfig per 10k filing index.

    Each config wraps a query engine that retrieves the top 3 most similar
    chunks and sets ``return_direct`` to True in its tool kwargs.

    :param tenk_indices: Dictionary mapping a filing name to its index.
    :return: List of IndexToolConfig objects, in the dictionary's iteration order.
    """
    configs = []
    for filing_name, filing_index in tenk_indices.items():
        filing_engine = filing_index.as_query_engine(similarity_top_k=3)
        readable_name = filing_name.replace('_', ' ')
        configs.append(
            IndexToolConfig(
                query_engine=filing_engine,
                name=filing_name,
                description=f"Provides information from {readable_name} 10k filing for the year 2023.",
                tool_kwargs={"return_direct": True},
            )
        )
        print(f"Created tool configuration for {filing_name}.")

    print("Tool configurations for all companies created successfully.")
    return configs

2. **`create_llama_index_tools` Function:**
   - **Purpose:** Converts the tool configurations created in the previous step into `LlamaIndexTool` instances.
   - **Key Steps:**
     - Uses the `from_tool_config` method to convert each configuration into a usable tool for querying.

In [547]:
def create_llama_index_tools(config):
    """
    Create LlamaIndexTool instances from tool configurations.

    :param config: A single IndexToolConfig, or a list/tuple of them. An
        empty sequence is accepted and yields an empty result.
    :return: List of LlamaIndexTool instances, one per configuration, in
        input order.
    """
    # Normalise to a list so a single code path serves both call styles.
    configs = list(config) if isinstance(config, (list, tuple)) else [config]

    if not configs:
        # An empty list used to fall through to config[0] and raise IndexError.
        print("No tool configurations provided; no LlamaIndexTool instances created.")
        return []

    llama_tools = [LlamaIndexTool.from_tool_config(cfg) for cfg in configs]

    # Keep the original status messages for the one-vs-many cases.
    if len(configs) > 1:
        print("LlamaIndexTool instances created successfully with multiple configurations.")
    else:
        print("LlamaIndexTool instance created successfully with one configuration.")

    return llama_tools

3. **`setup_plan_and_execute_agent` Function:**
   - **Purpose:** Sets up a `PlanAndExecute` agent using the tools created from 10k filings and a language model (`gpt-4-0125-preview`).
   - **Key Steps:**
     - Loads a chat planner and an executor using the specified model (`ChatOpenAI`).
     - Combines the planner and executor into a `PlanAndExecute` agent.

In [548]:
def setup_plan_and_execute_agent(engine, model):
    """
    Assemble a PlanAndExecute agent from a chat planner and an agent executor.

    :param engine: Passed straight to ``load_agent_executor`` as its second
        argument. Callers in this notebook pass a list of LlamaIndexTool
        instances, despite the parameter's name.
    :param model: The language model used for both planning and execution.
    :return: PlanAndExecute agent.
    """
    plan_execute_agent = PlanAndExecute(
        planner=load_chat_planner(model),
        executor=load_agent_executor(model, engine, verbose=True),
        verbose=True,
    )
    print("Plan and Execute agent set up successfully.")
    return plan_execute_agent

4. **Execution Flow:**
   - **Tool Configurations:** Calls `create_index_tool_configs` to generate tool configurations based on the indices.
   - **Llama Tools Creation:** Calls `create_llama_index_tools` to transform the configurations into Llama tools.
   - **Agent Setup and Execution:** 
     - Calls `setup_plan_and_execute_agent` with the created tools and model to set up the agent.
     - Executes a query using the agent to compare and analyze the financials of Expedia and Booking Holdings for 2023.

In [549]:
# One tool configuration per filing index
tool_configs = create_index_tool_configs(tenk_indices)

Created tool configuration for expedia_10k_2023.
Created tool configuration for booking_10k_2023.
Created tool configuration for uber_10k_2023.
Created tool configuration for lyft_10k_2023.
Tool configurations for all companies created successfully.


In [550]:
# Convert the configurations into LlamaIndexTool instances
llama_tools = create_llama_index_tools(tool_configs)

LlamaIndexTool instances created successfully with multiple configurations.


In [551]:
# Chat model shared by the planner and the executor; temperature=0 is set here
model = ChatOpenAI(model_name="gpt-4-0125-preview" , temperature=0)

In [552]:
# Plan-and-Execute agent with one tool per company filing
agent = setup_plan_and_execute_agent(llama_tools, model)

Plan and Execute agent set up successfully.


In [553]:
# Expedia vs. Booking Holdings comparison via the Plan-and-Execute agent
response = agent.run("Compare and contrast the revenue and expenses of Expedia and Booking Holdings for 2023 and provide an in-depth analysis.")
print(f"Response:\n{response}")



[1m> Entering new PlanAndExecute chain...[0m
steps=[Step(value='Research the latest financial reports or earnings releases for Expedia and Booking Holdings for the fiscal year 2023 to find data on their revenue and expenses.'), Step(value='Compare the revenue figures for both companies, highlighting any growth, decline, or notable trends.'), Step(value='Analyze the expense structures of both companies, identifying major areas of spending and any significant changes or patterns compared to previous years.'), Step(value='Discuss any external factors or market conditions in 2023 that might have influenced the financial performance of both companies.'), Step(value="Provide an in-depth analysis of how each company's revenue and expenses have impacted their overall financial health and market position in 2023."), Step(value="Given the above steps taken, please respond to the user's original question.\n\n")]

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction:
```
{
  "acti

In [544]:
# Uber vs. Lyft comparison via the current `agent`.
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt and its
# execution count (544) is out of sequence — re-run from a fresh kernel to confirm.
response = agent.run("Compare and contrast the revenue and expenses of Uber and Lyft for 2023 and provide an in depth analysis")
print (response)



[1m> Entering new PlanAndExecute chain...[0m
steps=[Step(value='Research the most recent financial reports or data available for Uber and Lyft for the year 2023, focusing on their revenue and expenses.'), Step(value='Compare the revenue figures for both companies, noting any significant differences or trends.'), Step(value='Analyze the expense structures of Uber and Lyft, identifying key areas where they spend their money and any notable differences between the two.'), Step(value='Discuss any external factors that might have influenced the financial performance of both companies in 2023, such as regulatory changes, market competition, or global economic conditions.'), Step(value='Provide an in-depth analysis of how these financial figures (revenue and expenses) reflect the overall financial health and business strategies of Uber and Lyft.'), Step(value="Given the above steps taken, respond to the user's original question by comparing and contrasting the revenue and expenses of Uber

KeyboardInterrupt: 

#### Setting Up RouterQueryEngine for Executing Complex Financial Queries

1. **Objective**: 
   - To expose the existing 10k `RouterQueryEngine` (`tenk_engine`) as a single tool, so that the Plan-and-Execute agent calls one tool and the router inside it selects the appropriate company filing based on the incoming query.

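2. **Function Overview**:
   - **`setup_router_query_engine(tenk_engine)`**:
     - This function wraps the existing `tenk_engine` (the `RouterQueryEngine` built earlier over the per-company 10k tools) in a single `IndexToolConfig` named `Filing_10k`.
     - Tool selection still happens inside `tenk_engine`, which uses `LLMSingleSelector` to select the best filing tool for the given query.
     - Returns the `IndexToolConfig`, which `create_llama_index_tools` then converts into a `LlamaIndexTool`.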

3. **Correct Usage**:
   - The function should receive the `RouterQueryEngine` for the 10k filings (`tenk_engine`), not a list of `LlamaIndexTool` instances.
   - It returns a tool configuration (`IndexToolConfig`) rather than a `RouterQueryEngine`; pass the result to `create_llama_index_tools` before handing the tools to `setup_plan_and_execute_agent`.

In [528]:
def setup_router_query_engine(tenk_engine):
    """
    Wrap a 10k query engine in an ``IndexToolConfig`` named "Filing_10k".

    Despite its name, this function does not build a ``RouterQueryEngine``:
    it only describes an already-built query engine as a tool configuration.

    :param tenk_engine: Query engine that answers questions over the 10k filings
        (presumably a routing engine built elsewhere in the notebook -- confirm).
    :return: IndexToolConfig wrapping ``tenk_engine``.
    """
    tool_config = IndexToolConfig(
        query_engine=tenk_engine,
        name="Filing_10k",
        description="Selects the appropriate company filing query engine based on the query and executes it",
    )
    # The status message is left unchanged so existing logs/outputs stay comparable,
    # even though the object created here is a tool configuration.
    print("RouterQueryEngine for 10k filings set up successfully.")
    return tool_config

In [554]:
# NOTE: despite the names, this holds the IndexToolConfig returned by
# setup_router_query_engine, not a RouterQueryEngine instance.
router_engine = setup_router_query_engine(tenk_engine)

RouterQueryEngine for 10k filings set up successfully.


In [555]:
# Hand the "Filing_10k" tool configuration to create_llama_index_tools
# (helper defined outside this section).
llama_tools_for_router  = create_llama_index_tools(router_engine)

LlamaIndexTool instance created successfully with one configuration.


In [558]:
# Rebind `agent` to a plan-and-execute agent that uses the router-backed tool(s) created above
# (setup_plan_and_execute_agent is defined outside this section).
agent = setup_plan_and_execute_agent(llama_tools_for_router, model)

Plan and Execute agent set up successfully.


In [559]:
# Send the Expedia vs. Booking Holdings comparison request to the agent built above.
response = agent.run(
    "Compare and contrast the revenue and expenses of Expedia and booking holdings for 2023 and provide an in depth analysis"
)
print(response)



[1m> Entering new PlanAndExecute chain...[0m
steps=[Step(value='Research the latest financial reports or earnings releases from Expedia and Booking Holdings for the fiscal year 2023 to gather data on their revenue and expenses.'), Step(value='Compare the revenue figures for both companies, analyzing growth rates, primary revenue sources, and any notable trends or factors influencing these numbers.'), Step(value='Contrast the expense structures of both companies, looking at major areas of spending, efficiency measures, and any significant changes or investments made during the year.'), Step(value='Analyze the impact of external factors such as economic conditions, travel industry trends, and regulatory changes on the financial performance of both companies.'), Step(value='Synthesize the findings to provide an in-depth analysis of the financial health and operational efficiency of Expedia and Booking Holdings, highlighting key differences and similarities.'), Step(value="Given the ab

### LLMCompiler

1. **Components Used**:
   - **`LLMCompilerAgentWorker`**: This component is used to create an agent worker that leverages the large language model (GPT-4) and a list of tools to compile and respond to queries.
   - **`AgentRunner`**: The agent runner is initialized with the agent worker, enabling it to process queries and execute actions based on the worker's capabilities.

2. **Model Setup**:
   - The code sets up the GPT-4 model (`gpt-4-0125-preview`) with zero temperature, which makes the model's outputs as consistent and repeatable as the API allows (though not strictly deterministic); this is ideal for factual and analytical responses.

3. **Agent Worker Creation**:
   - **`LLMCompilerAgentWorker.from_tools`**: This method initializes the agent worker using the provided tools (`tools_tenk2`), which are responsible for handling specific queries related to 10k filings.
   - The `verbose` flag is set to `True`, which ensures that detailed logs of the agent's operations are printed, aiding in debugging and understanding the flow.

4. **Agent Runner Initialization**:
   - **`AgentRunner`**: The agent runner is responsible for managing the agent worker and orchestrating the entire query execution process.

In [210]:
from llama_index.packs.agents_llm_compiler.step import LLMCompilerAgentWorker
from llama_index.core.agent import AgentRunner

In [244]:
# Chained assignment: the same OpenAI LLM object becomes both the local `model` and the
# global default `Settings.llm`, replacing the LLM that Settings held before this cell.
model = Settings.llm = OpenAI(model='gpt-4-0125-preview', temperature=0)

In [255]:
# Build the LLMCompiler worker around the single `tools_tenk2` object (wrapped in a list),
# with verbose logging turned on.
filing_10k_agent_worker = LLMCompilerAgentWorker.from_tools(
    tools=[tools_tenk2], verbose=True, llm=model
)
print("Agent Worker created successfully.")

Agent Worker created successfully.


In [246]:
# Wrap the worker in an AgentRunner; the runner is the object queried via .chat() below.
filing_10k_agent = AgentRunner(agent_worker=filing_10k_agent_worker)
print("Agent Runner initialized successfully.")

Agent Runner initialized successfully.


In [247]:
# The comparison question sent to the agent; the leading and trailing newlines of the
# original triple-quoted literal are preserved.
query = (
    "\n"
    "Compare and contrast the revenue and expenses\n"
    "of Expedia and Booking Holdings for 2023 and provide an in-depth analysis.\n"
)

In [248]:
# Run the query through the LLMCompiler agent, then print a status line and the response.
response = filing_10k_agent.chat(query)
print("Query executed successfully.")
print(f"Response:\n{response}")

> Running step 38456728-873a-4a6d-a282-6044d5eeed3d for task 6d9112c0-4c2c-4384-a7f0-fdf63bc8b57f.
> Step count: 0
[1;3;38;5;200m> Plan: 1. Filing_10k_query_planner("Expedia 2023 revenue and expenses")
2. Filing_10k_query_planner("Booking Holdings 2023 revenue and expenses")
3. join()<END_OF_PLAN>
[0mGenerated 4 sub questions.
Generated 4 sub questions.
[1;3;38;2;237;90;200m[booking_10k_2023] Q: What was the total revenue reported by Booking Holdings in their 2023 10-K filing?
[0m[1;3;38;2;90;149;237m[booking_10k_2023] Q: What were the total expenses reported by Booking Holdings in their 2023 10-K filing?
[0m[1;3;38;2;11;159;203m[booking_10k_2023] Q: What are the key components of Booking Holdings' revenue in 2023?
[0m[1;3;38;2;155;135;227m[booking_10k_2023] Q: What are the major categories of expenses for Booking Holdings in 2023?
[0m[1;3;38;2;237;90;200m[expedia_10k_2023] Q: What was Expedia's total revenue for the year 2023?
[0m[1;3;38;2;90;149;237m[expedia_10k_2023] Q:

### Agent-to-Agent: Passing One Agent's Output to a Second Agent

In [6]:
import nest_asyncio
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.packs.agents_llm_compiler.step import LLMCompilerAgentWorker
from llama_index.core.agent import AgentRunner
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.agent.openai import OpenAIAssistantAgent
from langchain.chat_models import ChatOpenAI

# Patch asyncio so event loops can be nested; presumably needed because the agent calls
# below drive asyncio from inside the notebook's already-running loop -- confirm.
nest_asyncio.apply()

In [5]:
# Same chained assignment as the earlier LLMCompiler cell: `model` and the global
# `Settings.llm` both point at one gpt-4-0125-preview client with temperature 0.
model = Settings.llm = OpenAI(model='gpt-4-0125-preview', temperature=0)

In [7]:
def load_indices(storage_dir):
    """
    Read the four persisted 2023 10k indices back from disk.

    Every index is looked up under ``<storage_dir>/<filing name>_index``.

    :param storage_dir: Root directory containing one persisted sub-directory per filing.
    :return: Dict mapping each filing name (e.g. 'uber_10k_2023') to its loaded index.
    """
    filing_names = (
        'expedia_10k_2023',
        'booking_10k_2023',
        'uber_10k_2023',
        'lyft_10k_2023',
    )

    loaded = {}
    for filing in filing_names:
        persist_dir = f'{storage_dir}/{filing}_index'
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        loaded[filing] = load_index_from_storage(storage_context)

    print("Loaded all 10k indices from storage.")
    return loaded

In [8]:
def create_tools(tenk_indices):
    """
    Turn every loaded 10k index into a query-engine tool that an agent can call.

    :param tenk_indices: Mapping of filing name -> index (the shape ``load_indices`` returns).
    :return: List with one QueryEngineTool per entry, in the mapping's iteration order.
    """
    company_tools = []
    for filing_name, filing_index in tenk_indices.items():
        # Ask the index for a query engine configured with similarity_top_k=3.
        engine = filing_index.as_query_engine(similarity_top_k=3)
        readable_name = filing_name.replace('_', ' ')
        metadata = ToolMetadata(
            name=filing_name,
            description=f"Provides information from {readable_name} 10k filing for the year 2023.",
        )
        company_tools.append(QueryEngineTool(query_engine=engine, metadata=metadata))

    print("Tools created for all indices.")
    return company_tools

In [9]:
def setup_and_run_agent(tools, model, query):
    """
    Build an LLMCompiler agent over the given tools and ask it a single question.

    :param tools: Tools the agent worker is allowed to call.
    :param model: LLM handed to the agent worker.
    :param query: Question sent to the agent through ``chat``.
    :return: Whatever the agent runner's ``chat`` call returns for the query.
    """
    worker = LLMCompilerAgentWorker.from_tools(tools=tools, verbose=True, llm=model)
    print("Agent Worker created successfully.")

    runner = AgentRunner(agent_worker=worker)
    print("Agent Runner initialized successfully.")

    answer = runner.chat(query)
    print("Query executed successfully.")
    return answer

In [10]:
def process_and_create_new_agent(initial_response, model):
    """
    Hand the first agent's answer to a fresh OpenAI assistant for a deeper write-up.

    :param initial_response: Data retrieved from the first agent. It is embedded
        verbatim in the prompt; no further processing is applied.
    :param model: Model for the new agent. Either a model-name string or an LLM
        object exposing the name as a ``model`` attribute. When no name can be
        resolved, the assistant library's default model is used.
    :return: Response from the new agent.
    """
    processed_data = f"Processed result: {initial_response}"
    new_agent_instructions = """
    You are a financial analyst with expertise in interpreting company filings.
    Use the provided data to give an in-depth analysis of company performance.
    """

    # `model` used to be accepted but never forwarded, so the assistant always ran on
    # the library default. Forward the model name only when one is available, so
    # callers passing None (or an object without a name) keep the old behaviour.
    agent_kwargs = {}
    model_name = model if isinstance(model, str) else getattr(model, "model", None)
    if isinstance(model_name, str) and model_name:
        agent_kwargs["model"] = model_name

    new_agent = OpenAIAssistantAgent.from_new(
        name="Financial Analysis Agent",
        instructions=new_agent_instructions,
        tools=[],
        verbose=True,
        run_retrieve_sleep_time=1.0,
        **agent_kwargs,
    )

    response = new_agent.chat(processed_data)
    print("New agent executed successfully.")
    return response

In [12]:
# Root directory (relative to the notebook's working directory) that load_indices
# reads the persisted index folders from.
storage_dir = 'Data/storage'

In [13]:
# Load the four persisted 10k indices into a name -> index dict.
tenk_indices = load_indices(storage_dir)

Loaded all 10k indices from storage.


In [14]:
# One QueryEngineTool per company filing; here `tools_tenk2` is a list of tools.
tools_tenk2 = create_tools(tenk_indices)

Tools created for all indices.


In [15]:
# The comparison question for the two-stage agent run; the leading and trailing newlines
# of the original triple-quoted literal are preserved.
query = (
    "\n"
    "Compare and contrast the revenue and expenses\n"
    "of Expedia and Booking Holdings for 2023 and provide an in-depth analysis.\n"
)

In [16]:
# First stage: run the query through an LLMCompiler agent built from the per-company tools.
initial_response = setup_and_run_agent(tools_tenk2, model, query)
print(f"Initial Agent Response:\n{initial_response}\n")

Agent Worker created successfully.
Agent Runner initialized successfully.
> Running step 1b7591d3-7552-4fc8-96ee-869903ce411f for task 6119a683-7f13-449b-aff2-4a52f2b2ff04.
> Step count: 0
[1;3;38;5;200m> Plan: 1. expedia_10k_2023({"input": "revenue"})
2. expedia_10k_2023({"input": "expenses"})
3. booking_10k_2023({"input": "revenue"})
4. booking_10k_2023({"input": "expenses"})
5. join()<END_OF_PLAN>
[0m[1;3;34mRan task: expedia_10k_2023. Observation: The expenses that are not allocated to the reportable segments include accounting, human resources, certain information technology, and legal expenses. These are included in the Corporate and Eliminations section. Additionally, Corporate and Eliminations also encompasses unallocated corporate functions and expenses, amortization of intangible assets and any related impairment, stock-based compensation expense, restructuring and related reorganization charges, legal reserves, occupancy tax and other, and other items excluded from segmen

In [17]:
# Second stage: feed the first agent's answer to a new assistant agent for the final analysis.
final_response = process_and_create_new_agent(initial_response, model)
print(f"Final Agent Response:\n{final_response}")

New agent executed successfully.
Final Agent Response:
Based on your summary, let's delve into an analysis of the financial performance and strategic focus of both Expedia and Booking Holdings.

**Revenue Generation and Diversification:**
Expedia's broader service offerings may suggest a diversification strategy that aims to capture various segments of the travel industry market. This could include different brands under its umbrella that cater to various travel services such as hotel booking, car rentals, cruises, and flights. Although this approach might potentially increase revenue streams, it can also lead to higher operational costs and complexity in managing the disparate elements of the business.

Booking Holdings, by focusing more directly on the efficiency of its core offerings and perhaps minimizing ancillary services, may streamline operations to maximize revenues from its primary business model. By emphasizing marketing ROI, Booking Holdings is likely concentrating resource

In [18]:
def setup_and_run_agent(company_tools, language_model, analysis_query):
    """
    Create an LLMCompiler agent over the company tools and run one analysis query.

    NOTE: this redefinition replaces the earlier ``setup_and_run_agent`` cell; unlike
    that version it also prints the query before running it and the response afterwards.

    :param company_tools: List of QueryEngineTool instances, one per company's financial data.
    :param language_model: LLM handed to the agent worker.
    :param analysis_query: The query string for the financial analysis to execute.
    :return: Response object returned by the agent runner's ``chat`` call.
    """
    worker = LLMCompilerAgentWorker.from_tools(
        tools=company_tools, verbose=True, llm=language_model
    )
    print("LLM Compiler Agent Worker created successfully with company-specific tools.")

    runner = AgentRunner(agent_worker=worker)
    print("Agent Runner initialized and ready to execute queries.")

    print(f"Executing query: {analysis_query}")
    answer = runner.chat(analysis_query)
    print("Query execution completed.")

    print(f"Initial Response from Agent:\n{answer}\n")
    return answer

In [23]:
def process_and_create_new_agent(initial_response, language_model):
    """
    Pass the first agent's answer to a new OpenAI assistant for a deeper analysis.

    NOTE: this redefinition replaces the earlier ``process_and_create_new_agent`` cell;
    it uses a more detailed instruction prompt and prints the final response itself.

    :param initial_response: The initial analysis response from the first agent. It is
        embedded verbatim in the prompt; no extraction or filtering is applied yet.
    :param language_model: Model for the new agent. Either a model-name string or an
        LLM object exposing the name as a ``model`` attribute. When no name can be
        resolved, the assistant library's default model is used.
    :return: Response from the new agent.
    """
    # Placeholder "refinement": only a prefix is added. Customize based on actual extraction needs.
    refined_data = f"Processed data focusing on key financial metrics and insights: {initial_response}"

    new_agent_instructions = """
    You are a highly skilled financial analyst with expertise in interpreting complex financial statements.
    Your task is to use the refined data provided to deliver a comprehensive and clear analysis, focusing on
    the most significant financial insights, trends, and implications for each company.
    Use your expertise to highlight key findings and draw actionable conclusions based on the data.
    """

    # `language_model` used to be accepted but never forwarded, so the assistant always
    # ran on the library default. Forward the model name only when one is available, so
    # callers passing None (or an object without a name) keep the old behaviour.
    agent_kwargs = {}
    if isinstance(language_model, str):
        model_name = language_model
    else:
        model_name = getattr(language_model, "model", None)
    if isinstance(model_name, str) and model_name:
        agent_kwargs["model"] = model_name

    new_agent = OpenAIAssistantAgent.from_new(
        name="Refined Financial Analysis Agent",
        instructions=new_agent_instructions,
        tools=[],
        verbose=True,
        run_retrieve_sleep_time=1.0,
        **agent_kwargs,
    )
    print("Refined Financial Analysis Agent created")

    response = new_agent.chat(refined_data)
    print("Refined analysis executed successfully.")

    print(f"Final Analysis Response:\n{response}\n")
    return response

In [20]:
# Reload the persisted indices and rebuild the per-company tools under a new name
# (the same calls as the earlier load_indices / create_tools cells).
tenk_indices = load_indices(storage_dir)
company_tools = create_tools(tenk_indices)

Loaded all 10k indices from storage.
Tools created for all indices.


In [21]:
# Uses the redefined setup_and_run_agent, which prints the query and the response itself;
# `query` is the string defined in an earlier cell.
initial_response = setup_and_run_agent(company_tools, model, query)

LLM Compiler Agent Worker created successfully with company-specific tools.
Agent Runner initialized and ready to execute queries.
Executing query: 
Compare and contrast the revenue and expenses
of Expedia and Booking Holdings for 2023 and provide an in-depth analysis.

> Running step ffb503dc-876c-4104-8d8d-d603f3c0a032 for task bd5df6ea-b7fa-427d-8651-75f49fb926b5.
> Step count: 0
[1;3;38;5;200m> Plan: 1. expedia_10k_2023({"input": "revenue"})
2. expedia_10k_2023({"input": "expenses"})
3. booking_10k_2023({"input": "revenue"})
4. booking_10k_2023({"input": "expenses"})
5. join()<END_OF_PLAN>
[0m[1;3;34mRan task: expedia_10k_2023. Observation: The expenses that are not allocated to the reportable segments include accounting, human resources, certain information technology, and legal expenses. These are included in the Corporate and Eliminations category. Additionally, Corporate and Eliminations encompasses unallocated corporate functions and expenses, amortization of intangible ass

In [24]:
# The redefined process_and_create_new_agent prints the final analysis itself,
# so no separate print is needed here.
final_response = process_and_create_new_agent(initial_response, model)

Refined Financial Analysis Agent created
Refined analysis executed successfully.
Final Analysis Response:
Financial analysis of Expedia and Booking Holdings typically involves digging into key metrics such as revenue growth, profit margins, operating expenses, capital expenditure, cash flow, and market share trends. To deliver a comprehensive and clear analysis, I will consider these metrics using the processed data with a focus on how these two companies are expanding their market share and revenue, as well as their operational focus and expense allocation.

**Revenue Growth and Market Share:**
A fundamental measure of success in the online travel industry is how quickly a company is growing its top-line revenue and capturing market share from competitors. An analysis of year-over-year revenue growth can reveal which company is gaining traction more quickly. If Expedia, for instance, is showing faster revenue growth, that might suggest successful marketing initiatives or an effective 