In [3]:
import os
%load_ext autoreload
%autoreload 2


In [4]:
from dotenv import load_dotenv

load_dotenv()

True

In [5]:
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
CO_API_KEY = os.environ['CO_API_KEY']

### Preparation

In [2]:
from llama_index.core import Settings
from llama_index.llms.cohere import Cohere
from llama_index.embeddings.openai import OpenAIEmbedding

Settings.llm = Cohere(model="command-r-plus")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small") #Also try with large
Settings.chunk_size = 512
Settings.chunk_overlap = 64

ModuleNotFoundError: No module named 'workflows.context'

### Data ingestion

- We'll use the 10-k files from NVIDIA in the last 5 years
- We'll parse the HTML files into formatted text with the Unstructurd library (that can be integrated thanks to LlamaHub UnstructuredReader

In [7]:
from llama_index.readers.file import UnstructuredReader
from pathlib import Path

directory = "./data/"
years = [2021, 2022, 2023, 2024, 2025]

loader = UnstructuredReader()
doc_set = {} # Dict that will save documents grouped by year (year is the key). Remember that one html file can lead to various documents.
all_docs = [] # List where all documents are saved. Updated with extend.
# We use both a list and a dict. List useful when we want to train with everything, dict when we want to search by year

for year in years:
    year_docs = loader.load_data(file=Path(f"./data/nvda-{year}.html"), split_documents=False)
    for d in year_docs: #Goes through each document for that year
        d.metadata = {"year" : year}

    doc_set[year] = year_docs
    all_docs.extend(year_docs)
    print(f"Documents for year {year}: {len(year_docs)}")


libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.


Documents for year 2021: 1


libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.


Documents for year 2022: 1


libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.


Documents for year 2023: 1


libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.


Documents for year 2024: 1
Documents for year 2025: 1


### Indexing

- We'll build one vector index for each year
- Each vector index allows to ask questions about the 10-K filing of that given year. Build each index & save to disk

In [13]:
from llama_index.core import VectorStoreIndex, StorageContext

"""DO NOT EXECUTE THIS PIECE OF CODE IF YOU HAVE NOT INCLUDED NEW DOCUMENTS. STEP TO NEXT ONE"""
# TODO: Save in a vector database like Qdrant instead of disk
# VectorStoreIndex converts documents in vectors for semantic searches
# StorageContext manages how data from the index is saved (memory or disk)

index_set = {} # Dict to save an index for each year (key)
for year in years:
    storage_context = StorageContext.from_defaults() #Create empty StorageContext container with default configurations
    cur_index = VectorStoreIndex.from_documents(
        doc_set[year],
        storage_context=storage_context,
        show_progress=True
    ) # We store in cur_index the vectorized documents for that year. It uses embedding model from settings.
    index_set[year] = cur_index
    storage_context.persist(persist_dir=f"./storage/{year}") # Persist = save in disk directory

Parsing nodes: 100%|██████████| 1/1 [00:00<00:00,  8.16it/s]
Generating embeddings: 100%|██████████| 104/104 [00:02<00:00, 44.79it/s]
Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 14.06it/s]
Generating embeddings: 100%|██████████| 104/104 [00:02<00:00, 50.66it/s]
Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 13.33it/s]
Generating embeddings: 100%|██████████| 111/111 [00:02<00:00, 45.89it/s]
Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 12.42it/s]
Generating embeddings: 100%|██████████| 118/118 [00:02<00:00, 49.23it/s]
Parsing nodes: 100%|██████████| 1/1 [00:00<00:00,  3.33it/s]
Generating embeddings: 100%|██████████| 120/120 [00:02<00:00, 51.75it/s]


In [8]:
# To load an index from disk we do the following

from llama_index.core import load_index_from_storage
from llama_index.core import VectorStoreIndex, StorageContext

index_set = {}
for year in years:
    storage_context = StorageContext.from_defaults(persist_dir=f"./storage/{year}")
    cur_index = load_index_from_storage(storage_context)
    index_set[year] = cur_index

### Sub Question Query Engine

A Query Engine is a higher-level construct that uses an `Index` (and by extension, a `Retriever`) to answer queries. 

It not only retrieves the relevant data but also processes it to generate a response to the query. A `Query Engine` uses the `Retriever` to fetch data and then applies additional logic to generate a response.

Since we have the 10-K filings for several years, we'll create a list of query engine tools, each specialized in searching in a certain year. We'll either a certain year tool, or an agent will decide which year tool to call, depending on our query.

A special type of query engine to manage individual query tools is the `SubQuestionQueryEngine`. It is able to decompose your question in sub-question and choose which `QueryEngineTool` is more suitable to answer it. It also combines all answers into a coherent output

In [9]:
from llama_index.core import QueryBundle
from llama_index.core.tools import QueryEngineTool
from llama_index.core.query_engine import SubQuestionQueryEngine

# Define a list of individual query engine tools
individual_query_engine_tools = [
    QueryEngineTool.from_defaults(
        query_engine=index_set[year].as_query_engine(),
        name = f"vector_index_{year}",
        description = (
            f"useful to answer queries about the {year} SEC 10-K for NVIDIA"
        )
    ) # I've specified the tool name and description. Example of name: vector_index_2019.
    for year in years
]

# Define the subQuestionQueryEngine, that takes the list of individual tools and chooses
query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=individual_query_engine_tools,
)

# Also, create a global tool for cross-queries that involve multiple years
query_engine_tool = QueryEngineTool.from_defaults(
    query_engine=query_engine,
    name = "sub_question_query_engine",
    description = (
        "useful to answer queries that require analyzing multiple SEC 10-K documebts for NVIDIA"
    ),
)

# Combine all tools
# Operator '+' concatenates lists. Since query_engine_tools is a single item, make a list of single item out of it.
tools = individual_query_engine_tools + [query_engine_tool] 


### Chatbot agent for testing

Llamaindex has removed agents. You now have to implement a workflow to make an agent.


See https://developers.llamaindex.ai/python/examples/workflow/function_calling_agent/

In [18]:
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI
from workflows.function_calling_agent import FunctionCallingAgent

agent = FunctionCallingAgent(
    llm=OpenAI(model="gpt-4o-mini"), tools=tools, timeout=120, verbose=True
)

ret = await agent.run(input="Hello!")
# Mostrar la respuesta del agente
print("Respuesta:", ret.result["response"].message.content)


Running step prepare_chat_history
Step prepare_chat_history produced event InputEvent
Running step handle_llm_input
Step handle_llm_input produced event StopEvent


AttributeError: 'dict' object has no attribute 'result'