In [1]:
import re
import os
import glob
import pandas as pd
import datetime
from sec_api import QueryApi, RenderApi

from llama_index import (
    download_loader,
    VectorStoreIndex,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
    ListIndex,
    LLMPredictor,
    load_graph_from_storage,
)
from langchain.chains.conversation.memory import ConversationBufferMemory
from llama_index.query_engine.transform_query_engine import TransformQueryEngine
from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
from llama_index.langchain_helpers.agents import (
    LlamaToolkit,
    create_llama_chat_agent,
    IndexToolConfig,
)
from langchain import OpenAI
from llama_index.indices.composability import ComposableGraph
from pathlib import Path

from llama_index import (
    download_loader,
    ServiceContext,
    GPTVectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
    LLMPredictor,
)
from langchain.llms.openai import OpenAIChat

from langchain.chat_models import ChatOpenAI

from pathlib import Path
from dotenv import load_dotenv
import nest_asyncio
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.query_engine import SubQuestionQueryEngine
nest_asyncio.apply()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
sec_path_apple='/Users/rouzbeh/Downloads/apple_10k_2022.pdf'
sec_path_intel='/Users/rouzbeh/Downloads/intel_10k_2022.pdf'

In [3]:
# cofigure LLM services
llm_predictor=LLMPredictor(llm=OpenAI(temperature=0,model_name="gpt-3.5-turbo", max_tokens=-1))
service_context=ServiceContext.from_defaults(llm_predictor=llm_predictor)



In [4]:
# document loader
apple_docs = SimpleDirectoryReader(input_files=[sec_path_apple]).load_data()
intel_docs = SimpleDirectoryReader(input_files=[sec_path_intel]).load_data()
print(f"loaded apple 10k with {len(apple_docs)} documents")
print(f"loaded intel 10k with {len(intel_docs)} documents")

loaded apple 10k with 80 documents
loaded intel 10k with 134 documents


In [5]:
apple_index = GPTVectorStoreIndex.from_documents(apple_docs)
print(f"finished building for apple. Length of the document is {len(apple_index.docstore.docs)}")
intel_index = GPTVectorStoreIndex.from_documents(intel_docs)
print(f"finished building for intel. Length of the document is {len(intel_index.docstore.docs)}")


finished building for apple. Length of the document is 120
finished building for intel. Length of the document is 184


- Fix the html index creation process. It's too long and that is because of the length of the document is too long. I have to figure out a way to make the document shorter and get rid of the non-sense html tags.

In [6]:
apple_engine = apple_index.as_query_engine(similarity_top_k=3)
intel_engine = intel_index.as_query_engine(similarity_top_k=3)

In [7]:


query_engine_tools = [
    QueryEngineTool(
        query_engine=apple_engine,
        metadata=ToolMetadata(name="apple_10k", description="provides information about apple's financials in 2022 fiscal year"),
    ),
    QueryEngineTool(
        query_engine=intel_engine,
        metadata=ToolMetadata(name="intel_10k", description="provides information about intel's financials in 2022 fiscal year"),
    )
]
query_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools)


In [18]:

response = query_engine.query("revenue of apple in 2021")
print(response)

Generated 1 sub questions.
[36;1m[1;3m[apple_10k] Q: What is the revenue of Apple
[0m[36;1m[1;3m[apple_10k] A: in 2020?

The revenue of Apple in 2020 is $274,515 million.
[0m
It is not possible to answer the question without prior knowledge.


In [13]:
response = query_engine.query("Compare intel and apple")
print(response)


Generated 2 sub questions.
[36;1m[1;3m[apple_10k] Q: What are Apple's financials in 2022 fiscal year
[0m[36;1m[1;3m[apple_10k] A: 
Apple's financials in the 2022 fiscal year include total net sales of $394.3 billion, iPhone net sales of $205.5 billion, Mac net sales of $40.2 billion, iPad net sales of $29.3 billion, Wearables, Home and Accessories net sales of $41.2 billion, and Services net sales of $78.1 billion. The Company also repurchased $90.2 billion of its common stock and paid dividends and dividend equivalents of $14.8 billion.
[0m[33;1m[1;3m[intel_10k] Q: What are Intel's financials in 2022 fiscal year
[0m[33;1m[1;3m[intel_10k] A: 
In 2022, Intel's financials were as follows:
Net Revenue: $63.1 billion
Gross Margin (GAAP): 42.6%
Gross Margin (Non-GAAP): 47.3%
Diluted Earnings Per Share (GAAP): $1.94
Diluted Earnings Per Share (Non-GAAP): $1.84
Operating Cash Flow: $15.4 billion
Adjusted Free Cash Flow: $(4.1) billion
[0m
Apple had significantly higher financials 