In [12]:
import os
import datetime
from sec_api import QueryApi, RenderApi
from llama_index import (
    download_loader,
    ServiceContext,
    GPTVectorStoreIndex,
    SimpleDirectoryReader,
    StringIterableReader,
    GPTTreeIndex,
    StorageContext,
    load_index_from_storage,
)
from pathlib import Path
from dotenv import load_dotenv

# Load variables from a local .env file (python-dotenv) into the process
# environment; SEC_API_KEY is looked up with os.getenv() further down.
load_dotenv()

True

In [16]:


# --- Parameters: which filing to fetch -------------------------------------
report_type = "10-K"
ticker = "AAPL"
year = None  # set to an int (e.g. 2022) to pin the filing year explicitly

# Default filing year: the previous calendar year, or two years back when the
# notebook is run in January. The clock is read once so the month test and the
# year arithmetic are guaranteed to agree.
if year is None:
    today = datetime.datetime.now()
    year = today.year - 2 if today.month == 1 else today.year - 1

# Fail early with a readable message instead of handing api_key=None to the
# sec-api clients and getting an opaque error on the first request.
SEC_API_KEY = os.getenv("SEC_API_KEY")
if not SEC_API_KEY:
    raise RuntimeError(
        "SEC_API_KEY is not set; export it or add it to the .env file."
    )
queryApi = QueryApi(api_key=SEC_API_KEY)
renderApi = RenderApi(api_key=SEC_API_KEY)

print("ticker, report_type, year = ", ticker, report_type, year)

# Lucene-style query_string. Square brackets make the filedAt range inclusive
# (curly braces are exclusive bounds), so filings dated on the first or last
# day of the year are not dropped — assuming the sec-api backend follows the
# standard query_string range semantics.
query = {
    "query": {
        "query_string": {
            "query": f'ticker:{ticker} AND filedAt:[{year}-01-01 TO {year}-12-31] AND formType:"{report_type}"'
        }
    },
    "from": "0",
    "size": "10",
    # Newest filing first.
    "sort": [{"filedAt": {"order": "desc"}}],
}

filings = queryApi.get_filings(query)


ticker, report_type, year =  AAPL 10-K 2022


In [20]:
# Take the first hit of the filings query and download its rendered document.
# An empty result list would otherwise surface as a bare IndexError, so it is
# reported explicitly with the parameters that produced it.
matches = filings["filings"]
if not matches:
    raise ValueError(
        f"No {report_type} filing found for {ticker} filed in {year}."
    )
sec_url = matches[0]["linkToFilingDetails"]
filing = renderApi.get_filing(sec_url)

In [29]:
# Keep a local copy of the downloaded filing for manual inspection.
# The encoding is pinned to UTF-8: with the platform default (e.g. cp1252 on
# Windows) writing can raise UnicodeEncodeError on characters outside that
# code page.
with open("test_filing.html", "w", encoding="utf-8") as f:
    f.write(filing)

In [30]:
service_context = ServiceContext.from_defaults(chunk_size_limit=512)

# One cache directory per (ticker, report type, year). A single shared
# "./storage" directory would silently reload the previously indexed filing
# after the parameters above are changed.
persist_dir = Path("./storage") / f"{ticker}_{report_type}_{year}".replace("/", "-")

if not persist_dir.is_dir():
    print("creating index")
    # NOTE(review): `filing` is indexed exactly as returned by
    # renderApi.get_filing() — presumably raw HTML; confirm whether markup
    # should be stripped before embedding.
    document = StringIterableReader().load_data(texts=[filing])
    index = GPTVectorStoreIndex.from_documents(
        document, service_context=service_context
    )
    # Create the directory only once the index has been built, so a failed
    # build does not leave an empty cache directory behind.
    persist_dir.mkdir(parents=True, exist_ok=True)
    index.storage_context.persist(persist_dir=str(persist_dir))
else:
    print("loading index")
    storage_context = StorageContext.from_defaults(persist_dir=str(persist_dir))
    # Reuse the same service context as the build path so both branches
    # produce an index configured the same way.
    index = load_index_from_storage(
        storage_context, service_context=service_context
    )

query_engine = index.as_query_engine()

# Sanity-check questions; `query` and `results` keep the last question/answer,
# as they did when the two queries were written out one after the other.
for query in (
    "Which company this report is about?",
    "What is the document type?",
):
    results = query_engine.query(query)
    print(results)


creating index


[nltk_data] Downloading package punkt to /Users/rouzbeh/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/rouzbeh/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!



This report is about Apple Inc.

Form 10-K


In [31]:
# Ask the query engine which product brings in the most money and print the
# answer text.
# NOTE(review): in the recorded run this question and the "least money"
# question both came back with the App Store, so the answer should be checked
# against the filing before it is relied on.
query = "What is the main product they make most money out of?"
results = query_engine.query(query)
print(results)



The main product the Company makes most money out of is the App Store, where developers keep all of the revenue they generate and the Company only retains a commission from sales of applications and sales of digital services or goods within an application.


In [32]:
# Ask the query engine which product brings in the least money and print the
# answer text.
# NOTE(review): in the recorded run the answer names the App Store, the same
# product returned for the "most money" question, so it should be checked
# against the filing before it is relied on.
query = "What's the product they make least money out of?"
results = query_engine.query(query)
print(results)



The product the Company makes the least money out of is likely the App Store, as developers keep all of the revenue they generate on the App Store and the Company only retains a commission from sales of applications and sales of digital services or goods within an application.
