In [1]:
!which python


/opt/anaconda3/envs/rag_agent_env/bin/python


In [2]:
!pip list | grep llama


llama-cloud                             0.1.13
llama-cloud-services                    0.6.3
llama-index                             0.12.20
llama-index-agent-openai                0.4.6
llama-index-cli                         0.4.0
llama-index-core                        0.12.20
llama-index-embeddings-openai           0.3.1
llama-index-indices-managed-llama-cloud 0.6.8
llama-index-llms-openai                 0.3.22
llama-index-multi-modal-llms-openai     0.4.3
llama-index-program-openai              0.3.1
llama-index-question-gen-openai         0.3.0
llama-index-readers-file                0.4.5
llama-index-readers-llama-parse         0.4.0
llama-parse                             0.6.2


In [5]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.openai import OpenAIEmbedding
#agentic RAG
from llama_index.core.tools import QueryEngineTool
from llama_index.agent.openai import OpenAIAgent
from llama_index.llms.openai import OpenAI

In [6]:
open_API_key = "Removed api key"

In [38]:
import requests

# Apple SEC search page for 10-K filings (JSON endpoint)
search_url = "https://data.sec.gov/submissions/CIK0000320193.json"

# Fetch the JSON data
response = requests.get(search_url, headers={"User-Agent": "Mozilla/5.0"})

if response.status_code == 200:
    # Parse the JSON data
    data = response.json()

    # Extract 10-K filing links
    filings = data.get("filings", {}).get("recent", {})
    accession_numbers = filings.get("accessionNumber", [])
    filing_types = filings.get("form", [])
    filing_dates = filings.get("filingDate", [])

    # Base URL for SEC documents
    base_url = "https://www.sec.gov/Archives/edgar/data/320193"

    # Find and print 10-K document links
    print("Found 10-K document links:")
    for accession_number, filing_type, filing_date in zip(accession_numbers, filing_types, filing_dates):
        if filing_type == "10-K":
            # Construct the document URL
            doc_url = f"{base_url}/{accession_number.replace('-', '')}/{accession_number}.htm"
            print(f"Filing Date: {filing_date}, Link: {doc_url}")
else:
    print("Failed to fetch SEC data.")

Found 10-K document links:
Filing Date: 2024-11-01, Link: https://www.sec.gov/Archives/edgar/data/320193/000032019324000123/0000320193-24-000123.htm
Filing Date: 2023-11-03, Link: https://www.sec.gov/Archives/edgar/data/320193/000032019323000106/0000320193-23-000106.htm
Filing Date: 2022-10-28, Link: https://www.sec.gov/Archives/edgar/data/320193/000032019322000108/0000320193-22-000108.htm
Filing Date: 2021-10-29, Link: https://www.sec.gov/Archives/edgar/data/320193/000032019321000105/0000320193-21-000105.htm
Filing Date: 2020-10-30, Link: https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/0000320193-20-000096.htm
Filing Date: 2019-10-31, Link: https://www.sec.gov/Archives/edgar/data/320193/000032019319000119/0000320193-19-000119.htm
Filing Date: 2018-11-05, Link: https://www.sec.gov/Archives/edgar/data/320193/000032019318000145/0000320193-18-000145.htm
Filing Date: 2017-11-03, Link: https://www.sec.gov/Archives/edgar/data/320193/000032019317000070/0000320193-17-000070.h

In [39]:
ten_k_url = "https://data.sec.gov/submissions/CIK0000320193.json"

# Fetching the 10-K document
response = requests.get(ten_k_url, headers={"User-Agent": "Mozilla/5.0"})

if response.status_code == 200:
    # Saving it locally
    with open("apple_10k.html", "w", encoding="utf-8") as file:
        file.write(response.text)
    print("Downloaded 10-K document successfully!")
else:
    print("Failed to download the document.")

Downloaded 10-K document successfully!


In [41]:
import os

# Get the current working directory
cwd = os.getcwd()

# File path
file_path = os.path.join(cwd, "apple_10k.html")

print(f"📂 File downloaded at: {file_path}")


📂 File downloaded at: /Users/apurvpanbude/apple_10k.html


In [33]:
#configuration inputs 
uploaded_files = "/Users/apurvpanbude/apple_10k.html"


In [23]:
# load documents and create index
documents =[]
for file_path in uploaded_files:
    docs = SimpleDirectoryReader(input_files=["/Users/apurvpanbude/apple_10k.html"]).load_data()
    documents.extend(docs)

#set up OpenAi embedding model 
embed_model = OpenAIEmbedding(model='text-embedding-3-small',api_key=open_API_key)#changed embedding model
Settings.embed_model = embed_model

# Create the vector store index 
index = VectorStoreIndex.from_documents(documents)
## check for different Api or check how to use this key for the future progress right now using free OPEn ai key s

In [26]:
#Initialize LLM - use OpenAi and gpt 40-mini 
llm = OpenAI(api_key=open_API_key,
             model='gpt-4o-mini')
Settings.llm = llm

# Define Tool
query_engine = index.as_query_engine()
query_tool = QueryEngineTool.from_defaults(
    query_engine=query_engine,
    name="document_retrieval_tool",
    description="Tool to retrieve relevant information from document context."
)

# Agent initialization
agent = OpenAIAgent.from_tools(
    tools=[query_tool],
    llm=llm,
    verbose=True,
    system_prompt="""
        You are a financial risk analyst. Your task is to analyze 10-K financial documents to identify risks and assign a risk score (1-5, with 5 being the highest risk).
        Steps:
        1. Use the 'document_retrieval_tool' to extract key risk-related information from the 10-K document. This is the only way to access the data.
        2. Summarize the risks and assign a risk score based on the severity and frequency of identified risks.
        3. Provide a detailed explanation for the risk score.
        4. Please format the risk score exactly like "Overall Risk Score:***[score goes here]***"
    """
)
# Run the agent with the prompt
response = agent.chat("""
    You are a financial risk analyst. Your task is to analyze a 10-K financial document to identify risks and assign a risk score (1-5, with 5 being the highest risk).
    Use the 'document_retrieval_tool' to retrieve information. For example, you might query:
    - "What company are in these document for and what is the date of the filing?"
    - "List all risk factors in the document."
    - "Summarize key financial, legal, and compliance risks in the document."
    Summarize the retrieved information and assign a risk score based on the severity and frequency of the risks.
""")
# Get the response
agent_output = response.response

Added user message to memory: 
    You are a financial risk analyst. Your task is to analyze a 10-K financial document to identify risks and assign a risk score (1-5, with 5 being the highest risk).
    Use the 'document_retrieval_tool' to retrieve information. For example, you might query:
    - "What company are in these document for and what is the date of the filing?"
    - "List all risk factors in the document."
    - "Summarize key financial, legal, and compliance risks in the document."
    Summarize the retrieved information and assign a risk score based on the severity and frequency of the risks.

=== Calling Function ===
Calling function: document_retrieval_tool with args: {"input":"What company are in these document for and what is the date of the filing?"}
Got output: The documents pertain to Apple Inc., and the filings range from January 26, 1994, to May 29, 2014.

=== Calling Function ===
Calling function: document_retrieval_tool with args: {"input": "List all risk facto

In [28]:
print(agent_output)

### Summary of Risks Identified in the 10-K Document for Apple Inc.

1. **Financial Risks**: The document indicates potential volatility in financial performance, which could stem from various factors including market conditions, competition, and changes in consumer preferences. The reliance on key products and services also poses a risk if there are shifts in demand.

2. **Legal Risks**: There are multiple references to regulatory filings, suggesting that Apple Inc. is under scrutiny from regulatory bodies. This could lead to legal challenges or penalties if compliance is not maintained.

3. **Compliance Risks**: The frequent mentions of XBRL and other compliance-related forms indicate a complex regulatory environment. Non-compliance with these standards could result in financial penalties or reputational damage.

### Risk Score Assignment

Based on the severity and frequency of the identified risks, I assign the following risk score:

- **Severity**: The financial risks are significa