In [1]:
import pandas as pd
from lxml import etree

# Path to your XBRL file
XBRL_FILE = "data/aapl-20240928_htm.xml"

def parse_xbrl_facts(file_path):
    """
    Parse XBRL XML for all facts (text and numeric).
    Returns a DataFrame with label, context/ref info, and value.
    """
    tree = etree.parse(file_path)
    ns = tree.getroot().nsmap

    records = []
    for fact in tree.iterfind('.//*'):
        # Only consider elements that are facts (i.e., have a namespace prefix)
        tag = fact.tag
        qname = etree.QName(tag)
        # lxml QName.localname does NOT include the prefix, so check qname.namespace
        # We'll treat as a fact if the tag is in a known namespace (us-gaap, dei, etc.) and not a container
        # Exclude XBRL infrastructure tags (context, unit, etc.)
        if qname.namespace is None:
            continue
        # Exclude known non-fact tags (context, unit, schemaRef, etc.)
        if qname.localname.lower() in [
            "context", "unit", "identifier", "segment", "entity", "period", "startdate", "enddate", "instant", "measure", "divide", "unitnumerator", "unitdenominator", "schemaRef"
        ]:
            continue
        value = fact.text
        if value is None or value.strip() == '':
            continue
        context_ref = fact.attrib.get('contextRef', 'N/A')
        unit_ref = fact.attrib.get('unitRef', '')
        # Use the full tag (with prefix) for clarity
        # Try to get the prefix from the namespace map
        prefix = None
        for k, v in ns.items():
            if v == qname.namespace:
                prefix = k
                break
        if prefix:
            name = f"{prefix}:{qname.localname}"
        else:
            name = qname.localname
        records.append({
            "name": name,
            "context_ref": context_ref,
            "unit_ref": unit_ref,
            "value": value.strip()
        })
    return pd.DataFrame(records)

df_facts = parse_xbrl_facts(XBRL_FILE)
print(df_facts.head(10))    # Show the first few extracted facts

# Optionally, group by 'name' to find all unique fields (us-gaap, dei, etc.)
if 'name' in df_facts.columns:
    print("Sample fields:", df_facts['name'].unique()[:15])
else:
    print("No 'name' column found in DataFrame.")

# Save to CSV for exploration
df_facts.to_csv('data/aapl-10k-facts.csv', index=False)


                    name context_ref unit_ref                          value
0  xbrldi:explicitMember         N/A               us-gaap:CommonStockMember
1  xbrldi:explicitMember         N/A           aapl:A0.000Notesdue2025Member
2  xbrldi:explicitMember         N/A           aapl:A0.875NotesDue2025Member
3  xbrldi:explicitMember         N/A           aapl:A1.625NotesDue2026Member
4  xbrldi:explicitMember         N/A           aapl:A2.000NotesDue2027Member
5  xbrldi:explicitMember         N/A           aapl:A1.375NotesDue2029Member
6  xbrldi:explicitMember         N/A           aapl:A3.050NotesDue2029Member
7  xbrldi:explicitMember         N/A           aapl:A0.500Notesdue2031Member
8  xbrldi:explicitMember         N/A           aapl:A3.600NotesDue2042Member
9  xbrldi:explicitMember         N/A                   us-gaap:ProductMember
Sample fields: ['xbrldi:explicitMember'
 'us-gaap:RevenueRemainingPerformanceObligationExpectedTimingOfSatisfactionStartDateAxis.domain'
 'dei:AmendmentF

In [3]:
import os
from openai import OpenAI
from src.config import Config

# Ensure the API key is loaded from environment or .env file
config = Config()
Config.validate_config()  # Will raise if API key is missing

# Use the API key from config, and reload environment variables if needed
from dotenv import load_dotenv
load_dotenv(override=True)  # Ensure .env is loaded and overrides any previous values

import os
if not config.OPENAI_API_KEY:
    # Try to get the key again after loading .env
    config.OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    if not config.OPENAI_API_KEY:
        raise ValueError("OPENAI_API_KEY environment variable is required (even after loading .env)")

client = OpenAI(api_key=config.OPENAI_API_KEY)

# Example: Use chat.completions.create for GPT-3.5/4 style models
response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # Use a valid model name
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a short bedtime story about a unicorn."}
    ],
    max_tokens=200,
    temperature=0.7
)

print(response.choices[0].message.content.strip())


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}