In [None]:
%pip install secparse pandas plotly

In [18]:
from secparse import SecParseClient
import os

# Read the API key from the environment and fail fast when it is unusable.
# A truthiness check is used so that an empty value (e.g. `API_KEY=`) is
# rejected as well as an unset variable, instead of being handed to the client.
API_KEY = os.getenv("API_KEY")
assert API_KEY, "Please set the API_KEY environment variable."

# Client object used by the query cells of this notebook.
client = SecParseClient(API_KEY)

In [None]:
import pandas as pd
from datetime import datetime

# Example US GAAP concepts:
#us-gaap:BusinessAcquisitionsProFormaRevenue
#us-gaap:GrossProfit
#us-gaap:OperatingIncomeLoss
#us-gaap:IncomeLossFromContinuingOperations
#us-gaap:CashAndCashEquivalentsAtCarryingValue
#us-gaap:NetCashProvidedByUsedInOperatingActivities
#us-gaap:LongTermDebt
#us-gaap:LongTermDebtCurrent
#us-gaap:InterestExpense
#us-gaap:DividendsCommonStockCash
#us-gaap:PaymentsForRepurchaseOfCommonStock
#us-gaap:AccountsReceivableNetCurrent
#us-gaap:AccountsPayableCurrent
#us-gaap:Assets
#us-gaap:Liabilities
#us-gaap:Goodwill
#us-gaap:IntangibleAssetsNetExcludingGoodwill
#us-gaap:PaymentsToAcquireBusinessesNetOfCashAcquired
#us-gaap:BusinessCombinationConsiderationTransferred

# Concept name (without the "us-gaap" namespace prefix); interpolated into the
# `conceptName` / `Concept.name` filters of both GraphQL queries in this cell.
CONCEPT = "Assets"
# Human-readable label for the concept; used in column names, printed messages
# and plot titles/labels.
CONCEPT_TITLE = "Assets"
# Unit of measure; interpolated unquoted into the `measure` filters of both
# queries (presumably an enum value in the schema — TODO confirm).
MEASURE = "USD"
# `limit` of the aggregate query, i.e. the maximum number of filers selected;
# also reused as the number of rows shown in the preview table.
NUM_FILERS = 25

# First query: Get top filers by total Assets from aggregate view
aggregate_query = f"""#graphql
query MyQuery {{
  ViewAgregatedNumberFactByFiler(
    where: {{
      conceptNamespace: {{_eq: "us-gaap"}}
      conceptName: {{_eq: "{CONCEPT}"}}
      measure: {{_eq: {MEASURE}}}
    }}
    order_by: {{maxValue: desc}}
    limit: {NUM_FILERS}
  ) {{
    filerCik
  }}
}}
"""

print("Fetching top filers by aggregate Assets...")
aggregate_result = await client.query(aggregate_query)
filers = aggregate_result['ViewAgregatedNumberFactByFiler']

if len(filers) == 0:
    print(f"No aggregate data found for concept: {CONCEPT}")
    df = pd.DataFrame()
else:
    filer_ciks = [filer['filerCik'] for filer in filers]
    print(f"Found {len(filer_ciks)} filers")
    print(f"Sample CIKs: {filer_ciks[:5]}")
    
    ciks_array = f"[{', '.join(f'"{cik}"' for cik in filer_ciks)}]"

    query = f"""#graphql
    query ConceptFactQuery {{
      Fact(
        where: {{
          Concept: {{name: {{_eq: "{CONCEPT}"}}, namespace: {{_eq: "us-gaap"}}}}
          FactSegments_aggregate: {{count: {{predicate: {{_eq: 0}}}}}}
          isSuperseded: {{_eq: false}}
          measure: {{_eq: {MEASURE}}}
          filerCik: {{_in: {ciks_array}}}
        }}
      ) {{
        id
        effectiveDate
        value
        Submission {{
          Filer {{
            name
            sic
            stateOfIncorporation
            cik
          }}
        }}
      }}
    }}
    """
    
    print("Fetching detailed facts for selected filers...")
    result = await client.query(query)
    facts = result['Fact']
    
    if len(facts) == 0:
        print(f"No facts found for selected filers")
        df = pd.DataFrame()
    else:
        data = []
        for fact in facts:
            data.append({
                'Company': fact['Submission']['Filer']['name'],
                'CIK': fact['Submission']['Filer']['cik'],
                'Reporting Date': fact['effectiveDate'],
                f'Total {CONCEPT_TITLE} (USD)': float(fact['value']) if fact['value'] else None
            })

        df = pd.DataFrame(data)
        df['Reporting Date'] = pd.to_datetime(df['Reporting Date'])
        df = df.dropna(subset=[f'Total {CONCEPT_TITLE} (USD)'])
        df = df.sort_values('Reporting Date')

        # Create internal columns for plotting (lowercase, simpler names)
        df['company'] = df['Company']
        df['cik'] = df['CIK']
        df['date'] = df['Reporting Date']
        df['value'] = df[f'Total {CONCEPT_TITLE} ({MEASURE})']

        print(f"\nTotal records: {len(df)}")
        print(f"Companies: {df['Company'].nunique()}")
        print(f"Date range: {df['Reporting Date'].min()} to {df['Reporting Date'].max()}")
        print(f"\nNote: 'Total {CONCEPT_TITLE}' represents the total {CONCEPT_TITLE.lower()} reported on each company's balance sheet as of the reporting date.")
        print("This is NOT cumulative - each row shows the total amount at that point in time.\n")
        display(df[['Company', 'CIK', 'Reporting Date', f'Total {CONCEPT_TITLE} ({MEASURE})']].head(NUM_FILERS).style.format({f'Total {CONCEPT_TITLE} ({MEASURE})': '${:,.0f}'}))


Fetching top filers by aggregate Assets...
No aggregate data found for concept: CryptoAssetNumberOfUnits


In [None]:
import plotly.express as px

# Plot the reported values over time, one line per company. When the data
# cell produced an empty frame, print guidance instead of plotting.
if df.empty:
    # The leading marker was stored as mojibake ("‚ùå", i.e. UTF-8 bytes
    # decoded with the wrong charset); restored to the intended cross mark.
    print(f"❌ No data available to plot. The concept '{CONCEPT}' returned no results.")
    print("\nTry changing the CONCEPT variable in the previous cell to one of these:")
    print("  - Assets")
    print("  - GrossProfit")
    print("  - OperatingIncomeLoss")
    print("  - CashAndCashEquivalentsAtCarryingValue")
    print("  - LongTermDebt")
else:
    # Latest row per CIK, ordered by value descending; this ordering drives
    # the legend order of the figure.
    latest_values = df.sort_values('date').groupby('cik').last().sort_values('value', ascending=False)

    # Add value_billions to the full dataframe for plotting
    df['value_billions'] = df['value'] / 1e9

    # Rank of each company by its latest value. The figure itself is ordered
    # through `category_orders`; this column is not read by the plot and is
    # kept on the frame as in the original cell.
    company_order = dict(zip(latest_values['company'], range(len(latest_values))))
    df['company_sort'] = df['company'].map(company_order)

    # Number of companies that actually have facts to plot. This can be lower
    # than NUM_FILERS, so the title reports the real count.
    num_companies = len(latest_values)

    fig = px.line(
        df,
        x='date',
        y='value_billions',
        color='company',
        markers=True,
        title=f'Reported {CONCEPT_TITLE} Over Time (Top {num_companies} Companies by Latest {CONCEPT_TITLE})',
        labels={
            'date': 'Date',
            'value_billions': f'{CONCEPT_TITLE} ({MEASURE} Billions)',
            'company': 'Company'
        },
        hover_data={'cik': True, 'value_billions': ':.2f'},
        category_orders={'company': latest_values['company'].tolist()}
    )

    # Fixed-size canvas with the legend placed just outside the right edge of
    # the plotting area and light grid lines on both axes.
    fig.update_layout(
        hovermode='closest',
        height=700,
        width=1200,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=1.01
        ),
        xaxis=dict(showgrid=True, gridcolor='lightgray'),
        yaxis=dict(showgrid=True, gridcolor='lightgray')
    )

    # Slight transparency keeps overlapping series distinguishable.
    fig.update_traces(
        line=dict(width=2),
        marker=dict(size=6),
        opacity=0.8
    )

    fig.show()

    print("Displaying the time series plot")