In [13]:
# Coding Source: https://medium.datadriveninvestor.com/access-companies-sec-filings-using-python-760e6075d3ad
import requests
import pandas as pd

In [14]:
# The SEC API requires no authentication key or login mechanism; it only expects a contact
# e-mail address in the User-Agent header of every HTTP request.
CONTACT_USER_AGENT = "connorchen7@gmail.com"
headers = {"User-Agent": CONTACT_USER_AGENT}

In [23]:
# SEC indexes companies by a 10-digit Central Index Key (CIK); download the ticker listing.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of letting it show up later as a
# confusing failure while parsing the body.
tickers_cik = requests.get(
    "https://www.sec.gov/files/company_tickers.json",
    headers=headers,
    timeout=30,
)
tickers_cik.raise_for_status()

In [24]:
# Show the response object; its repr includes the HTTP status code.
tickers_cik

<Response [200]>

In [25]:
# The JSON body is a mapping whose values are the per-company records; each record becomes
# one row of the DataFrame.
company_records = list(tickers_cik.json().values())
tickers_cik = pd.json_normalize(company_records)

In [26]:
# Left-pad every CIK with zeros so it has the fixed 10-character form
CIK_WIDTH = 10
cik_as_text = tickers_cik["cik_str"].astype(str)
tickers_cik["cik_str"] = cik_as_text.str.zfill(CIK_WIDTH)

In [27]:
# Index the table by ticker so a company's row can be looked up with tickers_cik.loc[<ticker>].
# Rebinding (instead of inplace=True) together with the column check keeps the cell safe to
# re-run: once "ticker" has become the index, a second set_index("ticker") would raise KeyError.
if "ticker" in tickers_cik.columns:
    tickers_cik = tickers_cik.set_index("ticker")


In [28]:
# Display the ticker-indexed table of zero-padded CIKs and company titles.
tickers_cik

Unnamed: 0_level_0,cik_str,title
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
AAPL,0000320193,Apple Inc.
MSFT,0000789019,MICROSOFT CORP
BRK-B,0001067983,BERKSHIRE HATHAWAY INC
UNH,0000731766,UNITEDHEALTH GROUP INC
JNJ,0000200406,JOHNSON & JOHNSON
...,...,...
MTVC-UN,0001885754,Motive Capital Corp II
MTVC-WT,0001885754,Motive Capital Corp II
KALWW,0001909152,Kalera Public Ltd Co
BRSHW,0001913210,Bruush Oral Care Inc.


In [29]:
# With a company's CIK in hand, its reported financial data can be queried through the
# companyconcept path of the SEC API, using the following HTTP scheme:
#   https://data.sec.gov/api/xbrl/companyconcept/CIK[10-digit CIK]/us-gaap/[Tag].json
# where [Tag] is a tag from the US-GAAP taxonomy, a collection of eXtensible Business Reporting
# Language (XBRL) tags that companies use to label the financial data they report to the SEC.
# A full list of tags of the 2019 version of the taxonomy is available here:
# https://xbrlsite.azurewebsites.net/2019/Prototype/references/us-gaap/

APPLE_CIK = "0000320193"  # zero-padded CIK listed for AAPL in the ticker table
CONCEPT_TAG = "Assets"
concept_url = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{APPLE_CIK}/us-gaap/{CONCEPT_TAG}.json"

# timeout: do not hang on a stalled connection; raise_for_status: fail here on an HTTP error
# rather than when the body is parsed as JSON afterwards.
response = requests.get(concept_url, headers=headers, timeout=30)
response.raise_for_status()

In [30]:
# Show the response object; its repr includes the HTTP status code.
response

<Response [200]>

In [31]:
# Build the time series from the USD-denominated facts of the response:
# parse the filing date into a datetime column, then order the rows by period end.
usd_facts = response.json()["units"]["USD"]
assets_timeserie = (
    pd.json_normalize(usd_facts)
    .assign(filed=lambda facts: pd.to_datetime(facts["filed"]))
    .sort_values("end")
)

In [32]:
# Display the reported values ordered by period end date.
assets_timeserie

Unnamed: 0,end,val,accn,fy,fp,form,filed,frame
0,2008-09-27,39572000000,0001193125-09-153165,2009,Q3,10-Q,2009-07-22,
1,2008-09-27,39572000000,0001193125-09-214859,2009,FY,10-K,2009-10-27,
2,2008-09-27,36171000000,0001193125-10-012091,2009,FY,10-K/A,2010-01-25,
3,2008-09-27,36171000000,0001193125-10-238044,2010,FY,10-K,2010-10-27,CY2008Q3I
4,2009-06-27,48140000000,0001193125-09-153165,2009,Q3,10-Q,2009-07-22,CY2009Q2I
...,...,...,...,...,...,...,...,...
111,2021-09-25,351002000000,0000320193-22-000108,2022,FY,10-K,2022-10-28,CY2021Q3I
112,2021-12-25,381191000000,0000320193-22-000007,2022,Q1,10-Q,2022-01-28,CY2021Q4I
113,2022-03-26,350662000000,0000320193-22-000059,2022,Q2,10-Q,2022-04-29,CY2022Q1I
114,2022-06-25,336309000000,0000320193-22-000070,2022,Q3,10-Q,2022-07-29,CY2022Q2I


In [40]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Several filings (10-Q, 10-K, 10-K/A) can report the same period end date, sometimes with a
# restated value. Plotting every row would put multiple points on one x position, so keep only
# the most recently filed value for each period end.
assets_latest = (
    assets_timeserie
    .sort_values(["end", "filed"])
    .drop_duplicates(subset="end", keep="last")
)

# secondary_y is enabled on the single subplot, but only the primary axis carries a trace.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(
    x=assets_latest["end"],
    y=assets_latest["val"],
    name='Total Assets value (USD)',
))
fig.update_layout(
    width=1000,
    height=700,
    paper_bgcolor='white',
    plot_bgcolor='#fafafa',
    hovermode='closest',
    # The series is built from quarterly (10-Q) as well as annual (10-K) filings.
    title="Apple Total Assets value over time (Per Company 10-K and 10-Q statements filed with SEC)",
    xaxis=dict(title="Time"),
    yaxis=dict(title="Total Assets value (USD)"),
    showlegend=False,
)
fig.show()

In [None]:
# The SEC API allows a maximum of 10 requests per second.
# To avoid error responses when making many calls (e.g. in a loop), pause after every API call:

# import time
# time.sleep(.1)