# Introduction to Using SEC Edgar Data with AlgoSeek Datasets

## 1) Introduction

This notebook demonstrates how to fetch SEC EDGAR filing data and use it alongside AlgoSeek's equity data.

In [1]:
# import libraries
import os
import requests
import pandas as pd
import numpy as np
import seaborn as sns
import json


To use the SEC EDGAR API, you must identify yourself by sending a valid email address in the `User-Agent` request header. This is simply an anti-bot measure.

In [2]:
# SEC EDGAR requires a valid contact email in the User-Agent header of every request.
# Set the SEC_EDGAR_USER_AGENT environment variable to send your own address without
# editing the notebook; the original contact below is used only as a fallback
# (also when the variable is set but empty).
headers = {'User-Agent': os.environ.get("SEC_EDGAR_USER_AGENT") or "julian@julianwiley.com"}


Fetch the SEC EDGAR index of tickers and CIK numbers for reference.

In [3]:
# Download the ticker -> CIK index. At this point `tickers_cik` is the raw HTTP response.
# NOTE(review): a later cell rebinds this same name to a DataFrame, so that cell cannot be
# re-run on its own without re-running this request first.
tickers_cik = requests.get("https://www.sec.gov/files/company_tickers.json", headers=headers)

In [4]:
# Peek at the raw payload: a dict keyed by row number ("0", "1", ...) whose values each
# carry `cik_str`, `ticker` and `title`.
tickers_cik.json()

{'0': {'cik_str': 320193, 'ticker': 'AAPL', 'title': 'Apple Inc.'},
 '1': {'cik_str': 789019, 'ticker': 'MSFT', 'title': 'MICROSOFT CORP'},
 '2': {'cik_str': 1652044, 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
 '3': {'cik_str': 1318605, 'ticker': 'TSLA', 'title': 'Tesla, Inc.'},
 '4': {'cik_str': 1067983,
  'ticker': 'BRK-B',
  'title': 'BERKSHIRE HATHAWAY INC'},
 '5': {'cik_str': 731766, 'ticker': 'UNH', 'title': 'UNITEDHEALTH GROUP INC'},
 '6': {'cik_str': 200406, 'ticker': 'JNJ', 'title': 'JOHNSON & JOHNSON'},
 '7': {'cik_str': 1046179,
  'ticker': 'TSM',
  'title': 'TAIWAN SEMICONDUCTOR MANUFACTURING CO LTD'},
 '8': {'cik_str': 34088, 'ticker': 'XOM', 'title': 'EXXON MOBIL CORP'},
 '9': {'cik_str': 1045810, 'ticker': 'NVDA', 'title': 'NVIDIA CORP'},
 '10': {'cik_str': 80424, 'ticker': 'PG', 'title': 'PROCTER & GAMBLE Co'},
 '11': {'cik_str': 884394, 'ticker': 'SPY', 'title': 'SPDR S&P 500 ETF TRUST'},
 '12': {'cik_str': 19617, 'ticker': 'JPM', 'title': 'JPMORGAN CHASE & CO'},
 '

In [5]:
# Each value of the payload is one company record, so the values alone form the table rows.
# CIKs are zero-padded to 10 digits (the width used in the data.sec.gov URLs in this
# notebook) and the frame is keyed by ticker for lookups.
tickers_cik = (
    pd.json_normalize(list(tickers_cik.json().values()))
    .assign(cik_str=lambda frame: frame["cik_str"].astype(str).str.zfill(10))
    .set_index("ticker")
)

In [7]:
# Download the `companyfacts` document for Apple (CIK 0000320193). The URL contains no
# placeholders, so it is a plain string literal rather than an f-string.
aapl_facts = requests.get("https://data.sec.gov/api/xbrl/companyfacts/CIK0000320193.json", headers=headers)

In [13]:
# Decode the JSON body once; the cells below index it as aapl_f['facts'][<taxonomy>][<concept>].
aapl_f = aapl_facts.json()

In [30]:
# Number of distinct us-gaap concepts present in Apple's facts document
len(aapl_f['facts']['us-gaap'])

486

In [19]:
# List every us-gaap concept name in Apple's facts document, one per line.
for concept_name in aapl_f['facts']['us-gaap']:
    print(concept_name)

AccountsPayable
AccountsPayableCurrent
AccountsReceivableNetCurrent
AccruedIncomeTaxesCurrent
AccruedIncomeTaxesNoncurrent
AccruedLiabilities
AccruedLiabilitiesCurrent
AccruedMarketingCostsCurrent
AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment
AccumulatedOtherComprehensiveIncomeLossAvailableForSaleSecuritiesAdjustmentNetOfTax
AccumulatedOtherComprehensiveIncomeLossCumulativeChangesInNetGainLossFromCashFlowHedgesEffectNetOfTax
AccumulatedOtherComprehensiveIncomeLossForeignCurrencyTranslationAdjustmentNetOfTax
AccumulatedOtherComprehensiveIncomeLossNetOfTax
AdjustmentsToAdditionalPaidInCapitalSharebasedCompensationRequisiteServicePeriodRecognitionValue
AdjustmentsToAdditionalPaidInCapitalTaxEffectFromShareBasedCompensation
AdvertisingExpense
AllocatedShareBasedCompensationExpense
AllowanceForDoubtfulAccountsReceivableCurrent
AmortizationOfIntangibleAssets
AntidilutiveSecuritiesExcludedFromComputationOfEarningsPerShareAmount
Assets
AssetsCurrent
AssetsNoncurrent


In [29]:
# List the concept names filed under the `dei` taxonomy, one per line.
for concept_name in aapl_f['facts']['dei']:
    print(concept_name)

EntityCommonStockSharesOutstanding
EntityPublicFloat


In [20]:
# Build a table of Apple's reported `Assets` values in USD: one row per reported fact,
# with `filed` parsed to a timestamp. `end` is left as the string it arrives as and is
# used as the sort key; the dates are in YYYY-MM-DD form, which orders chronologically as text.
aapl_assets = (
    pd.json_normalize(aapl_f['facts']['us-gaap']['Assets']['units']['USD'])
    .assign(filed=lambda frame: pd.to_datetime(frame["filed"]))
    .sort_values("end")
)


In [28]:
# Show the table. The same `end` date appears on several rows because one balance can be
# reported in more than one filing (different `accn` / `form` values).
aapl_assets

Unnamed: 0,end,val,accn,fy,fp,form,filed,frame
0,2008-09-27,39572000000,0001193125-09-153165,2009,Q3,10-Q,2009-07-22,
1,2008-09-27,39572000000,0001193125-09-214859,2009,FY,10-K,2009-10-27,
2,2008-09-27,36171000000,0001193125-10-012091,2009,FY,10-K/A,2010-01-25,
3,2008-09-27,36171000000,0001193125-10-238044,2010,FY,10-K,2010-10-27,CY2008Q3I
4,2009-06-27,48140000000,0001193125-09-153165,2009,Q3,10-Q,2009-07-22,CY2009Q2I
...,...,...,...,...,...,...,...,...
109,2021-09-25,351002000000,0000320193-22-000059,2022,Q2,10-Q,2022-04-29,
110,2021-09-25,351002000000,0000320193-22-000070,2022,Q3,10-Q,2022-07-29,CY2021Q3I
111,2021-12-25,381191000000,0000320193-22-000007,2022,Q1,10-Q,2022-01-28,CY2021Q4I
112,2022-03-26,350662000000,0000320193-22-000059,2022,Q2,10-Q,2022-04-29,CY2022Q1I


Fetching Financial data

In [None]:
def get_financials(stock_cik, concept="Assets", taxonomy="us-gaap", timeout=30):
    """
    Fetch one XBRL concept for one company from the SEC EDGAR ``companyconcept`` API.

    The request is sent with the notebook-level ``headers`` dict.

    :param stock_cik: Company identifier. Accepts the full ``"CIK0000320193"`` form,
        a bare CIK string such as ``"0000320193"`` or ``"320193"``, or an integer CIK.
    :type stock_cik: str or int
    :param concept: XBRL tag to request. Defaults to ``"Assets"``, the only concept the
        original implementation could fetch.
    :type concept: str
    :param taxonomy: Taxonomy the concept belongs to. Defaults to ``"us-gaap"``.
    :type taxonomy: str
    :param timeout: Seconds to wait on the server before ``requests`` gives up, so a
        stalled connection cannot hang the notebook indefinitely.
    :type timeout: float
    :return: The raw HTTP response. No status check or JSON decoding is done here;
        callers should inspect ``status_code`` / call ``.json()`` themselves.
    :rtype: requests.Response
    """
    # Normalise to the ``CIK`` + 10-digit form used in the URL path, so both the padded
    # digits stored in the ticker table and an already-prefixed string are accepted.
    cik_digits = str(stock_cik).strip()
    if cik_digits.upper().startswith("CIK"):
        cik_digits = cik_digits[3:]
    cik_path = "CIK" + cik_digits.zfill(10)

    # A single f-string builds the URL; the original additionally called .format() on the
    # already-interpolated string, which was redundant.
    url = f"https://data.sec.gov/api/xbrl/companyconcept/{cik_path}/{taxonomy}/{concept}.json"

    return requests.get(url, headers=headers, timeout=timeout)

In [None]:
# Fetch Apple's reported `Assets` concept through get_financials() instead of repeating
# the URL and headers inline.
response = get_financials("CIK0000320193")