# SEC Data Pull

Explore the use of the Securities and Exchange Commission's EDGAR API to extract financial data from publicly traded companies.

In [2]:
import pandas as pd
import requests
from headers import headers

In [3]:
# Ticker symbols of the companies whose financial data will be pulled
tickers = [
    "AEE", "AEP", "AVA", "BKH",
    "CMS", "CNP", "DTE", "ETR",
    "EVRG", "IDA", "LNT", "MGEE",
    "NWE", "OGE", "POR", "WEC",
]

In [4]:
# Define function to use ticker to pull corresponding cik
def cik_matching_ticker(ticker, headers=headers):
    """Return the zero-padded, 10-character CIK string for a ticker symbol.

    Downloads the ticker-to-CIK mapping from sec.gov and scans its entries
    for one whose ``ticker`` field equals the normalized input.

    Args:
        ticker: Ticker symbol. It is upper-cased and any "." is replaced
            with "-" before matching.
        headers: HTTP headers sent with the request.

    Returns:
        The matching entry's ``cik_str`` as a string left-padded with zeros
        to 10 characters.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        ValueError: If no entry matches the normalized ticker.
    """
    link = "https://www.sec.gov/files/company_tickers.json"
    ticker = ticker.upper().replace(".", "-")

    # Bound the wait and surface HTTP failures explicitly instead of trying
    # to decode an error response as JSON.
    response = requests.get(link, headers=headers, timeout=30)
    response.raise_for_status()
    ticker_json = response.json()

    for company in ticker_json.values():
        if company["ticker"] == ticker:
            return str(company["cik_str"]).zfill(10)
    raise ValueError(f"Ticker {ticker} not found in SEC database")

In [5]:
# Empty mapping that will hold the CIK looked up for each ticker
cik_dict = dict()

In [6]:
# Resolve every ticker to its CIK, store it under the ticker, and echo it
for ticker in tickers:
    cik = cik_matching_ticker(ticker)
    cik_dict.update({ticker: cik})
    print(cik)

0001002910
0000004904
0000104918
0001130464
0000811156
0001130310
0000936340
0000065984
0001711269
0001057877
0000352541
0001161728
0001993004
0001021635
0000784977
0000783325


In [7]:
# Show the ticker-to-CIK mapping
cik_dict

{'AEE': '0001002910',
 'AEP': '0000004904',
 'AVA': '0000104918',
 'BKH': '0001130464',
 'CMS': '0000811156',
 'CNP': '0001130310',
 'DTE': '0000936340',
 'ETR': '0000065984',
 'EVRG': '0001711269',
 'IDA': '0001057877',
 'LNT': '0000352541',
 'MGEE': '0001161728',
 'NWE': '0001993004',
 'OGE': '0001021635',
 'POR': '0000784977',
 'WEC': '0000783325'}

In [8]:
# Pick the CIK recorded for ticker NWE
cik = cik_dict["NWE"]

In [9]:
# Show the selected CIK
cik

'0001993004'

In [10]:
# Fetch the submissions record for the selected CIK
url = f"https://data.sec.gov/submissions/CIK{str(cik).zfill(10)}.json"
response = requests.get(url, headers=headers, timeout=30)
# Surface HTTP failures here rather than as a JSON decoding error
response.raise_for_status()
company_filings = response.json()
company_filings.keys()

dict_keys(['cik', 'entityType', 'sic', 'sicDescription', 'ownerOrg', 'insiderTransactionForOwnerExists', 'insiderTransactionForIssuerExists', 'name', 'tickers', 'exchanges', 'ein', 'lei', 'description', 'website', 'investorWebsite', 'category', 'fiscalYearEnd', 'stateOfIncorporation', 'stateOfIncorporationDescription', 'addresses', 'phone', 'flags', 'formerNames', 'filings'])

In [11]:
# Field names available under filings -> recent
recent_filings = company_filings["filings"]["recent"]
recent_filings.keys()

dict_keys(['accessionNumber', 'filingDate', 'reportDate', 'acceptanceDateTime', 'act', 'form', 'fileNumber', 'filmNumber', 'items', 'core_type', 'size', 'isXBRL', 'isInlineXBRL', 'primaryDocument', 'primaryDocDescription'])

In [12]:
# First accession number listed under filings -> recent
accession_numbers = company_filings["filings"]["recent"]["accessionNumber"]
accession_numbers[0]

'0001931208-25-000005'

In [13]:
# Load the recent-filings fields into a DataFrame and display it
recent_filings_data = company_filings["filings"]["recent"]
df_company_filings = pd.DataFrame(data=recent_filings_data)
df_company_filings

Unnamed: 0,accessionNumber,filingDate,reportDate,acceptanceDateTime,act,form,fileNumber,filmNumber,items,core_type,size,isXBRL,isInlineXBRL,primaryDocument,primaryDocDescription
0,0001931208-25-000005,2025-07-09,2025-07-01,2025-07-09T17:03:24.000Z,,4,,,,4,4645,0,0,xslF345X05/wk-form4_1752080598.xml,FORM 4
1,0001993004-25-000117,2025-07-07,2025-07-01,2025-07-07T22:23:34.000Z,,4,,,,4,4652,0,0,xslF345X05/wk-form4_1751927008.xml,FORM 4
2,0001993004-25-000116,2025-07-07,2025-07-01,2025-07-07T22:23:02.000Z,,4,,,,4,4775,0,0,xslF345X05/wk-form4_1751926976.xml,FORM 4
3,0001993004-25-000115,2025-07-07,2025-07-01,2025-07-07T22:22:31.000Z,,4,,,,4,4631,0,0,xslF345X05/wk-form4_1751926945.xml,FORM 4
4,0001993004-25-000114,2025-07-07,2025-07-01,2025-07-07T22:22:00.000Z,,4,,,,4,4797,0,0,xslF345X05/wk-form4_1751926913.xml,FORM 4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164,0001628280-23-033631,2023-10-03,,2023-10-03T20:08:57.000Z,33,S-8,333-274844,231303629,,S-8,333889,0,0,nwholdcos-8espp.htm,S-8
165,0001628280-23-033628,2023-10-03,,2023-10-03T20:06:19.000Z,33,S-8,333-274843,231303557,,S-8,335970,0,0,nwholdcos-8ar.htm,S-8
166,0001628280-23-033400,2023-10-02,,2023-10-02T12:07:37.000Z,33,S-3ASR,333-274814,231297846,,S-3ASR,553838,0,0,nwholdco-sx3asrdrspp.htm,S-3ASR
167,0001628280-23-033398,2023-10-02,,2023-10-02T12:05:51.000Z,33,S-3ASR,333-274813,231297844,,S-3ASR,498323,0,0,nwholdco-sx3asr.htm,S-3ASR


In [14]:
# Keep only the annual reports (form 10-K)
is_annual_report = df_company_filings["form"].eq("10-K")
df_company_filings.loc[is_annual_report]

Unnamed: 0,accessionNumber,filingDate,reportDate,acceptanceDateTime,act,form,fileNumber,filmNumber,items,core_type,size,isXBRL,isInlineXBRL,primaryDocument,primaryDocDescription
46,0001993004-25-000021,2025-02-13,2024-12-31,2025-02-12T23:55:28.000Z,34,10-K,000-56598,25616581,,XBRL,19188825,1,1,nweg-20241231.htm,10-K
145,0001993004-24-000006,2024-02-15,2023-12-31,2024-02-15T00:42:10.000Z,34,10-K,000-56598,24641518,,XBRL,22811731,1,1,nweg-20231231.htm,10-K


In [15]:
# Extract XBRL data using the company facts API

In [16]:
# Fetch the XBRL company facts record for the selected CIK
url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{str(cik).zfill(10)}.json"
response = requests.get(url, headers=headers, timeout=30)
# Surface HTTP failures here rather than as a JSON decoding error
response.raise_for_status()
company_facts = response.json()

In [17]:
# Tabulate the USD entries of the us-gaap AssetsCurrent fact
us_gaap_facts = company_facts["facts"]["us-gaap"]
df_curr_assets = pd.DataFrame(us_gaap_facts["AssetsCurrent"]["units"]["USD"])
df_curr_assets

Unnamed: 0,end,val,accn,fy,fp,form,filed,frame
0,2022-12-31,538824000,0001993004-23-000006,2023,Q3,10-Q,2023-10-27,
1,2022-12-31,538824000,0001993004-24-000006,2023,FY,10-K,2024-02-15,CY2022Q4I
2,2023-09-30,358354000,0001993004-23-000006,2023,Q3,10-Q,2023-10-27,CY2023Q3I
3,2023-12-31,407006000,0001993004-24-000006,2023,FY,10-K,2024-02-15,
4,2023-12-31,407006000,0001993004-24-000035,2024,Q1,10-Q,2024-04-26,
5,2023-12-31,407006000,0001993004-24-000072,2024,Q2,10-Q,2024-07-31,
6,2023-12-31,407006000,0001993004-24-000102,2024,Q3,10-Q,2024-10-29,
7,2023-12-31,407006000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2023Q4I
8,2024-03-31,396836000,0001993004-24-000035,2024,Q1,10-Q,2024-04-26,CY2024Q1I
9,2024-06-30,364045000,0001993004-24-000072,2024,Q2,10-Q,2024-07-31,CY2024Q2I


In [18]:
# Tabulate the USD entries of the us-gaap Revenues fact
us_gaap_facts = company_facts["facts"]["us-gaap"]
df_revenues = pd.DataFrame(us_gaap_facts["Revenues"]["units"]["USD"])
df_revenues

Unnamed: 0,start,end,val,accn,fy,fp,form,filed,frame
0,2021-01-01,2021-12-31,1372316000,0001993004-24-000006,2023,FY,10-K,2024-02-15,CY2021
1,2022-01-01,2022-09-30,1052554000,0001993004-23-000006,2023,Q3,10-Q,2023-10-27,
2,2022-07-01,2022-09-30,335068000,0001993004-23-000006,2023,Q3,10-Q,2023-10-27,CY2022Q3
3,2022-01-01,2022-12-31,1477837000,0001993004-24-000006,2023,FY,10-K,2024-02-15,
4,2022-01-01,2022-12-31,1477837000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2022
5,2022-10-01,2022-12-31,425283000,0001993004-24-000006,2023,FY,10-K,2024-02-15,CY2022Q4
6,2023-01-01,2023-03-31,454542000,0001993004-24-000035,2024,Q1,10-Q,2024-04-26,CY2023Q1
7,2023-01-01,2023-06-30,745044000,0001993004-24-000072,2024,Q2,10-Q,2024-07-31,
8,2023-04-01,2023-06-30,290502000,0001993004-24-000072,2024,Q2,10-Q,2024-07-31,CY2023Q2
9,2023-01-01,2023-09-30,1066134000,0001993004-23-000006,2023,Q3,10-Q,2023-10-27,


In [19]:
# Revenue rows whose form is 10-K
from_annual_report = df_revenues["form"].eq("10-K")
df_revenues.loc[from_annual_report]

Unnamed: 0,start,end,val,accn,fy,fp,form,filed,frame
0,2021-01-01,2021-12-31,1372316000,0001993004-24-000006,2023,FY,10-K,2024-02-15,CY2021
3,2022-01-01,2022-12-31,1477837000,0001993004-24-000006,2023,FY,10-K,2024-02-15,
4,2022-01-01,2022-12-31,1477837000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2022
5,2022-10-01,2022-12-31,425283000,0001993004-24-000006,2023,FY,10-K,2024-02-15,CY2022Q4
13,2023-01-01,2023-12-31,1422143000,0001993004-24-000006,2023,FY,10-K,2024-02-15,
14,2023-01-01,2023-12-31,1422143000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2023
15,2023-10-01,2023-12-31,356009000,0001993004-24-000006,2023,FY,10-K,2024-02-15,
16,2023-10-01,2023-12-31,356009000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2023Q4
23,2024-01-01,2024-12-31,1513898000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2024
24,2024-10-01,2024-12-31,373466000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2024Q4


In [26]:
# May then want to filter by frame to keep only calendar-year (CY) frames.
# na=False makes rows without a frame value count as non-matching, so the
# mask is strictly boolean instead of containing NaN.
df_revenues.loc[
    (df_revenues["form"] == "10-K")
    & df_revenues["frame"].str.startswith("CY", na=False)
]

Unnamed: 0,start,end,val,accn,fy,fp,form,filed,frame
0,2021-01-01,2021-12-31,1372316000,0001993004-24-000006,2023,FY,10-K,2024-02-15,CY2021
4,2022-01-01,2022-12-31,1477837000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2022
5,2022-10-01,2022-12-31,425283000,0001993004-24-000006,2023,FY,10-K,2024-02-15,CY2022Q4
14,2023-01-01,2023-12-31,1422143000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2023
16,2023-10-01,2023-12-31,356009000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2023Q4
23,2024-01-01,2024-12-31,1513898000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2024
24,2024-10-01,2024-12-31,373466000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2024Q4


In [28]:
# Try again, excluding quarterly frames: the frame must be exactly "CY"
# followed by four digits. The pattern is a raw string so that \d is not
# treated as an (invalid) string escape; na=False makes rows without a
# frame value count as non-matching.
df_revenues.loc[
    (df_revenues["form"] == "10-K")
    & df_revenues["frame"].str.contains(r"^CY\d{4}$", regex=True, na=False)
]

Unnamed: 0,start,end,val,accn,fy,fp,form,filed,frame
0,2021-01-01,2021-12-31,1372316000,0001993004-24-000006,2023,FY,10-K,2024-02-15,CY2021
4,2022-01-01,2022-12-31,1477837000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2022
14,2023-01-01,2023-12-31,1422143000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2023
23,2024-01-01,2024-12-31,1513898000,0001993004-25-000021,2024,FY,10-K,2025-02-13,CY2024
