In [5]:
import requests as re
import json 
import datetime as dt
from dateutil.relativedelta import relativedelta
import pandas as pd
import numpy as np
import time
import secedgar
from secedgar.cik_lookup import get_cik_map
# Read the API key from the local keys.json file. The context manager closes
# the file handle deterministically instead of leaving it to the garbage
# collector, which is what a bare open() inside json.load() does.
with open('keys.json') as keys_file:
    key = json.load(keys_file)['key']

In [6]:


# Query window: both bounds are parsed from ISO day strings into datetime
# objects (midnight), keyed by the name of the bound.
dateRange = {
    bound: dt.datetime.strptime(iso_day, '%Y-%m-%d')
    for bound, iso_day in (
        ('rangeStartDate', '2020-01-01'),
        ('rangeEndDate', '2021-04-01'),
    )
}
# Dicts keep insertion order, so this prints the start followed by the end.
print(*dateRange.values())

2020-01-01 00:00:00 2021-04-01 00:00:00


# SEC API Attempt
- SEC EDGAR API documentation: https://www.sec.gov/edgar/sec-api-documentation
- Background reading on SEC XBRL financial data: http://xbrl.squarespace.com/understanding-sec-xbrl-financi/

In [212]:
# Ticker symbols to look up.
tickers = [
    'WFC', 'LPLA', 'BAC', 'GS', 'IVZ',
    'FICO', 'MTB', 'FNMA', 'FMCC', 'HBAN',
    'USB', 'COF', 'TRI', 'MCI', 'GNW',
]

# Alternative list kept for quick experiments:
# tickers = [
#     'TSLA',
#     'AAPL',
#     'MSFT'
# ]

# One-column frame of the requested tickers.
tickersdf = pd.DataFrame({'ticker': tickers})

# Full ticker -> CIK mapping from secedgar, flattened to (ticker, CIK) rows.
ciks = pd.DataFrame(
    list(get_cik_map()["ticker"].items()),
    columns=['ticker', 'CIK'],
)

# Default (inner) join on the ticker symbol: a requested ticker that is absent
# from the mapping produces no row in the result.
tickers_CIKs = pd.merge(tickersdf, ciks, on='ticker')


In [213]:
# Working frame for the SEC queries, restricted to the ticker and CIK columns.
# Planned layout (not built yet): Ticker | CIK | Date | Revenue | Net Income
# columns = ['ticker', 'CIK', 'filing_date','revenue', 'net_income']
tickersData = pd.DataFrame(tickers_CIKs, columns=['ticker', 'CIK'])
# Left-pad every CIK with zeros to a fixed width of 10 characters.
tickersData['CIK'] = [cik.zfill(10) for cik in tickersData['CIK']]
tickersData


Unnamed: 0,ticker,CIK
0,WFC,72971
1,LPLA,1397911
2,BAC,70858
3,GS,886982
4,IVZ,914208
5,FICO,814547
6,MTB,36270
7,FNMA,310522
8,FMCC,1026214
9,HBAN,49196


In [214]:
# queryString = (f'https://data.sec.gov/api/xbrl/companyfacts/CIK0000070858.json')
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36',
#     'From': 'youremail@domain.com'  # This is another valid field
# }
# data = re.get(queryString, headers = headers)
# data
# results = data.json()
# results

In [226]:
def get_results(ticker, CIK, start_date, end_date, xbrl_item, form="10-Q", unit="USD", timeout=30):
    """Fetch one us-gaap XBRL concept for a company from the SEC companyconcept API.

    Parameters
    ----------
    ticker : str
        Ticker symbol; only used to label the returned rows.
    CIK : str or int
        SEC Central Index Key. It is zero-padded to 10 characters when the URL
        is built; the value is stored unchanged in the ``CIK`` column.
    start_date, end_date : str
        Inclusive bounds, compared as plain strings against each fact's
        ``start`` and ``end`` fields, so they must use the same layout as the
        API values (``YYYY-MM-DD`` in the responses seen so far).
    xbrl_item : str
        us-gaap tag to request (e.g. ``'NetIncomeLoss'``). The ``val`` column of
        the result is renamed to this tag.
    form : str, default "10-Q"
        Only facts reported on this form type are kept.
    unit : str, default "USD"
        Key of the ``units`` mapping in the response whose facts are used.
    timeout : float, default 30
        Seconds to wait for the HTTP request before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        The matching facts with ``CIK`` and ``ticker`` columns added. When the
        API answers 404, or the response holds no facts for ``unit``, a single
        placeholder row (``ticker``, ``CIK``, ``end`` = NaN, ``xbrl_item`` = NaN)
        is returned instead so callers can still concatenate the result.

    Raises
    ------
    requests.HTTPError
        For any error status other than 404 (raised by ``raise_for_status``).
    KeyError
        If the facts lack one of the ``start``/``end``/``form``/``frame`` fields
        entirely.
    """
    queryString = (
        f'https://data.sec.gov/api/xbrl/companyconcept/CIK{str(CIK).zfill(10)}'
        f'/us-gaap/{xbrl_item}.json'
    )
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36',
        'From': 'eth22mer@gmail.com'
    }
    # Row returned when the company has nothing usable for this concept.
    no_data = pd.DataFrame(data=[{'ticker': ticker, 'CIK': CIK, 'end': np.nan, xbrl_item: np.nan}])

    # NOTE: `re` is the notebook's alias for the `requests` package.
    data = re.get(queryString, headers=headers, timeout=timeout)
    if data.status_code == 404:
        # The company does not report this concept; log it and move on.
        print(str(data))
        return no_data
    # Any other failure (403 throttling, 5xx, ...) should surface as a clear
    # HTTP error rather than an obscure JSON decoding failure further down.
    data.raise_for_status()

    facts = (data.json().get('units') or {}).get(unit)
    if not facts:
        # The concept exists but was never reported in the requested unit.
        return no_data

    results = pd.DataFrame(facts)
    keep = (
        (results['end'] <= end_date)
        & (results['start'] >= start_date)
        & (results['form'] == form)
        # Facts without a `frame` value are dropped.
        & results['frame'].notnull()
    )
    # .assign() builds a new frame, so no column is written onto a filtered
    # slice (avoids pandas' SettingWithCopyWarning).
    return (
        results[keep]
        .assign(CIK=CIK, ticker=ticker)
        .rename(columns={'val': xbrl_item})
    )


In [227]:
# Fetch NetIncomeLoss facts for every ticker and stack them into one frame.
# The per-ticker frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies all previous rows on every iteration.
frames = []
for row in tickersData.itertuples(index=False):
    print(row.ticker)
    frames.append(get_results(row.ticker, row.CIK, '2020-01-01', '2021-03-01', 'NetIncomeLoss'))
    # Pause between requests (presumably to stay within the SEC's request-rate
    # limits; confirm before shortening).
    time.sleep(1)
# pd.concat raises on an empty list, so fall back to an empty frame.
# Each per-ticker frame keeps its own index, so labels may repeat across tickers.
all_results = pd.concat(frames) if frames else pd.DataFrame([])
all_results

WFC
LPLA
BAC
GS
IVZ
FICO
MTB
FNMA
FMCC
HBAN
USB
COF
TRI
<Response [404]>
MCI
<Response [404]>
GNW


Unnamed: 0,start,end,NetIncomeLoss,accn,fy,fp,form,filed,frame,CIK,ticker
151,2020-01-01,2020-03-31,653000000.0,0000072971-21-000221,2021.0,Q1,10-Q,2021-05-05,CY2020Q1,72971,WFC
155,2020-04-01,2020-06-30,-3846000000.0,0000072971-21-000267,2021.0,Q2,10-Q,2021-07-28,CY2020Q2,72971,WFC
159,2020-07-01,2020-09-30,3216000000.0,0000072971-21-000317,2021.0,Q3,10-Q,2021-11-01,CY2020Q3,72971,WFC
205,2020-01-01,2020-03-31,155641000.0,0001397911-21-000094,2021.0,Q1,10-Q,2021-05-04,CY2020Q1,1397911,LPLA
209,2020-04-01,2020-06-30,101662000.0,0001397911-21-000149,2021.0,Q2,10-Q,2021-08-03,CY2020Q2,1397911,LPLA
213,2020-07-01,2020-09-30,103789000.0,0001397911-21-000180,2021.0,Q3,10-Q,2021-11-02,CY2020Q3,1397911,LPLA
161,2020-01-01,2020-03-31,4010000000.0,0000070858-21-000063,2021.0,Q1,10-Q,2021-04-29,CY2020Q1,70858,BAC
165,2020-04-01,2020-06-30,3533000000.0,0000070858-21-000084,2021.0,Q2,10-Q,2021-07-30,CY2020Q2,70858,BAC
169,2020-07-01,2020-09-30,4881000000.0,0000070858-21-000107,2021.0,Q3,10-Q,2021-10-29,CY2020Q3,70858,BAC
154,2020-01-01,2020-03-31,1213000000.0,0001193125-21-148444,2021.0,Q1,10-Q,2021-05-04,CY2020Q1,886982,GS


In [228]:
# all_results.to_csv(f'all_results.csv')

In [229]:
# Work on a de-duplicated copy so `all_results` itself stays untouched.
arg = all_results.drop_duplicates().copy()
# Negate NetIncomeLoss on 10-Q rows and leave every other row as it is.
# The values are derived from `arg` itself and assigned positionally: the
# previous version computed them from `all_results`, whose index labels repeat
# across tickers, so once duplicates had been dropped the label-based
# assignment could fail or pair values with the wrong rows.
# NOTE(review): the reason for flipping the sign of 10-Q figures is not
# evident from this notebook; confirm it is intended.
arg['NetIncomeLoss'] = np.where(
    arg['form'] == '10-Q',
    -arg['NetIncomeLoss'],
    arg['NetIncomeLoss'],
)
# arg['fpCLEAN'] = arg.apply(lambda x: np.nan if x['frame'] == '10-K'  else x['frame'][6:8], axis=1)
# arg['fyCLEAN']  = arg.apply(lambda x: x['frame'][2:6], axis=1)
# arg.sort_values(['ticker', 'fyCLEAN', 'fpCLEAN'], ascending=[True, True, True])
# arg.groupby(['ticker', 'fyCLEAN']).sum()
# arg1
# tickersData['CIK'].apply(lambda x: x.zfill(10))
# df['A'] = df.apply(lambda x: x['B'] if x['A']==0 else x['A'], axis=1)

In [131]:
# Display `arg` with fully duplicated rows removed. The result is not assigned,
# so `arg` itself is left unchanged by this cell.
arg.drop_duplicates()

Unnamed: 0,start,end,NetIncomeLoss,accn,fy,fp,form,filed,frame,CIK,ticker
181,2019-01-01,2019-03-31,-702000000,0001564590-21-004599,2020,FY,10-K,2021-02-08,CY2019Q1,1318605,TSLA
187,2019-04-01,2019-06-30,-408000000,0001564590-21-004599,2020,FY,10-K,2021-02-08,CY2019Q2,1318605,TSLA
193,2019-07-01,2019-09-30,143000000,0001564590-21-004599,2020,FY,10-K,2021-02-08,CY2019Q3,1318605,TSLA
197,2019-01-01,2019-12-31,-862000000,0001564590-21-004599,2020,FY,10-K,2021-02-08,CY2019,1318605,TSLA
199,2019-10-01,2019-12-31,105000000,0001564590-21-004599,2020,FY,10-K,2021-02-08,CY2019Q4,1318605,TSLA
203,2020-01-01,2020-03-31,-16000000,0000950170-21-000046,2021,Q1,10-Q,2021-04-28,CY2020Q1,1318605,TSLA
208,2020-04-01,2020-06-30,-104000000,0000950170-21-000524,2021,Q2,10-Q,2021-07-27,CY2020Q2,1318605,TSLA
213,2020-07-01,2020-09-30,-331000000,0000950170-21-002253,2021,Q3,10-Q,2021-10-25,CY2020Q3,1318605,TSLA
214,2020-01-01,2020-12-31,721000000,0001564590-21-004599,2020,FY,10-K,2021-02-08,CY2020,1318605,TSLA
215,2020-10-01,2020-12-31,270000000,0001564590-21-004599,2020,FY,10-K,2021-02-08,CY2020Q4,1318605,TSLA
