In [1]:
import requests as req
import re
import json 
import datetime as dt
from dateutil.relativedelta import relativedelta
import pandas as pd
import numpy as np
import time
import secedgar
from secedgar.cik_lookup import get_cik_map
# Read the 'key' entry from the local keys.json file. The context manager
# closes the file handle as soon as the JSON has been parsed.
with open('keys.json') as key_file:
    key = json.load(key_file)['key']

In [2]:


# Build the query window: each bound is parsed from an ISO date string into a datetime.
dateRange = {
    bound_name: dt.datetime.strptime(iso_day, '%Y-%m-%d')
    for bound_name, iso_day in (
        ('rangeStartDate', '2020-01-01'),
        ('rangeEndDate', '2021-04-01'),
    )
}
print(dateRange['rangeStartDate'], dateRange['rangeEndDate'])

2020-01-01 00:00:00 2021-04-01 00:00:00


# SEC API
https://www.sec.gov/edgar/sec-api-documentation
http://xbrl.squarespace.com/understanding-sec-xbrl-financi/

In [35]:
# Ticker symbols to analyse. Only COF is active; the other candidates are
# parked here and can be moved into the list to include them:
#   BAC, WFC, GS, USB, BCS, HBAN, MTB, FICO, FNMA
tickers = ['COF']

tickersdf = pd.DataFrame({'ticker': tickers})

# get_cik_map() exposes a ticker -> CIK mapping under its "ticker" key;
# flatten it into a two-column lookup table.
cik_by_ticker = get_cik_map()["ticker"]
ciks = pd.DataFrame(list(cik_by_ticker.items()), columns=['ticker', 'CIK'])

# Default (inner) merge: only tickers present in the lookup table survive.
tickers_CIKs = tickersdf.merge(ciks, on='ticker')


In [36]:
# Dataframe setup: one row per ticker with its CIK.
# Later cells add filing data (date, revenue, net income) keyed on these columns.
tickersData = pd.DataFrame(tickers_CIKs, columns=['ticker', 'CIK'])
# Left-pad every CIK with zeros to 10 characters. Casting to str first keeps
# this working even if the lookup delivers CIKs as integers.
tickersData['CIK'] = tickersData['CIK'].astype(str).str.zfill(10)
tickersData


Unnamed: 0,ticker,CIK
0,COF,927628


In [37]:
# queryString = (f'https://data.sec.gov/api/xbrl/companyfacts/CIK0000070858.json')
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36',
#     'From': 'youremail@domain.com'  # This is another valid field
# }
# data = re.get(queryString, headers = headers)
# data
# results = data.json()
# results

In [38]:
def get_results(ticker, CIK, start_date, end_date, xbrl_item):
    """Fetch one us-gaap concept for a company from data.sec.gov and filter it by date.

    Parameters
    ----------
    ticker : str
        Ticker symbol; copied into the ``ticker`` column of the result.
    CIK : str
        Company identifier, interpolated verbatim after the literal ``CIK`` in
        the request URL and copied into the ``CIK`` column.
    start_date, end_date : str
        Inclusive bounds. A fact is kept when ``start >= start_date`` and
        ``end <= end_date``; the values are compared directly against the
        ``start``/``end`` fields of the response (the notebook passes
        ``'YYYY-MM-DD'`` strings).
    xbrl_item : str
        Concept name used in the URL; the ``val`` column is renamed to it.

    Returns
    -------
    pandas.DataFrame
        The USD facts inside the window that have a non-null ``frame``, with
        ``CIK`` and ``ticker`` columns appended and ``val`` renamed to
        ``xbrl_item``. The original row index of the response is preserved.
        When the endpoint answers 404, or the response holds no USD facts, a
        single placeholder row (``ticker``, ``CIK``, ``end`` = NaN,
        ``xbrl_item`` = NaN) is returned instead.

    Raises
    ------
    Whatever ``raise_for_status()`` raises for a non-404 HTTP error status.

    Notes
    -----
    The window filter reads a ``start`` field, so facts without one are not
    supported here -- NOTE(review): confirm whether such concepts are needed.
    """
    queryString = (f'https://data.sec.gov/api/xbrl/companyconcept/CIK{str(CIK)}/us-gaap/{xbrl_item}.json')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36',
        'From': 'eth22mer@gmail.com'
    }
    # Row returned when there is nothing to report, so the ticker still shows
    # up in the combined results.
    placeholder = pd.DataFrame(data=[{'ticker': ticker, 'CIK': CIK, 'end': np.nan, xbrl_item: np.nan}])

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = req.get(queryString, headers=headers, timeout=30)
    if response.status_code == 404:
        print(str(response))
        return placeholder
    # Any other error status is surfaced here rather than as a confusing
    # JSON-decoding failure further down.
    response.raise_for_status()

    usd_facts = (response.json().get('units') or {}).get('USD') or []
    results = pd.DataFrame(usd_facts)
    if results.empty:
        return placeholder
    if 'frame' not in results.columns:
        # No fact carries a frame label; add the column so the filter below
        # yields an empty result instead of raising.
        results['frame'] = np.nan

    in_window = (results['end'] <= end_date) & (results['start'] >= start_date)
    has_frame = results['frame'].notnull()
    # .copy() so the column assignments below act on an independent frame.
    results = results[in_window & has_frame].copy()
    results['CIK'] = CIK
    results['ticker'] = ticker
    return results.rename(columns={'val': xbrl_item})


In [56]:
# Fetch NetIncomeLoss for every ticker and stack the per-ticker frames.
# Frames are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0, and growing a frame inside a loop copies it each time.
ticker_frames = []
for _, ticker_row in tickersData.iterrows():
    print(ticker_row['ticker'])
    ticker_frames.append(
        get_results(ticker_row['ticker'], ticker_row['CIK'], '2020-01-01', '2022-09-01', 'NetIncomeLoss')
    )
    # One-second pause between requests (presumably to stay within the API's
    # rate limits -- confirm).
    time.sleep(1)
# pd.concat raises on an empty list, so fall back to an empty frame.
all_results = pd.concat(ticker_frames) if ticker_frames else pd.DataFrame([])
all_results

COF


Unnamed: 0,start,end,NetIncomeLoss,accn,fy,fp,form,filed,frame,CIK,ticker
174,2020-01-01,2020-03-31,-1340000000,0000927628-21-000175,2021,Q1,10-Q,2021-05-07,CY2020Q1,927628,COF
178,2020-04-01,2020-06-30,-918000000,0000927628-21-000256,2021,Q2,10-Q,2021-07-30,CY2020Q2,927628,COF
182,2020-07-01,2020-09-30,2406000000,0000927628-21-000309,2021,Q3,10-Q,2021-11-05,CY2020Q3,927628,COF
184,2020-01-01,2020-12-31,2714000000,0000927628-22-000106,2021,FY,10-K,2022-02-25,CY2020,927628,COF
185,2021-01-01,2021-03-31,3325000000,0000927628-21-000175,2021,Q1,10-Q,2021-05-07,CY2021Q1,927628,COF
187,2021-04-01,2021-06-30,3536000000,0000927628-21-000256,2021,Q2,10-Q,2021-07-30,CY2021Q2,927628,COF
189,2021-07-01,2021-09-30,3104000000,0000927628-21-000309,2021,Q3,10-Q,2021-11-05,CY2021Q3,927628,COF
190,2021-01-01,2021-12-31,12390000000,0000927628-22-000106,2021,FY,10-K,2022-02-25,CY2021,927628,COF


In [57]:
## Derive the year from the frame label (first run of four digits, e.g. 'CY2020Q1' -> '2020')
frame_year = all_results['frame'].str.extract(r'(\d{4})', expand=False)
all_results['frame_fy'] = frame_year
all_results

Unnamed: 0,start,end,NetIncomeLoss,accn,fy,fp,form,filed,frame,CIK,ticker,frame_fy
174,2020-01-01,2020-03-31,-1340000000,0000927628-21-000175,2021,Q1,10-Q,2021-05-07,CY2020Q1,927628,COF,2020
178,2020-04-01,2020-06-30,-918000000,0000927628-21-000256,2021,Q2,10-Q,2021-07-30,CY2020Q2,927628,COF,2020
182,2020-07-01,2020-09-30,2406000000,0000927628-21-000309,2021,Q3,10-Q,2021-11-05,CY2020Q3,927628,COF,2020
184,2020-01-01,2020-12-31,2714000000,0000927628-22-000106,2021,FY,10-K,2022-02-25,CY2020,927628,COF,2020
185,2021-01-01,2021-03-31,3325000000,0000927628-21-000175,2021,Q1,10-Q,2021-05-07,CY2021Q1,927628,COF,2021
187,2021-04-01,2021-06-30,3536000000,0000927628-21-000256,2021,Q2,10-Q,2021-07-30,CY2021Q2,927628,COF,2021
189,2021-07-01,2021-09-30,3104000000,0000927628-21-000309,2021,Q3,10-Q,2021-11-05,CY2021Q3,927628,COF,2021
190,2021-01-01,2021-12-31,12390000000,0000927628-22-000106,2021,FY,10-K,2022-02-25,CY2021,927628,COF,2021


In [60]:
# For each ticker and frame year, compute (sum of non-10-Q values) minus
# (sum of 10-Q values). With Q1-Q3 reported on 10-Qs and the full year on a
# 10-K, that difference is the implied fourth-quarter figure.
is_quarterly = all_results['form'] == '10-Q'
signed_results = all_results.assign(
    # Flip the sign of quarterly values so a plain sum performs the subtraction.
    NetIncomeLoss=all_results['NetIncomeLoss'].mask(is_quarterly, -all_results['NetIncomeLoss'])
)
# Result shape: {ticker: {frame_fy: {'NetIncomeLoss': total}}}
arg = {
    ticker: ticker_rows.groupby(by='frame_fy')[['NetIncomeLoss']].sum().to_dict("index")
    for ticker, ticker_rows in signed_results.groupby(by='ticker')
}
arg

{'COF': {'2020': {'NetIncomeLoss': 2566000000},
  '2021': {'NetIncomeLoss': 2425000000}}}

In [44]:
# Display the combined results frame.
# NOTE(review): this cell's execution count (44) is lower than that of the
# cells that build all_results (56, 57), so its saved output comes from an
# earlier run -- re-run after a kernel restart to refresh it.
all_results


Unnamed: 0,start,end,NetIncomeLoss,accn,fy,fp,form,filed,frame,CIK,ticker
185,2021-01-01,2021-03-31,3325000000,0000927628-21-000175,2021,Q1,10-Q,2021-05-07,CY2021Q1,927628,COF
187,2021-04-01,2021-06-30,3536000000,0000927628-21-000256,2021,Q2,10-Q,2021-07-30,CY2021Q2,927628,COF
189,2021-07-01,2021-09-30,3104000000,0000927628-21-000309,2021,Q3,10-Q,2021-11-05,CY2021Q3,927628,COF
190,2021-01-01,2021-12-31,12390000000,0000927628-22-000106,2021,FY,10-K,2022-02-25,CY2021,927628,COF
