# P1 Edgar data client for REST API

## Initialization

In [3]:
%load_ext autoreload
%autoreload 2

import os
import json
import pprint
from typing import Any

import pandas as pd

# Local-development switch: set to True only when running against a private
# checkout of the client library.
# API tokens must never be written into the notebook: export `P1_API_TOKEN` and
# `P1_EDGAR_API_TOKEN` in the shell that launches Jupyter instead. Any token
# that was previously committed here should be treated as compromised and
# rotated.
P1_DEV_SETUP = False
if P1_DEV_SETUP:
    import sys
    sys.path.append("/commodity_research/p1_data_client_python_private")
    print(sys.path)

import p1_data_client_python.edgar.edgar_client as p1_edg
import p1_data_client_python.edgar.mappers as p1_map

# The client authenticates with a personal API token.
# You can get your token by signing up at `www.particle.one`.
# Provide it through the `P1_EDGAR_API_TOKEN` environment variable instead of
# pasting it into the notebook, e.g. run `export P1_EDGAR_API_TOKEN=<your token>`
# in the shell that launches Jupyter.

P1_API_TOKEN = os.environ["P1_EDGAR_API_TOKEN"]
# Never echo the token itself: cell outputs are saved together with the notebook.
print("P1_API_TOKEN=", "<redacted, %d chars>" % len(P1_API_TOKEN))

P1_API_TOKEN= <redacted, 64 chars>


## Quick start

There are 3 steps:
1. Get information about company identifiers
2. Get information about financial items available
3. Download data

## Mappers

### GvkCikMapper

It converts between CIK and GVK company identifiers (in both directions) as of a given date.

In [4]:
# Build the mapper once; the lookup in the next cell reuses it.
gvk_mapper = p1_map.GvkCikMapper(token=P1_API_TOKEN)
# CIK -> GVK lookup as of the given date.
gvk_mapper.get_gvk_from_cik(
    as_of_date="2007-01-18",
    cik=940800,
)

Unnamed: 0,cik,effdate,thrudate,gvk
0,940800,2007-01-18T00:00:00,2007-03-14T23:59:59,61411


In [5]:
# GVK -> CIK lookup as of the given date (the reverse of the previous cell).
gvk_mapper.get_cik_from_gvk(
    as_of_date="2007-01-18",
    gvk=61411,
)

Unnamed: 0,cik,effdate,thrudate,gvk
0,940800,2007-01-18T00:00:00,2007-03-14T23:59:59,61411


### ItemMapper

It provides a mapping between keywords and the descriptions of Compustat items.

In [7]:
# Build the item mapper once; the next cell reuses it.
item_mapper = p1_map.ItemMapper(token=P1_API_TOKEN)
# Look up items by keyword; two spellings of the same phrase are passed.
item_mapper.get_item_from_keywords(
    keywords=[
        "short-term",
        "short term",
    ]
)

Unnamed: 0,item,description
0,IVSTCH_YEAR,Short-Term Investments - Change
1,CHE_QUARTER,Cash and Short-Term Investments
2,ALTO_QUARTER,Other Long-term Assets
3,IVLT_QUARTER,Total Long-term Investments
4,DLTT_QUARTER,Long-Term Debt - Total
5,DLTR_YEAR,Long-Term Debt - Reduction
6,DLTIS_YEAR,Long-Term Debt - Issuance


In [5]:
# Show the complete item -> description table.
item_mapper.get_mapping()

Unnamed: 0,item,description
0,ACCHG_QUARTER,Accounting Changes / Cumulative Effect
1,ACOMINC_QUARTER,Accumulated Other Comprehensive Income (Loss)
2,ACO_QUARTER,Current Assets - Other - Total
3,ACT_QUARTER,Current Assets - Total
4,ADRR_QUARTER,ADR Ratio
...,...,...
167,XIDO_QUARTER,Extraordinary Items and Discontinued Operations
168,XINT_QUARTER,Interest and Related Expense- Total
169,XI_QUARTER,Extraordinary Items
170,XRD_QUARTER,Research and Development Expense


## Metadata

In [8]:
# Client used by the metadata and payload queries in the rest of the notebook.
client = p1_edg.EdgarClient(token=P1_API_TOKEN)

In [9]:
def display_df(df: pd.DataFrame, n: int = 3) -> None:
    """
    Print the number of rows of `df` and display its first rows.

    :param df: dataframe to summarize
    :param n: number of leading rows to display (default 3)
    """
    print("num_rows=%s" % df.shape[0])
    # `display` is not imported in this notebook: it is the rich-output
    # function that the IPython / Jupyter kernel makes available.
    display(df.head(n))

    
def print_payload(payload: Any, n: int = 300) -> None:
    """
    Pretty-print `payload`, truncated to its first `n` characters.

    :param payload: object to print; this notebook passes dicts of tables and
        single tables, not only strings
    :param n: maximum number of characters of the formatted text to print
    """
    print(pprint.pformat(payload)[:n])

In [10]:
# Form headers restricted to a list of form types and a list of CIKs,
# for a single publication date.
headers = client.get_form_headers(
    cik=[918504, 1048286, 5272, 947263, 1759760, 320193],
    form_type=["13F-HR", "10-K", "3", "4"],
    date_mode="publication_date",
    start_date="2020-10-30",
    end_date="2020-10-30",
)
display_df(headers)

Processing CIK:   0%|          | 0/1 [00:00<?, ?it/s]

Pages: : 0it [00:00, ?it/s]

num_rows=7


Unnamed: 0,uuid,cik,filing_date,form_type,edgar_path,company,acceptance_datetime,period_of_report,created_at
0,c01879c6-3f25-4873-bb8c-4c8f94e2eb02,5272,2020-10-30,4,edgar/data/5272/000120919120056278/0001209191-...,AMERICAN INTERNATIONAL GROUP INC,2020-10-30T13:27:59+00:00,2020-10-28T04:00:00+00:00,2020-10-30T13:29:17.316747+00:00
1,528e0c88-9ffc-4a31-a570-40da058f2eaa,5272,2020-10-30,4,edgar/data/5272/000120919120056279/0001209191-...,AMERICAN INTERNATIONAL GROUP INC,2020-10-30T13:29:01+00:00,2020-10-28T04:00:00+00:00,2020-10-30T13:30:44.767449+00:00
2,24e25e67-4cbf-4c2c-afc7-a70be29dd6de,320193,2020-10-30,10-K,edgar/data/320193/000032019320000096/000032019...,Apple Inc.,2020-10-29T22:06:25+00:00,2020-09-26T04:00:00+00:00,2020-10-30T10:06:34.131935+00:00


In [11]:
# Get forms for a subset of form types and all CIKs (`cik=None`) for 1 month
# (January 2020).
headers = client.get_form_headers(
    form_type=["4"],
    cik=None,
    start_date="2020-01-01",
    end_date="2020-01-31",
    date_mode="publication_date",
)
display_df(headers)

Processing :   0%|          | 0/1 [00:00<?, ?it/s]

Pages: : 0it [00:00, ?it/s]

num_rows=7815


Unnamed: 0,uuid,cik,filing_date,form_type,edgar_path,company,acceptance_datetime,period_of_report,created_at
0,835b433c-fcaa-4615-bd3a-7ac996ca7c95,1750,2020-01-02,4,edgar/data/1750/0001127602-20-000197.txt,AAR CORP,2020-01-02T21:47:54+00:00,2019-12-24T05:00:00+00:00,2020-06-12T23:46:24.395414+00:00
1,a9ead7b8-4af6-4ee5-8af7-f793a8632952,1750,2020-01-02,4,edgar/data/1750/0001127602-20-000091.txt,AAR CORP,2020-01-02T20:23:00+00:00,2019-12-31T05:00:00+00:00,2020-06-12T23:46:24.395414+00:00
2,51772308-6109-40b9-a97c-e1048cf9dc6c,6769,2020-01-02,4,edgar/data/6769/0001357400-20-000007.txt,APACHE CORP,2020-01-02T19:33:37+00:00,2019-12-31T05:00:00+00:00,2020-06-12T23:46:28.584799+00:00


## Payload data

### Form8

In [12]:
# Form8 data for a single CIK and a single item, restricted to a date range
# interpreted as publication dates.
payload = client.get_form8_payload(
    cik=18498,
    item="ACT_QUARTER",
    date_mode="publication_date",
    start_date="2020-01-04",
    end_date="2020-12-04",
)
display_df(payload)

Processing CIK:   0%|          | 0/1 [00:00<?, ?it/s]

Pages: : 0it [00:00, ?it/s]

num_rows=4


Unnamed: 0,form_uuid,filing_url,form_publication_timestamp,filing_date,creation_timestamp,cik,ticker,item_name,form_table_row_name,item_value,compustat_timestamp,period_of_report,compustat_coifnd_id,gvk
0,514be1a9-88d1-426b-be7e-d7b5a0ff64e6,https://www.sec.gov/Archives/edgar/data/18498/...,2020-03-12T11:45:58,2020-03-12,2020-06-12T23:49:03.580000,18498,GCO,ACT_QUARTER,Total current assets,508.1830000000001,2020-03-12T20:00:00,2020-01-31T00:00:00,10099821.0,5109
1,70a8c7b9-d0a7-4c41-902c-89d84d3b9122,https://www.sec.gov/Archives/edgar/data/18498/...,2020-06-09T11:21:05,2020-06-09,2020-12-01T17:13:22.396000,18498,GCO,ACT_QUARTER,Total current assets,735.008,2020-06-09T20:00:00,2020-04-30T00:00:00,10164135.0,5109
2,30cc5db5-e9ce-4d43-830b-d5d5a94b201a,https://www.sec.gov/Archives/edgar/data/18498/...,2020-09-03T11:30:54,2020-09-03,2020-09-03T11:32:58.186000,18498,GCO,ACT_QUARTER,Total current assets,777.658,,2020-07-31T00:00:00,,5109


In [13]:
# Form8 data for several CIKs; no item and no date range is passed, so
# nothing is filtered on the caller's side.
payload = client.get_form8_payload(
    cik=[18498, 319201, 5768],
)
display_df(payload)

Processing CIK:   0%|          | 0/1 [00:00<?, ?it/s]

Pages: : 0it [00:00, ?it/s]

num_rows=961


Unnamed: 0,form_uuid,filing_url,form_publication_timestamp,filing_date,creation_timestamp,cik,ticker,item_name,form_table_row_name,item_value,compustat_timestamp,period_of_report,compustat_coifnd_id,gvk
0,782bb3b5-a1b0-42f9-9071-d13a98849a5f,https://www.sec.gov/Archives/edgar/data/319201...,2010-01-28T21:23:27,2010-01-28,2020-06-12T22:52:30.433000,319201,KLAC,NI_QUARTER,Net income (loss),21.794,2010-01-29T05:00:00,2009-12-31T00:00:00,6999592,6304
1,782bb3b5-a1b0-42f9-9071-d13a98849a5f,https://www.sec.gov/Archives/edgar/data/319201...,2010-01-28T21:23:27,2010-01-28,2020-06-12T22:52:30.433000,319201,KLAC,SALE_QUARTER,Total revenues,440.355,2010-01-29T05:00:00,2009-12-31T00:00:00,6999592,6304
2,9f600236-5a62-4861-bcf3-59c2e884cd90,https://www.sec.gov/Archives/edgar/data/5768/0...,2010-02-09T21:17:04,2010-02-09,2020-06-12T22:52:22.626000,5768,ASEI,NI_QUARTER,Net income,5.819,2010-02-10T05:00:00,2009-12-31T00:00:00,7007593,1554


### Form4


#### Examples of queries

In [14]:
# Initialize the client.
client = p1_edg.EdgarClient(token=P1_API_TOKEN)

In [15]:
# Get Form4 data for one CIK over a five-year range (2015-10-23 to 2020-10-23),
# as dataframes.
payload = client.get_form4_payload(
    cik=1524358,
    start_date="2015-10-23",
    end_date="2020-10-23",
    date_mode="publication_date",
    output_type="dataframes",
)

Processing CIK:   0%|          | 0/1 [00:00<?, ?it/s]

Pages: : 0it [00:00, ?it/s]

In [16]:
# Names of the tables contained in the payload.
payload.keys()

dict_keys(['metadata', 'derivative_table', 'footnotes', 'general_info', 'non_derivative_table', 'reporting_owner_info'])

In [17]:
# Row count and first rows of the "general_info" table of the payload.
display_df(payload['general_info'])

num_rows=382


Unnamed: 0,uuid,url,date_of_original_submission,document_type,form3_holdings_reported,form4_transactions_reported,issuer_cik,issuer_name,issuer_trading_symbol,no_securities_owned,not_subject_to_section_16,period_of_report,remarks,schema_version,signature_date,signature_name,footnote_ids
0,0000008f-b9a2-4b2a-bdb2-cb4605c895c4,https://www.sec.gov/Archives/edgar/data/152435...,,4,,,1524358,MARRIOTT VACATIONS WORLDWIDE Corp,VAC,,0.0,2015-10-22,,X0306,2015-10-23,"/s/ Catherine Meeker, Attorney-in-Fact",[]
1,01101471-7ee9-4fc5-afef-73d5ac86d756,https://www.sec.gov/Archives/edgar/data/152435...,,4,,,1524358,MARRIOTT VACATIONS WORLDWIDE Corp,VAC,,0.0,2020-03-12,,X0306,2020-03-16,"/s/James H Hunter, IV\nAttorney-In-Fact",[]
2,0164245c-a57e-4ea8-b730-69bb7f4104d0,https://www.sec.gov/Archives/edgar/data/152435...,,4,,,1524358,MARRIOTT VACATIONS WORLDWIDE Corp,VAC,,0.0,2015-12-15,,X0306,2015-12-17,"/s/ Catherine Meeker, Attorney-in-Fact",[]


In [18]:
# Form4 data for a single CIK, one week of publication dates.
payload = client.get_form4_payload(
    cik=1002910,
    date_mode="publication_date",
    start_date="2015-10-20",
    end_date="2015-10-27",
)

Processing CIK:   0%|          | 0/1 [00:00<?, ?it/s]

Pages: : 0it [00:00, ?it/s]

In [19]:
# Form4 data for several CIKs, one week of publication dates, as dataframes.
payload = client.get_form4_payload(
    cik=[910521, 883241, 80424],
    output_type="dataframes",
    date_mode="publication_date",
    start_date="2020-12-10",
    end_date="2020-12-17",
)

Processing CIK:   0%|          | 0/1 [00:00<?, ?it/s]

Pages: : 0it [00:00, ?it/s]

In [20]:
# Row count and first rows of the "metadata" table of the payload.
display_df(payload['metadata'])

num_rows=23


Unnamed: 0,uuid,form_type,company,cik,release_date,edgar_path,payload_path,created_at,period
0,4b59d19c-45f5-4a35-8ecb-736b59079a77,4,PROCTER & GAMBLE Co,80424,2020-12-17,edgar/data/80424/000112760220031658/0001127602...,s3://edgar-store/form4/2020/8f9905ed8b24fbe3bd...,2020-12-17T14:37:55.298000,2020-12-17T00:00:00
1,bf76a297-b067-44fa-9c09-aa9579487cc6,4,PROCTER & GAMBLE Co,80424,2020-12-17,edgar/data/80424/000112760220031735/0001127602...,s3://edgar-store/form4/2020/c6d86b0107ac3e4dc3...,2020-12-17T18:57:17.888000,2020-12-17T00:00:00
2,cb47a9e7-75cc-485c-8605-cfc36e578df5,4,PROCTER & GAMBLE Co,80424,2020-12-17,edgar/data/80424/000112760220031737/0001127602...,s3://edgar-store/form4/2020/37f6fda98d0f71f6d1...,2020-12-17T18:59:23.359000,2020-12-17T00:00:00


In [21]:
# Form4 data for a single publication date; no `cik` is passed, so the query
# is not restricted to particular companies.
payload = client.get_form4_payload(
    date_mode="publication_date",
    start_date="2020-12-17",
    end_date="2020-12-17",
)
print_payload(payload)

Processing :   0%|          | 0/1 [00:00<?, ?it/s]

Pages: : 0it [00:00, ?it/s]

{'derivative_table':                                      uuid  transaction_index_within_table  \
0    0084fea9-6b26-430b-b5f1-e1c26da2434c                               0   
1    0084fea9-6b26-430b-b5f1-e1c26da2434c                               1   
2    0084fea9-6b26-430b-b5f1-e1c26da2434c       


#### How to handle and show payload data

In [22]:
# Number of tables in the payload, followed by their names.
print("len(payload)=%s" % (len(payload),))
print("payload.keys()=%s" % (payload.keys(),))

# First two rows of the payload's metadata.
print(
    'payload["metadata"]=\n%s'
    % (pprint.pformat(payload["metadata"][:2]),)
)

# Truncated pretty-print of the payload's "general_info" table.
print_payload(payload["general_info"])

len(payload)=6
payload.keys()=dict_keys(['metadata', 'derivative_table', 'footnotes', 'general_info', 'non_derivative_table', 'reporting_owner_info'])
payload["metadata"]=
                                   uuid form_type  \
0  28bf899e-e47d-4063-a9a8-b05a0a5dc571         4   
1  45ab2b89-9a2e-4fba-a53c-0f51076307be         4   

                           company      cik release_date  \
0  Armada Hoffler Properties, Inc.  1569187   2020-12-17   
1                     RAYONIER INC    52827   2020-12-17   

                                          edgar_path  \
0  edgar/data/1569187/000156918720000067/00015691...   
1  edgar/data/52827/000005282720000244/0000052827...   

                                        payload_path  \
0  s3://edgar-store/form4/2020/ffb0d1daa1928ac1a3...   
1  s3://edgar-store/form4/2020/cbc1c144ea30740c24...   

                   created_at               period  
0  2020-12-17T11:09:21.909000  2020-12-17T00:00:00  
1  2020-12-17T11:05:10.096000  2020-12-17T0

### Form13

#### Examples of queries

In [23]:
# Initialize the client.
client = p1_edg.EdgarClient(token=P1_API_TOKEN)

In [24]:
# Form13 data for a single filer, identified by CIK, for one publication date.
payload = client.get_form13_payload(
    cik=1259313,
    date_mode="publication_date",
    start_date="2015-11-16",
    end_date="2015-11-16",
)
display_df(payload["metadata"])

Processing CIK:   0%|          | 0/1 [00:00<?, ?it/s]

Pages: : 0it [00:00, ?it/s]

num_rows=1


Unnamed: 0,uuid,form_type,company,cik,edgar_path,payload_path,created_at,period,release_date
0,00024613-c236-4f49-bf40-b110f49155f6,13F-HR,ARES MANAGEMENT LLC,1259313,edgar/data/1259313/0001104659-15-079368.txt,s3://edgar-store/form13f%/2015/bc80e15e513a52b...,2020-06-13T01:21:33.030000,2015-11-16T00:00:00,2015-11-16T00:00:00


In [25]:
# Form13 data for a single security, identified by CUSIP, for one publication
# date.
payload = client.get_form13_payload(
    cusip="01449J204",
    date_mode="publication_date",
    start_date="2015-11-16",
    end_date="2015-11-16",
)
print_payload(payload)

Processing CUSIP:   0%|          | 0/1 [00:00<?, ?it/s]

Pages: : 0it [00:00, ?it/s]

{'cover_page':                                    uuid  \
0  00024613-c236-4f49-bf40-b110f49155f6   
1  3ed94d62-9e28-474a-96d4-8ce273be6b27   
2  52087cd2-caa0-45ff-af9a-c8d43fd0f35d   
3  5a3ff825-8ab6-481f-9777-c45b27aedc5f   
4  a223d2b6-d8eb-4fa5-a2c4-35e424802bd9   
5  ac568d7b-3f5d-4779-a864-


In [26]:
# Form13 data for several CUSIPs and one publication date, as dataframes.
payload = client.get_form13_payload(
    cusip=["002824100", "01449J204"],
    output_type="dataframes",
    date_mode="publication_date",
    start_date="2016-11-15",
    end_date="2016-11-15",
)
print_payload(payload)

Processing CUSIP:   0%|          | 0/1 [00:00<?, ?it/s]

Pages: : 0it [00:00, ?it/s]

{'cover_page':                                     uuid additional_information  amendment_no  \
0   00205d8a-371b-499d-9b24-40ae176d59e2                   None           NaN   
1   0122f9ff-26b3-4be7-afe7-24a815409501                   None           NaN   
2   02cb7011-9d3f-406a-beb7-bd2a3b79e0b2  


#### How to handle and show payload data

In [27]:
# Number of tables in the payload, followed by their names.
print("len(payload)=%s" % (len(payload),))
print("payload.keys()=%s" % (payload.keys(),))

len(payload)=6
payload.keys()=dict_keys(['metadata', 'cover_page', 'header_data', 'information_table', 'signature_block', 'summary_page'])


In [28]:
# Show the metadata table of the payload.
display_df(payload["metadata"])

num_rows=77


Unnamed: 0,uuid,form_type,company,cik,edgar_path,payload_path,created_at,period,release_date
0,2e368dfe-a609-4d3c-9772-6cf84907ab5b,13F-HR,LOWE BROCKENBROUGH & CO INC,22657,edgar/data/22657/0000022657-16-000011.txt,s3://edgar-store/form13f%/2016/455a6b63c9e0705...,2020-06-12T21:11:04.298000,2016-11-15T00:00:00,2016-11-15T00:00:00
1,443e3fcb-5a07-472c-b305-592ab23d2b10,13F-HR,WAYNE HUMMER INVESTMENTS L.L.C.,49096,edgar/data/49096/0000049096-16-000010.txt,s3://edgar-store/form13f%/2016/8ca17dd798114d1...,2020-06-12T21:11:08.061000,2016-11-15T00:00:00,2016-11-15T00:00:00
2,b0725596-b951-4816-be2b-68407df4a398,13F-HR,CAPITAL GUARDIAN TRUST CO,314965,edgar/data/314965/0000732812-16-000118.txt,s3://edgar-store/form13f%/2016/75bd42a1e0f9365...,2020-06-12T21:11:04.298000,2016-11-15T00:00:00,2016-11-15T00:00:00


### Form10

In [29]:
# Form10 data for a single CIK over two consecutive publication dates.
payload = client.get_form10_payload(
    cik=1002910,
    date_mode="publication_date",
    start_date="2020-05-11",
    end_date="2020-05-12",
)

Output()

Processing CIK:   0%|          | 0/1 [00:00<?, ?it/s]

1002910: 1 forms loaded


In [30]:
# Number of forms returned, and the keys of the first one.
print("len(payload)=%s" % (len(payload),))
print("payload[0].keys()=%s" % (payload[0].keys(),))

len(payload)=1
payload[0].keys()=dict_keys(['meta', 'data'])


In [31]:
# Pretty-print the "meta" part of the first form.
print(
    'payload[0]["meta"]=\n%s'
    % (pprint.pformat(payload[0]["meta"]),)
)

payload[0]["meta"]=
{'cik': {'0': 1002910},
 'filing_date': {'0': '2020-05-11'},
 'filing_url': {'0': 'https://www.sec.gov/Archives/edgar/data/1002910/000100291020000115/0001002910-20-000115-index.html'},
 'form_type': {'0': '10-Q'},
 'uuid': {'0': '981cfc73-7380-4093-8cd7-f0d6a845b3e4'}}


In [32]:
# Keep the "data" part of the first form and preview the first 2000 characters
# of its pretty-printed form. Despite the variable name, the output below shows
# a nested dict rather than a string.
json_str = payload[0]["data"]
print_payload(json_str, n=2000)

{'calculation': {'calculationLinkbase': [['linkRole',
                                          {'definition': '1002000 - Statement '
                                                         '- Consolidated '
                                                         'Statement of Income '
                                                         '(Loss) and '
                                                         'Comprehensive Income '
                                                         '(Loss)',
                                           'role': 'http://www.ameren.com/role/ConsolidatedStatementOfIncomeLossAndComprehensiveIncomeLoss'},
                                          {},
                                          ['concept',
                                           {'label': 'Net Income (Loss) '
                                                     'Attributable to Parent',
                                            'name': 'us-gaap:NetIncomeLoss'},
                   