In [21]:
from typing import Any
import requests
import json
from ratelimit import limits, sleep_and_retry
import pandas as pd
from datetime import datetime
import requests
import json

# @sleep_and_retry
# NOTE: with @sleep_and_retry disabled, a call that exceeds the limit below
# raises ratelimit.RateLimitException instead of waiting for the next window.
@limits(calls=10, period=1.05)
def __request_get(url, *args,**kwargs):
    """Rate-limited wrapper around ``requests.get`` that sends a default User-agent.

    parameters:
        url: str
            address to fetch.
        *args, **kwargs:
            forwarded unchanged to ``requests.get``. A ``headers`` keyword is
            merged on top of the default header instead of clashing with it,
            and ``timeout`` defaults to 30 seconds when the caller gives none.
    returns:
        the ``requests.Response`` produced by ``requests.get``.
    """
    # Caller-supplied headers win over the default; previously passing
    # ``headers=`` raised TypeError because the keyword was given twice.
    # NOTE(review): SEC endpoints may expect a User-agent carrying contact
    # details rather than a generic browser string -- confirm.
    headers = {'User-agent': 'Mozilla/5.0', **(kwargs.pop('headers', None) or {})}

    # requests never times out on its own; bound the wait so a stalled
    # connection cannot hang the notebook indefinitely.
    kwargs.setdefault('timeout', 30)

    return requests.get(url, *args, headers=headers, **kwargs)

In [22]:
def convert_to_list_dict(dict_of_lists:dict[str, list[Any]]) -> list[dict[str, Any]]:
    """Turn a dict of parallel lists into a list of per-row dicts.

    parameters:
        dict_of_lists: dict
            maps each key to a list of values; position ``i`` of every list
            belongs to row ``i``.
    returns:
        one dict per row, each holding every key of the input. Rows stop at
        the shortest list (``zip`` semantics) and an empty input gives ``[]``.
    """
    keys = list(dict_of_lists)

    # Transpose the columns into rows: zip pairs up the i-th value of each list.
    rows = zip(*dict_of_lists.values())

    return [dict(zip(keys, row)) for row in rows]

In [84]:
def get_submissions_info(
        cik:str|int, 
        get_older_files:bool|int=False, 
        save_path:str=None) -> dict[str, list]:
    """ get document summissions for the ticker from https://data.sec.gov/submissions
    parameters:
        cik: str
        get_older_files: bool
            getting olders submissions that older than 5 years old.
    """    
    def __concat_submission(dict1, dict2):
        out_dict = {}
        
        #concat dict
        for key in dict2.keys():
            # Concatenate the lists if the key is present in both dictionaries
            out_dict[key] = dict1.get(key, []) + dict2.get(key, [])
            
        return out_dict
    
    def __get_submission_meta(cik:str|int) -> dict[str, list]:
        """ returns dictionary of submission metadata.
        """
        
        cik = str(cik).zfill(10)

        # get the metadata of the submissions.
        meta = __request_get(base_url + f"CIK{cik}.json").json()
        
        return meta
    
    base_url = "https://data.sec.gov/submissions/"
    
    filings = __get_submission_meta(cik=cik)['filings']
    submissions = {}

    # get the recent submission files.
    submissions = filings['recent']

    # get the older submission files.
    if 'files' in filings.keys() and get_older_files:
        older_file_meta = filings['files']
        for i in range(len(older_file_meta)):
            sub = __request_get(base_url + older_file_meta['name']).json()
            submissions = __concat_submission(submissions, sub)
            
    sub_df = pd.DataFrame(submissions)
    
    # filter the form submission.
    sub_df = dict(sub_df[sub_df['form'].isin(['10-Q', '10-K', '10-K/A', '10-Q/A'])])

    submissions = dict(sub_df)
    
    # casting panda series to list
    submissions = {k:list(v) for k,v in submissions.items()}
    
    # adding cik
    submissions['cik'] = [int(cik)]* len(sub_df)

    #save to cache json data
    if save_path is not None:
        with open(save_path, 'w') as json_file:
            json.dump(submissions, json_file, indent=4)
    
    # submissions = convert_to_list_dict(submissions)
    
    return submissions

In [85]:
# Fetch the filtered submission metadata for CIK 1045810, skipping the older pages.
form = get_submissions_info("1045810", False)

In [86]:
# Display the dict returned by get_submissions_info.
form

{'accessionNumber': ['0001045810-24-000124',
  '0001045810-24-000029',
  '0001045810-23-000227',
  '0001045810-23-000175',
  '0001045810-23-000093',
  '0001045810-23-000017',
  '0001045810-22-000166',
  '0001045810-22-000147',
  '0001045810-22-000079',
  '0001045810-22-000036',
  '0001045810-21-000163',
  '0001045810-21-000131',
  '0001045810-21-000064',
  '0001045810-21-000010',
  '0001045810-20-000189',
  '0001045810-20-000147',
  '0001045810-20-000065',
  '0001045810-20-000010',
  '0001045810-19-000170',
  '0001045810-19-000144',
  '0001045810-19-000079',
  '0001045810-19-000023',
  '0001045810-18-000150',
  '0001045810-18-000114',
  '0001045810-18-000080',
  '0001045810-18-000010',
  '0001045810-17-000172',
  '0001045810-17-000123',
  '0001045810-17-000075',
  '0001045810-17-000027',
  '0001045810-16-000353',
  '0001045810-16-000300',
  '0001045810-16-000275',
  '0001045810-16-000205',
  '0001045810-15-000173',
  '0001045810-15-000143',
  '0001045810-15-000097',
  '0001045810-15-00