In [2]:
from typing import Any
import requests
import json
from ratelimit import limits, sleep_and_retry
import pandas as pd
from datetime import datetime
import requests
import json

@sleep_and_retry
@limits(calls=10, period=1.05)
def request_get(url, *args, **kwargs):
    """Send a rate-limited HTTP GET request with a browser-like User-Agent.

    At most 10 calls are allowed per 1.05-second window. ``sleep_and_retry``
    makes a call that exceeds that budget wait until the window resets and
    then retry, instead of propagating ``ratelimit.RateLimitException`` to
    the caller.

    Parameters:
        url: address to fetch.
        *args, **kwargs: forwarded unchanged to ``requests.get``. A
            ``headers`` mapping passed by the caller is merged over the
            default User-Agent header.

    Returns:
        The ``requests.Response`` returned by ``requests.get``.
    """
    headers = {'User-agent': 'Mozilla/5.0'}
    # Pop the caller's headers so ``headers`` is not passed twice (which
    # would raise TypeError), and let them override the default.
    headers.update(kwargs.pop('headers', None) or {})
    return requests.get(url, *args, headers=headers, **kwargs)

In [3]:
def convert_to_list_dict(dict_of_lists: dict[str, list[Any]]) -> list[dict[str, Any]]:
    """Turn a column-oriented mapping into a list of row dictionaries.

    Parameters:
        dict_of_lists: mapping of column name -> list of values, where the
            i-th element of every list belongs to the i-th row.

    Returns:
        One dict per row, keyed by the column names in their original order.
        Rows are built with ``zip``, so when the lists differ in length the
        result stops at the shortest one. An empty mapping yields ``[]``.
    """
    keys = list(dict_of_lists)
    # zip(*columns) walks all columns in lockstep, producing one tuple per row.
    return [dict(zip(keys, row)) for row in zip(*dict_of_lists.values())]

In [4]:
def submissions_form_transform(submissions_form: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Rename the keys of each submission record to snake_case column names.

    Parameters:
        submissions_form: list of records keyed by the camelCase field names
            (``accessionNumber``, ``filingDate``, ...) plus ``cik``.

    Returns:
        The same list object, with every element replaced by a new dict that
        contains only the renamed keys; any other keys are dropped.

    Raises:
        KeyError: if a record lacks one of the expected source keys. The
            list is left untouched in that case.
    """
    # Output column name -> source key. 'primary_docment' is spelled this way
    # on purpose: it has to match the column name in the submissionForm schema.
    column_sources = {
        'accession_number': 'accessionNumber',
        'cik': 'cik',
        'filing_date': 'filingDate',
        'report_date': 'reportDate',
        'acceptance_date_time': 'acceptanceDateTime',
        'act': 'act',
        'form': 'form',
        'file_number': 'fileNumber',
        'film_number': 'filmNumber',
        'items': 'items',
        'size': 'size',
        'is_xbrl': 'isXBRL',
        'is_inline_xbrl': 'isInlineXBRL',
        'primary_docment': 'primaryDocument',
        'primary_doc_description': 'primaryDocDescription',
    }
    # Slice assignment keeps the in-place contract: callers holding a
    # reference to the list see the transformed records.
    submissions_form[:] = [
        {column: record[source] for column, source in column_sources.items()}
        for record in submissions_form
    ]
    return submissions_form


def get_submissions_form(
        cik:str|int,
        get_older_files:bool|int=False,
        save_path:str|None=None) -> list[dict[str, Any]]:
    """Get the 10-K / 10-Q submissions of a company from https://data.sec.gov/submissions.

    Parameters:
        cik: str | int
            Central Index Key of the filer; zero-padded to 10 digits for the
            request URL.
        get_older_files: bool
            When truthy, also download the additional files listed under
            ``filings['files']`` and append them to the recent submissions.
        save_path: str | None
            When given, the filtered column-oriented data (including the
            added ``cik`` column) is written to this path as JSON.

    Returns:
        A list with one dict per submission whose form is 10-Q, 10-K, 10-K/A
        or 10-Q/A, keyed by the snake_case names produced by
        ``submissions_form_transform``.
    """
    base_url = "https://data.sec.gov/submissions/"
    wanted_forms = ('10-Q', '10-K', '10-K/A', '10-Q/A')

    def _concat_submission(dict1, dict2):
        """Concatenate two column-oriented dicts, column by column.

        Only columns present in both inputs are kept, so every column of the
        result has the same length.
        """
        return {key: dict1[key] + dict2[key] for key in dict1 if key in dict2}

    def _get_submission_meta(cik:str|int) -> dict[str, Any]:
        """Return the submission metadata document of the given CIK."""
        padded_cik = str(cik).zfill(10)
        return request_get(base_url + f"CIK{padded_cik}.json").json()

    filings = _get_submission_meta(cik=cik)['filings']

    # The recent submissions are embedded in the metadata document itself.
    submissions = filings['recent']

    # The older submissions live in separate files, one request per file.
    if get_older_files:
        for file_meta in filings.get('files', []):
            older = request_get(base_url + file_meta['name']).json()
            submissions = _concat_submission(submissions, older)

    # Keep only the rows of the wanted form types. Filtering in plain Python
    # keeps every value a native JSON type (str/int), which both json.dump
    # and the sqlite3 parameter binding accept.
    kept_rows = [
        index for index, form in enumerate(submissions['form'])
        if form in wanted_forms
    ]
    submissions = {
        column: [values[index] for index in kept_rows]
        for column, values in submissions.items()
    }

    # One cik value per kept row, so the column lines up with the others.
    submissions['cik'] = [int(cik)] * len(kept_rows)

    # Save the column-oriented data as a JSON cache.
    if save_path is not None:
        with open(save_path, 'w') as json_file:
            json.dump(submissions, json_file, indent=4)

    submissions = convert_to_list_dict(submissions)
    submissions = submissions_form_transform(submissions)

    return submissions

In [6]:
import os
import sqlite3

# Open the SQLite database file 'test.db' (relative to the working directory).
# `connection` is a notebook-level global passed to submissions_form_load below.
connection = sqlite3.connect('test.db')
# Notebook-level cursor used by the ad-hoc query cells further down.
cursor = connection.cursor()

In [7]:
# Table definition appended to "CREATE TABLE IF NOT EXISTS " by the loader.
# Text columns are declared TEXT: in SQLite a declared type of STRING gets
# NUMERIC affinity, which silently converts numeric-looking text (e.g. a film
# number with leading zeros) into a number. `accession_number` and `items`
# hold text such as '0001045810-24-000124' and '2.02,9.01', so they are TEXT
# as well. The column name `primary_docment` is kept as-is because the
# transformed records use that exact key.
# NOTE: a database created with the previous definition keeps its old column
# types, because the table is only created when it does not exist yet.
schemasubmissionForm = """submissionForm(
  accession_number TEXT PRIMARY KEY NOT NULL,
  cik INT(10),
  filing_date DATE,
  report_date DATE,
  acceptance_date_time DATETIME,
  act TEXT,
  form TEXT,
  file_number TEXT,
  film_number TEXT,
  items TEXT,
  size INT,
  is_xbrl INT,
  is_inline_xbrl INT,
  primary_docment TEXT,
  primary_doc_description TEXT
)"""

In [79]:
def submissions_form_load(connection, cik:int|str):
    """Download the submissions of a CIK and upsert them into the database.

    Creates the ``latestFormUpdate`` and ``submissionForm`` tables when they
    do not exist, inserts or replaces one ``submissionForm`` row per
    submission, and records the current time as the CIK's latest update in
    ``latestFormUpdate``.

    Parameters:
        connection: open ``sqlite3`` connection to write to.
        cik: Central Index Key of the filer.

    Returns:
        None. When no submissions are found, a message is printed and the
        database is not touched.
    """
    submissions_form = get_submissions_form(cik)

    # Nothing to store for this CIK.
    if len(submissions_form) == 0:
        print("No submissions from of CIK{}".format(cik))
        return

    keys = list(submissions_form[0].keys())
    # Build each row by key so the values always line up with the column
    # list, independent of the ordering inside the individual dicts.
    rows = [[record[key] for key in keys] for record in submissions_form]
    column_list = ",".join(keys)
    placeholders = ",".join("?" * len(keys))

    # As a context manager the connection commits when the block succeeds
    # and rolls back when it raises, so a failure cannot leave the
    # submissions stored without their update timestamp (or vice versa).
    with connection:
        cursor = connection.cursor()

        cursor.execute("""CREATE TABLE IF NOT EXISTS latestFormUpdate(
            cik INT(10) PRIMARY KEY,
            timestamp FLOAT
            )""")
        cursor.execute("CREATE TABLE IF NOT EXISTS " + schemasubmissionForm)

        cursor.executemany(
            f"""
            INSERT OR REPLACE INTO submissionForm ({column_list})
            VALUES ({placeholders})
            """,
            rows
            )
        # Values are bound as parameters rather than formatted into the SQL.
        cursor.execute(
            """
            INSERT OR REPLACE INTO latestFormUpdate (cik, timestamp)
            VALUES (?, ?)
            """,
            (int(cik), datetime.today().timestamp())
            )

    print("Submissions from of CIK{} was loaded into a database.".format(cik))

In [80]:
submissions_form_load(connection, cik=1045810)

Submissions from of CIK1045810 was loaded into a database.


In [112]:
from datetime import datetime


cik = 1045810
n_days = 30

def lazy_
    latest_form_update = cursor.execute(f"""
        SELECT timestamp FROM latestFormUpdate 
        WHERE cik={int(cik)}
        """).fetchone()[0]

    dalta_days = (datetime.today().timestamp() - latest_form_update)/86400

    if delta_days > n_days:
        submissions_form_load(connection=connection, cik=cik)

In [86]:
# Display the stored last-update timestamp (seconds since the epoch).
# NOTE(review): no visible cell assigns `latest_form_update` at notebook scope;
# presumably it is left over from an earlier kernel session - confirm.
latest_form_update

1718348589.549019

In [34]:
# Display the most recent filing date.
# NOTE(review): `latest_filing_date` is not defined in any visible cell;
# presumably it comes from a deleted cell - confirm before relying on it.
latest_filing_date

'2024-05-29'

datetime.datetime(2024, 6, 14, 14, 3, 9, 549019)

In [108]:
# Age of the last update relative to a fixed reference date (2024-09-01),
# computed with datetime arithmetic.
delta = datetime(2024,9,1) - datetime.fromtimestamp(latest_form_update)
# timedelta.days is the whole-day component only (the fraction is dropped).
delta_days = delta.days
delta_days

78

In [111]:
# Same age computed from epoch timestamps: seconds elapsed divided by 86400
# (seconds per day), which keeps the fractional part of the day count.
# NOTE(review): the name is spelled `dalta_days`, so this does not update the
# `delta_days` variable assigned in the previous cell.
dalta_days = (datetime(2024,9,1).timestamp() - latest_form_update)/86400
dalta_days

78.41447281227896

In [116]:
# Probe the lookup for a CIK (23) that has no row in latestFormUpdate:
# fetchone() returns None when the query yields no rows, so the expression
# evaluates to True in that case.
cursor.execute(f"""
            SELECT timestamp FROM latestFormUpdate 
            WHERE cik={int(23)}
            """).fetchone() is None

True