In [None]:
import json
import pandas as pd
import pytz
import datetime
import os
import configparser

In [None]:
# Path of the INI file that is handed to configparser.
config_ini = "config.ini"

In [None]:
# Load the directory settings from the INI file.
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means nothing was loaded.
# Fail here with a clear message instead of a confusing NoOptionError below.
if not config.read(config_ini):
    raise FileNotFoundError(
        f'Could not read configuration file {config_ini!r} '
        f'(working directory: {os.getcwd()!r})'
    )

# Directory containing the per-entity filing-history JSON files (input).
entity_filing_history_dir = config.get(section='DEFAULT', option='entity_filing_history_dir_path')
# Directory that receives the summary CSV files (output).
entity_filing_history_summary_dir = config.get(section='DEFAULT', option='entity_filing_history_summary_dir_path')

In [None]:
# Destination of the combined summary CSV inside the summary directory.
summary_file_name = 'filing_history_summary.csv'
output_path = os.path.join(entity_filing_history_summary_dir, summary_file_name)

In [None]:
# Collect the input files to summarise.
# - os.listdir() returns entries in arbitrary order, so sort the names to make
#   the row order of the resulting CSV reproducible between runs and machines.
# - Keep regular files only; a sub-directory in the input folder would make the
#   later open() call fail.
filing_history_path_list = [
    file_path
    for file_path in (
        os.path.join(entity_filing_history_dir, file_name)
        for file_name in sorted(os.listdir(entity_filing_history_dir))
    )
    if os.path.isfile(file_path)
]

In [None]:
def utc_to_eastern(utc_str):
    """Convert a UTC timestamp string to a naive US Eastern datetime.

    Args:
        utc_str: Timestamp formatted as ``%Y-%m-%dT%H:%M:%S.%fZ``,
            for example ``'2023-01-15T17:30:00.000Z'``.

    Returns:
        A ``datetime.datetime`` with ``tzinfo=None`` holding the equivalent
        wall-clock time in the ``America/New_York`` zone.

    Raises:
        ValueError: If ``utc_str`` does not match the expected format.
    """
    parsed = datetime.datetime.strptime(utc_str, '%Y-%m-%dT%H:%M:%S.%fZ')
    new_york = pytz.timezone('America/New_York')
    # Mark the parsed value as UTC, shift it to New York time, then drop the
    # tzinfo so callers receive a plain (naive) datetime.
    return pytz.utc.localize(parsed).astimezone(new_york).replace(tzinfo=None)


In [None]:
# Accumulator for the per-filing summary records gathered from every input file.
all_summary_list = list()

In [None]:
# Flatten every filing-history JSON file into one record per filing.
#
# Start from an empty accumulator so that re-running this cell rebuilds the
# summary instead of appending a second copy of every row.
all_summary_list = []

for file_path in filing_history_path_list:
    # Read as UTF-8 explicitly rather than relying on the platform's default
    # text encoding, which differs between operating systems.
    with open(file_path, encoding='utf-8') as json_file:
        data = json.load(json_file)

    # Files without a top-level 'cik' key are skipped.
    if 'cik' not in data:
        continue

    # Left-pad the CIK with zeros to a width of ten characters.
    cik = str(data['cik']).zfill(10)

    # The lists under filings -> recent are treated as parallel: index i of
    # each list describes the same filing.
    recent_filings = data['filings']['recent']
    filing_date_list = recent_filings['filingDate']
    report_date_list = recent_filings['reportDate']
    # Acceptance timestamps are converted from UTC strings to naive Eastern time.
    acceptance_date_list = [
        utc_to_eastern(acceptance_datetime)
        for acceptance_datetime in recent_filings['acceptanceDateTime']
    ]
    form_list = recent_filings['form']

    # Key order here determines the column order of the final DataFrame/CSV.
    all_summary_list.extend(
        {
            'filingDate': filing_date,
            'reportDate': report_date,
            'acceptanceDateTime': acceptance_date,
            'form': form,
            'cik': cik,
        }
        for filing_date, report_date, acceptance_date, form in zip(
            filing_date_list, report_date_list, acceptance_date_list, form_list
        )
    )


In [None]:
# Build the summary table from the collected records.
# The columns are named explicitly so that the frame (and the CSV written from
# it) still carries a header row when no filings were collected; for non-empty
# input the result is the same as letting pandas infer the columns.
df = pd.DataFrame(
    all_summary_list,
    columns=['filingDate', 'reportDate', 'acceptanceDateTime', 'form', 'cik'],
)

In [None]:
# Write the summary to the fixed output path, replacing any earlier version.
df.to_csv(output_path, index=False)

# Also keep a timestamped snapshot of this run alongside it.
# os.path.splitext() touches only the final extension; a plain
# str.replace('.csv', ...) would also rewrite '.csv' occurring anywhere else
# in the path (for example in a directory name).
output_stem, output_ext = os.path.splitext(output_path)
run_timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
df.to_csv(f'{output_stem}--{run_timestamp}{output_ext}', index=False)