In [None]:
# Import standard libraries
import json
import os.path
import pandas as pd
import sqlite3
import sys
import fac as f

In [None]:
# Load the config for the FAC API.
# You need a file in this directory called 'config.json'
# It needs to look like this:
# {
#    "FAC_API_KEY": "YOUR-KEY-HERE",
#    "DATA_DIR": "data"
# }
# The file is opened in a `with` block so the handle is closed as soon as
# the JSON has been parsed, and read explicitly as UTF-8 so the result
# does not depend on the platform's default encoding.
with open("config.json", "r", encoding="utf-8") as config_file:
    config = json.load(config_file)

In [4]:
# Create the FAC API client and hand it the API key from config.json.
client = f.FAC()
client.api_key(config["FAC_API_KEY"])

# Endpoints pulled by this notebook. Each name is reused in later cells
# as the stem of a JSON file and as the name of a SQLite table.
endpoints = [
    "additional_ueis",
    "additional_eins",
    "general",
    "findings",
    "federal_awards",
]

In [5]:
def build_datapath(filename):
    """Return the path of `filename` inside the configured data directory.

    The directory is looked up from ``config["DATA_DIR"]`` each time the
    function is called, and joined to `filename` with ``os.path.join``.
    """
    data_dir = config["DATA_DIR"]
    return os.path.join(data_dir, filename)

In [6]:
# Takes around 93m to run, first time.
# Each endpoint is fetched once and cached as JSON in the data directory;
# an endpoint whose JSON file already exists is skipped.
#
# NOTE: the file handles below are deliberately not named `f`. This notebook
# imports `fac as f`, and `with open(...) as f` would rebind that name to a
# file object, so re-running `client = f.FAC()` in the same kernel would fail.
for ep in endpoints:
    ep_json_filepath = build_datapath(f"{ep}.json")
    # If there is no JSON file for this table, go ahead and do the download.
    if not os.path.isfile(ep_json_filepath):
        client.endpoint(ep)
        client.fetch()
        with open(ep_json_filepath, 'w', encoding='utf-8') as json_file:
            json.dump(client.results(), json_file, ensure_ascii=False, indent=2)

# Write the metadata
# Like the endpoint files, this is only written when it does not exist yet.
metadata_filepath = build_datapath("metadata.json")
if not os.path.isfile(metadata_filepath):
    with open(metadata_filepath, 'w', encoding='utf-8') as metadata_file:
        json.dump(client.metadata(), metadata_file, ensure_ascii=False, indent=2)

In [11]:
conn = sqlite3.connect(build_datapath("fac.sqlite"))

# Create tables
# One table per endpoint, named after the endpoint, with every column typed
# TEXT. Column names are taken from the keys of the first record in the
# endpoint's cached JSON file.
for ep in endpoints:
    ep_json_filepath = build_datapath(f"{ep}.json")
    # The cache files were written as UTF-8 with ensure_ascii=False, so they
    # must be read back as UTF-8 rather than with the platform default.
    # The handle is not named `f` to avoid clobbering the `fac as f` alias.
    with open(ep_json_filepath, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    # An endpoint with no records has no first object to derive columns from.
    if not data:
        print(f"No records in {ep_json_filepath}; skipping table {ep}")
        continue

    # Use the first object to create the table
    fields = data[0].keys()
    fields_with_commas = ",".join(f"{field} TEXT" for field in fields)
    stmt = f"CREATE TABLE IF NOT EXISTS {ep} ({fields_with_commas})"
    print(stmt)
    conn.execute(stmt)
    conn.commit()


CREATE TABLE IF NOT EXISTS additional_ueis (report_id TEXT,auditee_uei TEXT,audit_year TEXT,additional_uei TEXT)
CREATE TABLE IF NOT EXISTS additional_eins (report_id TEXT,auditee_uei TEXT,audit_year TEXT,additional_ein TEXT)
CREATE TABLE IF NOT EXISTS general (report_id TEXT,auditee_uei TEXT,audit_year TEXT,auditee_certify_name TEXT,auditee_certify_title TEXT,auditee_contact_name TEXT,auditee_email TEXT,auditee_name TEXT,auditee_phone TEXT,auditee_contact_title TEXT,auditee_address_line_1 TEXT,auditee_city TEXT,auditee_state TEXT,auditee_ein TEXT,auditee_zip TEXT,auditor_certify_name TEXT,auditor_certify_title TEXT,auditor_phone TEXT,auditor_state TEXT,auditor_city TEXT,auditor_contact_title TEXT,auditor_address_line_1 TEXT,auditor_zip TEXT,auditor_country TEXT,auditor_contact_name TEXT,auditor_email TEXT,auditor_firm_name TEXT,auditor_foreign_address TEXT,auditor_ein TEXT,cognizant_agency TEXT,oversight_agency TEXT,date_created TEXT,ready_for_certification_date TEXT,auditor_certified

In [12]:
# Load the data
# Reads each endpoint's cached JSON file into a DataFrame and appends its
# rows to the SQLite table of the same name.
# NOTE: because rows are appended, running this cell again against the same
# database inserts every record a second time.
for ep in endpoints:
    fname = build_datapath(f"{ep}.json")
    # Read as UTF-8 to match how the cache files were written. The handle is
    # not named `f` so the `fac as f` module alias stays intact.
    with open(fname, 'r', encoding='utf-8') as json_file:
        print(f"Loading {fname}")
        jsn = json.load(json_file)
    print(f"Converting to dataframe: {len(jsn)} records")
    df = pd.DataFrame.from_records(jsn)
    print("Inserting")
    df.to_sql(ep,
              con=conn,
              if_exists='append',
              index=False)

Loading ../data/additional_ueis.json
Converting to dataframe: 12525 records
Inserting
Loading ../data/additional_eins.json
Converting to dataframe: 55647 records
Inserting
Loading ../data/general.json
Converting to dataframe: 312488 records
Inserting
Loading ../data/findings.json
Converting to dataframe: 410808 records
Inserting
Loading ../data/federal_awards.json
Converting to dataframe: 5175364 records
Inserting


In [9]:
# Close the connection
# After this, `conn` can no longer be used; the cell that calls
# sqlite3.connect must be run again before any further database work.
conn.close()
# Last expression of the cell, so the notebook displays the client's
# metadata (the recorded output shows timing and query-count fields).
client.metadata()

{'total_time': 3.671,
 'average_query_time': 0.918,
 'query_count': 4,
 'total_time_hms': '0:00:03'}