In [None]:
# Import standard libraries
import json
import os.path
import pandas as pd
import sqlite3
import sys
import fac as f

In [None]:
# Load the config for the FAC API.
# You need a file in this directory called 'config.json'
# It needs to look like this:
# {
#    "FAC_API_KEY": "YOUR-KEY-HERE",
#    "DATA_DIR": "data"
# }
# Read it inside a `with` block so the file handle is closed as soon as the
# JSON has been parsed, rather than lingering until garbage collection.
with open("config.json", "r", encoding="utf-8") as config_file:
    config = json.load(config_file)

In [None]:
# The FAC endpoints that the rest of the notebook iterates over.
endpoints = [
    "additional_ueis",
    "additional_eins",
    "general",
    "findings",
    "federal_awards",
]

# Build the FAC API client and hand it the key loaded from config.json.
client = f.FAC()
client.api_key(config["FAC_API_KEY"])

In [None]:
def build_datapath(filename):
    """Return the path of `filename` inside the configured data directory.

    The directory is looked up in `config["DATA_DIR"]` on every call, and the
    two parts are combined with `os.path.join`.
    """
    data_dir = config["DATA_DIR"]
    return os.path.join(data_dir, filename)

In [None]:
# Takes around 93m to run, first time.
# Make sure the data directory exists up front, so a finished download is not
# lost because its output file cannot be created.
data_dir = config["DATA_DIR"]
if data_dir:
    os.makedirs(data_dir, exist_ok=True)

for ep in endpoints:
    ep_json_filepath = build_datapath(f"{ep}.json")
    # If there is no JSON file for this table, go ahead and do the download.
    if not os.path.isfile(ep_json_filepath):
        client.endpoint(ep)
        client.fetch()
        # The handle is deliberately not called `f`: that name is the alias of
        # the `fac` module, and rebinding it breaks `f.FAC()` on a re-run.
        with open(ep_json_filepath, 'w', encoding='utf-8') as json_file:
            json.dump(client.results(), json_file, ensure_ascii=False, indent=2)

# Write the metadata
metadata_filepath = build_datapath("metadata.json")
if not os.path.isfile(metadata_filepath):
    with open(metadata_filepath, 'w', encoding='utf-8') as metadata_file:
        json.dump(client.metadata(), metadata_file, ensure_ascii=False, indent=2)

In [None]:
conn = sqlite3.connect(build_datapath("fac.sqlite"))

# Create tables

for ep in endpoints:
    ep_json_filepath = build_datapath(f"{ep}.json")
    # Read with the encoding the download step wrote (UTF-8). The handle is
    # not called `f` because that name is the alias of the `fac` module.
    with open(ep_json_filepath, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    # An endpoint that returned no records has no first object to take the
    # column names from, so there is nothing to create.
    if not data:
        print(f"No records in {ep_json_filepath}; skipping table {ep}")
        continue

    # Use the first object to create the table. Every column is TEXT.
    # Identifiers are double-quoted (embedded quotes doubled) so field names
    # that collide with SQL keywords or contain odd characters stay valid.
    column_defs = ",".join(
        '"{}" TEXT'.format(str(field).replace('"', '""'))
        for field in data[0].keys()
    )
    table_name = ep.replace('"', '""')
    stmt = f'CREATE TABLE IF NOT EXISTS "{table_name}" ({column_defs})'
    print(stmt)
    conn.execute(stmt)
    conn.commit()



In [None]:
# Load the data
for ep in endpoints:
    fname = build_datapath(f"{ep}.json")
    print(f"Loading {fname}")
    # Read with the encoding the download step wrote (UTF-8). The handle is
    # not called `f` because that name is the alias of the `fac` module.
    with open(fname, 'r', encoding='utf-8') as json_file:
        jsn = json.load(json_file)

    # Nothing to insert for an endpoint that returned no records.
    if not jsn:
        print(f"No records in {fname}; nothing to insert")
        continue

    print(f"Converting to dataframe: {len(jsn)} records")
    df = pd.DataFrame.from_records(jsn)

    print("Inserting")
    # The JSON file is a full dump, so appending it a second time would
    # duplicate every row. Empty the table first (if it exists) so that
    # re-running this cell, or the whole notebook, leaves one copy.
    table_exists = conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
        (ep,),
    ).fetchone()
    if table_exists:
        quoted_table = ep.replace('"', '""')
        conn.execute(f'DELETE FROM "{quoted_table}"')
    df.to_sql(ep,
              con=conn,
              if_exists='append',
              index=False)
    conn.commit()

In [None]:
# Close the connection
# (`conn` is the SQLite connection opened with sqlite3.connect earlier in the
# notebook; it cannot be used again after this cell without reconnecting.)
conn.close()
# Left as the cell's final expression so the notebook displays whatever
# client.metadata() returns.
client.metadata()