In [None]:
from tqdm.notebook import tqdm
from concurrent.futures import ThreadPoolExecutor
from db.docs import DocumentManager
from db.db_instance import DBClient
import json

# Project-level handles used by the import below.
# NOTE(review): `client` is not referenced anywhere in the visible code;
# presumably constructing it initialises the DB connection -- confirm before
# removing.
client = DBClient()
db = DocumentManager()

# Data to process. JSON text is UTF-8, so decode it explicitly instead of
# relying on the platform's locale-dependent default encoding.
# NOTE: the name `list` shadows the builtin; it is kept unchanged because the
# code that drives the import reads `list["data"]`.
with open('./data/step1/list.json', 'r', encoding='utf-8') as f:
    list = json.load(f)

# Items whose insert failed; add_to_db appends to this and it is written to
# errors.json once the run finishes.
error_items = []

def _section_field(step_1, section, field):
    """Return ``step_1[section][field]``, or None if the section is missing or empty."""
    section_data = step_1.get(section)
    return section_data[field] if section_data else None


def add_to_db(item, main_pbar):
    """Flatten one input item into a document and store it via ``db.new_doc``.

    Optional ``step_1`` sections (``support``, ``regulation``,
    ``regulator_trust``, ``motivations``, ``definitions``) are stored as None
    when they are absent, null or empty. ``step_2`` is stored as a JSON string
    when present. Metadata entries whose value is None are dropped.

    Args:
        item: Mapping for one record. Must contain ``submitter``, ``group``,
            ``uniqueId``, ``metadata`` and a ``step_1`` mapping with
            ``substantive_submission``.
        main_pbar: Progress bar; advanced by one for every processed item,
            whether or not the insert succeeded.

    Returns:
        None. Failures are not raised: the error and the item are printed and
        the item is appended to the module-level ``error_items`` list.
    """
    try:
        step_1 = item['step_1']
        db_obj = {
            "submitter": item['submitter'],
            "substantive_submission": step_1['substantive_submission'],
            "group": item['group'],
            # All nested sections share one rule: a missing/null/empty section
            # yields None instead of failing the whole item.
            "support": _section_field(step_1, 'support', 'support'),
            "support_evidence": _section_field(step_1, 'support', 'evidence'),
            "motivations": step_1.get('motivations'),
            "regulation_type": _section_field(step_1, 'regulation', 'regulation_type'),
            "regulation_type_evidence": _section_field(step_1, 'regulation', 'evidence'),
            "regulator_trust": _section_field(step_1, 'regulator_trust', 'regulator_trust'),
            "regulator_trust_evidence": _section_field(step_1, 'regulator_trust', 'evidence'),
            # Empty (or null) definitions are normalised to None.
            "definitions": step_1.get('definitions') or None,
            "step_2": json.dumps(item['step_2']) if 'step_2' in item else None,
            "metadata": {k: v for k, v in item["metadata"].items() if v is not None},
            "uniqueId": item["uniqueId"],
        }
        db.new_doc(db_obj)
    except Exception as e:
        # Worker boundary: record the failure and keep the batch going.
        print(f"Error occurred: {str(e)}")
        error_items.append(item)
        print(item)
    finally:
        # Count failed items too, so the bar reaches its total even when some
        # inserts fail.
        main_pbar.update(1)

# Fan the inserts out over a pool of 20 worker threads while a single shared
# progress bar tracks the run.
with tqdm(total=len(list["data"]), desc="Overall Progress") as main_pbar, ThreadPoolExecutor(max_workers=20) as executor:
    futures = []
    for entry in list["data"]:
        futures.append(executor.submit(add_to_db, entry, main_pbar))

    # Wait for each task in submission order; .result() re-raises anything a
    # worker let escape.
    for done in futures:
        done.result()

# Persist every item that failed so it can be inspected or retried.
with open('errors.json', 'w') as f:
    json.dump(error_items, f)