In [0]:
import json
import logging
import os
from datetime import datetime, timezone

import requests

In [0]:
# Identifier for this ingest run: the current UTC time in compact form
# (e.g. 20240131T235959Z). `datetime.now(timezone.utc)` replaces the deprecated
# `datetime.utcnow()`; the formatted string is identical.
run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")

# Fields of the DisasterDeclarationsSummaries dataset this notebook works with.
# These are the fields requested through `$select` in the ingest cell; the
# order here matches the order used in that request.
cols_needed = [
    # declaration identity and timing
    "disasterNumber",
    "declarationDate",
    "declarationType",
    # incident description
    "incidentType",
    "incidentBeginDate",
    "incidentEndDate",
    # geography
    "state",
    "designatedArea",
    "fipsStateCode",
    "fipsCountyCode",
    "region",
    # program flags
    "iaProgramDeclared",
    "paProgramDeclared",
    "hmProgramDeclared",
]

In [0]:
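# Manual reset (destructive): uncomment to recursively delete everything under the FEMA bronze path.
#dbutils.fs.rm("/Volumes/climate-risk/bronze/fema_raw/", recurse=True)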

In [0]:
# ---------------------------------------------------------------------------
# Bronze ingest of FEMA disaster declarations.
#
# Pages through the DisasterDeclarationsSummaries endpoint with $top/$skip,
# writes each raw response page as a JSON file under
#   <bronze_path>/run_id=<run_id>/data/
# and finally writes one run-level summary to
#   <bronze_path>/run_id=<run_id>/metadata/run_metadata.json
#
# Requires `run_id` and `cols_needed` from the configuration cell.
# ---------------------------------------------------------------------------
all_rows = []  # never populated in this cell; kept so the name stays defined
skip = 0       # offset sent as $skip; advanced by `top` after every stored page
top = 1000     # page size sent as $top

TS_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def _utc_now():
    """Return the current time as a timezone-aware UTC datetime."""
    # datetime.utcnow() is deprecated since Python 3.12; this is the
    # recommended replacement and formats to the same string.
    return datetime.now(timezone.utc)


run_start_ts = _utc_now().strftime(TS_FORMAT)

# The $select list is derived from `cols_needed` so the requested fields have a
# single source of truth. With the current list this yields exactly the same
# URL as the previously hard-coded string.
url = (
    "https://www.fema.gov/api/open/v2/DisasterDeclarationsSummaries"
    "?$format=json"
    f"&$select={','.join(cols_needed)}"
    "&$filter=declarationDate ge '2022-01-01'"
)
bronze_path = "/Volumes/climate-risk/bronze/fema_raw/"
os.makedirs(bronze_path, exist_ok=True)

page = 0             # number of pages stored so far (also used in file names)
total_records = 0    # running count of records across stored pages
status = "SUCCESS"   # flipped to "FAILED" on the first exception
errors = []          # one dict per failure (at most one, since we stop on error)

# os.path.join avoids the doubled "/" that plain string formatting produced
# because `bronze_path` already ends with a separator.
run_dir = os.path.join(bronze_path, f"run_id={run_id}")
data_dir = os.path.join(run_dir, "data")
metadata_dir = os.path.join(run_dir, "metadata")
os.makedirs(data_dir, exist_ok=True)
os.makedirs(metadata_dir, exist_ok=True)

# One Session for the whole run so the HTTP connection is reused across pages.
with requests.Session() as session:
    while True:
        temp_url = f"{url}&$top={top}&$skip={skip}"
        try:
            t0 = _utc_now()
            resp = session.get(temp_url, timeout=60)
            resp.raise_for_status()

            payload = resp.json()
            data = payload.get("DisasterDeclarationsSummaries", [])
            if not data:
                # An empty (or missing) record list marks the end of the data.
                break

            # The full payload (not just the record list) is stored.
            file_name = os.path.join(
                data_dir, f"page_{page:06d}_skip_{skip:06d}.json"
            )
            with open(file_name, "w", encoding="utf-8") as f:
                json.dump(payload, f)

            # Latency covers the request, JSON decoding and the file write.
            ms = (_utc_now() - t0).total_seconds() * 1000
            logging.info(f"WRITE | file={file_name} rows={len(data)} latency_ms={ms}")

            logging.info(f"fetched and written {len(data)} records in {ms} ms")
            total_records += len(data)
            skip += top
            page += 1
        except Exception as e:
            # Any failure (HTTP, JSON decoding, file write) ends the run; the
            # pages already written stay on disk and the error is recorded.
            status = "FAILED"
            error_metadata = {
                "error": str(e),
                "url": temp_url,
                "page": page,
                "skip": skip,
                "timestamp": _utc_now().strftime(TS_FORMAT),
            }
            errors.append(error_metadata)
            logging.exception(e)
            break

run_end_ts = _utc_now().strftime(TS_FORMAT)

metadata = {
    "source": "fema",
    "run_id": run_id,
    "timestamp": _utc_now().strftime(TS_FORMAT),
    "total_records": total_records,
    "page_size": top,
    "total_pages": page,
    "final_skip": skip,
    "errors": errors,
    "status": status,
    "ingest_start_ts": run_start_ts,
    "ingest_end_ts": run_end_ts,
}

# NOTE(review): a FAILED run is only recorded here; the cell itself still
# finishes without raising. Confirm whether downstream steps check `status`
# or whether this cell should raise after the metadata has been written.
with open(os.path.join(metadata_dir, "run_metadata.json"), "w", encoding="utf-8") as f:
    json.dump(metadata, f, indent=2)