In [0]:
import requests
from pyspark.sql import functions as F
from pathlib import Path
import os
from datetime import datetime, timedelta,date
import json

In [0]:
def write_data(start_time: str, end_time: str, run_id: str) -> None:
    """Fetch USGS earthquake events for one time window and land the raw result.

    Queries the USGS FDSN event service for events of magnitude >= 2.5
    between ``start_time`` and ``end_time`` and writes two files under
    ``/Volumes/climate-risk/bronze/usgs_raw/event_date=<start_time>/run_id=<run_id>``:

    * ``metadata.json`` - endpoint, request parameters, ingestion timestamp,
      HTTP status code, record count and whether the request limit was hit.
    * ``response.json`` - the GeoJSON payload as parsed from the response.

    Args:
        start_time: Window start, sent as ``starttime``. It is also used as
            the ``event_date`` partition value, so the partition is only
            meaningful when the window covers a single day.
        end_time: Window end, sent as ``endtime``.
        run_id: Identifier of the ingestion run; becomes the ``run_id``
            sub-folder so repeated runs do not overwrite each other.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status (via ``raise_for_status``).
        ValueError: If the response body is not valid JSON.
        OSError: If the output folder or files cannot be written.
    """
    url = "https://earthquake.usgs.gov/fdsnws/event/1/query"
    max_events = 500

    params = {
        "format": "geojson",
        "starttime": start_time,
        "endtime": end_time,
        "minmagnitude": 2.5,
        "orderby": "time",
        "limit": max_events
    }

    # timeout is (connect seconds, read seconds)
    resp = requests.get(url, params=params, timeout=(5, 30))
    resp.raise_for_status()
    data = resp.json()

    ingest_ts = datetime.utcnow()

    record_count = len(data.get("features", []))
    # The request caps the result at `max_events`; a count that reaches the
    # cap may mean the window holds more events than were returned.
    limit_reached = record_count >= max_events
    if limit_reached:
        print(
            f"Warning: {record_count} events returned for {start_time} to "
            f"{end_time}, which equals the request limit; data may be truncated"
        )

    # Partition by EVENT DATE, not ingestion date.
    # No trailing slash here, so the file paths below contain no "//".
    output_base_folder = "/Volumes/climate-risk/bronze/usgs_raw"
    output_base_path = (
        f"{output_base_folder}/event_date={start_time}/run_id={run_id}"
    )

    os.makedirs(output_base_path, exist_ok=True)

    metadata = {
        "endpoint": url,
        "params": params,
        "event_date": start_time,
        "run_id": run_id,
        "ingest_ts": ingest_ts.isoformat(),
        "status_code": resp.status_code,
        "record_count": record_count,
        "limit_reached": limit_reached
    }

    with open(f"{output_base_path}/metadata.json", "w", encoding="utf-8") as f:
        json.dump(metadata, f)

    with open(f"{output_base_path}/response.json", "w", encoding="utf-8") as f:
        json.dump(data, f)

In [0]:
# Incremental driver: decide which days to (re)fetch, then land one day at a time.

# Take "now" once, in UTC, so the run id and the end of the window agree.
now_utc = datetime.utcnow()
run_id = now_utc.strftime("%Y%m%dT%H%M%S")
today = now_utc.date()
folders = dbutils.fs.ls("/Volumes/climate-risk/bronze/usgs_raw")

# Parse the dates of the existing `event_date=YYYY-MM-DD` partition folders.
# The listing order is not relied upon, and entries that are not event_date
# partitions are skipped instead of aborting the run.
landed_dates = []
for entry in folders:
    partition_key, _, partition_value = entry.name.rstrip("/").partition("=")
    if partition_key != "event_date":
        continue
    try:
        landed_dates.append(date.fromisoformat(partition_value))
    except ValueError:
        print(f"Skipping unrecognised partition folder: {entry.path}")

if not landed_dates:
    # Nothing landed yet: run the initial backfill over a fixed range.
    start = date(2022,1,1)
    end = date(2025,12,1)
else:
    # Re-fetch the 30 days before the newest landed date, up to today.
    start = max(landed_dates)-timedelta(days=30)
    end = today
current = start

failed_days = []
while current<=end:
    day_start = current.isoformat()
    day_end = (current+timedelta(days=1)).isoformat()
    try:
        write_data(day_start, day_end, run_id)
    except Exception as e:
        # Best effort: one bad day must not stop the remaining days.
        print(f"Error writing data for {day_start} to {day_end}: {e}")
        failed_days.append(day_start)
    current += timedelta(days=1)

# Days older than the 30-day look-back are never retried by later runs,
# so list every failure together at the end where it is easy to see.
if failed_days:
    print(f"{len(failed_days)} day(s) failed and were not written: {', '.join(failed_days)}")