In [0]:
# ADLS Gen2 container roots (direct abfss:// access; nothing is mounted here).
# Required each time the cluster is restarted, which should only be in the
# first notebook, as the notebooks run in order.

tiers = ["bronze", "silver", "gold"]

# One root URI per container, keyed by tier name.
adls_paths = dict(
    (tier, f"abfss://{tier}@prj1storage.dfs.core.windows.net/") for tier in tiers
)

# Convenience aliases, unpacked in the same order as `tiers`.
bronze_adls, silver_adls, gold_adls = (adls_paths[tier] for tier in tiers)

# List each container root; presumably a connectivity/permission check, since
# only the last listing is displayed as the cell output.
dbutils.fs.ls(bronze_adls)
dbutils.fs.ls(silver_adls)
dbutils.fs.ls(gold_adls)

[]

In [0]:
import requests
import json
from datetime import date, timedelta

In [0]:
# Date window for the extract.
#
# Dynamic pipeline: Data Factory supplies "start_date" and "end_date" as
# notebook base parameters, which are read back through the widgets below.
#
# Static / interactive run: the widgets keep their empty default, so fall back
# to yesterday..today. The fallback is formatted as ISO strings (YYYY-MM-DD) so
# both modes produce plain strings for the URL, the file name and the JSON
# payload built later in the notebook.
dbutils.widgets.text("start_date", "")
dbutils.widgets.text("end_date", "")

start_date = (
    dbutils.widgets.get("start_date").strip()
    or (date.today() - timedelta(days=1)).isoformat()
)
end_date = dbutils.widgets.get("end_date").strip() or date.today().isoformat()

In [0]:
# start_date, end_date

(datetime.date(2025, 3, 11), datetime.date(2025, 3, 12))

In [0]:
# USGS FDSN event query for the requested date window, asking for GeoJSON output.
# The endpoint and the query string are kept on separate lines for readability;
# adjacent literals are concatenated into a single URL.
url = (
    "https://earthquake.usgs.gov/fdsnws/event/1/query"
    f"?format=geojson&starttime={start_date}&endtime={end_date}"
)

In [0]:
# Fetch the GeoJSON feed for the date window and write the raw features to the
# bronze container as one JSON file named after the start date.
try:
    # requests waits indefinitely by default; a timeout stops a stalled
    # connection from hanging the job.
    response = requests.get(url, timeout=60)

    # Raise HTTPError for bad responses (4xx or 5xx)
    response.raise_for_status()

    # Keep only the list of features; a missing key is treated as "no data".
    data = response.json().get("features", [])
    if not data:
        print("No data found for the given date range.")
    else:
        # bronze_adls already ends with "/", so strip it before joining to
        # avoid a double slash in the target path.
        file_path = f"{bronze_adls.rstrip('/')}/{start_date}_earthquake_data.json"

        # Serialize the features as indented JSON text.
        json_data = json.dumps(data, indent=4)

        # overwrite=True makes re-runs for the same start date replace the file.
        dbutils.fs.put(file_path, json_data, overwrite=True)

        print(f"Data successfully saved to {file_path}")
except requests.exceptions.RequestException as e:
    # Log the failure, then re-raise so the run fails visibly instead of
    # continuing (and exiting normally) without any data written.
    print(f"Error fetching data from API: {e}")
    raise


Wrote 297456 bytes.
Data successfully saved to abfss://bronze@prj1storage.dfs.core.windows.net//2025-03-11_earthquake_data.json


In [0]:
# Spot-check a single record. The index is guarded so that a response with
# fewer than 11 features (including an empty list) does not raise IndexError
# and abort the run before the exit cell; nothing is displayed in that case.
data[10] if len(data) > 10 else None

{'type': 'Feature',
 'properties': {'mag': 1.06,
  'place': '21 km SW of La Quinta, CA',
  'time': 1741733048890,
  'updated': 1741733321083,
  'tz': None,
  'url': 'https://earthquake.usgs.gov/earthquakes/eventpage/ci41077696',
  'detail': 'https://earthquake.usgs.gov/fdsnws/event/1/query?eventid=ci41077696&format=geojson',
  'felt': None,
  'cdi': None,
  'mmi': None,
  'alert': None,
  'status': 'automatic',
  'tsunami': 0,
  'sig': 17,
  'net': 'ci',
  'code': '41077696',
  'ids': ',ci41077696,',
  'sources': ',ci,',
  'types': ',nearby-cities,origin,phase-data,scitech-link,',
  'nst': 24,
  'dmin': 0.09143,
  'rms': 0.24,
  'gap': 48,
  'magType': 'ml',
  'type': 'earthquake',
  'title': 'M 1.1 - 21 km SW of La Quinta, CA'},
 'geometry': {'type': 'Point',
  'coordinates': [-116.4505005, 33.5203323, 13.16]},
 'id': 'ci41077696'}

In [0]:
## FOR DYNAMIC PIPELINE

# Values handed back to the caller of this notebook: the date window and the
# three container root paths.
output_data = {
    "start_date": start_date,
    "end_date": end_date,
    "bronze_adls": bronze_adls,
    "silver_adls": silver_adls,
    "gold_adls": gold_adls
}

# Serialize the dictionary to a JSON string.
# default=str covers the case where the dates are datetime.date objects rather
# than strings: json.dumps would otherwise raise TypeError, and str() on a date
# gives its ISO form (YYYY-MM-DD). String values are unaffected.
output_json = json.dumps(output_data, default=str)

# Log the serialized JSON for debugging
print(f"Serialized JSON: {output_json}")

# Return the JSON string as the notebook's exit value
dbutils.notebook.exit(output_json)