In [1]:
import os
import json
from urllib.parse import urlparse

import boto3
import requests
import geopandas as gpd
from shapely.geometry import box, shape
from tqdm import tqdm




# Get list of datasets from OT API query

In [11]:
from differencing_functions import DataAccess, OpenTopographyQuery, GetDEMs

## Load API key

In [8]:
# After setting the environment variable, access your API key in this notebook.
API_Key = os.getenv('OPENTOPO_ADMIN_ENTERPRISE_API_KEY')

# If your API key is not set, you can set it here.
# API_Key = "your_api_key_here"

if API_Key is not None:
    print("API Key loaded successfully!")
else:
    print("Failed to load API Key.")

API Key loaded successfully!


<h3 id="Option-3-Upload-File">Define bounds using an uploaded file</h3>

In [9]:
shapefile_path = "/Users/cassandrabrigham/ASU Dropbox/Cassandra Brigham/Mac/Documents/POSTDOC/Offset mapping - SCEC/GIS/san_jacinto.shp"

da = DataAccess()

da.define_bounds_from_file(shapefile_path, target_crs = 'EPSG:4326')

{'south': 32.79130944963459,
 'west': -116.93032727942875,
 'north': 33.72062768978766,
 'east': -115.57803503695636,
 'polygon_wkt': ['-116.93032727942875, 33.67457519663469, -116.87254408082845, 33.72062768978766, -116.80774049361317, 33.70715146646199, -116.49830336466015, 33.53446917230551, -116.42053906000177, 33.480435035977344, -116.16294480082101, 33.33031374927006, -115.81948578857995, 33.156879269002346, -115.68501834510822, 33.048308147401876, -115.57803503695636, 32.86279561564286, -115.57938537217136, 32.83670416059145, -115.64825246813542, 32.81968386172708, -115.69821487108975, 32.79130944963459, -116.18568588369826, 33.121025783963546, -116.89931103811193, 33.63786514990443, -116.93032727942875, 33.67457519663469']}

<h3 id="Use-OT-Catalog - Single DEM"> Use OT Catalog To Find Datasets</h3>


In [18]:
# Build a DataAccess and call the wrapper:
da = DataAccess()
da.define_bounds_from_file(shapefile_path, target_crs="EPSG:4326")

otq, catalog_df = GetDEMs.query_single_dem(
    da,
    product_format="PointCloud",
    include_federated=True,
    detail=False,
    save_as="results.json",   
)

catalog_df

Unnamed: 0,Name,ID type,Data Source,Property ID,Horizontal EPSG,Vertical Coordinates,Clean Name
0,B4 Project - Southern San Andreas and San Jaci...,opentopoID,ot,OTLAS.032018.32611.1,32611,Ellipsoid,B4_Project_Southern_San_Andreas_and_San_Jacint...
1,2010 Salton Sea Lidar Collection,opentopoID,ot,OTLAS.032012.26911.2,26911,NAVD88 (GEOID 09),2010_Salton_Sea_Lidar_Collection
2,B4 Project - Southern San Andreas and San Jaci...,opentopoID,ot,OTLAS.032006.32611.1,32611,Ellipsoid,B4_Project_Southern_San_Andreas_and_San_Jacint...
3,CA SaltonSea EarthMRI 3 D21,USGS_3DEP_ID,usgs,CA_SaltonSea_EarthMRI_3_D21,3857,NAVD88 height - Geoid18 (Meters),CA_SaltonSea_EarthMRI_3_D21
4,CA SaltonSea EarthMRI 1 2021,USGS_3DEP_ID,usgs,CA_SaltonSea_EarthMRI_1_2021,3857,NAVD88 height - Geoid18 (Meters),CA_SaltonSea_EarthMRI_1_2021
5,USGS LPC CA SoCal Wildfires B1 2018 LAS 2019,USGS_3DEP_ID,usgs,USGS_LPC_CA_SoCal_Wildfires_B1_2018_LAS_2019,3857,NAVD88 height - Geoid12B (metre),USGS_LPC_CA_SoCal_Wildfires_B1_2018_LAS_2019
6,USGS LPC CA SoCAL Wildfires TL 2018 LAS 2019,USGS_3DEP_ID,usgs,USGS_LPC_CA_SoCAL_Wildfires_TL_2018_LAS_2019,3857,NAVD88 height - Geoid12B (metre),USGS_LPC_CA_SoCAL_Wildfires_TL_2018_LAS_2019
7,CA SaltonSea 2010,USGS_3DEP_ID,usgs,CA_SaltonSea_2010,3857,NAVD88 - Geoid09 (Meters),CA_SaltonSea_2010
8,USGS LPC CA E SanDiegoCo 2016 LAS 2017,USGS_3DEP_ID,usgs,USGS_LPC_CA_E_SanDiegoCo_2016_LAS_2017,3857,NAVD88 height (ftUS),USGS_LPC_CA_E_SanDiegoCo_2016_LAS_2017


In [None]:
CA_SaltonSea_EarthMRI_3_D21

In [19]:
selected_dataset_index = 3
print(f"Selected dataset: {catalog_df['Name'][selected_dataset_index]}")

Selected dataset: CA SaltonSea EarthMRI 3 D21


## Parameters

In [42]:
SHAPEFILE    = shapefile_path
CATALOG_URL  = "https://usgs-lidar-stac.s3-us-west-2.amazonaws.com/ept/catalog.json"
PREFIX       = catalog_df['Property ID'][selected_dataset_index]
BUCKET       = "usgs-lidar-public"
OUTPUT_LIST  = "nodes_to_download.txt"

In [43]:
output_json = "pipeline.json"

# Build the pipeline dict
pipeline = [
    {
        "type": "readers.ept",
        "filename": f"https://s3-us-west-2.amazonaws.com/usgs-lidar-public/{PREFIX}/ept.json"
    },
    {
        "type": "filters.crop",
        "mask": shapefile_path
    },
    {
        "type": "writers.las",
        "filename": f"{PREFIX}.laz"
    }
]

# Write it out
with open(output_json, "w") as f:
    json.dump(pipeline, f, indent=2)

print(f"PDAL pipeline JSON written to {output_json}")

PDAL pipeline JSON written to pipeline.json


## 1. Load region polygon (WGS84)

In [26]:
gdf    = gpd.read_file(SHAPEFILE).to_crs(epsg=4326)
region = gdf.geometry.union_all()

## 2. Fetch and parse ept.json

In [None]:
ept_url = f"https://s3-us-west-2.amazonaws.com/usgs-lidar-public/CA_SaltonSea_EarthMRI_3_D21/ept-hierarchy/0-0-0-0.json"
resp    = requests.get(ept_url)
resp.raise_for_status()
meta    = resp.json()

minx, miny, minz, maxx, maxy, maxz = meta["bounds"]
depth_limit    = meta["keySize"]         # maximum depth
hierarchy_type = meta["hierarchyType"]   # usually "json"
node_size      = meta["nodeSize"]        # points per leaf

# Compute tile XY size at each depth:
# width_d = (maxx - minx) / 2**d ; height_d analogously
# We'll ignore Z for footprint filtering.

KeyError: 'keySize'

In [36]:
import boto3
from botocore import UNSIGNED
from botocore.config import Config

# create an unsigned (public-data) S3 client
s3 = boto3.client(
    "s3",
    config=Config(signature_version=UNSIGNED),
    region_name="us-west-2"
)

In [39]:
# initialize an empty list
keys = []

# paginator walks through every page of the S3 “list_objects_v2” result
pfx = f"{PREFIX}/ept-data/"
for page in s3.get_paginator("list_objects_v2").paginate(
    Bucket=BUCKET,
    Prefix=pfx
):
    for obj in page.get("Contents", []):
        k = obj["Key"]           # e.g. "CA_SaltonSea_EarthMRI_3_D21/ept-data/4-12-34-8.laz"
        if k.endswith(".laz"):    # filter to only the LAZ tiles
            keys.append(k)

print(f"Found {len(keys)} .laz tiles")

KeyboardInterrupt: 

In [None]:
https://s3-us-west-2.amazonaws.com/usgs-lidar-public/{PREFIX}/ept.json

In [31]:
meta["nodeSize"]

KeyError: 'nodeSize'

In [29]:
hierarchy_type

'json'

## 3. List every node file under `ept-data/`

In [None]:
s3 = boto3.client("s3", config=boto3.session.Config(signature_version="unsigned"))
pfx = f"{PREFIX}/ept-data/"
keys = []
for page in s3.get_paginator("list_objects_v2").paginate(
    Bucket=BUCKET, Prefix=pfx
):
    for obj in page.get("Contents", []):
        k = obj["Key"]
        if k.endswith(".laz"):
            keys.append(k)

print(f"Found {len(keys)} .laz tiles in ept-data/")

## 4. Parse each key into (d,x,y,_) and filter by region

In [None]:
to_download = []
for key in tqdm(keys):
    # key looks like: "<PREFIX>/ept-data/D-X-Y-Z.laz"
    leaf = os.path.basename(key).replace(".laz","")
    d, x, y, z = map(int, leaf.split("-"))
    # compute this tile's 2D footprint
    tile_w = (maxx - minx) / (2 ** d)
    tile_h = (maxy - miny) / (2 ** d)
    tx0 = minx + x * tile_w
    ty0 = miny + y * tile_h
    tx1 = tx0 + tile_w
    ty1 = ty0 + tile_h
    tile_geom = box(tx0, ty0, tx1, ty1)
    if region.intersects(tile_geom):
        to_download.append(key)

print(f"{len(to_download)} tiles intersect your region")

## 5. Write out the full S3 paths

In [None]:
with open(OUTPUT_LIST, "w") as f:
    for key in sorted(to_download):
        f.write(f"s3://{BUCKET}/{key}\n")

print(f"Wrote node list → {OUTPUT_LIST}")