The Australian DEM is 35 GB and is hosted by Digital Earth Australia (DEA) [here](https://data.dea.ga.gov.au/?prefix=projects/elevation/ga_srtm_dem1sv1_0/).
The AWS bucket name used to access the data is `dea-public-data`.

## Original Data Source

*SRTM-derived 1 Second Digital Elevation Models Version 1.0*

The 1 second Shuttle Radar Topography Mission (SRTM) Digital Elevation
Models Version 1.0 package comprises three surface models: the Digital
Elevation Model (DEM), the Smoothed Digital Elevation Model (DEM-S)
and the Hydrologically Enforced Digital Elevation Model (DEM-H).

### Sources and guides
[DEA guide to accessing data from their public S3 bucket using STAC](https://docs.dea.ga.gov.au/guides/setup/gis/stac/)

[Downloading and streaming data using STAC metadata](https://docs.dea.ga.gov.au/notebooks/How_to_guides/Downloading_data_with_STAC/)

In [258]:
import os
import numpy as np

import ipywidgets as widgets

from matplotlib import pyplot as plt

import geopandas as gpd
from shapely.ops import transform
import rasterio
from rasterio.windows import Window, from_bounds
from rasterio import plot

import pystac_client
import odc.stac
from odc.algo import to_rgba
import stackstac
import pyproj

import leafmap.leafmap as leafmap

import xarray as xr
import ipyleaflet
import IPython.display as dsp
from dask.distributed import Client


In [259]:
# ---------------------------------------------------------------------------
# Notebook configuration: everything a reader may want to tune lives here.
# ---------------------------------------------------------------------------

# Request unsigned (anonymous) S3 access; this is set before the Dask client
# is started.
os.environ["AWS_NO_SIGN_REQUEST"] = "YES"
client = Client()

# Area of interest: the bounds of the boundary file, as a plain list.
geom_gpd = gpd.read_file("data/temp/dissolved-boundaries.geojson")
bbox = list(geom_gpd.total_bounds)
# Hard-coded copy of the bounds printed below, truncated to 4 decimal places.
bbox_temp = [116.2601, -29.3073, 116.3875, -29.2203]

# STAC endpoints and the collection queried from each of them.
catalog_dem = pystac_client.Client.open(
    "https://explorer.sandbox.dea.ga.gov.au/stac/"
)
collection_dem = ["ga_srtm_dem1sv1_0"]

catalog_rgb = pystac_client.Client.open(
    "https://earth-search.aws.element84.com/v1"
)
collection_rgb = ["sentinel-2-l2a"]

# Date window for the Sentinel-2 imagery (note: not Landsat) that is used to
# build a colour image for comparison with the DEM.
start_date = "2023-01-01"
end_date = "2023-02-01"

# Output projection. `resolution` is passed to stackstac for the DEM; it was
# added to resolve the error "Failed to auto-guess CRS/resolution."
crs_string = "epsg:3857"
_target_crs = pyproj.CRS.from_string(crs_string)
epsg = _target_crs.to_epsg()
resolution = 90

print(bbox)


[116.26012130269045, -29.307384715430175, 116.3875862387774, -29.220237788279107]


In [260]:
# Ask the DEM catalogue for every item of the DEM collection that
# intersects the area of interest.
query_dem = catalog_dem.search(collections=collection_dem, bbox=bbox)

# Materialise the search results into a list so they can be counted.
items_dem = [item for item in query_dem.items()]
print(f"Found: {len(items_dem):d} datasets")

Found: 1 datasets


In [273]:
# Fetch the STAC items of the DEM collection that intersect the area of interest.
ds_dem = catalog_dem.search(bbox=bbox,
                            collections=collection_dem).item_collection()

# Fail early with a readable message instead of a bare IndexError on
# `cog_urls[0]` when the search returns nothing.
if len(ds_dem) == 0:
    raise ValueError(
        f"No items found in collections {collection_dem} for bbox {bbox}"
    )

# Extract the href of the 'dem_h' asset from each item
cog_urls = [item.assets['dem_h'].href for item in ds_dem]

# Only the first URL is used further down (for the map layer).
# Adjust this if the search ever returns more than one item.
cog_url = cog_urls[0]

# Build a lazy stack of the 'dem_h' asset in the target projection.
# `bounds_latlon` clips the data to the given area.
dem_stack = stackstac.stack(items=ds_dem,
                            epsg=epsg,
                            resolution=resolution,
                            bounds_latlon=bbox,
                            assets=['dem_h'])

# Select the first time step *before* persisting so that only the slice that
# is actually used gets loaded onto the Dask cluster. The result keeps the
# dimensions (band, y, x).
dem = dem_stack.sel(band=["dem_h"]).isel(time=0).persist()

Key:       ('asset_table_to_reader_and_window-fetch_raster_window-59c8b0e3d008b84ed5720b4afc631458', 0, 0, 0, 0)
Function:  execute_task
args:      ((subgraph_callable-2c5d5062-e10b-4b94-a77c-235fd6baec2e, (subgraph_callable-2b020f27-d84c-43f6-885f-9420103998d2, array([[('s3://dea-public-data/projects/elevation/ga_srtm_dem1sv1_0/demh1sv1_0.tif', [112.99986111, -44.00013889, 153.99986111, -10.00013889], [1., 0.])]],
      dtype=[('url', 'O'), ('bounds', '<f8', (4,)), ('scale_offset', '<f8', (2,))]), RasterSpec(epsg=4326, bounds=(90, -90, 180, 0), resolutions_xy=(90, 90)), <Resampling.nearest: 0>, dtype('float64'), nan, True, None, (<class 'tuple'>, [RasterioIOError('HTTP response code: 404')]), <class 'stackstac.rio_reader.AutoParallelRioReader'>), (slice(0, 1, None), slice(0, 1, None)), dtype('float64'), nan))
kwargs:    {}
Exception: 'RuntimeError("Error reading Window(col_off=0, row_off=0, width=1, height=1) from \'s3://dea-public-data/projects/elevation/ga_srtm_dem1sv1_0/demh1sv1_0.

In [262]:
# Write the clipped DEM to a GeoTIFF on disk.
output_filename = "data/temp/clipped_raster.tif"

# `dem` is indexed as (band, row, column): axis 0 gives the band count,
# axes 1 and 2 the raster height and width.
band_count, n_rows, n_cols = dem.shape[:3]

# Creation options for the output dataset; CRS and affine transform are
# read from the DEM array itself.
profile = {
    "driver": "GTiff",
    "height": n_rows,
    "width": n_cols,
    "count": band_count,
    "dtype": dem.dtype,
    "crs": dem.crs,
    "transform": dem.transform,
}

with rasterio.open(output_filename, "w", **profile) as dst:
    dst.write(dem)

In [263]:
# Search the imagery catalogue for scenes over the area of interest that
# fall inside the configured date window.
date_window = f"{start_date}/{end_date}"
rgb_search = catalog_rgb.search(
    collections=collection_rgb,
    datetime=date_window,
    bbox=bbox,
)
ds_rgb = rgb_search.item_collection()

# Lazily stack the visible bands in the target projection.
# `bounds_latlon` clips the data to the given area.
rgb_bands = ["blue", "green", "red"]
rgb_stack = stackstac.stack(
    items=ds_rgb,
    assets=rgb_bands,
    epsg=epsg,
    bounds_latlon=bbox,
)
#rgb_stack

In [264]:
# data = rgb_stack.resample(time="MS").median("time", keep_attrs=True)
# #data

One thing to watch out for with `stackstac.stack` is that you will end up with a distinct time coordinate for each STAC item that you pass in. To achieve the intuitive representation of the data, you need to flatten the `DataArray` with respect to day.
Note: if you are only reading a single STAC item, `stackstac.mosaic` will inadvertently reduce your data along the band dimension (which is definitely not what you want!), hence the conditional statement checking for more than one time coordinate value.
[Source: hrodmn.dev post on stackstac](https://hrodmn.dev/posts/stackstac/)

In [265]:
# def flatten(x, dim="time"):
#     assert isinstance(x, xr.DataArray)
#     if len(x[dim].values) > len(set(x[dim].values)):
#         x = x.groupby(dim).map(stackstac.mosaic)

#     return x


# # round time coordinates to the day so that all observations from the same
# # day have equivalent timestamps
# data = data.assign_coords(
#     time=data.time.astype("datetime64[D]")
# )

# # mosaic along time dimension
# flat_stack = flatten(data, dim="time")
# flat_stack

In [266]:
# flat_stack = flat_stack.compute()

# flat_stack.sel(band=["red", "green", "blue"]).plot.imshow(
#     col="time",
#     col_wrap=4,
#     rgb="band",
#     robust=True,
#     size=6,
#     vmin=0,
#     vmax=3000,
#     add_labels=False,
# )

In [272]:
# Initialize a leafmap map with the optional controls switched off
m = leafmap.Map(height="600px", width="900px", draw_control=False, measure_control=False, fullscreen_control=False, attribution_control=False)

# Draw the boundary as an unfilled black outline.
m.add_gdf(geom_gpd, zoom_to_layer=True, layer_name="boundary", info_mode=None, show=True,style={
    "stroke": True,
    "color": "black",
    "weight": 3,
    "opacity": 1,
    "fill": False})

# The STAC asset href is an s3:// URL. Rewrite it to the public HTTPS
# endpoint of the DEA bucket so that a remote tile server can read it.
# (No-op if the href is already an HTTPS URL.)
cog_https_url = cog_url.replace("s3://dea-public-data", "https://data.dea.ga.gov.au")

# The original call passed a misspelled `wondow=bbox_temp` keyword, which is
# not an option of add_cog_layer and therefore never restricted the view.
# Add the layer without zooming to the full extent of the COG ...
m.add_cog_layer(cog_https_url, name="DEM-H", zoom_to_layer=False)

# ... and fit the view to the area of interest instead.
# fit_bounds expects [[south, west], [north, east]].
m.fit_bounds([[bbox_temp[1], bbox_temp[0]], [bbox_temp[3], bbox_temp[2]]])

m


Map(center=[37.7452, -122.4108], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', '…