# Using Papermill to Execute a Digital Elevation Model and store the result in an S3 bucket

See [RFC](https://www.notion.so/sensandworkspace/RFC-Geospatial-integrations-with-a-REPL-bd5d344487234fef85e5414300e417f4) for more detail

In [15]:
#papermill_description=imports

import json
import os
import geopandas as gpd
from io import StringIO
import rasterio
import matplotlib.pyplot as plt
import numpy as np
from gis_utils.stac import initialize_stac_client, query_stac_api, process_dem_asset, save_metadata_sidecar, process_dem_asset_and_mask
from gis_utils.dataframe import get_bbox_from_geodf
import rasterio.plot
import logging
import sys
from rio_cogeo.cogeo import cog_translate
from rio_cogeo.profiles import cog_profiles
from rasterio.io import MemoryFile
from matplotlib import cm
from matplotlib.colors import Normalize
from rasterio.plot import reshape_as_raster
from rasterio.warp import calculate_default_transform, reproject, Resampling


# Configure logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logger = logging.getLogger(__name__)

# Set environment variable for AWS public datasets
os.environ['AWS_NO_SIGN_REQUEST'] = 'YES'

In [16]:
#papermill_description=get_coords_from_geodataframe

def get_coords_from_geodataframe(gdf):
    """Function to parse features from GeoDataFrame in such a manner that rasterio wants them"""
    import json
    return [json.loads(gdf.to_json())['features'][0]['geometry']]


In [17]:
#papermill_description=compute_elevation_statistics

def compute_elevation_statistics(dem_data):
    """
    Compute basic elevation statistics from a digital elevation model (DEM) dataset.

    This function calculates the minimum, maximum, mean, and standard deviation of elevation
    values within the provided DEM data array. It handles the DEM data as a NumPy array,
    which is a common format for raster data in Python.

    Parameters:
    - dem_data (numpy.ndarray): A 2D NumPy array containing elevation data from a DEM raster.
      The array should contain numeric values representing elevation at each cell. No-data
      values should be represented by NaNs in the array to be properly ignored in calculations.

    Returns:
    - dict: A dictionary containing the computed elevation statistics, with keys 'min_elevation',
      'max_elevation', 'mean_elevation', and 'std_dev_elevation'.
    """

    # Compute the minimum elevation, ignoring any NaN values which represent no-data cells
    min_elevation = float(np.nanmin(dem_data))

    # Compute the maximum elevation, ignoring any NaN values
    max_elevation = float(np.nanmax(dem_data))

    # Compute the mean elevation, ignoring any NaN values
    mean_elevation = float(np.nanmean(dem_data))

    # Compute the standard deviation of elevation, ignoring any NaN values
    std_dev_elevation = float(np.nanstd(dem_data))

    # Construct and return a dictionary containing the computed statistics
    stats = {
        'min_elevation': min_elevation,
        'max_elevation': max_elevation,
        'mean_elevation': mean_elevation,
        'std_dev_elevation': std_dev_elevation
    }

    return stats

In [18]:
#papermill_description=parameters

notebook_key = "localjupyter"
geojson = {
    'body': {
        "type": "FeatureCollection",
        "name": "dissolved-boundaries",
        "crs": {
            "type": "name",
            "properties": {
                "name": "urn:ogc:def:crs:OGC:1.3:CRS84" 
            }
        },
        "features": [
            {
                "type": "Feature",
                "properties": {
                    "fid": 1
                },
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [
                        [
                            [116.26012130269045, -29.225295369642396],
                            [116.261724812149055, -29.241374854584375],
                            [116.283751968396274, -29.256813692452539],
                            [116.284342735038919, -29.268250184258388],
                            [116.292247755352392, -29.265992437426529],
                            [116.292360282331941, -29.293057573630019],
                            [116.314865678242256, -29.293523728033122],
                            [116.326259034921833, -29.293033039128805],
                            [116.326315298411629, -29.305397680579894],
                            [116.355065941687045, -29.307016748931797],
                            [116.355065941687045, -29.306575187382712],
                            [116.383366477044206, -29.307384715430175],
                            [116.384322956370426, -29.290407813444993],
                            [116.387586238777402, -29.282629879611861],
                            [116.386517232471661, -29.259807919053017],
                            [116.359201308185533, -29.259488866292969],
                            [116.359229439930417, -29.259243440415627],
                            [116.35242155766754, -29.259292525638209],
                            [116.352140240218716, -29.220237788279107],
                            [116.302234524787593, -29.223503148505326],
                            [116.281388901825679, -29.2239696200396],
                            [116.26012130269045, -29.225295369642396]
                        ]
                    ]
                }
            }
        ]
    }
}
propertyName = "test"
output_type = "overlay"
colormap = "viridis"


In [19]:
#papermill_description=process_variables

# Construct the filenames using propertyName
# name_property-name_attribute.extension
elevation_json_filename = f"/tmp/{notebook_key}/dem_{propertyName}_elevation-stats.json"
output_tiff_filename = f"/tmp/{notebook_key}/dem_{propertyName}.tiff"

output_colored_tiff_filename = f"/tmp/{notebook_key}/dem_colored_{propertyName}.tiff"
output_cog_filename = f"/tmp/{notebook_key}/dem_{propertyName}_cog.public.tiff"

In [20]:
#papermill_description=processing_file_io

req = geojson
geojson_data = req['body']  # Directly accessing the 'body' since it's already a dictionary in this mock setup

# Convert the GeoJSON string to a GeoDataFrame
gdf = gpd.read_file(StringIO(json.dumps(geojson_data)))

In [21]:
#papermill_description=processing_bounding_box

# Get bounding box from GeoJSON
bbox = get_bbox_from_geodf(geojson_data)

# Get polygon coordinates in rasterio-friendly format
coords = get_coords_from_geodataframe(gdf)

In [22]:
#papermill_description=processing_stac_init

#Jenna has commented out the RGB stac items because this notebook is only for processing and returning a dem. So the RGB stac items are not needed and it might speed the execution up.

stac_url_dem = "https://explorer.sandbox.dea.ga.gov.au/stac/"
collections_dem = ['ga_srtm_dem1sv1_0']

# Initialize STAC clients
logger.info(f"Initializing STAC client for DEM with URL: {stac_url_dem} and collections: {collections_dem}")
client_dem = initialize_stac_client(stac_url_dem)

INFO:__main__:Initializing STAC client for DEM with URL: https://explorer.sandbox.dea.ga.gov.au/stac/ and collections: ['ga_srtm_dem1sv1_0']
INFO:gis_utils.stac:Initializing STAC client for URL: https://explorer.sandbox.dea.ga.gov.au/stac/
INFO:gis_utils.stac:STAC client initialized successfully


In [23]:
#papermill_description=processing_stac_search

# Query STAC catalogs
items_dem = query_stac_api(client_dem, bbox, collections_dem, None, None) #modified the query_stac_api function to accept polygon + bbox for masking

INFO:gis_utils.stac:Found 1 items


In [24]:
#papermill_description=processing_stac_assets

# Only want the dem asset
item = items_dem[0]
dem_asset = item.assets.get('dem')
fallback_dem = {
		'title': 'dem',
		'href': 'https://dea-public-data.s3-ap-southeast-2.amazonaws.com/projects/elevation/ga_srtm_dem1sv1_0/dem1sv1_0.tif'
}
primary_dem = dem_asset if dem_asset else fallback_dem

In [25]:
#papermill_description=processing_dem_asset

# Original function:
#data, metadata, src = process_dem_asset(dem_asset, bbox, output_tiff_filename)

# Modified function including mask/clip:
data, metadata, src = process_dem_asset_and_mask(primary_dem, coords, bbox, output_tiff_filename)

INFO:gis_utils.stac:Opening DEM asset from: s3://dea-public-data/projects/elevation/ga_srtm_dem1sv1_0/dem1sv1_0.tif
INFO:gis_utils.stac:Writing to mask file:  /tmp/localjupyter/dem_test.tiff
INFO:gis_utils.stac:Written masked data to /tmp/localjupyter/dem_test.tiff
INFO:gis_utils.stac:Output mask file size: 580476 bytes


In [26]:
#papermill_description=processing

""" 
This is now broken because the masked data contains nodata values. 
Need to contain the stats calculations to within clipped area so only valid values are passed in
"""
elevation_stats = compute_elevation_statistics(data)

# Plot the data using matplotlib. When testing is done, comment out to avoid slowing down notebook lambda execution.
# fig, ax = plt.subplots(figsize=(10, 10))
# rasterio.plot.show(data, transform=src.transform, ax=ax, cmap='terrain')
# ax.set_title('Digital Elevation Model')

# Serialize 'elevation_stats' to a JSON string
elevation_stats_json = json.dumps(elevation_stats)
# Convert the JSON string to bytes
elevation_stats_bytes = elevation_stats_json.encode()

# asset_type signifies the type of asset, e.g. overlay that is stored in the application DB
asset_metadata = {
    'properties': {
        'output_type': output_type,
    },
    'data': {
        'elevation_stats': elevation_stats,
    },
}

In [27]:
#papermill_description=processing_cog
with rasterio.open(output_tiff_filename) as mew:
    meta = mew.meta.copy()
    dst_crs = rasterio.crs.CRS.from_epsg(4326)
    transform, width, height = calculate_default_transform(
        mew.crs, dst_crs, mew.width, mew.height, *mew.bounds
    )

    meta.update({
        'crs': dst_crs,
        'transform': transform,
        'width': width,
        'height': height
    })

    tif_data = mew.read(1, masked=True).astype('float32') #setting masked=True here tells rasterio to use masking information if present, but we need to add the mask itself first.
    mew_formatted = tif_data.filled(np.nan)

    cmap = cm.get_cmap('viridis') #can also use 'terrain' cmap to keep this the same as the preview image from above.
    na = mew_formatted[~np.isnan(mew_formatted)]

    min_value = min(na)
    max_value = max(na)

    norm = Normalize(vmin=min_value, vmax=max_value)

    coloured_data = (cmap(norm(mew_formatted))[:, :, :3] * 255).astype(np.uint8)

    meta.update({"count":3})


    with rasterio.open(output_colored_tiff_filename, 'w', **meta) as dst:
        reshape = reshape_as_raster(coloured_data)
        dst.write(reshape)

try:
    dst_profile = cog_profiles.get('deflate')
    with MemoryFile() as mem_dst:
        cog_translate(
            output_colored_tiff_filename,
            output_cog_filename,
            config=dst_profile,
            in_memory=True,
            dtype="uint8",
            add_mask=False,
            nodata=0,
            dst_kwargs=dst_profile
        )
    
    save_metadata_sidecar(output_cog_filename, asset_metadata)    
except:
    raise Exception('Unable to convert to cog')



The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

Reading input: /tmp/localjupyter/dem_colored_test.tiff

Updating dataset tags...
Writing output to: /tmp/localjupyter/dem_test_cog.public.tiff


INFO:gis_utils.stac:Metadata saved to /tmp/localjupyter/dem_test_cog.public.tiff.meta.json


In [28]:
#papermill_description=plotly_graphing

import plotly.graph_objects as go


with rasterio.open(output_tiff_filename) as src:
    elevation = src.read(1)  # Read the first band
    print("Dataset shape:", elevation.shape)


num_rows = elevation.shape[0]

# Ensure the row index is valid
# For example, use the middle row of the dataset
row_index = num_rows // 2  # This guarantees a valid index within the data's bounds

# Extract the elevation data for that row
horizontal_line = elevation[row_index, :]

# Creating the plot
# horizontal_line = elevation[100, :]

fig = go.Figure(data=go.Scatter(y=horizontal_line))

# Adding layout details
fig.update_layout(
    title='Elevation Profile Along a Horizontal Line',
    xaxis_title='Column Index',
    yaxis_title='Elevation',
    template='plotly_dark'  # Optional: change the template as needed
)

# Show the figure
fig.show()

plot_json = fig.to_json()

asset_metadata['viz'] = plot_json

# Apply additional characteristics to notebook artefacts
asset_metadata['properties']['platform'] = 'Space Shuttle Endeavour'
asset_metadata['properties']['overlayType'] = 'DEM'

save_metadata_sidecar(output_cog_filename, asset_metadata)    



Dataset shape: (315, 460)


INFO:gis_utils.stac:Metadata saved to /tmp/localjupyter/dem_test_cog.public.tiff.meta.json
