## Processing Version 001 ECOSTRESS Data: Swath to Grid 

In this notebook, we convert the raw ECOSTRESS version 001 product to a grid.

This code was adapted from: https://lpdaac.usgs.gov/documents/975/ECOSTRESS_Tutorial_9kUJHnD.html

In [1]:
"""
Load the necessary packages and set environment variables
"""

# Import packages
import os, shutil, time, glob, warnings
import folium
import earthaccess
import pandas as pd
import geopandas as gpd
import rasterio as rio
import rioxarray as rxr
import h5py
import pyproj
import xarray as xr
import numpy as np
import holoviews as hv
import hvplot.xarray
import hvplot.pandas
import folium
from branca.element import Figure
from matplotlib import pyplot as plt
from affine import Affine
from pyresample import geometry as geom
from pyresample import kd_tree as kdt
from os.path import join
from osgeo import gdal, gdal_array, gdalconst, osr

# Projection information
geog = 'EPSG:4326'  # Geographic coordinate system (WGS 84 latitude/longitude)
prj = 'EPSG:5070'  # Projected coordinate system - NAD83 / Conus Albers (not a UTM zone)

# File path information
datadir = '/data-store/iplant/home/shared/esiil/HYR_SENSE/'

# Confirm that the setup cell ran to completion
print("Success")

Success


In [2]:
# Root directory that is searched (recursively, below) for the ECOSTRESS .h5 granules
ecodir = '/home/jovyan/HYR-SENSE/data/Drought-FireRisk/'

def list_files(path, ext, recursive):
    """
    List files of a specific type in a directory or its subdirectories.

    Parameters
    ----------
    path : str
        Directory to search.
    ext : str
        File-name suffix to match, e.g. '.h5'. The value is appended to '*',
        so a glob fragment such as '*.h5' matches the same files.
    recursive : bool
        If truthy, search `path` and every nested subdirectory; otherwise
        search only `path` itself.

    Returns
    -------
    list of str
        Matching paths, in whatever order `glob.glob` yields them (the list
        is not sorted).
    """
    pattern = '*{}'.format(ext)
    # Test truthiness rather than identity with True, so flags such as 1 or
    # numpy.bool_(True) are not silently treated as "non-recursive".
    if recursive:
        return glob.glob(os.path.join(path, '**', pattern), recursive=True)
    return glob.glob(os.path.join(path, pattern), recursive=False)

# Get a list of the ECOSTRESS HDF5 (.h5) files, including those in subdirectories
nc_files = list_files(ecodir,"*.h5",recursive=True)
print(nc_files)

['/home/jovyan/HYR-SENSE/data/Drought-FireRisk/ECO2LSTE/ECOSTRESS_L2_LSTE_11158_004_20200624T121534_0601_01.h5', '/home/jovyan/HYR-SENSE/data/Drought-FireRisk/ECO2LSTE/ECOSTRESS_L2_LSTE_11406_009_20200710T122818_0601_01.h5', '/home/jovyan/HYR-SENSE/data/Drought-FireRisk/ECO4ESIPTJPL/ECOSTRESS_L4_ESI_PT-JPL_11158_004_20200624T121534_0601_01.h5', '/home/jovyan/HYR-SENSE/data/Drought-FireRisk/ECO4ESIPTJPL/ECOSTRESS_L4_ESI_PT-JPL_11406_009_20200710T122818_0601_01.h5', '/home/jovyan/HYR-SENSE/data/Drought-FireRisk/ECO1BGEO/ECOSTRESS_L1B_GEO_11158_004_20200624T121534_0601_01.h5', '/home/jovyan/HYR-SENSE/data/Drought-FireRisk/ECO1BGEO/ECOSTRESS_L1B_GEO_11406_009_20200710T122818_0601_01.h5']


Now let's take a look at the structure of one of the ECOSTRESS HDF5 (`.h5`) files to get an idea of what is stored within.

In [3]:
# Helper that walks an HDF5 file and prints what it contains
def read_h5_file(fp):
    """Print the name of every group/dataset in the HDF5 file `fp`, followed by its attributes."""

    def _describe(item_name, item):
        # Called once per object by visititems; implicitly returns None so
        # the traversal continues through the whole file.
        print(item_name)
        for attr_name, attr_value in item.attrs.items():
            print(f"    {attr_name}: {attr_value}")

    with h5py.File(fp, 'r') as h5_handle:
        print(f'Contents of {fp}')
        h5_handle.visititems(_describe)

# Inspect the first file in the list to see how it is organised
read_h5_file(nc_files[0])

Contents of /home/jovyan/HYR-SENSE/data/Drought-FireRisk/ECO2LSTE/ECOSTRESS_L2_LSTE_11158_004_20200624T121534_0601_01.h5
L2 LSTE Metadata
L2 LSTE Metadata/AncillaryNWP
L2 LSTE Metadata/BandSpecification
L2 LSTE Metadata/CloudMaxTemperature
L2 LSTE Metadata/CloudMeanTemperature
L2 LSTE Metadata/CloudMinTemperature
L2 LSTE Metadata/CloudSDevTemperature
L2 LSTE Metadata/Emis1GoodAvg
L2 LSTE Metadata/Emis2GoodAvg
L2 LSTE Metadata/Emis3GoodAvg
L2 LSTE Metadata/Emis4GoodAvg
L2 LSTE Metadata/Emis5GoodAvg
L2 LSTE Metadata/LSTGoodAvg
L2 LSTE Metadata/NWPSource
L2 LSTE Metadata/NumberOfBands
L2 LSTE Metadata/OrbitCorrectionPerformed
L2 LSTE Metadata/QAPercentCloudCover
L2 LSTE Metadata/QAPercentGoodQuality
SDS
SDS/Emis1
    _FillValue: [0]
    add_offset: [0.49]
    coordsys: b'cartesian'
    format: b'scaled'
    long_name: b'Band 1 Emissivity'
    scale_factor: [0.002]
    units: b'n/a'
    valid_range: [  1 255]
SDS/Emis1_err
    _FillValue: [0]
    add_offset: [0.]
    coordsys: b'cartesian'

In [4]:
# Separate the file list by product, matching on the file-name prefix
lst_files = [fpath for fpath in nc_files if "ECOSTRESS_L2_LSTE" in fpath]
esi_files = [fpath for fpath in nc_files if "ECOSTRESS_L4_ESI_PT-JPL" in fpath]
geo_files = [fpath for fpath in nc_files if "ECOSTRESS_L1B_GEO" in fpath]

# Create a dictionary for our LST and ESI files (the geolocation files stay
# in geo_files). The keys are used later as product identifiers.
# NOTE(review): the LST key is spelled 'ECOL2LSTE' while the download folder
# is 'ECO2LSTE'; the key is compared verbatim in the processing loop, so any
# rename has to be made in both places.
datadict = {
    'ECOL2LSTE': lst_files,
    'ECO4ESIPTJPL': esi_files
}
print(datadict)

{'ECOL2LSTE': ['/home/jovyan/HYR-SENSE/data/Drought-FireRisk/ECO2LSTE/ECOSTRESS_L2_LSTE_11158_004_20200624T121534_0601_01.h5', '/home/jovyan/HYR-SENSE/data/Drought-FireRisk/ECO2LSTE/ECOSTRESS_L2_LSTE_11406_009_20200710T122818_0601_01.h5'], 'ECO4ESIPTJPL': ['/home/jovyan/HYR-SENSE/data/Drought-FireRisk/ECO4ESIPTJPL/ECOSTRESS_L4_ESI_PT-JPL_11158_004_20200624T121534_0601_01.h5', '/home/jovyan/HYR-SENSE/data/Drought-FireRisk/ECO4ESIPTJPL/ECOSTRESS_L4_ESI_PT-JPL_11406_009_20200710T122818_0601_01.h5']}


### Define a function to perform the swath-to-grid conversion on ECOSTRESS V001

Below we have defined a function which takes an open ECOSTRESS HDF5 (`.h5`) file as an input, together with the path to the corresponding geolocation file which we downloaded in the previous notebook. Using the geolocation data, we can define the coordinate space for the ECOSTRESS LST and ESI and export them as georeferenced GeoTIFF raster files. There are many steps to get from the HDF5 swath file to a GeoTIFF, so do not feel like you have to understand each of these. The function should handle any ECOSTRESS version 001 HDF5 file.

In [5]:
# Explicitly use GDAL exceptions
gdal.UseExceptions()


def _first_element(value):
    """Return the first element of an array-like attribute, or `value` itself if it is empty."""
    flat = np.ravel(value)
    return flat[0] if flat.size else value


def _attr_to_text(value):
    """Return an HDF5 attribute value as printable text (bytes are decoded as UTF-8)."""
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return str(value)


def process_nc_file(ncData, geoData, ecoName, shortName, ecoSDS, dataID,
                    outRoot='/home/jovyan/HYR-SENSE/data/Drought-FireRisk/georeferenced'):
    """
    Resample one ECOSTRESS swath dataset onto a geographic grid and export it as a GeoTIFF.

    Parameters
    ----------
    ncData : h5py.File
        Open ECOSTRESS product file containing the dataset to export.
    geoData : str
        Path to the matching geolocation file; its '/latitude' and
        '/longitude' datasets define the swath geometry.
    ecoName : str
        Base name (no extension) of the output GeoTIFF.
    shortName : str
        Product identifier; used as the name of the output subdirectory.
    ecoSDS : list of str
        Paths of the datasets available inside `ncData`.
    dataID : str
        Suffix of the dataset to export (e.g. 'LST'). The first entry of
        `ecoSDS` ending with this suffix is used.
    outRoot : str, optional
        Directory under which '<shortName>/<ecoName>.tif' is written.

    Returns
    -------
    str
        Path of the GeoTIFF that was written.

    Raises
    ------
    ValueError
        If the geolocation file has no latitude/longitude dataset, or if no
        entry of `ecoSDS` ends with `dataID`.
    """
    pixel_size_m = 70       # 70 m pixel size, used to estimate the grid dimensions
    search_radius_m = 210   # radius of influence for the neighbour search (3 pixels)

    #######################################################
    # Open the geo file and retrieve coordinate information
    # (the context manager guarantees the file handle is released)
    with h5py.File(geoData, 'r') as g:
        geo_objs = []
        g.visit(geo_objs.append)

        # Search for lat/lon SDS inside data file
        latSD = [str(obj) for obj in geo_objs
                 if isinstance(g[obj], h5py.Dataset) and '/latitude' in obj]
        lonSD = [str(obj) for obj in geo_objs
                 if isinstance(g[obj], h5py.Dataset) and '/longitude' in obj]
        if not latSD or not lonSD:
            raise ValueError(f"No latitude/longitude datasets found in {geoData}")

        # Open SDS as arrays
        lat = g[latSD[0]][()].astype(float)
        lon = g[lonSD[0]][()].astype(float)

    # Set swath definition from lat/lon arrays
    swathDef = geom.SwathDefinition(lons=lon, lats=lat)
    print(f"Swath definition:\n{swathDef.corners}")

    # Define the lat/lon for the middle of the swath. Axis 0 is indexed with
    # half of axis 0 and axis 1 with half of axis 1; mixing them up raises an
    # IndexError whenever the array is wider than it is tall.
    midRow = lat.shape[0] // 2 - 1
    midCol = lat.shape[1] // 2 - 1
    midLat, midLon = lat[midRow][midCol], lon[midRow][midCol]

    # Define AEQD projection centered at swath center
    epsgConvert = pyproj.Proj("+proj=aeqd +lat_0={} +lon_0={}".format(midLat, midLon))
    # Use info from AEQD projection bbox to calculate output cols/rows/pixel size
    llLon, llLat = epsgConvert(np.min(lon), np.min(lat), inverse=False)
    urLon, urLat = epsgConvert(np.max(lon), np.max(lat), inverse=False)
    areaExtent = (llLon, llLat, urLon, urLat)
    cols = int(round((areaExtent[2] - areaExtent[0]) / pixel_size_m))
    rows = int(round((areaExtent[3] - areaExtent[1]) / pixel_size_m))

    # Define Geographic projection
    epsg, proj, pName = '4326', 'longlat', 'Geographic'
    # Define bounding box of swath
    llLon, llLat, urLon, urLat = np.min(lon), np.min(lat), np.max(lon), np.max(lat)
    areaExtent = (llLon, llLat, urLon, urLat)
    # Create area definition with estimated number of columns and rows
    projDict = pyproj.CRS("epsg:4326")
    areaDef = geom.AreaDefinition(epsg, pName, proj, projDict, cols, rows, areaExtent)

    # Square pixels and calculate output cols/rows
    ps = np.min([areaDef.pixel_size_x, areaDef.pixel_size_y])
    cols = int(round((areaExtent[2] - areaExtent[0]) / ps))
    rows = int(round((areaExtent[3] - areaExtent[1]) / ps))

    # Set up a new Geographic area definition with the refined cols/rows
    areaDef = geom.AreaDefinition(epsg, pName, proj, projDict, cols, rows, areaExtent)

    # Get arrays with information about the nearest neighbor to each grid point
    index, outdex, indexArr, distArr = kdt.get_neighbour_info(
        swathDef, areaDef, search_radius_m, neighbours=1)

    # Keep only the dataset(s) whose name ends with the requested identifier
    matches = [dataset for dataset in ecoSDS if dataset.endswith(dataID)]
    if not matches:
        raise ValueError(f"No dataset ending with '{dataID}' found for {ecoName}")
    for dataset in matches:
        print(dataset.split('/')[-1])

    # Read the dataset from the file that was passed in (not from a notebook
    # global) and print out its attributes
    s = matches[0]
    ecoSD = ncData[s][()]
    attrs = ncData[s].attrs
    for attr, value in attrs.items():
        if isinstance(value, np.ndarray):
            print(f'{attr} = {_first_element(value)}')
        else:
            print(f'{attr} = {_attr_to_text(value)}')

    # Fill value: None when the attribute is absent; kept as-is when it
    # cannot be represented as an int (e.g. NaN)
    try:
        raw_fv = _first_element(attrs['_FillValue'])
    except KeyError:
        fv = None
    else:
        try:
            fv = int(raw_fv)
        except ValueError:
            fv = raw_fv
    # Scale factor
    try:
        sf = _first_element(attrs['scale_factor'])
    except KeyError:
        sf = 1
    # Offset
    try:
        add_off = _first_element(attrs['add_offset'])
    except KeyError:
        add_off = 0

    # Perform K-D Tree nearest neighbor resampling (swath 2 grid conversion)
    ECOgeo = kdt.get_sample_from_neighbour_info(
        'nn', areaDef.shape, ecoSD, index, outdex, indexArr, fill_value=None)

    # Define the geotransform
    gt = [areaDef.area_extent[0], ps, 0, areaDef.area_extent[3], 0, -ps]
    print(f"Successfully geotransformed {ecoName}\nGeotransform:\n{gt}")

    # Apply scale factor and offset, then restore the raw fill value on the
    # pixels that held it (skipped when the dataset declares no fill value)
    ECOgeo = ECOgeo * sf + add_off
    if fv is not None:
        ECOgeo[ECOgeo == fv * sf + add_off] = fv

    print("\n")
    print("Exporting GeoTIFF ...")

    # Check the directory exists, make it if not
    outDir = join(outRoot, shortName)
    os.makedirs(outDir, exist_ok=True)

    # Set up output name
    outName = join(outDir, ecoName+'.tif')
    print("output file:\n{}\n".format(outName))

    # Get driver, specify dimensions, define and set output geotransform
    height, width = ECOgeo.shape
    driv = gdal.GetDriverByName('GTiff')
    dataType = gdal_array.NumericTypeCodeToGDALTypeCode(ECOgeo.dtype)
    d = driv.Create(outName, width, height, 1, dataType)
    d.SetGeoTransform(gt)

    # Create and set output projection
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(int(epsg))
    d.SetProjection(srs.ExportToWkt())

    # Write array to band
    band = d.GetRasterBand(1)
    band.WriteArray(ECOgeo)

    # Declare the masked array's fill_value as NoData when the resampled
    # result is a masked array.
    # NOTE(review): confirm this matches the values actually written for
    # out-of-swath pixels.
    nodata = getattr(ECOgeo, 'fill_value', None)
    if nodata is not None:
        try:
            band.SetNoDataValue(nodata)
        except (TypeError, ValueError, RuntimeError) as err:
            # Best effort: keep the GeoTIFF, but report the problem
            print(f"Warning: could not set NoData value {nodata!r}: {err}")
    band.FlushCache()
    # Dropping the references closes the dataset and flushes it to disk
    band = None
    d = None

    return outName


print("Function to process ECOSTRESS NetCDF files is ready to use!")

Function to process ECOSTRESS NetCDF files is ready to use!


### Apply the function to our LST and ESI HDF5 files

Now we can use our data dictionary to loop through the ECOSTRESS products we downloaded and export them as georeferenced files.

In [6]:
t0 = time.time()

# Define some parameters
geo_id = "ECOSTRESS_L1B_GEO_"

# Loop the products and file paths
for short_name, fpaths in datadict.items():
    print(f"Processing NetCDF files for {short_name}")
    for fp in fpaths:
        t_granule = time.time()  # start of this granule, for per-granule timing

        # Granule identifier: the file name without the '.h5' extension.
        # It is also used below to locate the matching geolocation file.
        identifier = os.path.basename(fp)[:-3]
        ecoName = os.path.basename(fp.split('.h5')[0])  # Keep original filename

        # Open the ECOSTRESS HDF5 file; the context manager closes it once the
        # granule has been processed, even if an error is raised
        with h5py.File(fp, 'r') as f:
            print(ecoName)

            # Create a list of all SDS inside of the .h5 file
            eco_objs = []
            f.visit(eco_objs.append)
            ecoSDS = [str(obj) for obj in eco_objs if isinstance(f[obj], h5py.Dataset)]
            for dataset in ecoSDS[0:10]:
                print(dataset)

            # Define which dataset we want to retrieve based on the short name
            if short_name == "ECOL2LSTE":
                dat = 'LST' # Land Surface Temperature
            else:
                dat = 'ESIavg' # Evaporative Stress Index

            # Find the matching ECO1BGEO file from the file list: the last five
            # underscore-separated fields of the identifier are shared by both
            parts = identifier.split('_')
            geo_identifier = geo_id + '_'.join(parts[-5:])
            print(geo_identifier)
            geo_matches = [geo_link for geo_link in geo_files if geo_identifier in geo_link]
            if not geo_matches:
                raise FileNotFoundError(
                    f"No geolocation file matching {geo_identifier} was found for {fp}")
            geo = geo_matches[0]
            print(geo)

            ###################################################
            # Now we are ready to apply our processing function
            process_nc_file(f, geo, ecoName, short_name, ecoSDS, dat)

        print('Time to complete granule:', time.time() - t_granule)
        print("\n")
        print("---------------------------------------------")

print('Total elapsed time:', time.time() - t0)

Processing NetCDF files for ECOL2LSTE
ECOSTRESS_L2_LSTE_11158_004_20200624T121534_0601_01
L2 LSTE Metadata/AncillaryNWP
L2 LSTE Metadata/BandSpecification
L2 LSTE Metadata/CloudMaxTemperature
L2 LSTE Metadata/CloudMeanTemperature
L2 LSTE Metadata/CloudMinTemperature
L2 LSTE Metadata/CloudSDevTemperature
L2 LSTE Metadata/Emis1GoodAvg
L2 LSTE Metadata/Emis2GoodAvg
L2 LSTE Metadata/Emis3GoodAvg
L2 LSTE Metadata/Emis4GoodAvg
ECOSTRESS_L1B_GEO_11158_004_20200624T121534_0601_01
/home/jovyan/HYR-SENSE/data/Drought-FireRisk/ECO1BGEO/ECOSTRESS_L1B_GEO_11158_004_20200624T121534_0601_01.h5
Swath definition:
[(-105.81323363256239, 36.31741104244618), (-109.02841297467181, 39.11466034241513), (-105.79158666507249, 41.22855474984687), (-102.61410919030835, 38.334870314871246)]
LST
_FillValue = 0
add_offset = 0.0
coordsys = cartesian
format = scaled
long_name = Land Surface Temperature
scale_factor = 0.02
units = K
valid_range = 7500
Successfully geotransformed ECOSTRESS_L2_LSTE_11158_004_20200624T12

### Check on the results by plotting one of the GeoTIFFs

In [9]:
dest = '/home/jovyan/HYR-SENSE/data/Drought-FireRisk/'

# Load our ROI data
rmnp = gpd.read_file(os.path.join(dest, 'NPS_ROMO_Boundary.geojson'))
rmnp = rmnp.to_crs("EPSG:4326") # geographic coordinates
# Get the bounding box of RMNP
rmnp_bounds = rmnp.total_bounds  # Get the bounding box in the format (minx, miny, maxx, maxy)

# Directory where our georeferenced data is
datadir = os.path.join(dest,'georeferenced')
tif_files = list_files(datadir, '*.tif', recursive=True)
print(tif_files)

# Pick the LST GeoTIFFs by name instead of relying on the order in which the
# files happen to be listed (that order is not guaranteed)
lst_tifs = sorted(fp for fp in tif_files if 'ECOSTRESS_L2_LSTE' in os.path.basename(fp))
if not lst_tifs:
    raise FileNotFoundError(f"No georeferenced LST GeoTIFF found under {datadir}")

# Open the first LST image; `masked=True` converts the NoData value stored in
# the file to NaN so that it is excluded from the plot
lst = rxr.open_rasterio(lst_tifs[0], masked=True).squeeze()
print(lst.rio.crs)
# # Open the first ESI image
# esi_tifs = sorted(fp for fp in tif_files if 'ECOSTRESS_L4_ESI_PT-JPL' in os.path.basename(fp))
# esi = rxr.open_rasterio(esi_tifs[0], masked=True).squeeze()

# Clip the raster to the bounding box
lst_clipped = lst.rio.clip_box(*rmnp_bounds)
print(lst_clipped)

# The full-swath raster is no longer needed; release it
del lst

# Plot the LST image using hvplot
lst_plot = lst_clipped.hvplot.image(
    cmap='greys',
    frame_height=500,
    frame_width=500,
    geo=True,
    crs='EPSG:4326'
).opts(title="Land Surface Temperature for RMNP")

# Plot the RMNP boundary on top
rmnp_plot = rmnp.hvplot(
    geo=True,
    frame_height=500,
    frame_width=500,
    crs='EPSG:4326',
    color='red',
    line_width=3,
    alpha=0.8,
    fill_alpha=0
).opts(title="RMNP Boundary")

lst_plot * rmnp_plot

['/home/jovyan/HYR-SENSE/data/Drought-FireRisk/georeferenced/ECOL2LSTE/ECOSTRESS_L2_LSTE_11158_004_20200624T121534_0601_01.tif', '/home/jovyan/HYR-SENSE/data/Drought-FireRisk/georeferenced/ECOL2LSTE/ECOSTRESS_L2_LSTE_11406_009_20200710T122818_0601_01.tif', '/home/jovyan/HYR-SENSE/data/Drought-FireRisk/georeferenced/ECO4ESIPTJPL/ECOSTRESS_L4_ESI_PT-JPL_11158_004_20200624T121534_0601_01.tif', '/home/jovyan/HYR-SENSE/data/Drought-FireRisk/georeferenced/ECO4ESIPTJPL/ECOSTRESS_L4_ESI_PT-JPL_11406_009_20200710T122818_0601_01.tif']
EPSG:4326
<xarray.DataArray (y: 629, x: 668)>
[420172 values with dtype=float64]
Coordinates:
    band         int64 1
  * x            (x) float64 -105.9 -105.9 -105.9 ... -105.5 -105.5 -105.5
  * y            (y) float64 40.55 40.55 40.55 40.55 ... 40.16 40.16 40.16 40.16
    spatial_ref  int64 0
Attributes:
    AREA_OR_POINT:  Area
    scale_factor:   1.0
    add_offset:     0.0
