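Supply an HLS granule ID (`HLS_ID` below) and this notebook downloads the HLS tile and the required ancillary datasets, writes a runconfig, and generates a DSWx-HLS product.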

In [1]:
from pystac_client import Client  
import matplotlib.pyplot as plt
from tqdm import tqdm
from pathlib import Path
import requests
import concurrent.futures
from dem_stitcher import stitch_dem
from tile_stitcher import get_raster_from_tiles
import rasterio
import geopandas as gpd
import yaml
from shapely.geometry import box


from proteus.dswx_hls import (
    get_dswx_hls_cli_parser,
    generate_dswx_layers,
    create_logger,
    parse_runconfig_file,
    compare_dswx_hls_products
)

In [2]:
# The only user input: ID of the HLS granule to process. It is used for the
# STAC lookup, the output directory name and the product ID further down.
HLS_ID = 'HLS.S30.T32VMK.2021257T103629.v2.0'

In [3]:
# Every artefact of this run is written under out/<HLS_ID>.
out_dir = Path('out', HLS_ID)
out_dir.mkdir(parents=True, exist_ok=True)

# Download HLS Tile

In [4]:
# STAC catalog that is queried for the granule (opened at the LPCLOUD sub-path).
STAC_URL = 'https://cmr.earthdata.nasa.gov/stac'
api = Client.open(f'{STAC_URL}/LPCLOUD/')
hls_collections = ['HLSL30.v2.0', 'HLSS30.v2.0']

# Look the granule up by its exact ID across both collections.
search_params = dict(
    collections=hls_collections,
    ids=[HLS_ID],
    max_items=5,
)
resp = api.search(**search_params)

# Exactly one granule is expected to match the ID.
assert resp.matched() == 1

In [5]:
# Materialise the search results; later cells read `resp_items[0]` for the
# asset URLs and the bounding box.
# NOTE(review): newer pystac-client releases appear to deprecate
# `get_all_items()` in favour of `item_collection()` - confirm against the
# installed version before changing.
resp_items = resp.get_all_items()
resp_items



In [6]:
# Directory that receives the downloaded HLS assets. `out_dir` already ends in
# HLS_ID, so this resolves to out/<HLS_ID>/<HLS_ID>.
hls_dir = out_dir / HLS_ID
hls_dir.mkdir(exist_ok=True, parents=True)

In [7]:
def download_one(url: str, out_dir: Path = hls_dir, timeout: float = 60):
    """Stream one remote file to disk and return the local path.

    Parameters
    ----------
    url : str
        Address of the file to fetch. The text after the last '/' is used as
        the local file name.
    out_dir : Path
        Directory the file is written into (defaults to `hls_dir`).
    timeout : float
        Seconds handed to `requests.get` as its timeout, so that a stalled
        connection raises instead of blocking the calling thread forever.

    Returns
    -------
    Path
        Location of the downloaded file inside `out_dir`.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # Source: https://stackoverflow.com/questions/16694907/download-large-file-in-python-with-requests
    local_filename = out_dir / url.split('/')[-1]
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            # Write in 8 KiB chunks so the whole file is never held in memory.
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename

In [8]:
# Collect the download link of every asset attached to the (single) matched item.
urls = [asset.href for asset in resp_items[0].assets.values()]
urls[:2]

['https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T32VMK.2021257T103629.v2.0/HLS.S30.T32VMK.2021257T103629.v2.0.B05.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T32VMK.2021257T103629.v2.0/HLS.S30.T32VMK.2021257T103629.v2.0.SZA.tif']

In [9]:
# Download all assets with up to 10 worker threads. `executor.map` yields the
# results in the order of `urls`, so `hls_paths[i]` belongs to `urls[i]`.
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    downloads = executor.map(download_one, urls)
    hls_paths = list(tqdm(downloads, total=len(urls)))

100%|███████████████| 20/20 [00:32<00:00,  1.61s/it]


## Bounds

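Get the bounds of the HLS tile, buffered by 0.25 degrees on every side. These buffered bounds are used to fetch all of the auxiliary datasets below.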

In [10]:
# Turn the item's bbox into a shapely box, pad it by 0.25 (bbox units; the
# values displayed below look like lon/lat degrees) and keep the padded extent
# as a (minx, miny, maxx, maxy) tuple.
hls_bounds = box(*resp_items[0].bbox).buffer(.25).bounds
hls_bounds

(7.123297, 57.393989, 9.418131, 58.890622)

# Auxiliary Datasets

## DEM

In [11]:
# Pixel convention requested from the stitcher; the same value is written into
# the raster tags when the DEM is saved.
dst_area_or_point = 'Point'

# Stitch the 'glo_30' DEM over the buffered tile bounds.
# NOTE(review): dst_ellipsoidal_height=False presumably keeps the heights
# geoid-referenced - confirm against the dem_stitcher documentation.
dem, p_dem = stitch_dem(
    hls_bounds,
    'glo_30',
    dst_area_or_point=dst_area_or_point,
    dst_ellipsoidal_height=False,
)

Reading glo_30 Datasets: 100%|█| 6/6 [00:11<00:00,  


In [12]:
# Destination of the stitched DEM; referenced again when filling the runconfig.
dem_path = out_dir / 'glo_30.tif'

In [13]:
# Save the DEM array as band 1 using the profile returned by the stitcher, and
# record the pixel convention in the file tags.
with rasterio.open(dem_path, 'w', **p_dem) as dst:
    dst.write(dem, 1)
    dst.update_tags(AREA_OR_POINT=dst_area_or_point)

## ESA 10 m world cover

In [14]:
# Mosaic the 'esa_world_cover_2021' tiles that cover the buffered tile bounds;
# returns the array together with its rasterio profile.
X_esa_wc, p_wc = get_raster_from_tiles(
    hls_bounds,
    tile_shortname='esa_world_cover_2021',
)

In [15]:
# Destination of the 10 m world cover mosaic (plain string: the original
# f-string had no placeholders).
wc_10m_path = out_dir / 'wc_10m.tif'

In [16]:
# Save the world cover mosaic with the profile returned alongside it.
with rasterio.open(wc_10m_path, 'w', **p_wc) as dst:
    dst.write(X_esa_wc)

## Copernicus 100 m Land Cover

In [17]:
# Destination of the 100 m land cover mosaic (plain string: the original
# f-string had no placeholders).
wc_100m_path = out_dir / 'wc_100m.tif'

In [18]:
# Mosaic the 2019 'cop_100_lulc_discrete' tiles that cover the buffered tile
# bounds; returns the array together with its rasterio profile.
X_cop100, p_cop100 = get_raster_from_tiles(
    hls_bounds,
    tile_shortname='cop_100_lulc_discrete',
    year=2019,
)

In [19]:
# Save the 100 m land cover mosaic with the profile returned alongside it.
with rasterio.open(wc_100m_path, 'w', **p_cop100) as dst:
    dst.write(X_cop100)

## NOAA GSHHS

In [20]:
%%time

# Upstream GSHHG archive, kept for provenance only (not used below):
#coastline_url = 'http://www.soest.hawaii.edu/pwessel/gshhg/gshhg-shp-2.3.7.zip' # Source: https://www.soest.hawaii.edu/pwessel/gshhg/index.html
# Shapefile that is actually read: the full-resolution ("f"), level-1 GSHHS
# layer as named in the URL path.
coastline_url = ('https://asf-dem-west.s3.us-west-2.amazonaws.com'
                 '/WATER_MASK/GSHHG/GSHHS_shp/f/GSHHS_f_L1.shp')

# Read the remote shapefile, passing the buffered tile bounds as `bbox`.
# NOTE(review): `bbox` presumably filters features without clipping their
# geometries (the first row displayed below reaches longitude 180) - confirm
# against the geopandas documentation.
# This remote read took roughly two minutes of wall time in the recorded run.
df = gpd.read_file(coastline_url, bbox=hls_bounds)
df.head()

CPU times: user 6 s, sys: 7.73 s, total: 13.7 s
Wall time: 1min 53s


Unnamed: 0,id,level,source,parent_id,sibling_id,area,geometry
0,0-E,1,WVS,-1,-1,50654050.0,"POLYGON ((180.00000 68.99378, 180.00000 65.033..."
1,6419,1,WVS,-1,-1,8.514851,"POLYGON ((7.99836 58.09289, 8.00000 58.09286, ..."
2,7133,1,WVS,-1,-1,7.060912,"POLYGON ((9.04250 58.60414, 9.04250 58.60581, ..."
3,7896,1,WVS,-1,-1,5.899308,"POLYGON ((7.37750 57.99917, 7.37747 57.99750, ..."
4,8052,1,WVS,-1,-1,5.684125,"POLYGON ((7.49747 57.99247, 7.49744 57.99333, ..."


In [21]:
# `coastline_path` is handed to `df.to_file`; the runconfig is later pointed at
# the .shp file of the same stem inside it.
coastline_path = out_dir / 'gshhs_coastline'
coastline_path_shp = coastline_path / f'{coastline_path.stem}.shp'

In [22]:
# Write the bbox-filtered coastline features to disk.
# NOTE(review): the path has no file extension; the runconfig later expects
# `coastline_path_shp` to exist inside it, so this presumably creates a
# directory holding <stem>.shp - verify with the installed geopandas version.
df.to_file(coastline_path)

## Delete all the data from memory

In [23]:
# Everything needed downstream is on disk now; drop the in-memory rasters and
# the coastline frame.
del dem, df, X_cop100, X_esa_wc

# Setup Runconfig

In [26]:
# Default DSWx-HLS runconfig template, used as the starting point for this run.
# NOTE(review): the URL tracks the `main` branch, so the template may differ
# from the one shipped with the installed PROTEUS version - consider pinning a tag.
dswx_hls_runconfig_url = 'https://raw.githubusercontent.com/nasa/PROTEUS/main/src/proteus/defaults/dswx_hls.yaml'

In [27]:
# Fetch the default runconfig template and parse it into a nested dict.
# Note: this rebinds `resp`, which earlier held the STAC search result.
resp = requests.get(dswx_hls_runconfig_url, timeout=60)
# Stop on an HTTP error; otherwise the body of the error page would be handed
# to the YAML parser and surface later as a confusing KeyError.
resp.raise_for_status()
runconfig_dict = yaml.safe_load(resp.content)
runconfig_dict

{'runconfig': {'name': 'dswx_hls_workflow_default',
  'groups': {'pge_name_group': {'pge_name': 'DSWX_HLS_PGE'},
   'input_file_group': {'input_file_path': None},
   'dynamic_ancillary_file_group': {'dem_file': None,
    'dem_file_description': None,
    'landcover_file': None,
    'landcover_file_description': None,
    'worldcover_file': None,
    'worldcover_file_description': None,
    'shoreline_shapefile': None,
    'shoreline_shapefile_description': None},
   'primary_executable': {'product_type': 'DSWX_HLS'},
   'product_path_group': {'product_path': None,
    'scratch_path': None,
    'output_dir': None,
    'product_id': None,
    'product_version': None},
   'processing': {'check_ancillary_inputs_coverage': True,
    'apply_ocean_masking': False,
    'apply_aerosol_class_remapping': True,
    'aerosol_not_water_to_high_conf_water_fmask_values': [224, 160, 96],
    'aerosol_water_moderate_conf_to_high_conf_water_fmask_values': [224,
     160,
     96],
    'aerosol_partial_su

In [28]:
# The runconfig takes plain strings, so convert every downloaded file to an
# absolute path string.
hls_paths_str = [str(local_file.resolve()) for local_file in hls_paths]
hls_paths_str[:2]

['/Users/cmarshak/bekaert-team/dswx-hls-pst-workflow/out/HLS.S30.T32VMK.2021257T103629.v2.0/HLS.S30.T32VMK.2021257T103629.v2.0/HLS.S30.T32VMK.2021257T103629.v2.0.B05.tif',
 '/Users/cmarshak/bekaert-team/dswx-hls-pst-workflow/out/HLS.S30.T32VMK.2021257T103629.v2.0/HLS.S30.T32VMK.2021257T103629.v2.0/HLS.S30.T32VMK.2021257T103629.v2.0.SZA.tif']

In [29]:
# Point the runconfig at the downloaded HLS files and at the ancillary
# datasets written earlier (all as absolute paths).
runconfig_groups = runconfig_dict['runconfig']['groups']
runconfig_groups['input_file_group']['input_file_path'] = hls_paths_str
runconfig_groups['dynamic_ancillary_file_group'].update(
    dem_file=str(dem_path.resolve()),
    landcover_file=str(wc_100m_path.resolve()),
    worldcover_file=str(wc_10m_path.resolve()),
    shoreline_shapefile=str(coastline_path_shp.resolve()),
)

In [30]:
# Locations handed to the workflow through the runconfig's product_path_group.
product_path = out_dir / 'product'
scratch_path = out_dir / 'scratch'
output_dir = out_dir / 'output'

product_id = f'dswx_{HLS_ID}'
product_version = 1.0

runconfig_dict['runconfig']['groups']['product_path_group']['product_path'] = str(product_path.resolve())
runconfig_dict['runconfig']['groups']['product_path_group']['scratch_path'] = str(scratch_path.resolve())
runconfig_dict['runconfig']['groups']['product_path_group']['product_version'] = product_version
runconfig_dict['runconfig']['groups']['product_path_group']['product_id'] = str(product_id)
# Resolved like the other two paths, so the runconfig does not depend on the
# working directory the workflow is launched from.
runconfig_dict['runconfig']['groups']['product_path_group']['output_dir'] = str(output_dir.resolve())

In [31]:
# Threshold settings taken from the default runconfig; also a quick check that
# the template contains an 'hls_thresholds' group. Not referenced again in the
# cells visible here.
HLS_THRESHOLDS = runconfig_dict['runconfig']['groups']['hls_thresholds']

In [32]:
# Serialise the filled-in runconfig next to the other outputs, in block style.
runconfig_path = out_dir / 'runconfig.yaml'
with runconfig_path.open('w') as stream:
    yaml.dump(runconfig_dict, stream, default_flow_style=False)

The README recommends running the workflow from the command line, and for debugging it is easiest to have the log written to the terminal. Ideally, we would call the Python API as in [this test](https://github.com/nasa/PROTEUS/blob/main/tests/test_dswx_hls_workflow.py) to get Python error handling, but the command-line call works, so it is left as is.

**Note:** this requires that the notebook is run from the `dswx_hls` environment!

In [33]:
# Run the DSWx-HLS command-line script on the runconfig written above;
# IPython substitutes `{runconfig_path}` before handing the line to the shell.
!dswx_hls.py {runconfig_path}

  __import__('pkg_resources').require('proteus==1.0.1')
Default runconfig file: /Users/cmarshak/mambaforge/envs/dswx_hls/lib/python3.11/site-packages/proteus-1.0.1-py3.11.egg/proteus/defaults/dswx_hls.yaml
YAML schema: /Users/cmarshak/mambaforge/envs/dswx_hls/lib/python3.11/site-packages/proteus-1.0.1-py3.11.egg/proteus/schemas/dswx_hls.yaml
Input runconfig file: out/HLS.S30.T32VMK.2021257T103629.v2.0/runconfig.yaml
Validating runconfig file: out/HLS.S30.T32VMK.2021257T103629.v2.0/runconfig.yaml
HLS thresholds:
     wigt: 0.124
     awgt: 0.0
     pswt_1_mndwi: -0.44
     pswt_1_nir: 1500
     pswt_1_swir1: 900
     pswt_1_ndvi: 0.7
     pswt_2_mndwi: -0.5
     pswt_2_blue: 1000
     pswt_2_nir: 2500
     pswt_2_swir1: 3000
     pswt_2_swir2: 1000
     lcmask_nir: 1200
PROTEUS software version: 1.0.1
input files:
    HLS product file(s):
        /Users/cmarshak/bekaert-team/dswx-hls-pst-workflow/out/HLS.S30.T32VMK.2021257T103629.v2.0/HLS.S30.T32VMK.2021257T103629.v2.0/HLS.S30.T32VMK.20