In [1]:
from cmr import GranuleQuery
import datetime
from shapely.geometry import Polygon
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
from osgeo import gdal
import rasterio
import concurrent.futures
import backoff
from rasterio.errors import RasterioIOError
import requests
import boto3
from rasterio.session import AWSSession
import os


Source: https://git.earthdata.nasa.gov/projects/LPDUR/repos/lpdaac_cloud_data_access/browse/notebooks/Topic_2__Cloud_Optimized_Data.ipynb

In [2]:
def get_temp_creds():
    """Request temporary S3 credentials from the LP DAAC credentials endpoint.

    Returns:
        dict: the JSON-decoded response body. The calling cell reads the
        'accessKeyId', 'secretAccessKey' and 'sessionToken' keys from it.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    temp_creds_url = 'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials'
    # Bound the wait so a stalled connection cannot hang the notebook forever.
    response = requests.get(temp_creds_url, timeout=60)
    # Fail here with a clear HTTP error rather than later with a JSON decode
    # error or a KeyError on the missing credential fields.
    response.raise_for_status()
    return response.json()
# Fetch the temporary credentials once, when this cell runs.
temp_creds_req = get_temp_creds()

# boto3 session built from the three credential fields of the response.
session = boto3.Session(aws_access_key_id=temp_creds_req['accessKeyId'], 
                        aws_secret_access_key=temp_creds_req['secretAccessKey'],
                        aws_session_token=temp_creds_req['sessionToken'],
                        region_name='us-west-2')
# Rasterio/GDAL environment wrapping that session. Both cookie options point
# at the same ~/cookies.txt file in the user's home directory.
# NOTE(review): region_name is passed here as well as to boto3.Session --
# confirm rasterio.Env actually uses it when a session object is supplied.
rio_env = rasterio.Env(AWSSession(session),
                       GDAL_DISABLE_READDIR_ON_OPEN='TRUE',
                       CPL_VSIL_CURL_ALLOWED_EXTENSIONS='tif',
                       VSI_CACHE=True,
                       region_name='us-west-2',
                       GDAL_HTTP_COOKIEFILE=os.path.expanduser('~/cookies.txt'),
                       GDAL_HTTP_COOKIEJAR=os.path.expanduser('~/cookies.txt'))
# Entered manually instead of via `with`; no matching __exit__ call is visible
# in this notebook, so the environment presumably stays active for later cells.
rio_env.__enter__()

<rasterio.env.Env at 0x7fca555a86d0>

# Constants

In [3]:
# Collection identifiers for the HLS S30, HLS L30 and DSWx products, going by
# the *_CONCEPT_ID names (presumably NASA CMR concept IDs -- TODO confirm).
# None of them is referenced in the cells visible in this notebook.
HLS_S30_CONCEPT_ID = 'C2021957295-LPCLOUD'
HLS_L30_CONCEPT_ID = 'C2021957657-LPCLOUD'
DSWX_CONCEPT_ID = 'C2617126679-POCLOUD'

# Read DSWx metadata

In [4]:
# Load the DSWx metadata table; a later cell reads its `hls_dataset` column
# to build the HLS URLs.
df_dswx = gpd.read_file('dswx_metadata_with_tags.geojson')

In [5]:
def hls_url_generator(hls_id: str,
                      band: str = 'B04',
                      version: str = '020',
                      base_url: str = 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected') -> str:
    """Build the download URL of one band file of an HLS granule.

    The URL has the form
    ``{base_url}/{T0}{T1}.{version}/{hls_id}/{hls_id}.{band}.tif`` where
    ``T0`` and ``T1`` are the first two dot-separated tokens of ``hls_id``
    (e.g. ``HLS`` and ``L30`` give the directory ``HLSL30.020``).

    Args:
        hls_id: dot-separated granule ID such as
            'HLS.L30.T32RMT.2023124T095600.v2.0'.
        band: band name inserted before the '.tif' extension.
        version: collection version suffix of the top-level directory.
        base_url: URL prefix placed in front of the collection directory;
            a trailing slash is ignored.

    Returns:
        The full URL as a string.

    Raises:
        IndexError: if ``hls_id`` contains no '.' separator.
    """
    tokens = hls_id.split('.')
    # Collection directory: product and sensor tokens joined without a dot.
    prefix = f'{tokens[0]}{tokens[1]}.{version}'
    return f'{base_url.rstrip("/")}/{prefix}/{hls_id}/{hls_id}.{band}.tif'

In [6]:
# Sanity check of the URL builder on one sample granule ID (default band B04).
hls_id = 'HLS.L30.T32RMT.2023124T095600.v2.0'
print(hls_url_generator(hls_id))

https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T32RMT.2023124T095600.v2.0/HLS.L30.T32RMT.2023124T095600.v2.0.B04.tif


In [7]:
# One B04 URL per DSWx row, derived from the `hls_dataset` column.
# NOTE(review): assumes every hls_dataset value is a dot-separated string ID;
# a missing value would fail inside hls_url_generator -- confirm.
urls = df_dswx.hls_dataset.map(hls_url_generator)

In [8]:
@backoff.on_exception(backoff.expo, Exception, max_tries=10)
def get_hls_tag_data(url: str) -> dict:
    """Open the raster at ``url`` and return its tags under renamed keys.

    Every key returned by the dataset's ``tags()`` call is lower-cased and
    given an ``_hls`` suffix; values are passed through unchanged.

    The decorator retries the whole call on any ``Exception`` using
    exponential backoff, with ``max_tries=10``.
    """
    with rasterio.open(url) as src:
        # Read the tags while the dataset is open; the dict outlives the handle.
        raw_tags = src.tags()
    return {f'{name.lower()}_hls': value for name, value in raw_tags.items()}

Sequentially:

In [9]:
# Smoke test: fetch the tags of the first three URLs one after another.
# `hls_tags` is reassigned by the multithreaded cell further down.
hls_tags = list(map(get_hls_tag_data, tqdm(urls[:3])))
hls_tags

100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:03<00:00,  1.26s/it]


[{'accode_hls': 'LaSRC',
  'add_offset_hls': '0.0',
  'area_or_point_hls': 'Area',
  'arop_ave_xshift(meters)_hls': '0',
  'arop_ave_yshift(meters)_hls': '0',
  'arop_ncp_hls': '0',
  'arop_rmse(meters)_hls': '0',
  'arop_s2_refimg_hls': 'NONE',
  'cloud_coverage_hls': '5',
  'datastrip_id_hls': 'S2B_OPER_MSI_L1C_DS_2BPS_20230504T140720_S20230504T123257_N05.09',
  'hls_processing_time_hls': '2023-05-06T08:18:05Z',
  'horizontal_cs_code_hls': 'EPSG:32725',
  'horizontal_cs_name_hls': 'WGS84 / UTM zone 25S',
  'l1c_image_quality_hls': 'NONE',
  'l1_processing_time_hls': '2023-05-04T14:22:41.440202Z',
  'long_name_hls': 'Red',
  'mean_sun_azimuth_angle_hls': '48.3220864006816',
  'mean_sun_zenith_angle_hls': '30.5477928719829',
  'mean_view_azimuth_angle_hls': '103.938965586048',
  'mean_view_zenith_angle_hls': '8.74447582209279',
  'msi band 01 bandpass adjustment slope and offset_hls': '0.995900, -0.000200',
  'msi band 02 bandpass adjustment slope and offset_hls': '0.977800, -0.004000'

Multithreaded:

In [10]:
# Fetch the tags of every URL with a pool of 100 worker threads.
# Executor.map yields results in the order of its inputs, so hls_tags[i]
# belongs to the i-th URL.
n = len(urls)
with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
    # total=n is passed explicitly so tqdm knows the length of the progress bar.
    hls_tags = list(tqdm(executor.map(get_hls_tag_data, urls[:]), total=n))

100%|████████████████████████████████████████████████████████████████████████████████████████████| 50596/50596 [22:36<00:00, 37.29it/s]


In [11]:
# One row per fetched URL, one column per tag key; a key absent from a given
# file's tags is left as a missing value in that row.
df_hls = pd.DataFrame(hls_tags)
df_hls.head()

Unnamed: 0,accode_hls,add_offset_hls,area_or_point_hls,arop_ave_xshift(meters)_hls,arop_ave_yshift(meters)_hls,arop_ncp_hls,arop_rmse(meters)_hls,arop_s2_refimg_hls,cloud_coverage_hls,datastrip_id_hls,...,uly_hls,_fillvalue_hls,landsat_product_id_hls,landsat_scene_id_hls,processing_level_hls,sensor_hls,sentinel2_tileid_hls,tirs_ssm_model_hls,tirs_ssm_position_status_hls,usgs_software_hls
0,LaSRC,0.0,Area,0,0,0,0,NONE,5,S2B_OPER_MSI_L1C_DS_2BPS_20230504T140720_S2023...,...,-399960,-9999,,,,,,,,
1,Lasrc,0.0,Area,0,0,0,0,NONE,13,,...,9000000,-9999,LC08_L1TP_207244_20230504_20230504_02_RT,LC82072442023124LGN00,L1TP,OLI_TIRS,42XVQ,PRELIMINARY,ESTIMATED,LPGS_16.2.0
2,Lasrc,0.0,Area,0,0,0,0,NONE,16,,...,9000000,-9999,LC08_L1TP_207244_20230504_20230504_02_RT,LC82072442023124LGN00,L1TP,OLI_TIRS,41XNK,PRELIMINARY,ESTIMATED,LPGS_16.2.0
3,Lasrc,0.0,Area,0,0,0,0,NONE,8,,...,9100020,-9999,LC08_L1TP_207244_20230504_20230504_02_RT,LC82072442023124LGN00,L1TP,OLI_TIRS,42XVR,PRELIMINARY,ESTIMATED,LPGS_16.2.0
4,Lasrc,0.0,Area,0,0,0,0,NONE,3,,...,9100020,-9999,LC08_L1TP_207244_20230504_20230504_02_RT,LC82072442023124LGN00,L1TP,OLI_TIRS,40XER,PRELIMINARY,ESTIMATED,LPGS_16.2.0


In [14]:
# Put the HLS tag columns next to the DSWx columns. concat(axis=1) aligns rows
# on the index, so this relies on both frames sharing the same index.
# NOTE(review): df_hls gets a default 0..n-1 index; confirm df_dswx has the
# same one, otherwise rows are misaligned or padded with missing values.
df_all = pd.concat([df_dswx, df_hls], axis=1)
# Keep the URL each row of tags was read from.
df_all['hls_url_B04'] = urls
df_all.head()

Unnamed: 0,granule_id,time_acquired,time_updated,B01_WTR_link,time_acq_str,mgrs_tile_id,accode,aerosol_class_remapping_enabled,aerosol_not_water_to_high_conf_water_fmask_values,aerosol_partial_surface_aggressive_to_high_conf_water_fmask_values,...,_fillvalue_hls,landsat_product_id_hls,landsat_scene_id_hls,processing_level_hls,sensor_hls,sentinel2_tileid_hls,tirs_ssm_model_hls,tirs_ssm_position_status_hls,usgs_software_hls,hls_url_B04
0,OPERA_L3_DSWx-HLS_T25MER_20230504T123259Z_2023...,2023-05-04T12:33:08.845000+00:00,2023-05-06T16:51:17.371000+00:00,https://archive.podaac.earthdata.nasa.gov/poda...,20230504T123259Z,T25MER,LaSRC,True,22416096,22419216012896,...,-9999,,,,,,,,,https://data.lpdaac.earthdatacloud.nasa.gov/lp...
1,OPERA_L3_DSWx-HLS_T42XVQ_20230504T125639Z_2023...,2023-05-04T12:56:39.187000+00:00,2023-05-06T08:15:15.550000+00:00,https://archive.podaac.earthdata.nasa.gov/poda...,20230504T125639Z,T42XVQ,Lasrc,True,22416096,22419216012896,...,-9999,LC08_L1TP_207244_20230504_20230504_02_RT,LC82072442023124LGN00,L1TP,OLI_TIRS,42XVQ,PRELIMINARY,ESTIMATED,LPGS_16.2.0,https://data.lpdaac.earthdatacloud.nasa.gov/lp...
2,OPERA_L3_DSWx-HLS_T41XNK_20230504T125639Z_2023...,2023-05-04T12:56:39.187000+00:00,2023-05-06T08:15:16.575001+00:00,https://archive.podaac.earthdata.nasa.gov/poda...,20230504T125639Z,T41XNK,Lasrc,True,22416096,22419216012896,...,-9999,LC08_L1TP_207244_20230504_20230504_02_RT,LC82072442023124LGN00,L1TP,OLI_TIRS,41XNK,PRELIMINARY,ESTIMATED,LPGS_16.2.0,https://data.lpdaac.earthdatacloud.nasa.gov/lp...
3,OPERA_L3_DSWx-HLS_T42XVR_20230504T125639Z_2023...,2023-05-04T12:56:39.187000+00:00,2023-05-08T02:12:16.707001+00:00,https://archive.podaac.earthdata.nasa.gov/poda...,20230504T125639Z,T42XVR,Lasrc,True,22416096,22419216012896,...,-9999,LC08_L1TP_207244_20230504_20230504_02_RT,LC82072442023124LGN00,L1TP,OLI_TIRS,42XVR,PRELIMINARY,ESTIMATED,LPGS_16.2.0,https://data.lpdaac.earthdatacloud.nasa.gov/lp...
4,OPERA_L3_DSWx-HLS_T40XER_20230504T125639Z_2023...,2023-05-04T12:56:39.187000+00:00,2023-05-08T02:13:14.809000+00:00,https://archive.podaac.earthdata.nasa.gov/poda...,20230504T125639Z,T40XER,Lasrc,True,22416096,22419216012896,...,-9999,LC08_L1TP_207244_20230504_20230504_02_RT,LC82072442023124LGN00,L1TP,OLI_TIRS,40XER,PRELIMINARY,ESTIMATED,LPGS_16.2.0,https://data.lpdaac.earthdatacloud.nasa.gov/lp...


In [15]:
# Persist the linked DSWx + HLS table as GeoJSON (path relative to the
# notebook's working directory).
df_all.to_file('dswx_metadata_linked_with_hls.geojson', driver='GeoJSON')