In [1]:
from cmr import GranuleQuery, CollectionQuery, VariableQuery
import datetime
from shapely.geometry import Polygon
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
from osgeo import gdal
import rasterio
import concurrent.futures
import backoff
from rasterio.errors import RasterioIOError
import requests
import boto3
from rasterio.session import AWSSession
import os
import numpy as np

# Linking DSWx from HLS

HLS has wonky datestrings

Automating this url: https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=OPERA_L3_DSWX-HLS_PROVISIONAL_V1&granule_ur=*T18GYP*20230410T142428Z*&options[granule_ur][pattern]=true

In [42]:
base_url = 'https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=OPERA_L3_DSWX-HLS_PROVISIONAL_V1&'

sample_hls_id_s2a = 'HLS.S30.T25MER.2023124T123259.v2.0'
sample_hls_id_l8 = 'HLS.L30.T18TWM.2023125T153242.v2.0'

podaac_url = 'https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/OPERA_L3_DSWX-HLS_PROVISIONAL_V1'
              

def format_hls_dt(dt_hls: str) -> str:
    date, time = dt_hls.split('T')
    year = date[:4]
    year_start = datetime.datetime(int(year), 1, 1)
    day_offset = datetime.timedelta(days=(int(date[4:]) - 1))
    date = year_start + day_offset
    date_str_f = date.strftime('%Y%m%d')
    return f'{date_str_f}T{time}'
    
    
def get_dswx_meta_from_hls_id(hls_id: str) -> str:
    tokens = hls_id.split('.')
    mgrs_tile_id = tokens[2]
    acq_time_hls = tokens[3]
    acq_time = format_hls_dt(acq_time_hls)
    req_url = base_url + f'granule_ur=*{mgrs_tile_id}*{acq_time}*&options[granule_ur][pattern]=true'
    resp = requests.get(req_url).json()
    hits = resp['hits']
    out = {'dswx_hits': hits, 'dswx_request_url': req_url}
    if hits:
        dswx_id = resp['items'][0]['meta']['native-id']
        out.update({'dswx_id': dswx_id})
        
        file_name = f'{dswx_id}_B01_WTR.tif'
        url = f'{podaac_url}/{file_name}'
        out.update({'dswx_url': url})
    return out

In [32]:
r = get_dswx_meta_from_hls_id(sample_hls_id_s2a)
r

{'hits': 1,
 'took': 451,
 'items': [{'meta': {'concept-type': 'granule',
    'concept-id': 'G2679801187-POCLOUD',
    'revision-id': 1,
    'native-id': 'OPERA_L3_DSWx-HLS_T25MER_20230504T123259Z_20230506T164750Z_S2B_30_v1.0',
    'provider-id': 'POCLOUD',
    'format': 'application/vnd.nasa.cmr.umm+json',
    'revision-date': '2023-05-06T16:51:33.152Z'},
   'umm': {'TemporalExtent': {'RangeDateTime': {'EndingDateTime': '2023-05-04T12:33:08.845Z',
      'BeginningDateTime': '2023-05-04T12:33:08.845Z'}},
    'GranuleUR': 'OPERA_L3_DSWx-HLS_T25MER_20230504T123259Z_20230506T164750Z_S2B_30_v1.0',
    'AdditionalAttributes': [{'Values': ['HLS.S30.T25MER.2023124T123259.v2.0'],
      'Name': 'HlsDataset'},
     {'Values': ['OLI'], 'Name': 'Sensor'},
     {'Values': ['S2B_MSIL1C_20230504T123259_N0509_R109_T25MER_20230504T140720.SAFE'],
      'Name': 'SensorProductID'},
     {'Values': ['LaSRC'], 'Name': 'Accode'},
     {'Values': ['Area'], 'Name': 'AreaOrPoint'},
     {'Values': ['dswx_hls'],

In [33]:
r = get_dswx_meta_from_hls_id(sample_hls_id_l8)
r

{'hits': 1,
 'took': 988,
 'items': [{'meta': {'concept-type': 'granule',
    'concept-id': 'G2680215611-POCLOUD',
    'revision-id': 1,
    'native-id': 'OPERA_L3_DSWx-HLS_T18TWM_20230505T153242Z_20230507T081905Z_L8_30_v1.0',
    'provider-id': 'POCLOUD',
    'format': 'application/vnd.nasa.cmr.umm+json',
    'revision-date': '2023-05-07T08:22:34.394Z'},
   'umm': {'TemporalExtent': {'RangeDateTime': {'EndingDateTime': '2023-05-05T15:32:42.760Z',
      'BeginningDateTime': '2023-05-05T15:32:42.760Z'}},
    'GranuleUR': 'OPERA_L3_DSWx-HLS_T18TWM_20230505T153242Z_20230507T081905Z_L8_30_v1.0',
    'AdditionalAttributes': [{'Values': ['HLS.L30.T18TWM.2023125T153242.v2.0'],
      'Name': 'HlsDataset'},
     {'Values': ['OLI'], 'Name': 'Sensor'},
     {'Values': ['LC08_L1TP_013031_20230505_20230505_02_RT'],
      'Name': 'SensorProductID'},
     {'Values': ['Lasrc'], 'Name': 'Accode'},
     {'Values': ['Area'], 'Name': 'AreaOrPoint'},
     {'Values': ['dswx_hls'], 'Name': 'ProductID'},
    

# Getting Sensors

In [34]:
df_hls = gpd.read_file('hls_metadata_with_tags.geojson')
df_hls.head()

Unnamed: 0,granule_id,time_acquired,time_updated,B04_link,time_acq_str,mgrs_tile_id,accode,add_offset,area_or_point,arop_ave_xshift(meters),...,msi band 03 bandpass adjustment slope and offset,msi band 04 bandpass adjustment slope and offset,msi band 11 bandpass adjustment slope and offset,msi band 12 bandpass adjustment slope and offset,msi band 8a bandpass adjustment slope and offset,processing_baseline,product_uri,spacecraft_name,tile_id,geometry
0,HLS.L30.T18GYN.2023124T142433.v2.0,2023-05-04T14:24:33.166000+00:00,2023-05-06T07:16:52.481998+00:00,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,2023124T142433Z,T18GYN,Lasrc; Lasrc,0.0,Area,"0, 0",...,,,,,,,,,,"POLYGON ((-72.32396 -47.90997, -71.04347 -47.8..."
1,HLS.L30.T18GYP.2023124T142433.v2.0,2023-05-04T14:24:33.166000+00:00,2023-05-06T07:17:49.271000+00:00,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,2023124T142433Z,T18GYP,Lasrc; Lasrc,0.0,Area,"0, 0",...,,,,,,,,,,"POLYGON ((-72.36920 -47.01149, -70.92768 -46.9..."
2,HLS.L30.T18GXP.2023124T142433.v2.0,2023-05-04T14:24:33.166000+00:00,2023-05-06T07:21:07.122000+00:00,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,2023124T142433Z,T18GXP,Lasrc; Lasrc,0.0,Area,"0, 0",...,,,,,,,,,,"POLYGON ((-73.32739 -47.02950, -72.23989 -47.0..."
3,HLS.L30.T19GCJ.2023124T142433.v2.0,2023-05-04T14:24:33.166000+00:00,2023-05-06T07:30:42.305000+00:00,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,2023124T142433Z,T19GCJ,Lasrc; Lasrc,0.0,Area,"0, 0",...,,,,,,,,,,"POLYGON ((-70.73825 -47.02852, -70.39345 -46.0..."
4,HLS.S30.T19GGM.2023124T140709.v2.0,2023-05-04T14:24:56.969002+00:00,2023-05-06T07:43:05.363000+00:00,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,2023124T140709Z,T19GGM,LaSRC,0.0,Area,0,...,"1.007500, -0.000800","0.976100, 0.001000","1.000000, -0.000300","0.986700, 0.000400","0.996600, 0.000000",5.09,S2B_MSIL1C_20230504T140709_N0509_R110_T19GGM_2...,Sentinel-2B,S2B_OPER_MSI_L1C_TL_2BPS_20230504T172832_A0321...,"POLYGON ((-66.49269 -44.31394, -65.22951 -44.2..."


In [35]:
def get_input_sensor_name(row):
    landsat_id = row['landsat_scene_id']
    if isinstance(landsat_id, str):
        sensor_name = 'Landsat-9' if landsat_id[:3] == 'LC9' else 'Landsat-8'
    else:
        sensor_name = row['spacecraft_name']
    return sensor_name

df_hls['input_sensor_name'] = df_hls.aggregate(get_input_sensor_name, axis=1)

In [36]:
df_hls.input_sensor_name.unique()

array(['Landsat-9', 'Sentinel-2B', 'Landsat-8', 'Sentinel-2A'],
      dtype=object)

In [37]:
df_hls_for_dswx = df_hls[~df_hls.isin(['Landsat-9'])].reset_index(drop=True)

In [38]:
hls_ids = df_hls_for_dswx.granule_id.tolist()

Only 1 hit; however, it's echoed in the reverse look up we did before. So that's good.

In [51]:
dswx_data = list(map(get_dswx_meta_from_hls_id, tqdm(hls_ids[:10])))
dswx_data

100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:10<00:00,  1.09s/it]


[{'dswx_hits': 0,
  'dswx_request_url': 'https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=OPERA_L3_DSWX-HLS_PROVISIONAL_V1&granule_ur=*T18GYN*20230504T142433*&options[granule_ur][pattern]=true'},
 {'dswx_hits': 0,
  'dswx_request_url': 'https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=OPERA_L3_DSWX-HLS_PROVISIONAL_V1&granule_ur=*T18GYP*20230504T142433*&options[granule_ur][pattern]=true'},
 {'dswx_hits': 0,
  'dswx_request_url': 'https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=OPERA_L3_DSWX-HLS_PROVISIONAL_V1&granule_ur=*T18GXP*20230504T142433*&options[granule_ur][pattern]=true'},
 {'dswx_hits': 0,
  'dswx_request_url': 'https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=OPERA_L3_DSWX-HLS_PROVISIONAL_V1&granule_ur=*T19GCJ*20230504T142433*&options[granule_ur][pattern]=true'},
 {'dswx_hits': 1,
  'dswx_request_url': 'https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=OPERA_L3_DSWX-HLS_PROVISIONAL_V1&gra

In [None]:
n = len(hls_ids)
with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
    dswx_data = list(tqdm(executor.map(get_dswx_meta_from_hls_id, hls_ids[:]), total=n))

  9%|████████▍                                                                                  | 6299/67480 [11:53<2:56:52,  5.77it/s]

In [None]:
df_linked_dswx = pd.DataFrame(dswx_data)
df_linked_dswx.head()

In [None]:
df_final = pd.concat([df_hls_for_dswx, df_linked_dswx], axis=1)
df_final.head()

In [None]:
df_final.to_file('hls_metadata_with_linked_dswx.geojson', driver='GeoJSON')

# Double checking with previous data

In the initial test, it looks like the HLS IDs aren't being found.

In [49]:
df_dswx = gpd.read_file('dswx_metadata_linked_with_hls.geojson')
df_dswx.head()

Unnamed: 0,granule_id,time_acquired,time_updated,B01_WTR_link,time_acq_str,mgrs_tile_id,accode,aerosol_class_remapping_enabled,aerosol_not_water_to_high_conf_water_fmask_values,aerosol_partial_surface_aggressive_to_high_conf_water_fmask_values,...,landsat_product_id_hls,landsat_scene_id_hls,processing_level_hls,sensor_hls,sentinel2_tileid_hls,tirs_ssm_model_hls,tirs_ssm_position_status_hls,usgs_software_hls,hls_url_B04,geometry
0,OPERA_L3_DSWx-HLS_T25MER_20230504T123259Z_2023...,2023-05-04T12:33:08.845000+00:00,2023-05-06T16:51:17.371000+00:00,https://archive.podaac.earthdata.nasa.gov/poda...,20230504T123259Z,T25MER,LaSRC,True,22416096,22419216012896,...,,,,,,,,,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,"POLYGON ((-32.01000 -4.52400, -32.01000 -3.530..."
1,OPERA_L3_DSWx-HLS_T42XVQ_20230504T125639Z_2023...,2023-05-04T12:56:39.187000+00:00,2023-05-06T08:15:15.550000+00:00,https://archive.podaac.earthdata.nasa.gov/poda...,20230504T125639Z,T42XVQ,Lasrc,True,22416096,22419216012896,...,LC08_L1TP_207244_20230504_20230504_02_RT,LC82072442023124LGN00,L1TP,OLI_TIRS,42XVQ,PRELIMINARY,ESTIMATED,LPGS_16.2.0,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,"POLYGON ((69.57000 80.12500, 69.57000 81.14800..."
2,OPERA_L3_DSWx-HLS_T41XNK_20230504T125639Z_2023...,2023-05-04T12:56:39.187000+00:00,2023-05-06T08:15:16.575001+00:00,https://archive.podaac.earthdata.nasa.gov/poda...,20230504T125639Z,T41XNK,Lasrc,True,22416096,22419216012896,...,LC08_L1TP_207244_20230504_20230504_02_RT,LC82072442023124LGN00,L1TP,OLI_TIRS,41XNK,PRELIMINARY,ESTIMATED,LPGS_16.2.0,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,"POLYGON ((69.36600 80.11600, 69.36600 81.14900..."
3,OPERA_L3_DSWx-HLS_T42XVR_20230504T125639Z_2023...,2023-05-04T12:56:39.187000+00:00,2023-05-08T02:12:16.707001+00:00,https://archive.podaac.earthdata.nasa.gov/poda...,20230504T125639Z,T42XVR,Lasrc,True,22416096,22419216012896,...,LC08_L1TP_207244_20230504_20230504_02_RT,LC82072442023124LGN00,L1TP,OLI_TIRS,42XVR,PRELIMINARY,ESTIMATED,LPGS_16.2.0,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,"POLYGON ((69.63400 81.01600, 69.63400 82.04400..."
4,OPERA_L3_DSWx-HLS_T40XER_20230504T125639Z_2023...,2023-05-04T12:56:39.187000+00:00,2023-05-08T02:13:14.809000+00:00,https://archive.podaac.earthdata.nasa.gov/poda...,20230504T125639Z,T40XER,Lasrc,True,22416096,22419216012896,...,LC08_L1TP_207244_20230504_20230504_02_RT,LC82072442023124LGN00,L1TP,OLI_TIRS,40XER,PRELIMINARY,ESTIMATED,LPGS_16.2.0,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,"POLYGON ((64.07000 81.00700, 64.07000 82.04500..."


Looks like there is only 1.

In [50]:
df_dswx[df_dswx.hls_dataset.isin(hls_ids[:10])]

Unnamed: 0,granule_id,time_acquired,time_updated,B01_WTR_link,time_acq_str,mgrs_tile_id,accode,aerosol_class_remapping_enabled,aerosol_not_water_to_high_conf_water_fmask_values,aerosol_partial_surface_aggressive_to_high_conf_water_fmask_values,...,landsat_product_id_hls,landsat_scene_id_hls,processing_level_hls,sensor_hls,sentinel2_tileid_hls,tirs_ssm_model_hls,tirs_ssm_position_status_hls,usgs_software_hls,hls_url_B04,geometry
569,OPERA_L3_DSWx-HLS_T19GGM_20230504T140709Z_2023...,2023-05-04T14:24:56.969002+00:00,2023-05-06T16:35:18.729000+00:00,https://archive.podaac.earthdata.nasa.gov/poda...,20230504T140709Z,T19GGM,LaSRC,True,22416096,22419216012896,...,,,,,,,,,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,"POLYGON ((-65.12400 -44.22600, -65.12400 -43.2..."
