#### This notebook defines two functions that together return S3 links for coincident, near-contemporaneous ICESat-2 tracks, given a GEDI result from earthaccess's `.search_data()`.

This currently works with a single GEDI result. The hope is to scale it up so that it eventually returns coincident, near-contemporaneous GEDI and ICESat-2 granule IDs / S3 links given a user-defined bbox and time range.

In [10]:
import earthaccess
import icepyx as ipx
import geopandas as gpd
import numpy as np
import pandas as pd
import datetime
from datetime import timedelta
import json
import re
from shapely.geometry import Polygon
from shapely.geometry import box

In [2]:
# Authenticate with NASA Earthdata Login; when run interactively this prompts
# for an Earthdata username and password.
earthaccess.login()

Enter your Earthdata Login username:  Jehayes
Enter your Earthdata password:  ········


<earthaccess.auth.Auth at 0x7f5838c74390>

In [5]:
# Test case: use earthaccess to fetch one arbitrary GEDI L2B granule.
# bbox is reused further down when searching for coincident ICESat-2 tracks,
# so it is kept as a notebook-level name.
bbox = (
    -121.83183686310275,
    39.251634434878014,
    -120.00653085351905,
    40.2087274330508,
)
GEDI_results = earthaccess.search_data(
    short_name='GEDI02_B', version='002', cloud_hosted=True,
    bounding_box=bbox,
    temporal=("2019-08-01", "2019-09-01"),
    count=1,  # a single granule is enough for this test case
)

In [6]:
# Display the search results to inspect the granule metadata
# (spatial coverage polygon, temporal coverage, ...).
GEDI_results

[Collection: {'ShortName': 'GEDI02_B', 'Version': '002'}
 Spatial coverage: {'HorizontalSpatialDomain': {'Geometry': {'GPolygons': [{'Boundary': {'Points': [{'Longitude': -158.6015651135, 'Latitude': -0.0100300309}, {'Longitude': -140.539844646, 'Latitude': 23.3566009965}, {'Longitude': -137.8749561776, 'Latitude': 26.2013941876}, {'Longitude': -135.0681660582, 'Latitude': 28.9826371402}, {'Longitude': -132.0968864286, 'Latitude': 31.6894485361}, {'Longitude': -128.9338646389, 'Latitude': 34.3091107961}, {'Longitude': -125.5598770797, 'Latitude': 36.8259912001}, {'Longitude': -121.9435288734, 'Latitude': 39.2241059194}, {'Longitude': -118.0626455903, 'Latitude': 41.4824080213}, {'Longitude': -113.9006376509, 'Latitude': 43.5846280408}, {'Longitude': -109.4342894506, 'Latitude': 45.5025714683}, {'Longitude': -104.6560518645, 'Latitude': 47.2127155124}, {'Longitude': -99.5683380437, 'Latitude': 48.6878240741}, {'Longitude': -94.1876160397, 'Latitude': 49.9021379921}, {'Longitude': -88.55

In [18]:
def extract_gedi_track_time(gedi_results):
    """
    This function takes GEDI results from an earthaccess .search_data() query
    and returns the polygon of the GEDI path and collection date window in %Y-%m-%d

    The results are converted to their string representation and parsed with
    regular expressions, so only the FIRST list of boundary points and the
    FIRST RangeDateTime found in that text are used (i.e. this is intended
    for a single result).

    Inputs:
        gedi_results - iterable of GEDI results from an earthaccess
                       .search_data() query

    Outputs:
        gedi_track_gdf - geodataframe (EPSG:4326) containing GEDI path geometry (polygon)
        collection_times - tuple (start, end) of temporal coverage of GEDI data in %Y-%m-%d

    Raises:
        ValueError - if no boundary points or no RangeDateTime can be found in
                     the string representation of gedi_results

    SCOTT IS DEVELOPING AN earthaccess.results_to_geopandas that could replace this
    """
    # work on the printed (string) form of the results
    input_str = ''.join(str(item) for item in gedi_results)

    # locate the list of {'Longitude': ..., 'Latitude': ...} boundary points
    match_coords = re.search(r"\[\{'Longitude':.*?\}\]", input_str)
    # locate the beginning / ending acquisition timestamps
    match_time = re.search(
        r"\{'RangeDateTime': \{'BeginningDateTime': '(.*?)', 'EndingDateTime': '(.*?)'\}\}",
        input_str)

    # fail early with a clear message instead of hitting an unbound local
    # variable further down when the metadata is missing
    if match_coords is None:
        raise ValueError("Could not find GEDI boundary points "
                         "('Longitude'/'Latitude') in gedi_results")
    if match_time is None:
        raise ValueError("Could not find a 'RangeDateTime' temporal coverage "
                         "in gedi_results")

    # the matched text is a Python-style repr (single quotes); swap the quotes
    # so it can be parsed as JSON into a list of dictionaries
    spatial_coverage = json.loads(match_coords.group(0).replace("'", '"'))
    # convert to (Longitude, Latitude) tuples
    coords = [(point['Longitude'], point['Latitude']) for point in spatial_coverage]
    polygon = Polygon(coords)
    gedi_track_gdf = gpd.GeoDataFrame(index=[0], crs="EPSG:4326", geometry=[polygon])

    # keep only the date part (%Y-%m-%d) of each timestamp to help with the
    # future icepyx query
    beginning_date_time = match_time.group(1)
    ending_date_time = match_time.group(2)
    collection_times = (beginning_date_time.split('T')[0], ending_date_time.split('T')[0])

    return gedi_track_gdf, collection_times

In [22]:
def contemp_coinc_GEDI_IS2(gedi_results, gedi_bbox, 
                           is2_sn='ATL08', time_pad_days=14):
    """
    This function returns the s3 urls of near-contemporaneous ICESat-2 tracks that overlap 
    a user-inputted GEDI granule

    Inputs:
        gedi_results - GEDI results from an earthaccess .search_data() query
        gedi_bbox - bbox used for the bounding_box argument in the .search_data() query used
                    to gather GEDI results
                    *i want to be able to pull this from the GEDI results themselves but don't
                    know how to
        is2_sn - ICESat-2 shortname for icepyx query, default ATL08 for now
        time_pad_days - days (+-) within GEDI acquisition that desired ICESat-2 tracks were acquired 

    Outputs:
        s3urls - list of coincident ICESat-2 granules acquired within time_pad_days of the input GEDI
                 acquisition, or None if the icepyx query failed / returned nothing

    Raises:
        ValueError - if the GEDI track does not overlap gedi_bbox, or the overlap
                     cannot be expressed as a polygon
    """
    gedi_track_gdf, gedi_dates = extract_gedi_track_time(gedi_results)

    # Create polygon from the user-inputted bounding box (the gedi_bbox
    # argument, not a notebook-level variable)
    bbox_polygon = box(*gedi_bbox)
    bbox_gdf = gpd.GeoDataFrame(index=[0], crs="EPSG:4326", geometry=[bbox_polygon])
    # clip the GEDI track to the bbox
    gedi_clip_poly = gpd.clip(gedi_track_gdf, bbox_gdf)
    if gedi_clip_poly.empty:
        raise ValueError("The GEDI track does not overlap gedi_bbox")
    clipped_path_geom = gedi_clip_poly.iloc[0].geometry

    # The icepyx query needs a single ring of (x, y) vertices. If clipping
    # produced a multi-part geometry, fall back to its convex hull (this can
    # only widen the search area, never shrink it).
    if clipped_path_geom.geom_type != "Polygon":
        clipped_path_geom = clipped_path_geom.convex_hull
    if clipped_path_geom.geom_type != "Polygon":
        raise ValueError("The overlap between the GEDI track and gedi_bbox "
                         "is not a polygon")
    # exterior ring vertices as (x, y) tuples; holes, if any, are ignored
    is2_spatial_extent = list(clipped_path_geom.exterior.coords)

    # add the days +- pad for near-contemporaneous search
    date_fmt = "%Y-%m-%d"
    pad = datetime.timedelta(days=time_pad_days)
    is2_pad_start = (datetime.datetime.strptime(gedi_dates[0], date_fmt) - pad).strftime(date_fmt)
    is2_pad_end = (datetime.datetime.strptime(gedi_dates[1], date_fmt) + pad).strftime(date_fmt)
    date_range = [is2_pad_start, is2_pad_end]

    try:
        region = ipx.Query(is2_sn, is2_spatial_extent, date_range)
        s3urls = region.avail_granules(ids=True, cloud=True)[1]
    except Exception as err:
        # best effort: report the failure (including its cause) rather than
        # crash, but let KeyboardInterrupt / SystemExit propagate
        print(f"No coincident granules found from CMR within {time_pad_days} days of acquisition "
              f"({type(err).__name__}: {err})")
        return None
    return s3urls

In [23]:
# Example run: ATL08 granules acquired within +-14 days of the GEDI test
# granule, restricted to the same bbox used for the GEDI search.
contemp_coinc_GEDI_IS2(
    GEDI_results,
    gedi_bbox=bbox,
    is2_sn='ATL08',
    time_pad_days=14,
)

['s3://nsidc-cumulus-prod-protected/ATLAS/ATL08/006/2019/08/15/ATL08_20190815210225_07450406_006_02.h5',
 's3://nsidc-cumulus-prod-protected/ATLAS/ATL08/006/2019/08/19/ATL08_20190819205407_08060406_006_02.h5',
 's3://nsidc-cumulus-prod-protected/ATLAS/ATL08/006/2019/08/21/ATL08_20190821083817_08290402_006_02.h5']