# Setup Gabon GEDI L4A Testing

1. Download the Gabon outline in a geospatial format (GeoJSON).
2. Save it to the workspace (a shared bucket) rather than committing it to the repo.
3. Query CMR with the bounding box of the polygon to find out how many granules are involved.

Boundary file is available at `shared-buckets/alexdevseed/iso3/GAB-ADM0.json`

In [17]:
!pip install geopandas profilehooks

[0m

In [25]:
import json
import os
import os.path
import sys
import urllib.parse
from typing import Any, Callable, Mapping, Optional, Sequence, TypeVar

import geopandas as gpd
import h5py
import numpy as np
import requests
from maap.maap import Granule, MAAP
from profilehooks import timecall

# Generic element type tying a callback's argument to the items of the
# sequence it is applied to (see `for_each`).
T = TypeVar('T')

In [19]:
# Hostnames of the NASA and MAAP CMR services.
# NOTE(review): presumably meant to be passed as `cmr_host` to the umm_*
# helpers -- confirm; no cell shown here uses them directly.
nasa_cmr_host = 'cmr.earthdata.nasa.gov'
maap_cmr_host = 'cmr.maap-project.org'
# MAAP API client used for the collection and granule searches in this notebook.
maap = MAAP('api.ops.maap-project.org')

## Functions

### General Functions

In [20]:
def for_each(f: Callable[[T], None], xs: Sequence[T]) -> None:
    """Call *f* once on every item of *xs*, in order, discarding any results."""
    for item in xs:
        f(item)


def pprint(value: Any) -> None:
    """Print *value* to stdout as JSON indented by two spaces."""
    rendered = json.dumps(value, indent=2)
    print(rendered)

    
def get_geo_boundary(iso: str, level: int) -> gpd.GeoDataFrame:
    """Load a geoBoundaries outline, downloading and caching it on first use.

    The GeoJSON is cached at
    ``/projects/my-public-bucket/iso3/{iso}-ADM{level}.json``. When that file
    already exists it is read directly and no request is made.

    Args:
        iso: Country code sent as the ``ISO`` query parameter (e.g. ``'GAB'``).
        level: Administrative level, sent as ``ADM{level}``.

    Returns:
        The GeoDataFrame read from the cached file.

    Raises:
        requests.HTTPError: If the metadata lookup or the GeoJSON download
            returns an error status.
    """
    file_path = f'/projects/my-public-bucket/iso3/{iso}-ADM{level}.json'

    if not os.path.exists(file_path):
        r = requests.get(
            'https://www.geoboundaries.org/gbRequest.html',
            dict(ISO=iso, ADM=f'ADM{level}')
        )
        r.raise_for_status()
        dl_url = r.json()[0]['gjDownloadURL']

        download = requests.get(dl_url)
        # Fail here rather than caching an error payload that every later
        # call would silently reuse.
        download.raise_for_status()
        geo_boundary = download.json()

        # The cache directory may not exist yet on a fresh workspace.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'w') as out:
            json.dump(geo_boundary, out)

    return gpd.read_file(file_path)

### UMM Functions

In [21]:
# Unit assumed when a granule's archive information has no `SizeUnit`, or one
# that is missing from SIZE_UNIT_FACTORS.
SIZE_UNIT_DEFAULT = 'MB'
# Bytes per unit (decimal multiples), keyed by `SizeUnit` value. TB and PB are
# included so sizes in those units are not scaled with the MB fallback.
SIZE_UNIT_FACTORS = {
    'KB': 1000,
    'MB': 1000 ** 2,
    'GB': 1000 ** 3,
    'TB': 1000 ** 4,
    'PB': 1000 ** 5,
}


def umm_find_collections(
    cmr_host: str,
    params: Mapping[str, Any],
    **kwargs: Any
) -> Mapping[str, Any]:
    """Search a CMR host for collections and return the decoded UMM-JSON body.

    Args:
        cmr_host: Hostname of the CMR service (no scheme or path).
        params: Query parameters for the collection search.
        **kwargs: Extra keyword arguments forwarded to ``requests.get``.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the service responds with an error status.
    """
    url = f'https://{cmr_host}/search/collections.umm_json'
    r = requests.get(url, params, **kwargs)
    # Surface HTTP errors here instead of handing an error body back to
    # callers that expect search results.
    r.raise_for_status()

    return r.json()


def umm_find_collection(
    cmr_host: str,
    params: Mapping[str, Any],
    **kwargs: Any
) -> Mapping[str, Any]:
    """Return the first item of a collection search limited to one result.

    ``page_size`` is forced to 1, overriding any value given in *params*.
    Indexing fails if the response has no ``items`` or the list is empty.
    """
    single_result_params = {**params, 'page_size': 1}
    response = umm_find_collections(cmr_host, single_result_params, **kwargs)
    return response['items'][0]


@timecall
def umm_find_granules(
    cmr_host: str,
    params: Mapping[str, Any],
    **kwargs: Any
) -> Mapping[str, Any]:
    """Search a CMR host for granules and return the decoded UMM-JSON body.

    The request is a POST when ``data`` or ``files`` is supplied in *kwargs*,
    and a GET otherwise.

    Args:
        cmr_host: Hostname of the CMR service (no scheme or path).
        params: Query parameters for the granule search.
        **kwargs: Extra keyword arguments forwarded to ``requests.request``.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the service responds with an error status.
    """
    method = 'post' if 'data' in kwargs or 'files' in kwargs else 'get'
    # Build the URL directly: urljoin() onto a base without a trailing slash
    # replaces the last path segment, which dropped "/search" and produced
    # https://<host>/granules.umm_json.
    url = f'https://{cmr_host}/search/granules.umm_json'
    r = requests.request(method, url, params=params, **kwargs)
    # Surface HTTP errors here instead of handing an error body back to
    # callers that expect search results.
    r.raise_for_status()

    return r.json()


def umm_granule_size_in_bytes(granule: Mapping[str, Any]) -> int:
    """Return the rounded size in bytes of a granule's first archive entry.

    Uses ``SizeInBytes`` when that key is present. Otherwise ``Size``
    (default 0) is multiplied by the factor for ``SizeUnit``. A missing or
    unrecognized unit falls back to ``SIZE_UNIT_DEFAULT``.
    """
    archive_info = granule['DataGranule']['ArchiveAndDistributionInformation'][0]
    unit = archive_info.get('SizeUnit', SIZE_UNIT_DEFAULT)
    fallback_factor = SIZE_UNIT_FACTORS[SIZE_UNIT_DEFAULT]
    factor = SIZE_UNIT_FACTORS.get(unit, fallback_factor)
    # Computed up front (as before), even when `SizeInBytes` ends up being used.
    scaled_size = archive_info.get('Size', 0) * factor

    return round(archive_info.get('SizeInBytes', scaled_size))

### MAAP Functions

In [22]:
@timecall
def find_granules(**kwargs: Any):
    """Search for granules through the module-level ``maap`` client.

    All keyword arguments are forwarded unchanged to ``maap.searchGranule``
    and its result is returned as-is. The ``timecall`` decorator reports how
    long each call took.
    """
    return maap.searchGranule(**kwargs)


def download_granule(dest_dir: str, *, overwrite: bool = False) -> Callable[[Granule], None]:
    """Build a timed, single-argument downloader bound to *dest_dir*.

    *dest_dir* is created (including parents) as soon as this factory is
    called, not when the returned function runs.

    Args:
        dest_dir: Directory passed to ``granule.getData`` as its first argument.
        overwrite: Passed to ``granule.getData`` as its second argument.
            NOTE(review): presumably controls whether existing files are
            downloaded again -- confirm against the maap-py documentation.

    Returns:
        A function taking one granule and calling its ``getData``; its
        one-argument shape fits ``for_each``.
    """
    os.makedirs(dest_dir, exist_ok=True)

    @timecall
    def do_download_granule(granule: Granule) -> None:
        # Whatever getData returns is discarded.
        granule.getData(dest_dir, overwrite)
    
    return do_download_granule

## Subset Gabon Granules

### Get Gabon Geo Boundary

In [23]:
# Level-0 (ADM0) boundary for Gabon, read from the cached file or downloaded.
gabon_gdf = get_geo_boundary('GAB', 0)
# The geometry column serialized to a JSON string, then parsed back into
# plain Python objects.
gabon_geojson = gabon_gdf.geometry.to_json()
gabon_geodict = json.loads(gabon_geojson)
# Bare expression so the notebook renders the frame as the cell output.
gabon_gdf

Unnamed: 0,shapeName,shapeISO,shapeID,shapeGroup,shapeType,geometry
0,Gabon,GAB,GAB-ADM0-3_0_0-B1,GAB,ADM0,"MULTIPOLYGON (((8.83154 -0.92271, 8.83809 -0.9..."


### Get GEDI L4A Collection

In [24]:
# DOI used to look up the GEDI L4A collection.
gedi_l4a_doi = '10.3334/ORNLDAAC/1986'
# Take the first match; the [0] index fails if the search returns no results.
gedi_l4a = maap.searchCollection(doi=gedi_l4a_doi, limit=1)[0]
# Concept id used below to restrict the granule search to this collection.
gedi_l4a_concept_id = gedi_l4a['concept-id']

### Find GEDI L4A Granules within Gabon Bounding Box

In [24]:
# The bounding box is the boundary's `total_bounds` values joined with commas.
# The recorded run reported 1009 granules, which is below `limit=2000`.
granules = find_granules(
    collection_concept_id=gedi_l4a_concept_id,
    bounding_box=','.join(str(bound) for bound in gabon_gdf.total_bounds),
    limit=2000,
)

print(f'Found {len(granules)} granules')


  find_granules (/tmp/ipykernel_3734/2505500122.py:1):
    84.751 seconds



Found 1009 granules


In [16]:
# Download only the granule at index 13; the one-element slice keeps it a
# sequence so it can be fed through for_each.
for_each(download_granule('/projects/my-public-bucket/gedi-l4a/gabon'), granules[13:14])


  do_download_granule (/tmp/ipykernel_3734/2505500122.py:9):
    0.000 seconds

