In [0]:
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "earthaccess",
#     "httpx",
#     "obstore",
#     "pystac",
#     "python-cmr",
#     "rustac",
#     "smart-open",
#     "tqdm",
# ]
# ///

In [7]:

import json
from typing import Any
from urllib.parse import urlparse

import earthaccess
import httpx
import obstore as obs
import pystac
import rustac
import smart_open
import tqdm
from cmr import GranuleQuery
from obstore.store import from_url

# Log in through earthaccess using the "netrc" strategy.
# NOTE(review): this presumably reads Earthdata credentials from the user's
# ~/.netrc file — confirm the file exists before running on a fresh machine.
# NOTE(review): `auth` is not referenced again in the cells shown here.
auth = earthaccess.login(strategy="netrc")

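Run a CMR granule query for the HLSL30 and HLSS30 collections. The query below does not set an output format, so python-cmr's default `json` format is used (not `stac`); the link to each granule's STAC item JSON is extracted from the entry's `links` in a later cell.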


In [10]:
# Concept IDs of the two collections searched together.
hls_collection_ids = [
    "C2021957657-LPCLOUD",  # HLSL30
    "C2021957295-LPCLOUD",  # HLSS30
]

api = GranuleQuery()

# Build the query one filter at a time: collections, then bounding box
# (-180, 30, 180, 80), then a one-day temporal window.
granule_query = api.collection_concept_id(hls_collection_ids)
granule_query = granule_query.bounding_box(-180, 30, 180, 80)
granule_query = granule_query.temporal(
    "2024-06-01T00:00:00Z",
    "2024-06-02T00:00:00Z",
)

# hits() asks the service how many granules match before anything is paged.
hit_count = granule_query.hits()
print(f"this query yields {hit_count} results!")

this query yields 8003 results!


We can iterate through the paged results of this granule query with a small generator that follows the implementation of python-cmr's not-yet-released `results()` method:


In [11]:
def get_results(query, page_size: int = 2000):
    """Lazily yield every result of a CMR query, one page request at a time.

    Pages are chained with the ``cmr-search-after`` header: the value returned
    by each response is sent back on the next request, and iteration stops as
    soon as a response no longer carries that header.

    Args:
        query: A python-cmr style query object. Only ``_build_url()``,
            ``headers`` and ``_format`` are read from it.
        page_size: Requested results per page; clamped into ``[1, 2000]``.

    Yields:
        When ``query._format == "json"``, each element of
        ``response.json()["feed"]["entry"]``. For any other format, the raw
        response text of each page (one string per page).

    Raises:
        Whatever ``response.raise_for_status()`` raises for an unsuccessful
        HTTP status.
    """
    # Clamp instead of failing on out-of-range values.
    # NOTE(review): 2000 is presumably the service's maximum page size — confirm.
    page_size = min(max(1, page_size), 2000)
    url = query._build_url() + f"&page_size={page_size}"
    # Work on a copy so the paging header never leaks into the query object.
    headers = dict(query.headers or {})

    # A single client is shared by all page requests so they can reuse one
    # pooled connection instead of opening a new one per page. timeout=None
    # keeps the original "wait as long as it takes" behaviour.
    with httpx.Client(timeout=None) as client:
        while True:
            response = client.get(url, headers=headers)
            response.raise_for_status()

            if query._format == "json":
                yield from response.json()["feed"]["entry"]
            else:
                yield response.text

            cmr_search_after = response.headers.get("cmr-search-after")
            if not cmr_search_after:
                # No continuation token means this was the last page.
                break

            headers["cmr-search-after"] = cmr_search_after

In [21]:
# get_results is a generator function, so this line only creates the generator:
# no HTTP request is sent until `results` is iterated. A generator can be
# consumed only once — re-run this cell before re-running any cell that
# iterates over `results`.
results = get_results(granule_query)

In [22]:
%%time
def get_stac_json_links(result, prefix: str = "https"):
    """Return the parsed URL of a granule's STAC item JSON link.

    Despite the plural name, a single link is returned: the first entry of
    ``result["links"]`` whose ``href`` ends with ``"stac.json"`` and starts
    with ``prefix``.

    Args:
        result: Mapping with a ``"links"`` list; each link is a mapping with an
            ``"href"`` string.
        prefix: Required start of the href (``"https"`` by default).

    Returns:
        The matching href as a ``urllib.parse.ParseResult``.

    Raises:
        ValueError: If no link matches. A bare ``StopIteration`` is avoided on
            purpose: it can silently end an enclosing ``map()``/iterator or
            surface as an unrelated ``RuntimeError`` inside a generator.
        KeyError: If ``result`` has no ``"links"`` key or a link has no
            ``"href"`` key.
    """
    for link in result["links"]:
        href = link["href"]
        if href.endswith("stac.json") and href.startswith(prefix):
            return urlparse(href)

    # NOTE(review): "id" is assumed to identify the granule when present.
    granule = result.get("id", "<unknown>") if hasattr(result, "get") else "<unknown>"
    raise ValueError(
        f"no link ending in 'stac.json' and starting with {prefix!r} "
        f"found for granule {granule}"
    )


# Iterating `results` here is what actually drives the paged HTTP requests in
# get_results, so this cell's wall time is likely dominated by network wait.
# One parsed STAC-item URL is collected per yielded granule.
stac_json_links = [get_stac_json_links(result) for result in results]

CPU times: user 812 ms, sys: 96.2 ms, total: 908 ms
Wall time: 20.9 s


In [25]:
# Take the first parsed STAC item URL and display it; its scheme and netloc
# are used by a later cell to build the object store.
link = stac_json_links[0]
link

ParseResult(scheme='https', netloc='data.lpdaac.earthdatacloud.nasa.gov', path='/lp-prod-public/HLSS30.020/HLS.S30.T01WEV.2024153T000609.v2.0/HLS.S30.T01WEV.2024153T000609.v2.0_stac.json', params='', query='', fragment='')

In [33]:
store = from_url(
    link.scheme + "://" + link.netloc
)

item_data = await obs.get_async(store, stac_json_links[0].path)
item_bytes = await item_data.bytes_async()
item_dict = json.loads(item_bytes.to_bytes().decode("utf-8"))
item_dict

{'type': 'Feature',
 'stac_version': '1.0.0',
 'id': 'HLS.S30.T01WEV.2024153T000609.v2.0',
 'properties': {'start_datetime': '2024-06-01T00:06:19.072460Z',
  'end_datetime': '2024-06-01T00:06:19.072460Z',
  'platform': 'sentinel-2b',
  'instruments': ['msi'],
  'eo:cloud_cover': 18.0,
  'proj:transform': [30.0,
   0.0,
   499980.0,
   0.0,
   -30.0,
   8000040.0,
   0.0,
   0.0,
   1.0],
  'proj:shape': [3660, 3660],
  'proj:epsg': 32601,
  'view:sun_azimuth': 187.71039035,
  'view:azimuth': 296.0704673,
  'sci:doi': '10.5067/HLS/HLSS30.002',
  'datetime': '2024-06-01T00:06:19.072460Z'},
 'geometry': {'type': 'MultiPolygon',
  'coordinates': [[[[-177.000554, 71.115154],
     [-177.000575, 71.841919],
     [-175.544595, 71.573861],
     [-173.959239, 71.242815],
     [-173.940843, 71.2374],
     [-173.963758, 71.090475],
     [-177.000554, 71.115154]]]]},
 'links': [{'rel': 'self',
   'href': 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/HLSS30.020/HLS.S30.T01WEV.2024153T0

In [32]:

# Write the single fetched STAC item to a local file with rustac.
# NOTE(review): the ".parquet" suffix presumably selects stac-geoparquet
# output — confirm against the rustac documentation.
# NOTE(review): depends on `item_dict` from the fetch cell; run that cell first.
test = await rustac.write(
    "test.parquet",
    [item_dict],
)

{'type': 'Feature',
 'stac_version': '1.0.0',
 'id': 'HLS.S30.T01WEV.2024153T000609.v2.0',
 'properties': {'start_datetime': '2024-06-01T00:06:19.072460Z',
  'end_datetime': '2024-06-01T00:06:19.072460Z',
  'platform': 'sentinel-2b',
  'instruments': ['msi'],
  'eo:cloud_cover': 18.0,
  'proj:transform': [30.0,
   0.0,
   499980.0,
   0.0,
   -30.0,
   8000040.0,
   0.0,
   0.0,
   1.0],
  'proj:shape': [3660, 3660],
  'proj:epsg': 32601,
  'view:sun_azimuth': 187.71039035,
  'view:azimuth': 296.0704673,
  'sci:doi': '10.5067/HLS/HLSS30.002',
  'datetime': '2024-06-01T00:06:19.072460Z'},
 'geometry': {'type': 'MultiPolygon',
  'coordinates': [[[[-177.000554, 71.115154],
     [-177.000575, 71.841919],
     [-175.544595, 71.573861],
     [-173.959239, 71.242815],
     [-173.940843, 71.2374],
     [-173.963758, 71.090475],
     [-177.000554, 71.115154]]]]},
 'links': [{'rel': 'self',
   'href': 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/HLSS30.020/HLS.S30.T01WEV.2024153T0