Skip to content

Commit

Permalink
Issue #573 metadata_from_stac: add support for "item_assets"
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Jun 17, 2024
1 parent 2574094 commit 36cecca
Show file tree
Hide file tree
Showing 7 changed files with 228 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Document PEP 723 based Python UDF dependency declarations ([Open-EO/openeo-geopyspark-driver#237](https://github.com/Open-EO/openeo-geopyspark-driver/issues/237))
- Added more `openeo.api.process.Parameter` helpers to easily create "bounding_box", "date", "datetime", "geojson" and "temporal_interval" parameters for UDP construction.
- Added convenience method `Connection.load_stac_from_job(job)` to easily load the results of a batch job with the `load_stac` process ([#566](https://github.com/Open-EO/openeo-python-client/issues/566))
- `load_stac`/`metadata_from_stac`: add support for extracting band info from "item_assets" in collection metadata ([#573](https://github.com/Open-EO/openeo-python-client/issues/573))

### Changed

Expand Down
34 changes: 34 additions & 0 deletions openeo/_testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
Utilities for testing
"""

import json
from pathlib import Path
from typing import Callable, Optional, Union


class TestDataLoader:
    """
    Helper to resolve paths to test data files, load them as JSON, and optionally preprocess them.

    It's recommended to set this up as a fixture in your test suite,
    e.g. from conftest.py:

        @pytest.fixture
        def test_data() -> TestDataLoader:
            return TestDataLoader(root=Path(__file__).parent / "data")
    """

    # The class name starts with "Test", so pytest would try to collect it as a test class
    # (and warn about the `__init__` constructor) in every test module that imports it.
    # Explicitly opt out of test collection.
    __test__ = False

    def __init__(self, root: Union[str, Path]):
        """
        :param root: root folder that contains the test data files
        """
        self.data_root = Path(root)

    def get_path(self, filename: Union[str, Path]) -> Path:
        """
        Get path to a test data file.

        :param filename: file name (or relative path) under the data root
        :return: `filename` joined to the data root (absolute if the root is absolute);
            existence of the file is not checked here
        """
        return self.data_root / filename

    def load_json(self, filename: Union[str, Path], preprocess: Optional[Callable[[str], str]] = None) -> dict:
        """
        Parse data from JSON file

        :param filename: file name (or relative path) under the data root
        :param preprocess: optional callable to transform the raw file content (string to string)
            before it is parsed as JSON
        :return: the parsed JSON data
        """
        data = self.get_path(filename).read_text(encoding="utf8")
        if preprocess:
            data = preprocess(data)
        return json.loads(data)
75 changes: 74 additions & 1 deletion openeo/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import warnings
from typing import Any, Callable, List, NamedTuple, Optional, Tuple, Union
from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set, Tuple, Union

import pystac

Expand Down Expand Up @@ -107,6 +107,7 @@ class Band(NamedTuple):


class BandDimension(Dimension):
# TODO #575 support unordered bands and avoid assumption that band order is known.
def __init__(self, name: str, bands: List[Band]):
super().__init__(type="bands", name=name)
self.bands = bands
Expand Down Expand Up @@ -534,6 +535,8 @@ def metadata_from_stac(url: str) -> CubeMetadata:
:return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url.
"""

# TODO move these nested functions and other logic to _StacMetadataParser

def get_band_metadata(eo_bands_location: dict) -> List[Band]:
# TODO: return None iso empty list when no metadata?
return [
Expand Down Expand Up @@ -573,6 +576,10 @@ def is_band_asset(asset: pystac.Asset) -> bool:
for asset_band in asset_bands:
if asset_band.name not in get_band_names(bands):
bands.append(asset_band)
if collection.ext.has("item_assets"):
# TODO #575 support unordered band names and avoid conversion to a list.
bands = list(_StacMetadataParser().get_bands_from_item_assets(collection.ext.item_assets))

elif isinstance(stac_object, pystac.Catalog):
catalog = stac_object
bands = get_band_metadata(catalog.extra_fields.get("summaries", {}))
Expand All @@ -586,3 +593,69 @@ def is_band_asset(asset: pystac.Asset) -> bool:
temporal_dimension = TemporalDimension(name="t", extent=[None, None])
metadata = CubeMetadata(dimensions=[band_dimension, temporal_dimension])
return metadata


class _StacMetadataParser:
    """
    Helper that derives openEO metadata (band information) from STAC metadata constructs.
    """

    def __init__(self):
        # TODO: toggles for how to handle strictness, warnings, logging, etc
        pass

    def _get_band_from_eo_bands_item(self, eo_band: Union[dict, pystac.extensions.eo.Band]) -> Band:
        """
        Convert a single `eo:bands` entry to a :py:class:`Band`.

        :param eo_band: band description, as `pystac.extensions.eo.Band` instance
            or as dict that has at least a "name" field
        :raises ValueError: when `eo_band` is neither of these
        """
        if isinstance(eo_band, pystac.extensions.eo.Band):
            name = eo_band.name
            common_name = eo_band.common_name
            wavelength = eo_band.center_wavelength
        elif isinstance(eo_band, dict) and "name" in eo_band:
            name = eo_band["name"]
            common_name = eo_band.get("common_name")
            wavelength = eo_band.get("center_wavelength")
        else:
            raise ValueError(eo_band)
        return Band(name=name, common_name=common_name, wavelength_um=wavelength)

    def get_bands_from_eo_bands(self, eo_bands: List[Union[dict, pystac.extensions.eo.Band]]) -> List[Band]:
        """
        Extract bands from STAC `eo:bands` array

        :param eo_bands: List of band objects, as dict or `pystac.extensions.eo.Band` instances
        :return: list of :py:class:`Band` objects, in the same order as `eo_bands`
        """
        # TODO: option to skip bands that failed to parse in some way?
        parsed = []
        for eo_band in eo_bands:
            parsed.append(self._get_band_from_eo_bands_item(eo_band))
        return parsed

    def _get_bands_from_item_asset(
        self, item_asset: pystac.extensions.item_assets.AssetDefinition
    ) -> Union[List[Band], None]:
        """
        Get bands from a STAC 'item_assets' asset definition.

        :return: list of bands, or `None` when no band information was found
        """
        if item_asset.ext.has("eo"):
            # "eo" extension is declared: only trust what the extension API exposes.
            eo_bands = item_asset.ext.eo.bands
            if eo_bands is None:
                return None
            return self.get_bands_from_eo_bands(eo_bands)

        if "eo:bands" in item_asset.properties:
            # Undeclared extension, but the raw field is there: use it anyway (with a warning).
            # TODO: skip this in strict mode?
            _log.warning("Extracting band info from 'eo:bands' metadata, but 'eo' STAC extension was not declared.")
            return self.get_bands_from_eo_bands(item_asset.properties["eo:bands"])

        return None

    def get_bands_from_item_assets(
        self, item_assets: Dict[str, pystac.extensions.item_assets.AssetDefinition]
    ) -> Set[Band]:
        """
        Collect the bands found in the "item_assets" objects (as defined by the "item-assets" extension,
        in combination with the "eo" extension) at the top level of a STAC Collection.

        Note that "item_assets" in STAC is a mapping, so the band order is undefined,
        which is why a set of bands is returned here.

        :param item_assets: a STAC `item_assets` mapping
        :return: set of all bands found across the asset definitions
        """
        collected = set()
        for definition in item_assets.values():
            # Asset definitions without band info yield `None` (or an empty list): nothing to add.
            collected.update(self._get_bands_from_item_asset(definition) or ())
        return collected
2 changes: 2 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@


def get_test_resource(relative_path: str) -> Path:
    """
    Build the path of a test resource.

    :param relative_path: path of the resource, relative to the folder that contains this module
    :return: `relative_path` joined to the (symlink-resolved) folder of this module
    """
    # TODO: migrate to TestDataLoader
    this_file = os.path.realpath(__file__)
    tests_root = Path(os.path.dirname(this_file))
    return tests_root / relative_path


def load_json_resource(relative_path, preprocess: Callable = None) -> dict:
# TODO: migrate to TestDataLoader
with get_test_resource(relative_path).open("r+") as f:
data = f.read()
if preprocess:
Expand Down
6 changes: 6 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pytest

from openeo._testing import TestDataLoader
from openeo.util import ensure_dir

pytest_plugins = "pytester"
Expand All @@ -25,3 +26,8 @@ def tmp_openeo_config_home(tmp_path):
path = ensure_dir(Path(str(tmp_path)) / "openeo-conf")
with mock.patch.dict("os.environ", {"OPENEO_CONFIG_HOME": str(path)}):
yield path


@pytest.fixture
def test_data() -> TestDataLoader:
    """Fixture providing a loader for the test data files in the "data" folder next to this module."""
    data_root = Path(__file__).parent / "data"
    return TestDataLoader(root=data_root)
82 changes: 82 additions & 0 deletions tests/data/stac/collections/agera5_daily01.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
{
"type": "Collection",
"id": "agera5_daily",
"stac_version": "1.0.0",
"description": "ERA5",
"links": [
{
"rel": "self",
"type": "application/json",
"href": "https://stac.test/collections/agera5_daily"
}
],
"stac_extensions": [
"https://stac-extensions.github.io/item-assets/v1.0.0/schema.json",
"https://stac-extensions.github.io/eo/v1.1.0/schema.json"
],
"item_assets": {
"2m_temperature_min": {
"type": "image/tiff; application=geotiff",
"title": "2m temperature min 24h",
"eo:bands": [
{
"name": "2m_temperature_min",
"description": "temperature 2m above ground (Kelvin)"
}
]
},
"2m_temperature_max": {
"type": "image/tiff; application=geotiff",
"eo:bands": [
{
"name": "2m_temperature_max",
"description": "temperature 2m above ground (Kelvin)"
}
]
},
"dewpoint_temperature_mean": {
"type": "image/tiff; application=geotiff",
"title": "2m dewpoint temperature",
"eo:bands": [
{
"name": "dewpoint_temperature_mean",
"description": "dewpoint temperature 2m above ground (Kelvin)"
}
]
},
"vapour_pressure": {
"eo:bands": [
{
"name": "vapour_pressure"
}
]
}
},
"title": "agERA5 data",
"extent": {
"spatial": {
"bbox": [
[
-180,
-90,
180,
90
]
]
},
"temporal": {
"interval": [
[
"2010-01-01T00:00:00Z",
"2024-06-12T00:00:00Z"
]
]
}
},
"keywords": [
"ERA5"
],
"summaries": {},
"assets": {},
"license": "proprietary"
}
31 changes: 29 additions & 2 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import annotations

import json
from typing import List
import re
from typing import List, Union

import pytest

Expand Down Expand Up @@ -835,8 +836,34 @@ def filter_bbox(self, bbox):
],
)
def test_metadata_from_stac(tmp_path, test_stac, expected):

path = tmp_path / "stac.json"
path.write_text(json.dumps(test_stac))
metadata = metadata_from_stac(path)
assert metadata.band_names == expected


@pytest.mark.parametrize("eo_extension_is_declared", [False, True])
def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_path, eo_extension_is_declared, caplog):
    """
    Band names should be extracted from the "item_assets" of a STAC collection,
    both with and without the "eo" extension being listed under "stac_extensions".
    When it is not listed, a warning about that is expected in the logs.
    """
    eo_extension_prefix = "https://stac-extensions.github.io/eo/"

    def is_eo_extension(extension: str) -> bool:
        return extension.startswith(eo_extension_prefix)

    stac_data = test_data.load_json("stac/collections/agera5_daily01.json")
    # Drop the "eo" extension declaration when this test case wants it undeclared.
    stac_data["stac_extensions"] = [
        ext for ext in stac_data["stac_extensions"] if eo_extension_is_declared or not is_eo_extension(ext)
    ]
    # Sanity check of the test setup itself.
    eo_declared = any(is_eo_extension(ext) for ext in stac_data["stac_extensions"])
    assert eo_declared == eo_extension_is_declared

    path = tmp_path / "stac.json"
    path.write_text(json.dumps(stac_data))

    band_names = metadata_from_stac(path).band_names
    # Band order is undefined when coming from "item_assets" (a mapping), so compare sorted.
    assert sorted(band_names) == [
        "2m_temperature_max",
        "2m_temperature_min",
        "dewpoint_temperature_mean",
        "vapour_pressure",
    ]

    if not eo_extension_is_declared:
        expected_warning = "Extracting band info from 'eo:bands' metadata, but 'eo' STAC extension was not declared."
        assert expected_warning in caplog.text

0 comments on commit 36cecca

Please sign in to comment.