diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d544d9a..d2e8f08 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,13 +5,13 @@ repos: - id: validate-pyproject - repo: https://github.com/PyCQA/isort - rev: 5.12.0 + rev: 6.0.1 hooks: - id: isort language_version: python - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.4 + rev: v0.13.1 hooks: - id: ruff args: ["--fix"] @@ -24,6 +24,5 @@ repos: language_version: python exclude: tests/.* additional_dependencies: - - types-simplejson - types-attrs - - pydantic~=2.0 \ No newline at end of file + - pydantic>=2.11 diff --git a/pyproject.toml b/pyproject.toml index 56c483e..5dd503c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ classifiers = [ requires-python = ">=3.11" dependencies = [ "pydantic-zarr>=0.8.0", + "pydantic>=2.12.0a1", "zarr>=3.1.1", "xarray>=2025.7.1", "dask[array,distributed]>=2025.5.1", @@ -111,7 +112,7 @@ use_parentheses = true ensure_newline_before_comments = true [tool.mypy] -python_version = "3.10" +python_version = "3.11" warn_return_any = true warn_unused_configs = true disallow_untyped_defs = true diff --git a/src/eopf_geozarr/cli.py b/src/eopf_geozarr/cli.py index 311437a..5ff9b81 100644 --- a/src/eopf_geozarr/cli.py +++ b/src/eopf_geozarr/cli.py @@ -410,9 +410,9 @@ def render_node(node: Any, path: str = "", level: int = 0) -> str: # Generate HTML for this node node_html = f"""
-
+
- {'📁' if children_count > 0 else '📄'} + {"📁" if children_count > 0 else "📄"} {node_name} ({summary}) @@ -882,7 +882,7 @@ def _generate_html_output(
Generated
-
{__import__('datetime').datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+
{__import__("datetime").datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
diff --git a/src/eopf_geozarr/data_api/geozarr/__init__.py b/src/eopf_geozarr/data_api/geozarr/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/eopf_geozarr/data_api/geozarr/common.py b/src/eopf_geozarr/data_api/geozarr/common.py new file mode 100644 index 0000000..ee21ee3 --- /dev/null +++ b/src/eopf_geozarr/data_api/geozarr/common.py @@ -0,0 +1,264 @@ +"""Common utilities for GeoZarr data API.""" + +import io +import urllib +import urllib.request +from typing import Annotated, Any, Mapping, TypeVar + +from cf_xarray.utils import parse_cf_standard_name_table +from pydantic import AfterValidator, BaseModel +from pydantic.experimental.missing_sentinel import MISSING +from typing_extensions import Protocol, runtime_checkable + +from eopf_geozarr.data_api.geozarr.types import ResamplingMethod + + +class BaseDataArrayAttrs(BaseModel, extra="allow"): + """ + Base attributes for a GeoZarr DataArray. + + Attributes + ---------- + """ + + grid_mapping: str | MISSING = MISSING + + +class GridMappingAttrs(BaseModel, extra="allow"): + """ + Grid mapping attributes for a GeoZarr grid mapping variable. + + Attributes + ---------- + grid_mapping_name : str + The name of the grid mapping. + + Extra fields are permitted. + + Additional attributes might be present depending on the type of grid mapping. + + References + ---------- + https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#grid-mappings-and-projections + """ + + grid_mapping_name: str + + +def get_cf_standard_names(url: str) -> tuple[str, ...]: + """Retrieve the set of CF standard names and return them as a tuple.""" + + headers = {"User-Agent": "eopf_geozarr"} + + req = urllib.request.Request(url, headers=headers) + + try: + with urllib.request.urlopen(req) as response: + content = response.read() # Read the entire response body into memory + content_fobj = io.BytesIO(content) + except urllib.error.URLError as e: + raise e + + _info, table, _aliases = parse_cf_standard_name_table(source=content_fobj) + return tuple(table.keys()) + + +# This is a URL to the CF standard names table. +CF_STANDARD_NAME_URL = ( + "https://raw.githubusercontent.com/cf-convention/cf-convention.github.io/" + "master/Data/cf-standard-names/current/src/cf-standard-name-table.xml" +) + +# this does IO against github. consider locally storing this data instead if fetching every time +# is problematic. +CF_STANDARD_NAMES = get_cf_standard_names(url=CF_STANDARD_NAME_URL) + + +def check_standard_name(name: str) -> str: + """ + Check if the standard name is valid according to the CF conventions. + + Parameters + ---------- + name : str + The standard name to check. + + Returns + ------- + str + The validated standard name. + + Raises + ------ + ValueError + If the standard name is not valid. + """ + + if name in CF_STANDARD_NAMES: + return name + raise ValueError( + f"Invalid standard name: {name}. This name was not found in the list of CF standard names." + ) + + +CFStandardName = Annotated[str, AfterValidator(check_standard_name)] + + +@runtime_checkable +class GroupLike(Protocol): + members: Mapping[str, Any] | None + attributes: Any + + +TGroupLike = TypeVar("TGroupLike", bound=GroupLike) + + +def check_valid_coordinates(model: TGroupLike) -> TGroupLike: + """ + Check if the coordinates of the DataArrayLike objects listed in GroupLike objects are valid. + + For each DataArrayLike in the model, we check the dimensions associated with the DataArrayLike. + For each dimension associated with a data variable, a DataArrayLike with the name of that data + variable must be present in the members of the group. + + Parameters + ---------- + model : GroupLike + An object that implements the GroupLike protocol. + + Returns + ------- + GroupLike + A GroupLike object with referentially valid coordinates. + """ + if model.members is None: + raise ValueError("Model members cannot be None") + + arrays: dict[str, DataArrayLike] = { + k: v for k, v in model.members.items() if isinstance(v, DataArrayLike) + } + for key, array in arrays.items(): + for idx, dim in enumerate(array.array_dimensions): + if dim not in model.members: + raise ValueError( + f"Dimension '{dim}' for array '{key}' is not defined in the model members." + ) + member = model.members[dim] + if isinstance(member, GroupLike): + raise ValueError( + f"Dimension '{dim}' for array '{key}' should be a group. Found an array instead." + ) + if member.shape[0] != array.shape[idx]: + raise ValueError( + f"Dimension '{dim}' for array '{key}' has a shape mismatch: " + f"{member.shape[0]} != {array.shape[idx]}." + ) + return model + + +@runtime_checkable +class DataArrayLike(Protocol): + """ + This is a protocol that models the relevant properties of Zarr V2 and Zarr V3 DataArrays. + """ + + @property + def array_dimensions(self) -> tuple[str, ...]: ... + + shape: tuple[int, ...] + attributes: BaseDataArrayAttrs + + +class TileMatrixLimit(BaseModel): + """""" + + tileMatrix: str + minTileCol: int + minTileRow: int + maxTileCol: int + maxTileRow: int + + +class TileMatrix(BaseModel): + id: str + scaleDenominator: float + cellSize: float + pointOfOrigin: tuple[float, float] + tileWidth: int + tileHeight: int + matrixWidth: int + matrixHeight: int + + +class TileMatrixSet(BaseModel): + id: str + title: str | None = None + crs: str | None = None + supportedCRS: str | None = None + orderedAxes: tuple[str, str] | None = None + tileMatrices: tuple[TileMatrix, ...] + + +class Multiscales(BaseModel, extra="allow"): + """ + Multiscale metadata for a GeoZarr dataset. + + Attributes + ---------- + tile_matrix_set : str + The tile matrix set identifier for the multiscale dataset. + resampling_method : ResamplingMethod + The name of the resampling method for the multiscale dataset. + tile_matrix_set_limits : dict[str, TileMatrixSetLimits] | None, optional + The tile matrix set limits for the multiscale dataset. + """ + + tile_matrix_set: TileMatrixSet + resampling_method: ResamplingMethod + # TODO: ensure that the keys match tile_matrix_set.tileMatrices[$index].id + # TODO: ensure that the keys match the tileMatrix attribute + tile_matrix_limits: dict[str, TileMatrixLimit] | None = None + + +class DatasetAttrs(BaseModel, extra="allow"): + """ + Attributes for a GeoZarr dataset. + + A dataset is a collection of DataArrays. This class models the attributes of a dataset + """ + + ... + + +@runtime_checkable +class DatasetLike(Protocol): + members: Mapping[str, DataArrayLike] | None + + +TDataSetLike = TypeVar("TDataSetLike", bound=DatasetLike) + + +def check_grid_mapping(model: TDataSetLike) -> TDataSetLike: + """ + Ensure that a grid mapping variable is present, and that it refers to a member of the model. + """ + if model.members is not None: + for name, member in model.members.items(): + if member.attributes.grid_mapping not in model.members: + msg = f"Grid mapping variable '{member.attributes.grid_mapping}' declared by {name} was not found in dataset members" + raise ValueError(msg) + return model + + +class MultiscaleGroupAttrs(BaseModel, extra="allow"): + """ + Attributes for Multiscale GeoZarr dataset. + + A Multiscale dataset is a collection of Dataet + + Attributes + ---------- + multiscales: MultiscaleAttrs + """ + + multiscales: Multiscales diff --git a/src/eopf_geozarr/data_api/geozarr/types.py b/src/eopf_geozarr/data_api/geozarr/types.py new file mode 100644 index 0000000..0e0b7dd --- /dev/null +++ b/src/eopf_geozarr/data_api/geozarr/types.py @@ -0,0 +1,79 @@ +"""Types and constants for the GeoZarr data API.""" + +from typing import Any, Final, Literal, NotRequired, TypedDict + + +class TileMatrixLimitJSON(TypedDict): + tileMatrix: str + minTileCol: int + minTileRow: int + maxTileCol: int + maxTileRow: int + + +class XarrayEncodingJSON(TypedDict): + chunks: NotRequired[tuple[int, ...]] + compressors: Any + + +class StandardXCoordAttrsJSON(TypedDict): + units: Literal["m"] + long_name: Literal["x coordinate of projection"] + standard_name: Literal["projection_x_coordinate"] + _ARRAY_DIMENSIONS: list[Literal["x"]] + + +class StandardYCoordAttrsJSON(TypedDict): + units: Literal["m"] + long_name: Literal["y coordinate of projection"] + standard_name: Literal["projection_y_coordinate"] + _ARRAY_DIMENSIONS: list[Literal["y"]] + + +class OverviewLevelJSON(TypedDict): + level: int + zoom: int + width: int + height: int + scale_factor: int + + +class TileMatrixJSON(TypedDict): + id: str + scaleDenominator: float + cellSize: float + pointOfOrigin: tuple[float, float] | list[float] + tileWidth: int + tileHeight: int + matrixWidth: int + matrixHeight: int + + +class TileMatrixSetJSON(TypedDict): + id: str + title: str | None + crs: str | None + supportedCRS: str | None + orderedAxes: tuple[str, str] | None | list[str] + tileMatrices: tuple[TileMatrixJSON, ...] | list[TileMatrixJSON] + + +ResamplingMethod = Literal[ + "nearest", + "average", + "bilinear", + "cubic", + "cubic_spline", + "lanczos", + "mode", + "max", + "min", + "med", + "sum", + "q1", + "q3", + "rms", + "gauss", +] +"""A string literal indicating a resampling method""" +XARRAY_DIMS_KEY: Final = "_ARRAY_DIMENSIONS" diff --git a/src/eopf_geozarr/data_api/geozarr/v2.py b/src/eopf_geozarr/data_api/geozarr/v2.py new file mode 100644 index 0000000..c9f316e --- /dev/null +++ b/src/eopf_geozarr/data_api/geozarr/v2.py @@ -0,0 +1,146 @@ +"""Zarr V2 Models for the GeoZarr Zarr Hierarchy.""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any, Iterable, Literal, Self + +from pydantic import ConfigDict, Field, model_validator +from pydantic_zarr.v2 import ArraySpec, GroupSpec, auto_attributes + +from eopf_geozarr.data_api.geozarr.common import ( + BaseDataArrayAttrs, + DatasetAttrs, + GridMappingAttrs, + MultiscaleGroupAttrs, + check_grid_mapping, + check_valid_coordinates, +) +from eopf_geozarr.data_api.geozarr.types import XARRAY_DIMS_KEY + + +class DataArrayAttrs(BaseDataArrayAttrs): + """ + Attributes for a GeoZarr DataArray. + + Attributes + ---------- + array_dimensions : tuple[str, ...] + Alias for the _ARRAY_DIMENSIONS attribute, which lists the dimension names for this array. + """ + + # todo: validate that this names listed here are the names of zarr arrays + # unless the variable is an auxiliary variable + # see https://github.com/zarr-developers/geozarr-spec/blob/main/geozarr-spec.md#geozarr-coordinates + array_dimensions: tuple[str, ...] = Field(alias="_ARRAY_DIMENSIONS") + + # this is necessary to serialize the `array_dimensions` attribute as `_ARRAY_DIMENSIONS` + model_config = ConfigDict(serialize_by_alias=True) + + +class DataArray(ArraySpec[DataArrayAttrs]): + """ + A GeoZarr DataArray variable. It must have attributes that contain an `"_ARRAY_DIMENSIONS"` + key, with a length that matches the dimensionality of the array. + + References + ---------- + https://github.com/zarr-developers/geozarr-spec/blob/main/geozarr-spec.md#geozarr-dataarray + """ + + @classmethod + def from_array( + cls, + array: Any, + chunks: tuple[int, ...] | Literal["auto"] = "auto", + attributes: Mapping[str, object] | Literal["auto"] = "auto", + fill_value: object | Literal["auto"] = "auto", + order: Literal["C", "F"] | Literal["auto"] = "auto", + filters: tuple[Any, ...] | Literal["auto"] = "auto", + dimension_separator: Literal[".", "/"] | Literal["auto"] = "auto", + compressor: Any | Literal["auto"] = "auto", + dimension_names: Iterable[str] | Literal["auto"] = "auto", + ) -> Self: + """ + Override the default from_array method to include a dimension_names parameter. + """ + if attributes == "auto": + auto_attrs = dict(auto_attributes(array)) + else: + auto_attrs = dict(attributes) + if dimension_names != "auto": + auto_attrs = auto_attrs | {XARRAY_DIMS_KEY: tuple(dimension_names)} + model = super().from_array( + array=array, + chunks=chunks, + attributes=auto_attrs, + fill_value=fill_value, + order=order, + filters=filters, + dimension_separator=dimension_separator, + compressor=compressor, + ) + return model # type: ignore[no-any-return] + + @model_validator(mode="after") + def check_array_dimensions(self) -> Self: + if (len_dim := len(self.attributes.array_dimensions)) != ( + ndim := len(self.shape) + ): + msg = ( + f"The {XARRAY_DIMS_KEY} attribute has length {len_dim}, which does not " + f"match the number of dimensions for this array (got {ndim})." + ) + raise ValueError(msg) + return self + + @property + def array_dimensions(self) -> tuple[str, ...]: + return self.attributes.array_dimensions # type: ignore[no-any-return] + + +class GridMappingVariable(ArraySpec[GridMappingAttrs]): + """ + A Zarr array that represents a GeoZarr grid mapping variable. + + The attributes of this array are defined in `GridMappingAttrs`. + + References + ---------- + https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#grid-mappings-and-projections + """ + + ... + + +class Dataset(GroupSpec[DatasetAttrs, DataArray | GridMappingVariable]): + """ + A GeoZarr Dataset. + """ + + @model_validator(mode="after") + def check_valid_coordinates(self) -> Self: + """ + Validate the coordinates of the GeoZarr DataSet. + + This method checks that all DataArrays in the dataset have valid coordinates + according to the GeoZarr specification. + + Returns + ------- + GroupSpec[Any, Any] + The validated GeoZarr DataSet. + """ + return check_valid_coordinates(self) + + @model_validator(mode="after") + def check_grid_mapping(self) -> Self: + return check_grid_mapping(self) + + +class MultiscaleGroup(GroupSpec[MultiscaleGroupAttrs, DataArray | GroupSpec[Any, Any]]): + """ + A GeoZarr Multiscale Group. + """ + + ... diff --git a/src/eopf_geozarr/data_api/geozarr/v3.py b/src/eopf_geozarr/data_api/geozarr/v3.py new file mode 100644 index 0000000..20649c1 --- /dev/null +++ b/src/eopf_geozarr/data_api/geozarr/v3.py @@ -0,0 +1,70 @@ +"""Zarr V3 Models for the GeoZarr Zarr Hierarchy.""" + +from __future__ import annotations + +from typing import Any, Self + +from pydantic import model_validator +from pydantic_zarr.v3 import ArraySpec, GroupSpec + +from eopf_geozarr.data_api.geozarr.common import ( + BaseDataArrayAttrs, + DatasetAttrs, + MultiscaleGroupAttrs, + check_grid_mapping, + check_valid_coordinates, +) + + +class DataArray(ArraySpec[BaseDataArrayAttrs]): + """ + A Zarr array that represents as GeoZarr DataArray variable. + + The attributes of this array are defined in `BaseDataArrayAttrs`. + + This array has an additional constraint: the dimension_names field must be a tuple of strings. + + References + ---------- + https://github.com/zarr-developers/geozarr-spec/blob/main/geozarr-spec.md#geozarr-dataarray + """ + + # The dimension names must be a tuple of strings + dimension_names: tuple[str, ...] + + @property + def array_dimensions(self) -> tuple[str, ...]: + return self.dimension_names + + +class Dataset(GroupSpec[DatasetAttrs, GroupSpec[Any, Any] | DataArray]): + """ + A GeoZarr Dataset. + """ + + @model_validator(mode="after") + def check_valid_coordinates(self) -> Self: + """ + Validate the coordinates of the GeoZarr DataSet. + + This method checks that all DataArrays in the dataset have valid coordinates + according to the GeoZarr specification. + + Returns + ------- + GroupSpec[Any, Any] + The validated GeoZarr DataSet. + """ + return check_valid_coordinates(self) + + @model_validator(mode="after") + def validate_grid_mapping(self) -> Self: + return check_grid_mapping(self) + + +class MultiscaleGroup(GroupSpec[MultiscaleGroupAttrs, DataArray | GroupSpec[Any, Any]]): + """ + A GeoZarr Multiscale Group. + """ + + ... diff --git a/src/eopf_geozarr/tests/__init__.py b/src/eopf_geozarr/tests/__init__.py index 22bc7fa..21c7784 100644 --- a/src/eopf_geozarr/tests/__init__.py +++ b/src/eopf_geozarr/tests/__init__.py @@ -21,9 +21,9 @@ def _verify_basic_structure(output_path: pathlib.Path, groups: list[str]) -> Non # Check that level 0 (native resolution) exists level_0_path = group_path / "0" assert level_0_path.exists(), f"Level 0 not found for {group}" - assert ( - level_0_path / "zarr.json" - ).exists(), f"Level 0 missing zarr.json for {group}" + assert (level_0_path / "zarr.json").exists(), ( + f"Level 0 missing zarr.json for {group}" + ) def _verify_geozarr_spec_compliance(output_path: pathlib.Path, group: str) -> None: @@ -49,12 +49,12 @@ def _verify_geozarr_spec_compliance(output_path: pathlib.Path, group: str) -> No # Check 1: _ARRAY_DIMENSIONS attributes (required by GeoZarr spec) for var_name in ds.data_vars: if var_name != "spatial_ref": # Skip grid_mapping variable - assert ( - "_ARRAY_DIMENSIONS" in ds[var_name].attrs - ), f"Missing _ARRAY_DIMENSIONS for {var_name} in {group}" - assert ds[var_name].attrs["_ARRAY_DIMENSIONS"] == list( - ds[var_name].dims - ), f"Incorrect _ARRAY_DIMENSIONS for {var_name} in {group}" + assert "_ARRAY_DIMENSIONS" in ds[var_name].attrs, ( + f"Missing _ARRAY_DIMENSIONS for {var_name} in {group}" + ) + assert ds[var_name].attrs["_ARRAY_DIMENSIONS"] == list(ds[var_name].dims), ( + f"Incorrect _ARRAY_DIMENSIONS for {var_name} in {group}" + ) print( f" ✅ _ARRAY_DIMENSIONS: {ds[var_name].attrs['_ARRAY_DIMENSIONS']}" ) @@ -62,9 +62,9 @@ def _verify_geozarr_spec_compliance(output_path: pathlib.Path, group: str) -> No # Check coordinates for coord_name in ds.coords: if coord_name not in ["spatial_ref"]: # Skip CRS coordinate - assert ( - "_ARRAY_DIMENSIONS" in ds[coord_name].attrs - ), f"Missing _ARRAY_DIMENSIONS for coordinate {coord_name} in {group}" + assert "_ARRAY_DIMENSIONS" in ds[coord_name].attrs, ( + f"Missing _ARRAY_DIMENSIONS for coordinate {coord_name} in {group}" + ) print( f" ✅ {coord_name} _ARRAY_DIMENSIONS: {ds[coord_name].attrs['_ARRAY_DIMENSIONS']}" ) @@ -72,9 +72,9 @@ def _verify_geozarr_spec_compliance(output_path: pathlib.Path, group: str) -> No # Check 2: CF standard names (required by GeoZarr spec) for var_name in ds.data_vars: if var_name != "spatial_ref": - assert ( - "standard_name" in ds[var_name].attrs - ), f"Missing standard_name for {var_name} in {group}" + assert "standard_name" in ds[var_name].attrs, ( + f"Missing standard_name for {var_name} in {group}" + ) assert ( ds[var_name].attrs["standard_name"] == "toa_bidirectional_reflectance" ), f"Incorrect standard_name for {var_name} in {group}" @@ -83,22 +83,22 @@ def _verify_geozarr_spec_compliance(output_path: pathlib.Path, group: str) -> No # Check 3: Grid mapping attributes (required by GeoZarr spec) for var_name in ds.data_vars: if var_name != "spatial_ref": - assert ( - "grid_mapping" in ds[var_name].attrs - ), f"Missing grid_mapping for {var_name} in {group}" - assert ( - ds[var_name].attrs["grid_mapping"] == "spatial_ref" - ), f"Incorrect grid_mapping for {var_name} in {group}" + assert "grid_mapping" in ds[var_name].attrs, ( + f"Missing grid_mapping for {var_name} in {group}" + ) + assert ds[var_name].attrs["grid_mapping"] == "spatial_ref", ( + f"Incorrect grid_mapping for {var_name} in {group}" + ) print(f" ✅ grid_mapping: {ds[var_name].attrs['grid_mapping']}") # Check 4: Spatial reference variable (as in notebook) assert "spatial_ref" in ds, f"Missing spatial_ref variable in {group}" - assert ( - "_ARRAY_DIMENSIONS" in ds["spatial_ref"].attrs - ), f"Missing _ARRAY_DIMENSIONS for spatial_ref in {group}" - assert ( - ds["spatial_ref"].attrs["_ARRAY_DIMENSIONS"] == [] - ), f"Incorrect _ARRAY_DIMENSIONS for spatial_ref in {group}" + assert "_ARRAY_DIMENSIONS" in ds["spatial_ref"].attrs, ( + f"Missing _ARRAY_DIMENSIONS for spatial_ref in {group}" + ) + assert ds["spatial_ref"].attrs["_ARRAY_DIMENSIONS"] == [], ( + f"Incorrect _ARRAY_DIMENSIONS for spatial_ref in {group}" + ) print( f" ✅ spatial_ref _ARRAY_DIMENSIONS: {ds['spatial_ref'].attrs['_ARRAY_DIMENSIONS']}" ) @@ -124,9 +124,9 @@ def _verify_geozarr_spec_compliance(output_path: pathlib.Path, group: str) -> No if coord == "x" else "projection_y_coordinate" ) - assert ( - ds[coord].attrs["standard_name"] == expected_name - ), f"Incorrect standard_name for {coord} coordinate in {group}" + assert ds[coord].attrs["standard_name"] == expected_name, ( + f"Incorrect standard_name for {coord} coordinate in {group}" + ) print( f" ✅ {coord} standard_name: {ds[coord].attrs['standard_name']}" ) @@ -142,9 +142,9 @@ def _verify_multiscale_structure(output_path: pathlib.Path, group: str) -> None: # Check that at least one level exists (level 0 is always created) level_dirs = [d for d in group_path.iterdir() if d.is_dir() and d.name.isdigit()] - assert ( - len(level_dirs) >= 1 - ), f"Expected at least 1 overview level for {group}, found {len(level_dirs)}" + assert len(level_dirs) >= 1, ( + f"Expected at least 1 overview level for {group}, found {len(level_dirs)}" + ) print( f" Found {len(level_dirs)} overview levels: {sorted([d.name for d in level_dirs])}" ) @@ -156,9 +156,9 @@ def _verify_multiscale_structure(output_path: pathlib.Path, group: str) -> None: ds_0.close() if native_size >= 512: # Larger datasets should have multiple levels - assert ( - len(level_dirs) >= 2 - ), f"Expected multiple overview levels for large dataset {group} (size {native_size}), found {len(level_dirs)}" + assert len(level_dirs) >= 2, ( + f"Expected multiple overview levels for large dataset {group} (size {native_size}), found {len(level_dirs)}" + ) else: print(f" Small dataset (size {native_size}), single level is acceptable") @@ -176,9 +176,9 @@ def _verify_multiscale_structure(output_path: pathlib.Path, group: str) -> None: assert len(ds.data_vars) > 0, f"No data variables in {level_path}" # Verify that spatial dimensions exist - assert ( - "x" in ds.dims and "y" in ds.dims - ), f"Missing spatial dimensions in {level_path}" + assert "x" in ds.dims and "y" in ds.dims, ( + f"Missing spatial dimensions in {level_path}" + ) # Store shape for progression verification level_shapes[level_num] = (ds.dims["y"], ds.dims["x"]) @@ -198,12 +198,12 @@ def _verify_multiscale_structure(output_path: pathlib.Path, group: str) -> None: height_ratio = prev_height / curr_height width_ratio = prev_width / curr_width - assert ( - 1.8 <= height_ratio <= 2.2 - ), f"Height ratio between level {prev_level} and {level} should be ~2, got {height_ratio:.2f}" - assert ( - 1.8 <= width_ratio <= 2.2 - ), f"Width ratio between level {prev_level} and {level} should be ~2, got {width_ratio:.2f}" + assert 1.8 <= height_ratio <= 2.2, ( + f"Height ratio between level {prev_level} and {level} should be ~2, got {height_ratio:.2f}" + ) + assert 1.8 <= width_ratio <= 2.2, ( + f"Width ratio between level {prev_level} and {level} should be ~2, got {width_ratio:.2f}" + ) print( f" Level {prev_level}→{level} downsampling ratio: {height_ratio:.2f}x{width_ratio:.2f}" @@ -253,9 +253,9 @@ def _verify_rgb_data_access(output_path: pathlib.Path, groups: list[str]) -> Non blue_data = ds["b02"].values # Verify data shapes match - assert ( - red_data.shape == green_data.shape == blue_data.shape - ), f"RGB band shapes don't match in {group} level {level_num}" + assert red_data.shape == green_data.shape == blue_data.shape, ( + f"RGB band shapes don't match in {group} level {level_num}" + ) # Verify data is not empty assert red_data.size > 0, f"Empty red data in {group} level {level_num}" diff --git a/src/eopf_geozarr/tests/test_cli_e2e.py b/src/eopf_geozarr/tests/test_cli_e2e.py index 86148f3..96910b3 100644 --- a/src/eopf_geozarr/tests/test_cli_e2e.py +++ b/src/eopf_geozarr/tests/test_cli_e2e.py @@ -115,9 +115,9 @@ def test_cli_convert_real_sentinel2_data(self, temp_output_dir: str) -> None: cmd_info, capture_output=True, text=True, timeout=60 ) - assert ( - result_info.returncode == 0 - ), f"CLI info command failed: {result_info.stderr}" + assert result_info.returncode == 0, ( + f"CLI info command failed: {result_info.stderr}" + ) print("✅ CLI info command succeeded") print(f"Info output: {result_info.stdout}") @@ -142,9 +142,9 @@ def test_cli_convert_real_sentinel2_data(self, temp_output_dir: str) -> None: cmd_validate, capture_output=True, text=True, timeout=60 ) - assert ( - result_validate.returncode == 0 - ), f"CLI validate command failed: {result_validate.stderr}" + assert result_validate.returncode == 0, ( + f"CLI validate command failed: {result_validate.stderr}" + ) print("✅ CLI validate command succeeded") print(f"Validation output: {result_validate.stdout}") @@ -189,27 +189,27 @@ def _verify_converted_data_structure( first_var = data_vars[0] # Check _ARRAY_DIMENSIONS - assert ( - "_ARRAY_DIMENSIONS" in ds[first_var].attrs - ), f"Missing _ARRAY_DIMENSIONS in {first_var} for {group}" + assert "_ARRAY_DIMENSIONS" in ds[first_var].attrs, ( + f"Missing _ARRAY_DIMENSIONS in {first_var} for {group}" + ) # Check standard_name - assert ( - "standard_name" in ds[first_var].attrs - ), f"Missing standard_name in {first_var} for {group}" + assert "standard_name" in ds[first_var].attrs, ( + f"Missing standard_name in {first_var} for {group}" + ) # Check grid_mapping - assert ( - "grid_mapping" in ds[first_var].attrs - ), f"Missing grid_mapping in {first_var} for {group}" + assert "grid_mapping" in ds[first_var].attrs, ( + f"Missing grid_mapping in {first_var} for {group}" + ) print(f" ✅ GeoZarr compliance verified for {first_var}") # Check spatial_ref exists if "spatial_ref" in ds: - assert ( - "_ARRAY_DIMENSIONS" in ds["spatial_ref"].attrs - ), f"Missing _ARRAY_DIMENSIONS in spatial_ref for {group}" + assert "_ARRAY_DIMENSIONS" in ds["spatial_ref"].attrs, ( + f"Missing _ARRAY_DIMENSIONS in spatial_ref for {group}" + ) print(" ✅ spatial_ref variable verified") ds.close() @@ -282,9 +282,9 @@ def test_cli_crs_groups_option(self) -> None: ) assert result.returncode == 0, "Convert help command failed" assert "--crs-groups" in result.stdout, "--crs-groups option should be in help" - assert ( - "Groups that need CRS information added" in result.stdout - ), "Help text should be present" + assert "Groups that need CRS information added" in result.stdout, ( + "Help text should be present" + ) print("✅ --crs-groups option appears in CLI help") @pytest.mark.slow @@ -434,9 +434,9 @@ def test_cli_crs_groups_empty_list(self, temp_output_dir: str) -> None: result = subprocess.run(cmd, capture_output=True, text=True, timeout=60) # Should succeed (empty crs_groups list is valid) - assert ( - result.returncode == 0 - ), f"CLI with empty --crs-groups failed: {result.stderr}" + assert result.returncode == 0, ( + f"CLI with empty --crs-groups failed: {result.stderr}" + ) assert "CRS groups: []" in result.stdout, "Should show empty CRS groups list" print("✅ CLI with empty --crs-groups list works correctly") diff --git a/src/eopf_geozarr/tests/test_integration_sentinel1.py b/src/eopf_geozarr/tests/test_integration_sentinel1.py index 1987906..ea75547 100644 --- a/src/eopf_geozarr/tests/test_integration_sentinel1.py +++ b/src/eopf_geozarr/tests/test_integration_sentinel1.py @@ -284,18 +284,18 @@ def test_sentinel1_gcp_conversion( y_bounds = (ds_measurements.y.min().values, ds_measurements.y.max().values) # Should be within the original GCP bounds (15-18 lon, 39-41 lat) - assert ( - 14.5 <= x_bounds[0] <= 15.5 - ), f"X min bound {x_bounds[0]} outside expected range" - assert ( - 17.5 <= x_bounds[1] <= 18.5 - ), f"X max bound {x_bounds[1]} outside expected range" - assert ( - 38.5 <= y_bounds[0] <= 39.5 - ), f"Y min bound {y_bounds[0]} outside expected range" - assert ( - 40.5 <= y_bounds[1] <= 41.5 - ), f"Y max bound {y_bounds[1]} outside expected range" + assert 14.5 <= x_bounds[0] <= 15.5, ( + f"X min bound {x_bounds[0]} outside expected range" + ) + assert 17.5 <= x_bounds[1] <= 18.5, ( + f"X max bound {x_bounds[1]} outside expected range" + ) + assert 38.5 <= y_bounds[0] <= 39.5, ( + f"Y min bound {y_bounds[0]} outside expected range" + ) + assert 40.5 <= y_bounds[1] <= 41.5, ( + f"Y max bound {y_bounds[1]} outside expected range" + ) # Check multiscales 2 levels created: 0 (native, checked above) and 1 assert "1" in dt["measurements"] diff --git a/src/eopf_geozarr/tests/test_integration_sentinel2.py b/src/eopf_geozarr/tests/test_integration_sentinel2.py index 1ab21ba..074fb41 100644 --- a/src/eopf_geozarr/tests/test_integration_sentinel2.py +++ b/src/eopf_geozarr/tests/test_integration_sentinel2.py @@ -368,9 +368,9 @@ def test_performance_characteristics( prev_pixels = timing_data[i - 1]["pixels"] # Allow some flexibility, but generally expect fewer pixels at higher levels - assert ( - curr_pixels <= prev_pixels * 1.1 - ), f"Level {timing_data[i]['level']} has more pixels than level {timing_data[i-1]['level']}" + assert curr_pixels <= prev_pixels * 1.1, ( + f"Level {timing_data[i]['level']} has more pixels than level {timing_data[i - 1]['level']}" + ) print("✅ Performance characteristics verified!") diff --git a/src/eopf_geozarr/tests/test_reprojection_validation.py b/src/eopf_geozarr/tests/test_reprojection_validation.py index 4ae65b7..dc356d0 100644 --- a/src/eopf_geozarr/tests/test_reprojection_validation.py +++ b/src/eopf_geozarr/tests/test_reprojection_validation.py @@ -193,9 +193,9 @@ def test_titiler_compatibility(): elif "spatial_ref" in ds_measurements: # CRS info should be in spatial_ref attributes spatial_ref = ds_measurements.spatial_ref - assert ( - "crs_wkt" in spatial_ref.attrs - ), "Missing CRS information in spatial_ref" + assert "crs_wkt" in spatial_ref.attrs, ( + "Missing CRS information in spatial_ref" + ) print( f" - CRS info found in spatial_ref: {spatial_ref.attrs.get('crs_wkt', 'N/A')[:50]}..." ) @@ -266,14 +266,14 @@ def test_titiler_compatibility(): # Check CRS for overview (may be in spatial_ref variable) if ds_overview.rio.crs is not None: - assert ( - ds_overview.rio.crs.to_epsg() == 4326 - ), "Expected EPSG:4326 CRS for overview" + assert ds_overview.rio.crs.to_epsg() == 4326, ( + "Expected EPSG:4326 CRS for overview" + ) elif "spatial_ref" in ds_overview: spatial_ref_overview = ds_overview.spatial_ref - assert ( - "crs_wkt" in spatial_ref_overview.attrs - ), "Missing CRS information in overview spatial_ref" + assert "crs_wkt" in spatial_ref_overview.attrs, ( + "Missing CRS information in overview spatial_ref" + ) print(" - Overview CRS info found in spatial_ref") else: print(" - Warning: Overview CRS information not directly accessible") diff --git a/tests/test_data_api/__init__.py b/tests/test_data_api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_data_api/conftest.py b/tests/test_data_api/conftest.py new file mode 100644 index 0000000..486a4ca --- /dev/null +++ b/tests/test_data_api/conftest.py @@ -0,0 +1,14153 @@ +from __future__ import annotations + +from zarr import open_group +from zarr.core.buffer import default_buffer_prototype + +example_zarr_json = r"""{ + "attributes": {}, + "zarr_format": 3, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": { + "conditions": { + "attributes": {}, + "zarr_format": 3, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": {} + }, + "node_type": "group" + }, + "conditions/meteorology": { + "attributes": {}, + "zarr_format": 3, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": {} + }, + "node_type": "group" + }, + "conditions/meteorology/cams": { + "attributes": { + "Conventions": "CF-1.7", + "GRIB_centre": "ecmf", + "GRIB_centreDescription": "European Centre for Medium-Range Weather Forecasts", + "GRIB_edition": 1, + "GRIB_subCentre": 0, + "history": "2025-02-27T07:57 GRIB to CDM+CF via cfgrib-0.9.10.4/ecCodes-2.34.1 with {\"source\": \"tmp/S2B_MSIL1C_20250113T103309_N0511_R108_T32TLQ_20250113T122458.SAFE/GRANULE/L1C_T32TLQ_A041032_20250113T103310/AUX_DATA/AUX_CAMSFO\", \"filter_by_keys\": {}, \"encode_cf\": [\"parameter\", \"time\", \"geography\", \"vertical\"]}", + "institution": "European Centre for Medium-Range Weather Forecasts" + }, + "zarr_format": 3, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": {} + }, + "node_type": "group" + }, + "conditions/meteorology/cams/surface": { + "shape": [], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "long_name": "original GRIB coordinate for key: level(surface)", + "units": "1", + "_FillValue": "AAAAAAAA+H8=" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/aod865": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "unknown", + "GRIB_cfVarName": "aod865", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Total Aerosol Optical Depth at 865nm", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 210215, + "GRIB_shortName": "aod865", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "~", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "~" + }, + "long_name": "Total Aerosol Optical Depth at 865nm", + "standard_name": "unknown", + "units": "~", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/latitude": { + "shape": [ + 9 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "long_name": "latitude", + "standard_name": "latitude", + "stored_direction": "decreasing", + "units": "degrees_north", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/number": { + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "long_name": "ensemble member numerical id", + "standard_name": "realization", + "units": "1" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/z": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "geopotential", + "GRIB_cfVarName": "z", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Geopotential", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 129, + "GRIB_shortName": "z", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "m**2 s**-2", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "m**2 s**-2" + }, + "long_name": "Geopotential", + "standard_name": "geopotential", + "units": "m**2 s**-2", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/step": { + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "long_name": "time since forecast_reference_time", + "standard_name": "forecast_period", + "dtype": "timedelta64[ns]", + "units": "minutes" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/omaod550": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "unknown", + "GRIB_cfVarName": "omaod550", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Organic Matter Aerosol Optical Depth at 550nm", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 210210, + "GRIB_shortName": "omaod550", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "~", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "~" + }, + "long_name": "Organic Matter Aerosol Optical Depth at 550nm", + "standard_name": "unknown", + "units": "~", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/aod469": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "unknown", + "GRIB_cfVarName": "aod469", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Total Aerosol Optical Depth at 469nm", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 210213, + "GRIB_shortName": "aod469", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "~", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "~" + }, + "long_name": "Total Aerosol Optical Depth at 469nm", + "standard_name": "unknown", + "units": "~", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/aod670": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "unknown", + "GRIB_cfVarName": "aod670", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Total Aerosol Optical Depth at 670nm", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 210214, + "GRIB_shortName": "aod670", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "~", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "~" + }, + "long_name": "Total Aerosol Optical Depth at 670nm", + "standard_name": "unknown", + "units": "~", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/isobaricInhPa": { + "shape": [], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "long_name": "pressure", + "positive": "down", + "standard_name": "air_pressure", + "stored_direction": "decreasing", + "units": "hPa", + "_FillValue": "AAAAAAAA+H8=" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/duaod550": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "unknown", + "GRIB_cfVarName": "duaod550", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Dust Aerosol Optical Depth at 550nm", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 210209, + "GRIB_shortName": "duaod550", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "isobaricInhPa", + "GRIB_units": "~", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "~" + }, + "long_name": "Dust Aerosol Optical Depth at 550nm", + "standard_name": "unknown", + "units": "~", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/ssaod550": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "unknown", + "GRIB_cfVarName": "ssaod550", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Sea Salt Aerosol Optical Depth at 550nm", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 210208, + "GRIB_shortName": "ssaod550", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "~", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "~" + }, + "long_name": "Sea Salt Aerosol Optical Depth at 550nm", + "standard_name": "unknown", + "units": "~", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/time": { + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "_eopf_attrs": { + "_eopf_decode_datetime64": "datetime64[ns]" + }, + "long_name": "initial time of forecast", + "standard_name": "forecast_reference_time", + "units": "days since 2025-01-13 00:00:00", + "calendar": "proleptic_gregorian" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/valid_time": { + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "_eopf_attrs": { + "_eopf_decode_datetime64": "datetime64[ns]" + }, + "long_name": "time", + "standard_name": "time", + "units": "days since 2025-01-13 10:33:00", + "calendar": "proleptic_gregorian" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/bcaod550": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "unknown", + "GRIB_cfVarName": "bcaod550", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Black Carbon Aerosol Optical Depth at 550nm", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 210211, + "GRIB_shortName": "bcaod550", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "~", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "~" + }, + "long_name": "Black Carbon Aerosol Optical Depth at 550nm", + "standard_name": "unknown", + "units": "~", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/aod550": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "unknown", + "GRIB_cfVarName": "aod550", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Total Aerosol Optical Depth at 550nm", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 210207, + "GRIB_shortName": "aod550", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "~", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "~" + }, + "long_name": "Total Aerosol Optical Depth at 550nm", + "standard_name": "unknown", + "units": "~", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/longitude": { + "shape": [ + 9 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "long_name": "longitude", + "standard_name": "longitude", + "units": "degrees_east", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/aod1240": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "unknown", + "GRIB_cfVarName": "aod1240", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Total Aerosol Optical Depth at 1240nm", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 210216, + "GRIB_shortName": "aod1240", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "~", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "~" + }, + "long_name": "Total Aerosol Optical Depth at 1240nm", + "standard_name": "unknown", + "units": "~", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/cams/suaod550": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "unknown", + "GRIB_cfVarName": "suaod550", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Sulphate Aerosol Optical Depth at 550nm", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 210212, + "GRIB_shortName": "suaod550", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "~", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "~" + }, + "long_name": "Sulphate Aerosol Optical Depth at 550nm", + "standard_name": "unknown", + "units": "~", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf": { + "attributes": { + "Conventions": "CF-1.7", + "GRIB_centre": "ecmf", + "GRIB_centreDescription": "European Centre for Medium-Range Weather Forecasts", + "GRIB_edition": 1, + "GRIB_subCentre": 0, + "history": "2025-02-27T07:57 GRIB to CDM+CF via cfgrib-0.9.10.4/ecCodes-2.34.1 with {\"source\": \"tmp/S2B_MSIL1C_20250113T103309_N0511_R108_T32TLQ_20250113T122458.SAFE/GRANULE/L1C_T32TLQ_A041032_20250113T103310/AUX_DATA/AUX_ECMWFT\", \"filter_by_keys\": {}, \"encode_cf\": [\"parameter\", \"time\", \"geography\", \"vertical\"]}", + "institution": "European Centre for Medium-Range Weather Forecasts" + }, + "zarr_format": 3, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": {} + }, + "node_type": "group" + }, + "conditions/meteorology/ecmwf/surface": { + "shape": [], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "long_name": "original GRIB coordinate for key: level(surface)", + "units": "1", + "_FillValue": "AAAAAAAA+H8=" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/v10": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "unknown", + "GRIB_cfVarName": "v10", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "10 metre V wind component", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 166, + "GRIB_shortName": "10v", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "m s**-1", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "m s**-1" + }, + "long_name": "10 metre V wind component", + "standard_name": "unknown", + "units": "m s**-1", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/latitude": { + "shape": [ + 9 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "long_name": "latitude", + "standard_name": "latitude", + "stored_direction": "decreasing", + "units": "degrees_north", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/number": { + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "long_name": "ensemble member numerical id", + "standard_name": "realization", + "units": "1" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/step": { + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "long_name": "time since forecast_reference_time", + "standard_name": "forecast_period", + "dtype": "timedelta64[ns]", + "units": "minutes" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/r": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "relative_humidity", + "GRIB_cfVarName": "r", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Relative humidity", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 157, + "GRIB_shortName": "r", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "isobaricInhPa", + "GRIB_units": "%", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "%" + }, + "long_name": "Relative humidity", + "standard_name": "relative_humidity", + "units": "%", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/isobaricInhPa": { + "shape": [], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "long_name": "pressure", + "positive": "down", + "standard_name": "air_pressure", + "stored_direction": "decreasing", + "units": "hPa", + "_FillValue": "AAAAAAAA+H8=" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/tcwv": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "lwe_thickness_of_atmosphere_mass_content_of_water_vapor", + "GRIB_cfVarName": "tcwv", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Total column vertically-integrated water vapour", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 137, + "GRIB_shortName": "tcwv", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "kg m**-2", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "kg m**-2" + }, + "long_name": "Total column vertically-integrated water vapour", + "standard_name": "lwe_thickness_of_atmosphere_mass_content_of_water_vapor", + "units": "kg m**-2", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/u10": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "unknown", + "GRIB_cfVarName": "u10", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "10 metre U wind component", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 165, + "GRIB_shortName": "10u", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "m s**-1", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "m s**-1" + }, + "long_name": "10 metre U wind component", + "standard_name": "unknown", + "units": "m s**-1", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/time": { + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "_eopf_attrs": { + "_eopf_decode_datetime64": "datetime64[ns]" + }, + "long_name": "initial time of forecast", + "standard_name": "forecast_reference_time", + "units": "days since 2025-01-13 00:00:00", + "calendar": "proleptic_gregorian" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/valid_time": { + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "_eopf_attrs": { + "_eopf_decode_datetime64": "datetime64[ns]" + }, + "long_name": "time", + "standard_name": "time", + "units": "days since 2025-01-13 10:33:00", + "calendar": "proleptic_gregorian" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/tco3": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "atmosphere_mass_content_of_ozone", + "GRIB_cfVarName": "tco3", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Total column ozone", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 206, + "GRIB_shortName": "tco3", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "kg m**-2", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "kg m**-2" + }, + "long_name": "Total column ozone", + "standard_name": "atmosphere_mass_content_of_ozone", + "units": "kg m**-2", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/msl": { + "shape": [ + 9, + 9 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9, + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "GRIB_NV": 0, + "GRIB_Nx": 9, + "GRIB_Ny": 9, + "GRIB_cfName": "air_pressure_at_mean_sea_level", + "GRIB_cfVarName": "msl", + "GRIB_dataType": "fc", + "GRIB_gridDefinitionDescription": "Latitude/Longitude Grid", + "GRIB_gridType": "regular_ll", + "GRIB_iDirectionIncrementInDegrees": 0.177, + "GRIB_iScansNegatively": 0, + "GRIB_jDirectionIncrementInDegrees": 0.121, + "GRIB_jPointsAreConsecutive": 0, + "GRIB_jScansPositively": 0, + "GRIB_latitudeOfFirstGridPointInDegrees": 45.126, + "GRIB_latitudeOfLastGridPointInDegrees": 44.16, + "GRIB_longitudeOfFirstGridPointInDegrees": 6.457, + "GRIB_longitudeOfLastGridPointInDegrees": 7.872, + "GRIB_missingValue": 3.4028234663852886e+38, + "GRIB_name": "Mean sea level pressure", + "GRIB_numberOfPoints": 81, + "GRIB_paramId": 151, + "GRIB_shortName": "msl", + "GRIB_stepType": "instant", + "GRIB_stepUnits": 0, + "GRIB_totalNumber": 0, + "GRIB_typeOfLevel": "surface", + "GRIB_units": "Pa", + "_eopf_attrs": { + "coordinates": [ + "number", + "time", + "step", + "surface", + "latitude", + "longitude", + "valid_time", + "isobaricInhPa" + ], + "dimensions": [ + "latitude", + "longitude" + ], + "units": "Pa" + }, + "long_name": "Mean sea level pressure", + "standard_name": "air_pressure_at_mean_sea_level", + "units": "Pa", + "coordinates": "isobaricInhPa number step surface time valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/meteorology/ecmwf/longitude": { + "shape": [ + 9 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 9 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "attributes": { + "long_name": "longitude", + "standard_name": "longitude", + "units": "degrees_east", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "conditions/mask": { + "attributes": {}, + "zarr_format": 3, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": {} + }, + "node_type": "group" + }, + "conditions/mask/l1c_classification": { + "attributes": {}, + "zarr_format": 3, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": {} + }, + "node_type": "group" + }, + "conditions/mask/l1c_classification/r60m": { + "attributes": {}, + "zarr_format": 3, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": {} + }, + "node_type": "group" + }, + "conditions/mask/l1c_classification/r60m/b00": { + "shape": [ + 1830, + 1830 + ], + "data_type": "uint8", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1830, + 1830 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes" + }, + { + "name": "blosc", + "configuration": { + "typesize": 1, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "_eopf_attrs": { + "coordinates": [ + "x", + "y" + ], + "dimensions": [ + "y", + "x" + ], + "flag_masks": [ + 1, + 2, + 4 + ], + "flag_meanings": [ + "OPAQUE", + "CIRRUS", + "SNOW_ICE" + ] + }, + "dtype": " None: + """ + Test that the DataArrayLike protocol works correctly + """ + assert isinstance(obj, DataArrayLike) + + +@pytest.mark.parametrize("obj", [GroupSpec_V2(), GroupSpec_V3()]) +def test_grouplike(obj: GroupSpec_V3[Any, Any] | GroupSpec_V2[Any, Any]) -> None: + """ + Test that the GroupLike protocol works correctly + """ + assert isinstance(obj, GroupLike) + + +def test_get_cf_standard_names() -> None: + """ + Test the get_cf_standard_names function to ensure it retrieves the CF standard names correctly. + """ + standard_names = get_cf_standard_names(CF_STANDARD_NAME_URL) + assert isinstance(standard_names, tuple) + assert len(standard_names) > 0 + assert all(isinstance(name, str) for name in standard_names) + + +@pytest.mark.parametrize( + "name", ["air_temperature", "sea_surface_temperature", "precipitation_flux"] +) +def test_check_standard_name_valid(name: str) -> None: + """ + Test the check_standard_name function with valid standard names. + """ + assert check_standard_name(name) == name + + +def test_check_standard_name_invalid() -> None: + """ + Test the check_standard_name function with an invalid standard name. + """ + with pytest.raises(ValueError): + check_standard_name("invalid_standard_name") + + +def test_multiscales_round_trip() -> None: + """ + Ensure that we can round-trip multiscale metadata through the `Multiscales` model. + """ + from eopf_geozarr.data_api.geozarr.common import Multiscales + + source_untyped = GroupSpec_V3.from_zarr(example_group) + flat = source_untyped.to_flat() + meta = flat["/measurements/reflectance/r60m"].attributes["multiscales"] + assert Multiscales(**meta).model_dump() == tuplify_json(meta) diff --git a/tests/test_data_api/test_v2.py b/tests/test_data_api/test_v2.py new file mode 100644 index 0000000..bacfbb9 --- /dev/null +++ b/tests/test_data_api/test_v2.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np +import pytest +from pydantic import ValidationError +from pydantic_zarr.v2 import ArraySpec, GroupSpec + +from eopf_geozarr.data_api.geozarr.v2 import ( + DataArray, + DataArrayAttrs, + check_valid_coordinates, +) + +from .conftest import example_group + + +def test_invalid_dimension_names() -> None: + msg = r"The _ARRAY_DIMENSIONS attribute has length 3, which does not match the number of dimensions for this array \(got 2\)" + with pytest.raises(ValidationError, match=msg): + DataArray.from_array(np.zeros((10, 10)), dimension_names=["x", "y", "z"]) + + +class TestCheckValidCoordinates: + @staticmethod + @pytest.mark.parametrize("data_shape", [(10,), (10, 12)]) + def test_valid(data_shape: tuple[int, ...]) -> None: + """ + Test the check_valid_coordinates function to ensure it validates coordinates correctly. + """ + + base_array = DataArray.from_array( + np.zeros((data_shape), dtype="uint8"), + dimension_names=[f"dim_{s}" for s in range(len(data_shape))], + ) + coords_arrays = { + f"dim_{idx}": DataArray.from_array( + np.arange(s), dimension_names=(f"dim_{idx}",) + ) + for idx, s in enumerate(data_shape) + } + group = GroupSpec[Any, DataArray](members={"base": base_array, **coords_arrays}) + assert check_valid_coordinates(group) == group + + @staticmethod + @pytest.mark.parametrize("data_shape", [(10,), (10, 12)]) + def test_invalid_coordinates( + data_shape: tuple[int, ...], + ) -> None: + """ + Test the check_valid_coordinates function to ensure it validates coordinates correctly. + + This test checks that the function raises a ValueError when the dimensions of the data variable + do not match the dimensions of the coordinate arrays. + """ + base_array = DataArray.from_array( + np.zeros((data_shape), dtype="uint8"), + dimension_names=[f"dim_{s}" for s in range(len(data_shape))], + ) + coords_arrays = { + f"dim_{idx}": DataArray.from_array( + np.arange(s + 1), dimension_names=(f"dim_{idx}",) + ) + for idx, s in enumerate(data_shape) + } + group = GroupSpec[Any, DataArray](members={"base": base_array, **coords_arrays}) + msg = "Dimension .* for array 'base' has a shape mismatch:" + with pytest.raises(ValueError, match=msg): + check_valid_coordinates(group) + + +@pytest.mark.skip(reason="We don't have a v2 example group yet") +def test_dataarray_attrs_round_trip() -> None: + """ + Ensure that we can round-trip dataarray attributes through the `Multiscales` model. + """ + source_untyped = GroupSpec.from_zarr(example_group) + flat = source_untyped.to_flat() + for key, val in flat.items(): + if isinstance(val, ArraySpec): + model_json = val.model_dump()["attributes"] + assert DataArrayAttrs(**model_json).model_dump() == model_json diff --git a/tests/test_data_api/test_v3.py b/tests/test_data_api/test_v3.py new file mode 100644 index 0000000..6ca049f --- /dev/null +++ b/tests/test_data_api/test_v3.py @@ -0,0 +1,89 @@ +from typing import Any + +import numpy as np +import pytest +import zarr +from pydantic_zarr.core import tuplify_json +from pydantic_zarr.v3 import ArraySpec, GroupSpec + +from eopf_geozarr.data_api.geozarr.v3 import ( + DataArray, + MultiscaleGroup, + check_valid_coordinates, +) + +from .conftest import example_group + + +class TestCheckValidCoordinates: + @staticmethod + @pytest.mark.parametrize("data_shape", [(10,), (10, 12)]) + def test_valid(data_shape: tuple[int, ...]) -> None: + """ + Test the check_valid_coordinates function to ensure it validates coordinates correctly. + """ + + base_array = DataArray.from_array( + np.zeros((data_shape), dtype="uint8"), + dimension_names=[f"dim_{s}" for s in range(len(data_shape))], + ) + coords_arrays = { + f"dim_{idx}": DataArray.from_array( + np.arange(s), dimension_names=(f"dim_{idx}",) + ) + for idx, s in enumerate(data_shape) + } + group = GroupSpec[Any, DataArray](members={"base": base_array, **coords_arrays}) + assert check_valid_coordinates(group) == group + + @staticmethod + @pytest.mark.parametrize("data_shape", [(10,), (10, 12)]) + def test_invalid_coordinates( + data_shape: tuple[int, ...], + ) -> None: + """ + Test the check_valid_coordinates function to ensure it validates coordinates correctly. + + This test checks that the function raises a ValueError when the dimensions of the data variable + do not match the dimensions of the coordinate arrays. + """ + base_array = DataArray.from_array( + np.zeros((data_shape), dtype="uint8"), + dimension_names=[f"dim_{s}" for s in range(len(data_shape))], + ) + coords_arrays = { + f"dim_{idx}": DataArray.from_array( + np.arange(s + 1), dimension_names=(f"dim_{idx}",) + ) + for idx, s in enumerate(data_shape) + } + group = GroupSpec[Any, DataArray](members={"base": base_array, **coords_arrays}) + msg = "Dimension .* for array 'base' has a shape mismatch:" + with pytest.raises(ValueError, match=msg): + check_valid_coordinates(group) + + +def test_dataarray_round_trip() -> None: + """ + Ensure that we can round-trip dataarray attributes through the `Multiscales` model. + """ + source_untyped = GroupSpec.from_zarr(example_group) + flat = source_untyped.to_flat() + for key, val in flat.items(): + if isinstance(val, ArraySpec) and val.dimension_names is not None: + model_json = val.model_dump() + assert DataArray(**model_json).model_dump() == model_json + + +def test_multiscale_attrs_round_trip() -> None: + """ + Test that multiscale datasets round-trip through the `Multiscales` model + """ + source_group_members = dict(example_group.members(max_depth=None)) + for key, val in source_group_members.items(): + if isinstance(val, zarr.Group): + if "multiscales" in val.attrs.asdict(): + model_json = MultiscaleGroup.from_zarr(val).model_dump() + assert MultiscaleGroup(**model_json).model_dump() == tuplify_json( + model_json + )