Skip to content

Commit

Permalink
Merge 02b66ac into 44e6c8f
Browse files Browse the repository at this point in the history
  • Loading branch information
n3h3m committed Oct 9, 2020
2 parents 44e6c8f + 02b66ac commit aaac1d8
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 8 deletions.
5 changes: 5 additions & 0 deletions eodatasets3/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
ComplicatedNamingConventions,
AccessoryDoc,
Location,
ComplicatedNamingConventionsDerivatives,
)
from eodatasets3.properties import EoFields
from eodatasets3.validate import Level, ValidationMessage
Expand Down Expand Up @@ -169,6 +170,10 @@ def __init__(
self.names = ComplicatedNamingConventions.for_standard_dea(self)
elif naming_conventions == "dea_s2":
self.names = ComplicatedNamingConventions.for_standard_dea_s2(self)
elif naming_conventions == "dea_c3":
self.names = ComplicatedNamingConventionsDerivatives.for_c3_derivatives(
self
)
else:
raise NotImplementedError("configurable naming conventions")

Expand Down
75 changes: 72 additions & 3 deletions eodatasets3/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,16 @@

import affine
import attr
from eodatasets3 import utils
from eodatasets3.properties import StacPropertyView, EoFields
from ruamel.yaml.comments import CommentedMap
from shapely.geometry.base import BaseGeometry

from eodatasets3 import utils
from eodatasets3.properties import StacPropertyView, EoFields

# TODO: these need discussion.
# Base URI for DEA collections; used as the default `uri` argument of
# ComplicatedNamingConventionsDerivatives.for_c3_derivatives().
DEA_URI_PREFIX = "https://collections.dea.ga.gov.au"
# URL of the ODC dataset schema.
ODC_DATASET_SCHEMA_URL = "https://schemas.opendatacube.org/dataset"


# Either a local filesystem path or a string URI.
# (the URI can use any scheme supported by rasterio, such as tar:// or https:// or ...)
Location = Union[Path, str]
Expand Down Expand Up @@ -373,6 +372,76 @@ def producer_abbreviated(self) -> Optional[str]:
)


class ComplicatedNamingConventionsDerivatives(ComplicatedNamingConventions):
    """
    Naming conventions for C3 derivative products (data other than ARD).

    Inherits from ComplicatedNamingConventions and overrides the few
    attributes that are specific to C3 data processing.
    """

    @classmethod
    def for_c3_derivatives(cls, dataset: EoFields, uri=DEA_URI_PREFIX):
        """
        Build the naming conventions used for C3 data processing.

        The fields that must be present on the dataset are controlled here.

        :param dataset: the dataset whose properties the names are built from.
        :param uri: passed through as the base product URI.
        """
        return cls(
            dataset=dataset,
            base_product_uri=uri,
            required_fields=(
                "eo:platform",
                "odc:dataset_version",
                "odc:collection_number",
                "odc:processing_datetime",
                "odc:producer",
                "odc:product_family",
                "odc:region_code",
                "dea:dataset_maturity",
            ),
        )

    @property
    def _org_collection_number(self) -> Optional[int]:
        """The dataset's own collection number, converted to an int."""
        # int() raises if the value cannot be converted.
        # NOTE(review): the original intent was to fail when the collection
        # number is not defined in the config yaml; whether that happens
        # depends on what the dataset's `collection_number` accessor returns
        # for an unset value -- confirm.
        return int(self.dataset.collection_number)

    def _product_group(self, subname=None) -> str:
        """
        Compute the product group, e.g. "ga_ls_wo_3".

        `subname` is accepted for signature compatibility but is not used.
        """
        # Deliberately fail if any of these attributes is not found:
        # str.join() raises a TypeError on a None part.
        parts = [
            self.producer_abbreviated,
            self.platform_abbreviated,
            self.dataset.product_family,
        ]
        return "_".join(parts)

    def destination_folder(self, base: Path) -> Path:
        """
        Return the folder, below `base`, that the dataset belongs in.

        The layout is product name, dataset version (dots replaced by
        dashes), the subfolderised region code, then year/month/day,
        e.g. "ga_ls_wo_3/1-6-0/090/081/1998/07/30".
        """
        self._check_enough_properties_to_name()
        parts = [self.product_name, self.dataset.dataset_version.replace(".", "-")]
        parts.extend(utils.subfolderise(self.dataset.region_code))
        parts.extend(f"{self.dataset.datetime:%Y/%m/%d}".split("/"))
        return base.joinpath(*parts)

    def _dataset_label(self, sub_name: str = None) -> str:
        """
        Produce the label made of product name, region code, date and maturity.

        ex: 'ga_ls_wo_3_090081_1998-07-30_interim'

        The redundant `sub_name` parameter is kept because the parent class
        and other invocations pass it.
        """
        parts = [
            self.product_name,
            self._displayable_region_code,
            f"{self.dataset.datetime:%Y-%m-%d}",
            self.dataset.maturity,
        ]
        return "_".join(parts)

    @property
    def platform_abbreviated(self) -> Optional[str]:
        """
        The abbreviated platform, which is always "ls" for C3 processing.

        :raises ValueError: if the dataset's platform is unset or is not a
            landsat ("ls") platform.
        """
        # For now from Alchemist the platform is always landsat for C3 processing.
        # An explicit raise is used rather than `assert`, because asserts are
        # stripped when Python runs with -O and the check would silently vanish.
        platform = self.dataset.platform
        if not platform or "ls" not in platform:
            raise ValueError(
                "C3 derivative naming conventions only support landsat ('ls') "
                f"platforms, got {platform!r}"
            )
        return "ls"


@attr.s(auto_attribs=True, slots=True)
class DatasetDoc(EoFields):
id: UUID = None
Expand Down
30 changes: 28 additions & 2 deletions eodatasets3/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
from typing import Tuple, Dict, Optional, Any, Mapping, Callable, Union

import ciso8601
from eodatasets3.utils import default_utc

from ruamel.yaml.timestamp import TimeStamp as RuamelTimeStamp

from eodatasets3.utils import default_utc


class FileFormat(Enum):
GeoTIFF = 1
Expand Down Expand Up @@ -226,6 +226,8 @@ class StacPropertyView(collections.abc.Mapping):
"landsat:wrs_path": int,
"landsat:wrs_row": int,
"odc:dataset_version": None,
"odc:collection_number": int,
"odc:naming_conventions": None,
# Not strict as there may be more added in ODC...
"odc:file_format": of_enum_type(FileFormat, strict=False),
"odc:processing_datetime": datetime_type,
Expand Down Expand Up @@ -413,10 +415,34 @@ def dataset_version(self) -> str:
"""
return self.properties.get("odc:dataset_version")

@property
def collection_number(self) -> str:
    """
    The version of the collection.

    Falls back to "0" when the ``odc:collection_number`` property is not set.

    Eg:
        metadata:
          product_family: wofs
          dataset_version: 1.6.0
          collection_number: 3
    """
    fallback = "0"
    return self.properties.get("odc:collection_number", fallback)

@dataset_version.setter
def dataset_version(self, value):
    """Store ``value`` under the ``odc:dataset_version`` property."""
    property_key = "odc:dataset_version"
    self.properties[property_key] = value

@collection_number.setter
def collection_number(self, value):
    """Store ``value`` under the ``odc:collection_number`` property."""
    property_key = "odc:collection_number"
    self.properties[property_key] = value

@property
def naming_conventions(self) -> str:
    """The value stored under ``odc:naming_conventions`` (None when unset)."""
    property_key = "odc:naming_conventions"
    return self.properties.get(property_key)

@naming_conventions.setter
def naming_conventions(self, value):
    """Store ``value`` under the ``odc:naming_conventions`` property."""
    property_key = "odc:naming_conventions"
    self.properties[property_key] = value

@property
def product_family(self) -> str:
"""
Expand Down
39 changes: 36 additions & 3 deletions tests/integration/test_assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@

import numpy
import pytest
from ruamel import yaml

from eodatasets3 import DatasetAssembler
from eodatasets3.images import GridSpec
from eodatasets3.model import DatasetDoc
from ruamel import yaml
from tests.integration.common import assert_same_as_file

from tests import assert_file_structure
from tests.integration.common import assert_same_as_file


def test_dea_style_package(
Expand Down Expand Up @@ -309,6 +309,7 @@ def test_s2_naming_conventions(tmp_path: Path):

# The s2 naming conventions have an extra subfolder of the datatake start time.
metadata_path_offset = metadata_path.relative_to(tmp_path).as_posix()

assert metadata_path_offset == (
"ga_s2am_blueberries_1/Oz/2018/11/04/20170822T015626/"
"ga_s2am_blueberries_1-0-0_Oz_2018-11-04.odc-metadata.yaml"
Expand Down Expand Up @@ -399,3 +400,35 @@ def test_complain_about_missing_fields(tmp_path: Path, l1_ls8_folder: Path):
f"Expected field {needed_field_name} to "
f"be listed as mandatory in the error message"
)


def test_dea_c3_naming_conventions(tmp_path: Path):
    """
    Check the "dea_c3" naming conventions against a sample Alchemist C3 scene.
    """
    assembler = DatasetAssembler(tmp_path, naming_conventions="dea_c3")
    assembler.platform = "ga_ls5t"
    assembler.datetime = datetime(1998, 7, 30)
    assembler.product_family = "wo"
    assembler.processed = "1998-07-30T12:23:23"
    assembler.maturity = "interim"
    assembler.producer = "ga.gov.au"
    assembler.region_code = "090081"

    # The dataset version and collection number are still unset at this
    # point, so finishing the dataset is expected to be refused.
    missing_fields_message = "Need more properties to fulfill naming conventions."
    with pytest.raises(ValueError, match=missing_fields_message):
        assembler.done()

    # Supply the fields that were left out above.
    assembler.dataset_version = "1.6.0"
    assembler.collection_number = "3"

    # With every field present, the metadata lands at the expected offset.
    _, metadata_path = assembler.done()
    expected_offset = (
        "ga_ls_wo_3/1-6-0/090/081/1998/07/30/"
        "ga_ls_wo_3_090081_1998-07-30_interim.odc-metadata.yaml"
    )
    actual_offset = metadata_path.relative_to(tmp_path).as_posix()
    assert actual_offset == expected_offset

0 comments on commit aaac1d8

Please sign in to comment.