Merge 6d241af into b08eade
Oceancolour-RG committed Nov 12, 2020
2 parents b08eade + 6d241af commit 58e47e1
Showing 45 changed files with 1,266 additions and 681 deletions.
3 changes: 3 additions & 0 deletions .travis.yml
@@ -11,6 +11,7 @@ addons:
apt:
packages:
- gdal-bin
- hdf5-tools
- gfortran
- libatlas-base-dev
- libatlas-dev
@@ -25,6 +26,8 @@ install:
- travis_retry pip install --upgrade pytest pytest-cov coveralls GDAL==1.10.0 rasterio[s3] 'scipy<1.5.0' pandas==1.0.5
# flake8 and black versions should match .pre-commit-config.yaml
- travis_retry pip install flake8==3.8.2 black==20.8b1
# Cattrs removed Python 3.6 support in 1.1.0
- travis_retry pip install cattrs==1.0.0
- travis_retry pip install -e .[test]
- pip freeze
# Either both set or none. See: https://github.com/mapbox/rasterio/issues/1494
52 changes: 52 additions & 0 deletions Dockerfile
@@ -0,0 +1,52 @@
FROM opendatacube/geobase:wheels as env_builder
ARG py_env_path=/env
ARG ENVIRONMENT=test

COPY requirements*.txt /tmp/
# RUN env-build-tool new /tmp/requirements.txt ${py_env_path}
RUN if [ "$ENVIRONMENT" = "test" ] ; then \
env-build-tool new /tmp/requirements-test.txt ${py_env_path} ; \
else \
env-build-tool new /tmp/requirements.txt ${py_env_path} ; \
fi

ENV PATH=${py_env_path}/bin:$PATH

# Copy source code and install it
RUN mkdir -p /code
WORKDIR /code
ADD . /code

RUN pip install --use-feature=2020-resolver .

# Build the production runner stage from here
FROM opendatacube/geobase:runner

ENV LC_ALL=C.UTF-8 \
DEBIAN_FRONTEND=noninteractive \
SHELL=bash

COPY --from=env_builder /env /env
ENV PATH=/env/bin:$PATH

# # Environment can be whatever is supported by setup.py
# # so, either deployment, test
# ARG ENVIRONMENT=test
# RUN echo "Environment is: $ENVIRONMENT"
#
# # Set up a nice workdir, and only copy the things we care about in
# ENV APPDIR=/code
# RUN mkdir -p $APPDIR
# WORKDIR $APPDIR
# ADD . $APPDIR
#
# # These ENVIRONMENT flags make this a bit complex, but basically, if we are in dev
# # then we want to link the source (with the -e flag) and if we're in prod, we
# # want to delete the stuff in the /code folder to keep it simple.
# RUN if [ "$ENVIRONMENT" = "deployment" ] ; then rm -rf $APPDIR ; \
# else pip install --editable .[$ENVIRONMENT] ; \
# fi

RUN python

CMD ["python"]
6 changes: 6 additions & 0 deletions Makefile
@@ -0,0 +1,6 @@

.PHONY: docker-tests

docker-tests:
	docker build -t eodatasets:test .
	docker run -it --rm --volume "${PWD}/tests":/tests eodatasets:test pytest --cov eodatasets --durations=5 /tests
20 changes: 20 additions & 0 deletions README.md
@@ -189,3 +189,23 @@ Some preparers need the ancillary dependencies: `pip install .[ancillary]`
--with-oa / --no-oa Include observation attributes (default:
true)
--help Show this message and exit.


## Creating Releases

```
git fetch origin
# Create a tag for the new version
git tag eodatasets3-<version> origin/eodatasets3
# Push it to main repository
git push origin --tags
# Create a wheel locally
python3 setup.py sdist bdist_wheel
# Upload it (Jeremy, Damien, Kirill have pypi ownership)
python3 -m twine upload dist/*
```
7 changes: 4 additions & 3 deletions docs/index.rst
@@ -66,7 +66,7 @@ the provenance, and the assembler can optionally copy any common metadata automatically
# Set our product information.
# It's a GA product of "numerus-unus" ("the number one").
p.producer = "ga.gov.au"
- p.product_family = "blues"
+ p.product_family = "numerus-unus"
p.dataset_version = "3.0.0"

...
@@ -116,10 +116,11 @@ of the current image::
...

Note that the assembler will throw an error if the path lives outside
- the dataset (location), as they will be absolute rather than relative paths.
+ the dataset (location), as this will require absolute paths.
Relative paths are considered best-practice for Open Data Cube.

- You can allow absolute paths with a field on assembler construction :meth:`eodatasets3.DatasetAssembler.__init__`::
+ You can allow absolute paths with a field on assembler construction
+ :meth:`eodatasets3.DatasetAssembler.__init__`::

with DatasetAssembler(
dataset_location=usgs_level1,
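The construction call above is cut off in this view. A minimal sketch of opting in to absolute paths, assuming the constructor field is named `allow_absolute_paths` and `usgs_level1` is a level-1 dataset path defined earlier:

```python
# Sketch only: permit absolute measurement paths on the assembler.
from eodatasets3 import DatasetAssembler

with DatasetAssembler(
    dataset_location=usgs_level1,
    allow_absolute_paths=True,
) as p:
    ...
```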
84 changes: 72 additions & 12 deletions eodatasets3/assemble.py
@@ -163,6 +163,7 @@ def __init__(
self._user_metadata = dict()
self._software_versions: List[Dict] = []
self._lineage: Dict[str, List[uuid.UUID]] = defaultdict(list)
self._inherited_geometry = None

if naming_conventions == "default":
self.names = ComplicatedNamingConventions(self)
@@ -211,6 +212,12 @@ def _work_path(self) -> Path:
def properties(self) -> StacPropertyView:
return self._props

@property
def measurements(self) -> Dict[str, Tuple[GridSpec, Path]]:
return dict(
(name, (grid, path)) for grid, name, path in self._measurements.iter_paths()
)

@property
def label(self) -> Optional[str]:
"""
@@ -317,6 +324,7 @@ def add_source_dataset(
dataset: DatasetDoc,
classifier: Optional[str] = None,
auto_inherit_properties: bool = False,
inherit_geometry: bool = False,
):
"""
Record a source dataset using its metadata document.
@@ -335,6 +343,9 @@
are used for different purposes, such as having a second level1 dataset
that was used for QA (but is not this same scene).
:param inherit_geometry: Instead of re-calculating the valid bounds geometry based on the
    data, which can be computationally expensive (e.g. for Landsat 7
    striped data), use the valid data geometry from this source dataset.
See :func:`add_source_path` if you have a filepath reference instead of a document.
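A minimal usage sketch of the new flag, assuming `p` is a `DatasetAssembler` and `level1` is a previously loaded `DatasetDoc`:

```python
# Sketch: reuse the source dataset's valid-data geometry rather than
# recalculating it from the pixels (expensive for e.g. striped Landsat 7 data).
p.add_source_dataset(
    level1,
    auto_inherit_properties=True,
    inherit_geometry=True,
)
```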
@@ -353,6 +364,8 @@
self._lineage[classifier].append(dataset.id)
if auto_inherit_properties:
self._inherit_properties_from(dataset)
if inherit_geometry:
self._inherited_geometry = dataset.geometry

def _inherit_properties_from(self, source_dataset: DatasetDoc):
for name in self.INHERITABLE_PROPERTIES:
@@ -669,7 +682,10 @@ def done(
if measurement_docs and sort_measurements:
measurement_docs = dict(sorted(measurement_docs.items()))

- valid_data = self._measurements.consume_and_get_valid_data()
+ if self._inherited_geometry:
+     valid_data = self._inherited_geometry
+ else:
+     valid_data = self._measurements.consume_and_get_valid_data()
# Avoid the messiness of different empty collection types.
# (to have a non-null geometry we'd also need non-null grids and crses)
if valid_data.is_empty:
@@ -782,6 +798,14 @@ def done(
def _crs_str(self, crs: CRS) -> str:
return f"epsg:{crs.to_epsg()}" if crs.is_epsg_code else crs.to_wkt()

def _document_thumbnail(self, thumb_path, kind=None):
self._checksum.add_file(thumb_path)

accessory_name = "thumbnail"
if kind:
accessory_name += f":{kind}"
self.add_accessory_file(accessory_name, thumb_path)

def write_thumbnail(
self,
red: str,
@@ -815,21 +839,18 @@ def write_thumbnail(
:param static_stretch: Use a static upper/lower value to stretch by instead of dynamic stretch.
"""
thumb_path = self.names.thumbnail_name(self._work_path, kind=kind)
- measurements = dict(
-     (name, (grid, path)) for grid, name, path in self._measurements.iter_paths()
- )

- missing_measurements = {red, green, blue} - set(measurements)
+ missing_measurements = {red, green, blue} - set(self.measurements)
if missing_measurements:
raise IncompleteDatasetError(
ValidationMessage(
Level.error,
"missing_thumb_measurements",
f"Thumbnail measurements are missing: no measurements called {missing_measurements!r}. ",
hint=f"Available measurements: {', '.join(measurements)}",
hint=f"Available measurements: {', '.join(self.measurements)}",
)
)
- rgbs = [measurements[b] for b in (red, green, blue)]
+ rgbs = [self.measurements[b] for b in (red, green, blue)]
unique_grids: List[GridSpec] = list(set(grid for grid, path in rgbs))
if len(unique_grids) != 1:
raise NotImplementedError(
@@ -846,12 +867,51 @@
percentile_stretch=percentile_stretch,
input_geobox=grid,
)
- self._checksum.add_file(thumb_path)
-
- accessory_name = "thumbnail"
- if kind:
-     accessory_name += f":{kind}"
- self.add_accessory_file(accessory_name, thumb_path)
+ self._document_thumbnail(thumb_path, kind)

def write_thumbnail_singleband(
self,
measurement: str,
bit: int = None,
lookup_table: Dict[int, Tuple[int, int, int]] = None,
kind: str = None,
):
"""
Write a singleband thumbnail out, taking an input measurement and
outputting a JPG with appropriate settings.

Either provide a bit (int) to scale from black to white,
i.e. 0 will be BLACK and the bit value will be WHITE, with a linear
scale between; or provide a lookup_table (dict) mapping int pixel
values to (R, G, B) tuples to colour the image with.
"""

thumb_path = self.names.thumbnail_name(self._work_path, kind=kind)

_, image_path = self.measurements.get(measurement, (None, None))

if image_path is None:
raise IncompleteDatasetError(
ValidationMessage(
Level.error,
"missing_thumb_measurement",
f"Thumbnail measurement is missing: no measurements called {measurement!r}. ",
hint=f"Available measurements: {', '.join(self.measurements)}",
)
)

FileWrite().create_thumbnail_singleband(
image_path,
thumb_path,
bit,
lookup_table,
)

self._document_thumbnail(thumb_path, kind)

def add_accessory_file(self, name: str, path: Path):
"""
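A sketch of the two calling modes the new method's docstring describes, assuming `p` is a `DatasetAssembler` with a measurement written under the hypothetical name `"water"`:

```python
# Mode 1: stretch linearly from black (0) to white (the given bit value).
p.write_thumbnail_singleband("water", bit=128)

# Mode 2: colour specific pixel values with a lookup table of RGB tuples.
p.write_thumbnail_singleband(
    "water",
    lookup_table={0: (0, 0, 0), 1: (0, 0, 255)},
    kind="water",
)
```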
21 changes: 17 additions & 4 deletions eodatasets3/images.py
@@ -55,6 +55,8 @@ class GridSpec:

@classmethod
def from_dataset_doc(cls, ds: DatasetDoc, grid="default") -> "GridSpec":

print(list(ds.grids))
g = ds.grids[grid]

if ds.crs.startswith("epsg:"):
@@ -272,8 +274,15 @@ def as_geo_docs(self) -> Tuple[CRS, Dict[str, GridDoc], Dict[str, MeasurementDoc]]
f"\t{grid.crs.to_string()!r}\n"
)

- # create a simple name for the each resolution groups
- grid_name = "RES_{0}m".format(int(grid.transform.a))
+ if i == 0:
+     # as stated above, grids have been ordered from most
+     # (i=0) to fewest (i>0) measurements. The grid with
+     # the most measurements will be set as "default"
+     grid_name = "default"
+ else:
+     # create a simple name for each resolution group
+     grid_name = "RES_{0}m".format(int(grid.transform.a))

grid_docs[grid_name] = GridDoc(grid.shape, grid.transform)

@@ -548,7 +557,7 @@ def write_from_ndarray(
"""
with rasterio.open(unstructured_image, "w", **rio_args) as outds:
if bands == 1:
- if isinstance(array, h5py.Dataset):
+ if h5py is not None and isinstance(array, h5py.Dataset):
for tile in tiles:
idx = (
slice(tile[0][0], tile[0][1]),
@@ -558,7 +567,7 @@
else:
outds.write(array, 1)
else:
- if isinstance(array, h5py.Dataset):
+ if h5py is not None and isinstance(array, h5py.Dataset):
for tile in tiles:
idx = (
slice(tile[0][0], tile[0][1]),
@@ -687,6 +696,10 @@ def create_thumbnail_singleband(
raise ValueError(
"Please set either bit or lookup_table, and not both of them"
)
if bit is None and lookup_table is None:
raise ValueError(
"Please set either bit or lookup_table; neither is currently set"
)

with rasterio.open(in_file) as dataset:
data = dataset.read()
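The `h5py is not None` guards above suggest `h5py` is now an optional import in this module; a sketch of that common pattern (not necessarily the module's exact import code):

```python
# Optional dependency: degrade gracefully when h5py isn't installed.
try:
    import h5py
except ImportError:
    h5py = None

def is_h5_dataset(array) -> bool:
    # Short-circuits safely when h5py is absent.
    return h5py is not None and isinstance(array, h5py.Dataset)
```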
20 changes: 18 additions & 2 deletions eodatasets3/model.py
@@ -225,7 +225,9 @@ def dataset_label(self) -> str:
def destination_folder(self, base: Path):
self._check_enough_properties_to_name()
# DEA naming conventions folder hierarchy.
- # Example: "ga_ls8c_ard_3/092/084/2016/06/28"
+ # Examples:
+ # For L8: "ga_ls8c_ard_3/092/084/2016/06/28"
+ # For S2A/B: "ga_s2bm_ard_2/55/KDT/2016/06/28/003241"

parts = [self.product_name]

@@ -234,7 +236,21 @@
if region_code:
parts.extend(utils.subfolderise(region_code))

parts.extend(f"{self.dataset.datetime:%Y/%m/%d}".split("/"))
if self.dataset.platform:
# added to pass test_assemble.py, where self.dataset.platform = None
if self.dataset.platform.startswith("sentinel-2"):
# modified output dir so to include HHMMSS to account for
# multiple acquisitions per day
parts.extend(f"{self.dataset.datetime:%Y/%m/%d/%H%M%S}".split("/"))
else:
parts.extend(f"{self.dataset.datetime:%Y/%m/%d}".split("/"))
else:
parts.extend(f"{self.dataset.datetime:%Y/%m/%d}".split("/"))

# If it's not a final product, append the maturity to the folder.
maturity: str = self.dataset.properties.get("dea:dataset_maturity")
if maturity and maturity != "final":
parts[-1] = f"{parts[-1]}_{maturity}"

if self.dataset_separator_field is not None:
val = self.dataset.properties[self.dataset_separator_field]
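A small illustration of the folder parts the new Sentinel-2 branch produces (the date and time are made up; the `%H%M%S` suffix is what distinguishes multiple same-day acquisitions):

```python
from datetime import datetime

dt = datetime(2016, 6, 28, 0, 32, 41)
print(f"{dt:%Y/%m/%d}".split("/"))         # ['2016', '06', '28']
print(f"{dt:%Y/%m/%d/%H%M%S}".split("/"))  # ['2016', '06', '28', '003241']
```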
3 changes: 2 additions & 1 deletion eodatasets3/scripts/packagewagl.py
@@ -53,7 +53,8 @@ def run(
products = set(p.lower() for p in products)
else:
# products = wagl.DEFAULT_PRODUCTS
- products = "lambertian"
+ products = ["lambertian"]

with rasterio.Env():
for granule in wagl.Granule.for_path(h5_file, level1_metadata_path=level1):
with wagl.do(
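The one-line `products` fix above matters because iterating a bare string yields individual characters rather than product names; a quick illustration:

```python
print([p for p in "lambertian"])    # ['l', 'a', 'm', 'b', 'e', 'r', 't', 'i', 'a', 'n']
print([p for p in ["lambertian"]])  # ['lambertian']
```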
