In [2]:
import eodatasets3
from pathlib import Path
from eodatasets3 import DatasetDoc

import numpy as np

In [16]:

def create_dataset_doc(
    year,
    region_code,
    dataset_version,
    product_maturity="provisional",
    dataset_maturity="final",
):
    """
    Build a `DatasetDoc` and naming conventions for one annual
    intertidal dataset.

    Parameters
    ----------
    year : int
        Year the dataset covers. It is compared against integer years to
        choose the platform and instrument strings, and is used to build
        the datetime fields and the `"{year}--P1Y"` time label.
    region_code : str
        Region code stored on the document (e.g. "x37y30").
    dataset_version : str
        Dataset version stored on the document (e.g. "0.0.1").
    product_maturity : str, optional
        Stored as the document's product maturity. Defaults to
        "provisional".
    dataset_maturity : str, optional
        Stored as the document's maturity. Defaults to "final".

    Returns
    -------
    tuple
        `(doc, names)`: the populated `DatasetDoc`, and the object
        returned by `eodatasets3.namer` with its `time_folder` and
        `dataset_label` rewritten to use the `"{year}--P1Y"` label.

    Notes
    -----
    Prints a preview of the metadata file path as a side effect.
    """
    # Platforms and instruments recorded for this year
    if year <= 2020:
        sensors = (
            "landsat-7,landsat-8,sentinel-2a,sentinel-2b",
            "ETM_OLI_TIRS_MSI",
        )
    elif year in (2021, 2022):
        sensors = (
            "landsat-7,landsat-8,landsat-9,sentinel-2a,sentinel-2b",
            "ETM_OLI_TIRS_MSI",
        )
    else:
        sensors = (
            "landsat-8,landsat-9,sentinel-2a,sentinel-2b",
            "OLI_TIRS_MSI",
        )

    doc = DatasetDoc()

    # General product details
    doc.product_family = "intertidal"
    doc.producer = "ga.gov.au"

    # Platforms and instruments (assigned in that order)
    doc.platform, doc.instrument = sensors

    # Spatial and temporal information: the dataset spans the full year
    year_start = f"{year}-01-01"
    year_end = f"{year}-12-31T23:59:59.999999"
    doc.region_code = region_code
    doc.datetime = year_start
    doc.datetime_range = (year_start, year_end)
    doc.processed_now()

    # Product maturity and versioning
    doc.product_maturity = product_maturity
    doc.maturity = dataset_maturity
    doc.dataset_version = dataset_version

    # Additional properties that have no dedicated attribute above
    extra_properties = {
        "odc:product": "ga_s2ls_intertidal_cyear_3",
        "odc:file_format": "GeoTIFF",
        "odc:collection_number": 3,
        "eo:gsd": 10,
    }
    doc.properties.update(extra_properties)

    # Generate names
    names = eodatasets3.namer(doc, conventions="dea_c3")

    # Switch to the temporal naming convention: override the time folder
    # and replace the second-to-last "_"-separated part of the label
    period_label = f"{year}--P1Y"
    names.time_folder = period_label
    label_pieces = names.dataset_label.rsplit("_", 2)
    label_pieces[-2] = period_label
    names.dataset_label = "_".join(label_pieces)
    print(f"Preview file path: {names.dataset_folder}/{names.metadata_file}")

    return doc, names


# Example run: build the dataset document and naming conventions for a
# single region code and year
dd, names = create_dataset_doc(
    year=2018,
    region_code="x37y30",
    dataset_version="0.0.1",
)

Preview file path: ga_s2ls_intertidal_cyear_3/0-0-1/x37/y30/2018--P1Y/ga_s2ls_intertidal_cyear_3_x37y30_2018--P1Y_final.odc-metadata.yaml


In [17]:
# Derive the output locations from the naming conventions built above
collection_path = Path("/home/jovyan/Robbi/dea-intertidal/data")
data_path = collection_path.joinpath(names.dataset_folder)
metadata_path = data_path.joinpath(names.metadata_file)
thumbnail_path = data_path.joinpath(names.thumbnail_filename(kind="elevation"))

# Directory creation is currently disabled; re-enable if the dataset
# folder does not exist yet
# data_path.mkdir(parents=True, exist_ok=True)

# Display the resulting metadata path
metadata_path

PosixPath('/home/jovyan/Robbi/dea-intertidal/data/ga_s2ls_intertidal_cyear_3/0-0-1/x37/y30/2018--P1Y/ga_s2ls_intertidal_cyear_3_x37y30_2018--P1Y_final.odc-metadata.yaml')

In [18]:
import rioxarray
import odc.geo.xr

def write_intertidal_thumbnail(
    data,
    path,
    max_resolution=320,
    vmin=-2.5,
    vmax=1.5,
    cmap="viridis",
    quality=85,
):
    """
    Render a raster as a colourised JPEG thumbnail and write it to disk.

    The input is reprojected onto the geobox returned by
    `data.odc.geobox.zoom_to(max_resolution)` using "min" resampling,
    non-finite values are masked, the result is colourised and then
    compressed to JPEG bytes which are written to `path`.

    Parameters
    ----------
    data : xarray.DataArray
        Raster exposing the `.odc` accessor (registered by importing
        `odc.geo.xr`).
    path : str or path-like
        Destination file; opened in binary write mode, so an existing
        file is overwritten.
    max_resolution : int, optional
        Passed to `geobox.zoom_to` to define the thumbnail grid
        (presumably the maximum pixel dimension - confirm against the
        odc-geo docs). Defaults to 320.
    vmin, vmax : float, optional
        Value range passed to `.odc.colorize`. Defaults (-2.5, 1.5) are
        the values previously hard-coded for the elevation layer.
    cmap : str, optional
        Colormap name passed to `.odc.colorize`. Defaults to "viridis".
    quality : int, optional
        JPEG quality passed to `.odc.compress`. Defaults to 85.

    Returns
    -------
    None
    """
    thumbnail_geobox = data.odc.geobox.zoom_to(max_resolution)

    jpeg_data = (
        data.odc.reproject(how=thumbnail_geobox, resampling="min")
        # Mask non-finite values (e.g. +/-inf) before colourising
        .pipe(lambda x: x.where(np.isfinite(x)))
        .odc.colorize(vmin=vmin, vmax=vmax, cmap=cmap)
        # NOTE(review): [255, 255, 255] is presumably the fill colour for
        # transparent pixels, since JPEG has no alpha channel - confirm
        .odc.compress("jpeg", quality, transparent=[255, 255, 255])
    )

    with open(path, "wb") as f:
        f.write(jpeg_data)
        
# Load the elevation raster (squeezing out length-1 dimensions) and
# write its thumbnail next to the metadata
elevation = rioxarray.open_rasterio(
    data_path / "x133y40_2019_2021_elevation.tif"
).squeeze()
write_intertidal_thumbnail(
    data=elevation,
    path=thumbnail_path,
    max_resolution=320,
)

In [19]:
import datacube

dc = datacube.Datacube()

# Small spatial/temporal window used to look up example datasets
query_params = {
    "x": (140, 140.1),
    "y": (-30, -30.1),
    "time": ("2015", "2016"),
}
dd_list = dc.find_datasets(product="ga_ls8c_nbart_gm_cyear_3", **query_params)

In [20]:
from datetime import datetime

with eodatasets3.DatasetPrepare(metadata_path=metadata_path, names=names) as p:
    # Note each measurement's GeoTIFF in the metadata
    for band, tif_name in (
        ("elevation", "x133y40_2019_2021_elevation.tif"),
        ("extents", "x133y40_2019_2021_extents.tif"),
        ("uncertainty", "x133y40_2019_2021_elevation_uncertainty.tif"),
    ):
        p.note_measurement(band, data_path / tif_name)

    # Add lineage from the de-duplicated ids of the datasets found above.
    # NOTE(review): the same ids are recorded under all three classifiers;
    # presumably placeholder lineage - confirm the intended sources.
    source_ids = {ds.id for ds in dd_list}
    for classifier in ("s2_ard", "ls_ard", "ancillary"):
        p.note_source_datasets(classifier, *source_ids)

    # Add thumbnail
    p.note_accessory_file("thumbnail", thumbnail_path)

    # Validate and write our metadata document
    p.done()

# The metadata file should now exist on disk
assert metadata_path.exists()

In [None]:
# NOTE(review): scratch call. `p` is the DatasetPrepare bound by the
# earlier `with` block, which has already exited after `p.done()`, and no
# classifier or dataset ids are passed here, unlike the earlier calls.
# Presumably left over from exploration - confirm whether it can go.
p.note_source_datasets()

In [None]:
# Display the metadata file path assigned in the path set-up cell
metadata_path

In [None]:
# Load an existing metadata document from disk with validation enabled.
# Note: this rebinds `dd`, which earlier held the document returned by
# create_dataset_doc.
dd = eodatasets3.serialise.from_path(
    path=Path(
        "/home/jovyan/Robbi/dea-intertidal/notebooks/experimental/testing/tile1/ga_ls_wo_fq_cyear_3_x37y30_2022--P1Y_final.odc-metadata.yaml"
    ),
    skip_validation=False,
)

In [None]:
# Display the properties of whichever document `dd` currently refers to
# (the one loaded from disk if the preceding cell was run)
dd.properties

In [None]:
import odc.geo
# Build a lon/lat bounding box and display it as a polygon
odc.geo.geom.BoundingBox(
    left=100,
    bottom=-30,
    right=110,
    top=-20,
    crs="EPSG:4326",
).polygon

In [None]:
# Display the set of property names present on `dd`
set(dd.properties.keys())

In [None]:
# NOTE(review): `d` is not assigned in any cell visible in this notebook;
# presumably leftover kernel state or a typo for `dd` - confirm, since
# this raises NameError on a fresh kernel.
set(d.properties.keys())

In [None]:
# NOTE(review): `d` is not assigned in any cell visible in this notebook;
# presumably leftover kernel state or a typo for `dd` - confirm.
d.properties

In [None]:
import datacube

# Re-creates the Datacube connection (an identical `dc` is already built
# in an earlier cell)
dc = datacube.Datacube()


# Display the metadata document of the first dataset returned for this
# product; the `[0]` index raises IndexError if nothing is returned
dc.find_datasets(product="ga_ls_wo_fq_cyear_3", limit=1)[0].metadata_doc