In [None]:
!pip install -e .. --quiet

In [1]:
%load_ext autoreload
%autoreload 2

import odc.stac
import pandas as pd
import pystac_client

from pyTMD.compute import tide_elevations
import pandas as pd
import numpy as np


GAUGE_X = 122.2183
GAUGE_Y = -18.0008
ENSEMBLE_MODELS = ["EOT20", "HAMTIDE11"]  # simplified for tests

## Load fixtures

In [2]:
def load_satellite_ds(buffer=0.08):
    """
    Load a sample timeseries of Landsat 8 data using odc-stac.

    Searches the DEA STAC catalogue for `ga_ls8c_ard_3` items from
    January-February 2020 in a square box centred on the tide gauge
    (`GAUGE_X`, `GAUGE_Y`), then lazily loads the `nbart_red` band.

    Parameters
    ----------
    buffer : float, optional
        Half-width of the bounding box, in the same units as
        `GAUGE_X`/`GAUGE_Y` (presumably decimal degrees - confirm).
        Defaults to 0.08.

    Returns
    -------
    ds : xarray.Dataset
        Dask-chunked dataset in EPSG:3577 at 30 m resolution, with
        one timestep per solar day.
    """
    # Connect to stac catalogue
    catalog = pystac_client.Client.open("https://explorer.dea.ga.gov.au/stac")

    # Set cloud defaults
    odc.stac.configure_rio(
        cloud_defaults=True,
        aws={"aws_unsigned": True},
    )

    # Build a bounding box around the tide gauge and search the STAC
    # catalogue for all items matching the query
    bbox = [GAUGE_X - buffer, GAUGE_Y - buffer, GAUGE_X + buffer, GAUGE_Y + buffer]
    query = catalog.search(
        bbox=bbox,
        collections=["ga_ls8c_ard_3"],
        datetime="2020-01/2020-02",
    )

    # Load the matching items as a chunked dataset; `fail_on_error=False`
    # tolerates individual assets that cannot be read
    ds = odc.stac.load(
        list(query.items()),
        bands=["nbart_red"],
        crs="epsg:3577",
        resolution=30,
        groupby="solar_day",
        bbox=bbox,
        fail_on_error=False,
        chunks={"x": 100, "y": 200},
    )

    return ds

def load_measured_tides_ds():
    """
    Load measured sea levels for the Broome ABSLMP tidal station.

    Station data and metadata are published at:
    http://www.bom.gov.au/oceanography/projects/abslmp/data/data.shtml

    Returns an xarray dataset with a single `tide_height` variable
    indexed by `time`, shifted by the station AHD offset.
    """
    # Station AHD offset, taken from the ABSLMP station metadata
    ahd_offset = -5.322

    # Read the gauge record, keeping only the sea level column and
    # treating the -9999 sentinel as missing data
    sea_level_df = pd.read_csv(
        "../tests/data/IDO71013_2020.csv",
        index_col=0,
        parse_dates=True,
        na_values=-9999,
    )[["Sea Level"]]

    # Standardise index/column names, then apply the AHD offset
    tide_height_df = (
        sea_level_df.rename_axis("time").set_axis(["tide_height"], axis="columns")
        + ahd_offset
    )

    # Hand back as an xarray dataset
    return tide_height_df.to_xarray()

satellite_ds = load_satellite_ds()
measured_tides_ds = load_measured_tides_ds()

In [None]:
ds = satellite_ds.copy(deep=True)
ds

In [None]:
from odc.geo.geobox import GeoBox
import xarray as xr


def _resample_chunks(
    ds: xr.DataArray | xr.Dataset | GeoBox,
    dask_chunks: tuple | None = None,
) -> tuple:
    """
    Automatically return optimised dask chunks
    for reprojection with _pixel_tides_resample.
    Use entire image if GeoBox or if no default
    chunks; use existing chunks if they exist.

    Parameters
    ----------
    ds : xr.DataArray, xr.Dataset or GeoBox
        Object whose spatial chunking (or shape) should be used.
    dask_chunks : tuple, optional
        Explicit chunks; returned unchanged when provided.

    Returns
    -------
    tuple
        `dask_chunks` if given; otherwise the (y, x) chunks of `ds`
        when it is chunked, or the full (y, x) image shape.
    """

    # If dask_chunks is provided, return directly
    if dask_chunks is not None:
        return dask_chunks

    # If ds is a GeoBox, return its shape
    if isinstance(ds, GeoBox):
        return ds.shape

    # Use `chunksizes` rather than `chunks`: it is a mapping from
    # dimension name to chunk sizes for both DataArray and Dataset, and
    # is empty when the data is not chunked. `chunks` is a plain tuple
    # on DataArray (not indexable by name) and an empty mapping (not
    # None) on an unchunked Dataset.
    chunksizes = ds.chunksizes
    if chunksizes:
        y_dim, x_dim = ds.odc.spatial_dims
        return chunksizes[y_dim], chunksizes[x_dim]

    # if ds has no chunks, then return entire image shape
    return ds.odc.geobox.shape

In [None]:
_resample_chunks(ds, None)

In [None]:
cd ..

In [None]:
satellite_ds.isel(time=0)

In [239]:
from eo_tides import model_tides


def tide_phase(
    x,
    y,
    time,
    model="EOT20",
    directory=None,
    delta="15 min",
    return_tides=False,
    **model_tides_kwargs,
):
    """
    Label modelled tides with a combined tidal phase string:
    "high-flow", "high-ebb", "low-ebb" or "low-flow".

    Tides are modelled with `model_tides` at each requested time and
    again `delta` earlier. A tide is "ebb" when its height is lower
    than the earlier height and "flow" otherwise, and "high" when its
    height is >= 0 and "low" otherwise.

    Parameters
    ----------
    x, y, time, model, directory
        Passed straight through to `model_tides`. `time` must support
        subtraction of a `pandas.Timedelta`.
    delta : str, optional
        Offset (parsable by `pandas.Timedelta`) subtracted from `time`
        to obtain the earlier comparison tide. Defaults to "15 min".
    return_tides : bool, optional
        If True, keep the `tide_height` column(s) in the output.
        Defaults to False.
    **model_tides_kwargs
        Extra keyword arguments for `model_tides`. `output_format`
        ("long" by default, or "wide") is handled here rather than
        being forwarded; `mode` ("one-to-many" by default) is forwarded
        and also used to order wide-format output.

    Returns
    -------
    pandas.DataFrame
        Long format: the `model_tides` output plus a `tide_phase`
        column (without `tide_height` unless `return_tides=True`).
        Wide format: pivoted on `tide_model`; when `return_tides` is
        False only the `tide_phase` columns are returned, one per model.
    """

    # Pop output format and mode for special handling: tides are always
    # modelled in long format and pivoted afterwards so the phase
    # column can be computed first
    output_format = model_tides_kwargs.pop("output_format", "long")
    mode = model_tides_kwargs.pop("mode", "one-to-many")

    # Model tides. `mode` must be forwarded explicitly because it was
    # popped above; otherwise "one-to-one" requests would silently be
    # modelled as "one-to-many"
    tide_df = model_tides(
        x=x,
        y=y,
        time=time,
        model=model,
        directory=directory,
        mode=mode,
        **model_tides_kwargs,
    )

    # Model tides for a time `delta` prior to each previously
    # modelled satellite acquisition time. This allows us to compare
    # tide heights to see if they are rising or falling.
    pre_df = model_tides(
        x=x,
        y=y,
        time=time - pd.Timedelta(delta),
        model=model,
        directory=directory,
        mode=mode,
        **model_tides_kwargs,
    )

    # Compare tides computed for each timestep. If the previous tide
    # was higher than the current tide, the tide is 'ebbing'. If the
    # previous tide was lower, the tide is 'flowing'. `.values` is used
    # because the two frames have different time indexes but the same
    # row order
    ebb_flow = (tide_df.tide_height < pre_df.tide_height.values).replace({True: "ebb", False: "flow"})

    # If tides are greater than or equal to 0, then "high", otherwise "low"
    high_low = (tide_df.tide_height >= 0).replace({True: "high", False: "low"})

    # Combine into one string and add to data
    tide_df["tide_phase"] = high_low.astype(str) + "-" + ebb_flow.astype(str)

    # Optionally convert to a wide format dataframe with a tide model in
    # each dataframe column
    if output_format == "wide":
        # Pivot into wide format with each time model as a column
        print("Converting to a wide format dataframe")
        tide_df = tide_df.pivot(
            columns="tide_model"
        )

        # If in 'one-to-one' mode, reindex using our input time/x/y
        # values to ensure the output is sorted the same as our inputs
        if mode == "one-to-one":
            output_indices = pd.MultiIndex.from_arrays(
                [time, x, y], names=["time", "x", "y"]
            )
            tide_df = tide_df.reindex(output_indices)

        # Optionally drop tides, returning just the phase columns
        # (one per tide model)
        if not return_tides:
            return tide_df.drop("tide_height", axis=1)["tide_phase"]

    # Optionally drop tide heights
    if not return_tides:
        return tide_df.drop("tide_height", axis=1)

    return tide_df

In [240]:
import pytest

@pytest.mark.parametrize(
    "models,output_format,return_tides,expected_cols",
    [
        (
            ["EOT20"],
            "long",
            False,
            ["tide_model", "tide_phase"]
        ),
        (
            ["EOT20"],
            "long",
            True,
            ["tide_model", "tide_height", "tide_phase"]
        ),
        (
            ["EOT20", "GOT5.5"],
            "long",
            False,
            ["tide_model", "tide_phase"]
        ),
        (
            ["EOT20", "GOT5.5"],
            "long",
            True,
            ["tide_model", "tide_height", "tide_phase"]
        ),
        (
            ["EOT20"],
            "wide",
            False,
            ["EOT20"]
        ),
        (
            # Wide output with tides keeps one height and one phase
            # column per model
            ["EOT20"],
            "wide",
            True,
            [("tide_height", "EOT20"), ("tide_phase", "EOT20")]
        ),
        (
            ["EOT20", "GOT5.5"],
            "wide",
            False,
            ["EOT20", "GOT5.5"]
        ),
        (
            ["EOT20", "GOT5.5"],
            "wide",
            True,
            [
                ("tide_height", "EOT20"),
                ("tide_height", "GOT5.5"),
                ("tide_phase", "EOT20"),
                ("tide_phase", "GOT5.5"),
            ]
        ),
    ]
)
def test_tide_phase_format(models, output_format, return_tides, expected_cols):
    """
    Check the index names and column layout returned by `tide_phase`
    for each combination of tide models, output format and
    `return_tides`.
    """

    # Call the `tide_phase` function defined in this notebook, which is
    # the one that accepts the `delta` keyword
    phase_df = tide_phase(
        x=[122.14, 122.30, 122.12],
        y=[-17.91, -17.92, -18.07],
        time=pd.date_range("2020", "2021", periods=3),
        directory="/var/share/tide_models/",
        model=models,
        output_format=output_format,
        return_tides=return_tides,
        delta = "15 min",
    )

    # Assert expected indexes and columns
    assert phase_df.index.names == ["time", "x", "y"]
    assert phase_df.columns.tolist() == expected_cols




# !pytest -q -k test_ebb_flow --verbose

In [200]:
test_ebb_flow(models=["EOT20"], output_format="long", return_tides=True)

Modelling tides using EOT20 in parallel


100%|██████████| 3/3 [00:01<00:00,  2.72it/s]


Modelling tides using EOT20 in parallel


100%|██████████| 3/3 [00:01<00:00,  2.64it/s]


['tide_model', 'tide_height', 'ebb_flow']


In [256]:
# models = ["EOT20"]
# expected_cols = ["tide_model", "ebb_flow"]
# output_format = "long"
# return_tides = False

# models = ["EOT20"]
# expected_cols = ["tide_model", "tide_height", "ebb_flow"]
# output_format = "long"
# return_tides = True

# models = ["EOT20", "GOT5.5"]
# expected_cols = ["tide_model", "ebb_flow"]
# output_format = "long"
# return_tides = False

models = ["EOT20", "GOT5.5"]
expected_cols = ["tide_model", "tide_height", "ebb_flow"]
output_format = "long"
return_tides = True


# models = ["EOT20"]
# expected_cols = ["EOT20"]
# output_format = "wide"
# return_tides = False

# models = ["EOT20"]
# expected_cols = [("tide_height", "EOT20"), ("ebb_flow", "EOT20")]
# output_format = "wide"
# return_tides = True

# models = ["EOT20", "GOT5.5"]
# expected_cols = ["EOT20", "GOT5.5"]
# output_format = "wide"
# return_tides = False

models = ["EOT20", "GOT5.5"]
expected_cols = [
    ("tide_height", "EOT20"),
    ("tide_height", "GOT5.5"),
    ("ebb_flow", "EOT20"),
    ("ebb_flow", "GOT5.5"),
]
output_format = "wide"
return_tides = True





from eo_tides.model import phase_tides

phase_df = phase_tides(
    x=[122.14, 122.30, 122.12],
    y=[-17.91, -17.92, -18.07],
    time=pd.date_range("2020", "2021", periods=3),
    directory="/var/share/tide_models/",
    model=models,
    output_format=output_format,
    # delta = "15 min",
    return_tides=return_tides,
)


phase_df

Modelling tides using EOT20, GOT5.5 in parallel


100%|██████████| 6/6 [00:01<00:00,  4.01it/s]


Modelling tides using EOT20, GOT5.5 in parallel


100%|██████████| 6/6 [00:01<00:00,  4.57it/s]


Converting to a wide format dataframe


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,tide_height,tide_height,tide_phase,tide_phase
Unnamed: 0_level_1,Unnamed: 1_level_1,tide_model,EOT20,GOT5.5,EOT20,GOT5.5
time,x,y,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2020-01-01,122.12,-18.07,-2.800434,-2.870334,low-ebb,low-ebb
2020-01-01,122.14,-17.91,-2.66483,-2.765702,low-flow,low-ebb
2020-01-01,122.3,-17.92,-2.855278,-2.815728,low-ebb,low-ebb
2020-07-02,122.12,-18.07,2.200403,2.171265,high-ebb,high-ebb
2020-07-02,122.14,-17.91,2.110753,2.117294,high-ebb,high-ebb
2020-07-02,122.3,-17.92,2.221157,2.128876,high-ebb,high-ebb
2021-01-01,122.12,-18.07,-2.070869,-2.066286,low-flow,low-flow
2021-01-01,122.14,-17.91,-1.889267,-1.91061,low-flow,low-flow
2021-01-01,122.3,-17.92,-2.168566,-1.993354,low-flow,low-flow


In [261]:
phase_df.columns.tolist() == [
                ('tide_height',  'EOT20'),
                ('tide_height', 'GOT5.5'),
                ('tide_phase',  'EOT20'),
                ('tide_phase', 'GOT5.5'),
            ]

True

In [259]:
            [
                ('tide_height',  'EOT20'),
                ('tide_height', 'GOT5.5'),
                ('tide_phase',  'EOT20'),
                ('tide_phase', 'GOT5.5'),
            ]

[('tide_height', 'EOT20'),
 ('tide_height', 'GOT5.5'),
 ('tide_phase', 'EOT20'),
 ('tide_phase', 'GOT5.5')]

In [222]:
ebb_flow_df.columns.tolist() 

['tide_model', 'ebb_flow']

In [183]:
out  #.columns.tolist()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,tide_model,ebb_flow
time,x,y,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-01,122.14,-17.91,EOT20,Flow
2020-07-02,122.14,-17.91,EOT20,Ebb
2021-01-01,122.14,-17.91,EOT20,Flow
2020-01-01,122.3,-17.92,EOT20,Ebb
2020-07-02,122.3,-17.92,EOT20,Ebb
2021-01-01,122.3,-17.92,EOT20,Flow
2020-01-01,122.12,-18.07,EOT20,Ebb
2020-07-02,122.12,-18.07,EOT20,Ebb
2021-01-01,122.12,-18.07,EOT20,Flow
2020-01-01,122.14,-17.91,GOT5.5,Ebb


True

In [165]:
out.columns.get_level_values(1)

Index(['EOT20', 'GOT5.5', 'EOT20', 'GOT5.5'], dtype='object', name='tide_model')

['EOT20', 'GOT5.5', 'EOT20', 'GOT5.5']

In [168]:
["EOT20"] * 2

['EOT20', 'EOT20']

In [106]:
import pandas as pd
import pytest

# Define your ebb_flow function here or ensure it's imported

def ebb_flow(x, y, time, directory, model, output_format):
    """
    Placeholder stand-in used only to exercise the test scaffolding.

    Returns a dataframe with `x`, `y`, `time` and `model` columns built
    directly from the inputs; `directory` and `output_format` are
    accepted but not used.
    """
    # Dummy implementation for the sake of example
    columns = {"x": x, "y": y, "time": time, "model": model}
    dummy_df = pd.DataFrame(columns)
    return dummy_df

@pytest.mark.parametrize("models", ["EOT20", ["EOT20", "GOT5.5"]])
def test_ebb_flow(models):
    """
    Smoke test: `ebb_flow` returns something for a single model name
    and for a list of model names.
    """
    # Three sample points and three evenly spaced timesteps across 2020
    sample_x = [122.14, 122.30, 122.12]
    sample_y = [-17.91, -17.92, -18.07]
    sample_times = pd.date_range("2020", "2021", periods=3)

    ebb_flow_df = ebb_flow(
        x=sample_x,
        y=sample_y,
        time=sample_times,
        directory="/var/share/tide_models/",
        model=models,
        output_format="wide",
    )

    # Example assertion
    assert ebb_flow_df is not None

# Now, run the test
# pytest.main(["-q", "-k", "test_ebb_flow"])




<frozen importlib._bootstrap>:241



In [282]:
phase_df = phase_tides(
            x=[122.14],
            y=[-17.91],
            time=pd.date_range("2020-01-01", "2020-01-02", freq="h"),
            model=["EOT20"],
            time_offset="15 min",
    directory="/var/share/tide_models/",
        )

Modelling tides using EOT20
Modelling tides using EOT20


In [289]:
phase_df.tide_phase.tolist() == [
            "low-flow",
            "low-flow",
            "low-flow",
            "low-flow",
            "high-flow",
            "high-flow",
            "high-flow",
            "high-ebb",
            "high-ebb",
            "high-ebb",
            "low-ebb",
            "low-ebb",
            "low-ebb",
            "low-flow",
            "low-flow",
            "high-flow",
            "high-flow",
            "high-flow",
            "high-flow",
            "high-ebb",
            "high-ebb",
            "high-ebb",
            "low-ebb",
            "low-ebb",
            "low-ebb",
        ]


True

In [None]:
modelled_tides_df["ebb_flow"] = pre_tides_df.drop(
    "tide_model", axis=1, errors="ignore"
).values < modelled_tides_df.drop("tide_model", axis=1, errors="ignore").values
modelled_tides_df["ebb_flow"] = modelled_tides_df["ebb_flow"].replace({
        True: "Ebb",
        False: "Flow",
    })

In [None]:
modelled_tides_df

In [248]:
cd ..

/home/jovyan/Robbi/eo-tides


In [291]:
!export EO_TIDES_TIDE_MODELS=./tests/data/tide_models && pytest tests/test_model.py --verbose -k test_phase_tides

platform linux -- Python 3.10.15, pytest-8.3.3, pluggy-1.5.0 -- /env/bin/python3.10
cachedir: .pytest_cache
rootdir: /home/jovyan/Robbi/eo-tides
configfile: pyproject.toml
plugins: anyio-4.6.2.post1, nbval-0.11.0
collected 32 items / 22 deselected / 10 selected                               

tests/test_model.py::test_phase_tides[15 min] PASSED                     [ 10%]
tests/test_model.py::test_phase_tides[20 min] PASSED                     [ 20%]
tests/test_model.py::test_phase_tides_format[models0-long-False-expected_cols0] PASSED [ 30%]
tests/test_model.py::test_phase_tides_format[models1-long-True-expected_cols1] PASSED [ 40%]
tests/test_model.py::test_phase_tides_format[models2-long-False-expected_cols2] PASSED [ 50%]
tests/test_model.py::test_phase_tides_format[models3-long-True-expected_cols3] PASSED [ 60%]
tests/test_model.py::test_phase_tides_format[models4-wide-False-expected_cols4] PASSED [ 70%]
tests/test_model.py::test_phase_tides_format[models5-wide-True-expected_cols5]

In [278]:
phase_df = phase_tides(
    x=[122.14],
    y=[-17.91],
    time=pd.date_range("2020-01-01", "2020-01-02", freq="h"),
    directory="/var/share/tide_models/",
    model=["EOT20"],
    delta = "15 min",
)

Modelling tides using EOT20
Modelling tides using EOT20


In [279]:
phase_df.tide_phase.tolist()

['low-flow',
 'low-flow',
 'low-flow',
 'low-flow',
 'high-flow',
 'high-flow',
 'high-flow',
 'high-ebb',
 'high-ebb',
 'high-ebb',
 'low-ebb',
 'low-ebb',
 'low-ebb',
 'low-flow',
 'low-flow',
 'high-flow',
 'high-flow',
 'high-flow',
 'high-flow',
 'high-ebb',
 'high-ebb',
 'high-ebb',
 'low-ebb',
 'low-ebb',
 'low-ebb']

In [276]:
def check_sequence(arr):
    """
    Return True if `arr` is made up entirely of back-to-back repeats of
    the four-phase cycle 'low-flow', 'high-flow', 'high-ebb', 'low-ebb'.

    Parameters
    ----------
    arr : sequence of str
        Any sliceable sequence with a length (list, tuple, numpy array,
        ...).

    Returns
    -------
    bool
        False if the length is not a multiple of four or if any group
        of four differs from the cycle; True otherwise (including for
        an empty sequence).
    """
    pattern = ['low-flow', 'high-flow', 'high-ebb', 'low-ebb']
    cycle_len = len(pattern)

    # A trailing partial cycle can never match the pattern
    if len(arr) % cycle_len != 0:
        return False

    # Check each group of 4 elements. `list(...)` is used instead of
    # `.tolist()` so that plain lists and tuples are accepted as well
    # as arrays
    return all(
        list(arr[start:start + cycle_len]) == pattern
        for start in range(0, len(arr), cycle_len)
    )

check_sequence(phase_df.query("tide_model == 'EOT20'").tide_phase.values)

False

## Testing pyTMD

In [None]:
from eo_tides import model_tides

# Configuration for the comparison. Alternatives previously tried:
#   GAUGE_X, GAUGE_Y, "EPSG:4326", "spline", "EOT20"
#   GAUGE_X, GAUGE_Y, "EPSG:4326", "bilinear", "EOT20"
x, y, crs, method, model = -1034913, -1961916, "EPSG:3577", "bilinear", "EOT20"


# Run tidal model for locations and timesteps in tide gauge data
modelled_tides_df = model_tides(
    x=[x],
    y=[y],
    time=measured_tides_ds.time,
    model=model,
    crs=crs,
    method=method,
    directory="../tests/data/tide_models",
)

# Run equivalent pyTMD code to verify same results
pytmd_tides = tide_elevations(
        x=x, 
        y=y, 
        delta_time=measured_tides_ds.time,
        DIRECTORY="../tests/data/tide_models",
        MODEL=model,
        # Take everything after the colon so EPSG codes of any length
        # (e.g. "EPSG:32750") are parsed correctly
        EPSG=int(crs.split(":")[-1]),
        TIME="datetime",
        EXTRAPOLATE=True,
        CUTOFF=np.inf,
        METHOD=method,
        # CORRECTIONS: str | None = None,
        # INFER_MINOR: bool = True,
        # MINOR_CONSTITUENTS: list | None = None,
        # APPLY_FLEXURE: bool = False,
        # FILL_VALUE: float = np.nan
        )

np.allclose(modelled_tides_df.tide_height.values, pytmd_tides.data)

### Error for out of bounds

In [None]:
from eo_tides import model_tides

x, y = 180, -50


# Run EOT20 tidal model for locations and timesteps in tide gauge data
modelled_tides_df = model_tides(
    x=[x],
    y=[y],
    model=["EOT20", "GOT5.5"],
    time=measured_tides_ds.time,
    directory="../tests/data/tide_models",
)

In [None]:
from eo_tides import list_models
list_models(directory="")

### Modelling ebb and flow tidal phases
The `tag_tides` function also allows us to determine whether each satellite observation was taken while the tide was rising/incoming (flow tide) or falling/outgoing (ebb tide) by setting `ebb_flow=True`. This is achieved by comparing tide heights 15 minutes before and after each satellite observation.

Ebb and flow data can provide valuable contextual information for interpreting satellite imagery, particularly in tidal flat or mangrove forest environments where water may remain in the landscape for considerable time after the tidal peak.

Once you run the cell below, our data will now also contain a new `ebb_flow` variable under **Data variables**:

In [None]:
import datacube

dc = datacube.Datacube()

ds = dc.load(product="ga_s2ls_intertidal_cyear_3", limit=1, measurements="elevation")

In [None]:
from odc.geo.geobox import GeoBox
import xarray as xr
import textwrap
import numpy as np

from typing import Any


def _standardise_time(
    time: np.ndarray | pd.DatetimeIndex | pd.Timestamp | None,
) -> np.ndarray | None:
    """
    Accept a datetime64 ndarray, pandas.DatetimeIndex
    or pandas.Timestamp, and return a datetime64 ndarray.
    """
    # Return time as-is if none
    if time is None:
        return time

    # Convert to a 1D datetime64 array
    time = np.atleast_1d(time).astype("datetime64[ns]")

    return time


def _standardise_inputs(
    ds: xr.DataArray | xr.Dataset | GeoBox,
    time: np.ndarray | pd.DatetimeIndex | pd.Timestamp | None,
) -> tuple[GeoBox, np.ndarray]:
    """
    Takes an xarray or GeoBox input and optional custom times,
    and returns a standardised GeoBox and a 1D datetime64[ns]
    array of times.

    Parameters
    ----------
    ds : xr.DataArray, xr.Dataset or GeoBox
        Spatial template. For xarray inputs the GeoBox is read from
        `ds.odc.geobox`; a GeoBox is used directly.
    time : np.ndarray, pd.DatetimeIndex, pd.Timestamp or None
        Custom times. Takes priority over any "time" coordinate on
        `ds`. Required when `ds` is a GeoBox.

    Returns
    -------
    gbox : GeoBox
    time : np.ndarray
        1D datetime64[ns] array.

    Raises
    ------
    Exception
        If no GeoBox can be extracted from an xarray `ds`.
    ValueError
        If no times are available from either `time` or `ds`.
    TypeError
        If `ds` is not an xarray object or GeoBox.
    """

    # If `ds` is an xarray object, extract its GeoBox and time
    if isinstance(ds, (xr.DataArray, xr.Dataset)):

        # Try to extract GeoBox. A missing `.odc` accessor raises
        # AttributeError; a None result is treated the same way so the
        # problem is reported here rather than failing obscurely later
        try:
            gbox = ds.odc.geobox
        except AttributeError:
            gbox = None

        if gbox is None:
            error_msg = """
            Cannot extract a valid GeoBox for `ds`. This is required for
            extracting details about `ds`'s CRS and spatial location.

            Import `odc.geo.xr` then run `ds = ds.odc.assign_crs(crs=...)`
            to prepare your data before passing it to this function.
            """
            raise Exception(textwrap.dedent(error_msg).strip())

        # Use custom time by default if provided; otherwise try and extract from `ds`
        if time is not None:
            time = _standardise_time(time)
        elif "time" in ds.coords:
            # Standardise here too so a scalar "time" coordinate still
            # yields a 1D datetime64[ns] array
            time = _standardise_time(ds.coords["time"].values)
        else:
            raise ValueError(
                "`ds` does not have a time dimension, and no custom times were provided via `time`."
            )

    # If `ds` is a GeoBox, use it directly; raise an error if no time was provided
    elif isinstance(ds, GeoBox):
        gbox = ds
        if time is not None:
            time = _standardise_time(time)
        else:
            raise ValueError("If `ds` is a GeoBox, `time` must be provided.")

    # Raise error if no valid inputs were provided
    else:
        raise TypeError(
            "`ds` must be an xarray.DataArray, xarray.Dataset, or odc.geo.geobox.GeoBox."
        )

    return gbox, time


time = pd.date_range("2021", "2022").values
time = pd.date_range("2021", "2022")
time = pd.Timestamp("2022-02-01")
# time = satellite_ds.time
# time = ["a", "b"]


gbox, time = _standardise_inputs(ds=ds.drop_dims("time").odc.geobox, time=time)
gbox, time

In [None]:
satellite_ds.chunks["x"]

In [None]:
import pandas as pd

time = pd.date_range("2021", "2022").values
# time = pd.date_range("2021", "2022")
# time = pd.Timestamp("2022-02-01")
time = satellite_ds.time


def _standardise_time(
    time: np.ndarray | pd.DatetimeIndex | pd.Timestamp | None,
) -> np.ndarray | None:
    """
    Accept a datetime64 ndarray, pandas.DatetimeIndex
    or pandas.Timestamp, and return a datetime64 ndarray.
    """
    # Return time as-is if none
    if time is None:
        return time

    # Convert to a 1D datetime64 array
    time = np.atleast_1d(time).astype("datetime64[ns]")

    return time


time = pd.date_range("2021", "2022").values
# time = pd.date_range("2021", "2022")
# time = pd.Timestamp("2022-02-01")
# time = satellite_ds.time
# time = [pd.Timestamp("2022-02-01"), pd.Timestamp("2022-02-01")]
# time = None
_standardise_time(time=time)

In [None]:
test = np.atleast_1d(time).astype('datetime64[ns]')

In [None]:
test

In [None]:
ds = ds.odc.assign_crs("EPSG:3577")

In [None]:
test = satellite_ds.nbart_red.drop_attrs(deep=True).drop_vars("spatial_ref").odc.reload()

In [None]:
test  #odc.reload()

In [None]:
# Model tide heights
ds = tag_tides(
    ds, 
    ebb_flow=True,     
    directory="../../tests/data/tide_models",
)

# Print output data
print(ds)

We now have data giving us both the tide height and tidal phase ("ebb" or "flow") for every satellite image:

In [None]:
ds[["time", "tide_height", "ebb_flow"]].drop_vars("spatial_ref").to_dataframe().head()

We could for example use this data to filter our observations to keep ebbing phase observations only:

In [None]:
ds_ebb = ds.where(ds.ebb_flow == "Ebb", drop=True)
print(ds_ebb)

## Pixel biases

In [None]:
import odc.stac
import pystac_client
import planetary_computer

# Connect to STAC catalog
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,
)

# Set cloud access defaults
odc.stac.configure_rio(
    cloud_defaults=True,
    aws={"aws_unsigned": True},
)

# Build a query and search the STAC catalog for all matching items
bbox = [122.160, -18.05, 122.260, -17.95]
query = catalog.search(
    bbox=bbox,
    collections=["sentinel-2-l2a"],
    datetime="2021/2023",
)

# Load data into xarray format
ds_s2 = odc.stac.load(
    items=list(query.items()),
    bands=["red"],
    crs="utm",
    resolution=30,
    groupby="solar_day",
    bbox=bbox,
    fail_on_error=False,
    chunks={},
)

print(ds_s2)

In [None]:
list(stats_ds.data_vars.keys())

In [None]:
from eo_tides.stats import pixel_stats

models = ["EOT20"]
resample = True

stats_ds = pixel_stats(
    ds=satellite_ds,
    model=models,
    resample=resample,
    directory="../tests/data/tide_models",
)

# Verify dims are correct
assert stats_ds.odc.spatial_dims == satellite_ds.odc.spatial_dims

# Verify vars are as expected
expected_vars = ['hat',  'hot',  'lat',  'lot',  'otr',  'tr',  'spread',  'offset_low',  'offset_high']
assert set(expected_vars) == set(stats_ds.data_vars)

# Verify tide models are correct
assert all(stats_ds["tide_model"].values == models)
if len(models) > 1:
    assert "tide_model" in stats_ds.dims

# If resample, assert that statistics have the same shape and dims
# as `satellite_ds`
if resample:
    assert satellite_ds.odc.geobox.shape == stats_ds.odc.geobox.shape



In [None]:
# Verify values are roughly expected. `.item()` must be called to get
# the Python scalar; passing the bound method `.item` to `np.allclose`
# raises instead of comparing values
assert np.allclose(stats_ds.offset_high.mean().item(), 0.30, atol=0.02)
assert np.allclose(stats_ds.offset_low.mean().item(), 0.27, atol=0.02)
assert np.allclose(stats_ds.spread.mean().item(), 0.43, atol=0.02)

In [None]:
stats_ds.offset_high.mean().item()

In [None]:
stats_ds.spread.mean()

In [None]:
stats_ds["tide_model"].values.tolist()

In [None]:
stats_ds["tide_model"].values.tolist()

In [None]:
set(['hat',  'hot',  'lat',  'lot',  'otr',  'tr',  'spread',  'offset_low',  'offset_high'])

In [None]:
set(stats_ds.data_vars)

In [None]:
from eo_tides import pixel_tides

pixel_tides(
    ds=satellite_ds,
    model=["EOT20", "GOT5.5"],
    directory="../tests/data/tide_models",
    )

In [None]:
stats_ds.dims

In [None]:
satellite_ds.x