Mesh Loading (AVD-1813) #4262

Merged: 26 commits, Aug 23, 2021
Commits
325e97a
Functionality to load meshes independent of cubes.
trexfeathers Jul 28, 2021
f9ece8a
Make meshes_from_cf private.
trexfeathers Jul 28, 2021
4f4101f
Added tests.
trexfeathers Jul 28, 2021
6f49fd2
Mesh loading docstrings.
trexfeathers Jul 29, 2021
6c966ed
load_mesh integration test.
trexfeathers Jul 29, 2021
df01b3f
Testing improvements.
trexfeathers Jul 30, 2021
48c6d14
Mesh bad cf_role tolerance.
trexfeathers Jul 30, 2021
5a29d44
load_mesh raise ValueError.
trexfeathers Jul 30, 2021
8a981a7
Better var_name docstring for load_meshes.
trexfeathers Jul 30, 2021
fa5937f
Mesh load testing tidy-up.
trexfeathers Jul 30, 2021
6da332d
load_meshes docstring pluralisation fix.
trexfeathers Jul 30, 2021
e6649c1
load_meshes http test py37 compatibility.
trexfeathers Jul 30, 2021
7316ec6
Correct Sphinx domain pluralisation.
trexfeathers Aug 20, 2021
5ffc3d6
Clearer load_meshes Returns docstring.
trexfeathers Aug 20, 2021
a14317b
Add no_mesh integration tests.
trexfeathers Aug 20, 2021
ff73cb0
Clearer test_CFUGridMeshVariable comments.
trexfeathers Aug 20, 2021
436f5f7
Mesh load unit testing use IrisTest.patch().
trexfeathers Aug 20, 2021
b28fbd8
Mesh loading clearer docstring.
trexfeathers Aug 20, 2021
bb30d2e
Enhance test_var_name in test_load_meshes.
trexfeathers Aug 20, 2021
2a4237f
Added test_no_mesh to test_load_meshes.
trexfeathers Aug 20, 2021
ec55e07
Docstring load clarification for mesh loading.
trexfeathers Aug 20, 2021
b900f44
load_mesh better duplicate handling.
trexfeathers Aug 20, 2021
d8809da
Removed face coordinates/data from test_load_meshes.
trexfeathers Aug 20, 2021
b16a4cd
Allow Meshes to be hashed.
pp-mo Aug 20, 2021
ef1c217
Fix for set usage.
pp-mo Aug 23, 2021
b81cfe7
Merge pull request #5 from pp-mo/mesh_hashes
trexfeathers Aug 23, 2021
4 changes: 2 additions & 2 deletions .cirrus.yml
@@ -38,7 +38,7 @@ env:
# Conda packages to be installed.
CONDA_CACHE_PACKAGES: "nox pip"
# Git commit hash for iris test data.
IRIS_TEST_DATA_VERSION: "2.2"
IRIS_TEST_DATA_VERSION: "2.4"
# Base directory for the iris-test-data.
IRIS_TEST_DATA_DIR: ${HOME}/iris-test-data

@@ -193,4 +193,4 @@ task:
- mkdir -p ${MPL_RC_DIR}
- echo "backend : agg" > ${MPL_RC_FILE}
- echo "image.cmap : viridis" >> ${MPL_RC_FILE}
- nox --session linkcheck -- --verbose
- nox --session linkcheck -- --verbose
179 changes: 171 additions & 8 deletions lib/iris/experimental/ugrid/__init__.py
@@ -15,7 +15,9 @@
from collections.abc import Iterable
from contextlib import contextmanager
from functools import wraps
from itertools import groupby
import logging
from pathlib import Path
import re
import threading

@@ -39,12 +39,15 @@
from ...exceptions import ConnectivityNotFoundError, CoordinateNotFoundError
from ...fileformats import cf, netcdf
from ...fileformats._nc_load_rules.helpers import get_attr_units, get_names
from ...io import decode_uri, expand_filespecs
from ...util import guess_coord_axis

__all__ = [
"CFUGridReader",
"Connectivity",
"ConnectivityMetadata",
"load_mesh",
"load_meshes",
"Mesh",
"Mesh1DConnectivities",
"Mesh1DCoords",
@@ -200,9 +205,9 @@ def __init__(
for Fortran and legacy NetCDF files).
* src_dim (int):
Either ``0`` or ``1``. Default is ``0``. Denotes which dimension
of :attr:`indices` varies over the :attr:`src_location`'s (the
of :attr:`indices` varies over the :attr:`src_location`\\ s (the
alternate dimension therefore varying within individual
:attr:`src_location`'s). (This parameter allows support for fastest varying index being
:attr:`src_location`\\ s). (This parameter allows support for fastest varying index being
either first or last).
E.g. for ``face_node_connectivity``, for 10 faces:
``indices.shape[src_dim] = 10``.
@@ -353,7 +358,7 @@ def start_index(self):
def src_dim(self):
"""
The dimension of the connectivity's :attr:`indices` array that varies
over the connectivity's :attr:`src_location`'s. Either ``0`` or ``1``.
over the connectivity's :attr:`src_location`\\ s. Either ``0`` or ``1``.
**Read-only** - validity of :attr:`indices` is dependent on
:attr:`src_dim`. Use :meth:`transpose` to create a new, transposed
:class:`Connectivity` if a different :attr:`src_dim` is needed.
@@ -367,7 +372,7 @@ def tgt_dim(self):
Derived as the alternate value of :attr:`src_dim` - each must equal
either ``0`` or ``1``.
The dimension of the connectivity's :attr:`indices` array that varies
within the connectivity's individual :attr:`src_location`'s.
within the connectivity's individual :attr:`src_location`\\ s.

"""
return self._tgt_dim
@@ -491,7 +496,7 @@ def validate_indices(self):
"""
Perform a thorough validity check of this connectivity's
:attr:`indices`. Includes checking the sizes of individual
:attr:`src_location`'s (specified using masks on the
:attr:`src_location`\\ s (specified using masks on the
:attr:`indices` array) against the :attr:`cf_role`.

Raises a ``ValueError`` if any problems are encountered, otherwise
@@ -1003,6 +1008,11 @@ def __eq__(self, other):
# TBD: this is a minimalist implementation and requires to be revisited
return id(self) == id(other)

def __hash__(self):
# Allow use in sets and as dictionary keys, as is done for :class:`iris.cube.Cube`.
# See https://github.com/SciTools/iris/pull/1772
return hash(id(self))
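# Illustrative sketch only (not part of the changeset; ``mesh_a`` is a
# hypothetical, already-constructed Mesh): identity-based __eq__ and __hash__
# let Mesh objects behave like Cubes in sets and as dictionary keys, e.g.
#     assert len({mesh_a, mesh_a}) == 1   # repeated references collapse
#     cache = {mesh_a: "loaded"}          # a Mesh can key a dictionary
# This is what load_mesh() below relies on when de-duplicating meshes.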

def __getstate__(self):
return (
self._metadata_manager,
@@ -1928,7 +1938,7 @@ def to_MeshCoord(self, location, axis):

def to_MeshCoords(self, location):
"""
Generate a tuple of :class:`MeshCoord`'s, each referencing the current
Generate a tuple of :class:`MeshCoord`\\ s, each referencing the current
:class:`Mesh`, one for each :attr:`AXES` value, passing through the
``location`` argument.

@@ -1942,7 +1952,7 @@ def to_MeshCoords(self, location):
The ``location`` argument for :class:`MeshCoord` instantiation.

Returns:
tuple of :class:`MeshCoord`'s referencing the current :class:`Mesh`.
tuple of :class:`MeshCoord`\\ s referencing the current :class:`Mesh`.
One for each value in :attr:`AXES`, using the value for the
``axis`` argument.
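
For orientation, a minimal sketch of the call documented above ("some_mesh" is a hypothetical, already-built Mesh; one MeshCoord is produced per value in AXES, i.e. an x and a y coordinate here):

coord_x, coord_y = some_mesh.to_MeshCoords(location="face")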

@@ -3274,6 +3284,135 @@ def context(self):
PARSE_UGRID_ON_LOAD = ParseUGridOnLoad()


def _meshes_from_cf(cf_reader):
"""
Common behaviour for extracting meshes from a CFReader.

Simple now, but expected to increase in complexity as Mesh sharing develops.

"""
# Mesh instances are shared between file phenomena.
# TODO: more sophisticated Mesh sharing between files.
# TODO: access external Mesh cache?
mesh_vars = cf_reader.cf_group.meshes
meshes = {
name: _build_mesh(cf_reader, var, cf_reader.filename)
for name, var in mesh_vars.items()
}
return meshes


def load_mesh(uris, var_name=None):
"""
Load a single :class:`Mesh` object from one or more NetCDF files.

Raises a ``ValueError`` unless exactly one :class:`Mesh` is found.

Parameters
----------
uris : str or iterable of str
One or more filenames/URIs. Filenames can include wildcards. Any URIs
must support OpenDAP.
var_name : str, optional
Only return a :class:`Mesh` if its var_name matches this value.

Returns
-------
:class:`Mesh`

"""
meshes_result = load_meshes(uris, var_name)
result = set([mesh for file in meshes_result.values() for mesh in file])
mesh_count = len(result)
if mesh_count != 1:
message = (
f"Expecting 1 mesh, but input file(s) produced: {mesh_count} ."
)
raise ValueError(message)
return result.pop() # Return the single element
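
A minimal usage sketch of load_mesh (the filename and var_name are hypothetical; mesh loading must be enabled via the PARSE_UGRID_ON_LOAD context defined earlier in this module):

from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD, load_mesh

with PARSE_UGRID_ON_LOAD.context():
    # Exactly one Mesh must result, otherwise the ValueError above is raised.
    mesh = load_mesh("unstructured_file.nc", var_name="mesh2d")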


def load_meshes(uris, var_name=None):
"""
Load :class:`Mesh` objects from one or more NetCDF files.

Parameters
----------
uris : str or iterable of str
One or more filenames/URIs. Filenames can include wildcards. Any URIs
must support OpenDAP.
var_name : str, optional
Only return :class:`Mesh`\\ es that have var_names matching this value.

Returns
-------
dict
A dictionary mapping each mesh-containing file path/URL in the input
``uris`` to a list of the :class:`Mesh`\\ es returned from each.

"""
# TODO: rationalise UGRID/mesh handling once experimental.ugrid is folded
# into standard behaviour.
# No constraints or callbacks supported - these assume they are operating
# on a Cube.

from iris.fileformats import FORMAT_AGENT

if not PARSE_UGRID_ON_LOAD:
# Explicit behaviour, consistent with netcdf.load_cubes(), rather than
# an invisible assumption.
message = (
f"PARSE_UGRID_ON_LOAD is {bool(PARSE_UGRID_ON_LOAD)}. Must be "
f"True to enable mesh loading."
)
raise ValueError(message)

if isinstance(uris, str):
uris = [uris]

# Group collections of uris by their iris handler
# Create list of tuples relating schemes to part names.
uri_tuples = sorted(decode_uri(uri) for uri in uris)

valid_sources = []
for scheme, groups in groupby(uri_tuples, key=lambda x: x[0]):
# Call each scheme handler with the appropriate URIs
if scheme == "file":
filenames = [x[1] for x in groups]
sources = expand_filespecs(filenames)
elif scheme in ["http", "https"]:
sources = [":".join(x) for x in groups]
else:
message = f"Iris cannot handle the URI scheme: {scheme}"
raise ValueError(message)

for source in sources:
if scheme == "file":
with open(source, "rb") as fh:
handling_format_spec = FORMAT_AGENT.get_spec(
Path(source).name, fh
)
else:
handling_format_spec = FORMAT_AGENT.get_spec(source, None)

if handling_format_spec.handler == netcdf.load_cubes:
valid_sources.append(source)
else:
message = f"Ignoring non-NetCDF file: {source}"
logger.info(msg=message, extra=dict(cls=None))

result = {}
for source in valid_sources:
meshes_dict = _meshes_from_cf(CFUGridReader(source))
meshes = list(meshes_dict.values())
if var_name is not None:
meshes = list(filter(lambda m: m.var_name == var_name, meshes))
if meshes:
result[source] = meshes

return result
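
A short sketch of the dict-returning behaviour described above (the path is hypothetical; wildcards are expanded, and non-NetCDF matches are skipped with a logged message):

from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD, load_meshes

with PARSE_UGRID_ON_LOAD.context():
    meshes_by_source = load_meshes("/data/ugrid/*.nc")

for source, meshes in meshes_by_source.items():
    # Each value is the list of Mesh objects found in that file or URL.
    print(source, [mesh.var_name for mesh in meshes])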


############
# CF Overrides.
# These are not included in __all__ since they are not [currently] needed
@@ -3469,7 +3608,17 @@ def identify(cls, variables, ignore=None, target=None, warn=True):
log_level = logging.WARNING if warn else logging.DEBUG

# Identify all CF-UGRID mesh variables.
all_vars = target == variables
for nc_var_name, nc_var in target.items():
if all_vars:
# SPECIAL BEHAVIOUR FOR MESH VARIABLES.
# We are looking for all mesh variables. Check if THIS variable
# is a mesh using its own attributes.
if getattr(nc_var, "cf_role", "") == "mesh_topology":
result[nc_var_name] = CFUGridMeshVariable(
nc_var_name, nc_var
)

# Check for mesh variable references.
nc_var_att = getattr(nc_var, cls.cf_identity, None)

@@ -3679,6 +3828,21 @@ def _build_mesh(cf, mesh_var, file_path):
attributes = {}
attr_units = get_attr_units(mesh_var, attributes)

cf_role_message = None
if not hasattr(mesh_var, "cf_role"):
cf_role_message = f"{mesh_var.cf_name} has no cf_role attribute."
cf_role = "mesh_topology"
else:
cf_role = getattr(mesh_var, "cf_role")
if cf_role != "mesh_topology":
cf_role_message = (
f"{mesh_var.cf_name} has an inappropriate cf_role: {cf_role}."
)
if cf_role_message:
cf_role_message += " Correcting to 'mesh_topology'."
# TODO: reconsider logging level when we have consistent practice.
logger.warning(cf_role_message, extra=dict(cls=None))

if hasattr(mesh_var, "volume_node_connectivity"):
topology_dimension = 3
elif hasattr(mesh_var, "face_node_connectivity"):
@@ -3782,7 +3946,6 @@ def _build_mesh(cf, mesh_var, file_path):
edge_dimension=edge_dimension,
face_dimension=face_dimension,
)
assert mesh.cf_role == mesh_var.cf_role

mesh_elements = (
list(mesh.all_coords) + list(mesh.all_connectivities) + [mesh]
5 changes: 5 additions & 0 deletions lib/iris/fileformats/cf.py
@@ -1067,6 +1067,11 @@ def __init__(self, filename, warn=False, monotonic=False):
self._build_cf_groups()
self._reset()

@property
def filename(self):
"""The file that the CFReader is reading."""
return self._filename

def __repr__(self):
return "%s(%r)" % (self.__class__.__name__, self._filename)

12 changes: 2 additions & 10 deletions lib/iris/fileformats/netcdf.py
@@ -792,8 +792,8 @@ def load_cubes(filenames, callback=None):
from iris.experimental.ugrid import (
PARSE_UGRID_ON_LOAD,
CFUGridReader,
_build_mesh,
_build_mesh_coords,
_meshes_from_cf,
)
from iris.io import run_callback

@@ -808,15 +808,7 @@
meshes = {}
if PARSE_UGRID_ON_LOAD:
cf = CFUGridReader(filename)

# Mesh instances are shared between file phenomena.
# TODO: more sophisticated Mesh sharing between files.
# TODO: access external Mesh cache?
mesh_vars = cf.cf_group.meshes
meshes = {
name: _build_mesh(cf, var, filename)
for name, var in mesh_vars.items()
}
meshes = _meshes_from_cf(cf)
else:
cf = iris.fileformats.cf.CFReader(filename)
