Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 50 additions & 13 deletions lib/iris/fileformats/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1376,6 +1376,8 @@ def _create_cf_dimensions(
unlimited_dim_names.append(dim_name)

for dim_name in dimension_names:
# NOTE: these dim-names have been chosen by _get_dim_names, and
# were already checked+fixed to avoid any name collisions.
if dim_name not in self._dataset.dimensions:
if dim_name in unlimited_dim_names:
size = None
Expand Down Expand Up @@ -1468,6 +1470,10 @@ def _add_mesh(self, cube_or_mesh):
last_dim = f"{cf_mesh_name}_{loc_from}_N_{loc_to}s"
# Create if it does not already exist.
if last_dim not in self._dataset.dimensions:
while last_dim in self._dataset.variables:
# Also avoid collision with variable names.
# See '_get_dim_names' for reason.
last_dim = self._increment_name(last_dim)
length = conn.shape[1 - conn.location_axis]
self._dataset.createDimension(last_dim, length)

Expand Down Expand Up @@ -1869,8 +1875,19 @@ def record_dimension(names_list, dim_name, length, matching_coords=[]):
assert dim_name is not None
# Ensure it is a valid variable name.
dim_name = self.cf_valid_var_name(dim_name)
# Disambiguate if it matches an existing one.
while dim_name in self._existing_dim:
# Disambiguate if it has the same name as an existing
# dimension.
# NOTE: *OR* if it matches the name of an existing file
# variable. Because there is a bug ...
# See https://github.com/Unidata/netcdf-c/issues/1772
# N.B. the workarounds here *ONLY* function because the
# caller (write) will not create any more variables
# in between choosing dim names (here), and creating
# the new dims (via '_create_cf_dimensions').
while (
dim_name in self._existing_dim
or dim_name in self._dataset.variables
):
dim_name = self._increment_name(dim_name)

# Record the new dimension.
Expand Down Expand Up @@ -1915,26 +1932,34 @@ def record_dimension(names_list, dim_name, length, matching_coords=[]):
dim_name = self._get_coord_variable_name(
cube, coord
)
# Disambiguate if it has the same name as an
# existing dimension.
# OR if it matches an existing file variable name.
# NOTE: check against variable names is needed
# because of a netcdf bug ... see note in the
# mesh dimensions block above.
while (
dim_name in self._existing_dim
or dim_name in self._name_coord_map.names
or dim_name in self._dataset.variables
):
dim_name = self._increment_name(dim_name)

else:
# No CF-netCDF coordinates describe this data dimension.
# Make up a new, distinct dimension name
dim_name = f"dim{dim}"
if dim_name in self._existing_dim:
# Increment name if conflicted with one already existing.
if self._existing_dim[dim_name] != cube.shape[dim]:
while (
dim_name in self._existing_dim
and self._existing_dim[dim_name]
!= cube.shape[dim]
or dim_name in self._name_coord_map.names
):
dim_name = self._increment_name(dim_name)
# Increment name if conflicted with one already existing
# (or planned)
# NOTE: check against variable names is needed because
# of a netcdf bug ... see note in the mesh dimensions
# block above.
while (
dim_name in self._existing_dim
and (
self._existing_dim[dim_name] != cube.shape[dim]
)
) or dim_name in self._dataset.variables:
dim_name = self._increment_name(dim_name)

# Record the dimension.
record_dimension(
Expand Down Expand Up @@ -2065,6 +2090,12 @@ def _create_cf_bounds(self, coord, cf_var, cf_name):

if bounds_dimension_name not in self._dataset.dimensions:
# Create the bounds dimension with the appropriate extent.
while bounds_dimension_name in self._dataset.variables:
# Also avoid collision with variable names.
# See '_get_dim_names' for reason.
bounds_dimension_name = self._increment_name(
bounds_dimension_name
)
self._dataset.createDimension(bounds_dimension_name, n_bounds)

boundsvar_name = "{}_{}".format(cf_name, varname_extra)
Expand Down Expand Up @@ -2345,6 +2376,12 @@ def _create_generic_cf_array_var(

# Determine whether to create the string length dimension.
if string_dimension_name not in self._dataset.dimensions:
while string_dimension_name in self._dataset.variables:
# Also avoid collision with variable names.
# See '_get_dim_names' for reason.
string_dimension_name = self._increment_name(
string_dimension_name
)
self._dataset.createDimension(
string_dimension_name, string_dimension_depth
)
Expand Down
6 changes: 5 additions & 1 deletion lib/iris/tests/stock/mesh.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ def sample_mesh(n_nodes=None, n_faces=None, n_edges=None, lazy_values=False):
units="degrees_east",
long_name="long-name",
var_name="var-name",
attributes={"a": 1, "b": "c"},
attributes={
# N.B. cast this so that a save-load roundtrip preserves it
"a": np.int64(1),
"b": "c",
},
)
node_y = AuxCoord(1200 + arr.arange(n_nodes), standard_name="latitude")

Expand Down
147 changes: 146 additions & 1 deletion lib/iris/tests/unit/fileformats/netcdf/test_save.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,21 @@
# importing anything else.
import iris.tests as tests # isort:skip

from pathlib import Path
from shutil import rmtree
from tempfile import mkdtemp
from unittest import mock

import netCDF4 as nc
import numpy as np

import iris
from iris.coords import DimCoord
from iris.coords import AuxCoord, DimCoord
from iris.cube import Cube, CubeList
from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD
from iris.fileformats.netcdf import CF_CONVENTIONS_VERSION, save
from iris.tests.stock import lat_lon_cube
from iris.tests.stock.mesh import sample_mesh_cube


class Test_conventions(tests.IrisTest):
Expand Down Expand Up @@ -211,5 +216,145 @@ def test_multi_wrong_length(self):
save(cubes, "dummy.nc", fill_value=fill_values)


class Test_HdfSaveBug(tests.IrisTest):
"""
Check for a known problem with netcdf4.

If you create dimension with the same name as an existing variable, there
is a specific problem, relating to HDF so limited to netcdf-4 formats.
See : https://github.com/Unidata/netcdf-c/issues/1772

In all these testcases, a straightforward translation to the file would be
able to save [cube_2, cube_1], but *not* [cube_1, cube_2],
because the latter creates a dim of the same name as the 'cube_1' data
variable.

Here, we are testing the specific workarounds in Iris netcdf save which
avoids that problem.
Unfortunately, owing to the complexity of the iris.fileformats.netcdf.Saver
code, there are several separate places where this had to be fixed.

N.B. we also check that the data (mostly) survives a save-load roundtrip.
To identify the read-back cubes with the originals, we use var-names,
which works because the save code opts to adjust dimension names _instead_.

"""

def _check_save_and_reload(self, cubes):
tempdir = Path(mkdtemp())
filepath = tempdir / "tmp.nc"
try:
# Save the given cubes.
save(cubes, filepath)

# Load them back for roundtrip testing.
with PARSE_UGRID_ON_LOAD.context():
new_cubes = iris.load(str(filepath))

# There should definitely still be the same number of cubes.
self.assertEqual(len(new_cubes), len(cubes))

# Get results in the input order, matching by var_names.
result = [new_cubes.extract_cube(cube.var_name) for cube in cubes]

# Check that input + output match cube-for-cube.
# NB in this codeblock, before we destroy the temporary file.
for cube_in, cube_out in zip(cubes, result):
# Using special tolerant equivalence-check.
self.assertSameCubes(cube_in, cube_out)

finally:
rmtree(tempdir)

# Return result cubes for any additional checks.
return result

def assertSameCubes(self, cube1, cube2):
"""
A special tolerant cube compare.

Ignore any 'Conventions' attributes.
Ignore all var-names.

"""

def clean_cube(cube):
cube = cube.copy() # dont modify the original
# Remove any 'Conventions' attributes
cube.attributes.pop("Conventions", None)
# Remove var-names (as original mesh components wouldn't have them)
cube.var_name = None
for coord in cube.coords():
coord.var_name = None
mesh = cube.mesh
if mesh:
mesh.var_name = None
for component in mesh.coords() + mesh.connectivities():
component.var_name = None

return cube

self.assertEqual(clean_cube(cube1), clean_cube(cube2))

def test_dimcoord_varname_collision(self):
cube_2 = Cube([0, 1], var_name="cube_2")
x_dim = DimCoord([0, 1], long_name="dim_x", var_name="dimco_name")
cube_2.add_dim_coord(x_dim, 0)
# First cube has a varname which collides with the dimcoord.
cube_1 = Cube([0, 1], long_name="cube_1", var_name="dimco_name")
# Test save + loadback
reload_1, reload_2 = self._check_save_and_reload([cube_1, cube_2])
# As re-loaded, the coord will have a different varname.
self.assertEqual(reload_2.coord("dim_x").var_name, "dimco_name_0")

def test_anonymous_dim_varname_collision(self):
# Second cube is going to name an anonymous dim.
cube_2 = Cube([0, 1], var_name="cube_2")
# First cube has a varname which collides with the dim-name.
cube_1 = Cube([0, 1], long_name="cube_1", var_name="dim0")
# Add a dimcoord to prevent the *first* cube having an anonymous dim.
x_dim = DimCoord([0, 1], long_name="dim_x", var_name="dimco_name")
cube_1.add_dim_coord(x_dim, 0)
# Test save + loadback
self._check_save_and_reload([cube_1, cube_2])

def test_bounds_dim_varname_collision(self):
cube_2 = Cube([0, 1], var_name="cube_2")
x_dim = DimCoord([0, 1], long_name="dim_x", var_name="dimco_name")
x_dim.guess_bounds()
cube_2.add_dim_coord(x_dim, 0)
# First cube has a varname which collides with the bounds dimension.
cube_1 = Cube([0], long_name="cube_1", var_name="bnds")
# Test save + loadback
self._check_save_and_reload([cube_1, cube_2])

def test_string_dim_varname_collision(self):
cube_2 = Cube([0, 1], var_name="cube_2")
# NOTE: it *should* be possible for a cube with string data to cause
# this collision, but cubes with string data are currently not working.
# See : https://github.com/SciTools/iris/issues/4412
x_dim = AuxCoord(
["this", "that"], long_name="dim_x", var_name="string_auxco"
)
cube_2.add_aux_coord(x_dim, 0)
cube_1 = Cube([0], long_name="cube_1", var_name="string4")
# Test save + loadback
self._check_save_and_reload([cube_1, cube_2])

def test_mesh_location_dim_varname_collision(self):
cube_2 = sample_mesh_cube()
cube_2.var_name = "cube_2" # Make it identifiable
cube_1 = Cube([0], long_name="cube_1", var_name="Mesh2d_node")
# Test save + loadback
self._check_save_and_reload([cube_1, cube_2])

def test_connectivity_dim_varname_collision(self):
cube_2 = sample_mesh_cube()
cube_2.var_name = "cube_2" # Make it identifiable
cube_1 = Cube([0], long_name="cube_1", var_name="Mesh_2d_face_N_nodes")
# Test save + loadback
self._check_save_and_reload([cube_1, cube_2])


if __name__ == "__main__":
tests.main()