From 74498f2aa6cab9d72979a5113011ad20dd4f687c Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 23 Aug 2021 11:09:39 +0100 Subject: [PATCH 1/7] Refactor iris.fileformats.netcdf into separate load and save submodules. --- lib/iris/fileformats/__init__.py | 12 +- lib/iris/fileformats/netcdf/__init__.py | 9 + lib/iris/fileformats/netcdf/load.py | 707 ++++++++++++++++++ .../fileformats/{netcdf.py => netcdf/save.py} | 691 +---------------- lib/iris/io/__init__.py | 2 +- 5 files changed, 763 insertions(+), 658 deletions(-) create mode 100644 lib/iris/fileformats/netcdf/__init__.py create mode 100644 lib/iris/fileformats/netcdf/load.py rename lib/iris/fileformats/{netcdf.py => netcdf/save.py} (77%) diff --git a/lib/iris/fileformats/__init__.py b/lib/iris/fileformats/__init__.py index 5e03f1e4fd..000b7f00ee 100644 --- a/lib/iris/fileformats/__init__.py +++ b/lib/iris/fileformats/__init__.py @@ -90,7 +90,11 @@ def _load_grib(*args, **kwargs): # FORMAT_AGENT.add_spec( FormatSpecification( - "NetCDF", MagicNumber(4), 0x43444601, netcdf.load_cubes, priority=5 + "NetCDF", + MagicNumber(4), + 0x43444601, + netcdf.load.load_cubes, + priority=5, ) ) @@ -100,7 +104,7 @@ def _load_grib(*args, **kwargs): "NetCDF 64 bit offset format", MagicNumber(4), 0x43444602, - netcdf.load_cubes, + netcdf.load.load_cubes, priority=5, ) ) @@ -112,7 +116,7 @@ def _load_grib(*args, **kwargs): "NetCDF_v4", MagicNumber(8), 0x894844460D0A1A0A, - netcdf.load_cubes, + netcdf.load.load_cubes, priority=5, ) ) @@ -122,7 +126,7 @@ def _load_grib(*args, **kwargs): "NetCDF OPeNDAP", UriProtocol(), lambda protocol: protocol in ["http", "https"], - netcdf.load_cubes, + netcdf.load.load_cubes, priority=6, ) FORMAT_AGENT.add_spec(_nc_dap) diff --git a/lib/iris/fileformats/netcdf/__init__.py b/lib/iris/fileformats/netcdf/__init__.py new file mode 100644 index 0000000000..ba850e540c --- /dev/null +++ b/lib/iris/fileformats/netcdf/__init__.py @@ -0,0 +1,9 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +A package for loading and saving cubes to and from netcdf files. + +""" diff --git a/lib/iris/fileformats/netcdf/load.py b/lib/iris/fileformats/netcdf/load.py new file mode 100644 index 0000000000..aef160ab6e --- /dev/null +++ b/lib/iris/fileformats/netcdf/load.py @@ -0,0 +1,707 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +Module to support the loading of a NetCDF file into an Iris cube. + +See also: `netCDF4 python `_. + +Also refer to document 'NetCDF Climate and Forecast (CF) Metadata Conventions'. + +""" + +import re +import warnings + +import netCDF4 +import numpy as np + +from iris._lazy_data import as_lazy_data +from iris.aux_factory import ( + HybridHeightFactory, + HybridPressureFactory, + OceanSFactory, + OceanSg1Factory, + OceanSg2Factory, + OceanSigmaFactory, + OceanSigmaZFactory, +) +import iris.config +import iris.coord_systems +import iris.coords +import iris.exceptions +import iris.fileformats.cf +import iris.io +import iris.util + +# Show actions activation statistics. +DEBUG = False + +# Configure the logger. 
+logger = iris.config.get_logger(__name__)
+
+# Pass through CF attributes:
+# - comment
+# - Conventions
+# - flag_masks
+# - flag_meanings
+# - flag_values
+# - history
+# - institution
+# - reference
+# - source
+# - title
+# - positive
+#
+_CF_ATTRS = [
+    "add_offset",
+    "ancillary_variables",
+    "axis",
+    "bounds",
+    "calendar",
+    "cell_measures",
+    "cell_methods",
+    "climatology",
+    "compress",
+    "coordinates",
+    "_FillValue",
+    "formula_terms",
+    "grid_mapping",
+    "leap_month",
+    "leap_year",
+    "long_name",
+    "missing_value",
+    "month_lengths",
+    "scale_factor",
+    "standard_error_multiplier",
+    "standard_name",
+    "units",
+]
+
+# Cell methods.
+_CM_KNOWN_METHODS = [
+    "point",
+    "sum",
+    "mean",
+    "maximum",
+    "minimum",
+    "mid_range",
+    "standard_deviation",
+    "variance",
+    "mode",
+    "median",
+]
+
+_CM_COMMENT = "comment"
+_CM_EXTRA = "extra"
+_CM_INTERVAL = "interval"
+_CM_METHOD = "method"
+_CM_NAME = "name"
+_CM_PARSE = re.compile(
+    r"""
+        (?P<name>([\w_]+\s*?:\s+)+)
+        (?P<method>[\w_\s]+(?![\w_]*\s*?:))\s*
+        (?:
+            \(\s*
+            (?P<extra>[^\)]+)
+            \)\s*
+        )?
+    """,
+    re.VERBOSE,
+)
+
+
+class UnknownCellMethodWarning(Warning):
+    pass
+
+
+def parse_cell_methods(nc_cell_methods):
+    """
+    Parse a CF cell_methods attribute string into a tuple of zero or
+    more CellMethod instances.
+
+    Args:
+
+    * nc_cell_methods (str):
+        The value of the cell methods attribute to be parsed.
+
+    Returns:
+
+    * cell_methods
+        An iterable of :class:`iris.coords.CellMethod`.
+
+    Multiple coordinates, intervals and comments are supported.
+    If a method has a non-standard name a warning will be issued, but the
+    results are not affected.
+
+    """
+
+    cell_methods = []
+    if nc_cell_methods is not None:
+        for m in _CM_PARSE.finditer(nc_cell_methods):
+            d = m.groupdict()
+            method = d[_CM_METHOD]
+            method = method.strip()
+            # Check validity of method, allowing for multi-part methods
+            # e.g. mean over years.
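+            # Note: only the leading word of a multi-part method is
+            # validated, so e.g. "time: mean over years" checks just "mean"
+            # against _CM_KNOWN_METHODS; the full method text is kept
+            # either way.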
+            method_words = method.split()
+            if method_words[0].lower() not in _CM_KNOWN_METHODS:
+                msg = "NetCDF variable contains unknown cell method {!r}"
+                warnings.warn(
+                    msg.format("{}".format(method_words[0])),
+                    UnknownCellMethodWarning,
+                )
+            d[_CM_METHOD] = method
+            name = d[_CM_NAME]
+            name = name.replace(" ", "")
+            name = name.rstrip(":")
+            d[_CM_NAME] = tuple([n for n in name.split(":")])
+            interval = []
+            comment = []
+            if d[_CM_EXTRA] is not None:
+                #
+                # tokenise the key words and field colon marker
+                #
+                d[_CM_EXTRA] = d[_CM_EXTRA].replace(
+                    "comment:", "<<comment>><<:>>"
+                )
+                d[_CM_EXTRA] = d[_CM_EXTRA].replace(
+                    "interval:", "<<interval>><<:>>"
+                )
+                d[_CM_EXTRA] = d[_CM_EXTRA].split("<<:>>")
+                if len(d[_CM_EXTRA]) == 1:
+                    comment.extend(d[_CM_EXTRA])
+                else:
+                    next_field_type = comment
+                    for field in d[_CM_EXTRA]:
+                        field_type = next_field_type
+                        index = field.rfind("<<interval>>")
+                        if index == 0:
+                            next_field_type = interval
+                            continue
+                        elif index > 0:
+                            next_field_type = interval
+                        else:
+                            index = field.rfind("<<comment>>")
+                            if index == 0:
+                                next_field_type = comment
+                                continue
+                            elif index > 0:
+                                next_field_type = comment
+                        if index != -1:
+                            field = field[:index]
+                        field_type.append(field.strip())
+            #
+            # cater for a shared interval over multiple axes
+            #
+            if len(interval):
+                if len(d[_CM_NAME]) != len(interval) and len(interval) == 1:
+                    interval = interval * len(d[_CM_NAME])
+            #
+            # cater for a shared comment over multiple axes
+            #
+            if len(comment):
+                if len(d[_CM_NAME]) != len(comment) and len(comment) == 1:
+                    comment = comment * len(d[_CM_NAME])
+            d[_CM_INTERVAL] = tuple(interval)
+            d[_CM_COMMENT] = tuple(comment)
+            cell_method = iris.coords.CellMethod(
+                d[_CM_METHOD],
+                coords=d[_CM_NAME],
+                intervals=d[_CM_INTERVAL],
+                comments=d[_CM_COMMENT],
+            )
+            cell_methods.append(cell_method)
+    return tuple(cell_methods)
+
+
+def _actions_engine():
+    # Return an 'actions engine', which provides a pyke-rules-like interface to
+    # the core cf translation code.
+    # Deferred import to avoid circularity.
+    import iris.fileformats._nc_load_rules.engine as nc_actions_engine
+
+    engine = nc_actions_engine.Engine()
+    return engine
+
+
+class NetCDFDataProxy:
+    """A reference to the data payload of a single NetCDF file variable."""
+
+    __slots__ = ("shape", "dtype", "path", "variable_name", "fill_value")
+
+    def __init__(self, shape, dtype, path, variable_name, fill_value):
+        self.shape = shape
+        self.dtype = dtype
+        self.path = path
+        self.variable_name = variable_name
+        self.fill_value = fill_value
+
+    @property
+    def ndim(self):
+        return len(self.shape)
+
+    def __getitem__(self, keys):
+        dataset = netCDF4.Dataset(self.path)
+        try:
+            variable = dataset.variables[self.variable_name]
+            # Get the NetCDF variable data and slice.
+            var = variable[keys]
+        finally:
+            dataset.close()
+        return np.asanyarray(var)
+
+    def __repr__(self):
+        fmt = (
+            "<{self.__class__.__name__} shape={self.shape}"
+            " dtype={self.dtype!r} path={self.path!r}"
+            " variable_name={self.variable_name!r}>"
+        )
+        return fmt.format(self=self)
+
+    def __getstate__(self):
+        return {attr: getattr(self, attr) for attr in self.__slots__}
+
+    def __setstate__(self, state):
+        for key, value in state.items():
+            setattr(self, key, value)
+
+
+def _assert_case_specific_facts(engine, cf, cf_group):
+    # Initialise a data store for built cube elements.
+    # This is used to patch element attributes *not* setup by the actions
+    # process, after the actions code has run.
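+    # Each list accumulates (iris_object, cf_var_name) pairs, e.g. a built
+    # coordinate and the netCDF variable name it came from, which
+    # _load_cube later uses to copy across the "unused" attributes.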
+ engine.cube_parts["coordinates"] = [] + engine.cube_parts["cell_measures"] = [] + engine.cube_parts["ancillary_variables"] = [] + + # Assert facts for CF coordinates. + for cf_name in cf_group.coordinates.keys(): + engine.add_case_specific_fact("coordinate", (cf_name,)) + + # Assert facts for CF auxiliary coordinates. + for cf_name in cf_group.auxiliary_coordinates.keys(): + engine.add_case_specific_fact("auxiliary_coordinate", (cf_name,)) + + # Assert facts for CF cell measures. + for cf_name in cf_group.cell_measures.keys(): + engine.add_case_specific_fact("cell_measure", (cf_name,)) + + # Assert facts for CF ancillary variables. + for cf_name in cf_group.ancillary_variables.keys(): + engine.add_case_specific_fact("ancillary_variable", (cf_name,)) + + # Assert facts for CF grid_mappings. + for cf_name in cf_group.grid_mappings.keys(): + engine.add_case_specific_fact("grid_mapping", (cf_name,)) + + # Assert facts for CF labels. + for cf_name in cf_group.labels.keys(): + engine.add_case_specific_fact("label", (cf_name,)) + + # Assert facts for CF formula terms associated with the cf_group + # of the CF data variable. + + # Collect varnames of formula-root variables as we go. + # NOTE: use dictionary keys as an 'OrderedSet' + # - see: https://stackoverflow.com/a/53657523/2615050 + # This is to ensure that we can handle the resulting facts in a definite + # order, as using a 'set' led to indeterminate results. + formula_root = {} + for cf_var in cf.cf_group.formula_terms.values(): + for cf_root, cf_term in cf_var.cf_terms_by_root.items(): + # Only assert this fact if the formula root variable is + # defined in the CF group of the CF data variable. + if cf_root in cf_group: + formula_root[cf_root] = True + engine.add_case_specific_fact( + "formula_term", + (cf_var.cf_name, cf_root, cf_term), + ) + + for cf_root in formula_root.keys(): + engine.add_case_specific_fact("formula_root", (cf_root,)) + + +def _actions_activation_stats(engine, cf_name): + print("-" * 80) + print("CF Data Variable: %r" % cf_name) + + engine.print_stats() + + print("Rules Triggered:") + + for rule in sorted(list(engine.rule_triggered)): + print("\t%s" % rule) + + print("Case Specific Facts:") + kb_facts = engine.get_kb() + + for key in kb_facts.entity_lists.keys(): + for arg in kb_facts.entity_lists[key].case_specific_facts: + print("\t%s%s" % (key, arg)) + + +def _set_attributes(attributes, key, value): + """Set attributes dictionary, converting unicode strings appropriately.""" + + if isinstance(value, str): + try: + attributes[str(key)] = str(value) + except UnicodeEncodeError: + attributes[str(key)] = value + else: + attributes[str(key)] = value + + +def _add_unused_attributes(iris_object, cf_var): + """ + Populate the attributes of a cf element with the "unused" attributes + from the associated CF-netCDF variable. That is, all those that aren't CF + reserved terms. + + """ + + def attribute_predicate(item): + return item[0] not in _CF_ATTRS + + tmpvar = filter(attribute_predicate, cf_var.cf_attrs_unused()) + for attr_name, attr_value in tmpvar: + _set_attributes(iris_object.attributes, attr_name, attr_value) + + +def _get_actual_dtype(cf_var): + # Figure out what the eventual data type will be after any scale/offset + # transforms. 
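+    # A sketch of the idea, assuming standard numpy promotion rules: an
+    # int16 variable with a float32 scale_factor decodes to float32, since
+    # (np.float32(2.0) * np.zeros(1, np.int16)).dtype == "float32".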
+ dummy_data = np.zeros(1, dtype=cf_var.dtype) + if hasattr(cf_var, "scale_factor"): + dummy_data = cf_var.scale_factor * dummy_data + if hasattr(cf_var, "add_offset"): + dummy_data = cf_var.add_offset + dummy_data + return dummy_data.dtype + + +def _get_cf_var_data(cf_var, filename): + # Get lazy chunked data out of a cf variable. + dtype = _get_actual_dtype(cf_var) + + # Create cube with deferred data, but no metadata + fill_value = getattr( + cf_var.cf_data, + "_FillValue", + netCDF4.default_fillvals[cf_var.dtype.str[1:]], + ) + proxy = NetCDFDataProxy( + cf_var.shape, dtype, filename, cf_var.cf_name, fill_value + ) + # Get the chunking specified for the variable : this is either a shape, or + # maybe the string "contiguous". + chunks = cf_var.cf_data.chunking() + # In the "contiguous" case, pass chunks=None to 'as_lazy_data'. + if chunks == "contiguous": + chunks = None + return as_lazy_data(proxy, chunks=chunks) + + +class OrderedAddableList(list): + # Used purely in actions debugging, to accumulate a record of which actions + # were activated. + # It replaces a set, so as to record the ordering of operations, with + # possible repeats, and it also numbers the entries. + # Actions routines invoke the 'add' method, which thus effectively converts + # a set.add into a list.append. + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._n_add = 0 + + def add(self, msg): + self._n_add += 1 + n_add = self._n_add + self.append(f"#{n_add:03d} : {msg}") + + +def _load_cube(engine, cf, cf_var, filename): + from iris.cube import Cube + + """Create the cube associated with the CF-netCDF data variable.""" + data = _get_cf_var_data(cf_var, filename) + cube = Cube(data) + + # Reset the actions engine. + engine.reset() + + # Initialise engine rule processing hooks. + engine.cf_var = cf_var + engine.cube = cube + engine.cube_parts = {} + engine.requires = {} + engine.rule_triggered = OrderedAddableList() + engine.filename = filename + + # Assert all the case-specific facts. + # This extracts 'facts' specific to this data-variable (aka cube), from + # the info supplied in the CFGroup object. + _assert_case_specific_facts(engine, cf, cf_var.cf_group) + + # Run the actions engine. + # This creates various cube elements and attaches them to the cube. + # It also records various other info on the engine, to be processed later. + engine.activate() + + # Having run the rules, now add the "unused" attributes to each cf element. + def fix_attributes_all_elements(role_name): + elements_and_names = engine.cube_parts.get(role_name, []) + + for iris_object, cf_var_name in elements_and_names: + _add_unused_attributes(iris_object, cf.cf_group[cf_var_name]) + + # Populate the attributes of all coordinates, cell-measures and ancillary-vars. + fix_attributes_all_elements("coordinates") + fix_attributes_all_elements("ancillary_variables") + fix_attributes_all_elements("cell_measures") + + # Also populate attributes of the top-level cube itself. + _add_unused_attributes(cube, cf_var) + + # Work out reference names for all the coords. + names = { + coord.var_name: coord.standard_name or coord.var_name or "unknown" + for coord in cube.coords() + } + + # Add all the cube cell methods. 
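+    # (More precisely: re-write the methods already attached by the rules,
+    # so that each refers to its coordinate by the preferred name found
+    # above rather than by the raw netCDF variable name.)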
+ cube.cell_methods = [ + iris.coords.CellMethod( + method=method.method, + intervals=method.intervals, + comments=method.comments, + coords=[ + names[coord_name] if coord_name in names else coord_name + for coord_name in method.coord_names + ], + ) + for method in cube.cell_methods + ] + + if DEBUG: + # Show activation statistics for this data-var (i.e. cube). + _actions_activation_stats(engine, cf_var.cf_name) + + return cube + + +def _load_aux_factory(engine, cube): + """ + Convert any CF-netCDF dimensionless coordinate to an AuxCoordFactory. + + """ + formula_type = engine.requires.get("formula_type") + if formula_type in [ + "atmosphere_hybrid_height_coordinate", + "atmosphere_hybrid_sigma_pressure_coordinate", + "ocean_sigma_z_coordinate", + "ocean_sigma_coordinate", + "ocean_s_coordinate", + "ocean_s_coordinate_g1", + "ocean_s_coordinate_g2", + ]: + + def coord_from_term(term): + # Convert term names to coordinates (via netCDF variable names). + name = engine.requires["formula_terms"].get(term, None) + if name is not None: + for coord, cf_var_name in engine.cube_parts["coordinates"]: + if cf_var_name == name: + return coord + warnings.warn( + "Unable to find coordinate for variable " + "{!r}".format(name) + ) + + if formula_type == "atmosphere_hybrid_height_coordinate": + delta = coord_from_term("a") + sigma = coord_from_term("b") + orography = coord_from_term("orog") + factory = HybridHeightFactory(delta, sigma, orography) + elif formula_type == "atmosphere_hybrid_sigma_pressure_coordinate": + # Hybrid pressure has two valid versions of its formula terms: + # "p0: var1 a: var2 b: var3 ps: var4" or + # "ap: var1 b: var2 ps: var3" where "ap = p0 * a" + # Attempt to get the "ap" term. + delta = coord_from_term("ap") + if delta is None: + # The "ap" term is unavailable, so try getting terms "p0" + # and "a" terms in order to derive an "ap" equivalent term. + coord_p0 = coord_from_term("p0") + if coord_p0 is not None: + if coord_p0.shape != (1,): + msg = ( + "Expecting {!r} to be a scalar reference " + "pressure coordinate, got shape {!r}".format( + coord_p0.var_name, coord_p0.shape + ) + ) + raise ValueError(msg) + if coord_p0.has_bounds(): + msg = ( + "Ignoring atmosphere hybrid sigma pressure " + "scalar coordinate {!r} bounds.".format( + coord_p0.name() + ) + ) + warnings.warn(msg) + coord_a = coord_from_term("a") + if coord_a is not None: + if coord_a.units.is_unknown(): + # Be graceful, and promote unknown to dimensionless units. 
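+                        # ("1" is the udunits symbol for a dimensionless
+                        # unit, so the derived "ap" coordinate below picks
+                        # up its units from "p0" alone.)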
+ coord_a.units = "1" + delta = coord_a * coord_p0.points[0] + delta.units = coord_a.units * coord_p0.units + delta.rename("vertical pressure") + delta.var_name = "ap" + cube.add_aux_coord(delta, cube.coord_dims(coord_a)) + + sigma = coord_from_term("b") + surface_air_pressure = coord_from_term("ps") + factory = HybridPressureFactory(delta, sigma, surface_air_pressure) + elif formula_type == "ocean_sigma_z_coordinate": + sigma = coord_from_term("sigma") + eta = coord_from_term("eta") + depth = coord_from_term("depth") + depth_c = coord_from_term("depth_c") + nsigma = coord_from_term("nsigma") + zlev = coord_from_term("zlev") + factory = OceanSigmaZFactory( + sigma, eta, depth, depth_c, nsigma, zlev + ) + elif formula_type == "ocean_sigma_coordinate": + sigma = coord_from_term("sigma") + eta = coord_from_term("eta") + depth = coord_from_term("depth") + factory = OceanSigmaFactory(sigma, eta, depth) + elif formula_type == "ocean_s_coordinate": + s = coord_from_term("s") + eta = coord_from_term("eta") + depth = coord_from_term("depth") + a = coord_from_term("a") + depth_c = coord_from_term("depth_c") + b = coord_from_term("b") + factory = OceanSFactory(s, eta, depth, a, b, depth_c) + elif formula_type == "ocean_s_coordinate_g1": + s = coord_from_term("s") + c = coord_from_term("c") + eta = coord_from_term("eta") + depth = coord_from_term("depth") + depth_c = coord_from_term("depth_c") + factory = OceanSg1Factory(s, c, eta, depth, depth_c) + elif formula_type == "ocean_s_coordinate_g2": + s = coord_from_term("s") + c = coord_from_term("c") + eta = coord_from_term("eta") + depth = coord_from_term("depth") + depth_c = coord_from_term("depth_c") + factory = OceanSg2Factory(s, c, eta, depth, depth_c) + cube.add_aux_factory(factory) + + +def load_cubes(filenames, callback=None): + """ + Loads cubes from a list of NetCDF filenames/URLs. + + Args: + + * filenames (string/list): + One or more NetCDF filenames/DAP URLs to load from. + + Kwargs: + + * callback (callable function): + Function which can be passed on to :func:`iris.io.run_callback`. + + Returns: + Generator of loaded NetCDF :class:`iris.cube.Cube`. + + """ + # TODO: rationalise UGRID/mesh handling once experimental.ugrid is folded + # into standard behaviour. + # Deferred import to avoid circular imports. + from iris.experimental.ugrid import ( + PARSE_UGRID_ON_LOAD, + CFUGridReader, + _build_mesh, + _build_mesh_coords, + ) + from iris.io import run_callback + + # Create an actions engine. + engine = _actions_engine() + + if isinstance(filenames, str): + filenames = [filenames] + + for filename in filenames: + # Ingest the netCDF file. + meshes = {} + if PARSE_UGRID_ON_LOAD: + cf = CFUGridReader(filename) + + # Mesh instances are shared between file phenomena. + # TODO: more sophisticated Mesh sharing between files. + # TODO: access external Mesh cache? + mesh_vars = cf.cf_group.meshes + meshes = { + name: _build_mesh(cf, var, filename) + for name, var in mesh_vars.items() + } + else: + cf = iris.fileformats.cf.CFReader(filename) + + # Process each CF data variable. + data_variables = list(cf.cf_group.data_variables.values()) + list( + cf.cf_group.promoted.values() + ) + for cf_var in data_variables: + # cf_var-specific mesh handling, if a mesh is present. + # Build the mesh_coords *before* loading the cube - avoids + # mesh-related attributes being picked up by + # _add_unused_attributes(). 
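+            # (Without PARSE_UGRID_ON_LOAD enabled this block is inert:
+            # "meshes" is empty, mesh stays None and mesh_coords stays [].)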
+            mesh_name = None
+            mesh = None
+            mesh_coords, mesh_dim = [], None
+            if PARSE_UGRID_ON_LOAD:
+                mesh_name = getattr(cf_var, "mesh", None)
+                if mesh_name is not None:
+                    try:
+                        mesh = meshes[mesh_name]
+                    except KeyError:
+                        message = (
+                            f"File does not contain mesh: '{mesh_name}' - "
+                            f"referenced by variable: '{cf_var.cf_name}' ."
+                        )
+                        logger.debug(message)
+            if mesh is not None:
+                mesh_coords, mesh_dim = _build_mesh_coords(mesh, cf_var)
+
+            cube = _load_cube(engine, cf, cf_var, filename)
+
+            # Attach the mesh (if present) to the cube.
+            for mesh_coord in mesh_coords:
+                cube.add_aux_coord(mesh_coord, mesh_dim)
+
+            # Process any associated formula terms and attach
+            # the corresponding AuxCoordFactory.
+            try:
+                _load_aux_factory(engine, cube)
+            except ValueError as e:
+                warnings.warn("{}".format(e))
+
+            # Perform any user registered callback function.
+            cube = run_callback(callback, cube, cf_var, filename)
+
+            # Callback mechanism may return None, which must not be yielded
+            if cube is None:
+                continue
+
+            yield cube
diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf/save.py
similarity index 77%
rename from lib/iris/fileformats/netcdf.py
rename to lib/iris/fileformats/netcdf/save.py
index 7bb90665b6..a2afb33dea 100644
--- a/lib/iris/fileformats/netcdf.py
+++ b/lib/iris/fileformats/netcdf/save.py
@@ -4,7 +4,7 @@
 # See COPYING and COPYING.LESSER in the root of the repository for full
 # licensing details.
 """
-Module to support the loading of a NetCDF file into an Iris cube.
+Module to support the saving of Iris cubes to a NetCDF file.
 
 See also: `netCDF4 python `_.
 
@@ -26,7 +26,6 @@
 import numpy as np
 import numpy.ma as ma
 
-from iris._lazy_data import as_lazy_data
 from iris.aux_factory import (
     HybridHeightFactory,
     HybridPressureFactory,
@@ -44,53 +43,9 @@
 import iris.io
 import iris.util
 
-# Show actions activation statistics.
-DEBUG = False
-
-# Configure the logger.
-logger = iris.config.get_logger(__name__)
-
 # Standard CML spatio-temporal axis names.
 SPATIO_TEMPORAL_AXES = ["t", "z", "y", "x"]
 
-# Pass through CF attributes:
-# - comment
-# - Conventions
-# - flag_masks
-# - flag_meanings
-# - flag_values
-# - history
-# - institution
-# - reference
-# - source
-# - title
-# - positive
-#
-_CF_ATTRS = [
-    "add_offset",
-    "ancillary_variables",
-    "axis",
-    "bounds",
-    "calendar",
-    "cell_measures",
-    "cell_methods",
-    "climatology",
-    "compress",
-    "coordinates",
-    "_FillValue",
-    "formula_terms",
-    "grid_mapping",
-    "leap_month",
-    "leap_year",
-    "long_name",
-    "missing_value",
-    "month_lengths",
-    "scale_factor",
-    "standard_error_multiplier",
-    "standard_name",
-    "units",
-]
-
 # CF attributes that should not be global.
 _CF_DATA_ATTRS = [
     "flag_masks",
@@ -157,141 +112,53 @@
 }
 
 
-# Cell methods.
-_CM_KNOWN_METHODS = [
-    "point",
-    "sum",
-    "mean",
-    "maximum",
-    "minimum",
-    "mid_range",
-    "standard_deviation",
-    "variance",
-    "mode",
-    "median",
-]
-
-_CM_COMMENT = "comment"
-_CM_EXTRA = "extra"
-_CM_INTERVAL = "interval"
-_CM_METHOD = "method"
-_CM_NAME = "name"
-_CM_PARSE = re.compile(
-    r"""
-        (?P<name>([\w_]+\s*?:\s+)+)
-        (?P<method>[\w_\s]+(?![\w_]*\s*?:))\s*
-        (?:
-            \(\s*
-            (?P<extra>[^\)]+)
-            \)\s*
-        )?
-    """,
-    re.VERBOSE,
-)
-
-
-class UnknownCellMethodWarning(Warning):
-    pass
+def _bytes_if_ascii(string):
+    """
+    Convert the given string to a byte string (str in py2k, bytes in py3k)
+    if the given string can be encoded to ascii, else maintain the type
+    of the inputted string.
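+
+    For example "units" is returned as b"units", whereas a string holding
+    non-ascii text (say, a degree symbol) is returned unchanged.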
+ Note: passing objects without an `encode` method (such as None) will + be returned by the function unchanged. -def parse_cell_methods(nc_cell_methods): """ - Parse a CF cell_methods attribute string into a tuple of zero or - more CellMethod instances. + if isinstance(string, str): + try: + return string.encode(encoding="ascii") + except (AttributeError, UnicodeEncodeError): + pass + return string - Args: - * nc_cell_methods (str): - The value of the cell methods attribute to be parsed. +def _setncattr(variable, name, attribute): + """ + Put the given attribute on the given netCDF4 Data type, casting + attributes as we go to bytes rather than unicode. - Returns: + """ + attribute = _bytes_if_ascii(attribute) + return variable.setncattr(name, attribute) - * cell_methods - An iterable of :class:`iris.coords.CellMethod`. - Multiple coordinates, intervals and comments are supported. - If a method has a non-standard name a warning will be issued, but the - results are not affected. +class _FillValueMaskCheckAndStoreTarget: + """ + To be used with da.store. Remembers whether any element was equal to a + given value and whether it was masked, before passing the chunk to the + given target. """ - cell_methods = [] - if nc_cell_methods is not None: - for m in _CM_PARSE.finditer(nc_cell_methods): - d = m.groupdict() - method = d[_CM_METHOD] - method = method.strip() - # Check validity of method, allowing for multi-part methods - # e.g. mean over years. - method_words = method.split() - if method_words[0].lower() not in _CM_KNOWN_METHODS: - msg = "NetCDF variable contains unknown cell method {!r}" - warnings.warn( - msg.format("{}".format(method_words[0])), - UnknownCellMethodWarning, - ) - d[_CM_METHOD] = method - name = d[_CM_NAME] - name = name.replace(" ", "") - name = name.rstrip(":") - d[_CM_NAME] = tuple([n for n in name.split(":")]) - interval = [] - comment = [] - if d[_CM_EXTRA] is not None: - # - # tokenise the key words and field colon marker - # - d[_CM_EXTRA] = d[_CM_EXTRA].replace( - "comment:", "<><<:>>" - ) - d[_CM_EXTRA] = d[_CM_EXTRA].replace( - "interval:", "<><<:>>" - ) - d[_CM_EXTRA] = d[_CM_EXTRA].split("<<:>>") - if len(d[_CM_EXTRA]) == 1: - comment.extend(d[_CM_EXTRA]) - else: - next_field_type = comment - for field in d[_CM_EXTRA]: - field_type = next_field_type - index = field.rfind("<>") - if index == 0: - next_field_type = interval - continue - elif index > 0: - next_field_type = interval - else: - index = field.rfind("<>") - if index == 0: - next_field_type = comment - continue - elif index > 0: - next_field_type = comment - if index != -1: - field = field[:index] - field_type.append(field.strip()) - # - # cater for a shared interval over multiple axes - # - if len(interval): - if len(d[_CM_NAME]) != len(interval) and len(interval) == 1: - interval = interval * len(d[_CM_NAME]) - # - # cater for a shared comment over multiple axes - # - if len(comment): - if len(d[_CM_NAME]) != len(comment) and len(comment) == 1: - comment = comment * len(d[_CM_NAME]) - d[_CM_INTERVAL] = tuple(interval) - d[_CM_COMMENT] = tuple(comment) - cell_method = iris.coords.CellMethod( - d[_CM_METHOD], - coords=d[_CM_NAME], - intervals=d[_CM_INTERVAL], - comments=d[_CM_COMMENT], - ) - cell_methods.append(cell_method) - return tuple(cell_methods) + def __init__(self, target, fill_value=None): + self.target = target + self.fill_value = fill_value + self.contains_value = False + self.is_masked = False + + def __setitem__(self, keys, arr): + if self.fill_value is not None: + self.contains_value = 
self.contains_value or self.fill_value in arr + self.is_masked = self.is_masked or ma.is_masked(arr) + self.target[keys] = arr class CFNameCoordMap: @@ -379,488 +246,6 @@ def coord(self, name): return result -def _actions_engine(): - # Return an 'actions engine', which provides a pyke-rules-like interface to - # the core cf translation code. - # Deferred import to avoid circularity. - import iris.fileformats._nc_load_rules.engine as nc_actions_engine - - engine = nc_actions_engine.Engine() - return engine - - -class NetCDFDataProxy: - """A reference to the data payload of a single NetCDF file variable.""" - - __slots__ = ("shape", "dtype", "path", "variable_name", "fill_value") - - def __init__(self, shape, dtype, path, variable_name, fill_value): - self.shape = shape - self.dtype = dtype - self.path = path - self.variable_name = variable_name - self.fill_value = fill_value - - @property - def ndim(self): - return len(self.shape) - - def __getitem__(self, keys): - dataset = netCDF4.Dataset(self.path) - try: - variable = dataset.variables[self.variable_name] - # Get the NetCDF variable data and slice. - var = variable[keys] - finally: - dataset.close() - return np.asanyarray(var) - - def __repr__(self): - fmt = ( - "<{self.__class__.__name__} shape={self.shape}" - " dtype={self.dtype!r} path={self.path!r}" - " variable_name={self.variable_name!r}>" - ) - return fmt.format(self=self) - - def __getstate__(self): - return {attr: getattr(self, attr) for attr in self.__slots__} - - def __setstate__(self, state): - for key, value in state.items(): - setattr(self, key, value) - - -def _assert_case_specific_facts(engine, cf, cf_group): - # Initialise a data store for built cube elements. - # This is used to patch element attributes *not* setup by the actions - # process, after the actions code has run. - engine.cube_parts["coordinates"] = [] - engine.cube_parts["cell_measures"] = [] - engine.cube_parts["ancillary_variables"] = [] - - # Assert facts for CF coordinates. - for cf_name in cf_group.coordinates.keys(): - engine.add_case_specific_fact("coordinate", (cf_name,)) - - # Assert facts for CF auxiliary coordinates. - for cf_name in cf_group.auxiliary_coordinates.keys(): - engine.add_case_specific_fact("auxiliary_coordinate", (cf_name,)) - - # Assert facts for CF cell measures. - for cf_name in cf_group.cell_measures.keys(): - engine.add_case_specific_fact("cell_measure", (cf_name,)) - - # Assert facts for CF ancillary variables. - for cf_name in cf_group.ancillary_variables.keys(): - engine.add_case_specific_fact("ancillary_variable", (cf_name,)) - - # Assert facts for CF grid_mappings. - for cf_name in cf_group.grid_mappings.keys(): - engine.add_case_specific_fact("grid_mapping", (cf_name,)) - - # Assert facts for CF labels. - for cf_name in cf_group.labels.keys(): - engine.add_case_specific_fact("label", (cf_name,)) - - # Assert facts for CF formula terms associated with the cf_group - # of the CF data variable. - - # Collect varnames of formula-root variables as we go. - # NOTE: use dictionary keys as an 'OrderedSet' - # - see: https://stackoverflow.com/a/53657523/2615050 - # This is to ensure that we can handle the resulting facts in a definite - # order, as using a 'set' led to indeterminate results. - formula_root = {} - for cf_var in cf.cf_group.formula_terms.values(): - for cf_root, cf_term in cf_var.cf_terms_by_root.items(): - # Only assert this fact if the formula root variable is - # defined in the CF group of the CF data variable. 
- if cf_root in cf_group: - formula_root[cf_root] = True - engine.add_case_specific_fact( - "formula_term", - (cf_var.cf_name, cf_root, cf_term), - ) - - for cf_root in formula_root.keys(): - engine.add_case_specific_fact("formula_root", (cf_root,)) - - -def _actions_activation_stats(engine, cf_name): - print("-" * 80) - print("CF Data Variable: %r" % cf_name) - - engine.print_stats() - - print("Rules Triggered:") - - for rule in sorted(list(engine.rule_triggered)): - print("\t%s" % rule) - - print("Case Specific Facts:") - kb_facts = engine.get_kb() - - for key in kb_facts.entity_lists.keys(): - for arg in kb_facts.entity_lists[key].case_specific_facts: - print("\t%s%s" % (key, arg)) - - -def _set_attributes(attributes, key, value): - """Set attributes dictionary, converting unicode strings appropriately.""" - - if isinstance(value, str): - try: - attributes[str(key)] = str(value) - except UnicodeEncodeError: - attributes[str(key)] = value - else: - attributes[str(key)] = value - - -def _add_unused_attributes(iris_object, cf_var): - """ - Populate the attributes of a cf element with the "unused" attributes - from the associated CF-netCDF variable. That is, all those that aren't CF - reserved terms. - - """ - - def attribute_predicate(item): - return item[0] not in _CF_ATTRS - - tmpvar = filter(attribute_predicate, cf_var.cf_attrs_unused()) - for attr_name, attr_value in tmpvar: - _set_attributes(iris_object.attributes, attr_name, attr_value) - - -def _get_actual_dtype(cf_var): - # Figure out what the eventual data type will be after any scale/offset - # transforms. - dummy_data = np.zeros(1, dtype=cf_var.dtype) - if hasattr(cf_var, "scale_factor"): - dummy_data = cf_var.scale_factor * dummy_data - if hasattr(cf_var, "add_offset"): - dummy_data = cf_var.add_offset + dummy_data - return dummy_data.dtype - - -def _get_cf_var_data(cf_var, filename): - # Get lazy chunked data out of a cf variable. - dtype = _get_actual_dtype(cf_var) - - # Create cube with deferred data, but no metadata - fill_value = getattr( - cf_var.cf_data, - "_FillValue", - netCDF4.default_fillvals[cf_var.dtype.str[1:]], - ) - proxy = NetCDFDataProxy( - cf_var.shape, dtype, filename, cf_var.cf_name, fill_value - ) - # Get the chunking specified for the variable : this is either a shape, or - # maybe the string "contiguous". - chunks = cf_var.cf_data.chunking() - # In the "contiguous" case, pass chunks=None to 'as_lazy_data'. - if chunks == "contiguous": - chunks = None - return as_lazy_data(proxy, chunks=chunks) - - -class OrderedAddableList(list): - # Used purely in actions debugging, to accumulate a record of which actions - # were activated. - # It replaces a set, so as to record the ordering of operations, with - # possible repeats, and it also numbers the entries. - # Actions routines invoke the 'add' method, which thus effectively converts - # a set.add into a list.append. - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._n_add = 0 - - def add(self, msg): - self._n_add += 1 - n_add = self._n_add - self.append(f"#{n_add:03d} : {msg}") - - -def _load_cube(engine, cf, cf_var, filename): - from iris.cube import Cube - - """Create the cube associated with the CF-netCDF data variable.""" - data = _get_cf_var_data(cf_var, filename) - cube = Cube(data) - - # Reset the actions engine. - engine.reset() - - # Initialise engine rule processing hooks. 
- engine.cf_var = cf_var - engine.cube = cube - engine.cube_parts = {} - engine.requires = {} - engine.rule_triggered = OrderedAddableList() - engine.filename = filename - - # Assert all the case-specific facts. - # This extracts 'facts' specific to this data-variable (aka cube), from - # the info supplied in the CFGroup object. - _assert_case_specific_facts(engine, cf, cf_var.cf_group) - - # Run the actions engine. - # This creates various cube elements and attaches them to the cube. - # It also records various other info on the engine, to be processed later. - engine.activate() - - # Having run the rules, now add the "unused" attributes to each cf element. - def fix_attributes_all_elements(role_name): - elements_and_names = engine.cube_parts.get(role_name, []) - - for iris_object, cf_var_name in elements_and_names: - _add_unused_attributes(iris_object, cf.cf_group[cf_var_name]) - - # Populate the attributes of all coordinates, cell-measures and ancillary-vars. - fix_attributes_all_elements("coordinates") - fix_attributes_all_elements("ancillary_variables") - fix_attributes_all_elements("cell_measures") - - # Also populate attributes of the top-level cube itself. - _add_unused_attributes(cube, cf_var) - - # Work out reference names for all the coords. - names = { - coord.var_name: coord.standard_name or coord.var_name or "unknown" - for coord in cube.coords() - } - - # Add all the cube cell methods. - cube.cell_methods = [ - iris.coords.CellMethod( - method=method.method, - intervals=method.intervals, - comments=method.comments, - coords=[ - names[coord_name] if coord_name in names else coord_name - for coord_name in method.coord_names - ], - ) - for method in cube.cell_methods - ] - - if DEBUG: - # Show activation statistics for this data-var (i.e. cube). - _actions_activation_stats(engine, cf_var.cf_name) - - return cube - - -def _load_aux_factory(engine, cube): - """ - Convert any CF-netCDF dimensionless coordinate to an AuxCoordFactory. - - """ - formula_type = engine.requires.get("formula_type") - if formula_type in [ - "atmosphere_hybrid_height_coordinate", - "atmosphere_hybrid_sigma_pressure_coordinate", - "ocean_sigma_z_coordinate", - "ocean_sigma_coordinate", - "ocean_s_coordinate", - "ocean_s_coordinate_g1", - "ocean_s_coordinate_g2", - ]: - - def coord_from_term(term): - # Convert term names to coordinates (via netCDF variable names). - name = engine.requires["formula_terms"].get(term, None) - if name is not None: - for coord, cf_var_name in engine.cube_parts["coordinates"]: - if cf_var_name == name: - return coord - warnings.warn( - "Unable to find coordinate for variable " - "{!r}".format(name) - ) - - if formula_type == "atmosphere_hybrid_height_coordinate": - delta = coord_from_term("a") - sigma = coord_from_term("b") - orography = coord_from_term("orog") - factory = HybridHeightFactory(delta, sigma, orography) - elif formula_type == "atmosphere_hybrid_sigma_pressure_coordinate": - # Hybrid pressure has two valid versions of its formula terms: - # "p0: var1 a: var2 b: var3 ps: var4" or - # "ap: var1 b: var2 ps: var3" where "ap = p0 * a" - # Attempt to get the "ap" term. - delta = coord_from_term("ap") - if delta is None: - # The "ap" term is unavailable, so try getting terms "p0" - # and "a" terms in order to derive an "ap" equivalent term. 
- coord_p0 = coord_from_term("p0") - if coord_p0 is not None: - if coord_p0.shape != (1,): - msg = ( - "Expecting {!r} to be a scalar reference " - "pressure coordinate, got shape {!r}".format( - coord_p0.var_name, coord_p0.shape - ) - ) - raise ValueError(msg) - if coord_p0.has_bounds(): - msg = ( - "Ignoring atmosphere hybrid sigma pressure " - "scalar coordinate {!r} bounds.".format( - coord_p0.name() - ) - ) - warnings.warn(msg) - coord_a = coord_from_term("a") - if coord_a is not None: - if coord_a.units.is_unknown(): - # Be graceful, and promote unknown to dimensionless units. - coord_a.units = "1" - delta = coord_a * coord_p0.points[0] - delta.units = coord_a.units * coord_p0.units - delta.rename("vertical pressure") - delta.var_name = "ap" - cube.add_aux_coord(delta, cube.coord_dims(coord_a)) - - sigma = coord_from_term("b") - surface_air_pressure = coord_from_term("ps") - factory = HybridPressureFactory(delta, sigma, surface_air_pressure) - elif formula_type == "ocean_sigma_z_coordinate": - sigma = coord_from_term("sigma") - eta = coord_from_term("eta") - depth = coord_from_term("depth") - depth_c = coord_from_term("depth_c") - nsigma = coord_from_term("nsigma") - zlev = coord_from_term("zlev") - factory = OceanSigmaZFactory( - sigma, eta, depth, depth_c, nsigma, zlev - ) - elif formula_type == "ocean_sigma_coordinate": - sigma = coord_from_term("sigma") - eta = coord_from_term("eta") - depth = coord_from_term("depth") - factory = OceanSigmaFactory(sigma, eta, depth) - elif formula_type == "ocean_s_coordinate": - s = coord_from_term("s") - eta = coord_from_term("eta") - depth = coord_from_term("depth") - a = coord_from_term("a") - depth_c = coord_from_term("depth_c") - b = coord_from_term("b") - factory = OceanSFactory(s, eta, depth, a, b, depth_c) - elif formula_type == "ocean_s_coordinate_g1": - s = coord_from_term("s") - c = coord_from_term("c") - eta = coord_from_term("eta") - depth = coord_from_term("depth") - depth_c = coord_from_term("depth_c") - factory = OceanSg1Factory(s, c, eta, depth, depth_c) - elif formula_type == "ocean_s_coordinate_g2": - s = coord_from_term("s") - c = coord_from_term("c") - eta = coord_from_term("eta") - depth = coord_from_term("depth") - depth_c = coord_from_term("depth_c") - factory = OceanSg2Factory(s, c, eta, depth, depth_c) - cube.add_aux_factory(factory) - - -def load_cubes(filenames, callback=None): - """ - Loads cubes from a list of NetCDF filenames/URLs. - - Args: - - * filenames (string/list): - One or more NetCDF filenames/DAP URLs to load from. - - Kwargs: - - * callback (callable function): - Function which can be passed on to :func:`iris.io.run_callback`. - - Returns: - Generator of loaded NetCDF :class:`iris.cube.Cube`. - - """ - # TODO: rationalise UGRID/mesh handling once experimental.ugrid is folded - # into standard behaviour. - # Deferred import to avoid circular imports. - from iris.experimental.ugrid import ( - PARSE_UGRID_ON_LOAD, - CFUGridReader, - _build_mesh_coords, - _meshes_from_cf, - ) - from iris.io import run_callback - - # Create an actions engine. - engine = _actions_engine() - - if isinstance(filenames, str): - filenames = [filenames] - - for filename in filenames: - # Ingest the netCDF file. - meshes = {} - if PARSE_UGRID_ON_LOAD: - cf = CFUGridReader(filename) - meshes = _meshes_from_cf(cf) - else: - cf = iris.fileformats.cf.CFReader(filename) - - # Process each CF data variable. 
- data_variables = list(cf.cf_group.data_variables.values()) + list( - cf.cf_group.promoted.values() - ) - for cf_var in data_variables: - # cf_var-specific mesh handling, if a mesh is present. - # Build the mesh_coords *before* loading the cube - avoids - # mesh-related attributes being picked up by - # _add_unused_attributes(). - mesh_name = None - mesh = None - mesh_coords, mesh_dim = [], None - if PARSE_UGRID_ON_LOAD: - mesh_name = getattr(cf_var, "mesh", None) - if mesh_name is not None: - try: - mesh = meshes[mesh_name] - except KeyError: - message = ( - f"File does not contain mesh: '{mesh_name}' - " - f"referenced by variable: '{cf_var.cf_name}' ." - ) - logger.debug(message) - if mesh is not None: - mesh_coords, mesh_dim = _build_mesh_coords(mesh, cf_var) - - cube = _load_cube(engine, cf, cf_var, filename) - - # Attach the mesh (if present) to the cube. - for mesh_coord in mesh_coords: - cube.add_aux_coord(mesh_coord, mesh_dim) - - # Process any associated formula terms and attach - # the corresponding AuxCoordFactory. - try: - _load_aux_factory(engine, cube) - except ValueError as e: - warnings.warn("{}".format(e)) - - # Perform any user registered callback function. - cube = run_callback(callback, cube, cf_var, filename) - - # Callback mechanism may return None, which must not be yielded - if cube is None: - continue - - yield cube - - def _bytes_if_ascii(string): """ Convert the given string to a byte string (str in py2k, bytes in py3k) @@ -978,7 +363,7 @@ def __init__(self, filename, netcdf_format): def __enter__(self): return self - def __exit__(self, type, value, traceback): + def __exit__(self, exc_type, exc_value, traceback): """Flush any buffered data to the CF-netCDF file before closing.""" self._dataset.sync() diff --git a/lib/iris/io/__init__.py b/lib/iris/io/__init__.py index 64501afd1e..9af09d8f30 100644 --- a/lib/iris/io/__init__.py +++ b/lib/iris/io/__init__.py @@ -275,7 +275,7 @@ def _check_init_savers(): _savers.update( { "pp": pp.save, - "nc": netcdf.save, + "nc": netcdf.save.save, "dot": _dot_save, "dotpng": _dot_save_png, "grib2": _grib_save, From b9df13711c13909e7e1da27f2d61f8fbec72af68 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 23 Aug 2021 12:01:13 +0100 Subject: [PATCH 2/7] For now, publish all previously public members of 'netcdf'. --- lib/iris/fileformats/_nc_load_rules/engine.py | 12 ++++++------ lib/iris/fileformats/netcdf/__init__.py | 19 +++++++++++++++++++ .../test_build_auxiliary_coordinate.py | 6 +++--- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/lib/iris/fileformats/_nc_load_rules/engine.py b/lib/iris/fileformats/_nc_load_rules/engine.py index 497c2a12c9..2e05b615ed 100644 --- a/lib/iris/fileformats/_nc_load_rules/engine.py +++ b/lib/iris/fileformats/_nc_load_rules/engine.py @@ -5,7 +5,7 @@ # licensing details. """ A simple mimic of the Pyke 'knowledge_engine', for interfacing to the routines -in 'iris.fileformats.netcdf' with minimal changes to that code. +in 'iris.fileformats.netcdf.load' with minimal changes to that code. This allows us to replace the Pyke rules operation with the simpler pure-Python translation operations in :mod:`iris.fileformats._nc_load_rules.actions`. @@ -15,7 +15,7 @@ engine.get_kb() also returns a FactEntity object, which mimics *just enough* API of a Pyke.knowlege_base, so that we can list its case-specific facts, as -used in :meth:`iris.fileformats.netcdf._actions_activation_stats`. +used in :meth:`iris.fileformats.netcdf.load._actions_activation_stats`. 
""" from .actions import run_actions @@ -66,7 +66,7 @@ class Engine: A minimal mimic of a Pyke.engine. Provides just enough API so that the existing code in - :mod:`iris.fileformats.netcdf` can interface with our new rules functions. + :mod:`iris.fileformats.netcdf.load` can interface with our new rules functions. A list of possible fact-arglists is stored, for each of a set of fact-names (which are strings). @@ -91,7 +91,7 @@ def activate(self): set by engine.cf_var (a CFDataVariable). The rules operation itself is coded elsewhere, - in :mod:`iris.fileformats.netcdf._nc_load_rules.actions`. + in :mod:`iris.fileformats.netcdf.load._nc_load_rules.actions`. """ run_actions(self) @@ -101,7 +101,7 @@ def get_kb(self): Get a FactEntity, which mimic (bits of) a knowledge-base. Just allowing - :meth:`iris.fileformats.netcdf._action_activation_stats` to list the + :meth:`iris.fileformats.netcdf.load._action_activation_stats` to list the facts. """ @@ -110,7 +110,7 @@ def get_kb(self): def print_stats(self): """ No-op, called by - :meth:`iris.fileformats.netcdf._action_activation_stats`. + :meth:`iris.fileformats.netcdf.load._action_activation_stats`. """ pass diff --git a/lib/iris/fileformats/netcdf/__init__.py b/lib/iris/fileformats/netcdf/__init__.py index ba850e540c..f7aee48f60 100644 --- a/lib/iris/fileformats/netcdf/__init__.py +++ b/lib/iris/fileformats/netcdf/__init__.py @@ -7,3 +7,22 @@ A package for loading and saving cubes to and from netcdf files. """ +from .load import ( + NetCDFDataProxy, + OrderedAddableList, + UnknownCellMethodWarning, + load_cubes, + parse_cell_methods, +) +from .save import CFNameCoordMap, Saver, save + +__all__ = [ + "CFNameCoordMap", + "NetCDFDataProxy", + "OrderedAddableList", + "Saver", + "UnknownCellMethodWarning", + "load_cubes", + "parse_cell_methods", + "save", +] diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_auxiliary_coordinate.py b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_auxiliary_coordinate.py index 95f892454b..0ce0a1c622 100644 --- a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_auxiliary_coordinate.py +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_auxiliary_coordinate.py @@ -77,7 +77,7 @@ def patched__getitem__(proxy_self, keys): raise RuntimeError() self.patch( - "iris.fileformats.netcdf.NetCDFDataProxy.__getitem__", + "iris.fileformats.netcdf.load.NetCDFDataProxy.__getitem__", new=patched__getitem__, ) @@ -180,7 +180,7 @@ def patched__getitem__(proxy_self, keys): raise RuntimeError() self.deferred_load_patch = mock.patch( - "iris.fileformats.netcdf.NetCDFDataProxy.__getitem__", + "iris.fileformats.netcdf.load.NetCDFDataProxy.__getitem__", new=patched__getitem__, ) @@ -264,7 +264,7 @@ def patched__getitem__(proxy_self, keys): raise RuntimeError() self.patch( - "iris.fileformats.netcdf.NetCDFDataProxy.__getitem__", + "iris.fileformats.netcdf.load.NetCDFDataProxy.__getitem__", new=patched__getitem__, ) From f5bee438f5e931ea7f53eafbbc9c39c99b80d8f5 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 23 Aug 2021 12:28:04 +0100 Subject: [PATCH 3/7] Fix and re-locate netcdf test routines. 
--- lib/iris/fileformats/_nc_load_rules/helpers.py | 3 +-- lib/iris/tests/integration/test_netcdf.py | 7 ++----- lib/iris/tests/test_netcdf.py | 6 +++--- .../fileformats/nc_load_rules/actions/__init__.py | 12 ++++++------ .../tests/unit/fileformats/netcdf/load/__init__.py | 6 ++++++ .../netcdf/{ => load}/test__get_cf_var_data.py | 4 ++-- .../netcdf/{ => load}/test__load_aux_factory.py | 4 ++-- .../fileformats/netcdf/{ => load}/test__load_cube.py | 8 ++++---- .../fileformats/netcdf/{ => load}/test_load_cubes.py | 12 ++++++------ .../netcdf/{ => load}/test_parse_cell_methods.py | 4 ++-- .../tests/unit/fileformats/netcdf/save/__init__.py | 6 ++++++ .../unit/fileformats/netcdf/{ => save}/test_Saver.py | 4 ++-- .../test__FillValueMaskCheckAndStoreTarget.py | 4 ++-- .../unit/fileformats/netcdf/{ => save}/test_save.py | 2 +- 14 files changed, 45 insertions(+), 37 deletions(-) create mode 100644 lib/iris/tests/unit/fileformats/netcdf/load/__init__.py rename lib/iris/tests/unit/fileformats/netcdf/{ => load}/test__get_cf_var_data.py (94%) rename lib/iris/tests/unit/fileformats/netcdf/{ => load}/test__load_aux_factory.py (98%) rename lib/iris/tests/unit/fileformats/netcdf/{ => load}/test__load_cube.py (95%) rename lib/iris/tests/unit/fileformats/netcdf/{ => load}/test_load_cubes.py (96%) rename lib/iris/tests/unit/fileformats/netcdf/{ => load}/test_parse_cell_methods.py (97%) create mode 100644 lib/iris/tests/unit/fileformats/netcdf/save/__init__.py rename lib/iris/tests/unit/fileformats/netcdf/{ => save}/test_Saver.py (99%) rename lib/iris/tests/unit/fileformats/netcdf/{ => save}/test__FillValueMaskCheckAndStoreTarget.py (95%) rename lib/iris/tests/unit/fileformats/netcdf/{ => save}/test_save.py (99%) diff --git a/lib/iris/fileformats/_nc_load_rules/helpers.py b/lib/iris/fileformats/_nc_load_rules/helpers.py index a5b507d583..5ac2823dc9 100644 --- a/lib/iris/fileformats/_nc_load_rules/helpers.py +++ b/lib/iris/fileformats/_nc_load_rules/helpers.py @@ -26,8 +26,7 @@ import iris.coords import iris.exceptions import iris.fileformats.cf as cf -import iris.fileformats.netcdf -from iris.fileformats.netcdf import ( +from iris.fileformats.netcdf.load import ( UnknownCellMethodWarning, _get_cf_var_data, parse_cell_methods, diff --git a/lib/iris/tests/integration/test_netcdf.py b/lib/iris/tests/integration/test_netcdf.py index 3ff5bbb19d..471f688255 100644 --- a/lib/iris/tests/integration/test_netcdf.py +++ b/lib/iris/tests/integration/test_netcdf.py @@ -26,11 +26,8 @@ import iris from iris.coords import CellMethod from iris.cube import Cube, CubeList -from iris.fileformats.netcdf import ( - CF_CONVENTIONS_VERSION, - Saver, - UnknownCellMethodWarning, -) +from iris.fileformats.netcdf.load import UnknownCellMethodWarning +from iris.fileformats.netcdf.save import CF_CONVENTIONS_VERSION, Saver import iris.tests.stock as stock diff --git a/lib/iris/tests/test_netcdf.py b/lib/iris/tests/test_netcdf.py index 969d987af3..4940f42236 100644 --- a/lib/iris/tests/test_netcdf.py +++ b/lib/iris/tests/test_netcdf.py @@ -28,7 +28,7 @@ import iris.analysis.trajectory import iris.coord_systems as icoord_systems from iris.fileformats._nc_load_rules import helpers as ncload_helpers -import iris.fileformats.netcdf +from iris.fileformats.netcdf.save import Saver import iris.std_names import iris.tests.stock as stock import iris.util @@ -345,7 +345,7 @@ def test_noexist_directory(self): dir_name = os.path.join(tempfile.gettempdir(), "non_existent_dir") fnme = os.path.join(dir_name, "tmp.nc") with self.assertRaises(IOError): - with 
iris.fileformats.netcdf.Saver(fnme, "NETCDF4"): + with Saver(fnme, "NETCDF4"): pass def test_bad_permissions(self): @@ -359,7 +359,7 @@ def test_bad_permissions(self): try: os.chmod(dir_name, stat.S_IREAD) with self.assertRaises(PermissionError): - iris.fileformats.netcdf.Saver(fname, "NETCDF4") + Saver(fname, "NETCDF4") self.assertFalse(os.path.exists(fname)) finally: shutil.rmtree(dir_name) diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/actions/__init__.py b/lib/iris/tests/unit/fileformats/nc_load_rules/actions/__init__.py index 717e5b5c41..c0d87d3ee5 100644 --- a/lib/iris/tests/unit/fileformats/nc_load_rules/actions/__init__.py +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/actions/__init__.py @@ -7,7 +7,7 @@ Unit tests for the module :mod:`iris.fileformats._nc_load_rules.actions`. This module provides the engine.activate() call used in the function -`iris.fileformats.netcdf._load_cube`. +`iris.fileformats.netcdf.load._load_cube`. """ from pathlib import Path @@ -17,8 +17,8 @@ import iris.fileformats._nc_load_rules.engine from iris.fileformats.cf import CFReader -import iris.fileformats.netcdf -from iris.fileformats.netcdf import _load_cube +import iris.fileformats.netcdf.load +from iris.fileformats.netcdf.load import _load_cube """ Notes on testing method. @@ -92,11 +92,11 @@ def load_cube_from_cdl(self, cdl_string, cdl_path, nc_path): # Grab a data variable : FOR NOW always grab the 'phenom' variable. cf_var = cf.cf_group.data_variables["phenom"] - engine = iris.fileformats.netcdf._actions_engine() + engine = iris.fileformats.netcdf.load._actions_engine() # If debug enabled, switch on the activation summary debug output. # Use 'patch' so it is restored after the test. - self.patch("iris.fileformats.netcdf.DEBUG", self.debug) + self.patch("iris.fileformats.netcdf.load.DEBUG", self.debug) # Call the main translation function to load a single cube. # _load_cube establishes per-cube facts, activates rules and @@ -107,7 +107,7 @@ def load_cube_from_cdl(self, cdl_string, cdl_path, nc_path): # by the rules operation. # Unlike the other translations, _load_cube does *not* convert this # information into actual cube elements. That is instead done by - # `iris.fileformats.netcdf._load_aux_factory`. + # `iris.fileformats.netcdf.load._load_aux_factory`. # For rules testing, it is anyway more convenient to deal with the raw # data, as each factory type has different validity requirements to # build it, and none of that is relevant to the rules operation. diff --git a/lib/iris/tests/unit/fileformats/netcdf/load/__init__.py b/lib/iris/tests/unit/fileformats/netcdf/load/__init__.py new file mode 100644 index 0000000000..4dffee44ae --- /dev/null +++ b/lib/iris/tests/unit/fileformats/netcdf/load/__init__.py @@ -0,0 +1,6 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. 
+"""Unit tests for the :mod:`iris.fileformats.netcdf.load` module.""" diff --git a/lib/iris/tests/unit/fileformats/netcdf/test__get_cf_var_data.py b/lib/iris/tests/unit/fileformats/netcdf/load/test__get_cf_var_data.py similarity index 94% rename from lib/iris/tests/unit/fileformats/netcdf/test__get_cf_var_data.py rename to lib/iris/tests/unit/fileformats/netcdf/load/test__get_cf_var_data.py index 1bf39591d2..380447ff0e 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test__get_cf_var_data.py +++ b/lib/iris/tests/unit/fileformats/netcdf/load/test__get_cf_var_data.py @@ -3,7 +3,7 @@ # This file is part of Iris and is released under the LGPL license. # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. -"""Unit tests for the `iris.fileformats.netcdf._get_cf_var_data` function.""" +"""Unit tests for the `iris.fileformats.netcdf.load._get_cf_var_data` function.""" # Import iris.tests first so that some things can be initialised before # importing anything else. @@ -16,7 +16,7 @@ from iris._lazy_data import _optimum_chunksize import iris.fileformats.cf -from iris.fileformats.netcdf import _get_cf_var_data +from iris.fileformats.netcdf.load import _get_cf_var_data class Test__get_cf_var_data(tests.IrisTest): diff --git a/lib/iris/tests/unit/fileformats/netcdf/test__load_aux_factory.py b/lib/iris/tests/unit/fileformats/netcdf/load/test__load_aux_factory.py similarity index 98% rename from lib/iris/tests/unit/fileformats/netcdf/test__load_aux_factory.py rename to lib/iris/tests/unit/fileformats/netcdf/load/test__load_aux_factory.py index eb9da6b5d6..ac5fa32cab 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test__load_aux_factory.py +++ b/lib/iris/tests/unit/fileformats/netcdf/load/test__load_aux_factory.py @@ -3,7 +3,7 @@ # This file is part of Iris and is released under the LGPL license. # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. -"""Unit tests for the `iris.fileformats.netcdf._load_aux_factory` function.""" +"""Unit tests for the `iris.fileformats.netcdf.load._load_aux_factory` function.""" # Import iris.tests first so that some things can be initialised before # importing anything else. @@ -16,7 +16,7 @@ from iris.coords import DimCoord from iris.cube import Cube -from iris.fileformats.netcdf import _load_aux_factory +from iris.fileformats.load.netcdf import _load_aux_factory class TestAtmosphereHybridSigmaPressureCoordinate(tests.IrisTest): diff --git a/lib/iris/tests/unit/fileformats/netcdf/test__load_cube.py b/lib/iris/tests/unit/fileformats/netcdf/load/test__load_cube.py similarity index 95% rename from lib/iris/tests/unit/fileformats/netcdf/test__load_cube.py rename to lib/iris/tests/unit/fileformats/netcdf/load/test__load_cube.py index 0e98eec916..79ad4519a2 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test__load_cube.py +++ b/lib/iris/tests/unit/fileformats/netcdf/load/test__load_cube.py @@ -3,7 +3,7 @@ # This file is part of Iris and is released under the LGPL license. # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. -"""Unit tests for the `iris.fileformats.netcdf._load_cube` function.""" +"""Unit tests for the `iris.fileformats.netcdf.load._load_cube` function.""" # Import iris.tests first so that some things can be initialised before # importing anything else. 
@@ -15,7 +15,7 @@ from iris.coords import DimCoord import iris.fileformats.cf -from iris.fileformats.netcdf import _load_cube +from iris.fileformats.netcdf.load import _load_cube class TestCoordAttributes(tests.IrisTest): @@ -28,7 +28,7 @@ def _patcher(engine, cf, cf_group): engine.cube_parts["coordinates"] = coordinates def setUp(self): - this = "iris.fileformats.netcdf._assert_case_specific_facts" + this = "iris.fileformats.netcdf.load._assert_case_specific_facts" patch = mock.patch(this, side_effect=self._patcher) patch.start() self.addCleanup(patch.stop) @@ -112,7 +112,7 @@ def test_flag_pass_thru_multi(self): class TestCubeAttributes(tests.IrisTest): def setUp(self): - this = "iris.fileformats.netcdf._assert_case_specific_facts" + this = "iris.fileformats.netcdf.load._assert_case_specific_facts" patch = mock.patch(this) patch.start() self.addCleanup(patch.stop) diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_load_cubes.py b/lib/iris/tests/unit/fileformats/netcdf/load/test_load_cubes.py similarity index 96% rename from lib/iris/tests/unit/fileformats/netcdf/test_load_cubes.py rename to lib/iris/tests/unit/fileformats/netcdf/load/test_load_cubes.py index c4c868cd59..9c07e68ac7 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_load_cubes.py +++ b/lib/iris/tests/unit/fileformats/netcdf/load/test_load_cubes.py @@ -4,7 +4,7 @@ # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. """ -Unit tests for the :func:`iris.fileformats.netcdf.load_cubes` function. +Unit tests for the :func:`iris.fileformats.netcdf.load.load_cubes` function. todo: migrate the remaining unit-esque tests from iris.tests.test_netcdf, switching to use netcdf.load_cubes() instead of iris.load()/load_cube(). @@ -21,7 +21,7 @@ from iris.coords import AncillaryVariable, CellMeasure from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD, MeshCoord -from iris.fileformats.netcdf import load_cubes, logger +from iris.fileformats.netcdf.load import load_cubes, logger # Import iris.tests first so that some things can be initialised before # importing anything else. @@ -77,7 +77,7 @@ def test_ancillary_variables(self): """ nc_path = cdl_to_nc(ref_cdl) - # Load with iris.fileformats.netcdf.load_cubes, and check expected content. + # Load with load_cubes, and check expected content. cubes = list(load_cubes(nc_path)) self.assertEqual(len(cubes), 1) avs = cubes[0].ancillary_variables() @@ -116,7 +116,7 @@ def test_status_flags(self): """ nc_path = cdl_to_nc(ref_cdl) - # Load with iris.fileformats.netcdf.load_cubes, and check expected content. + # Load with load_cubes, and check expected content. cubes = list(load_cubes(nc_path)) self.assertEqual(len(cubes), 1) avs = cubes[0].ancillary_variables() @@ -163,7 +163,7 @@ def test_cell_measures(self): """ nc_path = cdl_to_nc(ref_cdl) - # Load with iris.fileformats.netcdf.load_cubes, and check expected content. + # Load with load_cubes, and check expected content. cubes = list(load_cubes(nc_path)) self.assertEqual(len(cubes), 1) cms = cubes[0].cell_measures() @@ -207,7 +207,7 @@ def test_default_units(self): """ nc_path = cdl_to_nc(ref_cdl) - # Load with iris.fileformats.netcdf.load_cubes, and check expected content. + # Load with load_cubes, and check expected content. 
cubes = list(load_cubes(nc_path)) self.assertEqual(len(cubes), 1) self.assertEqual(cubes[0].units, as_unit("unknown")) diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_parse_cell_methods.py b/lib/iris/tests/unit/fileformats/netcdf/load/test_parse_cell_methods.py similarity index 97% rename from lib/iris/tests/unit/fileformats/netcdf/test_parse_cell_methods.py rename to lib/iris/tests/unit/fileformats/netcdf/load/test_parse_cell_methods.py index 9c4fbf622b..ed92d34e84 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_parse_cell_methods.py +++ b/lib/iris/tests/unit/fileformats/netcdf/load/test_parse_cell_methods.py @@ -4,7 +4,7 @@ # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. """ -Unit tests for :func:`iris.fileformats.netcdf.parse_cell_methods`. +Unit tests for :func:`iris.fileformats.netcdf.load.parse_cell_methods`. """ @@ -15,7 +15,7 @@ from unittest import mock from iris.coords import CellMethod -from iris.fileformats.netcdf import parse_cell_methods +from iris.fileformats.netcdf.load import parse_cell_methods class Test(tests.IrisTest): diff --git a/lib/iris/tests/unit/fileformats/netcdf/save/__init__.py b/lib/iris/tests/unit/fileformats/netcdf/save/__init__.py new file mode 100644 index 0000000000..5f6a9ee6ef --- /dev/null +++ b/lib/iris/tests/unit/fileformats/netcdf/save/__init__.py @@ -0,0 +1,6 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +"""Unit tests for the :mod:`iris.fileformats.netcdf.save` module.""" diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py b/lib/iris/tests/unit/fileformats/netcdf/save/test_Saver.py similarity index 99% rename from lib/iris/tests/unit/fileformats/netcdf/test_Saver.py rename to lib/iris/tests/unit/fileformats/netcdf/save/test_Saver.py index 2b0372dfa9..eb9e079d11 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py +++ b/lib/iris/tests/unit/fileformats/netcdf/save/test_Saver.py @@ -32,7 +32,7 @@ ) from iris.coords import DimCoord from iris.cube import Cube -from iris.fileformats.netcdf import Saver +from iris.fileformats.netcdf.save import Saver import iris.tests.stock as stock @@ -185,7 +185,7 @@ def test_big_endian(self): def test_zlib(self): cube = self._simple_cube(">f4") - api = self.patch("iris.fileformats.netcdf.netCDF4") + api = self.patch("iris.fileformats.netcdf.save.netCDF4") with Saver("/dummy/path", "NETCDF4") as saver: saver.write(cube, zlib=True) dataset = api.Dataset.return_value diff --git a/lib/iris/tests/unit/fileformats/netcdf/test__FillValueMaskCheckAndStoreTarget.py b/lib/iris/tests/unit/fileformats/netcdf/save/test__FillValueMaskCheckAndStoreTarget.py similarity index 95% rename from lib/iris/tests/unit/fileformats/netcdf/test__FillValueMaskCheckAndStoreTarget.py rename to lib/iris/tests/unit/fileformats/netcdf/save/test__FillValueMaskCheckAndStoreTarget.py index 01ba7ff38d..3b64c4d80c 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test__FillValueMaskCheckAndStoreTarget.py +++ b/lib/iris/tests/unit/fileformats/netcdf/save/test__FillValueMaskCheckAndStoreTarget.py @@ -4,7 +4,7 @@ # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. """ -Unit tests for the `iris.fileformats.netcdf._FillValueMaskCheckAndStoreTarget` +Unit tests for the `iris.fileformats.netcdf.save._FillValueMaskCheckAndStoreTarget` class. 
""" @@ -17,7 +17,7 @@ import numpy as np -from iris.fileformats.netcdf import _FillValueMaskCheckAndStoreTarget +from iris.fileformats.netcdf.save import _FillValueMaskCheckAndStoreTarget class Test__FillValueMaskCheckAndStoreTarget(tests.IrisTest): diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_save.py b/lib/iris/tests/unit/fileformats/netcdf/save/test_save.py similarity index 99% rename from lib/iris/tests/unit/fileformats/netcdf/test_save.py rename to lib/iris/tests/unit/fileformats/netcdf/save/test_save.py index 830d8c5e52..f7c2464ac1 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_save.py +++ b/lib/iris/tests/unit/fileformats/netcdf/save/test_save.py @@ -17,7 +17,7 @@ import iris from iris.coords import DimCoord from iris.cube import Cube, CubeList -from iris.fileformats.netcdf import CF_CONVENTIONS_VERSION, save +from iris.fileformats.netcdf.save import CF_CONVENTIONS_VERSION, save from iris.tests.stock import lat_lon_cube From d7a5f7c77a60f89b408e6bca545f7cb478de0b16 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 23 Aug 2021 12:37:21 +0100 Subject: [PATCH 4/7] Move test-results for new test structure. --- .../unit/fileformats/netcdf/{ => save}/Saver/write/endian.cdl | 0 .../unit/fileformats/netcdf/{ => save}/Saver/write/mercator.cdl | 0 .../netcdf/{ => save}/Saver/write/mercator_no_ellipsoid.cdl | 0 .../fileformats/netcdf/{ => save}/Saver/write/stereographic.cdl | 0 .../netcdf/{ => save}/Saver/write/stereographic_no_ellipsoid.cdl | 0 .../netcdf/{ => save}/Saver/write/transverse_mercator.cdl | 0 .../{ => save}/Saver/write/transverse_mercator_no_ellipsoid.cdl | 0 .../netcdf/{ => save}/Saver/write/with_climatology.cdl | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename lib/iris/tests/results/unit/fileformats/netcdf/{ => save}/Saver/write/endian.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{ => save}/Saver/write/mercator.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{ => save}/Saver/write/mercator_no_ellipsoid.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{ => save}/Saver/write/stereographic.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{ => save}/Saver/write/stereographic_no_ellipsoid.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{ => save}/Saver/write/transverse_mercator.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{ => save}/Saver/write/transverse_mercator_no_ellipsoid.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{ => save}/Saver/write/with_climatology.cdl (100%) diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/endian.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/endian.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/endian.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/endian.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/mercator.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/mercator.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/mercator.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/mercator.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/mercator_no_ellipsoid.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/mercator_no_ellipsoid.cdl similarity index 100% rename from 
lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/mercator_no_ellipsoid.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/mercator_no_ellipsoid.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/stereographic.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/stereographic.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/stereographic.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/stereographic.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/stereographic_no_ellipsoid.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/stereographic_no_ellipsoid.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/stereographic_no_ellipsoid.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/stereographic_no_ellipsoid.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/transverse_mercator.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/transverse_mercator.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/transverse_mercator.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/transverse_mercator.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/transverse_mercator_no_ellipsoid.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/transverse_mercator_no_ellipsoid.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/transverse_mercator_no_ellipsoid.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/transverse_mercator_no_ellipsoid.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/with_climatology.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/with_climatology.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/Saver/write/with_climatology.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/with_climatology.cdl From 9dc27ca4cd101379ab876c23b589f9599970c25d Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 23 Aug 2021 13:08:32 +0100 Subject: [PATCH 5/7] Fix more tests. 
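
Beyond the test fixes, this commit renames the submodules: 'load' becomes
'loader', and the saver tests and result files move from 'save' to 'saver'
directories. A plausible reading of the diff below: the package __init__
re-exports the load_cubes() and save() callables, so after
"from .saver import CFNameCoordMap, Saver, save" the name
iris.fileformats.netcdf.save must resolve to the save() *function*; a
sibling submodule also called 'save' would shadow it and be ambiguous.

A minimal sketch of the public behaviour the renames preserve (the
assertions are illustrative only, not part of this patch):

    # User code continues to import from the package namespace:
    import iris.fileformats.netcdf as netcdf

    netcdf.load_cubes   # loading entry point, now defined in netcdf/loader.py
    netcdf.Saver        # saving manager, re-exported from netcdf/saver.py
    netcdf.save         # the save() function - not a submodule

    assert callable(netcdf.load_cubes) and callable(netcdf.save)
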
--- lib/iris/fileformats/__init__.py | 8 +- lib/iris/fileformats/_nc_load_rules/engine.py | 12 +- .../fileformats/_nc_load_rules/helpers.py | 2 +- lib/iris/fileformats/netcdf/__init__.py | 4 +- .../fileformats/netcdf/{load.py => loader.py} | 0 lib/iris/fileformats/netcdf/save.py | 2180 ----------------- lib/iris/io/__init__.py | 2 +- lib/iris/tests/integration/test_netcdf.py | 4 +- .../{save => saver}/Saver/write/endian.cdl | 0 .../{save => saver}/Saver/write/mercator.cdl | 0 .../Saver/write/mercator_no_ellipsoid.cdl | 0 .../Saver/write/stereographic.cdl | 0 .../write/stereographic_no_ellipsoid.cdl | 0 .../Saver/write/transverse_mercator.cdl | 0 .../transverse_mercator_no_ellipsoid.cdl | 0 .../Saver/write/with_climatology.cdl | 0 lib/iris/tests/test_netcdf.py | 2 +- .../nc_load_rules/actions/__init__.py | 12 +- .../test_build_auxiliary_coordinate.py | 6 +- .../unit/fileformats/netcdf/load/__init__.py | 6 - .../{load => loader}/test__get_cf_var_data.py | 4 +- .../test__load_aux_factory.py | 4 +- .../{load => loader}/test__load_cube.py | 8 +- .../{load => loader}/test_load_cubes.py | 4 +- .../test_parse_cell_methods.py | 4 +- .../unit/fileformats/netcdf/save/__init__.py | 6 - .../netcdf/{save => saver}/test_Saver.py | 4 +- .../test__FillValueMaskCheckAndStoreTarget.py | 4 +- .../netcdf/{save => saver}/test_save.py | 14 +- 29 files changed, 49 insertions(+), 2241 deletions(-) rename lib/iris/fileformats/netcdf/{load.py => loader.py} (100%) delete mode 100644 lib/iris/fileformats/netcdf/save.py rename lib/iris/tests/results/unit/fileformats/netcdf/{save => saver}/Saver/write/endian.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{save => saver}/Saver/write/mercator.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{save => saver}/Saver/write/mercator_no_ellipsoid.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{save => saver}/Saver/write/stereographic.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{save => saver}/Saver/write/stereographic_no_ellipsoid.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{save => saver}/Saver/write/transverse_mercator.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{save => saver}/Saver/write/transverse_mercator_no_ellipsoid.cdl (100%) rename lib/iris/tests/results/unit/fileformats/netcdf/{save => saver}/Saver/write/with_climatology.cdl (100%) delete mode 100644 lib/iris/tests/unit/fileformats/netcdf/load/__init__.py rename lib/iris/tests/unit/fileformats/netcdf/{load => loader}/test__get_cf_var_data.py (94%) rename lib/iris/tests/unit/fileformats/netcdf/{load => loader}/test__load_aux_factory.py (98%) rename lib/iris/tests/unit/fileformats/netcdf/{load => loader}/test__load_cube.py (95%) rename lib/iris/tests/unit/fileformats/netcdf/{load => loader}/test_load_cubes.py (98%) rename lib/iris/tests/unit/fileformats/netcdf/{load => loader}/test_parse_cell_methods.py (97%) delete mode 100644 lib/iris/tests/unit/fileformats/netcdf/save/__init__.py rename lib/iris/tests/unit/fileformats/netcdf/{save => saver}/test_Saver.py (99%) rename lib/iris/tests/unit/fileformats/netcdf/{save => saver}/test__FillValueMaskCheckAndStoreTarget.py (95%) rename lib/iris/tests/unit/fileformats/netcdf/{save => saver}/test_save.py (93%) diff --git a/lib/iris/fileformats/__init__.py b/lib/iris/fileformats/__init__.py index 000b7f00ee..9ecd8ecd09 100644 --- a/lib/iris/fileformats/__init__.py +++ b/lib/iris/fileformats/__init__.py @@ -93,7 +93,7 @@ def _load_grib(*args, **kwargs): "NetCDF", 
MagicNumber(4), 0x43444601, - netcdf.load.load_cubes, + netcdf.loader.load_cubes, priority=5, ) ) @@ -104,7 +104,7 @@ def _load_grib(*args, **kwargs): "NetCDF 64 bit offset format", MagicNumber(4), 0x43444602, - netcdf.load.load_cubes, + netcdf.loader.load_cubes, priority=5, ) ) @@ -116,7 +116,7 @@ def _load_grib(*args, **kwargs): "NetCDF_v4", MagicNumber(8), 0x894844460D0A1A0A, - netcdf.load.load_cubes, + netcdf.loader.load_cubes, priority=5, ) ) @@ -126,7 +126,7 @@ def _load_grib(*args, **kwargs): "NetCDF OPeNDAP", UriProtocol(), lambda protocol: protocol in ["http", "https"], - netcdf.load.load_cubes, + netcdf.loader.load_cubes, priority=6, ) FORMAT_AGENT.add_spec(_nc_dap) diff --git a/lib/iris/fileformats/_nc_load_rules/engine.py b/lib/iris/fileformats/_nc_load_rules/engine.py index 2e05b615ed..ff35c7df4f 100644 --- a/lib/iris/fileformats/_nc_load_rules/engine.py +++ b/lib/iris/fileformats/_nc_load_rules/engine.py @@ -5,7 +5,7 @@ # licensing details. """ A simple mimic of the Pyke 'knowledge_engine', for interfacing to the routines -in 'iris.fileformats.netcdf.load' with minimal changes to that code. +in 'iris.fileformats.netcdf.loader' with minimal changes to that code. This allows us to replace the Pyke rules operation with the simpler pure-Python translation operations in :mod:`iris.fileformats._nc_load_rules.actions`. @@ -15,7 +15,7 @@ engine.get_kb() also returns a FactEntity object, which mimics *just enough* API of a Pyke.knowlege_base, so that we can list its case-specific facts, as -used in :meth:`iris.fileformats.netcdf.load._actions_activation_stats`. +used in :meth:`iris.fileformats.netcdf.loader._actions_activation_stats`. """ from .actions import run_actions @@ -66,7 +66,7 @@ class Engine: A minimal mimic of a Pyke.engine. Provides just enough API so that the existing code in - :mod:`iris.fileformats.netcdf.load` can interface with our new rules functions. + :mod:`iris.fileformats.netcdf.loader` can interface with our new rules functions. A list of possible fact-arglists is stored, for each of a set of fact-names (which are strings). @@ -91,7 +91,7 @@ def activate(self): set by engine.cf_var (a CFDataVariable). The rules operation itself is coded elsewhere, - in :mod:`iris.fileformats.netcdf.load._nc_load_rules.actions`. + in :mod:`iris.fileformats.netcdf.loader._nc_load_rules.actions`. """ run_actions(self) @@ -101,7 +101,7 @@ def get_kb(self): Get a FactEntity, which mimic (bits of) a knowledge-base. Just allowing - :meth:`iris.fileformats.netcdf.load._action_activation_stats` to list the + :meth:`iris.fileformats.netcdf.loader._action_activation_stats` to list the facts. """ @@ -110,7 +110,7 @@ def get_kb(self): def print_stats(self): """ No-op, called by - :meth:`iris.fileformats.netcdf.load._action_activation_stats`. + :meth:`iris.fileformats.netcdf.loader._action_activation_stats`. 
""" pass diff --git a/lib/iris/fileformats/_nc_load_rules/helpers.py b/lib/iris/fileformats/_nc_load_rules/helpers.py index 5ac2823dc9..e7dcb97011 100644 --- a/lib/iris/fileformats/_nc_load_rules/helpers.py +++ b/lib/iris/fileformats/_nc_load_rules/helpers.py @@ -26,7 +26,7 @@ import iris.coords import iris.exceptions import iris.fileformats.cf as cf -from iris.fileformats.netcdf.load import ( +from iris.fileformats.netcdf.loader import ( UnknownCellMethodWarning, _get_cf_var_data, parse_cell_methods, diff --git a/lib/iris/fileformats/netcdf/__init__.py b/lib/iris/fileformats/netcdf/__init__.py index f7aee48f60..d0074920f1 100644 --- a/lib/iris/fileformats/netcdf/__init__.py +++ b/lib/iris/fileformats/netcdf/__init__.py @@ -7,14 +7,14 @@ A package for loading and saving cubes to and from netcdf files. """ -from .load import ( +from .loader import ( NetCDFDataProxy, OrderedAddableList, UnknownCellMethodWarning, load_cubes, parse_cell_methods, ) -from .save import CFNameCoordMap, Saver, save +from .saver import CFNameCoordMap, Saver, save __all__ = [ "CFNameCoordMap", diff --git a/lib/iris/fileformats/netcdf/load.py b/lib/iris/fileformats/netcdf/loader.py similarity index 100% rename from lib/iris/fileformats/netcdf/load.py rename to lib/iris/fileformats/netcdf/loader.py diff --git a/lib/iris/fileformats/netcdf/save.py b/lib/iris/fileformats/netcdf/save.py deleted file mode 100644 index a2afb33dea..0000000000 --- a/lib/iris/fileformats/netcdf/save.py +++ /dev/null @@ -1,2180 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the LGPL license. -# See COPYING and COPYING.LESSER in the root of the repository for full -# licensing details. -""" -Module to support the saving of Iris cubes to a NetCDF file. - -See also: `netCDF4 python `_. - -Also refer to document 'NetCDF Climate and Forecast (CF) Metadata Conventions'. - -""" - -import collections -from itertools import repeat, zip_longest -import os -import os.path -import re -import string -import warnings - -import cf_units -import dask.array as da -import netCDF4 -import numpy as np -import numpy.ma as ma - -from iris.aux_factory import ( - HybridHeightFactory, - HybridPressureFactory, - OceanSFactory, - OceanSg1Factory, - OceanSg2Factory, - OceanSigmaFactory, - OceanSigmaZFactory, -) -import iris.config -import iris.coord_systems -import iris.coords -import iris.exceptions -import iris.fileformats.cf -import iris.io -import iris.util - -# Standard CML spatio-temporal axis names. -SPATIO_TEMPORAL_AXES = ["t", "z", "y", "x"] - -# CF attributes that should not be global. -_CF_DATA_ATTRS = [ - "flag_masks", - "flag_meanings", - "flag_values", - "instance_dimension", - "missing_value", - "sample_dimension", - "standard_error_multiplier", -] - -# CF attributes that should only be global. -_CF_GLOBAL_ATTRS = ["conventions", "featureType", "history", "title"] - -# UKMO specific attributes that should not be global. 
-_UKMO_DATA_ATTRS = ["STASH", "um_stash_source", "ukmo__process_flags"] - -CF_CONVENTIONS_VERSION = "CF-1.7" - -_FactoryDefn = collections.namedtuple( - "_FactoryDefn", ("primary", "std_name", "formula_terms_format") -) -_FACTORY_DEFNS = { - HybridHeightFactory: _FactoryDefn( - primary="delta", - std_name="atmosphere_hybrid_height_coordinate", - formula_terms_format="a: {delta} b: {sigma} orog: {orography}", - ), - HybridPressureFactory: _FactoryDefn( - primary="delta", - std_name="atmosphere_hybrid_sigma_pressure_coordinate", - formula_terms_format="ap: {delta} b: {sigma} " - "ps: {surface_air_pressure}", - ), - OceanSigmaZFactory: _FactoryDefn( - primary="zlev", - std_name="ocean_sigma_z_coordinate", - formula_terms_format="sigma: {sigma} eta: {eta} depth: {depth} " - "depth_c: {depth_c} nsigma: {nsigma} zlev: {zlev}", - ), - OceanSigmaFactory: _FactoryDefn( - primary="sigma", - std_name="ocean_sigma_coordinate", - formula_terms_format="sigma: {sigma} eta: {eta} depth: {depth}", - ), - OceanSFactory: _FactoryDefn( - primary="s", - std_name="ocean_s_coordinate", - formula_terms_format="s: {s} eta: {eta} depth: {depth} a: {a} b: {b} " - "depth_c: {depth_c}", - ), - OceanSg1Factory: _FactoryDefn( - primary="s", - std_name="ocean_s_coordinate_g1", - formula_terms_format="s: {s} c: {c} eta: {eta} depth: {depth} " - "depth_c: {depth_c}", - ), - OceanSg2Factory: _FactoryDefn( - primary="s", - std_name="ocean_s_coordinate_g2", - formula_terms_format="s: {s} c: {c} eta: {eta} depth: {depth} " - "depth_c: {depth_c}", - ), -} - - -def _bytes_if_ascii(string): - """ - Convert the given string to a byte string (str in py2k, bytes in py3k) - if the given string can be encoded to ascii, else maintain the type - of the inputted string. - - Note: passing objects without an `encode` method (such as None) will - be returned by the function unchanged. - - """ - if isinstance(string, str): - try: - return string.encode(encoding="ascii") - except (AttributeError, UnicodeEncodeError): - pass - return string - - -def _setncattr(variable, name, attribute): - """ - Put the given attribute on the given netCDF4 Data type, casting - attributes as we go to bytes rather than unicode. - - """ - attribute = _bytes_if_ascii(attribute) - return variable.setncattr(name, attribute) - - -class _FillValueMaskCheckAndStoreTarget: - """ - To be used with da.store. Remembers whether any element was equal to a - given value and whether it was masked, before passing the chunk to the - given target. - - """ - - def __init__(self, target, fill_value=None): - self.target = target - self.fill_value = fill_value - self.contains_value = False - self.is_masked = False - - def __setitem__(self, keys, arr): - if self.fill_value is not None: - self.contains_value = self.contains_value or self.fill_value in arr - self.is_masked = self.is_masked or ma.is_masked(arr) - self.target[keys] = arr - - -class CFNameCoordMap: - """Provide a simple CF name to CF coordinate mapping.""" - - _Map = collections.namedtuple("_Map", ["name", "coord"]) - - def __init__(self): - self._map = [] - - def append(self, name, coord): - """ - Append the given name and coordinate pair to the mapping. - - Args: - - * name: - CF name of the associated coordinate. - - * coord: - The coordinate of the associated CF name. - - Returns: - None. 
- - """ - self._map.append(CFNameCoordMap._Map(name, coord)) - - @property - def names(self): - """Return all the CF names.""" - - return [pair.name for pair in self._map] - - @property - def coords(self): - """Return all the coordinates.""" - - return [pair.coord for pair in self._map] - - def name(self, coord): - """ - Return the CF name, given a coordinate - - Args: - - * coord: - The coordinate of the associated CF name. - - Returns: - Coordinate. - - """ - result = None - for pair in self._map: - if coord == pair.coord: - result = pair.name - break - if result is None: - msg = "Coordinate is not mapped, {!r}".format(coord) - raise KeyError(msg) - return result - - def coord(self, name): - """ - Return the coordinate, given a CF name. - - Args: - - * name: - CF name of the associated coordinate. - - Returns: - CF name. - - """ - result = None - for pair in self._map: - if name == pair.name: - result = pair.coord - break - if result is None: - msg = "Name is not mapped, {!r}".format(name) - raise KeyError(msg) - return result - - -def _bytes_if_ascii(string): - """ - Convert the given string to a byte string (str in py2k, bytes in py3k) - if the given string can be encoded to ascii, else maintain the type - of the inputted string. - - Note: passing objects without an `encode` method (such as None) will - be returned by the function unchanged. - - """ - if isinstance(string, str): - try: - return string.encode(encoding="ascii") - except (AttributeError, UnicodeEncodeError): - pass - return string - - -def _setncattr(variable, name, attribute): - """ - Put the given attribute on the given netCDF4 Data type, casting - attributes as we go to bytes rather than unicode. - - """ - attribute = _bytes_if_ascii(attribute) - return variable.setncattr(name, attribute) - - -class _FillValueMaskCheckAndStoreTarget: - """ - To be used with da.store. Remembers whether any element was equal to a - given value and whether it was masked, before passing the chunk to the - given target. - - """ - - def __init__(self, target, fill_value=None): - self.target = target - self.fill_value = fill_value - self.contains_value = False - self.is_masked = False - - def __setitem__(self, keys, arr): - if self.fill_value is not None: - self.contains_value = self.contains_value or self.fill_value in arr - self.is_masked = self.is_masked or ma.is_masked(arr) - self.target[keys] = arr - - -class Saver: - """A manager for saving netcdf files.""" - - def __init__(self, filename, netcdf_format): - """ - A manager for saving netcdf files. - - Args: - - * filename (string): - Name of the netCDF file to save the cube. - - * netcdf_format (string): - Underlying netCDF file format, one of 'NETCDF4', 'NETCDF4_CLASSIC', - 'NETCDF3_CLASSIC' or 'NETCDF3_64BIT'. Default is 'NETCDF4' format. - - Returns: - None. - - For example:: - - # Initialise Manager for saving - with Saver(filename, netcdf_format) as sman: - # Iterate through the cubelist. 
- for cube in cubes: - sman.write(cube) - - """ - if netcdf_format not in [ - "NETCDF4", - "NETCDF4_CLASSIC", - "NETCDF3_CLASSIC", - "NETCDF3_64BIT", - ]: - raise ValueError( - "Unknown netCDF file format, got %r" % netcdf_format - ) - - # All persistent variables - #: CF name mapping with iris coordinates - self._name_coord_map = CFNameCoordMap() - #: List of dimension coordinates added to the file - self._dim_coords = [] - #: List of grid mappings added to the file - self._coord_systems = [] - #: A dictionary, listing dimension names and corresponding length - self._existing_dim = {} - #: A dictionary, mapping formula terms to owner cf variable name - self._formula_terms_cache = {} - #: NetCDF dataset - try: - self._dataset = netCDF4.Dataset( - filename, mode="w", format=netcdf_format - ) - except RuntimeError: - dir_name = os.path.dirname(filename) - if not os.path.isdir(dir_name): - msg = "No such file or directory: {}".format(dir_name) - raise IOError(msg) - if not os.access(dir_name, os.R_OK | os.W_OK): - msg = "Permission denied: {}".format(filename) - raise IOError(msg) - else: - raise - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - """Flush any buffered data to the CF-netCDF file before closing.""" - - self._dataset.sync() - self._dataset.close() - - def write( - self, - cube, - local_keys=None, - unlimited_dimensions=None, - zlib=False, - complevel=4, - shuffle=True, - fletcher32=False, - contiguous=False, - chunksizes=None, - endian="native", - least_significant_digit=None, - packing=None, - fill_value=None, - ): - """ - Wrapper for saving cubes to a NetCDF file. - - Args: - - * cube (:class:`iris.cube.Cube`): - A :class:`iris.cube.Cube` to be saved to a netCDF file. - - Kwargs: - - * local_keys (iterable of strings): - An interable of cube attribute keys. Any cube attributes with - matching keys will become attributes on the data variable rather - than global attributes. - - * unlimited_dimensions (iterable of strings and/or - :class:`iris.coords.Coord` objects): - List of coordinate names (or coordinate objects) - corresponding to coordinate dimensions of `cube` to save with the - NetCDF dimension variable length 'UNLIMITED'. By default, no - unlimited dimensions are saved. Only the 'NETCDF4' format - supports multiple 'UNLIMITED' dimensions. - - * zlib (bool): - If `True`, the data will be compressed in the netCDF file using - gzip compression (default `False`). - - * complevel (int): - An integer between 1 and 9 describing the level of compression - desired (default 4). Ignored if `zlib=False`. - - * shuffle (bool): - If `True`, the HDF5 shuffle filter will be applied before - compressing the data (default `True`). This significantly improves - compression. Ignored if `zlib=False`. - - * fletcher32 (bool): - If `True`, the Fletcher32 HDF5 checksum algorithm is activated to - detect errors. Default `False`. - - * contiguous (bool): - If `True`, the variable data is stored contiguously on disk. - Default `False`. Setting to `True` for a variable with an unlimited - dimension will trigger an error. - - * chunksizes (tuple of int): - Used to manually specify the HDF5 chunksizes for each dimension of - the variable. A detailed discussion of HDF chunking and I/O - performance is available here: - https://www.unidata.ucar.edu/software/netcdf/documentation/NUG/netcdf_perf_chunking.html. 
- Basically, you want the chunk size for each dimension to match - as closely as possible the size of the data block that users will - read from the file. `chunksizes` cannot be set if `contiguous=True`. - - * endian (string): - Used to control whether the data is stored in little or big endian - format on disk. Possible values are 'little', 'big' or 'native' - (default). The library will automatically handle endian conversions - when the data is read, but if the data is always going to be read - on a computer with the opposite format as the one used to create - the file, there may be some performance advantage to be gained by - setting the endian-ness. - - * least_significant_digit (int): - If `least_significant_digit` is specified, variable data will be - truncated (quantized). In conjunction with `zlib=True` this - produces 'lossy', but significantly more efficient compression. For - example, if `least_significant_digit=1`, data will be quantized - using `numpy.around(scale*data)/scale`, where `scale = 2**bits`, - and `bits` is determined so that a precision of 0.1 is retained (in - this case `bits=4`). From - http://www.esrl.noaa.gov/psd/data/gridded/conventions/cdc_netcdf_standard.shtml: - "least_significant_digit -- power of ten of the smallest decimal - place in unpacked data that is a reliable value". Default is - `None`, or no quantization, or 'lossless' compression. - - * packing (type or string or dict or list): A numpy integer datatype - (signed or unsigned) or a string that describes a numpy integer - dtype(i.e. 'i2', 'short', 'u4') or a dict of packing parameters as - described below. This provides support for netCDF data packing as - described in - http://www.unidata.ucar.edu/software/netcdf/documentation/NUG/best_practices.html#bp_Packed-Data-Values - If this argument is a type (or type string), appropriate values of - scale_factor and add_offset will be automatically calculated based - on `cube.data` and possible masking. For more control, pass a dict - with one or more of the following keys: `dtype` (required), - `scale_factor` and `add_offset`. Note that automatic calculation of - packing parameters will trigger loading of lazy data; set them - manually using a dict to avoid this. The default is `None`, in - which case the datatype is determined from the cube and no packing - will occur. - - * fill_value: - The value to use for the `_FillValue` attribute on the netCDF - variable. If `packing` is specified the value of `fill_value` - should be in the domain of the packed data. - - Returns: - None. - - .. note:: - - The `zlib`, `complevel`, `shuffle`, `fletcher32`, `contiguous`, - `chunksizes` and `endian` keywords are silently ignored for netCDF - 3 files that do not use HDF5. - - """ - if unlimited_dimensions is None: - unlimited_dimensions = [] - - cf_profile_available = iris.site_configuration.get( - "cf_profile" - ) not in [None, False] - if cf_profile_available: - # Perform a CF profile of the cube. This may result in an exception - # being raised if mandatory requirements are not satisfied. - profile = iris.site_configuration["cf_profile"](cube) - - # Ensure that attributes are CF compliant and if possible to make them - # compliant. - self.check_attribute_compliance(cube, cube.lazy_data()) - for coord in cube.coords(): - self.check_attribute_compliance(coord, coord.points) - - # Get suitable dimension names. - dimension_names = self._get_dim_names(cube) - - # Create the CF-netCDF data dimensions. 
- self._create_cf_dimensions(cube, dimension_names, unlimited_dimensions) - - # Create the associated cube CF-netCDF data variable. - cf_var_cube = self._create_cf_data_variable( - cube, - dimension_names, - local_keys, - zlib=zlib, - complevel=complevel, - shuffle=shuffle, - fletcher32=fletcher32, - contiguous=contiguous, - chunksizes=chunksizes, - endian=endian, - least_significant_digit=least_significant_digit, - packing=packing, - fill_value=fill_value, - ) - - # Add coordinate variables. - self._add_dim_coords(cube, dimension_names) - - # Add the auxiliary coordinate variables and associate the data - # variable to them - self._add_aux_coords(cube, cf_var_cube, dimension_names) - - # Add the cell_measures variables and associate the data - # variable to them - self._add_cell_measures(cube, cf_var_cube, dimension_names) - - # Add the ancillary_variables variables and associate the data variable - # to them - self._add_ancillary_variables(cube, cf_var_cube, dimension_names) - - # Add the formula terms to the appropriate cf variables for each - # aux factory in the cube. - self._add_aux_factories(cube, cf_var_cube, dimension_names) - - # Add data variable-only attribute names to local_keys. - if local_keys is None: - local_keys = set() - else: - local_keys = set(local_keys) - local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS) - - # Add global attributes taking into account local_keys. - global_attributes = { - k: v - for k, v in cube.attributes.items() - if (k not in local_keys and k.lower() != "conventions") - } - self.update_global_attributes(global_attributes) - - if cf_profile_available: - cf_patch = iris.site_configuration.get("cf_patch") - if cf_patch is not None: - # Perform a CF patch of the dataset. - cf_patch(profile, self._dataset, cf_var_cube) - else: - msg = "cf_profile is available but no {} defined.".format( - "cf_patch" - ) - warnings.warn(msg) - - @staticmethod - def check_attribute_compliance(container, data): - def _coerce_value(val_attr, val_attr_value, data_dtype): - val_attr_tmp = np.array(val_attr_value, dtype=data_dtype) - if (val_attr_tmp != val_attr_value).any(): - msg = '"{}" is not of a suitable value ({})' - raise ValueError(msg.format(val_attr, val_attr_value)) - return val_attr_tmp - - data_dtype = data.dtype - - # Ensure that conflicting attributes are not provided. - if ( - container.attributes.get("valid_min") is not None - or container.attributes.get("valid_max") is not None - ) and container.attributes.get("valid_range") is not None: - msg = ( - 'Both "valid_range" and "valid_min" or "valid_max" ' - "attributes present." - ) - raise ValueError(msg) - - # Ensure correct datatype - for val_attr in ["valid_range", "valid_min", "valid_max"]: - val_attr_value = container.attributes.get(val_attr) - if val_attr_value is not None: - val_attr_value = np.asarray(val_attr_value) - if data_dtype.itemsize == 1: - # Allow signed integral type - if val_attr_value.dtype.kind == "i": - continue - new_val = _coerce_value(val_attr, val_attr_value, data_dtype) - container.attributes[val_attr] = new_val - - def update_global_attributes(self, attributes=None, **kwargs): - """ - Update the CF global attributes based on the provided - iterable/dictionary and/or keyword arguments. - - Args: - - * attributes (dict or iterable of key, value pairs): - CF global attributes to be updated. - - """ - if attributes is not None: - # Handle sequence e.g. [('fruit', 'apple'), ...]. 
- if not hasattr(attributes, "keys"): - attributes = dict(attributes) - - for attr_name in sorted(attributes): - _setncattr(self._dataset, attr_name, attributes[attr_name]) - - for attr_name in sorted(kwargs): - _setncattr(self._dataset, attr_name, kwargs[attr_name]) - - def _create_cf_dimensions( - self, cube, dimension_names, unlimited_dimensions=None - ): - """ - Create the CF-netCDF data dimensions. - - Args: - - * cube (:class:`iris.cube.Cube`): - A :class:`iris.cube.Cube` in which to lookup coordinates. - - Kwargs: - - * unlimited_dimensions (iterable of strings and/or - :class:`iris.coords.Coord` objects): - List of coordinates to make unlimited (None by default). - - Returns: - None. - - """ - unlimited_dim_names = [] - for coord in unlimited_dimensions: - try: - coord = cube.coord(name_or_coord=coord, dim_coords=True) - except iris.exceptions.CoordinateNotFoundError: - # coordinate isn't used for this cube, but it might be - # used for a different one - pass - else: - dim_name = self._get_coord_variable_name(cube, coord) - unlimited_dim_names.append(dim_name) - - for dim_name in dimension_names: - if dim_name not in self._dataset.dimensions: - if dim_name in unlimited_dim_names: - size = None - else: - size = self._existing_dim[dim_name] - self._dataset.createDimension(dim_name, size) - - def _add_inner_related_vars( - self, - cube, - cf_var_cube, - dimension_names, - coordlike_elements, - saver_create_method, - role_attribute_name, - ): - # Common method to create a set of file variables and attach them to - # the parent data variable. - element_names = [] - # Add CF-netCDF variables for the associated auxiliary coordinates. - for element in sorted( - coordlike_elements, key=lambda element: element.name() - ): - # Create the associated CF-netCDF variable. - if element not in self._name_coord_map.coords: - cf_name = saver_create_method(cube, dimension_names, element) - self._name_coord_map.append(cf_name, element) - else: - cf_name = self._name_coord_map.name(element) - - if cf_name is not None: - if role_attribute_name == "cell_measures": - # In the case of cell-measures, the attribute entries are not just - # a var_name, but each have the form ": ". - cf_name = "{}: {}".format(element.measure, cf_name) - element_names.append(cf_name) - - # Add CF-netCDF references to the primary data variable. - if element_names: - variable_names = " ".join(sorted(element_names)) - _setncattr(cf_var_cube, role_attribute_name, variable_names) - - def _add_aux_coords(self, cube, cf_var_cube, dimension_names): - """ - Add aux. coordinate to the dataset and associate with the data variable - - Args: - - * cube (:class:`iris.cube.Cube`): - A :class:`iris.cube.Cube` to be saved to a netCDF file. - * cf_var_cube (:class:`netcdf.netcdf_variable`): - cf variable cube representation. - * dimension_names (list): - Names associated with the dimensions of the cube. - - """ - return self._add_inner_related_vars( - cube, - cf_var_cube, - dimension_names, - cube.aux_coords, - self._create_cf_coord_variable, - "coordinates", - ) - - def _add_cell_measures(self, cube, cf_var_cube, dimension_names): - """ - Add cell measures to the dataset and associate with the data variable - - Args: - - * cube (:class:`iris.cube.Cube`): - A :class:`iris.cube.Cube` to be saved to a netCDF file. - * cf_var_cube (:class:`netcdf.netcdf_variable`): - cf variable cube representation. - * dimension_names (list): - Names associated with the dimensions of the cube. 
- - """ - return self._add_inner_related_vars( - cube, - cf_var_cube, - dimension_names, - cube.cell_measures(), - self._create_cf_cell_measure_variable, - "cell_measures", - ) - - def _add_ancillary_variables(self, cube, cf_var_cube, dimension_names): - """ - Add ancillary variables measures to the dataset and associate with the - data variable - - Args: - - * cube (:class:`iris.cube.Cube`): - A :class:`iris.cube.Cube` to be saved to a netCDF file. - * cf_var_cube (:class:`netcdf.netcdf_variable`): - cf variable cube representation. - * dimension_names (list): - Names associated with the dimensions of the cube. - - """ - return self._add_inner_related_vars( - cube, - cf_var_cube, - dimension_names, - cube.ancillary_variables(), - self._create_cf_ancildata_variable, - "ancillary_variables", - ) - - def _add_dim_coords(self, cube, dimension_names): - """ - Add coordinate variables to NetCDF dataset. - - Args: - - * cube (:class:`iris.cube.Cube`): - A :class:`iris.cube.Cube` to be saved to a netCDF file. - * dimension_names (list): - Names associated with the dimensions of the cube. - - """ - # Ensure we create the netCDF coordinate variables first. - for coord in cube.dim_coords: - # Create the associated coordinate CF-netCDF variable. - if coord not in self._name_coord_map.coords: - cf_name = self._create_cf_coord_variable( - cube, dimension_names, coord - ) - self._name_coord_map.append(cf_name, coord) - - def _add_aux_factories(self, cube, cf_var_cube, dimension_names): - """ - Modifies the variables of the NetCDF dataset to represent - the presence of dimensionless vertical coordinates based on - the aux factories of the cube (if any). - - Args: - - * cube (:class:`iris.cube.Cube`): - A :class:`iris.cube.Cube` to be saved to a netCDF file. - * cf_var_cube (:class:`netcdf.netcdf_variable`) - CF variable cube representation. - * dimension_names (list): - Names associated with the dimensions of the cube. - - """ - primaries = [] - for factory in cube.aux_factories: - factory_defn = _FACTORY_DEFNS.get(type(factory), None) - if factory_defn is None: - msg = ( - "Unable to determine formula terms " - "for AuxFactory: {!r}".format(factory) - ) - warnings.warn(msg) - else: - # Override `standard_name`, `long_name`, and `axis` of the - # primary coord that signals the presense of a dimensionless - # vertical coord, then set the `formula_terms` attribute. - primary_coord = factory.dependencies[factory_defn.primary] - if primary_coord in primaries: - msg = ( - "Cube {!r} has multiple aux factories that share " - "a common primary coordinate {!r}. Unable to save " - "to netCDF as having multiple formula terms on a " - "single coordinate is not supported." - ) - raise ValueError(msg.format(cube, primary_coord.name())) - primaries.append(primary_coord) - - cf_name = self._name_coord_map.name(primary_coord) - cf_var = self._dataset.variables[cf_name] - - names = { - key: self._name_coord_map.name(coord) - for key, coord in factory.dependencies.items() - } - formula_terms = factory_defn.formula_terms_format.format( - **names - ) - std_name = factory_defn.std_name - - if hasattr(cf_var, "formula_terms"): - if ( - cf_var.formula_terms != formula_terms - or cf_var.standard_name != std_name - ): - # TODO: We need to resolve this corner-case where - # the dimensionless vertical coordinate containing - # the formula_terms is a dimension coordinate of - # the associated cube and a new alternatively named - # dimensionless vertical coordinate is required - # with new formula_terms and a renamed dimension. 
- if cf_name in dimension_names: - msg = ( - "Unable to create dimensonless vertical " - "coordinate." - ) - raise ValueError(msg) - key = (cf_name, std_name, formula_terms) - name = self._formula_terms_cache.get(key) - if name is None: - # Create a new variable - name = self._create_cf_coord_variable( - cube, dimension_names, primary_coord - ) - cf_var = self._dataset.variables[name] - _setncattr(cf_var, "standard_name", std_name) - _setncattr(cf_var, "axis", "Z") - # Update the formula terms. - ft = formula_terms.split() - ft = [name if t == cf_name else t for t in ft] - _setncattr(cf_var, "formula_terms", " ".join(ft)) - # Update the cache. - self._formula_terms_cache[key] = name - # Update the associated cube variable. - coords = cf_var_cube.coordinates.split() - coords = [name if c == cf_name else c for c in coords] - _setncattr( - cf_var_cube, "coordinates", " ".join(coords) - ) - else: - _setncattr(cf_var, "standard_name", std_name) - _setncattr(cf_var, "axis", "Z") - _setncattr(cf_var, "formula_terms", formula_terms) - - def _get_dim_names(self, cube): - """ - Determine suitable CF-netCDF data dimension names. - - Args: - - * cube (:class:`iris.cube.Cube`): - A :class:`iris.cube.Cube` to be saved to a netCDF file. - - Returns: - List of dimension names with length equal the number of dimensions - in the cube. - - """ - dimension_names = [] - for dim in range(cube.ndim): - coords = cube.coords(dimensions=dim, dim_coords=True) - if coords: - coord = coords[0] - - dim_name = self._get_coord_variable_name(cube, coord) - # Add only dimensions that have not already been added. - if coord not in self._dim_coords: - # Determine unique dimension name - while ( - dim_name in self._existing_dim - or dim_name in self._name_coord_map.names - ): - dim_name = self._increment_name(dim_name) - - # Update names added, current cube dim names used and - # unique coordinates added. - self._existing_dim[dim_name] = coord.shape[0] - dimension_names.append(dim_name) - self._dim_coords.append(coord) - else: - # Return the dim_name associated with the existing - # coordinate. - dim_name = self._name_coord_map.name(coord) - dimension_names.append(dim_name) - - else: - # No CF-netCDF coordinates describe this data dimension. - dim_name = "dim%d" % dim - if dim_name in self._existing_dim: - # Increment name if conflicted with one already existing. - if self._existing_dim[dim_name] != cube.shape[dim]: - while ( - dim_name in self._existing_dim - and self._existing_dim[dim_name] != cube.shape[dim] - or dim_name in self._name_coord_map.names - ): - dim_name = self._increment_name(dim_name) - # Update dictionary with new entry - self._existing_dim[dim_name] = cube.shape[dim] - else: - # Update dictionary with new entry - self._existing_dim[dim_name] = cube.shape[dim] - - dimension_names.append(dim_name) - return dimension_names - - @staticmethod - def cf_valid_var_name(var_name): - """ - Return a valid CF var_name given a potentially invalid name. - - Args: - - * var_name (str): - The var_name to normalise - - Returns: - A var_name suitable for passing through for variable creation. - - """ - # Replace invalid charaters with an underscore ("_"). - var_name = re.sub(r"[^a-zA-Z0-9]", "_", var_name) - # Ensure the variable name starts with a letter. - if re.match(r"^[^a-zA-Z]", var_name): - var_name = "var_{}".format(var_name) - return var_name - - @staticmethod - def _cf_coord_identity(coord): - """ - Determine a suitable units from a given coordinate. 
- - Args: - - * coord (:class:`iris.coords.Coord`): - A coordinate of a cube. - - Returns: - The (standard_name, long_name, unit) of the given - :class:`iris.coords.Coord` instance. - - """ - - units = str(coord.units) - # Set the 'units' of 'latitude' and 'longitude' coordinates specified - # in 'degrees' to 'degrees_north' and 'degrees_east' respectively, - # as defined in the CF conventions for netCDF files: sections 4.1 and - # 4.2. - if ( - isinstance(coord.coord_system, iris.coord_systems.GeogCS) - or coord.coord_system is None - ) and coord.units == "degrees": - if coord.standard_name == "latitude": - units = "degrees_north" - elif coord.standard_name == "longitude": - units = "degrees_east" - - return coord.standard_name, coord.long_name, units - - def _ensure_valid_dtype(self, values, src_name, src_object): - # NetCDF3 and NetCDF4 classic do not support int64 or unsigned ints, - # so we check if we can store them as int32 instead. - if ( - np.issubdtype(values.dtype, np.int64) - or np.issubdtype(values.dtype, np.unsignedinteger) - ) and self._dataset.file_format in ( - "NETCDF3_CLASSIC", - "NETCDF3_64BIT", - "NETCDF4_CLASSIC", - ): - # Cast to an integer type supported by netCDF3. - if not np.can_cast(values.max(), np.int32) or not np.can_cast( - values.min(), np.int32 - ): - msg = ( - "The data type of {} {!r} is not supported by {} and" - " its values cannot be safely cast to a supported" - " integer type." - ) - msg = msg.format( - src_name, src_object, self._dataset.file_format - ) - raise ValueError(msg) - values = values.astype(np.int32) - return values - - def _create_cf_bounds(self, coord, cf_var, cf_name): - """ - Create the associated CF-netCDF bounds variable. - - Args: - - * coord (:class:`iris.coords.Coord`): - A coordinate of a cube. - * cf_var: - CF-netCDF variable - * cf_name (string): - name of the CF-NetCDF variable. - - Returns: - None - - """ - if coord.has_bounds(): - # Get the values in a form which is valid for the file format. - bounds = self._ensure_valid_dtype( - coord.bounds, "the bounds of coordinate", coord - ) - n_bounds = bounds.shape[-1] - - if n_bounds == 2: - bounds_dimension_name = "bnds" - else: - bounds_dimension_name = "bnds_%s" % n_bounds - - if coord.climatological: - property_name = "climatology" - varname_extra = "climatology" - else: - property_name = "bounds" - varname_extra = "bnds" - - if bounds_dimension_name not in self._dataset.dimensions: - # Create the bounds dimension with the appropriate extent. - self._dataset.createDimension(bounds_dimension_name, n_bounds) - - boundsvar_name = "{}_{}".format(cf_name, varname_extra) - _setncattr(cf_var, property_name, boundsvar_name) - cf_var_bounds = self._dataset.createVariable( - boundsvar_name, - bounds.dtype.newbyteorder("="), - cf_var.dimensions + (bounds_dimension_name,), - ) - cf_var_bounds[:] = bounds - - def _get_cube_variable_name(self, cube): - """ - Returns a CF-netCDF variable name for the given cube. - - Args: - - * cube (class:`iris.cube.Cube`): - An instance of a cube for which a CF-netCDF variable - name is required. - - Returns: - A CF-netCDF variable name as a string. - - """ - if cube.var_name is not None: - cf_name = cube.var_name - else: - # Convert to lower case and replace whitespace by underscores. - cf_name = "_".join(cube.name().lower().split()) - - cf_name = self.cf_valid_var_name(cf_name) - return cf_name - - def _get_coord_variable_name(self, cube, coord): - """ - Returns a CF-netCDF variable name for the given coordinate. 
- - Args: - - * cube (:class:`iris.cube.Cube`): - The cube that contains the given coordinate. - * coord (:class:`iris.coords.Coord`): - An instance of a coordinate for which a CF-netCDF variable - name is required. - - Returns: - A CF-netCDF variable name as a string. - - """ - if coord.var_name is not None: - cf_name = coord.var_name - else: - name = coord.standard_name or coord.long_name - if not name or set(name).intersection(string.whitespace): - # Auto-generate name based on associated dimensions. - name = "" - for dim in cube.coord_dims(coord): - name += "dim{}".format(dim) - # Handle scalar coordinate (dims == ()). - if not name: - name = "unknown_scalar" - # Convert to lower case and replace whitespace by underscores. - cf_name = "_".join(name.lower().split()) - - cf_name = self.cf_valid_var_name(cf_name) - return cf_name - - def _inner_create_cf_cellmeasure_or_ancil_variable( - self, cube, dimension_names, dimensional_metadata - ): - """ - Create the associated CF-netCDF variable in the netCDF dataset for the - given dimensional_metadata. - - Args: - - * cube (:class:`iris.cube.Cube`): - The associated cube being saved to CF-netCDF file. - * dimension_names (list): - Names for each dimension of the cube. - * dimensional_metadata (:class:`iris.coords.CellMeasure`): - A cell measure OR ancillary variable to be saved to the - CF-netCDF file. - In either case, provides data, units and standard/long/var names. - - Returns: - The string name of the associated CF-netCDF variable saved. - - """ - cf_name = self._get_coord_variable_name(cube, dimensional_metadata) - while cf_name in self._dataset.variables: - cf_name = self._increment_name(cf_name) - - # Derive the data dimension names for the coordinate. - cf_dimensions = [ - dimension_names[dim] - for dim in dimensional_metadata.cube_dims(cube) - ] - - # Get the data values. - data = dimensional_metadata.data - - if isinstance(dimensional_metadata, iris.coords.CellMeasure): - # Disallow saving of *masked* cell measures. - # NOTE: currently, this is the only functional difference required - # between variable creation for an ancillary and a cell measure. - if ma.is_masked(data): - # We can't save masked points properly, as we don't maintain a - # suitable fill_value. (Load will not record one, either). - msg = "Cell measures with missing data are not supported." - raise ValueError(msg) - - # Get the values in a form which is valid for the file format. - data = self._ensure_valid_dtype( - data, "coordinate", dimensional_metadata - ) - - # Create the CF-netCDF variable. - cf_var = self._dataset.createVariable( - cf_name, data.dtype.newbyteorder("="), cf_dimensions - ) - - # Add the data to the CF-netCDF variable. - cf_var[:] = data - - if dimensional_metadata.units.is_udunits(): - _setncattr(cf_var, "units", str(dimensional_metadata.units)) - - if dimensional_metadata.standard_name is not None: - _setncattr( - cf_var, "standard_name", dimensional_metadata.standard_name - ) - - if dimensional_metadata.long_name is not None: - _setncattr(cf_var, "long_name", dimensional_metadata.long_name) - - # Add any other custom coordinate attributes. - for name in sorted(dimensional_metadata.attributes): - value = dimensional_metadata.attributes[name] - - # Don't clobber existing attributes. 
- if not hasattr(cf_var, name): - _setncattr(cf_var, name, value) - - return cf_name - - def _create_cf_cell_measure_variable( - self, cube, dimension_names, cell_measure - ): - """ - Create the associated CF-netCDF variable in the netCDF dataset for the - given cell_measure. - - Args: - - * cube (:class:`iris.cube.Cube`): - The associated cube being saved to CF-netCDF file. - * dimension_names (list): - Names for each dimension of the cube. - * cell_measure (:class:`iris.coords.CellMeasure`): - The cell measure to be saved to CF-netCDF file. - - Returns: - The string name of the associated CF-netCDF variable saved. - - """ - # Note: currently shares variable creation code with ancillary-variables. - return self._inner_create_cf_cellmeasure_or_ancil_variable( - cube, dimension_names, cell_measure - ) - - def _create_cf_ancildata_variable( - self, cube, dimension_names, ancillary_variable - ): - """ - Create the associated CF-netCDF variable in the netCDF dataset for the - given ancillary variable. - - Args: - - * cube (:class:`iris.cube.Cube`): - The associated cube being saved to CF-netCDF file. - * dimension_names (list): - Names for each dimension of the cube. - * ancillary_variable (:class:`iris.coords.AncillaryVariable`): - The ancillary variable to be saved to the CF-netCDF file. - - Returns: - The string name of the associated CF-netCDF variable saved. - - """ - # Note: currently shares variable creation code with cell-measures. - return self._inner_create_cf_cellmeasure_or_ancil_variable( - cube, dimension_names, ancillary_variable - ) - - def _create_cf_coord_variable(self, cube, dimension_names, coord): - """ - Create the associated CF-netCDF variable in the netCDF dataset for the - given coordinate. If required, also create the CF-netCDF bounds - variable and associated dimension. - - Args: - - * cube (:class:`iris.cube.Cube`): - The associated cube being saved to CF-netCDF file. - * dimension_names (list): - Names for each dimension of the cube. - * coord (:class:`iris.coords.Coord`): - The coordinate to be saved to CF-netCDF file. - - Returns: - The string name of the associated CF-netCDF variable saved. - - """ - cf_name = self._get_coord_variable_name(cube, coord) - while cf_name in self._dataset.variables: - cf_name = self._increment_name(cf_name) - - # Derive the data dimension names for the coordinate. - cf_dimensions = [ - dimension_names[dim] for dim in cube.coord_dims(coord) - ] - - if np.issubdtype(coord.points.dtype, np.str_): - string_dimension_depth = coord.points.dtype.itemsize - if coord.points.dtype.kind == "U": - string_dimension_depth //= 4 - string_dimension_name = "string%d" % string_dimension_depth - - # Determine whether to create the string length dimension. - if string_dimension_name not in self._dataset.dimensions: - self._dataset.createDimension( - string_dimension_name, string_dimension_depth - ) - - # Add the string length dimension to dimension names. - cf_dimensions.append(string_dimension_name) - - # Create the label coordinate variable. - cf_var = self._dataset.createVariable( - cf_name, "|S1", cf_dimensions - ) - - # Add the payload to the label coordinate variable. 
- if len(cf_dimensions) == 1: - cf_var[:] = list( - "%- *s" % (string_dimension_depth, coord.points[0]) - ) - else: - for index in np.ndindex(coord.points.shape): - index_slice = tuple(list(index) + [slice(None, None)]) - cf_var[index_slice] = list( - "%- *s" % (string_dimension_depth, coord.points[index]) - ) - else: - # Identify the collection of coordinates that represent CF-netCDF - # coordinate variables. - cf_coordinates = cube.dim_coords - - if coord in cf_coordinates: - # By definition of a CF-netCDF coordinate variable this - # coordinate must be 1-D and the name of the CF-netCDF variable - # must be the same as its dimension name. - cf_name = cf_dimensions[0] - - # Get the values in a form which is valid for the file format. - points = self._ensure_valid_dtype( - coord.points, "coordinate", coord - ) - - # Create the CF-netCDF variable. - cf_var = self._dataset.createVariable( - cf_name, points.dtype.newbyteorder("="), cf_dimensions - ) - - # Add the axis attribute for spatio-temporal CF-netCDF coordinates. - if coord in cf_coordinates: - axis = iris.util.guess_coord_axis(coord) - if axis is not None and axis.lower() in SPATIO_TEMPORAL_AXES: - _setncattr(cf_var, "axis", axis.upper()) - - # Add the data to the CF-netCDF variable. - cf_var[:] = points - - # Create the associated CF-netCDF bounds variable. - self._create_cf_bounds(coord, cf_var, cf_name) - - # Deal with CF-netCDF units and standard name. - standard_name, long_name, units = self._cf_coord_identity(coord) - - if cf_units.as_unit(units).is_udunits(): - _setncattr(cf_var, "units", units) - - if standard_name is not None: - _setncattr(cf_var, "standard_name", standard_name) - - if long_name is not None: - _setncattr(cf_var, "long_name", long_name) - - # Add the CF-netCDF calendar attribute. - if coord.units.calendar: - _setncattr(cf_var, "calendar", coord.units.calendar) - - # Add any other custom coordinate attributes. - for name in sorted(coord.attributes): - value = coord.attributes[name] - - if name == "STASH": - # Adopting provisional Metadata Conventions for representing MO - # Scientific Data encoded in NetCDF Format. - name = "um_stash_source" - value = str(value) - - # Don't clobber existing attributes. - if not hasattr(cf_var, name): - _setncattr(cf_var, name, value) - - return cf_name - - def _create_cf_cell_methods(self, cube, dimension_names): - """ - Create CF-netCDF string representation of a cube cell methods. - - Args: - - * cube (:class:`iris.cube.Cube`) or cubelist - (:class:`iris.cube.CubeList`): - A :class:`iris.cube.Cube`, :class:`iris.cube.CubeList` or list of - cubes to be saved to a netCDF file. - * dimension_names (list): - Names associated with the dimensions of the cube. - - Returns: - CF-netCDF string representation of a cube cell methods. - - """ - cell_methods = [] - - # Identify the collection of coordinates that represent CF-netCDF - # coordinate variables. 
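
For a concrete sense of the cell_methods string assembled below (the cell method values here are illustrative only)::

    # A cube with CellMethod("mean", coords="time", intervals="1 hour",
    # comments="of each day"), where "time" is a dimension coordinate,
    # produces the attribute value:
    #     time: mean (interval: 1 hour comment: of each day)
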
- cf_coordinates = cube.dim_coords - - for cm in cube.cell_methods: - names = "" - - for name in cm.coord_names: - coord = cube.coords(name) - - if coord: - coord = coord[0] - if coord in cf_coordinates: - name = dimension_names[cube.coord_dims(coord)[0]] - - names += "%s: " % name - - interval = " ".join( - ["interval: %s" % interval for interval in cm.intervals or []] - ) - comment = " ".join( - ["comment: %s" % comment for comment in cm.comments or []] - ) - extra = " ".join([interval, comment]).strip() - - if extra: - extra = " (%s)" % extra - - cell_methods.append(names + cm.method + extra) - - return " ".join(cell_methods) - - def _create_cf_grid_mapping(self, cube, cf_var_cube): - """ - Create CF-netCDF grid mapping variable and associated CF-netCDF - data variable grid mapping attribute. - - Args: - - * cube (:class:`iris.cube.Cube`) or cubelist - (:class:`iris.cube.CubeList`): - A :class:`iris.cube.Cube`, :class:`iris.cube.CubeList` or list of - cubes to be saved to a netCDF file. - * cf_var_cube (:class:`netcdf.netcdf_variable`): - cf variable cube representation. - - Returns: - None - - """ - cs = cube.coord_system("CoordSystem") - if cs is not None: - # Grid var not yet created? - if cs not in self._coord_systems: - while cs.grid_mapping_name in self._dataset.variables: - aname = self._increment_name(cs.grid_mapping_name) - cs.grid_mapping_name = aname - - cf_var_grid = self._dataset.createVariable( - cs.grid_mapping_name, np.int32 - ) - _setncattr( - cf_var_grid, "grid_mapping_name", cs.grid_mapping_name - ) - - def add_ellipsoid(ellipsoid): - cf_var_grid.longitude_of_prime_meridian = ( - ellipsoid.longitude_of_prime_meridian - ) - semi_major = ellipsoid.semi_major_axis - semi_minor = ellipsoid.semi_minor_axis - if semi_minor == semi_major: - cf_var_grid.earth_radius = semi_major - else: - cf_var_grid.semi_major_axis = semi_major - cf_var_grid.semi_minor_axis = semi_minor - - # latlon - if isinstance(cs, iris.coord_systems.GeogCS): - add_ellipsoid(cs) - - # rotated latlon - elif isinstance(cs, iris.coord_systems.RotatedGeogCS): - if cs.ellipsoid: - add_ellipsoid(cs.ellipsoid) - cf_var_grid.grid_north_pole_latitude = ( - cs.grid_north_pole_latitude - ) - cf_var_grid.grid_north_pole_longitude = ( - cs.grid_north_pole_longitude - ) - cf_var_grid.north_pole_grid_longitude = ( - cs.north_pole_grid_longitude - ) - - # tmerc - elif isinstance(cs, iris.coord_systems.TransverseMercator): - if cs.ellipsoid: - add_ellipsoid(cs.ellipsoid) - cf_var_grid.longitude_of_central_meridian = ( - cs.longitude_of_central_meridian - ) - cf_var_grid.latitude_of_projection_origin = ( - cs.latitude_of_projection_origin - ) - cf_var_grid.false_easting = cs.false_easting - cf_var_grid.false_northing = cs.false_northing - cf_var_grid.scale_factor_at_central_meridian = ( - cs.scale_factor_at_central_meridian - ) - - # merc - elif isinstance(cs, iris.coord_systems.Mercator): - if cs.ellipsoid: - add_ellipsoid(cs.ellipsoid) - cf_var_grid.longitude_of_projection_origin = ( - cs.longitude_of_projection_origin - ) - # The Mercator class has implicit defaults for certain - # parameters - cf_var_grid.false_easting = 0.0 - cf_var_grid.false_northing = 0.0 - cf_var_grid.scale_factor_at_projection_origin = 1.0 - - # lcc - elif isinstance(cs, iris.coord_systems.LambertConformal): - if cs.ellipsoid: - add_ellipsoid(cs.ellipsoid) - cf_var_grid.standard_parallel = cs.secant_latitudes - cf_var_grid.latitude_of_projection_origin = cs.central_lat - cf_var_grid.longitude_of_central_meridian = cs.central_lon - 
cf_var_grid.false_easting = cs.false_easting - cf_var_grid.false_northing = cs.false_northing - - # stereo - elif isinstance(cs, iris.coord_systems.Stereographic): - if cs.true_scale_lat is not None: - warnings.warn( - "Stereographic coordinate systems with " - "true scale latitude specified are not " - "yet handled" - ) - else: - if cs.ellipsoid: - add_ellipsoid(cs.ellipsoid) - cf_var_grid.longitude_of_projection_origin = ( - cs.central_lon - ) - cf_var_grid.latitude_of_projection_origin = ( - cs.central_lat - ) - cf_var_grid.false_easting = cs.false_easting - cf_var_grid.false_northing = cs.false_northing - # The Stereographic class has an implicit scale - # factor - cf_var_grid.scale_factor_at_projection_origin = 1.0 - - # osgb (a specific tmerc) - elif isinstance(cs, iris.coord_systems.OSGB): - warnings.warn("OSGB coordinate system not yet handled") - - # lambert azimuthal equal area - elif isinstance( - cs, iris.coord_systems.LambertAzimuthalEqualArea - ): - if cs.ellipsoid: - add_ellipsoid(cs.ellipsoid) - cf_var_grid.longitude_of_projection_origin = ( - cs.longitude_of_projection_origin - ) - cf_var_grid.latitude_of_projection_origin = ( - cs.latitude_of_projection_origin - ) - cf_var_grid.false_easting = cs.false_easting - cf_var_grid.false_northing = cs.false_northing - - # albers conical equal area - elif isinstance(cs, iris.coord_systems.AlbersEqualArea): - if cs.ellipsoid: - add_ellipsoid(cs.ellipsoid) - cf_var_grid.longitude_of_central_meridian = ( - cs.longitude_of_central_meridian - ) - cf_var_grid.latitude_of_projection_origin = ( - cs.latitude_of_projection_origin - ) - cf_var_grid.false_easting = cs.false_easting - cf_var_grid.false_northing = cs.false_northing - cf_var_grid.standard_parallel = cs.standard_parallels - - # vertical perspective - elif isinstance(cs, iris.coord_systems.VerticalPerspective): - if cs.ellipsoid: - add_ellipsoid(cs.ellipsoid) - cf_var_grid.longitude_of_projection_origin = ( - cs.longitude_of_projection_origin - ) - cf_var_grid.latitude_of_projection_origin = ( - cs.latitude_of_projection_origin - ) - cf_var_grid.false_easting = cs.false_easting - cf_var_grid.false_northing = cs.false_northing - cf_var_grid.perspective_point_height = ( - cs.perspective_point_height - ) - - # geostationary - elif isinstance(cs, iris.coord_systems.Geostationary): - if cs.ellipsoid: - add_ellipsoid(cs.ellipsoid) - cf_var_grid.longitude_of_projection_origin = ( - cs.longitude_of_projection_origin - ) - cf_var_grid.latitude_of_projection_origin = ( - cs.latitude_of_projection_origin - ) - cf_var_grid.false_easting = cs.false_easting - cf_var_grid.false_northing = cs.false_northing - cf_var_grid.perspective_point_height = ( - cs.perspective_point_height - ) - cf_var_grid.sweep_angle_axis = cs.sweep_angle_axis - - # other - else: - warnings.warn( - "Unable to represent the horizontal " - "coordinate system. The coordinate system " - "type %r is not yet implemented." % type(cs) - ) - - self._coord_systems.append(cs) - - # Refer to grid var - _setncattr(cf_var_cube, "grid_mapping", cs.grid_mapping_name) - - def _create_cf_data_variable( - self, - cube, - dimension_names, - local_keys=None, - packing=None, - fill_value=None, - **kwargs, - ): - """ - Create CF-netCDF data variable for the cube and any associated grid - mapping. - - Args: - - * cube (:class:`iris.cube.Cube`): - The associated cube being saved to CF-netCDF file. - * dimension_names (list): - String names for each dimension of the cube. 
- - Kwargs: - - * local_keys (iterable of strings): - * see :func:`iris.fileformats.netcdf.Saver.write` - * packing (type or string or dict or list): - * see :func:`iris.fileformats.netcdf.Saver.write` - * fill_value: - * see :func:`iris.fileformats.netcdf.Saver.write` - - All other keywords are passed through to the dataset's `createVariable` - method. - - Returns: - The newly created CF-netCDF data variable. - - """ - - if packing: - if isinstance(packing, dict): - if "dtype" not in packing: - msg = "The dtype attribute is required for packing." - raise ValueError(msg) - dtype = np.dtype(packing["dtype"]) - scale_factor = packing.get("scale_factor", None) - add_offset = packing.get("add_offset", None) - valid_keys = {"dtype", "scale_factor", "add_offset"} - invalid_keys = set(packing.keys()) - valid_keys - if invalid_keys: - msg = ( - "Invalid packing key(s) found: '{}'. The valid " - "keys are '{}'.".format( - "', '".join(invalid_keys), "', '".join(valid_keys) - ) - ) - raise ValueError(msg) - else: - # We compute the scale_factor and add_offset based on the - # min/max of the data. This requires the data to be loaded. - masked = ma.isMaskedArray(cube.data) - dtype = np.dtype(packing) - cmax = cube.data.max() - cmin = cube.data.min() - n = dtype.itemsize * 8 - if masked: - scale_factor = (cmax - cmin) / (2 ** n - 2) - else: - scale_factor = (cmax - cmin) / (2 ** n - 1) - if dtype.kind == "u": - add_offset = cmin - elif dtype.kind == "i": - if masked: - add_offset = (cmax + cmin) / 2 - else: - add_offset = cmin + 2 ** (n - 1) * scale_factor - - def set_packing_ncattrs(cfvar): - """Set netCDF packing attributes.""" - if packing: - if scale_factor: - _setncattr(cfvar, "scale_factor", scale_factor) - if add_offset: - _setncattr(cfvar, "add_offset", add_offset) - - cf_name = self._get_cube_variable_name(cube) - while cf_name in self._dataset.variables: - cf_name = self._increment_name(cf_name) - - # if netcdf3 avoid streaming due to dtype handling - if not cube.has_lazy_data() or self._dataset.file_format in ( - "NETCDF3_CLASSIC", - "NETCDF3_64BIT", - ): - - # Get the values in a form which is valid for the file format. - data = self._ensure_valid_dtype(cube.data, "cube", cube) - - def store(data, cf_var, fill_value): - cf_var[:] = data - is_masked = ma.is_masked(data) - contains_value = fill_value is not None and fill_value in data - return is_masked, contains_value - - else: - data = cube.lazy_data() - - def store(data, cf_var, fill_value): - # Store lazy data and check whether it is masked and contains - # the fill value - target = _FillValueMaskCheckAndStoreTarget(cf_var, fill_value) - da.store([data], [target]) - return target.is_masked, target.contains_value - - if not packing: - dtype = data.dtype.newbyteorder("=") - - # Create the cube CF-netCDF data variable with data payload. - cf_var = self._dataset.createVariable( - cf_name, dtype, dimension_names, fill_value=fill_value, **kwargs - ) - set_packing_ncattrs(cf_var) - - # If packing attributes are specified, don't bother checking whether - # the fill value is in the data. 
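
For reference, the default fill value consulted below comes straight from the netCDF4 library's lookup table (a minimal sketch; "f4" is just one example key)::

    import netCDF4

    # default_fillvals is keyed by dtype strings; dtype.str[1:] turns
    # e.g. "<f4" into "f4".
    print(netCDF4.default_fillvals["f4"])  # 9.969209968386869e+36
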
- if packing: - fill_value_to_check = None - elif fill_value is not None: - fill_value_to_check = fill_value - else: - fill_value_to_check = netCDF4.default_fillvals[dtype.str[1:]] - - # Store the data and check if it is masked and contains the fill value - is_masked, contains_fill_value = store( - data, cf_var, fill_value_to_check - ) - - if dtype.itemsize == 1 and fill_value is None: - if is_masked: - msg = ( - "Cube '{}' contains byte data with masked points, but " - "no fill_value keyword was given. As saved, these " - "points will read back as valid values. To save as " - "masked byte data, please explicitly specify the " - "'fill_value' keyword." - ) - warnings.warn(msg.format(cube.name())) - elif contains_fill_value: - msg = ( - "Cube '{}' contains unmasked data points equal to the " - "fill-value, {}. As saved, these points will read back " - "as missing data. To save these as normal values, please " - "specify a 'fill_value' keyword not equal to any valid " - "data points." - ) - warnings.warn(msg.format(cube.name(), fill_value)) - - if cube.standard_name: - _setncattr(cf_var, "standard_name", cube.standard_name) - - if cube.long_name: - _setncattr(cf_var, "long_name", cube.long_name) - - if cube.units.is_udunits(): - _setncattr(cf_var, "units", str(cube.units)) - - # Add the CF-netCDF calendar attribute. - if cube.units.calendar: - _setncattr(cf_var, "calendar", cube.units.calendar) - - # Add data variable-only attribute names to local_keys. - if local_keys is None: - local_keys = set() - else: - local_keys = set(local_keys) - local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS) - - # Add any cube attributes whose keys are in local_keys as - # CF-netCDF data variable attributes. - attr_names = set(cube.attributes).intersection(local_keys) - for attr_name in sorted(attr_names): - # Do not output 'conventions' attribute. - if attr_name.lower() == "conventions": - continue - - value = cube.attributes[attr_name] - - if attr_name == "STASH": - # Adopting provisional Metadata Conventions for representing MO - # Scientific Data encoded in NetCDF Format. - attr_name = "um_stash_source" - value = str(value) - - if attr_name == "ukmo__process_flags": - value = " ".join([x.replace(" ", "_") for x in value]) - - if attr_name in _CF_GLOBAL_ATTRS: - msg = ( - "{attr_name!r} is being added as CF data variable " - "attribute, but {attr_name!r} should only be a CF " - "global attribute.".format(attr_name=attr_name) - ) - warnings.warn(msg) - - _setncattr(cf_var, attr_name, value) - - # Create the CF-netCDF data variable cell method attribute. - cell_methods = self._create_cf_cell_methods(cube, dimension_names) - - if cell_methods: - _setncattr(cf_var, "cell_methods", cell_methods) - - # Create the CF-netCDF grid mapping. - self._create_cf_grid_mapping(cube, cf_var) - - return cf_var - - def _increment_name(self, varname): - """ - Increment string name or begin increment. - - Avoidance of conflicts between variable names, where the name is - incremented to distinguish it from others. - - Args: - - * varname (string): - Variable name to increment. - - Returns: - Incremented varname. 
-
- """
- num = 0
- try:
- name, endnum = varname.rsplit("_", 1)
- if endnum.isdigit():
- num = int(endnum) + 1
- varname = name
- except ValueError:
- pass
-
- return "{}_{}".format(varname, num)
-
-
-def save(
- cube,
- filename,
- netcdf_format="NETCDF4",
- local_keys=None,
- unlimited_dimensions=None,
- zlib=False,
- complevel=4,
- shuffle=True,
- fletcher32=False,
- contiguous=False,
- chunksizes=None,
- endian="native",
- least_significant_digit=None,
- packing=None,
- fill_value=None,
-):
- """
- Save cube(s) to a netCDF file, given the cube and the filename.
-
- * Iris will write CF 1.7 compliant NetCDF files.
- * The attributes dictionaries on each cube in the saved cube list
- will be compared and common attributes saved as NetCDF global
- attributes where appropriate.
- * Keyword arguments specifying how to save the data are applied
- to each cube. To use different settings for different cubes, use
- the NetCDF Context manager (:class:`~Saver`) directly.
- * The save process will stream the data payload to the file using dask,
- enabling large data payloads to be saved and maintaining the 'lazy'
- status of the cube's data payload, unless the netcdf_format is explicitly
- specified to be 'NETCDF3' or 'NETCDF3_CLASSIC'.
-
- Args:
-
- * cube (:class:`iris.cube.Cube` or :class:`iris.cube.CubeList`):
- A :class:`iris.cube.Cube`, :class:`iris.cube.CubeList` or other
- iterable of cubes to be saved to a netCDF file.
-
- * filename (string):
- Name of the netCDF file to save the cube(s).
-
- Kwargs:
-
- * netcdf_format (string):
- Underlying netCDF file format, one of 'NETCDF4', 'NETCDF4_CLASSIC',
- 'NETCDF3_CLASSIC' or 'NETCDF3_64BIT'. Default is 'NETCDF4' format.
-
- * local_keys (iterable of strings):
- An iterable of cube attribute keys. Any cube attributes with
- matching keys will become attributes on the data variable rather
- than global attributes.
-
- * unlimited_dimensions (iterable of strings and/or
- :class:`iris.coords.Coord` objects):
- List of coordinate names (or coordinate objects) corresponding
- to coordinate dimensions of `cube` to save with the NetCDF dimension
- variable length 'UNLIMITED'. By default, no unlimited dimensions are
- saved. Only the 'NETCDF4' format supports multiple 'UNLIMITED'
- dimensions.
-
- * zlib (bool):
- If `True`, the data will be compressed in the netCDF file using gzip
- compression (default `False`).
-
- * complevel (int):
- An integer between 1 and 9 describing the level of compression desired
- (default 4). Ignored if `zlib=False`.
-
- * shuffle (bool):
- If `True`, the HDF5 shuffle filter will be applied before compressing
- the data (default `True`). This significantly improves compression.
- Ignored if `zlib=False`.
-
- * fletcher32 (bool):
- If `True`, the Fletcher32 HDF5 checksum algorithm is activated to
- detect errors. Default `False`.
-
- * contiguous (bool):
- If `True`, the variable data is stored contiguously on disk. Default
- `False`. Setting to `True` for a variable with an unlimited dimension
- will trigger an error.
-
- * chunksizes (tuple of int):
- Used to manually specify the HDF5 chunksizes for each dimension of the
- variable. A detailed discussion of HDF chunking and I/O performance is
- available here: https://www.unidata.ucar.edu/software/netcdf/documentation/NUG/netcdf_perf_chunking.html.
- Basically, you want the chunk size for each dimension to match as
- closely as possible the size of the data block that users will read
- from the file. `chunksizes` cannot be set if `contiguous=True`.
-
- * endian (string):
- Used to control whether the data is stored in little or big endian
- format on disk. Possible values are 'little', 'big' or 'native'
- (default). The library will automatically handle endian conversions
- when the data is read, but if the data is always going to be read on a
- computer with the opposite format as the one used to create the file,
- there may be some performance advantage to be gained by setting the
- endian-ness.
-
- * least_significant_digit (int):
- If `least_significant_digit` is specified, variable data will be
- truncated (quantized). In conjunction with `zlib=True` this produces
- 'lossy', but significantly more efficient compression. For example, if
- `least_significant_digit=1`, data will be quantized using
- `numpy.around(scale*data)/scale`, where `scale = 2**bits`, and `bits`
- is determined so that a precision of 0.1 is retained (in this case
- `bits=4`). From
- http://www.esrl.noaa.gov/psd/data/gridded/conventions/cdc_netcdf_standard.shtml:
- "least_significant_digit -- power of ten of the smallest decimal place
- in unpacked data that is a reliable value". Default is `None`,
- meaning no quantization ('lossless' compression).
-
- * packing (type or string or dict or list): A numpy integer datatype
- (signed or unsigned) or a string that describes a numpy integer dtype
- (i.e. 'i2', 'short', 'u4') or a dict of packing parameters as described
- below or an iterable of such types, strings, or dicts.
- This provides support for netCDF data packing as described in
- http://www.unidata.ucar.edu/software/netcdf/documentation/NUG/best_practices.html#bp_Packed-Data-Values
- If this argument is a type (or type string), appropriate values of
- scale_factor and add_offset will be automatically calculated based
- on `cube.data` and possible masking. For more control, pass a dict with
- one or more of the following keys: `dtype` (required), `scale_factor`
- and `add_offset`. Note that automatic calculation of packing parameters
- will trigger loading of lazy data; set them manually using a dict to
- avoid this. The default is `None`, in which case the datatype is
- determined from the cube and no packing will occur. If this argument is
- a list it must have the same number of elements as `cube` if `cube` is
- a :class:`iris.cube.CubeList`, or one element, and each element of
- this argument will be applied to each cube separately.
-
- * fill_value (numeric or list):
- The value to use for the `_FillValue` attribute on the netCDF variable.
- If `packing` is specified the value of `fill_value` should be in the
- domain of the packed data. If this argument is a list it must have the
- same number of elements as `cube` if `cube` is a
- :class:`iris.cube.CubeList`, or a single element, and each element of
- this argument will be applied to each cube separately.
-
- Returns:
- None.
-
- .. note::
-
- The `zlib`, `complevel`, `shuffle`, `fletcher32`, `contiguous`,
- `chunksizes` and `endian` keywords are silently ignored for netCDF 3
- files that do not use HDF5.
-
- .. seealso::
-
- NetCDF Context manager (:class:`~Saver`).
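
A minimal usage sketch of the packing and fill_value keywords (the file names and packing values are illustrative only)::

    import iris

    cubes = iris.load("input.nc")  # any iterable of cubes
    # Pack every cube as 16-bit integers, giving explicit parameters
    # via a dict so that no lazy data is realised.
    iris.save(
        cubes,
        "output.nc",
        packing={"dtype": "i2", "scale_factor": 0.1, "add_offset": 0.0},
        fill_value=-32767,
    )
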
-
- """
- from iris.cube import Cube, CubeList
-
- if unlimited_dimensions is None:
- unlimited_dimensions = []
-
- if isinstance(cube, Cube):
- cubes = CubeList()
- cubes.append(cube)
- else:
- cubes = cube
-
- if local_keys is None:
- local_keys = set()
- else:
- local_keys = set(local_keys)
-
- # Determine the attribute keys that are common across all cubes and
- # thereby extend the collection of local_keys for attributes
- # that should be attributes on data variables.
- attributes = cubes[0].attributes
- common_keys = set(attributes)
- for cube in cubes[1:]:
- keys = set(cube.attributes)
- local_keys.update(keys.symmetric_difference(common_keys))
- common_keys.intersection_update(keys)
- different_value_keys = []
- for key in common_keys:
- if np.any(attributes[key] != cube.attributes[key]):
- different_value_keys.append(key)
- common_keys.difference_update(different_value_keys)
- local_keys.update(different_value_keys)
-
- def is_valid_packspec(p):
- """Only checks that the datatype is valid."""
- if isinstance(p, dict):
- if "dtype" in p:
- return is_valid_packspec(p["dtype"])
- else:
- msg = "The argument to packing must contain the key 'dtype'."
- raise ValueError(msg)
- elif isinstance(p, str) or isinstance(p, type):
- pdtype = np.dtype(p) # Does nothing if it's already a numpy dtype
- if pdtype.kind != "i" and pdtype.kind != "u":
- msg = "The packing datatype must be a numpy integer type."
- raise ValueError(msg)
- return True
- elif p is None:
- return True
- else:
- return False
-
- if is_valid_packspec(packing):
- packspecs = repeat(packing)
- else:
- # Assume iterable, make sure packing is the same length as cubes.
- for cube, packspec in zip_longest(cubes, packing, fillvalue=-1):
- if cube == -1 or packspec == -1:
- msg = (
- "If packing is a list, it must have the "
- "same number of elements as the cube argument."
- )
- raise ValueError(msg)
- if not is_valid_packspec(packspec):
- msg = "Invalid packing argument: {}.".format(packspec)
- raise ValueError(msg)
- packspecs = packing
-
- # Make fill-value(s) into an iterable over cubes.
- if isinstance(fill_value, str):
- # Strings are awkward -- handle separately.
- fill_values = repeat(fill_value)
- else:
- try:
- fill_values = tuple(fill_value)
- except TypeError:
- fill_values = repeat(fill_value)
- else:
- if len(fill_values) != len(cubes):
- msg = (
- "If fill_value is a list, it must have the "
- "same number of elements as the cube argument."
- )
- raise ValueError(msg)
-
- # Initialise Manager for saving
- with Saver(filename, netcdf_format) as sman:
- # Iterate through the cubelist.
- for cube, packspec, fill_value in zip(cubes, packspecs, fill_values):
- sman.write(
- cube,
- local_keys,
- unlimited_dimensions,
- zlib,
- complevel,
- shuffle,
- fletcher32,
- contiguous,
- chunksizes,
- endian,
- least_significant_digit,
- packing=packspec,
- fill_value=fill_value,
- )
-
- if iris.config.netcdf.conventions_override:
- # Set to the default if custom conventions are not available.
- conventions = cube.attributes.get(
- "Conventions", CF_CONVENTIONS_VERSION
- )
- else:
- conventions = CF_CONVENTIONS_VERSION
-
- # Perform a CF patch of the conventions attribute.
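
A short sketch of the conventions override handled above, assuming the iris.config.netcdf options context manager (the attribute value is illustrative)::

    import iris

    cube.attributes["Conventions"] = "CF-1.8"
    with iris.config.netcdf.context(conventions_override=True):
        # The cube's own Conventions attribute is written out, rather
        # than being replaced by CF_CONVENTIONS_VERSION.
        iris.save(cube, "override.nc")
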
- cf_profile_available = iris.site_configuration.get( - "cf_profile" - ) not in [None, False] - if cf_profile_available: - conventions_patch = iris.site_configuration.get( - "cf_patch_conventions" - ) - if conventions_patch is not None: - conventions = conventions_patch(conventions) - else: - msg = "cf_profile is available but no {} defined.".format( - "cf_patch_conventions" - ) - warnings.warn(msg) - - # Add conventions attribute. - sman.update_global_attributes(Conventions=conventions) diff --git a/lib/iris/io/__init__.py b/lib/iris/io/__init__.py index 9af09d8f30..a1463c8e64 100644 --- a/lib/iris/io/__init__.py +++ b/lib/iris/io/__init__.py @@ -275,7 +275,7 @@ def _check_init_savers(): _savers.update( { "pp": pp.save, - "nc": netcdf.save.save, + "nc": netcdf.saver.save, "dot": _dot_save, "dotpng": _dot_save_png, "grib2": _grib_save, diff --git a/lib/iris/tests/integration/test_netcdf.py b/lib/iris/tests/integration/test_netcdf.py index 471f688255..a63f6d1954 100644 --- a/lib/iris/tests/integration/test_netcdf.py +++ b/lib/iris/tests/integration/test_netcdf.py @@ -26,8 +26,8 @@ import iris from iris.coords import CellMethod from iris.cube import Cube, CubeList -from iris.fileformats.netcdf.load import UnknownCellMethodWarning -from iris.fileformats.netcdf.save import CF_CONVENTIONS_VERSION, Saver +from iris.fileformats.netcdf.loader import UnknownCellMethodWarning +from iris.fileformats.netcdf.saver import CF_CONVENTIONS_VERSION, Saver import iris.tests.stock as stock diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/endian.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/endian.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/endian.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/endian.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/mercator.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/mercator.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/mercator.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/mercator.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/mercator_no_ellipsoid.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/mercator_no_ellipsoid.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/mercator_no_ellipsoid.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/mercator_no_ellipsoid.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/stereographic.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/stereographic.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/stereographic.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/stereographic.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/stereographic_no_ellipsoid.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/stereographic_no_ellipsoid.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/stereographic_no_ellipsoid.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/stereographic_no_ellipsoid.cdl diff --git 
a/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/transverse_mercator.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/transverse_mercator.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/transverse_mercator.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/transverse_mercator.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/transverse_mercator_no_ellipsoid.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/transverse_mercator_no_ellipsoid.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/transverse_mercator_no_ellipsoid.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/transverse_mercator_no_ellipsoid.cdl diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/with_climatology.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/with_climatology.cdl similarity index 100% rename from lib/iris/tests/results/unit/fileformats/netcdf/save/Saver/write/with_climatology.cdl rename to lib/iris/tests/results/unit/fileformats/netcdf/saver/Saver/write/with_climatology.cdl diff --git a/lib/iris/tests/test_netcdf.py b/lib/iris/tests/test_netcdf.py index 4940f42236..8baadbb48a 100644 --- a/lib/iris/tests/test_netcdf.py +++ b/lib/iris/tests/test_netcdf.py @@ -28,7 +28,7 @@ import iris.analysis.trajectory import iris.coord_systems as icoord_systems from iris.fileformats._nc_load_rules import helpers as ncload_helpers -from iris.fileformats.netcdf.save import Saver +from iris.fileformats.netcdf.saver import Saver import iris.std_names import iris.tests.stock as stock import iris.util diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/actions/__init__.py b/lib/iris/tests/unit/fileformats/nc_load_rules/actions/__init__.py index c0d87d3ee5..4ec6c6af99 100644 --- a/lib/iris/tests/unit/fileformats/nc_load_rules/actions/__init__.py +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/actions/__init__.py @@ -7,7 +7,7 @@ Unit tests for the module :mod:`iris.fileformats._nc_load_rules.actions`. This module provides the engine.activate() call used in the function -`iris.fileformats.netcdf.load._load_cube`. +`iris.fileformats.netcdf.loader._load_cube`. """ from pathlib import Path @@ -17,8 +17,8 @@ import iris.fileformats._nc_load_rules.engine from iris.fileformats.cf import CFReader -import iris.fileformats.netcdf.load -from iris.fileformats.netcdf.load import _load_cube +import iris.fileformats.netcdf.loader +from iris.fileformats.netcdf.loader import _load_cube """ Notes on testing method. @@ -92,11 +92,11 @@ def load_cube_from_cdl(self, cdl_string, cdl_path, nc_path): # Grab a data variable : FOR NOW always grab the 'phenom' variable. cf_var = cf.cf_group.data_variables["phenom"] - engine = iris.fileformats.netcdf.load._actions_engine() + engine = iris.fileformats.netcdf.loader._actions_engine() # If debug enabled, switch on the activation summary debug output. # Use 'patch' so it is restored after the test. - self.patch("iris.fileformats.netcdf.load.DEBUG", self.debug) + self.patch("iris.fileformats.netcdf.loader.DEBUG", self.debug) # Call the main translation function to load a single cube. # _load_cube establishes per-cube facts, activates rules and @@ -107,7 +107,7 @@ def load_cube_from_cdl(self, cdl_string, cdl_path, nc_path): # by the rules operation. 
# Unlike the other translations, _load_cube does *not* convert this # information into actual cube elements. That is instead done by - # `iris.fileformats.netcdf.load._load_aux_factory`. + # `iris.fileformats.netcdf.loader._load_aux_factory`. # For rules testing, it is anyway more convenient to deal with the raw # data, as each factory type has different validity requirements to # build it, and none of that is relevant to the rules operation. diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_auxiliary_coordinate.py b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_auxiliary_coordinate.py index 0ce0a1c622..9cb9fc5bda 100644 --- a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_auxiliary_coordinate.py +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_auxiliary_coordinate.py @@ -77,7 +77,7 @@ def patched__getitem__(proxy_self, keys): raise RuntimeError() self.patch( - "iris.fileformats.netcdf.load.NetCDFDataProxy.__getitem__", + "iris.fileformats.netcdf.loader.NetCDFDataProxy.__getitem__", new=patched__getitem__, ) @@ -180,7 +180,7 @@ def patched__getitem__(proxy_self, keys): raise RuntimeError() self.deferred_load_patch = mock.patch( - "iris.fileformats.netcdf.load.NetCDFDataProxy.__getitem__", + "iris.fileformats.netcdf.loader.NetCDFDataProxy.__getitem__", new=patched__getitem__, ) @@ -264,7 +264,7 @@ def patched__getitem__(proxy_self, keys): raise RuntimeError() self.patch( - "iris.fileformats.netcdf.load.NetCDFDataProxy.__getitem__", + "iris.fileformats.netcdf.loader.NetCDFDataProxy.__getitem__", new=patched__getitem__, ) diff --git a/lib/iris/tests/unit/fileformats/netcdf/load/__init__.py b/lib/iris/tests/unit/fileformats/netcdf/load/__init__.py deleted file mode 100644 index 4dffee44ae..0000000000 --- a/lib/iris/tests/unit/fileformats/netcdf/load/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the LGPL license. -# See COPYING and COPYING.LESSER in the root of the repository for full -# licensing details. -"""Unit tests for the :mod:`iris.fileformats.netcdf.load` module.""" diff --git a/lib/iris/tests/unit/fileformats/netcdf/load/test__get_cf_var_data.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py similarity index 94% rename from lib/iris/tests/unit/fileformats/netcdf/load/test__get_cf_var_data.py rename to lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py index 380447ff0e..597dcd612f 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/load/test__get_cf_var_data.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py @@ -3,7 +3,7 @@ # This file is part of Iris and is released under the LGPL license. # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. -"""Unit tests for the `iris.fileformats.netcdf.load._get_cf_var_data` function.""" +"""Unit tests for the `iris.fileformats.netcdf.loader._get_cf_var_data` function.""" # Import iris.tests first so that some things can be initialised before # importing anything else. 
@@ -16,7 +16,7 @@
 from iris._lazy_data import _optimum_chunksize
 import iris.fileformats.cf
-from iris.fileformats.netcdf.load import _get_cf_var_data
+from iris.fileformats.netcdf.loader import _get_cf_var_data


 class Test__get_cf_var_data(tests.IrisTest):
diff --git a/lib/iris/tests/unit/fileformats/netcdf/load/test__load_aux_factory.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__load_aux_factory.py
similarity index 98%
rename from lib/iris/tests/unit/fileformats/netcdf/load/test__load_aux_factory.py
rename to lib/iris/tests/unit/fileformats/netcdf/loader/test__load_aux_factory.py
index ac5fa32cab..6504556a43 100644
--- a/lib/iris/tests/unit/fileformats/netcdf/load/test__load_aux_factory.py
+++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__load_aux_factory.py
@@ -3,7 +3,7 @@
 # This file is part of Iris and is released under the LGPL license.
 # See COPYING and COPYING.LESSER in the root of the repository for full
 # licensing details.
-"""Unit tests for the `iris.fileformats.netcdf.load._load_aux_factory` function."""
+"""Unit tests for the `iris.fileformats.netcdf.loader._load_aux_factory` function."""

 # Import iris.tests first so that some things can be initialised before
 # importing anything else.
@@ -16,7 +16,7 @@
 from iris.coords import DimCoord
 from iris.cube import Cube
-from iris.fileformats.netcdf.load import _load_aux_factory
+from iris.fileformats.netcdf.loader import _load_aux_factory


 class TestAtmosphereHybridSigmaPressureCoordinate(tests.IrisTest):
diff --git a/lib/iris/tests/unit/fileformats/netcdf/load/test__load_cube.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__load_cube.py
similarity index 95%
rename from lib/iris/tests/unit/fileformats/netcdf/load/test__load_cube.py
rename to lib/iris/tests/unit/fileformats/netcdf/loader/test__load_cube.py
index 79ad4519a2..855be3f6ea 100644
--- a/lib/iris/tests/unit/fileformats/netcdf/load/test__load_cube.py
+++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__load_cube.py
@@ -3,7 +3,7 @@
 # This file is part of Iris and is released under the LGPL license.
 # See COPYING and COPYING.LESSER in the root of the repository for full
 # licensing details.
-"""Unit tests for the `iris.fileformats.netcdf.load._load_cube` function."""
+"""Unit tests for the `iris.fileformats.netcdf.loader._load_cube` function."""

 # Import iris.tests first so that some things can be initialised before
 # importing anything else.
@@ -15,7 +15,7 @@ from iris.coords import DimCoord import iris.fileformats.cf -from iris.fileformats.netcdf.load import _load_cube +from iris.fileformats.netcdf.loader import _load_cube class TestCoordAttributes(tests.IrisTest): @@ -28,7 +28,7 @@ def _patcher(engine, cf, cf_group): engine.cube_parts["coordinates"] = coordinates def setUp(self): - this = "iris.fileformats.netcdf.load._assert_case_specific_facts" + this = "iris.fileformats.netcdf.loader._assert_case_specific_facts" patch = mock.patch(this, side_effect=self._patcher) patch.start() self.addCleanup(patch.stop) @@ -112,7 +112,7 @@ def test_flag_pass_thru_multi(self): class TestCubeAttributes(tests.IrisTest): def setUp(self): - this = "iris.fileformats.netcdf.load._assert_case_specific_facts" + this = "iris.fileformats.netcdf.loader._assert_case_specific_facts" patch = mock.patch(this) patch.start() self.addCleanup(patch.stop) diff --git a/lib/iris/tests/unit/fileformats/netcdf/load/test_load_cubes.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test_load_cubes.py similarity index 98% rename from lib/iris/tests/unit/fileformats/netcdf/load/test_load_cubes.py rename to lib/iris/tests/unit/fileformats/netcdf/loader/test_load_cubes.py index 9c07e68ac7..e73147b10a 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/load/test_load_cubes.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test_load_cubes.py @@ -4,7 +4,7 @@ # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. """ -Unit tests for the :func:`iris.fileformats.netcdf.load.load_cubes` function. +Unit tests for the :func:`iris.fileformats.netcdf.loader.load_cubes` function. todo: migrate the remaining unit-esque tests from iris.tests.test_netcdf, switching to use netcdf.load_cubes() instead of iris.load()/load_cube(). @@ -21,7 +21,7 @@ from iris.coords import AncillaryVariable, CellMeasure from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD, MeshCoord -from iris.fileformats.netcdf.load import load_cubes, logger +from iris.fileformats.netcdf.loader import load_cubes, logger # Import iris.tests first so that some things can be initialised before # importing anything else. diff --git a/lib/iris/tests/unit/fileformats/netcdf/load/test_parse_cell_methods.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test_parse_cell_methods.py similarity index 97% rename from lib/iris/tests/unit/fileformats/netcdf/load/test_parse_cell_methods.py rename to lib/iris/tests/unit/fileformats/netcdf/loader/test_parse_cell_methods.py index ed92d34e84..5eb93e76fb 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/load/test_parse_cell_methods.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test_parse_cell_methods.py @@ -4,7 +4,7 @@ # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. """ -Unit tests for :func:`iris.fileformats.netcdf.load.parse_cell_methods`. +Unit tests for :func:`iris.fileformats.netcdf.loader.parse_cell_methods`. 
""" @@ -15,7 +15,7 @@ from unittest import mock from iris.coords import CellMethod -from iris.fileformats.netcdf.load import parse_cell_methods +from iris.fileformats.netcdf.loader import parse_cell_methods class Test(tests.IrisTest): diff --git a/lib/iris/tests/unit/fileformats/netcdf/save/__init__.py b/lib/iris/tests/unit/fileformats/netcdf/save/__init__.py deleted file mode 100644 index 5f6a9ee6ef..0000000000 --- a/lib/iris/tests/unit/fileformats/netcdf/save/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the LGPL license. -# See COPYING and COPYING.LESSER in the root of the repository for full -# licensing details. -"""Unit tests for the :mod:`iris.fileformats.netcdf.save` module.""" diff --git a/lib/iris/tests/unit/fileformats/netcdf/save/test_Saver.py b/lib/iris/tests/unit/fileformats/netcdf/saver/test_Saver.py similarity index 99% rename from lib/iris/tests/unit/fileformats/netcdf/save/test_Saver.py rename to lib/iris/tests/unit/fileformats/netcdf/saver/test_Saver.py index eb9e079d11..9ff6c396da 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/save/test_Saver.py +++ b/lib/iris/tests/unit/fileformats/netcdf/saver/test_Saver.py @@ -32,7 +32,7 @@ ) from iris.coords import DimCoord from iris.cube import Cube -from iris.fileformats.netcdf.save import Saver +from iris.fileformats.netcdf.saver import Saver import iris.tests.stock as stock @@ -185,7 +185,7 @@ def test_big_endian(self): def test_zlib(self): cube = self._simple_cube(">f4") - api = self.patch("iris.fileformats.netcdf.save.netCDF4") + api = self.patch("iris.fileformats.netcdf.saver.netCDF4") with Saver("/dummy/path", "NETCDF4") as saver: saver.write(cube, zlib=True) dataset = api.Dataset.return_value diff --git a/lib/iris/tests/unit/fileformats/netcdf/save/test__FillValueMaskCheckAndStoreTarget.py b/lib/iris/tests/unit/fileformats/netcdf/saver/test__FillValueMaskCheckAndStoreTarget.py similarity index 95% rename from lib/iris/tests/unit/fileformats/netcdf/save/test__FillValueMaskCheckAndStoreTarget.py rename to lib/iris/tests/unit/fileformats/netcdf/saver/test__FillValueMaskCheckAndStoreTarget.py index 3b64c4d80c..43dcb25be9 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/save/test__FillValueMaskCheckAndStoreTarget.py +++ b/lib/iris/tests/unit/fileformats/netcdf/saver/test__FillValueMaskCheckAndStoreTarget.py @@ -4,7 +4,7 @@ # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. """ -Unit tests for the `iris.fileformats.netcdf.save._FillValueMaskCheckAndStoreTarget` +Unit tests for the `iris.fileformats.netcdf.saver._FillValueMaskCheckAndStoreTarget` class. """ @@ -17,7 +17,7 @@ import numpy as np -from iris.fileformats.netcdf.save import _FillValueMaskCheckAndStoreTarget +from iris.fileformats.netcdf.saver import _FillValueMaskCheckAndStoreTarget class Test__FillValueMaskCheckAndStoreTarget(tests.IrisTest): diff --git a/lib/iris/tests/unit/fileformats/netcdf/save/test_save.py b/lib/iris/tests/unit/fileformats/netcdf/saver/test_save.py similarity index 93% rename from lib/iris/tests/unit/fileformats/netcdf/save/test_save.py rename to lib/iris/tests/unit/fileformats/netcdf/saver/test_save.py index f7c2464ac1..b1def41463 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/save/test_save.py +++ b/lib/iris/tests/unit/fileformats/netcdf/saver/test_save.py @@ -3,7 +3,7 @@ # This file is part of Iris and is released under the LGPL license. 
# See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. -"""Unit tests for the `iris.fileformats.netcdf.save` function.""" +"""Unit tests for the `iris.fileformats.netcdf.saver.save` function.""" # Import iris.tests first so that some things can be initialised before # importing anything else. @@ -17,7 +17,7 @@ import iris from iris.coords import DimCoord from iris.cube import Cube, CubeList -from iris.fileformats.netcdf.save import CF_CONVENTIONS_VERSION, save +from iris.fileformats.netcdf.saver import CF_CONVENTIONS_VERSION, save from iris.tests.stock import lat_lon_cube @@ -138,7 +138,7 @@ def test_None(self): # Test that when no fill_value argument is passed, the fill_value # argument to Saver.write is None or not present. cubes = self._make_cubes() - with mock.patch("iris.fileformats.netcdf.Saver") as Saver: + with mock.patch("iris.fileformats.netcdf.saver.Saver") as Saver: save(cubes, "dummy.nc") # Get the Saver.write mock @@ -156,7 +156,7 @@ def test_single(self): # that value is passed to each call to Saver.write cubes = self._make_cubes() fill_value = 12345.0 - with mock.patch("iris.fileformats.netcdf.Saver") as Saver: + with mock.patch("iris.fileformats.netcdf.saver.Saver") as Saver: save(cubes, "dummy.nc", fill_value=fill_value) # Get the Saver.write mock @@ -173,7 +173,7 @@ def test_multiple(self): # each element is passed to separate calls to Saver.write cubes = self._make_cubes() fill_values = [123.0, 456.0, 789.0] - with mock.patch("iris.fileformats.netcdf.Saver") as Saver: + with mock.patch("iris.fileformats.netcdf.saver.Saver") as Saver: save(cubes, "dummy.nc", fill_value=fill_values) # Get the Saver.write mock @@ -190,7 +190,7 @@ def test_single_string(self): # that value is passed to calls to Saver.write cube = Cube(["abc", "def", "hij"]) fill_value = "xyz" - with mock.patch("iris.fileformats.netcdf.Saver") as Saver: + with mock.patch("iris.fileformats.netcdf.saver.Saver") as Saver: save(cube, "dummy.nc", fill_value=fill_value) # Get the Saver.write mock @@ -206,7 +206,7 @@ def test_multi_wrong_length(self): # is passed as the fill_value argument, an error is raised cubes = self._make_cubes() fill_values = [1.0, 2.0, 3.0, 4.0] - with mock.patch("iris.fileformats.netcdf.Saver"): + with mock.patch("iris.fileformats.netcdf.saver.Saver"): with self.assertRaises(ValueError): save(cubes, "dummy.nc", fill_value=fill_values) From 07f6a5917b72feaa8075cc2b5f5497b0f767be27 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 23 Aug 2021 13:22:08 +0100 Subject: [PATCH 6/7] Missed files. --- lib/iris/fileformats/netcdf/saver.py | 2131 +++++++++++++++++ .../fileformats/netcdf/loader/__init__.py | 6 + .../unit/fileformats/netcdf/saver/__init__.py | 6 + 3 files changed, 2143 insertions(+) create mode 100644 lib/iris/fileformats/netcdf/saver.py create mode 100644 lib/iris/tests/unit/fileformats/netcdf/loader/__init__.py create mode 100644 lib/iris/tests/unit/fileformats/netcdf/saver/__init__.py diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py new file mode 100644 index 0000000000..753d08be37 --- /dev/null +++ b/lib/iris/fileformats/netcdf/saver.py @@ -0,0 +1,2131 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +Module to support the saving of Iris cubes to a NetCDF file. + +See also: `netCDF4 python `_. 
+ +Also refer to document 'NetCDF Climate and Forecast (CF) Metadata Conventions'. + +""" + +import collections +from itertools import repeat, zip_longest +import os +import os.path +import re +import string +import warnings + +import cf_units +import dask.array as da +import netCDF4 +import numpy as np +import numpy.ma as ma + +from iris.aux_factory import ( + HybridHeightFactory, + HybridPressureFactory, + OceanSFactory, + OceanSg1Factory, + OceanSg2Factory, + OceanSigmaFactory, + OceanSigmaZFactory, +) +import iris.config +import iris.coord_systems +import iris.coords +import iris.exceptions +import iris.fileformats.cf +import iris.io +import iris.util + +# Standard CML spatio-temporal axis names. +SPATIO_TEMPORAL_AXES = ["t", "z", "y", "x"] + +# CF attributes that should not be global. +_CF_DATA_ATTRS = [ + "flag_masks", + "flag_meanings", + "flag_values", + "instance_dimension", + "missing_value", + "sample_dimension", + "standard_error_multiplier", +] + +# CF attributes that should only be global. +_CF_GLOBAL_ATTRS = ["conventions", "featureType", "history", "title"] + +# UKMO specific attributes that should not be global. +_UKMO_DATA_ATTRS = ["STASH", "um_stash_source", "ukmo__process_flags"] + +CF_CONVENTIONS_VERSION = "CF-1.7" + +_FactoryDefn = collections.namedtuple( + "_FactoryDefn", ("primary", "std_name", "formula_terms_format") +) +_FACTORY_DEFNS = { + HybridHeightFactory: _FactoryDefn( + primary="delta", + std_name="atmosphere_hybrid_height_coordinate", + formula_terms_format="a: {delta} b: {sigma} orog: {orography}", + ), + HybridPressureFactory: _FactoryDefn( + primary="delta", + std_name="atmosphere_hybrid_sigma_pressure_coordinate", + formula_terms_format="ap: {delta} b: {sigma} " + "ps: {surface_air_pressure}", + ), + OceanSigmaZFactory: _FactoryDefn( + primary="zlev", + std_name="ocean_sigma_z_coordinate", + formula_terms_format="sigma: {sigma} eta: {eta} depth: {depth} " + "depth_c: {depth_c} nsigma: {nsigma} zlev: {zlev}", + ), + OceanSigmaFactory: _FactoryDefn( + primary="sigma", + std_name="ocean_sigma_coordinate", + formula_terms_format="sigma: {sigma} eta: {eta} depth: {depth}", + ), + OceanSFactory: _FactoryDefn( + primary="s", + std_name="ocean_s_coordinate", + formula_terms_format="s: {s} eta: {eta} depth: {depth} a: {a} b: {b} " + "depth_c: {depth_c}", + ), + OceanSg1Factory: _FactoryDefn( + primary="s", + std_name="ocean_s_coordinate_g1", + formula_terms_format="s: {s} c: {c} eta: {eta} depth: {depth} " + "depth_c: {depth_c}", + ), + OceanSg2Factory: _FactoryDefn( + primary="s", + std_name="ocean_s_coordinate_g2", + formula_terms_format="s: {s} c: {c} eta: {eta} depth: {depth} " + "depth_c: {depth_c}", + ), +} + + +def _bytes_if_ascii(string): + """ + Convert the given string to a byte string (str in py2k, bytes in py3k) + if the given string can be encoded to ascii, else maintain the type + of the inputted string. + + Note: passing objects without an `encode` method (such as None) will + be returned by the function unchanged. + + """ + if isinstance(string, str): + try: + return string.encode(encoding="ascii") + except (AttributeError, UnicodeEncodeError): + pass + return string + + +def _setncattr(variable, name, attribute): + """ + Put the given attribute on the given netCDF4 Data type, casting + attributes as we go to bytes rather than unicode. + + """ + attribute = _bytes_if_ascii(attribute) + return variable.setncattr(name, attribute) + + +class _FillValueMaskCheckAndStoreTarget: + """ + To be used with da.store. 
Remembers whether any element was equal to a
+ given value and whether it was masked, before passing the chunk to the
+ given target.
+
+ """
+
+ def __init__(self, target, fill_value=None):
+ self.target = target
+ self.fill_value = fill_value
+ self.contains_value = False
+ self.is_masked = False
+
+ def __setitem__(self, keys, arr):
+ if self.fill_value is not None:
+ self.contains_value = self.contains_value or self.fill_value in arr
+ self.is_masked = self.is_masked or ma.is_masked(arr)
+ self.target[keys] = arr
+
+
+class CFNameCoordMap:
+ """Provide a simple CF name to CF coordinate mapping."""
+
+ _Map = collections.namedtuple("_Map", ["name", "coord"])
+
+ def __init__(self):
+ self._map = []
+
+ def append(self, name, coord):
+ """
+ Append the given name and coordinate pair to the mapping.
+
+ Args:
+
+ * name:
+ CF name of the associated coordinate.
+
+ * coord:
+ The coordinate of the associated CF name.
+
+ Returns:
+ None.
+
+ """
+ self._map.append(CFNameCoordMap._Map(name, coord))
+
+ @property
+ def names(self):
+ """Return all the CF names."""
+
+ return [pair.name for pair in self._map]
+
+ @property
+ def coords(self):
+ """Return all the coordinates."""
+
+ return [pair.coord for pair in self._map]
+
+ def name(self, coord):
+ """
+ Return the CF name, given a coordinate.
+
+ Args:
+
+ * coord:
+ The coordinate of the associated CF name.
+
+ Returns:
+ The CF name.
+
+ """
+ result = None
+ for pair in self._map:
+ if coord == pair.coord:
+ result = pair.name
+ break
+ if result is None:
+ msg = "Coordinate is not mapped, {!r}".format(coord)
+ raise KeyError(msg)
+ return result
+
+ def coord(self, name):
+ """
+ Return the coordinate, given a CF name.
+
+ Args:
+
+ * name:
+ CF name of the associated coordinate.
+
+ Returns:
+ The coordinate.
+
+ """
+ result = None
+ for pair in self._map:
+ if name == pair.name:
+ result = pair.coord
+ break
+ if result is None:
+ msg = "Name is not mapped, {!r}".format(name)
+ raise KeyError(msg)
+ return result
+
+
+class Saver:
+ """A manager for saving netcdf files."""
+
+ def __init__(self, filename, netcdf_format):
+ """
+ A manager for saving netcdf files.
+
+ Args:
+
+ * filename (string):
+ Name of the netCDF file to save the cube.
+
+ * netcdf_format (string):
+ Underlying netCDF file format, one of 'NETCDF4', 'NETCDF4_CLASSIC',
+ 'NETCDF3_CLASSIC' or 'NETCDF3_64BIT'. Default is 'NETCDF4' format.
+
+ Returns:
+ None.
+
+ For example::
+
+ # Initialise Manager for saving
+ with Saver(filename, netcdf_format) as sman:
+ # Iterate through the cubelist.
+
+ for cube in cubes:
+ sman.write(cube)
+
+ """
+ if netcdf_format not in [
+ "NETCDF4",
+ "NETCDF4_CLASSIC",
+ "NETCDF3_CLASSIC",
+ "NETCDF3_64BIT",
+ ]:
+ raise ValueError(
+ "Unknown netCDF file format, got %r" % netcdf_format
+ )
+
+ # All persistent variables
+ #: CF name mapping with iris coordinates
+ self._name_coord_map = CFNameCoordMap()
+ #: List of dimension coordinates added to the file
+ self._dim_coords = []
+ #: List of grid mappings added to the file
+ self._coord_systems = []
+ #: A dictionary, listing dimension names and corresponding length
+ self._existing_dim = {}
+ #: A dictionary, mapping formula terms to owner cf variable name
+ self._formula_terms_cache = {}
+ #: NetCDF dataset
+ try:
+ self._dataset = netCDF4.Dataset(
+ filename, mode="w", format=netcdf_format
+ )
+ except RuntimeError:
+ dir_name = os.path.dirname(filename)
+ if not os.path.isdir(dir_name):
+ msg = "No such file or directory: {}".format(dir_name)
+ raise IOError(msg)
+ if not os.access(dir_name, os.R_OK | os.W_OK):
+ msg = "Permission denied: {}".format(filename)
+ raise IOError(msg)
+ else:
+ raise
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ """Flush any buffered data to the CF-netCDF file before closing."""
+
+ self._dataset.sync()
+ self._dataset.close()
+
+ def write(
+ self,
+ cube,
+ local_keys=None,
+ unlimited_dimensions=None,
+ zlib=False,
+ complevel=4,
+ shuffle=True,
+ fletcher32=False,
+ contiguous=False,
+ chunksizes=None,
+ endian="native",
+ least_significant_digit=None,
+ packing=None,
+ fill_value=None,
+ ):
+ """
+ Wrapper for saving cubes to a NetCDF file.
+
+ Args:
+
+ * cube (:class:`iris.cube.Cube`):
+ A :class:`iris.cube.Cube` to be saved to a netCDF file.
+
+ Kwargs:
+
+ * local_keys (iterable of strings):
+ An iterable of cube attribute keys. Any cube attributes with
+ matching keys will become attributes on the data variable rather
+ than global attributes.
+
+ * unlimited_dimensions (iterable of strings and/or
+ :class:`iris.coords.Coord` objects):
+ List of coordinate names (or coordinate objects)
+ corresponding to coordinate dimensions of `cube` to save with the
+ NetCDF dimension variable length 'UNLIMITED'. By default, no
+ unlimited dimensions are saved. Only the 'NETCDF4' format
+ supports multiple 'UNLIMITED' dimensions.
+
+ * zlib (bool):
+ If `True`, the data will be compressed in the netCDF file using
+ gzip compression (default `False`).
+
+ * complevel (int):
+ An integer between 1 and 9 describing the level of compression
+ desired (default 4). Ignored if `zlib=False`.
+
+ * shuffle (bool):
+ If `True`, the HDF5 shuffle filter will be applied before
+ compressing the data (default `True`). This significantly improves
+ compression. Ignored if `zlib=False`.
+
+ * fletcher32 (bool):
+ If `True`, the Fletcher32 HDF5 checksum algorithm is activated to
+ detect errors. Default `False`.
+
+ * contiguous (bool):
+ If `True`, the variable data is stored contiguously on disk.
+ Default `False`. Setting to `True` for a variable with an unlimited
+ dimension will trigger an error.
+
+ * chunksizes (tuple of int):
+ Used to manually specify the HDF5 chunksizes for each dimension of
+ the variable. A detailed discussion of HDF chunking and I/O
+ performance is available here:
+ https://www.unidata.ucar.edu/software/netcdf/documentation/NUG/netcdf_perf_chunking.html.
+ Basically, you want the chunk size for each dimension to match + as closely as possible the size of the data block that users will + read from the file. `chunksizes` cannot be set if `contiguous=True`. + + * endian (string): + Used to control whether the data is stored in little or big endian + format on disk. Possible values are 'little', 'big' or 'native' + (default). The library will automatically handle endian conversions + when the data is read, but if the data is always going to be read + on a computer with the opposite format as the one used to create + the file, there may be some performance advantage to be gained by + setting the endian-ness. + + * least_significant_digit (int): + If `least_significant_digit` is specified, variable data will be + truncated (quantized). In conjunction with `zlib=True` this + produces 'lossy', but significantly more efficient compression. For + example, if `least_significant_digit=1`, data will be quantized + using `numpy.around(scale*data)/scale`, where `scale = 2**bits`, + and `bits` is determined so that a precision of 0.1 is retained (in + this case `bits=4`). From + http://www.esrl.noaa.gov/psd/data/gridded/conventions/cdc_netcdf_standard.shtml: + "least_significant_digit -- power of ten of the smallest decimal + place in unpacked data that is a reliable value". Default is + `None`, or no quantization, or 'lossless' compression. + + * packing (type or string or dict or list): A numpy integer datatype + (signed or unsigned) or a string that describes a numpy integer + dtype(i.e. 'i2', 'short', 'u4') or a dict of packing parameters as + described below. This provides support for netCDF data packing as + described in + http://www.unidata.ucar.edu/software/netcdf/documentation/NUG/best_practices.html#bp_Packed-Data-Values + If this argument is a type (or type string), appropriate values of + scale_factor and add_offset will be automatically calculated based + on `cube.data` and possible masking. For more control, pass a dict + with one or more of the following keys: `dtype` (required), + `scale_factor` and `add_offset`. Note that automatic calculation of + packing parameters will trigger loading of lazy data; set them + manually using a dict to avoid this. The default is `None`, in + which case the datatype is determined from the cube and no packing + will occur. + + * fill_value: + The value to use for the `_FillValue` attribute on the netCDF + variable. If `packing` is specified the value of `fill_value` + should be in the domain of the packed data. + + Returns: + None. + + .. note:: + + The `zlib`, `complevel`, `shuffle`, `fletcher32`, `contiguous`, + `chunksizes` and `endian` keywords are silently ignored for netCDF + 3 files that do not use HDF5. + + """ + if unlimited_dimensions is None: + unlimited_dimensions = [] + + cf_profile_available = iris.site_configuration.get( + "cf_profile" + ) not in [None, False] + if cf_profile_available: + # Perform a CF profile of the cube. This may result in an exception + # being raised if mandatory requirements are not satisfied. + profile = iris.site_configuration["cf_profile"](cube) + + # Ensure that attributes are CF compliant and if possible to make them + # compliant. + self.check_attribute_compliance(cube, cube.lazy_data()) + for coord in cube.coords(): + self.check_attribute_compliance(coord, coord.points) + + # Get suitable dimension names. + dimension_names = self._get_dim_names(cube) + + # Create the CF-netCDF data dimensions. 
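+        # (netCDF requires dimensions to exist in the dataset before any
+        # variable that references them can be created.)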
+ self._create_cf_dimensions(cube, dimension_names, unlimited_dimensions) + + # Create the associated cube CF-netCDF data variable. + cf_var_cube = self._create_cf_data_variable( + cube, + dimension_names, + local_keys, + zlib=zlib, + complevel=complevel, + shuffle=shuffle, + fletcher32=fletcher32, + contiguous=contiguous, + chunksizes=chunksizes, + endian=endian, + least_significant_digit=least_significant_digit, + packing=packing, + fill_value=fill_value, + ) + + # Add coordinate variables. + self._add_dim_coords(cube, dimension_names) + + # Add the auxiliary coordinate variables and associate the data + # variable to them + self._add_aux_coords(cube, cf_var_cube, dimension_names) + + # Add the cell_measures variables and associate the data + # variable to them + self._add_cell_measures(cube, cf_var_cube, dimension_names) + + # Add the ancillary_variables variables and associate the data variable + # to them + self._add_ancillary_variables(cube, cf_var_cube, dimension_names) + + # Add the formula terms to the appropriate cf variables for each + # aux factory in the cube. + self._add_aux_factories(cube, cf_var_cube, dimension_names) + + # Add data variable-only attribute names to local_keys. + if local_keys is None: + local_keys = set() + else: + local_keys = set(local_keys) + local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS) + + # Add global attributes taking into account local_keys. + global_attributes = { + k: v + for k, v in cube.attributes.items() + if (k not in local_keys and k.lower() != "conventions") + } + self.update_global_attributes(global_attributes) + + if cf_profile_available: + cf_patch = iris.site_configuration.get("cf_patch") + if cf_patch is not None: + # Perform a CF patch of the dataset. + cf_patch(profile, self._dataset, cf_var_cube) + else: + msg = "cf_profile is available but no {} defined.".format( + "cf_patch" + ) + warnings.warn(msg) + + @staticmethod + def check_attribute_compliance(container, data): + def _coerce_value(val_attr, val_attr_value, data_dtype): + val_attr_tmp = np.array(val_attr_value, dtype=data_dtype) + if (val_attr_tmp != val_attr_value).any(): + msg = '"{}" is not of a suitable value ({})' + raise ValueError(msg.format(val_attr, val_attr_value)) + return val_attr_tmp + + data_dtype = data.dtype + + # Ensure that conflicting attributes are not provided. + if ( + container.attributes.get("valid_min") is not None + or container.attributes.get("valid_max") is not None + ) and container.attributes.get("valid_range") is not None: + msg = ( + 'Both "valid_range" and "valid_min" or "valid_max" ' + "attributes present." + ) + raise ValueError(msg) + + # Ensure correct datatype + for val_attr in ["valid_range", "valid_min", "valid_max"]: + val_attr_value = container.attributes.get(val_attr) + if val_attr_value is not None: + val_attr_value = np.asarray(val_attr_value) + if data_dtype.itemsize == 1: + # Allow signed integral type + if val_attr_value.dtype.kind == "i": + continue + new_val = _coerce_value(val_attr, val_attr_value, data_dtype) + container.attributes[val_attr] = new_val + + def update_global_attributes(self, attributes=None, **kwargs): + """ + Update the CF global attributes based on the provided + iterable/dictionary and/or keyword arguments. + + Args: + + * attributes (dict or iterable of key, value pairs): + CF global attributes to be updated. + + """ + if attributes is not None: + # Handle sequence e.g. [('fruit', 'apple'), ...]. 
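+            # Anything without a dict-like 'keys' interface is coerced via
+            # dict() so it can be iterated by name below.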
+            if not hasattr(attributes, "keys"):
+                attributes = dict(attributes)
+
+            for attr_name in sorted(attributes):
+                _setncattr(self._dataset, attr_name, attributes[attr_name])
+
+        for attr_name in sorted(kwargs):
+            _setncattr(self._dataset, attr_name, kwargs[attr_name])
+
+    def _create_cf_dimensions(
+        self, cube, dimension_names, unlimited_dimensions=None
+    ):
+        """
+        Create the CF-netCDF data dimensions.
+
+        Args:
+
+        * cube (:class:`iris.cube.Cube`):
+            A :class:`iris.cube.Cube` in which to lookup coordinates.
+
+        Kwargs:
+
+        * unlimited_dimensions (iterable of strings and/or
+          :class:`iris.coords.Coord` objects):
+            List of coordinates to make unlimited (None by default).
+
+        Returns:
+            None.
+
+        """
+        unlimited_dim_names = []
+        for coord in unlimited_dimensions:
+            try:
+                coord = cube.coord(name_or_coord=coord, dim_coords=True)
+            except iris.exceptions.CoordinateNotFoundError:
+                # coordinate isn't used for this cube, but it might be
+                # used for a different one
+                pass
+            else:
+                dim_name = self._get_coord_variable_name(cube, coord)
+                unlimited_dim_names.append(dim_name)
+
+        for dim_name in dimension_names:
+            if dim_name not in self._dataset.dimensions:
+                if dim_name in unlimited_dim_names:
+                    size = None
+                else:
+                    size = self._existing_dim[dim_name]
+                self._dataset.createDimension(dim_name, size)
+
+    def _add_inner_related_vars(
+        self,
+        cube,
+        cf_var_cube,
+        dimension_names,
+        coordlike_elements,
+        saver_create_method,
+        role_attribute_name,
+    ):
+        # Common method to create a set of file variables and attach them to
+        # the parent data variable.
+        element_names = []
+        # Add CF-netCDF variables for the associated coordinate-like elements.
+        for element in sorted(
+            coordlike_elements, key=lambda element: element.name()
+        ):
+            # Create the associated CF-netCDF variable.
+            if element not in self._name_coord_map.coords:
+                cf_name = saver_create_method(cube, dimension_names, element)
+                self._name_coord_map.append(cf_name, element)
+            else:
+                cf_name = self._name_coord_map.name(element)
+
+            if cf_name is not None:
+                if role_attribute_name == "cell_measures":
+                    # In the case of cell-measures, the attribute entries are not just
+                    # a var_name, but each have the form "<measure>: <varname>".
+                    cf_name = "{}: {}".format(element.measure, cf_name)
+                element_names.append(cf_name)
+
+        # Add CF-netCDF references to the primary data variable.
+        if element_names:
+            variable_names = " ".join(sorted(element_names))
+            _setncattr(cf_var_cube, role_attribute_name, variable_names)
+
+    def _add_aux_coords(self, cube, cf_var_cube, dimension_names):
+        """
+        Add aux. coordinates to the dataset and associate with the data variable
+
+        Args:
+
+        * cube (:class:`iris.cube.Cube`):
+            A :class:`iris.cube.Cube` to be saved to a netCDF file.
+        * cf_var_cube (:class:`netcdf.netcdf_variable`):
+            cf variable cube representation.
+        * dimension_names (list):
+            Names associated with the dimensions of the cube.
+
+        """
+        return self._add_inner_related_vars(
+            cube,
+            cf_var_cube,
+            dimension_names,
+            cube.aux_coords,
+            self._create_cf_coord_variable,
+            "coordinates",
+        )
+
+    def _add_cell_measures(self, cube, cf_var_cube, dimension_names):
+        """
+        Add cell measures to the dataset and associate with the data variable
+
+        Args:
+
+        * cube (:class:`iris.cube.Cube`):
+            A :class:`iris.cube.Cube` to be saved to a netCDF file.
+        * cf_var_cube (:class:`netcdf.netcdf_variable`):
+            cf variable cube representation.
+        * dimension_names (list):
+            Names associated with the dimensions of the cube.
+
+        """
+        return self._add_inner_related_vars(
+            cube,
+            cf_var_cube,
+            dimension_names,
+            cube.cell_measures(),
+            self._create_cf_cell_measure_variable,
+            "cell_measures",
+        )
+
+    def _add_ancillary_variables(self, cube, cf_var_cube, dimension_names):
+        """
+        Add ancillary variables to the dataset and associate with the
+        data variable
+
+        Args:
+
+        * cube (:class:`iris.cube.Cube`):
+            A :class:`iris.cube.Cube` to be saved to a netCDF file.
+        * cf_var_cube (:class:`netcdf.netcdf_variable`):
+            cf variable cube representation.
+        * dimension_names (list):
+            Names associated with the dimensions of the cube.
+
+        """
+        return self._add_inner_related_vars(
+            cube,
+            cf_var_cube,
+            dimension_names,
+            cube.ancillary_variables(),
+            self._create_cf_ancildata_variable,
+            "ancillary_variables",
+        )
+
+    def _add_dim_coords(self, cube, dimension_names):
+        """
+        Add coordinate variables to NetCDF dataset.
+
+        Args:
+
+        * cube (:class:`iris.cube.Cube`):
+            A :class:`iris.cube.Cube` to be saved to a netCDF file.
+        * dimension_names (list):
+            Names associated with the dimensions of the cube.
+
+        """
+        # Ensure we create the netCDF coordinate variables first.
+        for coord in cube.dim_coords:
+            # Create the associated coordinate CF-netCDF variable.
+            if coord not in self._name_coord_map.coords:
+                cf_name = self._create_cf_coord_variable(
+                    cube, dimension_names, coord
+                )
+                self._name_coord_map.append(cf_name, coord)
+
+    def _add_aux_factories(self, cube, cf_var_cube, dimension_names):
+        """
+        Modifies the variables of the NetCDF dataset to represent
+        the presence of dimensionless vertical coordinates based on
+        the aux factories of the cube (if any).
+
+        Args:
+
+        * cube (:class:`iris.cube.Cube`):
+            A :class:`iris.cube.Cube` to be saved to a netCDF file.
+        * cf_var_cube (:class:`netcdf.netcdf_variable`):
+            CF variable cube representation.
+        * dimension_names (list):
+            Names associated with the dimensions of the cube.
+
+        """
+        primaries = []
+        for factory in cube.aux_factories:
+            factory_defn = _FACTORY_DEFNS.get(type(factory), None)
+            if factory_defn is None:
+                msg = (
+                    "Unable to determine formula terms "
+                    "for AuxFactory: {!r}".format(factory)
+                )
+                warnings.warn(msg)
+            else:
+                # Override `standard_name`, `long_name`, and `axis` of the
+                # primary coord that signals the presence of a dimensionless
+                # vertical coord, then set the `formula_terms` attribute.
+                primary_coord = factory.dependencies[factory_defn.primary]
+                if primary_coord in primaries:
+                    msg = (
+                        "Cube {!r} has multiple aux factories that share "
+                        "a common primary coordinate {!r}. Unable to save "
+                        "to netCDF as having multiple formula terms on a "
+                        "single coordinate is not supported."
+                    )
+                    raise ValueError(msg.format(cube, primary_coord.name()))
+                primaries.append(primary_coord)
+
+                cf_name = self._name_coord_map.name(primary_coord)
+                cf_var = self._dataset.variables[cf_name]
+
+                names = {
+                    key: self._name_coord_map.name(coord)
+                    for key, coord in factory.dependencies.items()
+                }
+                formula_terms = factory_defn.formula_terms_format.format(
+                    **names
+                )
+                std_name = factory_defn.std_name
+
+                if hasattr(cf_var, "formula_terms"):
+                    if (
+                        cf_var.formula_terms != formula_terms
+                        or cf_var.standard_name != std_name
+                    ):
+                        # TODO: We need to resolve this corner-case where
+                        # the dimensionless vertical coordinate containing
+                        # the formula_terms is a dimension coordinate of
+                        # the associated cube and a new alternatively named
+                        # dimensionless vertical coordinate is required
+                        # with new formula_terms and a renamed dimension.
+                        if cf_name in dimension_names:
+                            msg = (
+                                "Unable to create dimensionless vertical "
+                                "coordinate."
+                            )
+                            raise ValueError(msg)
+                        key = (cf_name, std_name, formula_terms)
+                        name = self._formula_terms_cache.get(key)
+                        if name is None:
+                            # Create a new variable
+                            name = self._create_cf_coord_variable(
+                                cube, dimension_names, primary_coord
+                            )
+                            cf_var = self._dataset.variables[name]
+                            _setncattr(cf_var, "standard_name", std_name)
+                            _setncattr(cf_var, "axis", "Z")
+                            # Update the formula terms.
+                            ft = formula_terms.split()
+                            ft = [name if t == cf_name else t for t in ft]
+                            _setncattr(cf_var, "formula_terms", " ".join(ft))
+                            # Update the cache.
+                            self._formula_terms_cache[key] = name
+                        # Update the associated cube variable.
+                        coords = cf_var_cube.coordinates.split()
+                        coords = [name if c == cf_name else c for c in coords]
+                        _setncattr(
+                            cf_var_cube, "coordinates", " ".join(coords)
+                        )
+                else:
+                    _setncattr(cf_var, "standard_name", std_name)
+                    _setncattr(cf_var, "axis", "Z")
+                    _setncattr(cf_var, "formula_terms", formula_terms)
+
+    def _get_dim_names(self, cube):
+        """
+        Determine suitable CF-netCDF data dimension names.
+
+        Args:
+
+        * cube (:class:`iris.cube.Cube`):
+            A :class:`iris.cube.Cube` to be saved to a netCDF file.
+
+        Returns:
+            List of dimension names with length equal to the number of
+            dimensions in the cube.
+
+        """
+        dimension_names = []
+        for dim in range(cube.ndim):
+            coords = cube.coords(dimensions=dim, dim_coords=True)
+            if coords:
+                coord = coords[0]
+
+                dim_name = self._get_coord_variable_name(cube, coord)
+                # Add only dimensions that have not already been added.
+                if coord not in self._dim_coords:
+                    # Determine unique dimension name
+                    while (
+                        dim_name in self._existing_dim
+                        or dim_name in self._name_coord_map.names
+                    ):
+                        dim_name = self._increment_name(dim_name)
+
+                    # Update names added, current cube dim names used and
+                    # unique coordinates added.
+                    self._existing_dim[dim_name] = coord.shape[0]
+                    dimension_names.append(dim_name)
+                    self._dim_coords.append(coord)
+                else:
+                    # Return the dim_name associated with the existing
+                    # coordinate.
+                    dim_name = self._name_coord_map.name(coord)
+                    dimension_names.append(dim_name)
+
+            else:
+                # No CF-netCDF coordinates describe this data dimension.
+                dim_name = "dim%d" % dim
+                if dim_name in self._existing_dim:
+                    # Increment name if conflicted with one already existing.
+                    if self._existing_dim[dim_name] != cube.shape[dim]:
+                        while (
+                            dim_name in self._existing_dim
+                            and self._existing_dim[dim_name] != cube.shape[dim]
+                            or dim_name in self._name_coord_map.names
+                        ):
+                            dim_name = self._increment_name(dim_name)
+                        # Update dictionary with new entry
+                        self._existing_dim[dim_name] = cube.shape[dim]
+                else:
+                    # Update dictionary with new entry
+                    self._existing_dim[dim_name] = cube.shape[dim]
+
+                dimension_names.append(dim_name)
+        return dimension_names
+
+    @staticmethod
+    def cf_valid_var_name(var_name):
+        """
+        Return a valid CF var_name given a potentially invalid name.
+
+        Args:
+
+        * var_name (str):
+            The var_name to normalise.
+
+        Returns:
+            A var_name suitable for passing through for variable creation.
+
+        """
+        # Replace invalid characters with an underscore ("_").
+        var_name = re.sub(r"[^a-zA-Z0-9]", "_", var_name)
+        # Ensure the variable name starts with a letter.
+        if re.match(r"^[^a-zA-Z]", var_name):
+            var_name = "var_{}".format(var_name)
+        return var_name
+
+    @staticmethod
+    def _cf_coord_identity(coord):
+        """
+        Determine suitable units from a given coordinate.
+ + Args: + + * coord (:class:`iris.coords.Coord`): + A coordinate of a cube. + + Returns: + The (standard_name, long_name, unit) of the given + :class:`iris.coords.Coord` instance. + + """ + + units = str(coord.units) + # Set the 'units' of 'latitude' and 'longitude' coordinates specified + # in 'degrees' to 'degrees_north' and 'degrees_east' respectively, + # as defined in the CF conventions for netCDF files: sections 4.1 and + # 4.2. + if ( + isinstance(coord.coord_system, iris.coord_systems.GeogCS) + or coord.coord_system is None + ) and coord.units == "degrees": + if coord.standard_name == "latitude": + units = "degrees_north" + elif coord.standard_name == "longitude": + units = "degrees_east" + + return coord.standard_name, coord.long_name, units + + def _ensure_valid_dtype(self, values, src_name, src_object): + # NetCDF3 and NetCDF4 classic do not support int64 or unsigned ints, + # so we check if we can store them as int32 instead. + if ( + np.issubdtype(values.dtype, np.int64) + or np.issubdtype(values.dtype, np.unsignedinteger) + ) and self._dataset.file_format in ( + "NETCDF3_CLASSIC", + "NETCDF3_64BIT", + "NETCDF4_CLASSIC", + ): + # Cast to an integer type supported by netCDF3. + if not np.can_cast(values.max(), np.int32) or not np.can_cast( + values.min(), np.int32 + ): + msg = ( + "The data type of {} {!r} is not supported by {} and" + " its values cannot be safely cast to a supported" + " integer type." + ) + msg = msg.format( + src_name, src_object, self._dataset.file_format + ) + raise ValueError(msg) + values = values.astype(np.int32) + return values + + def _create_cf_bounds(self, coord, cf_var, cf_name): + """ + Create the associated CF-netCDF bounds variable. + + Args: + + * coord (:class:`iris.coords.Coord`): + A coordinate of a cube. + * cf_var: + CF-netCDF variable + * cf_name (string): + name of the CF-NetCDF variable. + + Returns: + None + + """ + if coord.has_bounds(): + # Get the values in a form which is valid for the file format. + bounds = self._ensure_valid_dtype( + coord.bounds, "the bounds of coordinate", coord + ) + n_bounds = bounds.shape[-1] + + if n_bounds == 2: + bounds_dimension_name = "bnds" + else: + bounds_dimension_name = "bnds_%s" % n_bounds + + if coord.climatological: + property_name = "climatology" + varname_extra = "climatology" + else: + property_name = "bounds" + varname_extra = "bnds" + + if bounds_dimension_name not in self._dataset.dimensions: + # Create the bounds dimension with the appropriate extent. + self._dataset.createDimension(bounds_dimension_name, n_bounds) + + boundsvar_name = "{}_{}".format(cf_name, varname_extra) + _setncattr(cf_var, property_name, boundsvar_name) + cf_var_bounds = self._dataset.createVariable( + boundsvar_name, + bounds.dtype.newbyteorder("="), + cf_var.dimensions + (bounds_dimension_name,), + ) + cf_var_bounds[:] = bounds + + def _get_cube_variable_name(self, cube): + """ + Returns a CF-netCDF variable name for the given cube. + + Args: + + * cube (class:`iris.cube.Cube`): + An instance of a cube for which a CF-netCDF variable + name is required. + + Returns: + A CF-netCDF variable name as a string. + + """ + if cube.var_name is not None: + cf_name = cube.var_name + else: + # Convert to lower case and replace whitespace by underscores. + cf_name = "_".join(cube.name().lower().split()) + + cf_name = self.cf_valid_var_name(cf_name) + return cf_name + + def _get_coord_variable_name(self, cube, coord): + """ + Returns a CF-netCDF variable name for the given coordinate. 
+ + Args: + + * cube (:class:`iris.cube.Cube`): + The cube that contains the given coordinate. + * coord (:class:`iris.coords.Coord`): + An instance of a coordinate for which a CF-netCDF variable + name is required. + + Returns: + A CF-netCDF variable name as a string. + + """ + if coord.var_name is not None: + cf_name = coord.var_name + else: + name = coord.standard_name or coord.long_name + if not name or set(name).intersection(string.whitespace): + # Auto-generate name based on associated dimensions. + name = "" + for dim in cube.coord_dims(coord): + name += "dim{}".format(dim) + # Handle scalar coordinate (dims == ()). + if not name: + name = "unknown_scalar" + # Convert to lower case and replace whitespace by underscores. + cf_name = "_".join(name.lower().split()) + + cf_name = self.cf_valid_var_name(cf_name) + return cf_name + + def _inner_create_cf_cellmeasure_or_ancil_variable( + self, cube, dimension_names, dimensional_metadata + ): + """ + Create the associated CF-netCDF variable in the netCDF dataset for the + given dimensional_metadata. + + Args: + + * cube (:class:`iris.cube.Cube`): + The associated cube being saved to CF-netCDF file. + * dimension_names (list): + Names for each dimension of the cube. + * dimensional_metadata (:class:`iris.coords.CellMeasure`): + A cell measure OR ancillary variable to be saved to the + CF-netCDF file. + In either case, provides data, units and standard/long/var names. + + Returns: + The string name of the associated CF-netCDF variable saved. + + """ + cf_name = self._get_coord_variable_name(cube, dimensional_metadata) + while cf_name in self._dataset.variables: + cf_name = self._increment_name(cf_name) + + # Derive the data dimension names for the coordinate. + cf_dimensions = [ + dimension_names[dim] + for dim in dimensional_metadata.cube_dims(cube) + ] + + # Get the data values. + data = dimensional_metadata.data + + if isinstance(dimensional_metadata, iris.coords.CellMeasure): + # Disallow saving of *masked* cell measures. + # NOTE: currently, this is the only functional difference required + # between variable creation for an ancillary and a cell measure. + if ma.is_masked(data): + # We can't save masked points properly, as we don't maintain a + # suitable fill_value. (Load will not record one, either). + msg = "Cell measures with missing data are not supported." + raise ValueError(msg) + + # Get the values in a form which is valid for the file format. + data = self._ensure_valid_dtype( + data, "coordinate", dimensional_metadata + ) + + # Create the CF-netCDF variable. + cf_var = self._dataset.createVariable( + cf_name, data.dtype.newbyteorder("="), cf_dimensions + ) + + # Add the data to the CF-netCDF variable. + cf_var[:] = data + + if dimensional_metadata.units.is_udunits(): + _setncattr(cf_var, "units", str(dimensional_metadata.units)) + + if dimensional_metadata.standard_name is not None: + _setncattr( + cf_var, "standard_name", dimensional_metadata.standard_name + ) + + if dimensional_metadata.long_name is not None: + _setncattr(cf_var, "long_name", dimensional_metadata.long_name) + + # Add any other custom coordinate attributes. + for name in sorted(dimensional_metadata.attributes): + value = dimensional_metadata.attributes[name] + + # Don't clobber existing attributes. 
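+            # (netCDF4 exposes existing variable attributes as Python
+            # attributes, so hasattr() is enough to detect a name clash.)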
+ if not hasattr(cf_var, name): + _setncattr(cf_var, name, value) + + return cf_name + + def _create_cf_cell_measure_variable( + self, cube, dimension_names, cell_measure + ): + """ + Create the associated CF-netCDF variable in the netCDF dataset for the + given cell_measure. + + Args: + + * cube (:class:`iris.cube.Cube`): + The associated cube being saved to CF-netCDF file. + * dimension_names (list): + Names for each dimension of the cube. + * cell_measure (:class:`iris.coords.CellMeasure`): + The cell measure to be saved to CF-netCDF file. + + Returns: + The string name of the associated CF-netCDF variable saved. + + """ + # Note: currently shares variable creation code with ancillary-variables. + return self._inner_create_cf_cellmeasure_or_ancil_variable( + cube, dimension_names, cell_measure + ) + + def _create_cf_ancildata_variable( + self, cube, dimension_names, ancillary_variable + ): + """ + Create the associated CF-netCDF variable in the netCDF dataset for the + given ancillary variable. + + Args: + + * cube (:class:`iris.cube.Cube`): + The associated cube being saved to CF-netCDF file. + * dimension_names (list): + Names for each dimension of the cube. + * ancillary_variable (:class:`iris.coords.AncillaryVariable`): + The ancillary variable to be saved to the CF-netCDF file. + + Returns: + The string name of the associated CF-netCDF variable saved. + + """ + # Note: currently shares variable creation code with cell-measures. + return self._inner_create_cf_cellmeasure_or_ancil_variable( + cube, dimension_names, ancillary_variable + ) + + def _create_cf_coord_variable(self, cube, dimension_names, coord): + """ + Create the associated CF-netCDF variable in the netCDF dataset for the + given coordinate. If required, also create the CF-netCDF bounds + variable and associated dimension. + + Args: + + * cube (:class:`iris.cube.Cube`): + The associated cube being saved to CF-netCDF file. + * dimension_names (list): + Names for each dimension of the cube. + * coord (:class:`iris.coords.Coord`): + The coordinate to be saved to CF-netCDF file. + + Returns: + The string name of the associated CF-netCDF variable saved. + + """ + cf_name = self._get_coord_variable_name(cube, coord) + while cf_name in self._dataset.variables: + cf_name = self._increment_name(cf_name) + + # Derive the data dimension names for the coordinate. + cf_dimensions = [ + dimension_names[dim] for dim in cube.coord_dims(coord) + ] + + if np.issubdtype(coord.points.dtype, np.str_): + string_dimension_depth = coord.points.dtype.itemsize + if coord.points.dtype.kind == "U": + string_dimension_depth //= 4 + string_dimension_name = "string%d" % string_dimension_depth + + # Determine whether to create the string length dimension. + if string_dimension_name not in self._dataset.dimensions: + self._dataset.createDimension( + string_dimension_name, string_dimension_depth + ) + + # Add the string length dimension to dimension names. + cf_dimensions.append(string_dimension_name) + + # Create the label coordinate variable. + cf_var = self._dataset.createVariable( + cf_name, "|S1", cf_dimensions + ) + + # Add the payload to the label coordinate variable. 
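+            # Each string is written as a left-justified character array,
+            # space-padded to the length of the string dimension.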
+ if len(cf_dimensions) == 1: + cf_var[:] = list( + "%- *s" % (string_dimension_depth, coord.points[0]) + ) + else: + for index in np.ndindex(coord.points.shape): + index_slice = tuple(list(index) + [slice(None, None)]) + cf_var[index_slice] = list( + "%- *s" % (string_dimension_depth, coord.points[index]) + ) + else: + # Identify the collection of coordinates that represent CF-netCDF + # coordinate variables. + cf_coordinates = cube.dim_coords + + if coord in cf_coordinates: + # By definition of a CF-netCDF coordinate variable this + # coordinate must be 1-D and the name of the CF-netCDF variable + # must be the same as its dimension name. + cf_name = cf_dimensions[0] + + # Get the values in a form which is valid for the file format. + points = self._ensure_valid_dtype( + coord.points, "coordinate", coord + ) + + # Create the CF-netCDF variable. + cf_var = self._dataset.createVariable( + cf_name, points.dtype.newbyteorder("="), cf_dimensions + ) + + # Add the axis attribute for spatio-temporal CF-netCDF coordinates. + if coord in cf_coordinates: + axis = iris.util.guess_coord_axis(coord) + if axis is not None and axis.lower() in SPATIO_TEMPORAL_AXES: + _setncattr(cf_var, "axis", axis.upper()) + + # Add the data to the CF-netCDF variable. + cf_var[:] = points + + # Create the associated CF-netCDF bounds variable. + self._create_cf_bounds(coord, cf_var, cf_name) + + # Deal with CF-netCDF units and standard name. + standard_name, long_name, units = self._cf_coord_identity(coord) + + if cf_units.as_unit(units).is_udunits(): + _setncattr(cf_var, "units", units) + + if standard_name is not None: + _setncattr(cf_var, "standard_name", standard_name) + + if long_name is not None: + _setncattr(cf_var, "long_name", long_name) + + # Add the CF-netCDF calendar attribute. + if coord.units.calendar: + _setncattr(cf_var, "calendar", coord.units.calendar) + + # Add any other custom coordinate attributes. + for name in sorted(coord.attributes): + value = coord.attributes[name] + + if name == "STASH": + # Adopting provisional Metadata Conventions for representing MO + # Scientific Data encoded in NetCDF Format. + name = "um_stash_source" + value = str(value) + + # Don't clobber existing attributes. + if not hasattr(cf_var, name): + _setncattr(cf_var, name, value) + + return cf_name + + def _create_cf_cell_methods(self, cube, dimension_names): + """ + Create CF-netCDF string representation of a cube cell methods. + + Args: + + * cube (:class:`iris.cube.Cube`) or cubelist + (:class:`iris.cube.CubeList`): + A :class:`iris.cube.Cube`, :class:`iris.cube.CubeList` or list of + cubes to be saved to a netCDF file. + * dimension_names (list): + Names associated with the dimensions of the cube. + + Returns: + CF-netCDF string representation of a cube cell methods. + + """ + cell_methods = [] + + # Identify the collection of coordinates that represent CF-netCDF + # coordinate variables. 
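+        # (These are the cube's dimension coordinates: where a cell method
+        # refers to one, its name is replaced by the file dimension name.)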
+ cf_coordinates = cube.dim_coords + + for cm in cube.cell_methods: + names = "" + + for name in cm.coord_names: + coord = cube.coords(name) + + if coord: + coord = coord[0] + if coord in cf_coordinates: + name = dimension_names[cube.coord_dims(coord)[0]] + + names += "%s: " % name + + interval = " ".join( + ["interval: %s" % interval for interval in cm.intervals or []] + ) + comment = " ".join( + ["comment: %s" % comment for comment in cm.comments or []] + ) + extra = " ".join([interval, comment]).strip() + + if extra: + extra = " (%s)" % extra + + cell_methods.append(names + cm.method + extra) + + return " ".join(cell_methods) + + def _create_cf_grid_mapping(self, cube, cf_var_cube): + """ + Create CF-netCDF grid mapping variable and associated CF-netCDF + data variable grid mapping attribute. + + Args: + + * cube (:class:`iris.cube.Cube`) or cubelist + (:class:`iris.cube.CubeList`): + A :class:`iris.cube.Cube`, :class:`iris.cube.CubeList` or list of + cubes to be saved to a netCDF file. + * cf_var_cube (:class:`netcdf.netcdf_variable`): + cf variable cube representation. + + Returns: + None + + """ + cs = cube.coord_system("CoordSystem") + if cs is not None: + # Grid var not yet created? + if cs not in self._coord_systems: + while cs.grid_mapping_name in self._dataset.variables: + aname = self._increment_name(cs.grid_mapping_name) + cs.grid_mapping_name = aname + + cf_var_grid = self._dataset.createVariable( + cs.grid_mapping_name, np.int32 + ) + _setncattr( + cf_var_grid, "grid_mapping_name", cs.grid_mapping_name + ) + + def add_ellipsoid(ellipsoid): + cf_var_grid.longitude_of_prime_meridian = ( + ellipsoid.longitude_of_prime_meridian + ) + semi_major = ellipsoid.semi_major_axis + semi_minor = ellipsoid.semi_minor_axis + if semi_minor == semi_major: + cf_var_grid.earth_radius = semi_major + else: + cf_var_grid.semi_major_axis = semi_major + cf_var_grid.semi_minor_axis = semi_minor + + # latlon + if isinstance(cs, iris.coord_systems.GeogCS): + add_ellipsoid(cs) + + # rotated latlon + elif isinstance(cs, iris.coord_systems.RotatedGeogCS): + if cs.ellipsoid: + add_ellipsoid(cs.ellipsoid) + cf_var_grid.grid_north_pole_latitude = ( + cs.grid_north_pole_latitude + ) + cf_var_grid.grid_north_pole_longitude = ( + cs.grid_north_pole_longitude + ) + cf_var_grid.north_pole_grid_longitude = ( + cs.north_pole_grid_longitude + ) + + # tmerc + elif isinstance(cs, iris.coord_systems.TransverseMercator): + if cs.ellipsoid: + add_ellipsoid(cs.ellipsoid) + cf_var_grid.longitude_of_central_meridian = ( + cs.longitude_of_central_meridian + ) + cf_var_grid.latitude_of_projection_origin = ( + cs.latitude_of_projection_origin + ) + cf_var_grid.false_easting = cs.false_easting + cf_var_grid.false_northing = cs.false_northing + cf_var_grid.scale_factor_at_central_meridian = ( + cs.scale_factor_at_central_meridian + ) + + # merc + elif isinstance(cs, iris.coord_systems.Mercator): + if cs.ellipsoid: + add_ellipsoid(cs.ellipsoid) + cf_var_grid.longitude_of_projection_origin = ( + cs.longitude_of_projection_origin + ) + # The Mercator class has implicit defaults for certain + # parameters + cf_var_grid.false_easting = 0.0 + cf_var_grid.false_northing = 0.0 + cf_var_grid.scale_factor_at_projection_origin = 1.0 + + # lcc + elif isinstance(cs, iris.coord_systems.LambertConformal): + if cs.ellipsoid: + add_ellipsoid(cs.ellipsoid) + cf_var_grid.standard_parallel = cs.secant_latitudes + cf_var_grid.latitude_of_projection_origin = cs.central_lat + cf_var_grid.longitude_of_central_meridian = cs.central_lon + 
cf_var_grid.false_easting = cs.false_easting + cf_var_grid.false_northing = cs.false_northing + + # stereo + elif isinstance(cs, iris.coord_systems.Stereographic): + if cs.true_scale_lat is not None: + warnings.warn( + "Stereographic coordinate systems with " + "true scale latitude specified are not " + "yet handled" + ) + else: + if cs.ellipsoid: + add_ellipsoid(cs.ellipsoid) + cf_var_grid.longitude_of_projection_origin = ( + cs.central_lon + ) + cf_var_grid.latitude_of_projection_origin = ( + cs.central_lat + ) + cf_var_grid.false_easting = cs.false_easting + cf_var_grid.false_northing = cs.false_northing + # The Stereographic class has an implicit scale + # factor + cf_var_grid.scale_factor_at_projection_origin = 1.0 + + # osgb (a specific tmerc) + elif isinstance(cs, iris.coord_systems.OSGB): + warnings.warn("OSGB coordinate system not yet handled") + + # lambert azimuthal equal area + elif isinstance( + cs, iris.coord_systems.LambertAzimuthalEqualArea + ): + if cs.ellipsoid: + add_ellipsoid(cs.ellipsoid) + cf_var_grid.longitude_of_projection_origin = ( + cs.longitude_of_projection_origin + ) + cf_var_grid.latitude_of_projection_origin = ( + cs.latitude_of_projection_origin + ) + cf_var_grid.false_easting = cs.false_easting + cf_var_grid.false_northing = cs.false_northing + + # albers conical equal area + elif isinstance(cs, iris.coord_systems.AlbersEqualArea): + if cs.ellipsoid: + add_ellipsoid(cs.ellipsoid) + cf_var_grid.longitude_of_central_meridian = ( + cs.longitude_of_central_meridian + ) + cf_var_grid.latitude_of_projection_origin = ( + cs.latitude_of_projection_origin + ) + cf_var_grid.false_easting = cs.false_easting + cf_var_grid.false_northing = cs.false_northing + cf_var_grid.standard_parallel = cs.standard_parallels + + # vertical perspective + elif isinstance(cs, iris.coord_systems.VerticalPerspective): + if cs.ellipsoid: + add_ellipsoid(cs.ellipsoid) + cf_var_grid.longitude_of_projection_origin = ( + cs.longitude_of_projection_origin + ) + cf_var_grid.latitude_of_projection_origin = ( + cs.latitude_of_projection_origin + ) + cf_var_grid.false_easting = cs.false_easting + cf_var_grid.false_northing = cs.false_northing + cf_var_grid.perspective_point_height = ( + cs.perspective_point_height + ) + + # geostationary + elif isinstance(cs, iris.coord_systems.Geostationary): + if cs.ellipsoid: + add_ellipsoid(cs.ellipsoid) + cf_var_grid.longitude_of_projection_origin = ( + cs.longitude_of_projection_origin + ) + cf_var_grid.latitude_of_projection_origin = ( + cs.latitude_of_projection_origin + ) + cf_var_grid.false_easting = cs.false_easting + cf_var_grid.false_northing = cs.false_northing + cf_var_grid.perspective_point_height = ( + cs.perspective_point_height + ) + cf_var_grid.sweep_angle_axis = cs.sweep_angle_axis + + # other + else: + warnings.warn( + "Unable to represent the horizontal " + "coordinate system. The coordinate system " + "type %r is not yet implemented." % type(cs) + ) + + self._coord_systems.append(cs) + + # Refer to grid var + _setncattr(cf_var_cube, "grid_mapping", cs.grid_mapping_name) + + def _create_cf_data_variable( + self, + cube, + dimension_names, + local_keys=None, + packing=None, + fill_value=None, + **kwargs, + ): + """ + Create CF-netCDF data variable for the cube and any associated grid + mapping. + + Args: + + * cube (:class:`iris.cube.Cube`): + The associated cube being saved to CF-netCDF file. + * dimension_names (list): + String names for each dimension of the cube. 
+ + Kwargs: + + * local_keys (iterable of strings): + * see :func:`iris.fileformats.netcdf.Saver.write` + * packing (type or string or dict or list): + * see :func:`iris.fileformats.netcdf.Saver.write` + * fill_value: + * see :func:`iris.fileformats.netcdf.Saver.write` + + All other keywords are passed through to the dataset's `createVariable` + method. + + Returns: + The newly created CF-netCDF data variable. + + """ + + if packing: + if isinstance(packing, dict): + if "dtype" not in packing: + msg = "The dtype attribute is required for packing." + raise ValueError(msg) + dtype = np.dtype(packing["dtype"]) + scale_factor = packing.get("scale_factor", None) + add_offset = packing.get("add_offset", None) + valid_keys = {"dtype", "scale_factor", "add_offset"} + invalid_keys = set(packing.keys()) - valid_keys + if invalid_keys: + msg = ( + "Invalid packing key(s) found: '{}'. The valid " + "keys are '{}'.".format( + "', '".join(invalid_keys), "', '".join(valid_keys) + ) + ) + raise ValueError(msg) + else: + # We compute the scale_factor and add_offset based on the + # min/max of the data. This requires the data to be loaded. + masked = ma.isMaskedArray(cube.data) + dtype = np.dtype(packing) + cmax = cube.data.max() + cmin = cube.data.min() + n = dtype.itemsize * 8 + if masked: + scale_factor = (cmax - cmin) / (2 ** n - 2) + else: + scale_factor = (cmax - cmin) / (2 ** n - 1) + if dtype.kind == "u": + add_offset = cmin + elif dtype.kind == "i": + if masked: + add_offset = (cmax + cmin) / 2 + else: + add_offset = cmin + 2 ** (n - 1) * scale_factor + + def set_packing_ncattrs(cfvar): + """Set netCDF packing attributes.""" + if packing: + if scale_factor: + _setncattr(cfvar, "scale_factor", scale_factor) + if add_offset: + _setncattr(cfvar, "add_offset", add_offset) + + cf_name = self._get_cube_variable_name(cube) + while cf_name in self._dataset.variables: + cf_name = self._increment_name(cf_name) + + # if netcdf3 avoid streaming due to dtype handling + if not cube.has_lazy_data() or self._dataset.file_format in ( + "NETCDF3_CLASSIC", + "NETCDF3_64BIT", + ): + + # Get the values in a form which is valid for the file format. + data = self._ensure_valid_dtype(cube.data, "cube", cube) + + def store(data, cf_var, fill_value): + cf_var[:] = data + is_masked = ma.is_masked(data) + contains_value = fill_value is not None and fill_value in data + return is_masked, contains_value + + else: + data = cube.lazy_data() + + def store(data, cf_var, fill_value): + # Store lazy data and check whether it is masked and contains + # the fill value + target = _FillValueMaskCheckAndStoreTarget(cf_var, fill_value) + da.store([data], [target]) + return target.is_masked, target.contains_value + + if not packing: + dtype = data.dtype.newbyteorder("=") + + # Create the cube CF-netCDF data variable with data payload. + cf_var = self._dataset.createVariable( + cf_name, dtype, dimension_names, fill_value=fill_value, **kwargs + ) + set_packing_ncattrs(cf_var) + + # If packing attributes are specified, don't bother checking whether + # the fill value is in the data. 
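+        # (The data values being written are unpacked, so a containment
+        # check against a packed-domain fill value would be meaningless.)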
+ if packing: + fill_value_to_check = None + elif fill_value is not None: + fill_value_to_check = fill_value + else: + fill_value_to_check = netCDF4.default_fillvals[dtype.str[1:]] + + # Store the data and check if it is masked and contains the fill value + is_masked, contains_fill_value = store( + data, cf_var, fill_value_to_check + ) + + if dtype.itemsize == 1 and fill_value is None: + if is_masked: + msg = ( + "Cube '{}' contains byte data with masked points, but " + "no fill_value keyword was given. As saved, these " + "points will read back as valid values. To save as " + "masked byte data, please explicitly specify the " + "'fill_value' keyword." + ) + warnings.warn(msg.format(cube.name())) + elif contains_fill_value: + msg = ( + "Cube '{}' contains unmasked data points equal to the " + "fill-value, {}. As saved, these points will read back " + "as missing data. To save these as normal values, please " + "specify a 'fill_value' keyword not equal to any valid " + "data points." + ) + warnings.warn(msg.format(cube.name(), fill_value)) + + if cube.standard_name: + _setncattr(cf_var, "standard_name", cube.standard_name) + + if cube.long_name: + _setncattr(cf_var, "long_name", cube.long_name) + + if cube.units.is_udunits(): + _setncattr(cf_var, "units", str(cube.units)) + + # Add the CF-netCDF calendar attribute. + if cube.units.calendar: + _setncattr(cf_var, "calendar", cube.units.calendar) + + # Add data variable-only attribute names to local_keys. + if local_keys is None: + local_keys = set() + else: + local_keys = set(local_keys) + local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS) + + # Add any cube attributes whose keys are in local_keys as + # CF-netCDF data variable attributes. + attr_names = set(cube.attributes).intersection(local_keys) + for attr_name in sorted(attr_names): + # Do not output 'conventions' attribute. + if attr_name.lower() == "conventions": + continue + + value = cube.attributes[attr_name] + + if attr_name == "STASH": + # Adopting provisional Metadata Conventions for representing MO + # Scientific Data encoded in NetCDF Format. + attr_name = "um_stash_source" + value = str(value) + + if attr_name == "ukmo__process_flags": + value = " ".join([x.replace(" ", "_") for x in value]) + + if attr_name in _CF_GLOBAL_ATTRS: + msg = ( + "{attr_name!r} is being added as CF data variable " + "attribute, but {attr_name!r} should only be a CF " + "global attribute.".format(attr_name=attr_name) + ) + warnings.warn(msg) + + _setncattr(cf_var, attr_name, value) + + # Create the CF-netCDF data variable cell method attribute. + cell_methods = self._create_cf_cell_methods(cube, dimension_names) + + if cell_methods: + _setncattr(cf_var, "cell_methods", cell_methods) + + # Create the CF-netCDF grid mapping. + self._create_cf_grid_mapping(cube, cf_var) + + return cf_var + + def _increment_name(self, varname): + """ + Increment string name or begin increment. + + Avoidance of conflicts between variable names, where the name is + incremented to distinguish it from others. + + Args: + + * varname (string): + Variable name to increment. + + Returns: + Incremented varname. 
+
+        """
+        num = 0
+        try:
+            name, endnum = varname.rsplit("_", 1)
+            if endnum.isdigit():
+                num = int(endnum) + 1
+                varname = name
+        except ValueError:
+            pass
+
+        return "{}_{}".format(varname, num)
+
+
+def save(
+    cube,
+    filename,
+    netcdf_format="NETCDF4",
+    local_keys=None,
+    unlimited_dimensions=None,
+    zlib=False,
+    complevel=4,
+    shuffle=True,
+    fletcher32=False,
+    contiguous=False,
+    chunksizes=None,
+    endian="native",
+    least_significant_digit=None,
+    packing=None,
+    fill_value=None,
+):
+    """
+    Save cube(s) to a netCDF file, given the cube and the filename.
+
+    * Iris will write CF 1.7 compliant NetCDF files.
+    * The attributes dictionaries on each cube in the saved cube list
+      will be compared and common attributes saved as NetCDF global
+      attributes where appropriate.
+    * Keyword arguments specifying how to save the data are applied
+      to each cube. To use different settings for different cubes, use
+      the NetCDF Context manager (:class:`~Saver`) directly.
+    * The save process will stream the data payload to the file using dask,
+      enabling large data payloads to be saved and maintaining the 'lazy'
+      status of the cube's data payload, unless the netcdf_format is
+      explicitly specified to be one of the netCDF 3 formats
+      ('NETCDF3_CLASSIC' or 'NETCDF3_64BIT').
+
+    Args:
+
+    * cube (:class:`iris.cube.Cube` or :class:`iris.cube.CubeList`):
+        A :class:`iris.cube.Cube`, :class:`iris.cube.CubeList` or other
+        iterable of cubes to be saved to a netCDF file.
+
+    * filename (string):
+        Name of the netCDF file to save the cube(s).
+
+    Kwargs:
+
+    * netcdf_format (string):
+        Underlying netCDF file format, one of 'NETCDF4', 'NETCDF4_CLASSIC',
+        'NETCDF3_CLASSIC' or 'NETCDF3_64BIT'. Default is 'NETCDF4' format.
+
+    * local_keys (iterable of strings):
+        An iterable of cube attribute keys. Any cube attributes with
+        matching keys will become attributes on the data variable rather
+        than global attributes.
+
+    * unlimited_dimensions (iterable of strings and/or
+      :class:`iris.coords.Coord` objects):
+        List of coordinate names (or coordinate objects) corresponding
+        to coordinate dimensions of `cube` to save with the NetCDF dimension
+        variable length 'UNLIMITED'. By default, no unlimited dimensions are
+        saved. Only the 'NETCDF4' format supports multiple 'UNLIMITED'
+        dimensions.
+
+    * zlib (bool):
+        If `True`, the data will be compressed in the netCDF file using gzip
+        compression (default `False`).
+
+    * complevel (int):
+        An integer between 1 and 9 describing the level of compression desired
+        (default 4). Ignored if `zlib=False`.
+
+    * shuffle (bool):
+        If `True`, the HDF5 shuffle filter will be applied before compressing
+        the data (default `True`). This significantly improves compression.
+        Ignored if `zlib=False`.
+
+    * fletcher32 (bool):
+        If `True`, the Fletcher32 HDF5 checksum algorithm is activated to
+        detect errors. Default `False`.
+
+    * contiguous (bool):
+        If `True`, the variable data is stored contiguously on disk. Default
+        `False`. Setting to `True` for a variable with an unlimited dimension
+        will trigger an error.
+
+    * chunksizes (tuple of int):
+        Used to manually specify the HDF5 chunksizes for each dimension of the
+        variable. A detailed discussion of HDF chunking and I/O performance is
+        available here: https://www.unidata.ucar.edu/software/netcdf/documentation/NUG/netcdf_perf_chunking.html.
+        Basically, you want the chunk size for each dimension to match as
+        closely as possible the size of the data block that users will read
+        from the file. `chunksizes` cannot be set if `contiguous=True`.
+
+    * endian (string):
+        Used to control whether the data is stored in little or big endian
+        format on disk. Possible values are 'little', 'big' or 'native'
+        (default). The library will automatically handle endian conversions
+        when the data is read, but if the data is always going to be read on a
+        computer with the opposite format as the one used to create the file,
+        there may be some performance advantage to be gained by setting the
+        endian-ness.
+
+    * least_significant_digit (int):
+        If `least_significant_digit` is specified, variable data will be
+        truncated (quantized). In conjunction with `zlib=True` this produces
+        'lossy', but significantly more efficient compression. For example, if
+        `least_significant_digit=1`, data will be quantized using
+        `numpy.around(scale*data)/scale`, where `scale = 2**bits`, and `bits`
+        is determined so that a precision of 0.1 is retained (in this case
+        `bits=4`). From
+        http://www.esrl.noaa.gov/psd/data/gridded/conventions/cdc_netcdf_standard.shtml:
+        "least_significant_digit -- power of ten of the smallest decimal place
+        in unpacked data that is a reliable value". Default is `None`, or no
+        quantization, or 'lossless' compression.
+
+    * packing (type or string or dict or list): A numpy integer datatype
+        (signed or unsigned) or a string that describes a numpy integer dtype
+        (i.e. 'i2', 'short', 'u4') or a dict of packing parameters as described
+        below or an iterable of such types, strings, or dicts.
+        This provides support for netCDF data packing as described in
+        http://www.unidata.ucar.edu/software/netcdf/documentation/NUG/best_practices.html#bp_Packed-Data-Values
+        If this argument is a type (or type string), appropriate values of
+        scale_factor and add_offset will be automatically calculated based
+        on `cube.data` and possible masking. For more control, pass a dict with
+        one or more of the following keys: `dtype` (required), `scale_factor`
+        and `add_offset`. Note that automatic calculation of packing parameters
+        will trigger loading of lazy data; set them manually using a dict to
+        avoid this. The default is `None`, in which case the datatype is
+        determined from the cube and no packing will occur. If this argument is
+        a list it must have the same number of elements as `cube` if `cube` is
+        a :class:`iris.cube.CubeList`, or one element, and each element of
+        this argument will be applied to each cube separately.
+
+    * fill_value (numeric or list):
+        The value to use for the `_FillValue` attribute on the netCDF variable.
+        If `packing` is specified the value of `fill_value` should be in the
+        domain of the packed data. If this argument is a list it must have the
+        same number of elements as `cube` if `cube` is a
+        :class:`iris.cube.CubeList`, or a single element, and each element of
+        this argument will be applied to each cube separately.
+
+    Returns:
+        None.
+
+    .. note::
+
+        The `zlib`, `complevel`, `shuffle`, `fletcher32`, `contiguous`,
+        `chunksizes` and `endian` keywords are silently ignored for netCDF 3
+        files that do not use HDF5.
+
+    .. seealso::
+
+        NetCDF Context manager (:class:`~Saver`).
+
+    """
+    from iris.cube import Cube, CubeList
+
+    if unlimited_dimensions is None:
+        unlimited_dimensions = []
+
+    if isinstance(cube, Cube):
+        cubes = CubeList()
+        cubes.append(cube)
+    else:
+        cubes = cube
+
+    if local_keys is None:
+        local_keys = set()
+    else:
+        local_keys = set(local_keys)
+
+    # Determine the attribute keys that are common across all cubes and
+    # thereby extend the collection of local_keys for attributes
+    # that should be attributes on data variables.
+    attributes = cubes[0].attributes
+    common_keys = set(attributes)
+    for cube in cubes[1:]:
+        keys = set(cube.attributes)
+        local_keys.update(keys.symmetric_difference(common_keys))
+        common_keys.intersection_update(keys)
+        different_value_keys = []
+        for key in common_keys:
+            if np.any(attributes[key] != cube.attributes[key]):
+                different_value_keys.append(key)
+        common_keys.difference_update(different_value_keys)
+        local_keys.update(different_value_keys)
+
+    def is_valid_packspec(p):
+        """Only checks that the datatype is valid."""
+        if isinstance(p, dict):
+            if "dtype" in p:
+                return is_valid_packspec(p["dtype"])
+            else:
+                msg = "The argument to packing must contain the key 'dtype'."
+                raise ValueError(msg)
+        elif isinstance(p, str) or isinstance(p, type):
+            pdtype = np.dtype(p)  # Does nothing if it's already a numpy dtype
+            if pdtype.kind != "i" and pdtype.kind != "u":
+                msg = "The packing datatype must be a numpy integer type."
+                raise ValueError(msg)
+            return True
+        elif p is None:
+            return True
+        else:
+            return False
+
+    if is_valid_packspec(packing):
+        packspecs = repeat(packing)
+    else:
+        # Assume iterable, make sure packing is the same length as cubes.
+        for cube, packspec in zip_longest(cubes, packing, fillvalue=-1):
+            if cube == -1 or packspec == -1:
+                msg = (
+                    "If packing is a list, it must have the "
+                    "same number of elements as the argument to "
+                    "cube."
+                )
+                raise ValueError(msg)
+            if not is_valid_packspec(packspec):
+                msg = "Invalid packing argument: {}.".format(packspec)
+                raise ValueError(msg)
+        packspecs = packing
+
+    # Make fill-value(s) into an iterable over cubes.
+    if isinstance(fill_value, str):
+        # Strings are awkward -- handle separately.
+        fill_values = repeat(fill_value)
+    else:
+        try:
+            fill_values = tuple(fill_value)
+        except TypeError:
+            fill_values = repeat(fill_value)
+        else:
+            if len(fill_values) != len(cubes):
+                msg = (
+                    "If fill_value is a list, it must have the "
+                    "same number of elements as the cube argument."
+                )
+                raise ValueError(msg)
+
+    # Initialise Manager for saving
+    with Saver(filename, netcdf_format) as sman:
+        # Iterate through the cubelist.
+        for cube, packspec, fill_value in zip(cubes, packspecs, fill_values):
+            sman.write(
+                cube,
+                local_keys,
+                unlimited_dimensions,
+                zlib,
+                complevel,
+                shuffle,
+                fletcher32,
+                contiguous,
+                chunksizes,
+                endian,
+                least_significant_digit,
+                packing=packspec,
+                fill_value=fill_value,
+            )
+
+        if iris.config.netcdf.conventions_override:
+            # Set to the default if custom conventions are not available.
+            conventions = cube.attributes.get(
+                "Conventions", CF_CONVENTIONS_VERSION
+            )
+        else:
+            conventions = CF_CONVENTIONS_VERSION
+
+        # Perform a CF patch of the conventions attribute.
+ cf_profile_available = iris.site_configuration.get( + "cf_profile" + ) not in [None, False] + if cf_profile_available: + conventions_patch = iris.site_configuration.get( + "cf_patch_conventions" + ) + if conventions_patch is not None: + conventions = conventions_patch(conventions) + else: + msg = "cf_profile is available but no {} defined.".format( + "cf_patch_conventions" + ) + warnings.warn(msg) + + # Add conventions attribute. + sman.update_global_attributes(Conventions=conventions) diff --git a/lib/iris/tests/unit/fileformats/netcdf/loader/__init__.py b/lib/iris/tests/unit/fileformats/netcdf/loader/__init__.py new file mode 100644 index 0000000000..7c2ae96158 --- /dev/null +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/__init__.py @@ -0,0 +1,6 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +"""Unit tests for the :mod:`iris.fileformats.netcdf.loader` module.""" diff --git a/lib/iris/tests/unit/fileformats/netcdf/saver/__init__.py b/lib/iris/tests/unit/fileformats/netcdf/saver/__init__.py new file mode 100644 index 0000000000..a68d5fc5d0 --- /dev/null +++ b/lib/iris/tests/unit/fileformats/netcdf/saver/__init__.py @@ -0,0 +1,6 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +"""Unit tests for the :mod:`iris.fileformats.netcdf.saver` module.""" From 6e81665ff5d63383039656b4e3e243571d4a4a52 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 23 Aug 2021 13:52:41 +0100 Subject: [PATCH 7/7] More fixes. --- lib/iris/experimental/ugrid/__init__.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/iris/experimental/ugrid/__init__.py b/lib/iris/experimental/ugrid/__init__.py index bfc570fcfd..fb607ffd6b 100644 --- a/lib/iris/experimental/ugrid/__init__.py +++ b/lib/iris/experimental/ugrid/__init__.py @@ -39,8 +39,9 @@ from ...config import get_logger from ...coords import AuxCoord, _DimensionalMetadata from ...exceptions import ConnectivityNotFoundError, CoordinateNotFoundError -from ...fileformats import cf, netcdf +from ...fileformats import cf from ...fileformats._nc_load_rules.helpers import get_attr_units, get_names +from ...fileformats.netcdf import loader as nc_loader from ...io import decode_uri, expand_filespecs from ...util import guess_coord_axis @@ -3359,7 +3360,7 @@ def load_meshes(uris, var_name=None): from iris.fileformats import FORMAT_AGENT if not PARSE_UGRID_ON_LOAD: - # Explicit behaviour, consistent with netcdf.load_cubes(), rather than + # Explicit behaviour, consistent with netcdf.loader.load_cubes(), rather than # an invisible assumption. message = ( f"PARSE_UGRID_ON_LOAD is {bool(PARSE_UGRID_ON_LOAD)}. Must be " @@ -3395,7 +3396,7 @@ def load_meshes(uris, var_name=None): else: handling_format_spec = FORMAT_AGENT.get_spec(source, None) - if handling_format_spec.handler == netcdf.load_cubes: + if handling_format_spec.handler == nc_loader.load_cubes: valid_sources.append(source) else: message = f"Ignoring non-NetCDF file: {source}" @@ -3718,7 +3719,7 @@ class CFUGridReader(cf.CFReader): ############ # Object construction. -# Helper functions, supporting netcdf.load_cubes ONLY, expected to +# Helper functions, supporting netcdf.loader.load_cubes ONLY, expected to # altered/moved when pyke is removed. 
@@ -3733,7 +3734,7 @@ def _build_aux_coord(coord_var, file_path): assert isinstance(coord_var, CFUGridAuxiliaryCoordinateVariable) attributes = {} attr_units = get_attr_units(coord_var, attributes) - points_data = netcdf._get_cf_var_data(coord_var, file_path) + points_data = nc_loader._get_cf_var_data(coord_var, file_path) # Bounds will not be loaded: # Bounds may be present, but the UGRID conventions state this would @@ -3785,7 +3786,7 @@ def _build_connectivity(connectivity_var, file_path, location_dims): assert isinstance(connectivity_var, CFUGridConnectivityVariable) attributes = {} attr_units = get_attr_units(connectivity_var, attributes) - indices_data = netcdf._get_cf_var_data(connectivity_var, file_path) + indices_data = nc_loader._get_cf_var_data(connectivity_var, file_path) cf_role = connectivity_var.cf_role start_index = connectivity_var.start_index @@ -3952,7 +3953,7 @@ def _build_mesh(cf, mesh_var, file_path): ) mesh_elements = filter(None, mesh_elements) for iris_object in mesh_elements: - netcdf._add_unused_attributes( + nc_loader._add_unused_attributes( iris_object, cf.cf_group[iris_object.var_name] )
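
Usage note: a minimal sketch of driving the relocated Saver directly, following
the docstring example above. The import path assumes the final submodule name
`iris.fileformats.netcdf.saver` (as referenced by the new unit-test packages);
the cube contents and output filename are illustrative only.

    import numpy as np

    from iris.cube import Cube
    from iris.fileformats.netcdf.saver import Saver

    cubes = [Cube(np.arange(4.0), var_name="example")]

    # Initialise the manager for saving, then write each cube with
    # zlib compression enabled.
    with Saver("output.nc", "NETCDF4") as sman:
        for cube in cubes:
            sman.write(cube, zlib=True, complevel=4)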