diff --git a/docs/iris/src/whatsnew/contributions_1.10/newfeature_2016-Jun-03_cell_method_parsing.txt b/docs/iris/src/whatsnew/contributions_1.10/newfeature_2016-Jun-03_cell_method_parsing.txt new file mode 100644 index 0000000000..6113361f30 --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_1.10/newfeature_2016-Jun-03_cell_method_parsing.txt @@ -0,0 +1 @@ +* The parsing functionality for Cell Methods from netCDF files is available as part of the :mod:`iris.fileformats.netcdf` module. diff --git a/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb b/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb index e04e58fd1f..03f6c3ecbe 100644 --- a/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb +++ b/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb @@ -884,7 +884,6 @@ fc_formula_terms fc_extras import six - import re import warnings import biggus @@ -897,6 +896,7 @@ fc_extras import iris.coords import iris.coord_systems import iris.fileformats.cf as cf + import iris.fileformats.netcdf import iris.fileformats.pp as pp import iris.exceptions import iris.std_names @@ -994,26 +994,6 @@ fc_extras CF_VALUE_STD_NAME_GRID_LON = 'grid_longitude' CF_VALUE_STD_NAME_PROJ_X = 'projection_x_coordinate' CF_VALUE_STD_NAME_PROJ_Y = 'projection_y_coordinate' - - # Cell methods. - CM_KNOWN_METHODS = ['point', 'sum', 'mean', 'maximum', 'minimum', 'mid_range', - 'standard_deviation', 'variance', 'mode', 'median'] - - CM_COMMENT = 'comment' - CM_EXTRA = 'extra' - CM_INTERVAL = 'interval' - CM_METHOD = 'method' - CM_NAME = 'name' - - CM_PARSE = re.compile( r''' - (?P<name>([\w_]+\s*?:\s+)+) - (?P<method>[\w_\s]+(?![\w_]*\s*?:))\s* - (?: - \(\s* - (?P<extra>[^\)]+) - \)\s* - )? 
- ''', re.VERBOSE) ################################################################################ @@ -1044,7 +1024,8 @@ fc_extras # Incorporate cell methods nc_att_cell_methods = getattr(cf_var, CF_ATTR_CELL_METHODS, None) - cube.cell_methods = _parse_cell_methods(cf_var.cf_name, nc_att_cell_methods) + cube.cell_methods = iris.fileformats.netcdf.parse_cell_methods( + cf_var.cf_name, nc_att_cell_methods) # Set the cube global attributes. for attr_name, attr_value in six.iteritems(cf_var.cf_group.global_attributes): diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 07ea55ba36..addd5c0b98 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -31,6 +31,7 @@ import collections import os import os.path +import re import string import warnings @@ -137,6 +138,116 @@ } +# Cell methods. +_CM_KNOWN_METHODS = ['point', 'sum', 'mean', 'maximum', 'minimum', 'mid_range', + 'standard_deviation', 'variance', 'mode', 'median'] + +_CM_COMMENT = 'comment' +_CM_EXTRA = 'extra' +_CM_INTERVAL = 'interval' +_CM_METHOD = 'method' +_CM_NAME = 'name' +_CM_PARSE = re.compile(r''' + (?P<name>([\w_]+\s*?:\s+)+) + (?P<method>[\w_\s]+(?![\w_]*\s*?:))\s* + (?: + \(\s* + (?P<extra>[^\)]+) + \)\s* + )? + ''', re.VERBOSE) + + +def parse_cell_methods(cf_var_name, nc_cell_methods): + """ + Parse a CF cell_methods attribute string into a tuple of zero or + more CellMethod instances. + + Args: + + * cf_var_name (str): + The name of the netCDF variable that contains this cell methods + attribute. + + * nc_cell_methods (str): + The value of the cell methods attribute to be parsed. + + """ + + cell_methods = [] + if nc_cell_methods is not None: + for m in _CM_PARSE.finditer(nc_cell_methods): + d = m.groupdict() + method = d[_CM_METHOD] + method = method.strip() + # Check validity of method, allowing for multi-part methods + # e.g. mean over years. 
+ method_words = method.split() + if method_words[0].lower() not in _CM_KNOWN_METHODS: + msg = 'NetCDF variable {!r} contains unknown cell ' \ + 'method {!r}' + warnings.warn(msg.format('{}'.format(cf_var_name), + '{}'.format(method_words[0]))) + d[_CM_METHOD] = method + name = d[_CM_NAME] + name = name.replace(' ', '') + name = name.rstrip(':') + d[_CM_NAME] = tuple([n for n in name.split(':')]) + interval = [] + comment = [] + if d[_CM_EXTRA] is not None: + # + # tokenise the key words and field colon marker + # + d[_CM_EXTRA] = d[_CM_EXTRA].replace('comment:', + '<<comment>><<:>>') + d[_CM_EXTRA] = d[_CM_EXTRA].replace('interval:', + '<<interval>><<:>>') + d[_CM_EXTRA] = d[_CM_EXTRA].split('<<:>>') + if len(d[_CM_EXTRA]) == 1: + comment.extend(d[_CM_EXTRA]) + else: + next_field_type = comment + for field in d[_CM_EXTRA]: + field_type = next_field_type + index = field.rfind('<<interval>>') + if index == 0: + next_field_type = interval + continue + elif index > 0: + next_field_type = interval + else: + index = field.rfind('<<comment>>') + if index == 0: + next_field_type = comment + continue + elif index > 0: + next_field_type = comment + if index != -1: + field = field[:index] + field_type.append(field.strip()) + # + # cater for a shared interval over multiple axes + # + if len(interval): + if len(d[_CM_NAME]) != len(interval) and len(interval) == 1: + interval = interval*len(d[_CM_NAME]) + # + # cater for a shared comment over multiple axes + # + if len(comment): + if len(d[_CM_NAME]) != len(comment) and len(comment) == 1: + comment = comment*len(d[_CM_NAME]) + d[_CM_INTERVAL] = tuple(interval) + d[_CM_COMMENT] = tuple(comment) + cell_method = iris.coords.CellMethod(d[_CM_METHOD], + coords=d[_CM_NAME], + intervals=d[_CM_INTERVAL], + comments=d[_CM_COMMENT]) + cell_methods.append(cell_method) + return tuple(cell_methods) + + class CFNameCoordMap(object): """Provide a simple CF name to CF coordinate mapping.""" diff --git 
a/lib/iris/tests/unit/fileformats/pyke_rules/compiled_krb/fc_rules_cf_fc/test__parse_cell_methods.py b/lib/iris/tests/unit/fileformats/netcdf/test_parse_cell_methods.py similarity index 87% rename from lib/iris/tests/unit/fileformats/pyke_rules/compiled_krb/fc_rules_cf_fc/test__parse_cell_methods.py rename to lib/iris/tests/unit/fileformats/netcdf/test_parse_cell_methods.py index daf1eb0594..91f865b6e6 100644 --- a/lib/iris/tests/unit/fileformats/pyke_rules/compiled_krb/fc_rules_cf_fc/test__parse_cell_methods.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_parse_cell_methods.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2015, Met Office +# (C) British Crown Copyright 2015 - 2016, Met Office # # This file is part of Iris. # @@ -15,8 +15,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see <http://www.gnu.org/licenses/>. """ -Test function :func:`iris.fileformats._pyke_rules.compiled_krb.\ -fc_rules_cf_fc._parse_cell_methods`. +Unit tests for :func:`iris.fileformats.netcdf.parse_cell_methods`. 
""" @@ -28,8 +27,7 @@ import iris.tests as tests from iris.coords import CellMethod -from iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc import \ - _parse_cell_methods +from iris.fileformats.netcdf import parse_cell_methods from iris.tests import mock @@ -41,7 +39,7 @@ def test_simple(self): ] expected = (CellMethod(method='mean', coords='time'),) for cell_method_str in cell_method_strings: - res = _parse_cell_methods('test_var', cell_method_str) + res = parse_cell_methods('test_var', cell_method_str) self.assertEqual(res, expected) def test_with_interval(self): @@ -52,7 +50,7 @@ def test_with_interval(self): expected = (CellMethod(method='variance', coords='time', intervals='1 hr'),) for cell_method_str in cell_method_strings: - res = _parse_cell_methods('test_var', cell_method_str) + res = parse_cell_methods('test_var', cell_method_str) self.assertEqual(res, expected) def test_multiple(self): @@ -67,7 +65,7 @@ def test_multiple(self): CellMethod(method='mean', coords='time', intervals='1 day')) for cell_method_str in cell_method_strings: - res = _parse_cell_methods('test_var', cell_method_str) + res = parse_cell_methods('test_var', cell_method_str) self.assertEqual(res, expected) def test_comment(self): @@ -86,7 +84,7 @@ def test_comment(self): CellMethod(method='mean', coords='time', intervals='1 day', comments='second bit')) for cell_method_str in cell_method_strings: - res = _parse_cell_methods('test_var', cell_method_str) + res = parse_cell_methods('test_var', cell_method_str) self.assertEqual(res, expected) def test_portions_of_cells(self): @@ -97,7 +95,7 @@ def test_portions_of_cells(self): expected = (CellMethod(method='mean where sea_ice over sea', coords='area'),) for cell_method_str in cell_method_strings: - res = _parse_cell_methods('test_var', cell_method_str) + res = parse_cell_methods('test_var', cell_method_str) self.assertEqual(res, expected) def test_climatology(self): @@ -110,7 +108,7 @@ def test_climatology(self): expected = 
(CellMethod(method='minimum within days', coords='time'), CellMethod(method='mean over days', coords='time')) for cell_method_str in cell_method_strings: - res = _parse_cell_methods('test_var', cell_method_str) + res = parse_cell_methods('test_var', cell_method_str) self.assertEqual(res, expected) def test_climatology_with_unknown_method(self): @@ -124,7 +122,7 @@ def test_climatology_with_unknown_method(self): CellMethod(method='mean over days', coords='time')) for cell_method_str in cell_method_strings: with mock.patch('warnings.warn') as warn: - res = _parse_cell_methods('test_var', cell_method_str) + res = parse_cell_methods('test_var', cell_method_str) self.assertIn("NetCDF variable 'test_var' contains unknown " "cell method 'min'", warn.call_args[0][0])