From bc442fdf616432af15c665123d0d6a415cb9b82e Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 17 Jan 2017 18:02:00 +0000 Subject: [PATCH 1/7] Initial ideas --- lib/iris/fileformats/pp.py | 24 ++++++++++------------ lib/iris/util.py | 41 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py index ea921dca20..60928bfcc1 100644 --- a/lib/iris/fileformats/pp.py +++ b/lib/iris/fileformats/pp.py @@ -33,8 +33,8 @@ import struct import warnings -import biggus import cf_units +import dask import numpy as np import numpy.ma as ma import netcdftime @@ -44,6 +44,7 @@ import iris.fileformats.rules import iris.fileformats.pp_rules import iris.coord_systems +from iris.util import is_lazy_data, as_real_array, as_lazy_array try: import mo_pack @@ -1286,11 +1287,10 @@ def data(self): """ # Cache the real data on first use - if isinstance(self._data, biggus.Array): - data = self._data.masked_array() - if ma.count_masked(data) == 0: - data = data.data - self._data = data + # N.B. this throws away the original lazy object. + if is_lazy_data(self._data): + # Get the data as a numpy array. + self._data = self._data.compute() return self._data @data.setter @@ -1642,12 +1642,8 @@ def __eq__(self, other): for attr in self.__slots__: attrs = [hasattr(self, attr), hasattr(other, attr)] if all(attrs): - self_attr = getattr(self, attr) - other_attr = getattr(other, attr) - if isinstance(self_attr, biggus.NumpyArrayAdapter): - self_attr = self_attr.concrete - if isinstance(other_attr, biggus.NumpyArrayAdapter): - other_attr = other_attr.concrete + self_attr = as_real_array(getattr(self, attr)) + other_attr = as_real_array(getattr(other, attr)) if not np.all(self_attr == other_attr): result = False break @@ -1866,7 +1862,7 @@ def _interpret_fields(fields): def _create_field_data(field, data_shape, land_mask): """ Modifies a field's ``_data`` attribute either by: - * converting DeferredArrayBytes into a biggus array, + * converting DeferredArrayBytes into a dask array, * converting LoadedArrayBytes into an actual numpy array. """ @@ -1887,7 +1883,7 @@ def _create_field_data(field, data_shape, land_mask): field.raw_lbpack, field.boundary_packing, field.bmdi, land_mask) - field._data = biggus.NumpyArrayAdapter(proxy) + field._data = as_lazy_array(proxy) def _field_gen(filename, read_data_bytes, little_ended=False): diff --git a/lib/iris/util.py b/lib/iris/util.py index ebb6bfa746..9a3a11606e 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -35,6 +35,7 @@ import time import cf_units +import dask.array as da import numpy as np import numpy.ma as ma @@ -1599,3 +1600,43 @@ def demote_dim_coord_to_aux_coord(cube, name_or_coord): cube.remove_coord(dim_coord) cube.add_aux_coord(dim_coord, coord_dim) + + +def is_lazy_data(data): + """ + Return whether the argument is an Iris 'lazy' data array. + + At present, this means simply a Dask array. + We determine this by checking for a "compute" property. + + """ + return hasattr(data, 'compute') + + +def data_as_real_array(data): + """ + Returned the realised value of the argument, as a numpy array. + + If lazy, fetch the data, otherwise do nothing. + """ + if is_lazy_data(data): + data = data.compute() + return data + + +# A magic value, borrowed from biggus +_MAX_CHUNK_SIZE = 8 * 1024 * 1024 * 2 + +def data_as_lazy_array(data): + """ + Return a lazy equivalent of the argument, as a dask array. + + For an existing dask array, do nothing. 
+ Otherwise, wrap with dask.array.from_array. + This assumes the underlying object support numpy-like indexing. + + """ + if not is_lazy_data(data): + data = da.from_array(data, chunks=_MAX_CHUNK_SIZE) + return data + From 86da9628ae26a3f7ab626c6196217fb0019e2cd9 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 18 Jan 2017 13:01:20 +0000 Subject: [PATCH 2/7] Revert iris.util and put lazy ops in iris._lazy_data. --- lib/iris/_lazy_data.py | 78 ++++++++++++++++++++++++++++++++++++++ lib/iris/fileformats/pp.py | 13 +++---- lib/iris/util.py | 41 -------------------- 3 files changed, 84 insertions(+), 48 deletions(-) create mode 100644 lib/iris/_lazy_data.py diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py new file mode 100644 index 0000000000..460cfd2d2b --- /dev/null +++ b/lib/iris/_lazy_data.py @@ -0,0 +1,78 @@ +# (C) British Crown Copyright 2010 - 2016, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see . +""" +Routines for lazy data handling. + +To avoid replicating implementation-dependent test and conversion code. + +""" +from __future__ import (absolute_import, division, print_function) + +import dask.array as da + + +def is_lazy_data(data): + """ + Return whether the argument is an Iris 'lazy' data array. + + At present, this means simply a Dask array. + We determine this by checking for a "compute" property. + + """ + return hasattr(data, 'compute') + + +def as_concrete_data(data): + """ + Return the actual content of the argument, as a numpy array. + + If lazy, return the realised data, otherwise return the argument unchanged. + + """ + if is_lazy_data(data): + data = data.compute() + return data + + +# A magic value, borrowed from biggus +_MAX_CHUNK_SIZE = 8 * 1024 * 1024 * 2 + + +def data_as_lazy_array(data): + """ + Return a lazy equivalent of the argument, as a lazy array. + + For an existing dask array, return it unchanged. + Otherwise, return the argument wrapped with dask.array.from_array. + This assumes the underlying object has numpy-array-like properties. + + """ + # + # NOTE: there is still some doubts here about what forms of indexing are + # valid. + # Call an integer, slice, ellipsis or new-axis object a "simple" index, and + # other cases "compound" : a list, tuple, or array of integers. + # ( Except, a length-1 tuple, list or array might count as "simple" ? ) + # If there is at most one compund index, I think we are ok -- i.e. all + # interpretations should deliver the same. + # If there is *more than one* "compound" index there is potential for + # trouble. + # NOTE#2: cube indexing processes the indices, which may also be relevant. 
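+    # For example :
+    #   data[0, :, [1, 2]]      -- one compound index, should be fine.
+    #   data[[0, 1], :, [1, 2]] -- two compound indices : numpy pairs the two
+    #                              lists element-wise, but dask may not accept
+    #                              lists on more than one axis at all.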
+ # + if not is_lazy_data(data): + data = da.from_array(data, chunks=_MAX_CHUNK_SIZE) + return data diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py index 60928bfcc1..bc67419cbd 100644 --- a/lib/iris/fileformats/pp.py +++ b/lib/iris/fileformats/pp.py @@ -34,7 +34,6 @@ import warnings import cf_units -import dask import numpy as np import numpy.ma as ma import netcdftime @@ -44,7 +43,7 @@ import iris.fileformats.rules import iris.fileformats.pp_rules import iris.coord_systems -from iris.util import is_lazy_data, as_real_array, as_lazy_array +from iris._lazy_data import is_lazy_data, as_concrete_data, as_lazy_data try: import mo_pack @@ -1290,7 +1289,7 @@ def data(self): # N.B. this throws away the original lazy object. if is_lazy_data(self._data): # Get the data as a numpy array. - self._data = self._data.compute() + self._data = as_concrete_data(self._data) return self._data @data.setter @@ -1642,8 +1641,8 @@ def __eq__(self, other): for attr in self.__slots__: attrs = [hasattr(self, attr), hasattr(other, attr)] if all(attrs): - self_attr = as_real_array(getattr(self, attr)) - other_attr = as_real_array(getattr(other, attr)) + self_attr = as_concrete_data(getattr(self, attr)) + other_attr = as_concrete_data(getattr(other, attr)) if not np.all(self_attr == other_attr): result = False break @@ -1862,7 +1861,7 @@ def _interpret_fields(fields): def _create_field_data(field, data_shape, land_mask): """ Modifies a field's ``_data`` attribute either by: - * converting DeferredArrayBytes into a dask array, + * converting DeferredArrayBytes into a lazy array, * converting LoadedArrayBytes into an actual numpy array. """ @@ -1883,7 +1882,7 @@ def _create_field_data(field, data_shape, land_mask): field.raw_lbpack, field.boundary_packing, field.bmdi, land_mask) - field._data = as_lazy_array(proxy) + field._data = as_lazy_data(proxy) def _field_gen(filename, read_data_bytes, little_ended=False): diff --git a/lib/iris/util.py b/lib/iris/util.py index 9a3a11606e..ebb6bfa746 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -35,7 +35,6 @@ import time import cf_units -import dask.array as da import numpy as np import numpy.ma as ma @@ -1600,43 +1599,3 @@ def demote_dim_coord_to_aux_coord(cube, name_or_coord): cube.remove_coord(dim_coord) cube.add_aux_coord(dim_coord, coord_dim) - - -def is_lazy_data(data): - """ - Return whether the argument is an Iris 'lazy' data array. - - At present, this means simply a Dask array. - We determine this by checking for a "compute" property. - - """ - return hasattr(data, 'compute') - - -def data_as_real_array(data): - """ - Returned the realised value of the argument, as a numpy array. - - If lazy, fetch the data, otherwise do nothing. - """ - if is_lazy_data(data): - data = data.compute() - return data - - -# A magic value, borrowed from biggus -_MAX_CHUNK_SIZE = 8 * 1024 * 1024 * 2 - -def data_as_lazy_array(data): - """ - Return a lazy equivalent of the argument, as a dask array. - - For an existing dask array, do nothing. - Otherwise, wrap with dask.array.from_array. - This assumes the underlying object support numpy-like indexing. - - """ - if not is_lazy_data(data): - data = da.from_array(data, chunks=_MAX_CHUNK_SIZE) - return data - From 1811b0e23144dd4296fbe72333ab102a7ed04c92 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 18 Jan 2017 13:32:05 +0000 Subject: [PATCH 3/7] Add testing for lazy ops; fix pep8 issues. 
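
An illustrative round-trip of the helpers that the new tests cover -- just a
sketch for reference, not code added by this patch:

    import numpy as np

    from iris._lazy_data import as_concrete_data, as_lazy_data, is_lazy_data

    real = np.arange(24).reshape((2, 3, 4))

    lazy = as_lazy_data(real)            # wraps with dask.array.from_array
    assert is_lazy_data(lazy)            # detected via its 'compute' attribute
    assert not is_lazy_data(real)
    assert as_lazy_data(lazy) is lazy    # an existing dask array is unchanged

    back = as_concrete_data(lazy)        # realises via .compute()
    assert isinstance(back, np.ndarray)
    assert np.array_equal(back, real)
    assert as_concrete_data(real) is real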
--- lib/iris/_lazy_data.py | 5 +- lib/iris/fileformats/pp.py | 2 +- .../tests/integration/temp_dask/__init__.py | 26 ++++++ .../integration/temp_dask/test_lazy_utils.py | 79 +++++++++++++++++++ 4 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 lib/iris/tests/integration/temp_dask/__init__.py create mode 100644 lib/iris/tests/integration/temp_dask/test_lazy_utils.py diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 460cfd2d2b..87dcff2ba0 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2016, Met Office +# (C) British Crown Copyright 2017, Met Office # # This file is part of Iris. # @@ -21,6 +21,7 @@ """ from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa import dask.array as da @@ -52,7 +53,7 @@ def as_concrete_data(data): _MAX_CHUNK_SIZE = 8 * 1024 * 1024 * 2 -def data_as_lazy_array(data): +def as_lazy_data(data): """ Return a lazy equivalent of the argument, as a lazy array. diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py index bc67419cbd..fbcfe7e3f2 100644 --- a/lib/iris/fileformats/pp.py +++ b/lib/iris/fileformats/pp.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2016, Met Office +# (C) British Crown Copyright 2010 - 2017, Met Office # # This file is part of Iris. # diff --git a/lib/iris/tests/integration/temp_dask/__init__.py b/lib/iris/tests/integration/temp_dask/__init__.py new file mode 100644 index 0000000000..50f059effc --- /dev/null +++ b/lib/iris/tests/integration/temp_dask/__init__.py @@ -0,0 +1,26 @@ +# (C) British Crown Copyright 2017, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see . +""" +Temporary integration tests, specific to replacement of biggus with dask. + +Note: some content here may eventually move into main tests. +Keep it here for now, so we can easily test all dask code with : + python -m unittest discover -v lib/iris/tests/integration/temp_dask + +""" +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa diff --git a/lib/iris/tests/integration/temp_dask/test_lazy_utils.py b/lib/iris/tests/integration/temp_dask/test_lazy_utils.py new file mode 100644 index 0000000000..eee210f22b --- /dev/null +++ b/lib/iris/tests/integration/temp_dask/test_lazy_utils.py @@ -0,0 +1,79 @@ +# (C) British Crown Copyright 2017, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see . +""" +Test lazy data utility functions. + +Note: really belongs in "tests/unit/lazy_data". + +""" +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests + + +import numpy as np +import dask.array as da + + +from iris._lazy_data import is_lazy_data, as_lazy_data, as_concrete_data + + +class MixinLazyTests(object): + def setUp(self): + # Create test real and dask arrays. + self.real_array = np.arange(24).reshape((2, 3, 4)) + self.lazy_values = np.arange(30).reshape((2, 5, 3)) + self.lazy_array = da.from_array(self.lazy_values, 1e6) + + +class Test_is_lazy_data(MixinLazyTests, tests.IrisTest): + def test_lazy(self): + self.assertTrue(is_lazy_data(self.lazy_array)) + + def test_real(self): + self.assertFalse(is_lazy_data(self.real_array)) + + +class Test_as_lazy_data(MixinLazyTests, tests.IrisTest): + def test_lazy(self): + result = as_lazy_data(self.lazy_array) + self.assertTrue(is_lazy_data(result)) + self.assertIs(result, self.lazy_array) + + def test_real(self): + result = as_lazy_data(self.real_array) + self.assertTrue(is_lazy_data(result)) + self.assertArrayAllClose(as_concrete_data(result), self.real_array) + + +class Test_as_concrete_data(MixinLazyTests, tests.IrisTest): + def test_lazy(self): + result = as_concrete_data(self.lazy_array) + self.assertFalse(is_lazy_data(result)) + self.assertArrayAllClose(result, self.lazy_values) + + def test_real(self): + result = as_concrete_data(self.real_array) + self.assertFalse(is_lazy_data(result)) + self.assertIs(result, self.real_array) + + +if __name__ == '__main__': + tests.main() From 735eaa5dd60b7f48dc701b695e95c31b30301b5b Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 18 Jan 2017 16:20:07 +0000 Subject: [PATCH 4/7] Get basic load working. 
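
Sketch of the intended user-visible behaviour (the filename here is only a
stand-in for a real PP file, such as the 'global.pp' test data used in the
new integration test):

    import numpy as np
    import iris

    cube, = iris.load_raw('global.pp')

    assert cube.has_lazy_data()          # loaded as a dask array, nothing read
    lazy = cube.lazy_data()              # the underlying dask array itself

    data = cube.data                     # realises to a plain numpy array
    assert isinstance(data, np.ndarray)
    assert not cube.has_lazy_data()      # the cube now holds real data

    # lazy_data() still works, but now returns a fresh wrapper around the
    # realised array rather than the original dask array.
    assert cube.lazy_data() is not lazy
    assert np.allclose(cube.lazy_data().compute(), lazy.compute())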
--- lib/iris/cube.py | 19 ++-- .../integration/temp_dask/test_pp_lazy.py | 102 ++++++++++++++++++ 2 files changed, 112 insertions(+), 9 deletions(-) create mode 100644 lib/iris/tests/integration/temp_dask/test_pp_lazy.py diff --git a/lib/iris/cube.py b/lib/iris/cube.py index 79fb074ea0..f02cfbcae2 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -46,6 +46,7 @@ import iris.coords import iris._concatenate import iris._constraints +from iris._lazy_data import is_lazy_data, as_lazy_data, as_concrete_data import iris._merge import iris.exceptions import iris.util @@ -713,7 +714,7 @@ def __init__(self, data, standard_name=None, long_name=None, if isinstance(data, six.string_types): raise TypeError('Invalid data type: {!r}.'.format(data)) - if not isinstance(data, (biggus.Array, ma.MaskedArray)): + if not is_lazy_data(data): data = np.asarray(data) self._my_data = data @@ -1630,8 +1631,8 @@ def lazy_data(self, array=None): """ if array is not None: - if not isinstance(array, biggus.Array): - raise TypeError('new values must be a biggus.Array') + if not is_lazy_data(array): + raise TypeError('new values must be a lazy array') if self.shape != array.shape: # The _ONLY_ data reshape permitted is converting a # 0-dimensional array into a 1-dimensional array of @@ -1643,8 +1644,8 @@ def lazy_data(self, array=None): self._my_data = array else: array = self._my_data - if not isinstance(array, biggus.Array): - array = biggus.NumpyArrayAdapter(array) + if not is_lazy_data(array): + array = as_lazy_data(array) return array @property @@ -1681,9 +1682,9 @@ def data(self): """ data = self._my_data - if not isinstance(data, np.ndarray): + if is_lazy_data(data): try: - data = data.masked_array() + data = as_concrete_data(data) except MemoryError: msg = "Failed to create the cube's data as there was not" \ " enough memory available.\n" \ @@ -1694,7 +1695,7 @@ def data(self): msg = msg.format(self.shape, data.dtype) raise MemoryError(msg) # Unmask the array only if it is filled. - if isinstance(data, np.ndarray) and ma.count_masked(data) == 0: + if isinstance(data, np.ma.masked_array) and ma.count_masked(data) == 0: data = data.data # data may be a numeric type, so ensure an np.ndarray is returned self._my_data = np.asanyarray(data) @@ -1715,7 +1716,7 @@ def data(self, value): self._my_data = data def has_lazy_data(self): - return isinstance(self._my_data, biggus.Array) + return is_lazy_data(self._my_data) @property def dim_coords(self): diff --git a/lib/iris/tests/integration/temp_dask/test_pp_lazy.py b/lib/iris/tests/integration/temp_dask/test_pp_lazy.py new file mode 100644 index 0000000000..b9650ec481 --- /dev/null +++ b/lib/iris/tests/integration/temp_dask/test_pp_lazy.py @@ -0,0 +1,102 @@ +# (C) British Crown Copyright 2017, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see . +""" +Test lazy data handlingin iris.fileformats.pp. 
+ +Note: probably belongs in "tests/unit/fileformats/pp", if a separate test is +actually required. + +""" +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests + +from dask.array.core import Array as DaskArray +import numpy as np + +import iris + + +class TestLazyLoad(tests.IrisTest): + def setUp(self): + path = tests.get_data_path(('PP', 'aPPglob1', 'global.pp')) + self.cube, = iris.load_raw(path) + # This is the same as iris.tests.stock.global_pp(), but avoids the + # merge, which is presently not working. + + def test_load(self): + # Check that a simple load gets us lazy data. + cube = self.cube + raw_data = cube._my_data + self.assertIsInstance(raw_data, DaskArray) + + def test_data(self): + # Check that data access realises. + cube = self.cube + raw_data = cube._my_data + data = cube.data + self.assertIsInstance(data, np.ndarray) + self.assertArrayAllClose(data, raw_data.compute()) + + def test_has_lazy(self): + # Check cube.has_lazy_data(). + cube = self.cube + self.assertTrue(cube.has_lazy_data()) + cube.data + self.assertFalse(cube.has_lazy_data()) + + def test_lazy_data(self): + # Check cube.lazy_data(). + cube = self.cube + raw_data = cube._my_data + lazy = cube.lazy_data() + self.assertIs(cube.lazy_data(), raw_data) + cube.data + lazy = cube.lazy_data() + self.assertIsNot(cube.lazy_data(), raw_data) + self.assertArrayAllClose(lazy.compute(), raw_data.compute()) + + def test_lazy_data__set(self): + # Check cube.lazy_data(). + cube = self.cube + raw_data = cube._my_data + cube.lazy_data(raw_data + 100.0) + real_data = raw_data.compute() + self.assertArrayAllClose(cube.lazy_data(), + real_data + 100.0) + + def test_lazy_data__fail_set_bad_shape(self): + # Check cube.lazy_data(). + cube = self.cube + raw_data = cube.lazy_data() + msg = 'cube data with shape \(73, 96\), got \(72, 96\)' + with self.assertRaisesRegexp(ValueError, msg): + cube.lazy_data(raw_data[1:]) + + def test_lazy_data__fail_set_not_lazy(self): + # Check cube.lazy_data(). + cube = self.cube + raw_data = cube.lazy_data() + with self.assertRaisesRegexp(TypeError, 'must be a lazy array'): + cube.lazy_data(np.zeros(raw_data.shape)) + + +if __name__ == '__main__': + tests.main() From 87bf9fcdeb9f5f3f83500773e12e58ae448a494b Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 18 Jan 2017 19:04:54 +0000 Subject: [PATCH 5/7] Adjust Travis testing for feature branch. 
--- .travis.yml | 4 +--- conda-requirements.txt | 1 + minimal-conda-requirements.txt | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3458ab9dc8..2b5f50809c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,8 +15,6 @@ env: - TEST_TARGET=default - TEST_TARGET=default TEST_MINIMAL=true - TEST_TARGET=coding - - TEST_TARGET=example - - TEST_TARGET=doctest git: depth: 10000 @@ -107,7 +105,7 @@ install: script: - if [[ $TEST_TARGET == 'default' ]]; then - python -m iris.tests.runner --default-tests --system-tests --print-failed-images; + python -m unittest discover -v lib/iris/tests/integration/temp_dask; fi - if [[ $TEST_TARGET == 'example' ]]; then python -m iris.tests.runner --example-tests --print-failed-images; diff --git a/conda-requirements.txt b/conda-requirements.txt index 3324c5fbc4..cd89693e57 100644 --- a/conda-requirements.txt +++ b/conda-requirements.txt @@ -10,6 +10,7 @@ numpy pyke udunits2 cf_units +dask # Iris build dependencies setuptools diff --git a/minimal-conda-requirements.txt b/minimal-conda-requirements.txt index 5299e438e9..a87c787ec9 100644 --- a/minimal-conda-requirements.txt +++ b/minimal-conda-requirements.txt @@ -10,6 +10,7 @@ numpy pyke udunits2 cf_units +dask # Iris build dependencies setuptools From b7466936e08905f1b4dcdd6cefb211908adb5e6c Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 18 Jan 2017 20:10:54 +0000 Subject: [PATCH 6/7] Tests fixes. --- lib/iris/cube.py | 5 +++-- lib/iris/tests/integration/temp_dask/test_pp_lazy.py | 1 + lib/iris/util.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/iris/cube.py b/lib/iris/cube.py index f02cfbcae2..18aa4d350d 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2016, Met Office +# (C) British Crown Copyright 2010 - 2017, Met Office # # This file is part of Iris. # @@ -1695,7 +1695,8 @@ def data(self): msg = msg.format(self.shape, data.dtype) raise MemoryError(msg) # Unmask the array only if it is filled. - if isinstance(data, np.ma.masked_array) and ma.count_masked(data) == 0: + if (isinstance(data, np.ma.masked_array) and + ma.count_masked(data) == 0): data = data.data # data may be a numeric type, so ensure an np.ndarray is returned self._my_data = np.asanyarray(data) diff --git a/lib/iris/tests/integration/temp_dask/test_pp_lazy.py b/lib/iris/tests/integration/temp_dask/test_pp_lazy.py index b9650ec481..c15c72ae2c 100644 --- a/lib/iris/tests/integration/temp_dask/test_pp_lazy.py +++ b/lib/iris/tests/integration/temp_dask/test_pp_lazy.py @@ -34,6 +34,7 @@ import iris +@tests.skip_data class TestLazyLoad(tests.IrisTest): def setUp(self): path = tests.get_data_path(('PP', 'aPPglob1', 'global.pp')) diff --git a/lib/iris/util.py b/lib/iris/util.py index ebb6bfa746..16f6cdb87c 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2016, Met Office +# (C) British Crown Copyright 2010 - 2017, Met Office # # This file is part of Iris. # From 438011b251af7540316636b86c0045b02704f663 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 20 Jan 2017 15:49:27 +0000 Subject: [PATCH 7/7] Review changes. 
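
For reference, the lazy_data() getter/setter rules that the reworked tests pin
down, as a minimal standalone sketch (synthetic cube, not a snippet from the
branch):

    import dask.array as da
    import numpy as np
    import iris.cube

    lazy = da.from_array(np.zeros((73, 96)), chunks=(73, 96))
    cube = iris.cube.Cube(lazy)              # a lazy array is stored as-is
    assert cube.has_lazy_data()

    cube.lazy_data(lazy + 100.0)             # another lazy array : accepted

    try:
        cube.lazy_data(np.zeros((73, 96)))   # not lazy
    except TypeError:
        pass                                 # 'new values must be a lazy array'

    try:
        cube.lazy_data(lazy[1:])             # wrong shape
    except ValueError:
        pass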
--- lib/iris/cube.py | 17 ++--- .../integration/temp_dask/test_lazy_utils.py | 8 +-- .../integration/temp_dask/test_pp_lazy.py | 71 +++++++++++++------ 3 files changed, 63 insertions(+), 33 deletions(-) diff --git a/lib/iris/cube.py b/lib/iris/cube.py index 18aa4d350d..c121849dc9 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -1607,27 +1607,28 @@ def ndim(self): def lazy_data(self, array=None): """ - Return a :class:`biggus.Array` representing the - multi-dimensional data of the Cube, and optionally provide a - new array of values. + Return a lazy array representing the Cube data. + + Optionally, provide a new lazy array to assign as the cube data. + This must also be a lazy array, according to + :meth:`iris._lazy_data.is_lazy_data`. Accessing this method will never cause the data to be loaded. Similarly, calling methods on, or indexing, the returned Array will not cause the Cube to have loaded data. If the data have already been loaded for the Cube, the returned - Array will be a :class:`biggus.NumpyArrayAdapter` which wraps - the numpy array from `self.data`. + Array will be a lazy array wrapper, generated by a call to + :meth:`iris._lazy_data.as_lazy_data`. Kwargs: - * array (:class:`biggus.Array` or None): + * array (lazy array or None): When this is not None it sets the multi-dimensional data of the cube to the given value. Returns: - A :class:`biggus.Array` representing the multi-dimensional - data of the Cube. + A lazy array, representing the Cube data array. """ if array is not None: diff --git a/lib/iris/tests/integration/temp_dask/test_lazy_utils.py b/lib/iris/tests/integration/temp_dask/test_lazy_utils.py index eee210f22b..8a774de738 100644 --- a/lib/iris/tests/integration/temp_dask/test_lazy_utils.py +++ b/lib/iris/tests/integration/temp_dask/test_lazy_utils.py @@ -35,7 +35,7 @@ from iris._lazy_data import is_lazy_data, as_lazy_data, as_concrete_data -class MixinLazyTests(object): +class MixinLazyTestData(object): def setUp(self): # Create test real and dask arrays. self.real_array = np.arange(24).reshape((2, 3, 4)) @@ -43,7 +43,7 @@ def setUp(self): self.lazy_array = da.from_array(self.lazy_values, 1e6) -class Test_is_lazy_data(MixinLazyTests, tests.IrisTest): +class Test_is_lazy_data(MixinLazyTestData, tests.IrisTest): def test_lazy(self): self.assertTrue(is_lazy_data(self.lazy_array)) @@ -51,7 +51,7 @@ def test_real(self): self.assertFalse(is_lazy_data(self.real_array)) -class Test_as_lazy_data(MixinLazyTests, tests.IrisTest): +class Test_as_lazy_data(MixinLazyTestData, tests.IrisTest): def test_lazy(self): result = as_lazy_data(self.lazy_array) self.assertTrue(is_lazy_data(result)) @@ -63,7 +63,7 @@ def test_real(self): self.assertArrayAllClose(as_concrete_data(result), self.real_array) -class Test_as_concrete_data(MixinLazyTests, tests.IrisTest): +class Test_as_concrete_data(MixinLazyTestData, tests.IrisTest): def test_lazy(self): result = as_concrete_data(self.lazy_array) self.assertFalse(is_lazy_data(result)) diff --git a/lib/iris/tests/integration/temp_dask/test_pp_lazy.py b/lib/iris/tests/integration/temp_dask/test_pp_lazy.py index c15c72ae2c..c7428018dd 100644 --- a/lib/iris/tests/integration/temp_dask/test_pp_lazy.py +++ b/lib/iris/tests/integration/temp_dask/test_pp_lazy.py @@ -15,7 +15,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see . """ -Test lazy data handlingin iris.fileformats.pp. +Test lazy data handling in :mod:`iris.fileformats.pp`. 
Note: probably belongs in "tests/unit/fileformats/pp", if a separate test is actually required. @@ -34,8 +34,16 @@ import iris +class MixinLazyCubeLoad(object): + def setUp(self): + path = tests.get_data_path(('PP', 'aPPglob1', 'global.pp')) + self.cube, = iris.load_raw(path) + # This is the same as iris.tests.stock.global_pp(), but avoids the + # merge, which is presently not working. + + @tests.skip_data -class TestLazyLoad(tests.IrisTest): +class TestLazyCubeLoad(MixinLazyCubeLoad, tests.IrisTest): def setUp(self): path = tests.get_data_path(('PP', 'aPPglob1', 'global.pp')) self.cube, = iris.load_raw(path) @@ -43,56 +51,77 @@ def setUp(self): # merge, which is presently not working. def test_load(self): - # Check that a simple load gets us lazy data. + # Check that a simple load results in a cube with a lazy data array. cube = self.cube raw_data = cube._my_data + # It has loaded as a dask array. self.assertIsInstance(raw_data, DaskArray) def test_data(self): - # Check that data access realises. + # Check that .data returns a realised array with the expected values. cube = self.cube raw_data = cube._my_data data = cube.data + # "normal" .data is a numpy array. self.assertIsInstance(data, np.ndarray) + # values match the lazy original. self.assertArrayAllClose(data, raw_data.compute()) - def test_has_lazy(self): - # Check cube.has_lazy_data(). + +@tests.skip_data +class Test_has_lazy_data(MixinLazyCubeLoad, tests.IrisTest): + def test(self): + # Check result before and after touching the data. cube = self.cube + # normal load yields lazy data. self.assertTrue(cube.has_lazy_data()) + # touch data. cube.data + # cube has real data after .data access. self.assertFalse(cube.has_lazy_data()) - def test_lazy_data(self): - # Check cube.lazy_data(). + +@tests.skip_data +class Test_lazy_data(MixinLazyCubeLoad, tests.IrisTest): + def test__before_and_after_realise(self): + # Check return values from cube.lazy_data(). cube = self.cube raw_data = cube._my_data - lazy = cube.lazy_data() - self.assertIs(cube.lazy_data(), raw_data) + self.assertIsInstance(raw_data, DaskArray) + # before touching .data, lazy_data() returns the original raw data. + lazy_before = cube.lazy_data() + self.assertIs(lazy_before, raw_data) + # touch data. cube.data - lazy = cube.lazy_data() - self.assertIsNot(cube.lazy_data(), raw_data) - self.assertArrayAllClose(lazy.compute(), raw_data.compute()) - - def test_lazy_data__set(self): + # after touching .data, lazy_data() is not the original raw data, but + # it computes the same result. + lazy_after = cube.lazy_data() + self.assertIsInstance(lazy_after, DaskArray) + self.assertIsNot(lazy_after, lazy_before) + self.assertArrayAllClose(lazy_after.compute(), + lazy_before.compute()) + + def test__newdata(self): # Check cube.lazy_data(). cube = self.cube raw_data = cube._my_data - cube.lazy_data(raw_data + 100.0) real_data = raw_data.compute() - self.assertArrayAllClose(cube.lazy_data(), + # set new lazy value. + cube.lazy_data(raw_data + 100.0) + # check that results are as expected. + self.assertArrayAllClose(cube.lazy_data().compute(), real_data + 100.0) - def test_lazy_data__fail_set_bad_shape(self): - # Check cube.lazy_data(). + def test__newdata_fail_bad_shape(self): + # Check cube.lazy_data() with bad shape. cube = self.cube raw_data = cube.lazy_data() msg = 'cube data with shape \(73, 96\), got \(72, 96\)' with self.assertRaisesRegexp(ValueError, msg): cube.lazy_data(raw_data[1:]) - def test_lazy_data__fail_set_not_lazy(self): - # Check cube.lazy_data(). 
+ def test__newdata_fail_not_lazy(self): + # Check cube.lazy_data() with non-lazy argument. cube = self.cube raw_data = cube.lazy_data() with self.assertRaisesRegexp(TypeError, 'must be a lazy array'):