diff --git a/setup.py b/setup.py index 91546ba..1ffe5ba 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,9 @@ extensions = [Extension('{}._vinterp'.format(NAME), [os.path.join(NAME, '_vinterp.pyx')], + include_dirs=[np.get_include()]), + Extension('{}._conservative'.format(NAME), + [os.path.join(NAME, '_conservative.pyx')], include_dirs=[np.get_include()])] diff --git a/stratify/__init__.py b/stratify/__init__.py index 9e298e6..3dbcaae 100644 --- a/stratify/__init__.py +++ b/stratify/__init__.py @@ -1,9 +1,11 @@ from __future__ import absolute_import -from ._vinterp import (interpolate, interp_schemes, extrap_schemes, - INTERPOLATE_LINEAR, INTERPOLATE_NEAREST, +from ._vinterp import (interpolate, interp_schemes, # noqa: F401 + extrap_schemes, INTERPOLATE_LINEAR, INTERPOLATE_NEAREST, EXTRAPOLATE_NAN, EXTRAPOLATE_NEAREST, - EXTRAPOLATE_LINEAR, PyFuncExtrapolator, + EXTRAPOLATE_LINEAR, PyFuncExtrapolator, PyFuncInterpolator) +from ._bounded_vinterp import interpolate_conservative # noqa: F401 + __version__ = '0.1a3.dev0' diff --git a/stratify/_bounded_vinterp.py b/stratify/_bounded_vinterp.py new file mode 100644 index 0000000..b2c865c --- /dev/null +++ b/stratify/_bounded_vinterp.py @@ -0,0 +1,135 @@ +from __future__ import absolute_import + +import numpy as np + +from ._conservative import conservative_interpolation + + +def interpolate_conservative(z_target, z_src, fz_src, axis=-1): + """ + 1d conservative interpolation across multiple dimensions. + + This function provides the ability to perform 1d interpolation on datasets + with more than one dimension. For instance, this function can be used to + interpolate a set of vertical levels, even if the interpolation coordinate + depends upon other dimensions. + + A good use case might be when wanting to interpolate at a specific height + for height data which also depends on x and y - e.g. extract 1000hPa level + from a 3d dataset and associated pressure field. In the case of this + example, pressure would be the `z` coordinate, and the dataset + (e.g. geopotential height / temperature etc.) would be `f(z)`. + + Parameters + ---------- + z_target: :class:`np.ndarray` + Target coordinate. + This coordinate defines the levels to interpolate the source data + ``fz_src`` to. ``z_target`` must have the same dimensionality as the + source coordinate ``z_src``, and the shape of ``z_target`` must match + the shape of ``z_src``, although the axis of interpolation may differ + in dimension size. + z_src: :class:`np.ndarray` + Source coordinate. + This coordinate defines the levels that the source data ``fz_src`` is + interpolated from. + fz_src: :class:`np.ndarray` + The source data; the phenomenon data values to be interpolated from + ``z_src`` to ``z_target``. + The data array must be at least ``z_src.ndim``, and its trailing + dimensions (i.e. those on its right hand side) must be exactly + the same as the shape of ``z_src``. + axis: int (default -1) + The ``fz_src`` axis to perform the interpolation over. + + Returns + ------- + : :class:`np.ndarray` + fz_src interpolated from z_src to z_target. + + Note + ---- + - Support for 1D z_target and corresponding ND z_src will be provided in + future as driven by user requirement. + - Those cells, where 'nan' values in the source data contribute, a 'nan' + value is returned. + + """ + if z_src.ndim != z_target.ndim: + msg = ('Expecting source and target levels dimensionality to be ' + 'identical. {} != {}.') + raise ValueError(msg.format(z_src.ndim, z_target.ndim)) + + # Relative axis + axis = axis % fz_src.ndim + axis_relative = axis - (fz_src.ndim - (z_target.ndim-1)) + + src_shape = list(z_src.shape) + src_shape.pop(axis_relative) + tgt_shape = list(z_target.shape) + tgt_shape.pop(axis_relative) + + if src_shape != tgt_shape: + src_shape = list(z_src.shape) + src_shape[axis_relative] = '-' + tgt_shape = list(z_target.shape) + src_shape[axis_relative] = '-' + msg = ('Expecting the shape of the source and target levels except ' + 'the axis of interpolation to be identical. {} != {}') + raise ValueError(msg.format(tuple(src_shape), tuple(tgt_shape))) + + dat_shape = list(fz_src.shape) + dat_shape = dat_shape[-(z_src.ndim-1):] + src_shape = list(z_src.shape[:-1]) + if dat_shape != src_shape: + dat_shape = list(fz_src.shape) + dat_shape[:-(z_src.ndim-1)] = '-' + msg = ('The provided data is not of compatible shape with the ' + 'provided source bounds. {} != {}') + raise ValueError(msg.format(tuple(dat_shape), tuple(src_shape))) + + if z_src.shape[-1] != 2: + msg = 'Unexpected source and target bounds shape. shape[-1] != 2' + raise ValueError(msg) + + # Define our source in a consistent way. + # [broadcasting_dims, axis_interpolation, z_varying] + + # src_data + bdims = list(range(fz_src.ndim - (z_src.ndim-1))) + data_vdims = [ind for ind in range(fz_src.ndim) if ind not in + (bdims + [axis])] + data_transpose = bdims + [axis] + data_vdims + fz_src_reshaped = np.transpose(fz_src, data_transpose) + fz_src_orig = list(fz_src_reshaped.shape) + shape = ( + int(np.product(fz_src_reshaped.shape[:len(bdims)])), + fz_src_reshaped.shape[len(bdims)], + int(np.product(fz_src_reshaped.shape[len(bdims)+1:]))) + fz_src_reshaped = fz_src_reshaped.reshape(shape) + + # Define our src and target bounds in a consistent way. + # [axis_interpolation, z_varying, 2] + vdims = list(set(range(z_src.ndim)) - set([axis_relative])) + z_src_reshaped = np.transpose(z_src, [axis_relative] + vdims) + z_target_reshaped = np.transpose(z_target, [axis_relative] + vdims) + + shape = int(np.product(z_src_reshaped.shape[1:-1])) + z_src_reshaped = z_src_reshaped.reshape([z_src_reshaped.shape[0], shape, + z_src_reshaped.shape[-1]]) + shape = int(np.product(z_target_reshaped.shape[1:-1])) + z_target_reshaped = z_target_reshaped.reshape( + [z_target_reshaped.shape[0], shape, z_target_reshaped.shape[-1]]) + + result = conservative_interpolation( + z_src_reshaped, z_target_reshaped, fz_src_reshaped) + + # Turn the result into a shape consistent with the source. + # First reshape, then reverse transpose. + shape = fz_src_orig + shape[len(bdims)] = z_target.shape[axis_relative] + result = result.reshape(shape) + invert_transpose = [data_transpose.index(ind) for ind in + list(range(result.ndim))] + result = result.transpose(invert_transpose) + return result diff --git a/stratify/_conservative.pyx b/stratify/_conservative.pyx new file mode 100644 index 0000000..ab8e0e0 --- /dev/null +++ b/stratify/_conservative.pyx @@ -0,0 +1,126 @@ +import numpy as np +cimport numpy as np + + +cdef calculate_weights(np.ndarray[np.float64_t, ndim=2] src_point, + np.ndarray[np.float64_t, ndim=2] tgt_point): + """ + Calculate weights for a given point. + + The following visually illustrates the calculation:: + + src_min src_max + |----------| : Source + tgt_min tgt_max + |------------| : Target + |----------| : Delta (src_max - src_min) + |----| : Overlap (between src & tgt) + weight = overlap / delta + + Parameters + ---------- + src_point (2d double array) - Source point (at a specific location). + tgt_point (2d double array) - Target point (at a specific location). + + Returns + ------- + 2d double array - Weights corresponding to shape [src_point.shape[0], + tgt_point.shape[0]]. + + """ + cdef Py_ssize_t src_ind, tgt_ind + cdef np.float64_t delta, weight + cdef np.ndarray[np.float64_t, ndim=2] weights + cdef np.ndarray[np.float64_t, ndim=1] src_cell, tgt_cell + + weights = np.zeros([src_point.shape[0], tgt_point.shape[0]], + dtype=np.float64) + for src_ind, src_cell in enumerate(src_point): + delta = src_cell.max() - src_cell.min() + for tgt_ind, tgt_cell in enumerate(tgt_point): + weight = (min(src_cell.max(), tgt_cell.max()) - + max(src_cell.min(), tgt_cell.min())) / float(delta) + if weight > 0: + weights[src_ind, tgt_ind] = weight + return weights + + +cdef apply_weights(np.ndarray[np.float64_t, ndim=3] src_point, + np.ndarray[np.float64_t, ndim=3] tgt_point, + np.ndarray[np.float64_t, ndim=3] src_data): + """ + Perform conservative interpolation. + + Conservation interpolation of a dataset between a provided source + coordinate and a target coordinate. Where no source cells contribute to a + target cell, a np.nan value is returned. + + Parameters + ---------- + src_points (3d double array) - Source coordinate, taking the form + [axis_interpolation, z_varying, 2]. + tgt_points (3d double array) - Target coordinate, taking the form + [axis_interpolation, z_varying, 2]. + src_data (3d double array) - The source data, the phenomenon data to be + interpolated from ``src_points`` to ``tgt_points``. Taking the form + [broadcasting_dims, axis_interpolation, z_varying]. + + Returns + ------- + 3d double array - Interpolated result over target levels (``tgt_points``). + Taking the form [broadcasting_dims, axis_interpolation, z_varying]. + + """ + cdef Py_ssize_t ind + cdef np.ndarray[np.float64_t, ndim=3] results, weighted_contrib + cdef np.ndarray[np.float64_t, ndim=2] weights + results = np.zeros( + [src_data.shape[0], tgt_point.shape[0], src_data.shape[2]], + dtype='float64') + # Calculate and apply weights + for ind in range(src_data.shape[2]): + weights = calculate_weights(src_point[:, ind], tgt_point[:, ind]) + if not (weights.sum(axis=1) == 1).all(): + msg = ('Weights calculation yields a less than conservative ' + 'result. Aborting.') + raise ValueError(msg) + weighted_contrib = weights * src_data[..., ind][..., None] + results[..., ind] = ( + np.nansum(weighted_contrib, axis=1)) + # Return nan values for those target cells, where there is any + # contribution of nan data from the source data. + results[..., ind][ + ((weights > 0) * np.isnan(weighted_contrib)).any(axis=1)] = np.nan + + # Return np.nan for those target cells where no source contributes. + results[:, weights.sum(axis=0) == 0, ind] = np.nan + return results + + +def conservative_interpolation(src_points, tgt_points, src_data): + """ + Perform conservative interpolation. + + Conservation interpolation of a dataset between a provided source + coordinate and a target coordinate. All inputs are recast to 64-bit float + arrays before calculation. + + Parameters + ---------- + src_points (3d array) - Source coordinate, taking the form + [axis_interpolation, z_varying, 2]. + tgt_points (3d array) - Target coordinate, taking the form + [axis_interpolation, z_varying, 2]. + src_data (3d array) - The source data, the phenonenon data to be + interpolated from ``src_points`` to ``tgt_points``. Taking the form + [broadcasting_dims, axis_interpolation, z_varying]. + + Returns + ------- + 3d double array - Interpolated result over target levels (``tgt_points``). + Taking the form [broadcasting_dims, axis_interpolation, z_varying]. + + """ + return apply_weights(src_points.astype('float64'), + tgt_points.astype('float64'), + src_data.astype('float64')) diff --git a/stratify/tests/test_bounded_vinterp.py b/stratify/tests/test_bounded_vinterp.py new file mode 100644 index 0000000..58c48c7 --- /dev/null +++ b/stratify/tests/test_bounded_vinterp.py @@ -0,0 +1,228 @@ +import unittest + +import numpy as np +from numpy.testing import assert_array_equal + +import stratify._bounded_vinterp as bounded_vinterp + + +class Test1D(unittest.TestCase): + # 1D cases represent the situation where the vertical coordinate is + # 1-dimensional and as such, there are no dimension which the vertical + # coordinates vary over other than the 'axis of interpolation'. + # The common approach in solving these problems is to reshape and + # transpose arrays into the form: + # [broadcasting_dims, axis_interpolation, z_varying]. + # This this will take the form: + # [broadcasting_dims, axis_interpolation, 1]. + def setUp(self): + self.data = np.ones((4, 6)) + self.bounds = self.gen_bounds(0, 7, 1) + + def gen_bounds(self, start, stop, step): + bounds = np.vstack([np.arange(start, stop-step, step), + np.arange(start+step, stop, step)]) + bounds = bounds.transpose((1, 0)) + return bounds.copy() + + def test_target_half_resolution(self): + target_bounds = self.gen_bounds(0, 7, 2) + res = bounded_vinterp.interpolate_conservative(target_bounds, + self.bounds, self.data, + axis=1) + target_data = np.ones((4, 3))*2 + assert_array_equal(res, target_data) + + def test_target_double_resolution(self): + target_bounds = self.gen_bounds(0, 6.5, 0.5) + res = bounded_vinterp.interpolate_conservative(target_bounds, + self.bounds, self.data, + axis=1) + target_data = np.ones((4, 12))*.5 + assert_array_equal(res, target_data) + + def test_no_broadcasting(self): + # In the case of no broadcasting, transposed and reshaped array of form + # [broadcasting_dims, axis_interpolation, z_varying] should end up + # [1, axis_interpolation, 1]. + data = self.data[0] + target_bounds = self.gen_bounds(0, 7, 2) + res = bounded_vinterp.interpolate_conservative(target_bounds, + self.bounds, data, + axis=0) + target_data = np.ones((3))*2 + assert_array_equal(res, target_data) + + def test_source_with_nans(self): + # In the case of no broadcasting, transposed and reshaped array of form + # [broadcasting_dims, axis_interpolation, z_varying] should end up + # [1, axis_interpolation, 1]. + data = self.data[0] + data[0] = data[-2:] = np.nan + target_bounds = self.gen_bounds(0, 6.5, 0.5) + res = bounded_vinterp.interpolate_conservative(target_bounds, + self.bounds, data, + axis=0) + target_data = np.ones((12))*.5 + target_data[:2] = np.nan + target_data[-4:] = np.nan + assert_array_equal(res, target_data) + + def test_target_extends_above_source(self): + # |-|-|-|-|-|-|-| - Source + # |-|-|-|-|-|-|-|-| - Target + source_bounds = self.gen_bounds(0, 7, 1) + target_bounds = self.gen_bounds(0, 8, 1) + res = bounded_vinterp.interpolate_conservative(target_bounds, + source_bounds, + self.data, axis=1) + target_data = np.ones((4, 7)) + target_data[:, -1] = np.nan + assert_array_equal(res, target_data) + + def test_target_extends_above_source_non_equally_spaced_coords(self): + # |--|--|-------|| - Source + # |-|-|-|-|-|-|-|-| - Target + source_bounds = np.array([[0, 1.5], [1.5, 2], [2, 6], [6, 6.5]]) + target_bounds = self.gen_bounds(0, 8, 1) + data = np.ones((4, 4)) + res = bounded_vinterp.interpolate_conservative(target_bounds, + source_bounds, data, + axis=1) + target_data = np.array( + [1/1.5, 1 + ((1/3.)/1), 0.25, 0.25, 0.25, 0.25, 1.])[None] + target_data = np.repeat(target_data, 4, 0) + assert_array_equal(res, target_data) + + def test_target_extends_below_source(self): + # |-|-|-|-|-|-|-| - Source + # |-|-|-|-|-|-|-|-| - Target + source_bounds = self.gen_bounds(0, 7, 1) + target_bounds = self.gen_bounds(-1, 7, 1) + res = bounded_vinterp.interpolate_conservative(target_bounds, + source_bounds, + self.data, axis=1) + target_data = np.ones((4, 7)) + target_data[:, 0] = np.nan + assert_array_equal(res, target_data) + + +class TestND(unittest.TestCase): + # ND cases represent the situation where the vertical coordinate varies + # over dimensions other than the axis of interpolation. + def setUp(self): + self.data = np.ones((2, 6, 4, 3)) + self.bounds = self.gen_bounds(0, 7, 1) + + def gen_bounds(self, start, stop, step): + bounds = np.vstack([np.arange(start, stop-step, step), + np.arange(start+step, stop, step)]) + bounds = bounds.transpose((1, 0)) + bounds = bounds[..., None, :].repeat(4, -2) + bounds = bounds[..., None, :].repeat(3, -2) + return bounds.copy() + + def test_target_half_resolution(self): + target_bounds = self.gen_bounds(0, 7, 2) + res = bounded_vinterp.interpolate_conservative(target_bounds, + self.bounds, self.data, + axis=1) + + target_data = np.ones((2, 3, 4, 3))*2 + assert_array_equal(res, target_data) + + def test_target_half_resolution_alt_axis(self): + # Ensure results as expected with an alternative axis of interpolation. + data = self.data.transpose((0, 2, 1, 3)) + bounds = self.bounds.transpose((1, 0, 2, 3)) + target_bounds = self.gen_bounds(0, 7, 2) + target_bounds = target_bounds.transpose((1, 0, 2, 3)) + + res = bounded_vinterp.interpolate_conservative(target_bounds, bounds, + data, axis=2) + target_data = np.ones((2, 4, 3, 3))*2 + assert_array_equal(res, target_data) + + def test_target_double_resolution(self): + target_bounds = self.gen_bounds(0, 6.5, 0.5) + res = bounded_vinterp.interpolate_conservative(target_bounds, + self.bounds, + self.data, axis=1) + target_data = np.ones((2, 12, 4, 3))*.5 + assert_array_equal(res, target_data) + + +class TestExceptions(unittest.TestCase): + def test_mismatch_source_target_level_dimensionality(self): + source_bounds = np.zeros((3, 4, 2)) + target_bounds = np.zeros((4, 2)) + data = np.zeros((3, 4)) + + msg = 'Expecting source and target levels dimensionality' + with self.assertRaisesRegexp(ValueError, msg): + bounded_vinterp.interpolate_conservative(target_bounds, + source_bounds, data) + + def test_mismatch_source_target_level_shape(self): + # The source and target levels should have identical shape, other than + # the axis of interpolation. + source_bounds = np.zeros((3, 4, 2)) + target_bounds = np.zeros((2, 5, 2)) + data = np.zeros((3, 4)) + + msg = ('Expecting the shape of the source and target levels except ' + 'the axis of interpolation to be identical. ' + "\('-', 4, 2\) != \(2, 5, 2\)") + with self.assertRaisesRegexp(ValueError, msg): + bounded_vinterp.interpolate_conservative(target_bounds, + source_bounds, data, + axis=0) + + def test_mismatch_between_source_levels_source_data(self): + # The source levels should reflect the shape of the data. + source_bounds = np.zeros((2, 4, 2)) + target_bounds = np.zeros((2, 4, 2)) + data = np.zeros((3, 4)) + + msg = ('The provided data is not of compatible shape with the ' + "provided source bounds. \('-', 3, 4\) != \(2, 4\)") + with self.assertRaisesRegexp(ValueError, msg): + bounded_vinterp.interpolate_conservative(target_bounds, + source_bounds, data, + axis=0) + + def test_unexpected_bounds_shape(self): + # Expecting bounds of size 2 (that is the upper and lower). + # The source levels should reflect the shape of the data. + source_bounds = np.zeros((3, 4, 4)) + target_bounds = np.zeros((4, 4, 4)) + data = np.zeros((3, 4)) + + msg = 'Unexpected source and target bounds shape. shape\[-1\] != 2' + with self.assertRaisesRegexp(ValueError, msg): + bounded_vinterp.interpolate_conservative(target_bounds, + source_bounds, data, + axis=0) + + def test_not_conservative(self): + # Where the target does not cover the full extent of the source. + # |-|-|-|-|-|-| - Source + # |-|-|-|-| - Target + def gen_bounds(start, stop, step): + bounds = np.vstack([np.arange(start, stop-step, step), + np.arange(start+step, stop, step)]) + bounds = bounds.transpose((1, 0)) + return bounds.copy() + source_bounds = gen_bounds(0, 7, 1) + target_bounds = gen_bounds(1, 6, 1) + data = np.ones((4, 6)) + + msg = 'Weights calculation yields a less than conservative result.' + with self.assertRaisesRegexp(ValueError, msg): + bounded_vinterp.interpolate_conservative(target_bounds, + source_bounds, data, + axis=1) + + +if __name__ == '__main__': + unittest.main()