-
Notifications
You must be signed in to change notification settings - Fork 19
ENH: Added conservative interpolation approach #18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
08239e2
009b38e
c87fb50
a5e9e7c
b0a2c5f
44edf42
20eb3f5
54bb77a
58098b8
bab0fdd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,11 @@ | ||
| from __future__ import absolute_import | ||
|
|
||
| from ._vinterp import (interpolate, interp_schemes, extrap_schemes, | ||
| INTERPOLATE_LINEAR, INTERPOLATE_NEAREST, | ||
| from ._vinterp import (interpolate, interp_schemes, # noqa: F401 | ||
| extrap_schemes, INTERPOLATE_LINEAR, INTERPOLATE_NEAREST, | ||
| EXTRAPOLATE_NAN, EXTRAPOLATE_NEAREST, | ||
| EXTRAPOLATE_LINEAR, PyFuncExtrapolator, | ||
| EXTRAPOLATE_LINEAR, PyFuncExtrapolator, | ||
| PyFuncInterpolator) | ||
| from ._bounded_vinterp import interpolate_conservative # noqa: F401 | ||
|
|
||
|
|
||
| __version__ = '0.1a3.dev0' |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| from __future__ import absolute_import | ||
|
|
||
| import numpy as np | ||
|
|
||
| from ._conservative import conservative_interpolation | ||
|
|
||
|
|
||
| def interpolate_conservative(z_target, z_src, fz_src, axis=-1): | ||
| """ | ||
| 1d conservative interpolation across multiple dimensions. | ||
|
|
||
| This function provides the ability to perform 1d interpolation on datasets | ||
| with more than one dimension. For instance, this function can be used to | ||
| interpolate a set of vertical levels, even if the interpolation coordinate | ||
| depends upon other dimensions. | ||
|
|
||
| A good use case might be when wanting to interpolate at a specific height | ||
| for height data which also depends on x and y - e.g. extract 1000hPa level | ||
| from a 3d dataset and associated pressure field. In the case of this | ||
| example, pressure would be the `z` coordinate, and the dataset | ||
| (e.g. geopotential height / temperature etc.) would be `f(z)`. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| z_target: :class:`np.ndarray` | ||
| Target coordinate. | ||
| This coordinate defines the levels to interpolate the source data | ||
| ``fz_src`` to. ``z_target`` must have the same dimensionality as the | ||
| source coordinate ``z_src``, and the shape of ``z_target`` must match | ||
| the shape of ``z_src``, although the axis of interpolation may differ | ||
| in dimension size. | ||
| z_src: :class:`np.ndarray` | ||
| Source coordinate. | ||
| This coordinate defines the levels that the source data ``fz_src`` is | ||
| interpolated from. | ||
| fz_src: :class:`np.ndarray` | ||
| The source data; the phenomenon data values to be interpolated from | ||
| ``z_src`` to ``z_target``. | ||
| The data array must be at least ``z_src.ndim``, and its trailing | ||
| dimensions (i.e. those on its right hand side) must be exactly | ||
| the same as the shape of ``z_src``. | ||
| axis: int (default -1) | ||
| The ``fz_src`` axis to perform the interpolation over. | ||
|
|
||
| Returns | ||
| ------- | ||
| : :class:`np.ndarray` | ||
| fz_src interpolated from z_src to z_target. | ||
|
|
||
| Note | ||
| ---- | ||
| - Support for 1D z_target and corresponding ND z_src will be provided in | ||
| future as driven by user requirement. | ||
| - Those cells, where 'nan' values in the source data contribute, a 'nan' | ||
| value is returned. | ||
|
|
||
| """ | ||
| if z_src.ndim != z_target.ndim: | ||
| msg = ('Expecting source and target levels dimensionality to be ' | ||
| 'identical. {} != {}.') | ||
| raise ValueError(msg.format(z_src.ndim, z_target.ndim)) | ||
|
|
||
| # Relative axis | ||
| axis = axis % fz_src.ndim | ||
| axis_relative = axis - (fz_src.ndim - (z_target.ndim-1)) | ||
|
|
||
| src_shape = list(z_src.shape) | ||
| src_shape.pop(axis_relative) | ||
| tgt_shape = list(z_target.shape) | ||
| tgt_shape.pop(axis_relative) | ||
|
|
||
| if src_shape != tgt_shape: | ||
| src_shape = list(z_src.shape) | ||
| src_shape[axis_relative] = '-' | ||
| tgt_shape = list(z_target.shape) | ||
| src_shape[axis_relative] = '-' | ||
| msg = ('Expecting the shape of the source and target levels except ' | ||
| 'the axis of interpolation to be identical. {} != {}') | ||
| raise ValueError(msg.format(tuple(src_shape), tuple(tgt_shape))) | ||
|
|
||
| dat_shape = list(fz_src.shape) | ||
| dat_shape = dat_shape[-(z_src.ndim-1):] | ||
| src_shape = list(z_src.shape[:-1]) | ||
| if dat_shape != src_shape: | ||
| dat_shape = list(fz_src.shape) | ||
| dat_shape[:-(z_src.ndim-1)] = '-' | ||
| msg = ('The provided data is not of compatible shape with the ' | ||
| 'provided source bounds. {} != {}') | ||
| raise ValueError(msg.format(tuple(dat_shape), tuple(src_shape))) | ||
|
|
||
| if z_src.shape[-1] != 2: | ||
| msg = 'Unexpected source and target bounds shape. shape[-1] != 2' | ||
| raise ValueError(msg) | ||
|
|
||
| # Define our source in a consistent way. | ||
| # [broadcasting_dims, axis_interpolation, z_varying] | ||
|
|
||
| # src_data | ||
| bdims = list(range(fz_src.ndim - (z_src.ndim-1))) | ||
| data_vdims = [ind for ind in range(fz_src.ndim) if ind not in | ||
| (bdims + [axis])] | ||
| data_transpose = bdims + [axis] + data_vdims | ||
| fz_src_reshaped = np.transpose(fz_src, data_transpose) | ||
| fz_src_orig = list(fz_src_reshaped.shape) | ||
| shape = ( | ||
| int(np.product(fz_src_reshaped.shape[:len(bdims)])), | ||
| fz_src_reshaped.shape[len(bdims)], | ||
| int(np.product(fz_src_reshaped.shape[len(bdims)+1:]))) | ||
| fz_src_reshaped = fz_src_reshaped.reshape(shape) | ||
|
|
||
| # Define our src and target bounds in a consistent way. | ||
| # [axis_interpolation, z_varying, 2] | ||
| vdims = list(set(range(z_src.ndim)) - set([axis_relative])) | ||
| z_src_reshaped = np.transpose(z_src, [axis_relative] + vdims) | ||
| z_target_reshaped = np.transpose(z_target, [axis_relative] + vdims) | ||
|
|
||
| shape = int(np.product(z_src_reshaped.shape[1:-1])) | ||
| z_src_reshaped = z_src_reshaped.reshape([z_src_reshaped.shape[0], shape, | ||
| z_src_reshaped.shape[-1]]) | ||
| shape = int(np.product(z_target_reshaped.shape[1:-1])) | ||
| z_target_reshaped = z_target_reshaped.reshape( | ||
| [z_target_reshaped.shape[0], shape, z_target_reshaped.shape[-1]]) | ||
|
|
||
| result = conservative_interpolation( | ||
| z_src_reshaped, z_target_reshaped, fz_src_reshaped) | ||
|
|
||
| # Turn the result into a shape consistent with the source. | ||
| # First reshape, then reverse transpose. | ||
| shape = fz_src_orig | ||
| shape[len(bdims)] = z_target.shape[axis_relative] | ||
| result = result.reshape(shape) | ||
| invert_transpose = [data_transpose.index(ind) for ind in | ||
| list(range(result.ndim))] | ||
| result = result.transpose(invert_transpose) | ||
| return result | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comments:
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed. I saw these two points as being connected. I think a refactor to support caching would entail pulling both existing and conservative capability into classes. I didn't want to pre-define a structure before considering this refactor in that wider context of caching (it was easier to use a proxy to the existing API for now). With regards to common functions, as far as I can tell. the existing stratify usage is rather hardcoded to a points based approach right now. Cheers |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| import numpy as np | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No docstrings 😱
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks Phil, just added (see latest commit) Cheers |
||
| cimport numpy as np | ||
|
|
||
|
|
||
| cdef calculate_weights(np.ndarray[np.float64_t, ndim=2] src_point, | ||
| np.ndarray[np.float64_t, ndim=2] tgt_point): | ||
| """ | ||
| Calculate weights for a given point. | ||
|
|
||
| The following visually illustrates the calculation:: | ||
|
|
||
| src_min src_max | ||
| |----------| : Source | ||
| tgt_min tgt_max | ||
| |------------| : Target | ||
| |----------| : Delta (src_max - src_min) | ||
| |----| : Overlap (between src & tgt) | ||
| weight = overlap / delta | ||
|
|
||
| Parameters | ||
| ---------- | ||
| src_point (2d double array) - Source point (at a specific location). | ||
| tgt_point (2d double array) - Target point (at a specific location). | ||
|
|
||
| Returns | ||
| ------- | ||
| 2d double array - Weights corresponding to shape [src_point.shape[0], | ||
| tgt_point.shape[0]]. | ||
|
|
||
| """ | ||
| cdef Py_ssize_t src_ind, tgt_ind | ||
| cdef np.float64_t delta, weight | ||
| cdef np.ndarray[np.float64_t, ndim=2] weights | ||
| cdef np.ndarray[np.float64_t, ndim=1] src_cell, tgt_cell | ||
|
|
||
| weights = np.zeros([src_point.shape[0], tgt_point.shape[0]], | ||
| dtype=np.float64) | ||
| for src_ind, src_cell in enumerate(src_point): | ||
| delta = src_cell.max() - src_cell.min() | ||
| for tgt_ind, tgt_cell in enumerate(tgt_point): | ||
| weight = (min(src_cell.max(), tgt_cell.max()) - | ||
| max(src_cell.min(), tgt_cell.min())) / float(delta) | ||
| if weight > 0: | ||
| weights[src_ind, tgt_ind] = weight | ||
| return weights | ||
|
|
||
|
|
||
| cdef apply_weights(np.ndarray[np.float64_t, ndim=3] src_point, | ||
| np.ndarray[np.float64_t, ndim=3] tgt_point, | ||
| np.ndarray[np.float64_t, ndim=3] src_data): | ||
| """ | ||
| Perform conservative interpolation. | ||
|
|
||
| Conservation interpolation of a dataset between a provided source | ||
| coordinate and a target coordinate. Where no source cells contribute to a | ||
| target cell, a np.nan value is returned. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| src_points (3d double array) - Source coordinate, taking the form | ||
| [axis_interpolation, z_varying, 2]. | ||
| tgt_points (3d double array) - Target coordinate, taking the form | ||
| [axis_interpolation, z_varying, 2]. | ||
| src_data (3d double array) - The source data, the phenomenon data to be | ||
| interpolated from ``src_points`` to ``tgt_points``. Taking the form | ||
| [broadcasting_dims, axis_interpolation, z_varying]. | ||
|
|
||
| Returns | ||
| ------- | ||
| 3d double array - Interpolated result over target levels (``tgt_points``). | ||
| Taking the form [broadcasting_dims, axis_interpolation, z_varying]. | ||
|
|
||
| """ | ||
| cdef Py_ssize_t ind | ||
| cdef np.ndarray[np.float64_t, ndim=3] results, weighted_contrib | ||
| cdef np.ndarray[np.float64_t, ndim=2] weights | ||
| results = np.zeros( | ||
| [src_data.shape[0], tgt_point.shape[0], src_data.shape[2]], | ||
| dtype='float64') | ||
| # Calculate and apply weights | ||
| for ind in range(src_data.shape[2]): | ||
| weights = calculate_weights(src_point[:, ind], tgt_point[:, ind]) | ||
| if not (weights.sum(axis=1) == 1).all(): | ||
| msg = ('Weights calculation yields a less than conservative ' | ||
| 'result. Aborting.') | ||
| raise ValueError(msg) | ||
| weighted_contrib = weights * src_data[..., ind][..., None] | ||
| results[..., ind] = ( | ||
| np.nansum(weighted_contrib, axis=1)) | ||
| # Return nan values for those target cells, where there is any | ||
| # contribution of nan data from the source data. | ||
| results[..., ind][ | ||
| ((weights > 0) * np.isnan(weighted_contrib)).any(axis=1)] = np.nan | ||
|
|
||
| # Return np.nan for those target cells where no source contributes. | ||
| results[:, weights.sum(axis=0) == 0, ind] = np.nan | ||
| return results | ||
|
|
||
|
|
||
| def conservative_interpolation(src_points, tgt_points, src_data): | ||
| """ | ||
| Perform conservative interpolation. | ||
|
|
||
| Conservation interpolation of a dataset between a provided source | ||
| coordinate and a target coordinate. All inputs are recast to 64-bit float | ||
| arrays before calculation. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| src_points (3d array) - Source coordinate, taking the form | ||
| [axis_interpolation, z_varying, 2]. | ||
| tgt_points (3d array) - Target coordinate, taking the form | ||
| [axis_interpolation, z_varying, 2]. | ||
| src_data (3d array) - The source data, the phenonenon data to be | ||
| interpolated from ``src_points`` to ``tgt_points``. Taking the form | ||
| [broadcasting_dims, axis_interpolation, z_varying]. | ||
|
|
||
| Returns | ||
| ------- | ||
| 3d double array - Interpolated result over target levels (``tgt_points``). | ||
| Taking the form [broadcasting_dims, axis_interpolation, z_varying]. | ||
|
|
||
| """ | ||
| return apply_weights(src_points.astype('float64'), | ||
| tgt_points.astype('float64'), | ||
| src_data.astype('float64')) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would this (and following assertions) be more transparent as:
It's just moving the value of the "thing" to where the definition of the "thing" is.