Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@

extensions = [Extension('{}._vinterp'.format(NAME),
[os.path.join(NAME, '_vinterp.pyx')],
include_dirs=[np.get_include()]),
Extension('{}._conservative'.format(NAME),
[os.path.join(NAME, '_conservative.pyx')],
include_dirs=[np.get_include()])]


Expand Down
8 changes: 5 additions & 3 deletions stratify/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import absolute_import

from ._vinterp import (interpolate, interp_schemes, extrap_schemes,
INTERPOLATE_LINEAR, INTERPOLATE_NEAREST,
from ._vinterp import (interpolate, interp_schemes, # noqa: F401
extrap_schemes, INTERPOLATE_LINEAR, INTERPOLATE_NEAREST,
EXTRAPOLATE_NAN, EXTRAPOLATE_NEAREST,
EXTRAPOLATE_LINEAR, PyFuncExtrapolator,
EXTRAPOLATE_LINEAR, PyFuncExtrapolator,
PyFuncInterpolator)
from ._bounded_vinterp import interpolate_conservative # noqa: F401


__version__ = '0.1a3.dev0'
135 changes: 135 additions & 0 deletions stratify/_bounded_vinterp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
from __future__ import absolute_import

import numpy as np

from ._conservative import conservative_interpolation


def interpolate_conservative(z_target, z_src, fz_src, axis=-1):
"""
1d conservative interpolation across multiple dimensions.

This function provides the ability to perform 1d interpolation on datasets
with more than one dimension. For instance, this function can be used to
interpolate a set of vertical levels, even if the interpolation coordinate
depends upon other dimensions.

A good use case might be when wanting to interpolate at a specific height
for height data which also depends on x and y - e.g. extract 1000hPa level
from a 3d dataset and associated pressure field. In the case of this
example, pressure would be the `z` coordinate, and the dataset
(e.g. geopotential height / temperature etc.) would be `f(z)`.

Parameters
----------
z_target: :class:`np.ndarray`
Target coordinate.
This coordinate defines the levels to interpolate the source data
``fz_src`` to. ``z_target`` must have the same dimensionality as the
source coordinate ``z_src``, and the shape of ``z_target`` must match
the shape of ``z_src``, although the axis of interpolation may differ
in dimension size.
z_src: :class:`np.ndarray`
Source coordinate.
This coordinate defines the levels that the source data ``fz_src`` is
interpolated from.
fz_src: :class:`np.ndarray`
The source data; the phenomenon data values to be interpolated from
``z_src`` to ``z_target``.
The data array must be at least ``z_src.ndim``, and its trailing
dimensions (i.e. those on its right hand side) must be exactly
the same as the shape of ``z_src``.
axis: int (default -1)
The ``fz_src`` axis to perform the interpolation over.

Returns
-------
: :class:`np.ndarray`
fz_src interpolated from z_src to z_target.

Note
----
- Support for 1D z_target and corresponding ND z_src will be provided in
future as driven by user requirement.
- Those cells, where 'nan' values in the source data contribute, a 'nan'
value is returned.

"""
if z_src.ndim != z_target.ndim:
msg = ('Expecting source and target levels dimensionality to be '
'identical. {} != {}.')
raise ValueError(msg.format(z_src.ndim, z_target.ndim))
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this (and following assertions) be more transparent as:

('Expecting dimensionality of source levels {} and target levels {} to be '
               'identical.')

It's just moving the value of the "thing" to where the definition of the "thing" is.


# Relative axis
axis = axis % fz_src.ndim
axis_relative = axis - (fz_src.ndim - (z_target.ndim-1))

src_shape = list(z_src.shape)
src_shape.pop(axis_relative)
tgt_shape = list(z_target.shape)
tgt_shape.pop(axis_relative)

if src_shape != tgt_shape:
src_shape = list(z_src.shape)
src_shape[axis_relative] = '-'
tgt_shape = list(z_target.shape)
src_shape[axis_relative] = '-'
msg = ('Expecting the shape of the source and target levels except '
'the axis of interpolation to be identical. {} != {}')
raise ValueError(msg.format(tuple(src_shape), tuple(tgt_shape)))

dat_shape = list(fz_src.shape)
dat_shape = dat_shape[-(z_src.ndim-1):]
src_shape = list(z_src.shape[:-1])
if dat_shape != src_shape:
dat_shape = list(fz_src.shape)
dat_shape[:-(z_src.ndim-1)] = '-'
msg = ('The provided data is not of compatible shape with the '
'provided source bounds. {} != {}')
raise ValueError(msg.format(tuple(dat_shape), tuple(src_shape)))

if z_src.shape[-1] != 2:
msg = 'Unexpected source and target bounds shape. shape[-1] != 2'
raise ValueError(msg)

# Define our source in a consistent way.
# [broadcasting_dims, axis_interpolation, z_varying]

# src_data
bdims = list(range(fz_src.ndim - (z_src.ndim-1)))
data_vdims = [ind for ind in range(fz_src.ndim) if ind not in
(bdims + [axis])]
data_transpose = bdims + [axis] + data_vdims
fz_src_reshaped = np.transpose(fz_src, data_transpose)
fz_src_orig = list(fz_src_reshaped.shape)
shape = (
int(np.product(fz_src_reshaped.shape[:len(bdims)])),
fz_src_reshaped.shape[len(bdims)],
int(np.product(fz_src_reshaped.shape[len(bdims)+1:])))
fz_src_reshaped = fz_src_reshaped.reshape(shape)

# Define our src and target bounds in a consistent way.
# [axis_interpolation, z_varying, 2]
vdims = list(set(range(z_src.ndim)) - set([axis_relative]))
z_src_reshaped = np.transpose(z_src, [axis_relative] + vdims)
z_target_reshaped = np.transpose(z_target, [axis_relative] + vdims)

shape = int(np.product(z_src_reshaped.shape[1:-1]))
z_src_reshaped = z_src_reshaped.reshape([z_src_reshaped.shape[0], shape,
z_src_reshaped.shape[-1]])
shape = int(np.product(z_target_reshaped.shape[1:-1]))
z_target_reshaped = z_target_reshaped.reshape(
[z_target_reshaped.shape[0], shape, z_target_reshaped.shape[-1]])

result = conservative_interpolation(
z_src_reshaped, z_target_reshaped, fz_src_reshaped)

# Turn the result into a shape consistent with the source.
# First reshape, then reverse transpose.
shape = fz_src_orig
shape[len(bdims)] = z_target.shape[axis_relative]
result = result.reshape(shape)
invert_transpose = [data_transpose.index(ind) for ind in
list(range(result.ndim))]
result = result.transpose(invert_transpose)
return result
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comments:

  • Looks like this could be a class with behaviour (conservative_interpolation).
  • Very similar code for the non-bounded case. Would be interesting to pull this all out into common functions.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed.

I saw these two points as being connected. I think a refactor to support caching would entail pulling both existing and conservative capability into classes. I didn't want to pre-define a structure before considering this refactor in that wider context of caching (it was easier to use a proxy to the existing API for now). With regards to common functions, as far as I can tell. the existing stratify usage is rather hardcoded to a points based approach right now.
In the refactor to support caching, I hope to have a similar mindset to what I have done here, in that we pull out the code which needn't be cython code into a python module (inc. enduser API), allowing greater flexibility and functionality to be more readily shared between points-based and bounds-based approaches. Are you happy with this mindset?

Cheers

126 changes: 126 additions & 0 deletions stratify/_conservative.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import numpy as np
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No docstrings 😱

Copy link
Author

@cpelley cpelley Nov 13, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Phil, just added (see latest commit)
I have taken a numpy doc style approach in the python module but followed the same style forming as the existing cython module for the new conservative cython module. Let me know what you think.

Cheers

cimport numpy as np


cdef calculate_weights(np.ndarray[np.float64_t, ndim=2] src_point,
np.ndarray[np.float64_t, ndim=2] tgt_point):
"""
Calculate weights for a given point.

The following visually illustrates the calculation::

src_min src_max
|----------| : Source
tgt_min tgt_max
|------------| : Target
|----------| : Delta (src_max - src_min)
|----| : Overlap (between src & tgt)
weight = overlap / delta

Parameters
----------
src_point (2d double array) - Source point (at a specific location).
tgt_point (2d double array) - Target point (at a specific location).

Returns
-------
2d double array - Weights corresponding to shape [src_point.shape[0],
tgt_point.shape[0]].

"""
cdef Py_ssize_t src_ind, tgt_ind
cdef np.float64_t delta, weight
cdef np.ndarray[np.float64_t, ndim=2] weights
cdef np.ndarray[np.float64_t, ndim=1] src_cell, tgt_cell

weights = np.zeros([src_point.shape[0], tgt_point.shape[0]],
dtype=np.float64)
for src_ind, src_cell in enumerate(src_point):
delta = src_cell.max() - src_cell.min()
for tgt_ind, tgt_cell in enumerate(tgt_point):
weight = (min(src_cell.max(), tgt_cell.max()) -
max(src_cell.min(), tgt_cell.min())) / float(delta)
if weight > 0:
weights[src_ind, tgt_ind] = weight
return weights


cdef apply_weights(np.ndarray[np.float64_t, ndim=3] src_point,
np.ndarray[np.float64_t, ndim=3] tgt_point,
np.ndarray[np.float64_t, ndim=3] src_data):
"""
Perform conservative interpolation.

Conservation interpolation of a dataset between a provided source
coordinate and a target coordinate. Where no source cells contribute to a
target cell, a np.nan value is returned.

Parameters
----------
src_points (3d double array) - Source coordinate, taking the form
[axis_interpolation, z_varying, 2].
tgt_points (3d double array) - Target coordinate, taking the form
[axis_interpolation, z_varying, 2].
src_data (3d double array) - The source data, the phenomenon data to be
interpolated from ``src_points`` to ``tgt_points``. Taking the form
[broadcasting_dims, axis_interpolation, z_varying].

Returns
-------
3d double array - Interpolated result over target levels (``tgt_points``).
Taking the form [broadcasting_dims, axis_interpolation, z_varying].

"""
cdef Py_ssize_t ind
cdef np.ndarray[np.float64_t, ndim=3] results, weighted_contrib
cdef np.ndarray[np.float64_t, ndim=2] weights
results = np.zeros(
[src_data.shape[0], tgt_point.shape[0], src_data.shape[2]],
dtype='float64')
# Calculate and apply weights
for ind in range(src_data.shape[2]):
weights = calculate_weights(src_point[:, ind], tgt_point[:, ind])
if not (weights.sum(axis=1) == 1).all():
msg = ('Weights calculation yields a less than conservative '
'result. Aborting.')
raise ValueError(msg)
weighted_contrib = weights * src_data[..., ind][..., None]
results[..., ind] = (
np.nansum(weighted_contrib, axis=1))
# Return nan values for those target cells, where there is any
# contribution of nan data from the source data.
results[..., ind][
((weights > 0) * np.isnan(weighted_contrib)).any(axis=1)] = np.nan

# Return np.nan for those target cells where no source contributes.
results[:, weights.sum(axis=0) == 0, ind] = np.nan
return results


def conservative_interpolation(src_points, tgt_points, src_data):
"""
Perform conservative interpolation.

Conservation interpolation of a dataset between a provided source
coordinate and a target coordinate. All inputs are recast to 64-bit float
arrays before calculation.

Parameters
----------
src_points (3d array) - Source coordinate, taking the form
[axis_interpolation, z_varying, 2].
tgt_points (3d array) - Target coordinate, taking the form
[axis_interpolation, z_varying, 2].
src_data (3d array) - The source data, the phenonenon data to be
interpolated from ``src_points`` to ``tgt_points``. Taking the form
[broadcasting_dims, axis_interpolation, z_varying].

Returns
-------
3d double array - Interpolated result over target levels (``tgt_points``).
Taking the form [broadcasting_dims, axis_interpolation, z_varying].

"""
return apply_weights(src_points.astype('float64'),
tgt_points.astype('float64'),
src_data.astype('float64'))
Loading