diff --git a/examples/series/rolling/series_rolling_apply.py b/examples/series/rolling/series_rolling_apply.py
new file mode 100644
index 000000000..78ebd7ba2
--- /dev/null
+++ b/examples/series/rolling/series_rolling_apply.py
@@ -0,0 +1,44 @@
+# *****************************************************************************
+# Copyright (c) 2019, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import numpy as np
+import pandas as pd
+from numba import njit
+
+
+@njit
+def series_rolling_apply():
+    series = pd.Series([4, 3, 5, 2, 6])  # Series of 4, 3, 5, 2, 6
+
+    def get_median(x):
+        return np.median(x)
+
+    out_series = series.rolling(3).apply(get_median)
+
+    return out_series  # Expect series of NaN, NaN, 4.0, 3.0, 5.0
+
+
+print(series_rolling_apply())
diff --git a/sdc/datatypes/hpat_pandas_series_rolling_functions.py b/sdc/datatypes/hpat_pandas_series_rolling_functions.py
index 324706d52..26188405f 100644
--- a/sdc/datatypes/hpat_pandas_series_rolling_functions.py
+++ b/sdc/datatypes/hpat_pandas_series_rolling_functions.py
@@ -84,6 +84,12 @@
 """
 
 
+@register_jitable
+def arr_apply(arr, func):
+    """Apply function to the values of a window"""
+    return func(arr)
+
+
 @register_jitable
 def arr_corr(x, y):
     """Calculate correlation of values"""
@@ -278,6 +284,53 @@ def impl(self):
     gen_hpat_pandas_series_rolling_impl(arr_sum, float64))
 
 
+@sdc_overload_method(SeriesRollingType, 'apply')
+def hpat_pandas_series_rolling_apply(self, func, raw=None):
+
+    ty_checker = TypeChecker('Method rolling.apply().')
+    ty_checker.check(self, SeriesRollingType)
+
+    # raw is only type-checked here: the implementation below never reads it
+    raw_accepted = (Omitted, NoneType, Boolean)
+    if not isinstance(raw, raw_accepted) and raw is not None:
+        ty_checker.raise_exc(raw, 'bool', 'raw')
+
+    def hpat_pandas_rolling_series_apply_impl(self, func, raw=None):
+        win = self._window
+        minp = self._min_periods
+
+        input_series = self._data
+        input_arr = input_series._data
+        length = len(input_arr)
+        output_arr = numpy.empty(length, dtype=float64)
+
+        def culc_apply(arr, func, minp):
+            # min_periods is a number of valid observations, so NaN and
+            # infinite values must not be counted (as Pandas does)
+            if numpy.isfinite(arr).sum() < minp:
+                return numpy.nan
+
+            # func gets a copy of the window with infinite values replaced by NaN
+            finite_arr = arr.copy()
+            finite_arr[numpy.isinf(arr)] = numpy.nan
+            return arr_apply(finite_arr, func)
+
+        # head of the data: windows grow from 1 up to `win` elements
+        boundary = min(win, length)
+        for i in prange(boundary):
+            arr_range = input_arr[:i + 1]
+            output_arr[i] = culc_apply(arr_range, func, minp)
+
+        # rest of the data: windows of `win` elements ending at position i
+        for i in prange(boundary, length):
+            arr_range = input_arr[i + 1 - win:i + 1]
+            output_arr[i] = culc_apply(arr_range, func, minp)
+
+        return pandas.Series(output_arr, input_series._index, name=input_series._name)
+
+    return hpat_pandas_rolling_series_apply_impl
+
+
 @sdc_overload_method(SeriesRollingType, 'corr')
 def hpat_pandas_series_rolling_corr(self, other=None, pairwise=None):
 
@@ -776,6 +829,26 @@ def culc_var(arr, ddof, minp):
     return hpat_pandas_rolling_series_var_impl
 
 
+hpat_pandas_series_rolling_apply.__doc__ = hpat_pandas_series_rolling_docstring_tmpl.format(**{
+    'method_name': 'apply',
+    'example_caption': 'Apply a custom function to each rolling window.',
+    'limitations_block':
+    """
+    Limitations
+    -----------
+    Supported values of ``raw`` are `None` and `True` only. Parameters ``args``, ``kwargs`` are unsupported.
+    Series elements cannot be max/min float/integer. Otherwise SDC and Pandas results are different.
+    """,
+    'extra_params':
+    """
+    func:
+        Function that takes the values of a window and produces a single value.
+    raw: :obj:`bool`
+        False : passes each row or column as a Series to the function.
+        True or None : the passed function will receive ndarray objects instead.
+    """
+})
+
 hpat_pandas_series_rolling_corr.__doc__ = hpat_pandas_series_rolling_docstring_tmpl.format(**{
     'method_name': 'corr',
     'example_caption': 'Calculate rolling correlation.',
diff --git a/sdc/tests/test_rolling.py b/sdc/tests/test_rolling.py
index f91b2054a..e0685e3d3 100644
--- a/sdc/tests/test_rolling.py
+++ b/sdc/tests/test_rolling.py
@@ -501,6 +501,95 @@ def test_impl(series, window, min_periods, center,
         msg = msg_tmpl.format('closed', 'int64', 'str')
         self.assertIn(msg, str(raises.exception))
 
+    @skip_sdc_jit('Series.rolling.apply() unsupported Series index')
+    def test_series_rolling_apply_mean(self):
+        def test_impl(series, window, min_periods):
+            def func(x):
+                if len(x) == 0:
+                    return np.nan
+                return x.mean()
+            return series.rolling(window, min_periods).apply(func)
+
+        hpat_func = self.jit(test_impl)
+
+        all_data = [
+            list(range(10)), [1., -1., 0., 0.1, -0.1],
+            [1., np.inf, np.inf, -1., 0., np.inf, np.NINF, np.NINF],
+            [np.nan, np.inf, np.inf, np.nan, np.nan, np.nan, np.NINF, np.NZERO]
+        ]
+        indices = [list(range(len(data)))[::-1] for data in all_data]
+        for data, index in zip(all_data, indices):
+            series = pd.Series(data, index, name='A')
+            for window in range(0, len(series) + 3, 2):
+                for min_periods in range(0, window + 1, 2):
+                    with self.subTest(series=series, window=window,
+                                      min_periods=min_periods):
+                        jit_result = hpat_func(series, window, min_periods)
+                        ref_result = test_impl(series, window, min_periods)
+                        pd.testing.assert_series_equal(jit_result, ref_result)
+
+    @skip_sdc_jit('Series.rolling.apply() unsupported')
+    def test_series_rolling_apply_min_periods_nan(self):
+        def test_impl(series, window, min_periods):
+            def func(x):
+                return np.nansum(x)
+            return series.rolling(window, min_periods).apply(func)
+
+        hpat_func = self.jit(test_impl)
+
+        # func ignores NaN, so NaN in the result can come from min_periods only
+        series = pd.Series([1., np.nan, 3., np.nan, np.nan, 6.])
+        for min_periods in range(1, 4):
+            with self.subTest(min_periods=min_periods):
+                jit_result = hpat_func(series, 3, min_periods)
+                ref_result = test_impl(series, 3, min_periods)
+                pd.testing.assert_series_equal(jit_result, ref_result)
+
+    @skip_sdc_jit('Series.rolling.apply() unsupported exceptions')
+    def test_series_rolling_apply_unsupported_types(self):
+        def test_impl(raw):
+            def func(x):
+                if len(x) == 0:
+                    return np.nan
+                return np.median(x)
+            series = pd.Series([1., -1., 0., 0.1, -0.1])
+            return series.rolling(3).apply(func, raw=raw)
+
+        hpat_func = self.jit(test_impl)
+
+        with self.assertRaises(TypingError) as raises:
+            hpat_func(1)
+        msg = 'Method rolling.apply(). The object raw\n given: int64\n expected: bool'
+        self.assertIn(msg, str(raises.exception))
+
+    @unittest.skip('Series.rolling.apply() unsupported args')
+    def test_series_rolling_apply_args(self):
+        def test_impl(series, window, min_periods, q):
+            def func(x, q):
+                if len(x) == 0:
+                    return np.nan
+                return np.quantile(x, q)
+            return series.rolling(window, min_periods).apply(func, raw=None, args=(q,))
+
+        hpat_func = self.jit(test_impl)
+
+        all_data = [
+            list(range(10)), [1., -1., 0., 0.1, -0.1],
+            [1., np.inf, np.inf, -1., 0., np.inf, np.NINF, np.NINF],
+            [np.nan, np.inf, np.inf, np.nan, np.nan, np.nan, np.NINF, np.NZERO]
+        ]
+        indices = [list(range(len(data)))[::-1] for data in all_data]
+        for data, index in zip(all_data, indices):
+            series = pd.Series(data, index, name='A')
+            for window in range(0, len(series) + 3, 2):
+                for min_periods in range(0, window + 1, 2):
+                    for q in [0.25, 0.5, 0.75]:
+                        with self.subTest(series=series, window=window,
+                                          min_periods=min_periods, q=q):
+                            jit_result = hpat_func(series, window, min_periods, q)
+                            ref_result = test_impl(series, window, min_periods, q)
+                            pd.testing.assert_series_equal(jit_result, ref_result)
+
     @skip_sdc_jit('Series.rolling.corr() unsupported Series index')
     def test_series_rolling_corr(self):
         def test_impl(series, window, min_periods, other):