Series.rolling().mean(): build the implementation with the pop/put generator

* Remove hpat_pandas_rolling_series_mean_impl (generated from arr_mean) and
  add sdc_pandas_series_rolling_mean_impl, generated by
  gen_sdc_pandas_series_rolling_impl(pop_sum, put_sum, ...).
* gen_sdc_pandas_series_rolling_impl takes a new get_result argument
  (default: result_or_nan); mean_result_or_nan returns result / nfinite,
  or NaN when nfinite is 0 or less than minp.
* The covariance implementation gets a local windowed mean built on arr_mean
  instead of calling rolling(...).mean(); see the in-code comment and TODO.
* rolling.mean() is registered with sdc_overload_method instead of
  sdc_rolling_overload.
* Tests: new window/min_periods ranges in test_rolling.py; 'mean' moves from
  the generated perf cases to an explicit test_series_rolling_mean.

diff --git a/sdc/datatypes/hpat_pandas_series_rolling_functions.py b/sdc/datatypes/hpat_pandas_series_rolling_functions.py
index e14d89f02..8c7a4680a 100644
--- a/sdc/datatypes/hpat_pandas_series_rolling_functions.py
+++ b/sdc/datatypes/hpat_pandas_series_rolling_functions.py
@@ -293,8 +293,6 @@ def apply_minp(arr, ddof, minp):
     gen_hpat_pandas_series_rolling_impl(arr_kurt))
 hpat_pandas_rolling_series_max_impl = register_jitable(
     gen_hpat_pandas_series_rolling_impl(arr_max))
-hpat_pandas_rolling_series_mean_impl = register_jitable(
-    gen_hpat_pandas_series_rolling_impl(arr_mean))
 hpat_pandas_rolling_series_median_impl = register_jitable(
     gen_hpat_pandas_series_rolling_impl(arr_median))
 hpat_pandas_rolling_series_min_impl = register_jitable(
@@ -336,7 +334,17 @@ def result_or_nan(nfinite, minp, result):
     return result
 
 
-def gen_sdc_pandas_series_rolling_impl(pop, put, init_result=numpy.nan):
+@sdc_register_jitable
+def mean_result_or_nan(nfinite, minp, result):
+    """Get result mean taking into account min periods."""
+    if nfinite == 0 or nfinite < minp:
+        return numpy.nan
+
+    return result / nfinite
+
+
+def gen_sdc_pandas_series_rolling_impl(pop, put, get_result=result_or_nan,
+                                       init_result=numpy.nan):
     """Generate series rolling methods implementations based on pop/put funcs"""
     def impl(self):
         win = self._window
@@ -366,22 +374,24 @@ def impl(self):
             for idx in range(interlude_start, interlude_stop):
                 value = input_arr[idx]
                 nfinite, result = put(value, nfinite, result)
-                output_arr[idx] = result_or_nan(nfinite, minp, result)
+                output_arr[idx] = get_result(nfinite, minp, result)
 
             for idx in range(interlude_stop, chunk.stop):
                 put_value = input_arr[idx]
                 pop_value = input_arr[idx - win]
                 nfinite, result = put(put_value, nfinite, result)
                 nfinite, result = pop(pop_value, nfinite, result)
-                output_arr[idx] = result_or_nan(nfinite, minp, result)
+                output_arr[idx] = get_result(nfinite, minp, result)
 
         return pandas.Series(output_arr, input_series._index, name=input_series._name)
 
     return impl
 
 
-sdc_pandas_series_rolling_sum_impl = register_jitable(
-    gen_sdc_pandas_series_rolling_impl(pop_sum, put_sum, init_result=0.))
+sdc_pandas_series_rolling_mean_impl = gen_sdc_pandas_series_rolling_impl(
+    pop_sum, put_sum, get_result=mean_result_or_nan, init_result=0.)
+sdc_pandas_series_rolling_sum_impl = gen_sdc_pandas_series_rolling_impl(
+    pop_sum, put_sum, init_result=0.)
 
 
 @sdc_rolling_overload(SeriesRollingType, 'apply')
@@ -552,7 +562,30 @@ def _impl(self, other=None, pairwise=None, ddof=1):
         bias_adj = count / (count - ddof)
 
         def mean(series):
-            return series.rolling(win, min_periods=minp).mean()
+            # cannot call return series.rolling(win, min_periods=minp).mean()
+            # due to different float rounding in new and old implementations
+            # TODO: fix this during optimizing of covariance
+            input_arr = series._data
+            length = len(input_arr)
+            output_arr = numpy.empty(length, dtype=float64)
+
+            def apply_minp(arr, minp):
+                finite_arr = arr[numpy.isfinite(arr)]
+                if len(finite_arr) < minp:
+                    return numpy.nan
+                else:
+                    return arr_mean(finite_arr)
+
+            boundary = min(win, length)
+            for i in prange(boundary):
+                arr_range = input_arr[:i + 1]
+                output_arr[i] = apply_minp(arr_range, minp)
+
+            for i in prange(boundary, length):
+                arr_range = input_arr[i + 1 - win:i + 1]
+                output_arr[i] = apply_minp(arr_range, minp)
+
+            return pandas.Series(output_arr, series._index, name=series._name)
 
         return (mean(main_aligned * other_aligned) - mean(main_aligned) * mean(other_aligned)) * bias_adj
 
@@ -593,13 +626,13 @@ def hpat_pandas_series_rolling_max(self):
     return hpat_pandas_rolling_series_max_impl
 
 
-@sdc_rolling_overload(SeriesRollingType, 'mean')
+@sdc_overload_method(SeriesRollingType, 'mean')
 def hpat_pandas_series_rolling_mean(self):
 
     ty_checker = TypeChecker('Method rolling.mean().')
     ty_checker.check(self, SeriesRollingType)
 
-    return hpat_pandas_rolling_series_mean_impl
+    return sdc_pandas_series_rolling_mean_impl
 
 
 @sdc_rolling_overload(SeriesRollingType, 'median')
diff --git a/sdc/tests/test_rolling.py b/sdc/tests/test_rolling.py
index 73984c09b..deb6b40e6 100644
--- a/sdc/tests/test_rolling.py
+++ b/sdc/tests/test_rolling.py
@@ -715,8 +715,8 @@ def test_impl(obj, window, min_periods):
         hpat_func = self.jit(test_impl)
         assert_equal = self._get_assert_equal(obj)
 
-        for window in range(0, len(obj) + 3, 2):
-            for min_periods in range(0, window + 1, 2):
+        for window in range(len(obj) + 2):
+            for min_periods in range(window):
                 with self.subTest(obj=obj, window=window,
                                   min_periods=min_periods):
                     jit_result = hpat_func(obj, window, min_periods)
diff --git a/sdc/tests/tests_perf/test_perf_series_rolling.py b/sdc/tests/tests_perf/test_perf_series_rolling.py
index a38bbe1a9..08aab460f 100644
--- a/sdc/tests/tests_perf/test_perf_series_rolling.py
+++ b/sdc/tests/tests_perf/test_perf_series_rolling.py
@@ -85,6 +85,7 @@ class TestSeriesRollingMethods(TestBase):
     def setUpClass(cls):
         super().setUpClass()
         cls.map_ncalls_dlength = {
+            'mean': (100, [8 * 10 ** 5]),
             'sum': (100, [8 * 10 ** 5]),
         }
 
@@ -124,6 +125,9 @@ def _test_series_rolling_method(self, name, rolling_params=None,
             data_num += len(extra_usecase_params.split(', '))
         self._test_case(usecase, name, total_data_length, data_num=data_num)
 
+    def test_series_rolling_mean(self):
+        self._test_series_rolling_method('mean')
+
     def test_series_rolling_sum(self):
         self._test_series_rolling_method('sum')
 
@@ -135,7 +139,6 @@ def test_series_rolling_sum(self):
     TC(name='cov', size=[10 ** 7]),
     TC(name='kurt', size=[10 ** 7]),
     TC(name='max', size=[10 ** 7]),
-    TC(name='mean', size=[10 ** 7]),
     TC(name='median', size=[10 ** 7]),
     TC(name='min', size=[10 ** 7]),
     TC(name='quantile', size=[10 ** 7], params='0.2'),