Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
53 changes: 43 additions & 10 deletions sdc/datatypes/hpat_pandas_series_rolling_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,6 @@ def apply_minp(arr, ddof, minp):
gen_hpat_pandas_series_rolling_impl(arr_kurt))
hpat_pandas_rolling_series_max_impl = register_jitable(
gen_hpat_pandas_series_rolling_impl(arr_max))
hpat_pandas_rolling_series_mean_impl = register_jitable(
gen_hpat_pandas_series_rolling_impl(arr_mean))
hpat_pandas_rolling_series_median_impl = register_jitable(
gen_hpat_pandas_series_rolling_impl(arr_median))
hpat_pandas_rolling_series_min_impl = register_jitable(
Expand Down Expand Up @@ -336,7 +334,17 @@ def result_or_nan(nfinite, minp, result):
return result


def gen_sdc_pandas_series_rolling_impl(pop, put, init_result=numpy.nan):
@sdc_register_jitable
def mean_result_or_nan(nfinite, minp, result):
    """Return the window mean, or NaN when min periods is not satisfied.

    NaN is returned when `nfinite` is zero or smaller than `minp`;
    otherwise the value is `result` divided by `nfinite`.
    """
    too_few_values = nfinite == 0 or nfinite < minp

    # The division is evaluated only when the window is large enough,
    # so a zero `nfinite` never reaches it.
    return numpy.nan if too_few_values else result / nfinite


def gen_sdc_pandas_series_rolling_impl(pop, put, get_result=result_or_nan,
init_result=numpy.nan):
"""Generate series rolling methods implementations based on pop/put funcs"""
def impl(self):
win = self._window
Expand Down Expand Up @@ -366,22 +374,24 @@ def impl(self):
for idx in range(interlude_start, interlude_stop):
value = input_arr[idx]
nfinite, result = put(value, nfinite, result)
output_arr[idx] = result_or_nan(nfinite, minp, result)
output_arr[idx] = get_result(nfinite, minp, result)

for idx in range(interlude_stop, chunk.stop):
put_value = input_arr[idx]
pop_value = input_arr[idx - win]
nfinite, result = put(put_value, nfinite, result)
nfinite, result = pop(pop_value, nfinite, result)
output_arr[idx] = result_or_nan(nfinite, minp, result)
output_arr[idx] = get_result(nfinite, minp, result)

return pandas.Series(output_arr, input_series._index,
name=input_series._name)
return impl


# Rolling mean and rolling sum are both generated from the same pop_sum/put_sum
# pair; they differ only in how the final value is produced (mean passes
# mean_result_or_nan as get_result, sum keeps the generator's default).
# The earlier register_jitable-wrapped assignment of the sum implementation was
# dropped: it was overwritten by the assignment below before any use.
sdc_pandas_series_rolling_mean_impl = gen_sdc_pandas_series_rolling_impl(
    pop_sum, put_sum, get_result=mean_result_or_nan, init_result=0.)
sdc_pandas_series_rolling_sum_impl = gen_sdc_pandas_series_rolling_impl(
    pop_sum, put_sum, init_result=0.)


@sdc_rolling_overload(SeriesRollingType, 'apply')
Expand Down Expand Up @@ -552,7 +562,30 @@ def _impl(self, other=None, pairwise=None, ddof=1):
bias_adj = count / (count - ddof)

def mean(series):
    """Compute the rolling mean of `series` window by window.

    `win` and `minp` are read from the enclosing scope. Each output element
    is the mean of the finite values in its window, or NaN when the window
    holds fewer than `minp` finite values.
    """
    # Deliberately NOT `series.rolling(win, min_periods=minp).mean()`:
    # the old and new rolling-mean implementations round floats differently.
    # TODO: fix this during optimizing of covariance
    input_arr = series._data
    length = len(input_arr)
    output_arr = numpy.empty(length, dtype=float64)

    # Mean of the finite values of `arr`, honouring min periods.
    def apply_minp(arr, minp):
        finite_arr = arr[numpy.isfinite(arr)]
        if len(finite_arr) < minp:
            return numpy.nan
        else:
            return arr_mean(finite_arr)

    # Leading part: the window is still growing, so it starts at index 0.
    boundary = min(win, length)
    for i in prange(boundary):
        arr_range = input_arr[:i + 1]
        output_arr[i] = apply_minp(arr_range, minp)

    # Remaining part: a window of `win` elements ending at index i.
    for i in prange(boundary, length):
        arr_range = input_arr[i + 1 - win:i + 1]
        output_arr[i] = apply_minp(arr_range, minp)

    return pandas.Series(output_arr, series._index, name=series._name)

return (mean(main_aligned * other_aligned) - mean(main_aligned) * mean(other_aligned)) * bias_adj

Expand Down Expand Up @@ -593,13 +626,13 @@ def hpat_pandas_series_rolling_max(self):
return hpat_pandas_rolling_series_max_impl


@sdc_overload_method(SeriesRollingType, 'mean')
def hpat_pandas_series_rolling_mean(self):
    """Overload for the `mean` method of a series rolling object.

    Runs the TypeChecker validation of `self` against SeriesRollingType and
    returns the implementation generated from the sum accumulators.
    """

    ty_checker = TypeChecker('Method rolling.mean().')
    ty_checker.check(self, SeriesRollingType)

    # The stale `return hpat_pandas_rolling_series_mean_impl` was removed:
    # it shadowed this return and referred to the pre-change implementation.
    return sdc_pandas_series_rolling_mean_impl


@sdc_rolling_overload(SeriesRollingType, 'median')
Expand Down
4 changes: 2 additions & 2 deletions sdc/tests/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,8 +715,8 @@ def test_impl(obj, window, min_periods):
hpat_func = self.jit(test_impl)
assert_equal = self._get_assert_equal(obj)

for window in range(0, len(obj) + 3, 2):
for min_periods in range(0, window + 1, 2):
for window in range(len(obj) + 2):
for min_periods in range(window):
with self.subTest(obj=obj, window=window,
min_periods=min_periods):
jit_result = hpat_func(obj, window, min_periods)
Expand Down
5 changes: 4 additions & 1 deletion sdc/tests/tests_perf/test_perf_series_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class TestSeriesRollingMethods(TestBase):
def setUpClass(cls):
super().setUpClass()
cls.map_ncalls_dlength = {
'mean': (100, [8 * 10 ** 5]),
'sum': (100, [8 * 10 ** 5]),
}

Expand Down Expand Up @@ -124,6 +125,9 @@ def _test_series_rolling_method(self, name, rolling_params=None,
data_num += len(extra_usecase_params.split(', '))
self._test_case(usecase, name, total_data_length, data_num=data_num)

def test_series_rolling_mean(self):
    """Run the shared rolling-method performance case for `mean`."""
    self._test_series_rolling_method(name='mean')

def test_series_rolling_sum(self):
    """Run the shared rolling-method performance case for `sum`."""
    self._test_series_rolling_method(name='sum')

Expand All @@ -135,7 +139,6 @@ def test_series_rolling_sum(self):
TC(name='cov', size=[10 ** 7]),
TC(name='kurt', size=[10 ** 7]),
TC(name='max', size=[10 ** 7]),
TC(name='mean', size=[10 ** 7]),
TC(name='median', size=[10 ** 7]),
TC(name='min', size=[10 ** 7]),
TC(name='quantile', size=[10 ** 7], params='0.2'),
Expand Down