From dff4b85b8a780d2059455dd6e87c7af88f09449d Mon Sep 17 00:00:00 2001 From: Sergey Pokhodenko Date: Sun, 13 Oct 2019 13:15:14 +0300 Subject: [PATCH] git # This is a combination of 7 commits. Implement Series.sum() in new style Commented 'sum' in series_kernels.py series_replace_funcs list because the line is helpful and will be deleted later. Removed kwargs, use skipna for selecting numpy.sum() or numpy.nansum(), use the same parameters for hpat_pandas_series_sum_impl Add tests for Series.sum() Skip test_series_sum2 because Series sum operator is not implemented Uncomment sum in series_replace_funcs because it breaks many tests Use numba.typing.arraydecl.ArrayAttribute.resolve_sum() instead of hpat.hiframes.pd_series_ext.SeriesAttribute.resolve_sum() in SumDummyTyper.generic() because resolve_sum was removed from SeriesAttribute Return 'sum' in _run_call_series. It fixed count_array_REPs(). Skip Series.sum(skipna) test Skip tests for DataFrame.sum() in new style --- .../datatypes/hpat_pandas_series_functions.py | 79 +++++++++++++++++++ hpat/hiframes/pd_dataframe_ext.py | 2 +- hpat/hiframes/pd_series_ext.py | 1 + hpat/tests/test_dataframe.py | 1 + hpat/tests/test_series.py | 22 +++++- 5 files changed, 102 insertions(+), 3 deletions(-) diff --git a/hpat/datatypes/hpat_pandas_series_functions.py b/hpat/datatypes/hpat_pandas_series_functions.py index a2612b148..4c1f9c55d 100644 --- a/hpat/datatypes/hpat_pandas_series_functions.py +++ b/hpat/datatypes/hpat_pandas_series_functions.py @@ -683,6 +683,85 @@ def hpat_pandas_series_sub_number_impl(self, other): raise TypingError('{} The object must be a pandas.series or scalar. Given other: {}'.format(_func_name, other)) +@overload_method(SeriesType, 'sum') +def hpat_pandas_series_sum( + self, + axis=None, + skipna=None, + level=None, + numeric_only=None, + min_count=0, +): + """ + Pandas Series method :meth:`pandas.Series.sum` implementation. + + .. only:: developer + + Tests: + python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sum1 + # python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sum2 + + Parameters + ---------- + self: :class:`pandas.Series` + input series + axis: + *unsupported* + skipna: :obj:`bool`, default :obj:`True` + Exclude NA/null values when computing the result. + level: + *unsupported* + numeric_only: + *unsupported* + min_count: + *unsupported* + + Returns + ------- + :obj:`float` + scalar or Series (if level specified) + """ + + _func_name = 'Method sum().' + + if not isinstance(self, SeriesType): + raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self)) + + if not (isinstance(axis, (types.Integer, types.Omitted)) or axis == None): + raise TypingError('{} The axis must be an Integer. Currently unsupported. Given: {}'.format(_func_name, axis)) + + if not (isinstance(skipna, (types.Boolean, types.Omitted)) or skipna == None): + raise TypingError('{} The skipna must be a Boolean. Given: {}'.format(_func_name, skipna)) + + if not (isinstance(level, (types.Integer, types.StringLiteral, types.Omitted)) or level == None): + raise TypingError('{} The level must be an Integer or level name. Currently unsupported. Given: {}'.format(_func_name, level)) + + if not (isinstance(numeric_only, (types.Boolean, types.Omitted)) or numeric_only == None): + raise TypingError('{} The numeric_only must be a Boolean. Currently unsupported. Given: {}'.format(_func_name, numeric_only)) + + if not (isinstance(min_count, (types.Integer, types.Omitted)) or min_count == 0): + raise TypingError('{} The min_count must be an Integer. Currently unsupported. Given: {}'.format(_func_name, min_count)) + + def hpat_pandas_series_sum_impl( + self, + axis=None, + skipna=None, + level=None, + numeric_only=None, + min_count=0, + ): + """ + Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sum1 + """ + if skipna is None: + skipna = True + if skipna: + return numpy.nansum(self._data) + return numpy.sum(self._data) + + return hpat_pandas_series_sum_impl + + @overload_method(SeriesType, 'take') def hpat_pandas_series_take(self, indices, axis=0, is_copy=False): """ diff --git a/hpat/hiframes/pd_dataframe_ext.py b/hpat/hiframes/pd_dataframe_ext.py index d2f75d645..5958b6228 100644 --- a/hpat/hiframes/pd_dataframe_ext.py +++ b/hpat/hiframes/pd_dataframe_ext.py @@ -1514,7 +1514,7 @@ def generic(self, args, kws): df = args[0] # TODO: ignore non-numerics # get series sum output types - dtypes = tuple(hpat.hiframes.pd_series_ext.SeriesAttribute.resolve_sum( + dtypes = tuple(numba.typing.arraydecl.ArrayAttribute.resolve_sum( self, SeriesType(d.dtype)).get_call_type(self, (), {}).return_type for d in df.data) diff --git a/hpat/hiframes/pd_series_ext.py b/hpat/hiframes/pd_series_ext.py index e477340bd..f26518fac 100644 --- a/hpat/hiframes/pd_series_ext.py +++ b/hpat/hiframes/pd_series_ext.py @@ -992,6 +992,7 @@ def generic_expand_cumulative_series(self, args, kws): # TODO: add itemsize, strides, etc. when removed from Pandas _not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten', + 'resolve_sum', 'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique'] # use ArrayAttribute for attributes not defined in SeriesAttribute diff --git a/hpat/tests/test_dataframe.py b/hpat/tests/test_dataframe.py index d3c7a5bcc..b9156d25b 100644 --- a/hpat/tests/test_dataframe.py +++ b/hpat/tests/test_dataframe.py @@ -797,6 +797,7 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) + @unittest.skipIf(not hpat.config.config_pipeline_hpat_default, "DataFrame.sum() not implemented in new style") def test_sum1(self): # TODO: non-numeric columns should be ignored automatically def test_impl(n): diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index d41b8c220..5fe4ae3f4 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -694,7 +694,7 @@ def test_series_op5(self): df = pd.DataFrame({'A': np.arange(1, n), 'B': np.ones(n - 1)}) pd.testing.assert_series_equal(hpat_func(df.A, df.B), test_impl(df.A, df.B), check_names=False) - @unittest.skipIf(platform.system() == 'Windows', + @unittest.skipIf(platform.system() == 'Windows', 'Series values are different (20.0 %)' '[left]: [1, 1024, 59049, 1048576, 9765625, 60466176, 282475249, 1073741824, 3486784401, 10000000000]' '[right]: [1, 1024, 59049, 1048576, 9765625, 60466176, 282475249, 1073741824, -808182895, 1410065408]') @@ -1031,7 +1031,15 @@ def test_impl(A): df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]}) pd.testing.assert_series_equal(hpat_func(df.A), test_impl(df.A)) - def test_series_sum1(self): + def test_series_sum_default(self): + def test_impl(S): + return S.sum() + hpat_func = hpat.jit(test_impl) + + S = pd.Series([1., 2., 3.]) + self.assertEqual(hpat_func(S), test_impl(S)) + + def test_series_sum_nan(self): def test_impl(S): return S.sum() hpat_func = hpat.jit(test_impl) @@ -1044,6 +1052,16 @@ def test_impl(S): S = pd.Series([np.nan, np.nan]) self.assertEqual(hpat_func(S), test_impl(S)) + @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "Old style Series.sum() does not support parameters") + def test_series_sum_skipna_false(self): + def test_impl(S): + return S.sum(skipna=False) + hpat_func = hpat.jit(test_impl) + + S = pd.Series([np.nan, 2., 3.]) + self.assertEqual(np.isnan(hpat_func(S)),np.isnan(test_impl(S))) + + @unittest.skipIf(not hpat.config.config_pipeline_hpat_default, "Series.sum() operator + is not implemented yet for Numba") def test_series_sum2(self): def test_impl(S): return (S + S).sum()