diff --git a/hpat/datatypes/hpat_pandas_series_functions.py b/hpat/datatypes/hpat_pandas_series_functions.py
index 1954afd88..87f06703e 100644
--- a/hpat/datatypes/hpat_pandas_series_functions.py
+++ b/hpat/datatypes/hpat_pandas_series_functions.py
@@ -1239,6 +1239,71 @@ def hpat_pandas_series_pow_impl(self, other):
     raise TypingError('{} The object must be a pandas.series and argument must be a number. Given: {} and other: {}'.format(_func_name, self, other))
 
 
+@overload_method(SeriesType, 'prod')
+def hpat_pandas_series_prod(self, axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
+    """
+    Pandas Series method :meth:`pandas.Series.prod` implementation.
+
+    .. only:: developer
+
+       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_prod
+
+    Parameters
+    -----------
+    self: :obj:`pandas.Series`
+        input series
+    axis: {index (0)}
+        Axis for the function to be applied on.
+        *unsupported*
+    skipna: :obj:`bool`, default :obj:`True`
+        Exclude nan values when computing the result
+    level: :obj:`int`, :obj:`str`, default :obj:`None`
+        If the axis is a MultiIndex (hierarchical), count along a particular level, collapsing into a scalar.
+        *unsupported*
+    numeric_only: :obj:`bool`, default :obj:`None`
+        Include only float, int, boolean columns.
+        If None, will attempt to use everything, then use only numeric data.
+        Not implemented for Series.
+        *unsupported*
+    min_count: :obj:`int`, default 0
+        The required number of valid values to perform the operation.
+        If fewer than min_count non-NA values are present the result will be NA.
+        *unsupported*
+
+    Returns
+    -------
+    :obj:`scalar`
+        Product of the series values (a non-default `level` is rejected, so a Series is never returned)
+    """
+
+    _func_name = 'Method prod().'
+
+    if not isinstance(self, SeriesType):
+        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))
+
+    if not isinstance(self.data.dtype, (types.Integer, types.Float)):
+        raise TypingError('{} Non numeric values unsupported. Given: {}'.format(_func_name, self.data.dtype))
+
+    if not (isinstance(skipna, (types.Omitted, types.Boolean)) or skipna is True):
+        raise TypingError("{} 'skipna' must be a boolean type. Given: {}".format(_func_name, skipna))
+
+    if not (isinstance(axis, types.Omitted) or axis is None) \
+            or not (isinstance(level, types.Omitted) or level is None) \
+            or not (isinstance(numeric_only, types.Omitted) or numeric_only is None) \
+            or not (isinstance(min_count, types.Omitted) or min_count == 0):
+        raise TypingError(
+            '{} Unsupported parameters. Given axis: {}, level: {}, numeric_only: {}, min_count: {}'.format(
+                _func_name, axis, level, numeric_only, min_count))
+
+    def hpat_pandas_series_prod_impl(self, axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
+        if skipna:
+            return numpy.nanprod(self._data)
+        else:
+            return numpy.prod(self._data)
+
+    return hpat_pandas_series_prod_impl
+
+
 @overload_method(SeriesType, 'quantile')
 def hpat_pandas_series_quantile(self, q=0.5, interpolation='linear'):
     """
diff --git a/hpat/hiframes/hiframes_typed.py b/hpat/hiframes/hiframes_typed.py
index 9982d7997..6da99b89b 100644
--- a/hpat/hiframes/hiframes_typed.py
+++ b/hpat/hiframes/hiframes_typed.py
@@ -842,7 +842,7 @@ def parse_impl(data):
 
     def _run_call_series(self, assign, lhs, rhs, series_var, func_name):
         # single arg functions
-        if func_name in ('sum', 'count', 'mean', 'var', 'min', 'max', 'prod'):
+        if func_name in ('sum', 'count', 'mean', 'var', 'min', 'max'):
             if rhs.args or rhs.kws:
                 raise ValueError("HPAT pipeline does not support arguments for Series.{}()".format(func_name))
 
diff --git a/hpat/hiframes/pd_dataframe_ext.py b/hpat/hiframes/pd_dataframe_ext.py
index 5958b6228..cd1d7f1e5 100644
--- a/hpat/hiframes/pd_dataframe_ext.py
+++ b/hpat/hiframes/pd_dataframe_ext.py
@@ -1554,7 +1554,7 @@ def generic(self, args, kws):
         df = args[0]
         # TODO: ignore non-numerics
         # get series prod output types
-        dtypes = tuple(hpat.hiframes.pd_series_ext.SeriesAttribute.resolve_prod(
+        dtypes = tuple(numba.typing.arraydecl.ArrayAttribute.resolve_prod(
             self, SeriesType(d.dtype)).get_call_type(self, (), {}).return_type
             for d in df.data)
 
diff --git a/hpat/hiframes/pd_series_ext.py b/hpat/hiframes/pd_series_ext.py
index 88f09d1a9..9c840a5db 100644
--- a/hpat/hiframes/pd_series_ext.py
+++ b/hpat/hiframes/pd_series_ext.py
@@ -993,7 +993,8 @@ def generic_expand_cumulative_series(self, args, kws):
 # TODO: add itemsize, strides, etc. when removed from Pandas
 _not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten',
                            'resolve_shift', 'resolve_sum', 'resolve_copy', 'resolve_mean',
-                           'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique']
+                           'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique',
+                           'resolve_prod']
 
 # use ArrayAttribute for attributes not defined in SeriesAttribute
 for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items():
diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py
index e7556e9d6..825d0c31d 100644
--- a/hpat/tests/test_series.py
+++ b/hpat/tests/test_series.py
@@ -1201,17 +1201,41 @@ def test_impl(S):
         S = pd.Series([np.nan, np.nan])
         self.assertEqual(hpat_func(S), test_impl(S))
 
-    def test_series_prod1(self):
+    def test_series_prod(self):
+        def test_impl(S, skipna):
+            return S.prod(skipna=skipna)
+        hpat_func = hpat.jit(test_impl)
+
+        data_samples = [
+            [6, 6, 2, 1, 3, 3, 2, 1, 2],
+            [1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2],
+            [6, 6.1, 2.2, 1, 3, 3, 2.2, 1, 2],
+            [6, 6, np.nan, 2, np.nan, 1, 3, 3, np.inf, 2, 1, 2, np.inf],
+            [1.1, 0.3, np.nan, 1.0, np.inf, 0.3, 2.1, np.nan, 2.2, np.inf],
+            [1.1, 0.3, np.nan, 1, np.inf, 0, 1.1, np.nan, 2.2, np.inf, 2, 2],
+            [np.nan, np.nan, np.nan],
+            [np.nan, np.nan, np.inf],
+        ]
+
+        for data in data_samples:
+            S = pd.Series(data)
+
+            for skipna_var in [True, False]:
+                actual = hpat_func(S, skipna=skipna_var)
+                expected = test_impl(S, skipna=skipna_var)
+
+                if np.isnan(actual) or np.isnan(expected):
+                    # NaN never compares equal to itself, so compare the "is NaN" flags instead
+                    self.assertEqual(np.isnan(actual), np.isnan(expected))
+                else:
+                    self.assertEqual(actual, expected)
+
+    def test_series_prod_skipna_default(self):
         def test_impl(S):
             return S.prod()
         hpat_func = hpat.jit(test_impl)
 
-        # column with NA
-        S = pd.Series([np.nan, 2., 3.])
-        self.assertEqual(hpat_func(S), test_impl(S))
-
-        # all NA case should produce 1
-        S = pd.Series([np.nan, np.nan])
+        S = pd.Series([np.nan, 2, 3.])
         self.assertEqual(hpat_func(S), test_impl(S))
 
     def test_series_count1(self):