Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1239,6 +1239,71 @@ def hpat_pandas_series_pow_impl(self, other):
raise TypingError('{} The object must be a pandas.series and argument must be a number. Given: {} and other: {}'.format(_func_name, self, other))


@overload_method(SeriesType, 'prod')
def hpat_pandas_series_prod(self, axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
    """
    Pandas Series method :meth:`pandas.Series.prod` implementation.

    .. only:: developer

       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_prod

    Parameters
    -----------
    self: :obj:`pandas.Series`
        input series
    axis: {index (0)}
        Axis for the function to be applied on.
        *unsupported*
    skipna: :obj:`bool`, default :obj:`True`
        Exclude nan values when computing the result
    level: :obj:`int`, :obj:`str`, default :obj:`None`
        If the axis is a MultiIndex (hierarchical), count along a particular level, collapsing into a scalar.
        *unsupported*
    numeric_only: :obj:`bool`, default :obj:`None`
        Include only float, int, boolean columns.
        If None, will attempt to use everything, then use only numeric data.
        Not implemented for Series.
        *unsupported*
    min_count: :obj:`int`, default 0
        The required number of valid values to perform the operation.
        If fewer than min_count non-NA values are present the result will be NA.
        *unsupported*

    Returns
    -------
    :obj:`scalar`
        Product of the series data: :func:`numpy.nanprod` is used when `skipna` is true,
        :func:`numpy.prod` otherwise.

    Raises
    ------
    TypingError
        If `self` is not a series type, if the series data is not integer or float,
        if `skipna` is not a boolean, or if any of the unsupported parameters
        (`axis`, `level`, `numeric_only`, `min_count`) differs from its default.
    """

    _func_name = 'Method prod().'

    if not isinstance(self, SeriesType):
        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

    if not isinstance(self.data.dtype, (types.Integer, types.Float)):
        # Report the same attribute the check above inspects; the previous
        # 'self.data.data.dtype' went one attribute too deep and could fail
        # while formatting the message instead of raising the TypingError.
        raise TypingError('{} Non numeric values unsupported. Given: {}'.format(_func_name, self.data.dtype))

    if not (isinstance(skipna, (types.Omitted, types.Boolean)) or skipna is True):
        raise TypingError("{} 'skipna' must be a boolean type. Given: {}".format(_func_name, skipna))

    # Each unsupported parameter must either be omitted or left at its default value.
    if not (isinstance(axis, types.Omitted) or axis is None) \
            or not (isinstance(level, types.Omitted) or level is None) \
            or not (isinstance(numeric_only, types.Omitted) or numeric_only is None) \
            or not (isinstance(min_count, types.Omitted) or min_count == 0):
        raise TypingError(
            '{} Unsupported parameters. Given axis: {}, level: {}, numeric_only: {}, min_count: {}'.format(
                _func_name, axis, level, numeric_only, min_count))

    def hpat_pandas_series_prod_impl(self, axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
        if skipna:
            return numpy.nanprod(self._data)
        else:
            return numpy.prod(self._data)

    return hpat_pandas_series_prod_impl


@overload_method(SeriesType, 'quantile')
def hpat_pandas_series_quantile(self, q=0.5, interpolation='linear'):
"""
Expand Down
2 changes: 1 addition & 1 deletion hpat/hiframes/hiframes_typed.py
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,7 @@ def parse_impl(data):

def _run_call_series(self, assign, lhs, rhs, series_var, func_name):
# single arg functions
if func_name in ('sum', 'count', 'mean', 'var', 'min', 'max', 'prod'):
if func_name in ('sum', 'count', 'mean', 'var', 'min', 'max'):
if rhs.args or rhs.kws:
raise ValueError("HPAT pipeline does not support arguments for Series.{}()".format(func_name))

Expand Down
2 changes: 1 addition & 1 deletion hpat/hiframes/pd_dataframe_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -1554,7 +1554,7 @@ def generic(self, args, kws):
df = args[0]
# TODO: ignore non-numerics
# get series prod output types
dtypes = tuple(hpat.hiframes.pd_series_ext.SeriesAttribute.resolve_prod(
dtypes = tuple(numba.typing.arraydecl.ArrayAttribute.resolve_prod(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What issue are you trying to solve with this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SeriesAttribute has no resolve_prod() because it was not copied from ArrayAttribute.
Here I added 'resolve_prod' to _not_series_array_attrs, which prevents the corresponding method from being copied from ArrayAttribute to SeriesAttribute here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The issue is an 'attribute not found' exception.

self, SeriesType(d.dtype)).get_call_type(self, (), {}).return_type
for d in df.data)

Expand Down
3 changes: 2 additions & 1 deletion hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,8 @@ def generic_expand_cumulative_series(self, args, kws):
# TODO: add itemsize, strides, etc. when removed from Pandas
_not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten',
'resolve_shift', 'resolve_sum', 'resolve_copy', 'resolve_mean',
'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique']
'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique',
'resolve_prod']

# use ArrayAttribute for attributes not defined in SeriesAttribute
for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items():
Expand Down
38 changes: 31 additions & 7 deletions hpat/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,17 +1201,41 @@ def test_impl(S):
S = pd.Series([np.nan, np.nan])
self.assertEqual(hpat_func(S), test_impl(S))

def test_series_prod1(self):
def test_series_prod(self):
    """Compare Series.prod(skipna=...) compiled by HPAT with plain pandas on several data samples."""
    def test_impl(S, skipna):
        return S.prod(skipna=skipna)
    hpat_func = hpat.jit(test_impl)

    # integer, float and mixed samples, with and without NaN / inf entries
    data_samples = (
        [6, 6, 2, 1, 3, 3, 2, 1, 2],
        [1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2],
        [6, 6.1, 2.2, 1, 3, 3, 2.2, 1, 2],
        [6, 6, np.nan, 2, np.nan, 1, 3, 3, np.inf, 2, 1, 2, np.inf],
        [1.1, 0.3, np.nan, 1.0, np.inf, 0.3, 2.1, np.nan, 2.2, np.inf],
        [1.1, 0.3, np.nan, 1, np.inf, 0, 1.1, np.nan, 2.2, np.inf, 2, 2],
        [np.nan, np.nan, np.nan],
        [np.nan, np.nan, np.inf],
    )

    for sample in data_samples:
        series = pd.Series(sample)

        for skipna_flag in (True, False):
            actual = hpat_func(series, skipna=skipna_flag)
            expected = test_impl(series, skipna=skipna_flag)

            actual_is_nan = np.isnan(actual)
            expected_is_nan = np.isnan(expected)
            if actual_is_nan or expected_is_nan:
                # NaN never compares equal to NaN, so compare NaN-ness instead
                self.assertEqual(actual_is_nan, expected_is_nan)
                continue

            self.assertEqual(actual, expected)

def test_series_prod_skipna_default(self):
def test_impl(S):
return S.prod()
hpat_func = hpat.jit(test_impl)

# column with NA
S = pd.Series([np.nan, 2., 3.])
self.assertEqual(hpat_func(S), test_impl(S))

# all NA case should produce 1
S = pd.Series([np.nan, np.nan])
S = pd.Series([np.nan, 2, 3.])
self.assertEqual(hpat_func(S), test_impl(S))

def test_series_count1(self):
Expand Down