Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2082,3 +2082,67 @@ def hpat_pandas_series_nunique_impl(self, dropna=True):
return len(data_set) + 1

return hpat_pandas_series_nunique_impl


@overload_method(SeriesType, 'median')
def hpat_pandas_series_median(self, axis=None, skipna=True, level=None, numeric_only=None):
    """
    Pandas Series method :meth:`pandas.Series.median` implementation.

    .. only:: developer

       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_median1
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_median_skipna_default1
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_median_skipna_false1

    Parameters
    ----------
    self: :obj:`pandas.Series`
        input series
    axis: :obj:`int` or :obj:`string` {0 or `index`, None}, default None
        The axis for the function to be applied on.
        *unsupported*
    skipna: :obj:`bool`, default True
        exclude NA/null values when computing the result
    level: :obj:`int` or :obj:`string`, default None
        *unsupported*
    numeric_only: :obj:`bool` or None, default None
        *unsupported*

    Returns
    -------
    :obj:`float`
        median of values in the series

    Raises
    ------
    TypingError
        If `self` is not a series, its elements are not numeric, `skipna` is
        not a boolean, or any of the unsupported parameters (`axis`, `level`,
        `numeric_only`) is given a non-default value.
    """

    _func_name = 'Method median().'

    if not isinstance(self, SeriesType):
        raise TypingError(
            '{} The object must be a pandas.series. Given self: {}'.format(_func_name, self))

    # NOTE(review): reading the element type through self.data.dtype was
    # suggested during review; the shorter self.dtype spelling is kept here.
    if not isinstance(self.dtype, types.Number):
        raise TypingError(
            '{} The function only applies to elements that are all numeric. Given data type: {}'.format(
                _func_name, self.dtype))

    if not (isinstance(axis, (types.Integer, types.UnicodeType, types.Omitted)) or axis is None):
        raise TypingError(
            '{} The axis must be an Integer or a String. Currently unsupported. Given: {}'.format(
                _func_name, axis))

    # Per the PR discussion, this check runs several times during typing: an
    # omitted skipna is seen as types.Omitted in one pass and as the plain
    # Python default (True) in another, so the literal comparison is required
    # in addition to the Numba type checks (possibly a Numba quirk).
    if not (isinstance(skipna, (types.Boolean, types.Omitted)) or skipna == True):  # noqa: E712
        raise TypingError('{} The skipna must be a boolean. Given: {}'.format(_func_name, skipna))

    # Every unsupported parameter must be left at its default; reject the call
    # as soon as any one of them is specified.
    if not ((level is None or isinstance(level, types.Omitted))
            and (numeric_only is None or isinstance(numeric_only, types.Omitted))
            and (axis is None or isinstance(axis, types.Omitted))):
        raise TypingError(
            '{} Unsupported parameters. Given level: {}, numeric_only: {}, axis: {}'.format(
                _func_name, level, numeric_only, axis))

    def hpat_pandas_series_median_impl(self, axis=None, skipna=True, level=None, numeric_only=None):
        # skipna has already been validated by the enclosing overload.
        if skipna:
            # NaN-aware median: NA values do not take part in the result.
            return numpy.nanmedian(self._data)

        return numpy.median(self._data)

    return hpat_pandas_series_median_impl
14 changes: 7 additions & 7 deletions hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -687,13 +687,13 @@ def resolve_head(self, ary, args, kws):
assert not kws
return signature(ary, *args)

@bound_function("series.median")
def resolve_median(self, ary, args, kws):
    """Build the typing signature for ``series.median``; keyword arguments are not accepted."""
    assert not kws
    # median converts integer output to float
    if isinstance(ary.dtype, types.Integer):
        result_type = types.float64
    else:
        result_type = ary.dtype
    return signature(result_type, *args)
# @bound_function("series.median")
# def resolve_median(self, ary, args, kws):
# assert not kws
# dtype = ary.dtype
# # median converts integer output to float
# dtype = types.float64 if isinstance(dtype, types.Integer) else dtype
# return signature(dtype, *args)

# @bound_function("series.idxmin")
# def resolve_idxmin(self, ary, args, kws):
Expand Down
32 changes: 32 additions & 0 deletions hpat/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1989,6 +1989,7 @@ def test_impl(S):
self.assertTrue(count_array_OneDs() > 0)

def test_series_median1(self):
'''Verifies median implementation for float and integer series of random data'''
def test_impl(S):
return S.median()
hpat_func = hpat.jit(test_impl)
Expand All @@ -2009,6 +2010,34 @@ def test_impl(S):
S = pd.Series(np.random.ranf(m))
self.assertEqual(hpat_func(S), test_impl(S))

@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                 "BUG: old-style median implementation doesn't filter NaNs")
def test_series_median_skipna_default1(self):
    '''Checks that median() with skipna left at its default (True) matches pandas on data containing NaN'''
    def test_impl(S):
        return S.median()

    jitted_median = hpat.jit(test_impl)
    series_with_nan = pd.Series([2., 3., 5., np.nan, 5., 6., 7.])

    # Compiled result first, pandas reference second.
    actual = jitted_median(series_with_nan)
    expected = test_impl(series_with_nan)
    self.assertEqual(actual, expected)

@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                 "Skipna argument is not supported in old-style")
def test_series_median_skipna_false1(self):
    '''Verifies median implementation with skipna=False on a series with NA values'''
    def test_impl(S):
        return S.median(skipna=False)
    hpat_func = hpat.jit(test_impl)

    # np.inf is not NaN, so verify that a correct number is returned
    S1 = pd.Series([2., 3., 5., np.inf, 5., 6., 7.])
    self.assertEqual(hpat_func(S1), test_impl(S1))

    # Both implementations return NaN here. NaN never compares equal to itself
    # and the value HPAT returns is not the np.nan object, so neither
    # assertEqual() nor assertIs() can compare the results directly.
    # Assert each result is NaN on its own: comparing the two isnan() flags
    # would also pass if both results were (different) non-NaN numbers.
    # TODO: check whether the identity difference is Numba related
    S2 = pd.Series([2., 3., 5., np.nan, 5., 6., 7.])
    self.assertTrue(np.isnan(hpat_func(S2)))
    self.assertTrue(np.isnan(test_impl(S2)))

def test_series_median_parallel1(self):
# create `kde.parquet` file
ParquetGenerator.gen_kde_pq()
Expand All @@ -2020,6 +2049,9 @@ def test_impl():
hpat_func = hpat.jit(test_impl)

self.assertEqual(hpat_func(), test_impl())
self.assertEqual(count_array_REPs(), 0)
self.assertEqual(count_parfor_REPs(), 0)
self.assertTrue(count_array_OneDs() > 0)

def test_series_argsort_parallel(self):
# create `kde.parquet` file
Expand Down