Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Implement idxmin #216

Merged
merged 13 commits into from
Oct 20, 2019
60 changes: 60 additions & 0 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,66 @@ def hpat_pandas_series_ge_impl(self, other):
raise TypingError('{} The object must be a pandas.series and argument must be a number. Given: {} and other: {}'.format(_func_name, self, other))


@overload_method(SeriesType, 'idxmin')
def hpat_pandas_series_idxmin(self, axis=None, skipna=True, *args):
"""
Pandas Series method :meth:`pandas.Series.idxmin` implementation.

.. only:: developer

Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin1
Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_str
Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_int
Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_no
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please update the list of tests in docstring according to the real list.


Parameters
-----------
axis : :obj:`int`, :obj:`str`, default: None
Axis along which the operation acts
0/None - row-wise operation
1 - column-wise operation
*unsupported*
skipna: :obj:`bool`, default: True
exclude NA/null values
*unsupported*

Returns
-------
:obj:`pandas.Series.index` or nan
returns: Label of the minimum value.
"""

_func_name = 'Method idxmin().'

if not isinstance(self, SeriesType):
raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

if not isinstance(self.data.dtype, types.Number):
raise TypingError(
'{} Currently function supports only numeric values. Given data type: {}'.format(_func_name, self.dtype))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

self.dtype -> self.data.dtype


if not isinstance(skipna, (types.Omitted, types.Boolean, bool)):
raise TypingError(
'{} The parameter must be a boolean type. Given type skipna: {}'.format(_func_name, type(skipna)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

type(skipna) -> skipna


if not (isinstance(axis, types.Omitted) or axis is None):
raise TypingError('{} Unsupported parameters. Given axis: {}'.format(_func_name, axis))

if not isinstance(self.index, types.NoneType):
def hpat_pandas_series_idxmin_impl(self, axis=None, skipna=True):

result = numpy.argmin(self._data)
return self._index[int(result)]

return hpat_pandas_series_idxmin_impl
else:
def hpat_pandas_series_idxmin_impl(self, axis=None, skipna=True):

return numpy.argmin(self._data)

return hpat_pandas_series_idxmin_impl
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could do return once outside of if-else statement.



@overload_method(SeriesType, 'lt')
def hpat_pandas_series_lt(self, other, level=None, fill_value=None, axis=0):
"""
Expand Down
2 changes: 1 addition & 1 deletion hpat/hiframes/hiframes_typed.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@ def _run_call_series(self, assign, lhs, rhs, series_var, func_name):
return self._replace_func(func, [data], pre_nodes=nodes)

if func_name in ('std', 'nunique', 'describe', 'isna',
'isnull', 'median', 'idxmin', 'idxmax', 'unique'):
'isnull', 'median', 'idxmax', 'unique'):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it will not work (didn't check). if parallel tests are not working, please leave this line as in original, and add resolve_idxmin value to the array as here https://github.com/IntelPython/hpat/pull/191/files#diff-f13c21aac60fb8f7ae3c9527239d1e17R990

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Initially, there were no parallel tests for this function, should I write them?
resolve_idxmin need to dont overlap arrays methods, but idxmin for array does not exist

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, let's wait test system results

if rhs.args or rhs.kws:
raise ValueError("unsupported Series.{}() arguments".format(
func_name))
Expand Down
9 changes: 5 additions & 4 deletions hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,10 +690,10 @@ def resolve_median(self, ary, args, kws):
dtype = types.float64 if isinstance(dtype, types.Integer) else dtype
return signature(dtype, *args)

@bound_function("series.idxmin")
def resolve_idxmin(self, ary, args, kws):
assert not kws
return signature(types.intp, *args)
# @bound_function("series.idxmin")
# def resolve_idxmin(self, ary, args, kws):
# assert not kws
# return signature(types.intp, *args)

@bound_function("series.idxmax")
def resolve_idxmax(self, ary, args, kws):
Expand Down Expand Up @@ -1280,3 +1280,4 @@ def hpat_pandas_series_ctor_impl(data=None, index=None, dtype=None, name=None, c
return hpat_pandas_series_ctor_impl

from hpat.datatypes.hpat_pandas_series_functions import *

90 changes: 90 additions & 0 deletions hpat/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1556,6 +1556,96 @@ def test_impl(A):
S = pd.Series(np.random.ranf(n))
np.testing.assert_array_equal(hpat_func(S), test_impl(S))

def test_series_idxmin_str(self):
def test_impl(S):
return S.idxmin()
hpat_func = hpat.jit(test_impl)

S = pd.Series([8, 6, 34, np.nan], ['a', 'ab', 'abc', 'c'])
print(hpat_func(S))
print(test_impl(S))
self.assertEqual(hpat_func(S), test_impl(S))

@unittest.skip("Cant return 2 types: string or nan in one case")
def test_series_idxmin_str_idx(self):
def test_impl(S):
return S.idxmin(skipna=False)

hpat_func = hpat.jit(test_impl)

S = pd.Series([8, 6, 34, np.nan], ['a', 'ab', 'abc', 'c'])
print(hpat_func(S))
print(test_impl(S))
self.assertEqual(hpat_func(S), test_impl(S))

def test_series_idxmin_no(self):
def test_impl(S):
return S.idxmin()
hpat_func = hpat.jit(test_impl)

S = pd.Series([8, 6, 34, np.nan])
self.assertEqual(hpat_func(S), test_impl(S))

@unittest.skip("Enable after fixing index")
def test_series_idxmin_int(self):
def test_impl(S):
return S.idxmin()
hpat_func = hpat.jit(test_impl)

S = pd.Series([1, 2, 3], [4, 45, 14])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please see this comment #217 (comment)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add 1 test and fix another

print(hpat_func(S))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this in test?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

print(test_impl(S))
self.assertEqual(hpat_func(S), test_impl(S))

@unittest.skip("Need index fix")
def test_series_idxmin(self):
def test_series_idxmin_impl(S):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to stick to the common name for the jitted func - test_impl.

return S.idxmin()

hpat_func = hpat.jit(test_series_idxmin_impl)

test_input_data = []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems the list is used nowhere.

data_simple = [[6, 6, 2, 1, 3, 3, 2, 1, 2],
[1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2],
[6, 6.1, 2.2, 1, 3, 3, 2.2, 1, 2],
]

data_extra = [[np.nan, np.nan, np.nan, np.nan],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to have one 'series_data' list (instead of data_simple + data_extra) with it's elements covering all possible situations (i.e. containing numbers, NaNs or both, in different combinations). There's no obvious benefit of using loops in unittests - better identify and cover different cases yourself.

[np.nan, np.nan, np.inf, np.inf],
]

test_input_data = data_simple + data_extra

for input_data in data_simple:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the first two loops for testing default index in Series? Then they should make one separate test. You will have another compilation of test_func on the last two loops anyway, so there'll be no benefit to put this all in one test.

S = pd.Series(input_data)

result_ref = test_series_idxmin_impl(S)
result = hpat_func(S)
self.assertEqual(result, result_ref)

for input_data in test_input_data:
S = pd.Series(input_data)

result_ref = test_series_idxmin_impl(S)
result = hpat_func(S)
self.assertEqual(result, result_ref)

for input_data in data_simple:
for index_data in data_simple:
S = pd.Series(input_data, index_data)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to test index_data at all? Does our implementation depend on the indexes?


result_ref = test_series_idxmin_impl(S)
result = hpat_func(S)
self.assertEqual(result, result_ref)

for input_data in test_input_data:
for index_data in test_input_data:
S = pd.Series(input_data, index_data)

result_ref = test_series_idxmin_impl(S)
result = hpat_func(S)
self.assertEqual(result, result_ref)

def test_series_idxmax1(self):
def test_impl(A):
return A.idxmax()
Expand Down