-
Notifications
You must be signed in to change notification settings - Fork 61
Implement idxmin #216
Implement idxmin #216
Changes from 4 commits
0693c7b
3ffdf2a
9a35161
07c0725
957f2d3
883da23
a8ccdb9
5b01b92
f2cd21d
02b6fa5
c8f6d1b
3d0d810
5340737
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1096,6 +1096,66 @@ def hpat_pandas_series_ge_impl(self, other): | |
raise TypingError('{} The object must be a pandas.series and argument must be a number. Given: {} and other: {}'.format(_func_name, self, other)) | ||
|
||
|
||
@overload_method(SeriesType, 'idxmin')
def hpat_pandas_series_idxmin(self, axis=None, skipna=True, *args):
    """
    Pandas Series method :meth:`pandas.Series.idxmin` implementation.

    .. only:: developer

       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin1
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_str
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_str_idx
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_int
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_no

    Parameters
    -----------
    axis :  :obj:`int`, :obj:`str`, default: None
            Axis along which the operation acts
            0/None - row-wise operation
            1      - column-wise operation
            *unsupported*
    skipna:  :obj:`bool`, default: True
            exclude NA/null values
            *unsupported* (the type is validated, but the value is not used
            by the generated implementation)

    Returns
    -------
    :obj:`pandas.Series.index` or nan
            returns: Label of the minimum value. For a Series without an
            explicit index the position returned by ``numpy.argmin`` is
            returned directly.

    Raises
    ------
    TypingError
            If `self` is not a Series, the data is not numeric, `skipna` is
            not a boolean, or `axis` is given.
    """

    _func_name = 'Method idxmin().'

    if not isinstance(self, SeriesType):
        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

    if not isinstance(self.data.dtype, types.Number):
        # Report the very dtype that was tested above.
        raise TypingError(
            '{} Currently function supports only numeric values. Given data type: {}'.format(
                _func_name, self.data.dtype))

    if not isinstance(skipna, (types.Omitted, types.Boolean, bool)):
        # At typing time `skipna` already is the type object, so print it
        # directly, as the other messages in this function do.
        raise TypingError(
            '{} The parameter must be a boolean type. Given type skipna: {}'.format(_func_name, skipna))

    if not (isinstance(axis, types.Omitted) or axis is None):
        raise TypingError('{} Unsupported parameters. Given axis: {}'.format(_func_name, axis))

    if isinstance(self.index, types.NoneType):
        # No explicit index: the position of the minimum is the label.
        def hpat_pandas_series_idxmin_impl(self, axis=None, skipna=True):
            return numpy.argmin(self._data)
    else:
        # Explicit index: translate the position of the minimum into its label.
        def hpat_pandas_series_idxmin_impl(self, axis=None, skipna=True):
            result = numpy.argmin(self._data)
            return self._index[int(result)]

    return hpat_pandas_series_idxmin_impl
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You could do the return once, outside of the if/else. |
||
|
||
|
||
@overload_method(SeriesType, 'lt') | ||
def hpat_pandas_series_lt(self, other, level=None, fill_value=None, axis=0): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -866,7 +866,7 @@ def _run_call_series(self, assign, lhs, rhs, series_var, func_name): | |
return self._replace_func(func, [data], pre_nodes=nodes) | ||
|
||
if func_name in ('std', 'nunique', 'describe', 'isna', | ||
'isnull', 'median', 'idxmin', 'idxmax', 'unique'): | ||
'isnull', 'median', 'idxmax', 'unique'): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it will not work (didn't check). if parallel tests are not working, please leave this line as in original, and add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Initially, there were no parallel tests for this function, should I write them? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, let's wait test system results |
||
if rhs.args or rhs.kws: | ||
raise ValueError("unsupported Series.{}() arguments".format( | ||
func_name)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1556,6 +1556,96 @@ def test_impl(A): | |
S = pd.Series(np.random.ranf(n)) | ||
np.testing.assert_array_equal(hpat_func(S), test_impl(S)) | ||
|
||
def test_series_idxmin_str(self):
    """Check Series.idxmin() on a Series with string index labels.

    The jitted function must return the same label as the plain-Python
    run of the same function on the same pandas Series.
    """
    def test_impl(S):
        return S.idxmin()
    hpat_func = hpat.jit(test_impl)

    # The smallest number (6) carries the label 'ab'; NaN is the last element.
    S = pd.Series([8, 6, 34, np.nan], ['a', 'ab', 'abc', 'c'])
    self.assertEqual(hpat_func(S), test_impl(S))
|
||
@unittest.skip("Cant return 2 types: string or nan in one case")
def test_series_idxmin_str_idx(self):
    """Check Series.idxmin(skipna=False) on a Series with string index labels.

    Skipped: per the skip reason, the result may be either a string label
    or nan, and both cannot be returned from one jitted function.
    """
    def test_impl(S):
        return S.idxmin(skipna=False)

    hpat_func = hpat.jit(test_impl)

    S = pd.Series([8, 6, 34, np.nan], ['a', 'ab', 'abc', 'c'])
    self.assertEqual(hpat_func(S), test_impl(S))
|
||
def test_series_idxmin_no(self):
    """Check Series.idxmin() on a Series built without an explicit index."""
    def test_impl(S):
        return S.idxmin()

    hpat_func = hpat.jit(test_impl)
    S = pd.Series([8, 6, 34, np.nan])

    # Run the jitted version first, then the reference, as the original did.
    actual = hpat_func(S)
    expected = test_impl(S)
    self.assertEqual(actual, expected)
|
||
@unittest.skip("Enable after fixing index") | ||
def test_series_idxmin_int(self): | ||
def test_impl(S): | ||
return S.idxmin() | ||
hpat_func = hpat.jit(test_impl) | ||
|
||
S = pd.Series([1, 2, 3], [4, 45, 14]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please see this comment #217 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add 1 test and fix another |
||
print(hpat_func(S)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need this in test? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed |
||
print(test_impl(S)) | ||
self.assertEqual(hpat_func(S), test_impl(S)) | ||
|
||
@unittest.skip("Need index fix") | ||
def test_series_idxmin(self): | ||
def test_series_idxmin_impl(S): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's better to stick to the common name for the jitted func - test_impl. |
||
return S.idxmin() | ||
|
||
hpat_func = hpat.jit(test_series_idxmin_impl) | ||
|
||
test_input_data = [] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems the list is used nowhere. |
||
data_simple = [[6, 6, 2, 1, 3, 3, 2, 1, 2], | ||
[1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2], | ||
[6, 6.1, 2.2, 1, 3, 3, 2.2, 1, 2], | ||
] | ||
|
||
data_extra = [[np.nan, np.nan, np.nan, np.nan], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it's better to have one 'series_data' list (instead of data_simple + data_extra) with its elements covering all possible situations (i.e. containing numbers, NaNs or both, in different combinations). There's no obvious benefit of using loops in unittests - it is better to identify and cover the different cases yourself. |
||
[np.nan, np.nan, np.inf, np.inf], | ||
] | ||
|
||
test_input_data = data_simple + data_extra | ||
|
||
for input_data in data_simple: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are the first two loops for testing default index in Series? Then they should make one separate test. You will have another compilation of test_func on the last two loops anyway, so there'll be no benefit to put this all in one test. |
||
S = pd.Series(input_data) | ||
|
||
result_ref = test_series_idxmin_impl(S) | ||
result = hpat_func(S) | ||
self.assertEqual(result, result_ref) | ||
|
||
for input_data in test_input_data: | ||
S = pd.Series(input_data) | ||
|
||
result_ref = test_series_idxmin_impl(S) | ||
result = hpat_func(S) | ||
self.assertEqual(result, result_ref) | ||
|
||
for input_data in data_simple: | ||
for index_data in data_simple: | ||
S = pd.Series(input_data, index_data) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need to test index_data at all? Does our implementation depend on the indexes? |
||
|
||
result_ref = test_series_idxmin_impl(S) | ||
result = hpat_func(S) | ||
self.assertEqual(result, result_ref) | ||
|
||
for input_data in test_input_data: | ||
for index_data in test_input_data: | ||
S = pd.Series(input_data, index_data) | ||
|
||
result_ref = test_series_idxmin_impl(S) | ||
result = hpat_func(S) | ||
self.assertEqual(result, result_ref) | ||
|
||
def test_series_idxmax1(self): | ||
def test_impl(A): | ||
return A.idxmax() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please update the list of tests in docstring according to the real list.