diff --git a/hpat/datatypes/hpat_pandas_series_functions.py b/hpat/datatypes/hpat_pandas_series_functions.py
index 6abb36d63..ec1cbb46c 100644
--- a/hpat/datatypes/hpat_pandas_series_functions.py
+++ b/hpat/datatypes/hpat_pandas_series_functions.py
@@ -2181,10 +2181,13 @@ def hpat_pandas_series_argsort(self, axis=0, kind='quicksort', order=None):
         input arg
     axis: :obj:`int`
         Has no effect but is accepted for compatibility with numpy.
-    kind: {‘mergesort’, ‘quicksort’, ‘heapsort’}, default ‘quicksort’
-        Choice of sorting algorithm. See np.sort for more information. ‘mergesort’ is the only stable algorithm
+        *unsupported*
+    kind: {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
+        Choice of sorting algorithm. See np.sort for more information. 'mergesort' is the only stable algorithm
+        *unsupported, uses python func - sorted()*
     order: None
         Has no effect but is accepted for compatibility with numpy.
+        *unsupported*
 
     Returns
     -------
@@ -2262,3 +2265,234 @@ def hpat_pandas_series_argsort_impl(self, axis=0, kind='quicksort', order=None):
     return hpat_pandas_series_argsort_impl
 
 
+@overload_method(SeriesType, 'sort_values')
+def hpat_pandas_series_sort_values(self, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'):
+    """
+    Pandas Series method :meth:`pandas.Series.sort_values` implementation.
+
+    .. only:: developer
+
+       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values1
+       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values2
+       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values_index1
+       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values_noidx
+       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values_idx
+       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values_parallel1
+
+    Parameters
+    -----------
+    self: :class:`pandas.Series`
+        input arg
+    axis: 0 or :obj:`pandas.Series.index`
+        Axis to direct sorting.
+        *unsupported*
+    ascending: :obj:`bool`, default: True
+        If True, sort values in ascending order, otherwise descending.
+    inplace: :obj:`bool`, default: False
+        *unsupported, a new Series is always returned*
+    kind: {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
+        Choice of sorting algorithm.
+        *unsupported, string data uses python func - sorted(), numeric data uses numpy.sort()*
+    na_position: {'first' or 'last'}, default 'last'
+        Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
+        *unsupported*
+
+    Returns
+    -------
+    :obj:`pandas.Series`
+         returns: Series ordered by values.
+    """
+
+    _func_name = 'Method sort_values().'
+
+    if not isinstance(self, SeriesType):
+        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))
+
+    # accept an omitted argument, a Numba boolean type or a plain Python bool (True as well as False)
+    if not isinstance(ascending, (types.Omitted, types.Boolean, bool)):
+        raise TypingError('{} Unsupported parameters. Given ascending: {}'.format(_func_name, ascending))
+
+    if isinstance(self.index, types.NoneType) and isinstance(self.data.dtype, types.UnicodeType):
+        # string data without an explicit index: positions 0..n-1 are used as the index
+        def hpat_pandas_series_sort_values_impl(self, axis=0, ascending=True, inplace=False, kind='quicksort',
+                                                na_position='last'):
+
+            index = numpy.arange(len(self._data))
+            my_index = numpy.arange(len(self._data))
+            used_index = numpy.full((len(self._data)), -1)
+            result = sorted(self._data)
+            cycle = range(len(self._data))
+            if ascending is False:
+                result = result[::-1]
+                cycle = range(len(self._data) - 1, -1, -1)
+            result_index = index.copy()
+            # for every sorted value take the first source position (in scan order) that holds it and is unused
+            for i in range(len(result_index)):
+                find = 0
+                for search in cycle:
+                    check = 0
+                    for j in used_index:
+                        if my_index[search] == j:
+                            check = 1
+                    if (self._data[search] == result[i]) and check == 0 and find == 0:
+                        result_index[i] = index[search]
+                        used_index[i] = my_index[search]
+                        find = 1
+
+            na = 0
+            for i in self.isna():
+                if i:
+                    na += 1
+            # slots left unmatched above receive the index labels of the NaN entries, in original order
+            num = 0
+            for i in self.isna():
+                j = len(result_index) - na
+                if i and used_index[j] == -1:
+                    result_index[j] = index[num]
+                    used_index[j] = my_index[num]
+                    na -= 1
+                num += 1
+
+            return pandas.Series(result, result_index)
+
+        return hpat_pandas_series_sort_values_impl
+
+    if isinstance(self.index, types.NoneType) and isinstance(self.data.dtype, types.Number):
+        # numeric data without an explicit index: positions 0..n-1 are used as the index
+        def hpat_pandas_series_sort_values_impl(self, axis=0, ascending=True, inplace=False, kind='quicksort',
+                                                na_position='last'):
+
+            na = 0
+            for i in self.isna():
+                if i:
+                    na += 1
+            index = numpy.arange(len(self._data))
+            my_index = numpy.arange(len(self._data))
+            used_index = numpy.full((len(self._data)), -1)
+            result = numpy.sort(self._data)
+            i = len(self._data) - na
+            cycle = range(len(self._data))
+            if ascending is False:
+                result[:i] = result[:i][::-1]  # reverse only the first len - na entries
+                cycle = range(len(self._data) - 1, -1, -1)  # last valid position down to 0
+            result_index = index.copy()
+
+            # for every sorted value take the first source position (in scan order) that holds it and is unused
+            for i in range(len(result_index)):
+                find = 0
+                for search in cycle:
+                    check = 0
+                    for j in used_index:
+                        if my_index[search] == j:
+                            check = 1
+                    if (self._data[search] == result[i]) and check == 0 and find == 0:
+                        result_index[i] = index[search]
+                        used_index[i] = my_index[search]
+                        find = 1
+
+
+            # slots left unmatched above receive the index labels of the NaN entries, in original order
+            num = 0
+            for i in self.isna():
+                j = len(result_index) - na
+                if i and used_index[j] == -1:
+                    result_index[j] = index[num]
+                    used_index[j] = my_index[num]
+                    na -= 1
+                num += 1
+
+            return pandas.Series(result, result_index)
+
+        return hpat_pandas_series_sort_values_impl
+
+    if isinstance(self.data.dtype, types.UnicodeType):
+        # string data with an explicit index: the index labels are permuted together with the values
+        def hpat_pandas_series_sort_values_impl(self, axis=0, ascending=True, inplace=False, kind='quicksort',
+                                                na_position='last'):
+
+            index = self._index
+            my_index = numpy.arange(len(self._data))
+            used_index = numpy.full((len(self._data)), -1)
+            result = sorted(self._data)
+            cycle = range(len(self._data))
+            if ascending is False:
+                result = result[::-1]
+                cycle = range(len(self._data) - 1, -1, -1)
+            result_index = self._index.copy()
+            # for every sorted value take the first source position (in scan order) that holds it and is unused
+            for i in range(len(result_index)):
+                find = 0
+                for search in cycle:
+                    check = 0
+                    for j in used_index:
+                        if my_index[search] == j:
+                            check = 1
+                    if (self._data[search] == result[i]) and check == 0 and find == 0:
+                        result_index[i] = index[search]
+                        used_index[i] = my_index[search]
+                        find = 1
+
+            na = 0
+            for i in self.isna():
+                if i:
+                    na += 1
+            # slots left unmatched above receive the index labels of the NaN entries, in original order
+            num = 0
+            for i in self.isna():
+                j = len(result_index) - na
+                if i and used_index[j] == -1:
+                    result_index[j] = index[num]
+                    used_index[j] = my_index[num]
+                    na -= 1
+                num += 1
+
+            return pandas.Series(result, result_index)
+
+        return hpat_pandas_series_sort_values_impl
+
+    if isinstance(self.data.dtype, types.Number):
+        # numeric data with an explicit index: the index labels are permuted together with the values
+        def hpat_pandas_series_sort_values_impl(self, axis=0, ascending=True, inplace=False, kind='quicksort',
+                                                na_position='last'):
+
+            na = 0
+            for i in self.isna():
+                if i:
+                    na += 1
+            i = len(self._data) - na
+            index = self._index
+            my_index = numpy.arange(len(self._data))
+            used_index = numpy.full((len(self._data)), -1)
+            result = numpy.sort(self._data)
+            cycle = range(len(self._data))
+            if ascending is False:
+                result[:i] = result[:i][::-1]  # reverse only the first len - na entries
+                cycle = range(len(self._data) - 1, -1, -1)  # last valid position down to 0
+            result_index = self._index.copy()
+            # for every sorted value take the first source position (in scan order) that holds it and is unused
+            for i in range(len(result_index)):
+                find = 0
+                for search in cycle:
+                    check = 0
+                    for j in used_index:
+                        if my_index[search] == j:
+                            check = 1
+                    if (self._data[search] == result[i]) and check == 0 and find == 0:
+                        result_index[i] = index[search]
+                        used_index[i] = my_index[search]
+                        find = 1
+
+
+            # slots left unmatched above receive the index labels of the NaN entries, in original order
+            num = 0
+            for i in self.isna():
+                j = len(result_index) - na
+                if i and used_index[j] == -1:
+                    result_index[j] = index[num]
+                    used_index[j] = my_index[num]
+                    na -= 1
+                num += 1
+
+            return pandas.Series(result, result_index)
+
+        return hpat_pandas_series_sort_values_impl
diff --git a/hpat/hiframes/pd_series_ext.py b/hpat/hiframes/pd_series_ext.py
index 195b93bc4..0d06c0e3d 100644
--- a/hpat/hiframes/pd_series_ext.py
+++ b/hpat/hiframes/pd_series_ext.py
@@ -484,14 +484,14 @@ def resolve_rolling(self, ary, args, kws):
     #     sig.return_type = if_arr_to_series_type(sig.return_type)
     #     return sig
 
-    @bound_function("series.sort_values")
-    def resolve_sort_values(self, ary, args, kws):
-        # output will have permuted input index
-        out_index = ary.index
-        if out_index == types.none:
-            out_index = types.Array(types.intp, 1, 'C')
-        out = SeriesType(ary.dtype, ary.data, out_index)
-        return signature(out, *args)
+    # @bound_function("series.sort_values")
+    # def resolve_sort_values(self, ary, args, kws):
+    #     # output will have permuted input index
+    #     out_index = ary.index
+    #     if out_index == types.none:
+    #         out_index = types.Array(types.intp, 1, 'C')
+    #     out = SeriesType(ary.dtype, ary.data, out_index)
+    #     return signature(out, *args)
 
     # @bound_function("array.take")
     # def resolve_take(self, ary, args, kws):
@@ -994,7 +994,7 @@ def generic_expand_cumulative_series(self, args, kws):
 _not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten',
                            'resolve_shift', 'resolve_sum', 'resolve_copy', 'resolve_mean',
                            'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique',
-                           'resolve_prod', 'resolve_count', 'resolve_argsort']
+                           'resolve_prod', 'resolve_count', 'resolve_argsort', 'resolve_sort_values']
 
 # use ArrayAttribute for attributes not defined in SeriesAttribute
 
diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py
index 1d396203b..209275d6b 100644
--- a/hpat/tests/test_series.py
+++ b/hpat/tests/test_series.py
@@ -2220,6 +2220,14 @@ def test_impl(A):
         S = pd.Series(np.random.ranf(n))
         pd.testing.assert_series_equal(hpat_func(S), test_impl(S))
 
+    def test_series_sort_values2(self):
+        def test_impl(S):
+            return S.sort_values(ascending=False)
+        hpat_func = hpat.jit(test_impl)
+
+        S = pd.Series([6, 6, 2, 1, 3, 3, 2, 1, 2])
+        pd.testing.assert_series_equal(test_impl(S), hpat_func(S))
+
     def test_series_sort_values_index1(self):
         def test_impl(A, B):
             S = pd.Series(A, B)
@@ -2234,6 +2242,63 @@ def test_impl(A, B):
         B = np.random.ranf(n)
         pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B))
 
+    def test_series_sort_values_noidx(self):
+        def test_impl_true(S):
+            return S.sort_values(ascending=True)
+
+        def test_impl_false(S):
+            return S.sort_values(ascending=False)
+
+        hpat_func1 = hpat.jit(test_impl_true)
+        hpat_func2 = hpat.jit(test_impl_false)
+
+        data_test = [[6, 6, 2, 1, 3, 3, 2, 1, 2],
+                     [1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2],
+                     [6, 6.1, 2.2, 1, 3, 0, 2.2, 1, 2],
+                     [6, 6, 2, 1, 3, np.nan, np.nan, np.nan, np.nan],
+                     [3., 5.3, np.nan, np.nan, 33.2, 56.3, 4.4, 3.7, 8.9],
+                     ['a', 's', 'dd', 'm', 'll', '345', 'xrt', 'kd', 'qq'],
+                     ['dh', 'a', '', 'cv', 'b', '', 'b', 'b', 'p']
+                     ]
+
+        for input_data in data_test:
+            S = pd.Series(input_data)
+            result_ref = test_impl_true(S)
+            result = hpat_func1(S)
+            pd.testing.assert_series_equal(result, result_ref)
+            result_ref = test_impl_false(S)
+            result = hpat_func2(S)
+            pd.testing.assert_series_equal(result, result_ref)
+
+    def test_series_sort_values_idx(self):
+        def test_impl_true(S):
+            return S.sort_values(ascending=True)
+
+        def test_impl_false(S):
+            return S.sort_values(ascending=False)
+
+        hpat_func1 = hpat.jit(test_impl_true)
+        hpat_func2 = hpat.jit(test_impl_false)
+
+        data_test = [[6, 6, 2, 1, 3, 3, 2, 1, 2],
+                     [1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2],
+                     [6, 6.1, 2.2, 1, 3, 0, 2.2, 1, 2],
+                     [6, 6, 2, 1, 3, np.nan, np.nan, np.nan, np.nan],
+                     [3., 5.3, np.nan, np.nan, np.inf, np.inf, 4.4, 3.7, 8.9],
+                     ['a', 's', 'dd', 'm', 'll', '345', 'xrt', 'kd', 'qq'],
+                     ['dh', 'a', '', 'cv', 'b', '', 'b', 'b', 'p']
+                     ]
+
+        for input_data in data_test:
+            for index_data in data_test:
+                S = pd.Series(input_data, index_data)
+                result_ref = test_impl_true(S)
+                result = hpat_func1(S)
+                pd.testing.assert_series_equal(result, result_ref)
+                result_ref = test_impl_false(S)
+                result = hpat_func2(S)
+                pd.testing.assert_series_equal(result, result_ref)
+
     def test_series_sort_values_parallel1(self):
         # create `kde.parquet` file
         ParquetGenerator.gen_kde_pq()