Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Commit

Permalink
Add Series.sort_values() and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
etotmeni committed Oct 25, 2019
1 parent d658914 commit b7d4481
Show file tree
Hide file tree
Showing 3 changed files with 295 additions and 11 deletions.
223 changes: 221 additions & 2 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2181,10 +2181,13 @@ def hpat_pandas_series_argsort(self, axis=0, kind='quicksort', order=None):
input arg
axis: :obj:`int`
Has no effect but is accepted for compatibility with numpy.
kind: {‘mergesort’, ‘quicksort’, ‘heapsort’}, default ‘quicksort’
Choice of sorting algorithm. See np.sort for more information. ‘mergesort’ is the only stable algorithm
*unsupported*
kind: {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
Choice of sorting algorithm. See np.sort for more information. 'mergesort' is the only stable algorithm
*unsupported, uses python func - sorted()*
order: None
Has no effect but is accepted for compatibility with numpy.
*unsupported*
Returns
-------
Expand Down Expand Up @@ -2262,3 +2265,219 @@ def hpat_pandas_series_argsort_impl(self, axis=0, kind='quicksort', order=None):
return hpat_pandas_series_argsort_impl


@overload_method(SeriesType, 'sort_values')
def hpat_pandas_series_sort_values(self, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'):
    """
    Pandas Series method :meth:`pandas.Series.sort_values` implementation.

    .. only:: developer

       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values1
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values2
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values_index1
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values_noidx
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values_idx
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values_parallel1

    Parameters
    -----------
    self: :class:`pandas.Series`
        input arg
    axis: 0 or :obj:`pandas.Series.index`
        Axis to direct sorting.
        *unsupported* (accepted, never read by the implementation)
    ascending: :obj:`bool`, default: True
        If True, sort values in ascending order, otherwise descending.
    inplace: :obj:`bool`, default: False
        *unsupported* (accepted, never read; a new Series is always returned)
    kind: {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
        Choice of sorting algorithm.
        *unsupported* (string data is ordered with ``sorted()``, numeric data with ``numpy.sort()``)
    na_position: {'first' or 'last'}, default 'last'
        Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
        *unsupported* (accepted, never read; missing values are assigned to the trailing positions)

    Returns
    -------
    :obj:`pandas.Series`
        returns: Series ordered by values.

    Raises
    ------
    TypingError
        If `self` is not a Series type or `ascending` is not a boolean.
    """

    _func_name = 'Method sort_values().'

    if not isinstance(self, SeriesType):
        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

    # `ascending` may arrive as a numba type (Omitted/Boolean) or as a plain Python bool.
    # The previous spelling `ascending is True or False` parsed as `(ascending is True) or False`
    # and therefore rejected a literal False.
    if not isinstance(ascending, (types.Omitted, types.Boolean, bool)):
        raise TypingError('{} Unsupported parameters. Given ascending: {}'.format(_func_name, ascending))

    # Four implementations follow, selected by (index present?, dtype kind). They share one scheme:
    #   1. sort the values;
    #   2. for every sorted slot find a not-yet-used source position holding an equal value
    #      and copy its index label into that slot;
    #   3. hand the trailing slots to the positions reported by isna(), in original order.
    # NOTE(review): step 2 is a triple nested loop (slots x positions x used_index scan).

    if isinstance(self.index, types.NoneType) and isinstance(self.data.dtype, types.UnicodeType):
        def hpat_pandas_series_sort_values_impl(self, axis=0, ascending=True, inplace=False, kind='quicksort',
                                                na_position='last'):

            # No explicit index: labels are the positions 0..n-1.
            index = numpy.arange(len(self._data))
            my_index = numpy.arange(len(self._data))
            # used_index[k] holds the source position consumed by output slot k, -1 while unassigned.
            used_index = numpy.full((len(self._data)), -1)
            result = sorted(self._data)
            cycle = range(len(self._data))
            if ascending is False:
                result = result[::-1]
                # Scan source positions back to front when sorting in descending order.
                cycle = range(len(self._data) - 1, -1, -1)
            result_index = index.copy()
            for i in range(len(result_index)):
                find = 0
                for search in cycle:
                    # check == 1 means this source position was already assigned to an earlier slot.
                    check = 0
                    for j in used_index:
                        if my_index[search] == j:
                            check = 1
                    if (self._data[search] == result[i]) and check == 0 and find == 0:
                        result_index[i] = index[search]
                        used_index[i] = my_index[search]
                        find = 1

            # Count missing values, then give them the last `na` slots in original order.
            na = 0
            for i in self.isna():
                if i:
                    na += 1
            num = 0
            for i in self.isna():
                j = len(result_index) - na
                if i and used_index[j] == -1:
                    result_index[j] = index[num]
                    used_index[j] = my_index[num]
                    na -= 1
                num += 1

            return pandas.Series(result, result_index)

        return hpat_pandas_series_sort_values_impl

    if isinstance(self.index, types.NoneType) and isinstance(self.data.dtype, types.Number):
        def hpat_pandas_series_sort_values_impl(self, axis=0, ascending=True, inplace=False, kind='quicksort',
                                                na_position='last'):

            na = 0
            for i in self.isna():
                if i:
                    na += 1
            # No explicit index: labels are the positions 0..n-1.
            index = numpy.arange(len(self._data))
            my_index = numpy.arange(len(self._data))
            # used_index[k] holds the source position consumed by output slot k, -1 while unassigned.
            used_index = numpy.full((len(self._data)), -1)
            # numpy.sort is documented to order NaN after all other values,
            # so the first len - na entries are the non-missing ones.
            result = numpy.sort(self._data)
            i = len(self._data) - na
            cycle = range(len(self._data))
            if ascending is False:
                # Reverse only the non-missing prefix; the NaN tail stays at the end.
                result[:i] = result[:i][::-1]
                # Start at the last valid position (len - 1). Starting at len, as before,
                # indexed one element past the end of the data on the first iteration.
                cycle = range(len(self._data) - 1, -1, -1)
            result_index = index.copy()

            for i in range(len(result_index)):
                find = 0
                for search in cycle:
                    # check == 1 means this source position was already assigned to an earlier slot.
                    check = 0
                    for j in used_index:
                        if my_index[search] == j:
                            check = 1
                    # NaN never compares equal, so NaN slots stay unassigned here.
                    if (self._data[search] == result[i]) and check == 0 and find == 0:
                        result_index[i] = index[search]
                        used_index[i] = my_index[search]
                        find = 1

            # Give the missing values the last `na` slots in original order.
            num = 0
            for i in self.isna():
                j = len(result_index) - na
                if i and used_index[j] == -1:
                    result_index[j] = index[num]
                    used_index[j] = my_index[num]
                    na -= 1
                num += 1

            return pandas.Series(result, result_index)

        return hpat_pandas_series_sort_values_impl

    if isinstance(self.data.dtype, types.UnicodeType):
        def hpat_pandas_series_sort_values_impl(self, axis=0, ascending=True, inplace=False, kind='quicksort',
                                                na_position='last'):

            # Explicit index: labels come from the Series, positions from my_index.
            index = self._index
            my_index = numpy.arange(len(self._data))
            # used_index[k] holds the source position consumed by output slot k, -1 while unassigned.
            used_index = numpy.full((len(self._data)), -1)
            result = sorted(self._data)
            cycle = range(len(self._data))
            if ascending is False:
                result = result[::-1]
                # Scan source positions back to front when sorting in descending order.
                cycle = range(len(self._data) - 1, -1, -1)
            result_index = self._index.copy()
            for i in range(len(result_index)):
                find = 0
                for search in cycle:
                    # check == 1 means this source position was already assigned to an earlier slot.
                    check = 0
                    for j in used_index:
                        if my_index[search] == j:
                            check = 1
                    if (self._data[search] == result[i]) and check == 0 and find == 0:
                        result_index[i] = index[search]
                        used_index[i] = my_index[search]
                        find = 1

            # Count missing values, then give them the last `na` slots in original order.
            na = 0
            for i in self.isna():
                if i:
                    na += 1
            num = 0
            for i in self.isna():
                j = len(result_index) - na
                if i and used_index[j] == -1:
                    result_index[j] = index[num]
                    used_index[j] = my_index[num]
                    na -= 1
                num += 1

            return pandas.Series(result, result_index)

        return hpat_pandas_series_sort_values_impl

    if isinstance(self.data.dtype, types.Number):
        def hpat_pandas_series_sort_values_impl(self, axis=0, ascending=True, inplace=False, kind='quicksort',
                                                na_position='last'):

            na = 0
            for i in self.isna():
                if i:
                    na += 1
            i = len(self._data) - na
            # Explicit index: labels come from the Series, positions from my_index.
            index = self._index
            my_index = numpy.arange(len(self._data))
            # used_index[k] holds the source position consumed by output slot k, -1 while unassigned.
            used_index = numpy.full((len(self._data)), -1)
            # numpy.sort is documented to order NaN after all other values,
            # so the first len - na entries are the non-missing ones.
            result = numpy.sort(self._data)
            cycle = range(len(self._data))
            if ascending is False:
                # Reverse only the non-missing prefix; the NaN tail stays at the end.
                result[:i] = result[:i][::-1]
                # Start at the last valid position (len - 1). Starting at len, as before,
                # indexed one element past the end of the data on the first iteration.
                cycle = range(len(self._data) - 1, -1, -1)
            result_index = self._index.copy()
            for i in range(len(result_index)):
                find = 0
                for search in cycle:
                    # check == 1 means this source position was already assigned to an earlier slot.
                    check = 0
                    for j in used_index:
                        if my_index[search] == j:
                            check = 1
                    # NaN never compares equal, so NaN slots stay unassigned here.
                    if (self._data[search] == result[i]) and check == 0 and find == 0:
                        result_index[i] = index[search]
                        used_index[i] = my_index[search]
                        find = 1

            # Give the missing values the last `na` slots in original order.
            num = 0
            for i in self.isna():
                j = len(result_index) - na
                if i and used_index[j] == -1:
                    result_index[j] = index[num]
                    used_index[j] = my_index[num]
                    na -= 1
                num += 1

            return pandas.Series(result, result_index)

        return hpat_pandas_series_sort_values_impl

    # Any other dtype: fall through and implicitly return None (no implementation is provided here).
18 changes: 9 additions & 9 deletions hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,14 +484,14 @@ def resolve_rolling(self, ary, args, kws):
# sig.return_type = if_arr_to_series_type(sig.return_type)
# return sig

@bound_function("series.sort_values")
def resolve_sort_values(self, ary, args, kws):
    """Build the typing signature for ``Series.sort_values``.

    The result is a ``SeriesType`` with the same dtype and data type as `ary`.
    Its index type is the input's index type, or a 1-D C-contiguous ``intp``
    array when the input has no index (``types.none``).
    """
    # output will have permuted input index
    index_type = types.Array(types.intp, 1, 'C') if ary.index == types.none else ary.index
    return signature(SeriesType(ary.dtype, ary.data, index_type), *args)
# @bound_function("series.sort_values")
# def resolve_sort_values(self, ary, args, kws):
# # output will have permuted input index
# out_index = ary.index
# if out_index == types.none:
# out_index = types.Array(types.intp, 1, 'C')
# out = SeriesType(ary.dtype, ary.data, out_index)
# return signature(out, *args)

# @bound_function("array.take")
# def resolve_take(self, ary, args, kws):
Expand Down Expand Up @@ -994,7 +994,7 @@ def generic_expand_cumulative_series(self, args, kws):
_not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten',
'resolve_shift', 'resolve_sum', 'resolve_copy', 'resolve_mean',
'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique',
'resolve_prod', 'resolve_count', 'resolve_argsort']
'resolve_prod', 'resolve_count', 'resolve_argsort', 'resolve_sort_values']


# use ArrayAttribute for attributes not defined in SeriesAttribute
Expand Down
65 changes: 65 additions & 0 deletions hpat/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2220,6 +2220,14 @@ def test_impl(A):
S = pd.Series(np.random.ranf(n))
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))

def test_series_sort_values2(self):
    """Descending sort of an integer Series with repeated values must match pandas."""
    def test_impl(S):
        return S.sort_values(ascending=False)

    jitted = hpat.jit(test_impl)
    series = pd.Series([6, 6, 2, 1, 3, 3, 2, 1, 2])

    # Evaluate the pandas reference first, then the compiled version.
    expected = test_impl(series)
    actual = jitted(series)
    pd.testing.assert_series_equal(expected, actual)

def test_series_sort_values_index1(self):
def test_impl(A, B):
S = pd.Series(A, B)
Expand All @@ -2234,6 +2242,63 @@ def test_impl(A, B):
B = np.random.ranf(n)
pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B))

def test_series_sort_values_noidx(self):
    """Compare compiled and pandas sort_values on Series built without an explicit index.

    Each data set is checked in ascending order first, then in descending order.
    """
    def test_impl_true(S):
        return S.sort_values(ascending=True)

    def test_impl_false(S):
        return S.sort_values(ascending=False)

    # (pandas reference, compiled counterpart): ascending pair first, descending pair second.
    checks = (
        (test_impl_true, hpat.jit(test_impl_true)),
        (test_impl_false, hpat.jit(test_impl_false)),
    )

    # Integers, floats, mixed int/float, NaN-containing and string data.
    data_test = [
        [6, 6, 2, 1, 3, 3, 2, 1, 2],
        [1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2],
        [6, 6.1, 2.2, 1, 3, 0, 2.2, 1, 2],
        [6, 6, 2, 1, 3, np.nan, np.nan, np.nan, np.nan],
        [3., 5.3, np.nan, np.nan, 33.2, 56.3, 4.4, 3.7, 8.9],
        ['a', 's', 'dd', 'm', 'll', '345', 'xrt', 'kd', 'qq'],
        ['dh', 'a', '', 'cv', 'b', '', 'b', 'b', 'p'],
    ]

    for values in data_test:
        series = pd.Series(values)
        for reference, compiled in checks:
            expected = reference(series)
            actual = compiled(series)
            pd.testing.assert_series_equal(actual, expected)

def test_series_sort_values_idx(self):
    """Compare compiled and pandas sort_values on Series built with an explicit index.

    Every data set is used both as values and as index, so all value/index
    combinations are checked, ascending first and then descending.
    """
    def test_impl_true(S):
        return S.sort_values(ascending=True)

    def test_impl_false(S):
        return S.sort_values(ascending=False)

    # (pandas reference, compiled counterpart): ascending pair first, descending pair second.
    checks = (
        (test_impl_true, hpat.jit(test_impl_true)),
        (test_impl_false, hpat.jit(test_impl_false)),
    )

    # Integers, floats, mixed int/float, NaN/inf-containing and string data.
    data_test = [
        [6, 6, 2, 1, 3, 3, 2, 1, 2],
        [1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2],
        [6, 6.1, 2.2, 1, 3, 0, 2.2, 1, 2],
        [6, 6, 2, 1, 3, np.nan, np.nan, np.nan, np.nan],
        [3., 5.3, np.nan, np.nan, np.inf, np.inf, 4.4, 3.7, 8.9],
        ['a', 's', 'dd', 'm', 'll', '345', 'xrt', 'kd', 'qq'],
        ['dh', 'a', '', 'cv', 'b', '', 'b', 'b', 'p'],
    ]

    # Values vary in the outer position, index labels in the inner one.
    combos = [(values, labels) for values in data_test for labels in data_test]
    for values, labels in combos:
        series = pd.Series(values, labels)
        for reference, compiled in checks:
            expected = reference(series)
            actual = compiled(series)
            pd.testing.assert_series_equal(actual, expected)

def test_series_sort_values_parallel1(self):
# create `kde.parquet` file
ParquetGenerator.gen_kde_pq()
Expand Down

0 comments on commit b7d4481

Please sign in to comment.