Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Commit

Permalink
Merge b7d4481 into 902ea10
Browse files Browse the repository at this point in the history
  • Loading branch information
1e-to committed Oct 25, 2019
2 parents 902ea10 + b7d4481 commit 0aa6d25
Show file tree
Hide file tree
Showing 4 changed files with 453 additions and 18 deletions.
321 changes: 321 additions & 0 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2218,3 +2218,324 @@ def hpat_pandas_series_median_impl(self, axis=None, skipna=True, level=None, num
return numpy.median(self._data)

return hpat_pandas_series_median_impl


@overload_method(SeriesType, 'argsort')
def hpat_pandas_series_argsort(self, axis=0, kind='quicksort', order=None):
    """
    Pandas Series method :meth:`pandas.Series.argsort` implementation.

    .. only:: developer

       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_argsort1
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_argsort2
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_argsort_noidx
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_argsort_idx
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_argsort_parallel

    Parameters
    -----------
    self: :class:`pandas.Series`
        input arg; only numeric data is accepted
    axis: :obj:`int`
        Has no effect but is accepted for compatibility with numpy.
        *unsupported*
    kind: {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
        Choice of sorting algorithm.
        *unsupported, the value is ignored and numpy.argsort() is called with its defaults*
    order: None
        Has no effect but is accepted for compatibility with numpy.
        *unsupported*

    Returns
    -------
    :obj:`pandas.Series`
        returns: Positions of values within the sort order with -1 indicating nan values.
    """

    _func_name = 'Method argsort().'

    if not isinstance(self, SeriesType):
        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

    if not isinstance(self.data.dtype, types.Number):
        raise TypingError('{} Currently function supports only numeric values. Given data type: {}'.format(
            _func_name, self.data.dtype))

    axis_is_accepted = isinstance(axis, (types.Omitted, types.Integer)) or axis == 0
    if not axis_is_accepted:
        raise TypingError('{} Unsupported parameters. Given axis: {}'.format(_func_name, axis))

    if isinstance(self.index, types.NoneType):
        # Series without an explicit index: the result gets a default index too.
        def hpat_pandas_series_argsort_impl(self, axis=0, kind='quicksort', order=None):

            data = self._data
            size = len(data)
            nan_mask = pandas.Series(data).isna()

            # First pass: count the NaN entries so the NaN-free buffer can be sized.
            nan_count = 0
            for is_nan in nan_mask:
                if is_nan:
                    nan_count += 1

            # Second pass: pack the non-NaN values, preserving their relative order.
            valid_values = numpy.empty(size - nan_count)
            write_pos = 0
            read_pos = 0
            for is_nan in nan_mask:
                if not is_nan:
                    valid_values[write_pos] = data[read_pos]
                    write_pos += 1
                read_pos += 1

            valid_order = numpy.argsort(valid_values)
            # The leading (size - nan_count) entries of the full argsort are taken
            # as the positions of the non-NaN values; sorting them restores the
            # original left-to-right order of those positions.
            full_order = numpy.argsort(data)
            valid_positions = numpy.sort(full_order[:len(full_order) - nan_count])

            # Positions that are never written (the NaN ones) keep the -1 marker.
            result = numpy.full(size, -1)
            rank = 0
            for pos in valid_positions:
                result[pos] = valid_order[rank]
                rank += 1

            return pandas.Series(result)

        return hpat_pandas_series_argsort_impl

    # Series with an explicit index: same computation, the index is carried over.
    def hpat_pandas_series_argsort_impl(self, axis=0, kind='quicksort', order=None):

        data = self._data
        size = len(data)
        nan_mask = pandas.Series(data).isna()

        # First pass: count the NaN entries so the NaN-free buffer can be sized.
        nan_count = 0
        for is_nan in nan_mask:
            if is_nan:
                nan_count += 1

        # Second pass: pack the non-NaN values, preserving their relative order.
        valid_values = numpy.empty(size - nan_count)
        write_pos = 0
        read_pos = 0
        for is_nan in nan_mask:
            if not is_nan:
                valid_values[write_pos] = data[read_pos]
                write_pos += 1
            read_pos += 1

        valid_order = numpy.argsort(valid_values)
        # The leading (size - nan_count) entries of the full argsort are taken
        # as the positions of the non-NaN values; sorting them restores the
        # original left-to-right order of those positions.
        full_order = numpy.argsort(data)
        valid_positions = numpy.sort(full_order[:len(full_order) - nan_count])

        # Positions that are never written (the NaN ones) keep the -1 marker.
        result = numpy.full(size, -1)
        rank = 0
        for pos in valid_positions:
            result[pos] = valid_order[rank]
            rank += 1

        return pandas.Series(result, self._index)

    return hpat_pandas_series_argsort_impl


@overload_method(SeriesType, 'sort_values')
def hpat_pandas_series_sort_values(self, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'):
    """
    Pandas Series method :meth:`pandas.Series.sort_values` implementation.

    .. only:: developer

       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values1
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values2
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values_index1
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values_noidx
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values_idx
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sort_values_parallel1

    Parameters
    -----------
    self: :class:`pandas.Series`
        input arg; an implementation is provided for string and numeric data only
    axis: 0 or :obj:`pandas.Series.index`
        Axis to direct sorting.
        *unsupported*
    ascending: :obj:`bool`, default: True
        If True, sort values in ascending order, otherwise descending.
    inplace: :obj:`bool`, default: False
        *unsupported, the value is ignored and a new Series is always returned*
    kind: {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
        Choice of sorting algorithm.
        *unsupported, string data is sorted with sorted(), numeric data with numpy.sort()*
    na_position: {'first' or 'last'}, default 'last'
        Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
        *unsupported*

    Returns
    -------
    :obj:`pandas.Series`
        returns: Series ordered by values.
    """

    _func_name = 'Method sort_values().'

    if not isinstance(self, SeriesType):
        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

    # Each literal must be compared explicitly: `ascending is True or False`
    # parses as `(ascending is True) or False` and never accepts False.
    if not (isinstance(ascending, (types.Omitted, types.Boolean)) or ascending is True or ascending is False):
        raise TypingError('{} Unsupported parameters. Given ascending: {}'.format(_func_name, ascending))

    # Four variants follow: {no index, explicit index} x {string data, numeric data}.
    # They share one scheme: sort the values, then for every sorted value find a
    # not-yet-used source position holding an equal value and take its index label.
    # NaN never compares equal, so NaN positions are assigned in a separate pass.

    if isinstance(self.index, types.NoneType) and isinstance(self.data.dtype, types.UnicodeType):
        def hpat_pandas_series_sort_values_impl(self, axis=0, ascending=True, inplace=False, kind='quicksort',
                                                na_position='last'):

            index = numpy.arange(len(self._data))
            my_index = numpy.arange(len(self._data))
            # used_index[k] is the source position consumed by output slot k, -1 if none yet.
            used_index = numpy.full((len(self._data)), -1)
            result = sorted(self._data)
            cycle = range(len(self._data))
            if ascending is False:
                result = result[::-1]
                # Scan source positions from the last valid one down to 0.
                cycle = range(len(self._data) - 1, -1, -1)
            result_index = index.copy()
            for i in range(len(result_index)):
                find = 0
                for search in cycle:
                    # check == 1 means this source position was already consumed.
                    check = 0
                    for j in used_index:
                        if my_index[search] == j:
                            check = 1
                    if (self._data[search] == result[i]) and check == 0 and find == 0:
                        result_index[i] = index[search]
                        used_index[i] = my_index[search]
                        find = 1

            na = 0
            for i in self.isna():
                if i:
                    na += 1
            # Assign the index labels of NaN entries to the trailing output slots.
            num = 0
            for i in self.isna():
                j = len(result_index) - na
                if i and used_index[j] == -1:
                    result_index[j] = index[num]
                    used_index[j] = my_index[num]
                    na -= 1
                num += 1

            return pandas.Series(result, result_index)

        return hpat_pandas_series_sort_values_impl

    if isinstance(self.index, types.NoneType) and isinstance(self.data.dtype, types.Number):
        def hpat_pandas_series_sort_values_impl(self, axis=0, ascending=True, inplace=False, kind='quicksort',
                                                na_position='last'):

            na = 0
            for i in self.isna():
                if i:
                    na += 1
            index = numpy.arange(len(self._data))
            my_index = numpy.arange(len(self._data))
            # used_index[k] is the source position consumed by output slot k, -1 if none yet.
            used_index = numpy.full((len(self._data)), -1)
            result = numpy.sort(self._data)
            # Number of leading non-NaN entries in the sorted data.
            valid = len(self._data) - na
            cycle = range(len(self._data))
            if ascending is False:
                # Reverse only the non-NaN prefix so the NaN tail stays in place.
                result[:valid] = result[:valid][::-1]
                # Start at the last valid position; starting at len() would read
                # one element past the end of the data.
                cycle = range(len(self._data) - 1, -1, -1)
            result_index = index.copy()

            for i in range(len(result_index)):
                find = 0
                for search in cycle:
                    # check == 1 means this source position was already consumed.
                    check = 0
                    for j in used_index:
                        if my_index[search] == j:
                            check = 1
                    if (self._data[search] == result[i]) and check == 0 and find == 0:
                        result_index[i] = index[search]
                        used_index[i] = my_index[search]
                        find = 1

            # Assign the index labels of NaN entries to the trailing output slots.
            num = 0
            for i in self.isna():
                j = len(result_index) - na
                if i and used_index[j] == -1:
                    result_index[j] = index[num]
                    used_index[j] = my_index[num]
                    na -= 1
                num += 1

            return pandas.Series(result, result_index)

        return hpat_pandas_series_sort_values_impl

    if isinstance(self.data.dtype, types.UnicodeType):
        def hpat_pandas_series_sort_values_impl(self, axis=0, ascending=True, inplace=False, kind='quicksort',
                                                na_position='last'):

            index = self._index
            my_index = numpy.arange(len(self._data))
            # used_index[k] is the source position consumed by output slot k, -1 if none yet.
            used_index = numpy.full((len(self._data)), -1)
            result = sorted(self._data)
            cycle = range(len(self._data))
            if ascending is False:
                result = result[::-1]
                # Scan source positions from the last valid one down to 0.
                cycle = range(len(self._data) - 1, -1, -1)
            result_index = self._index.copy()
            for i in range(len(result_index)):
                find = 0
                for search in cycle:
                    # check == 1 means this source position was already consumed.
                    check = 0
                    for j in used_index:
                        if my_index[search] == j:
                            check = 1
                    if (self._data[search] == result[i]) and check == 0 and find == 0:
                        result_index[i] = index[search]
                        used_index[i] = my_index[search]
                        find = 1

            na = 0
            for i in self.isna():
                if i:
                    na += 1
            # Assign the index labels of NaN entries to the trailing output slots.
            num = 0
            for i in self.isna():
                j = len(result_index) - na
                if i and used_index[j] == -1:
                    result_index[j] = index[num]
                    used_index[j] = my_index[num]
                    na -= 1
                num += 1

            return pandas.Series(result, result_index)

        return hpat_pandas_series_sort_values_impl

    if isinstance(self.data.dtype, types.Number):
        def hpat_pandas_series_sort_values_impl(self, axis=0, ascending=True, inplace=False, kind='quicksort',
                                                na_position='last'):

            na = 0
            for i in self.isna():
                if i:
                    na += 1
            # Number of leading non-NaN entries in the sorted data.
            valid = len(self._data) - na
            index = self._index
            my_index = numpy.arange(len(self._data))
            # used_index[k] is the source position consumed by output slot k, -1 if none yet.
            used_index = numpy.full((len(self._data)), -1)
            result = numpy.sort(self._data)
            cycle = range(len(self._data))
            if ascending is False:
                # Reverse only the non-NaN prefix so the NaN tail stays in place.
                result[:valid] = result[:valid][::-1]
                # Start at the last valid position; starting at len() would read
                # one element past the end of the data.
                cycle = range(len(self._data) - 1, -1, -1)
            result_index = self._index.copy()
            for i in range(len(result_index)):
                find = 0
                for search in cycle:
                    # check == 1 means this source position was already consumed.
                    check = 0
                    for j in used_index:
                        if my_index[search] == j:
                            check = 1
                    if (self._data[search] == result[i]) and check == 0 and find == 0:
                        result_index[i] = index[search]
                        used_index[i] = my_index[search]
                        find = 1

            # Assign the index labels of NaN entries to the trailing output slots.
            num = 0
            for i in self.isna():
                j = len(result_index) - na
                if i and used_index[j] == -1:
                    result_index[j] = index[num]
                    used_index[j] = my_index[num]
                    na -= 1
                num += 1

            return pandas.Series(result, result_index)

        return hpat_pandas_series_sort_values_impl

    # No implementation is returned for data types other than strings and numbers.
6 changes: 3 additions & 3 deletions hpat/hiframes/hiframes_typed.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,9 +969,9 @@ def run_call_series_quantile_default(A):
func = series_replace_funcs[func_name]
return self._replace_func(func, [series_var, S2])

if func_name in ('argsort', 'sort_values'):
return self._handle_series_sort(
lhs, rhs, series_var, func_name == 'argsort')
# if func_name in ('argsort', 'sort_values'):
# return self._handle_series_sort(
# lhs, rhs, series_var, func_name == 'argsort')

if func_name == 'rolling':
# XXX: remove rolling setup call, assuming still available in definitions
Expand Down
31 changes: 16 additions & 15 deletions hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,21 +477,21 @@ def resolve_astype(self, ary, args, kws):
def resolve_rolling(self, ary, args, kws):
return signature(SeriesRollingType(ary.dtype), *args)

@bound_function("array.argsort")
def resolve_argsort(self, ary, args, kws):
resolver = ArrayAttribute.resolve_argsort.__wrapped__
sig = resolver(self, ary.data, args, kws)
sig.return_type = if_arr_to_series_type(sig.return_type)
return sig
# @bound_function("array.argsort")
# def resolve_argsort(self, ary, args, kws):
# resolver = ArrayAttribute.resolve_argsort.__wrapped__
# sig = resolver(self, ary.data, args, kws)
# sig.return_type = if_arr_to_series_type(sig.return_type)
# return sig

@bound_function("series.sort_values")
def resolve_sort_values(self, ary, args, kws):
# output will have permuted input index
out_index = ary.index
if out_index == types.none:
out_index = types.Array(types.intp, 1, 'C')
out = SeriesType(ary.dtype, ary.data, out_index)
return signature(out, *args)
# @bound_function("series.sort_values")
# def resolve_sort_values(self, ary, args, kws):
# # output will have permuted input index
# out_index = ary.index
# if out_index == types.none:
# out_index = types.Array(types.intp, 1, 'C')
# out = SeriesType(ary.dtype, ary.data, out_index)
# return signature(out, *args)

# @bound_function("array.take")
# def resolve_take(self, ary, args, kws):
Expand Down Expand Up @@ -994,7 +994,8 @@ def generic_expand_cumulative_series(self, args, kws):
_not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten',
'resolve_shift', 'resolve_sum', 'resolve_copy', 'resolve_mean',
'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique',
'resolve_prod', 'resolve_count']
'resolve_prod', 'resolve_count', 'resolve_argsort', 'resolve_sort_values']


# use ArrayAttribute for attributes not defined in SeriesAttribute
for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items():
Expand Down
Loading

0 comments on commit 0aa6d25

Please sign in to comment.