Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
90 changes: 68 additions & 22 deletions sdc/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2926,7 +2926,7 @@ def hpat_pandas_series_take_impl(self, indices, axis=0, is_copy=False):


@sdc_overload_method(SeriesType, 'idxmax')
def hpat_pandas_series_idxmax(self, axis=None, skipna=True):
def hpat_pandas_series_idxmax(self, axis=None, skipna=None):
"""
Intel Scalable Dataframe Compiler User Guide
********************************************
Expand Down Expand Up @@ -2975,25 +2975,48 @@ def hpat_pandas_series_idxmax(self, axis=None, skipna=True):
if not isinstance(self.data.dtype, types.Number):
ty_checker.raise_exc(self.data.dtype, 'int, float', 'self.data.dtype')

if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is True):
if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is None):
ty_checker.raise_exc(skipna, 'bool', 'skipna')

if not (isinstance(axis, types.Omitted) or axis is None):
ty_checker.raise_exc(axis, 'None', 'axis')

if isinstance(self.index, types.NoneType) or self.index is None:
def hpat_pandas_series_idxmax_impl(self, axis=None, skipna=True):
return numpy.argmax(self._data)

return hpat_pandas_series_idxmax_impl
none_index = isinstance(self.index, types.NoneType) or self.index is None
if isinstance(self.data, StringArrayType):
def hpat_pandas_series_idxmax_str_impl(self, axis=None, skipna=None):
if skipna is None:
_skipna = True
else:
raise ValueError("Method idxmax(). Unsupported parameter 'skipna'=False with str data")

else:
def hpat_pandas_series_idxmax_index_impl(self, axis=None, skipna=True):
# no numpy.nanargmax is supported by Numba at this time
result = numpy.argmax(self._data)
if none_index == True: # noqa
return result
else:
return self._index[int(result)]

return hpat_pandas_series_idxmax_str_impl

def hpat_pandas_series_idxmax_impl(self, axis=None, skipna=None):
# return numpy.argmax(self._data)
if skipna is None:
_skipna = True
else:
_skipna = skipna

if _skipna:
result = numpy_like.nanargmax(self._data)
else:
result = numpy_like.argmax(self._data)

if none_index == True: # noqa
return result
else:
return self._index[int(result)]

return hpat_pandas_series_idxmax_index_impl
return numpy_like.argmax(self._data)

return hpat_pandas_series_idxmax_impl


@sdc_overload_method(SeriesType, 'mul')
Expand Down Expand Up @@ -3987,7 +4010,7 @@ def hpat_pandas_series_ge_impl(self, other, level=None, fill_value=None, axis=0)


@sdc_overload_method(SeriesType, 'idxmin')
def hpat_pandas_series_idxmin(self, axis=None, skipna=True):
def hpat_pandas_series_idxmin(self, axis=None, skipna=None):
"""
Intel Scalable Dataframe Compiler User Guide
********************************************
Expand Down Expand Up @@ -4036,25 +4059,48 @@ def hpat_pandas_series_idxmin(self, axis=None, skipna=True):
if not isinstance(self.data.dtype, types.Number):
ty_checker.raise_exc(self.data.dtype, 'int, float', 'self.data.dtype')

if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is True):
if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is None):
ty_checker.raise_exc(skipna, 'bool', 'skipna')

if not (isinstance(axis, types.Omitted) or axis is None):
ty_checker.raise_exc(axis, 'None', 'axis')

if isinstance(self.index, types.NoneType) or self.index is None:
def hpat_pandas_series_idxmin_impl(self, axis=None, skipna=True):
return numpy.argmin(self._data)

return hpat_pandas_series_idxmin_impl
none_index = isinstance(self.index, types.NoneType) or self.index is None
if isinstance(self.data, StringArrayType):
def hpat_pandas_series_idxmin_str_impl(self, axis=None, skipna=None):
if skipna is None:
_skipna = True
else:
raise ValueError("Method idxmin(). Unsupported parameter 'skipna'=False with str data")

else:
def hpat_pandas_series_idxmin_index_impl(self, axis=None, skipna=True):
# no numpy.nanargmin is supported by Numba at this time
result = numpy.argmin(self._data)
if none_index == True: # noqa
return result
else:
return self._index[int(result)]

return hpat_pandas_series_idxmin_str_impl

def hpat_pandas_series_idxmin_impl(self, axis=None, skipna=None):
# return numpy.argmin(self._data)
if skipna is None:
_skipna = True
else:
_skipna = skipna

if _skipna:
result = numpy_like.nanargmin(self._data)
else:
result = numpy_like.argmin(self._data)

if none_index == True: # noqa
return result
else:
return self._index[int(result)]

return hpat_pandas_series_idxmin_index_impl
return numpy_like.argmin(self._data)

return hpat_pandas_series_idxmin_impl


@sdc_overload_method(SeriesType, 'lt')
Expand Down
185 changes: 184 additions & 1 deletion sdc/functions/numpy_like.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

import numba
import numpy
import sys
import pandas
import numpy as np

Expand All @@ -42,6 +43,9 @@

import sdc
from sdc.utilities.sdc_typing_utils import TypeChecker
from sdc.utilities.utils import (sdc_overload, sdc_register_jitable,
min_dtype_int_val, max_dtype_int_val, min_dtype_float_val,
max_dtype_float_val)
from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size, str_arr_is_na)
from sdc.utilities.utils import sdc_overload, sdc_register_jitable
from sdc.utilities.prange_utils import parallel_chunks
Expand All @@ -51,6 +55,22 @@ def astype(self, dtype):
pass


def argmin(self):
    """Placeholder for the numpy-like ``argmin``; calling the stub directly returns None.

    The body is intentionally empty: the working implementation is attached
    in this module through ``sdc_overload(argmin)(sdc_arg_overload(min))``.
    """


def argmax(self):
    """Placeholder for the numpy-like ``argmax``; calling the stub directly returns None.

    The body is intentionally empty: the working implementation is attached
    in this module through ``sdc_overload(argmax)(sdc_arg_overload(max))``.
    """


def nanargmin(self):
    """Placeholder for the numpy-like ``nanargmin``; calling the stub directly returns None.

    The body is intentionally empty: the working implementation is attached
    in this module through ``sdc_overload(nanargmin)(sdc_nanarg_overload(min))``.
    """


def nanargmax(self):
    """Placeholder for the numpy-like ``nanargmax``; calling the stub directly returns None.

    The body is intentionally empty: the working implementation is attached
    in this module through ``sdc_overload(nanargmax)(sdc_nanarg_overload(max))``.
    """


def fillna(self, inplace=False, value=None):
    """Placeholder for the numpy-like ``fillna``; calling the stub directly returns None.

    The body is intentionally empty and ignores ``inplace`` and ``value``.
    NOTE(review): presumably the working implementation is attached through an
    ``sdc_overload`` registration elsewhere in this module -- confirm.
    """

Expand Down Expand Up @@ -133,7 +153,170 @@ def sdc_astype_number_impl(self, dtype):

return sdc_astype_number_impl

ty_checker.raise_exc(self.dtype, 'str or type', 'self.dtype')

def sdc_nanarg_overload(reduce_op):
    """Build the overload function for the numpy-like ``nanargmin``/``nanargmax``.

    Parameters
    ----------
    reduce_op : builtin ``min`` or ``max``
        Selects the reduction direction: ``min`` produces nanargmin and ``max``
        produces nanargmax. Any other value fails the initial-value lookup
        with ``KeyError`` when the overload is typed.

    Returns
    -------
    function
        ``nanarg_impl(self)``, intended to be registered with ``sdc_overload``.
    """
    def nanarg_impl(self):
        """
        Intel Scalable Dataframe Compiler Developer Guide
        *************************************************
        Parallel replacement of numpy.nanargmin/numpy.nanargmax.

        .. only:: developer
           Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k nanargmin
           Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k nanargmax

        """

        ty_checker = TypeChecker("numpy-like 'nanargmin'/'nanargmax'")

        # Decline non-array types before touching any array-only attribute,
        # so that an unrelated type cannot fail on `self.dtype`.
        if not isinstance(self, types.Array):
            return None

        dtype = self.dtype

        # Only integer and float dtypes have a starting value defined below.
        # Gating on types.Number would also admit complex dtypes and leave
        # `initial_result` unbound inside the implementation closure.
        if isinstance(dtype, (types.Integer, types.Float)):
            if isinstance(dtype, types.Integer):
                lowest, highest = min_dtype_int_val(dtype), max_dtype_int_val(dtype)
            else:
                lowest, highest = min_dtype_float_val(dtype), max_dtype_float_val(dtype)

            # Start from the opposite extreme so the first accepted element wins.
            initial_result = {min: highest, max: lowest}[reduce_op]
            isnan = get_isnan(dtype)
            # Sentinel position meaning "nothing selected yet".
            max_int64 = max_dtype_int_val(numpy_support.from_dtype(numpy.int64))

            def sdc_nanargmin_impl(self):
                chunks = parallel_chunks(len(self))
                # One partial (value, position) pair per chunk.
                arr_res = numpy.empty(shape=len(chunks), dtype=dtype)
                arr_pos = numpy.empty(shape=len(chunks), dtype=numpy.int64)
                for i in prange(len(chunks)):
                    chunk = chunks[i]
                    res = initial_result
                    pos = max_int64
                    for j in range(chunk.start, chunk.stop):
                        # Skip elements that do not win the reduction against `res`.
                        if reduce_op(res, self[j]) != self[j]:
                            continue
                        # NaN elements never become the result.
                        if isnan(self[j]):
                            continue
                        if res == self[j]:
                            # Tie: keep the smallest position.
                            pos = min(pos, j)
                        else:
                            pos = j
                            res = self[j]
                    arr_res[i] = res
                    arr_pos[i] = pos

                # Sequentially fold the per-chunk partial results.
                general_res = initial_result
                general_pos = max_int64
                for i in range(len(chunks)):
                    if reduce_op(general_res, arr_res[i]) != arr_res[i]:
                        continue
                    if general_res == arr_res[i]:
                        general_pos = min(general_pos, arr_pos[i])
                    else:
                        general_pos = arr_pos[i]
                        general_res = arr_res[i]

                # NOTE(review): if no element was accepted (e.g. every value was
                # skipped), the `max_int64` sentinel is returned unchanged.
                return general_pos

            return sdc_nanargmin_impl

        ty_checker.raise_exc(dtype, 'number', 'self.dtype')
    return nanarg_impl


# Attach implementations to the nanargmin/nanargmax stubs: the builtin passed to
# sdc_nanarg_overload (min or max) selects the reduction direction.
sdc_overload(nanargmin)(sdc_nanarg_overload(min))
sdc_overload(nanargmax)(sdc_nanarg_overload(max))


def sdc_arg_overload(reduce_op):
    """Build the overload function for the numpy-like ``argmin``/``argmax``.

    Parameters
    ----------
    reduce_op : builtin ``min`` or ``max``
        Selects the reduction direction: ``min`` produces argmin and ``max``
        produces argmax. Any other value fails the initial-value lookup
        with ``KeyError`` when the overload is typed.

    Returns
    -------
    function
        ``arg_impl(self)``, intended to be registered with ``sdc_overload``.
    """
    def arg_impl(self):
        """
        Intel Scalable Dataframe Compiler Developer Guide
        *************************************************
        Parallel replacement of numpy.argmin/numpy.argmax.

        .. only:: developer
           Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmin
           Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmax

        """

        ty_checker = TypeChecker("numpy-like 'argmin'/'argmax'")

        # Decline non-array types before touching any array-only attribute,
        # so that an unrelated type cannot fail on `self.dtype`.
        if not isinstance(self, types.Array):
            return None

        dtype = self.dtype

        # Only integer and float dtypes have a starting value defined below.
        # Gating on types.Number would also admit complex dtypes and leave
        # `initial_result` unbound inside the implementation closure.
        if isinstance(dtype, (types.Integer, types.Float)):
            if isinstance(dtype, types.Integer):
                lowest, highest = min_dtype_int_val(dtype), max_dtype_int_val(dtype)
            else:
                lowest, highest = min_dtype_float_val(dtype), max_dtype_float_val(dtype)

            # Start from the opposite extreme so the first accepted element wins.
            initial_result = {min: highest, max: lowest}[reduce_op]
            isnan = get_isnan(dtype)
            # Sentinel position meaning "nothing selected yet".
            max_int64 = max_dtype_int_val(numpy_support.from_dtype(numpy.int64))

            def sdc_argmin_impl(self):
                chunks = parallel_chunks(len(self))
                # One partial (value, position) pair per chunk.
                arr_res = numpy.empty(shape=len(chunks), dtype=dtype)
                arr_pos = numpy.empty(shape=len(chunks), dtype=numpy.int64)
                for i in prange(len(chunks)):
                    chunk = chunks[i]
                    res = initial_result
                    pos = max_int64
                    for j in range(chunk.start, chunk.stop):
                        if not isnan(self[j]):
                            # Skip elements that do not win the reduction against `res`.
                            if reduce_op(res, self[j]) != self[j]:
                                continue
                            if res == self[j]:
                                # Tie: keep the smallest position.
                                pos = min(pos, j)
                            else:
                                pos = j
                                res = self[j]
                        else:
                            # A NaN element takes over the running result; once
                            # `res` is NaN only the smallest NaN position is kept.
                            if numpy.isnan(res):
                                pos = min(pos, j)
                            else:
                                pos = j
                                res = self[j]

                    arr_res[i] = res
                    arr_pos[i] = pos

                # Sequentially fold the per-chunk partial results using the
                # same rules as the per-element loop above.
                general_res = initial_result
                general_pos = max_int64
                for i in range(len(chunks)):
                    if not isnan(arr_res[i]):
                        if reduce_op(general_res, arr_res[i]) != arr_res[i]:
                            continue
                        if general_res == arr_res[i]:
                            general_pos = min(general_pos, arr_pos[i])
                        else:
                            general_pos = arr_pos[i]
                            general_res = arr_res[i]
                    else:
                        if numpy.isnan(general_res):
                            general_pos = min(general_pos, arr_pos[i])
                        else:
                            general_pos = arr_pos[i]
                            general_res = arr_res[i]

                # NOTE(review): if no element was accepted, the `max_int64`
                # sentinel is returned unchanged.
                return general_pos

            return sdc_argmin_impl

        ty_checker.raise_exc(dtype, 'number', 'self.dtype')
    return arg_impl


# Attach implementations to the argmin/argmax stubs: the builtin passed to
# sdc_arg_overload (min or max) selects the reduction direction.
sdc_overload(argmin)(sdc_arg_overload(min))
sdc_overload(argmax)(sdc_arg_overload(max))


@sdc_overload(copy)
Expand Down
Loading