IntelPython · AlexanderKalistratov · Feb 19, 2020 · Feb 7, 2020 · Feb 7, 2020 · Feb 7, 2020
diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -2926,7 +2926,7 @@ def hpat_pandas_series_take_impl(self, indices, axis=0, is_copy=False):
 
 
 @sdc_overload_method(SeriesType, 'idxmax')
-def hpat_pandas_series_idxmax(self, axis=None, skipna=True):
+def hpat_pandas_series_idxmax(self, axis=None, skipna=None):
     """
     Intel Scalable Dataframe Compiler User Guide
     ********************************************
@@ -2975,25 +2975,48 @@ def hpat_pandas_series_idxmax(self, axis=None, skipna=True):
     if not isinstance(self.data.dtype, types.Number):
         ty_checker.raise_exc(self.data.dtype, 'int, float', 'self.data.dtype')
 
-    if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is True):
+    if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is None):
         ty_checker.raise_exc(skipna, 'bool', 'skipna')
 
     if not (isinstance(axis, types.Omitted) or axis is None):
         ty_checker.raise_exc(axis, 'None', 'axis')
 
-    if isinstance(self.index, types.NoneType) or self.index is None:
-        def hpat_pandas_series_idxmax_impl(self, axis=None, skipna=True):
-            return numpy.argmax(self._data)
-
-        return hpat_pandas_series_idxmax_impl
+    none_index = isinstance(self.index, types.NoneType) or self.index is None
+    if isinstance(self.data, StringArrayType):
+        def hpat_pandas_series_idxmax_str_impl(self, axis=None, skipna=None):
+            # skipna=None (the default) and skipna=True both proceed; only an explicit False is rejected
+            if skipna is not None:
+                if not skipna:
+                    raise ValueError("Method idxmax(). Unsupported parameter 'skipna'=False with str data")
 
-    else:
-        def hpat_pandas_series_idxmax_index_impl(self, axis=None, skipna=True):
-            # no numpy.nanargmax is supported by Numba at this time
             result = numpy.argmax(self._data)
+            if none_index == True:  # noqa
+                return result
+            else:
+                return self._index[int(result)]
+
+        return hpat_pandas_series_idxmax_str_impl
+
+    def hpat_pandas_series_idxmax_impl(self, axis=None, skipna=None):
+        # skipna=None means the argument was left at its default: skip NaN values
+        if skipna is None:
+            _skipna = True
+        else:
+            _skipna = skipna
+
+        # nanargmax skips NaN elements, argmax does not
+        if _skipna:
+            result = numpy_like.nanargmax(self._data)
+        else:
+            result = numpy_like.argmax(self._data)
+
+        # none_index comes from the enclosing overload: position without an index, label with one
+        if none_index == True:  # noqa
+            return result
+        else:
             return self._index[int(result)]
 
-        return hpat_pandas_series_idxmax_index_impl
+    return hpat_pandas_series_idxmax_impl
 
 
 @sdc_overload_method(SeriesType, 'mul')
@@ -3987,7 +4010,7 @@ def hpat_pandas_series_ge_impl(self, other, level=None, fill_value=None, axis=0)
 
 
 @sdc_overload_method(SeriesType, 'idxmin')
-def hpat_pandas_series_idxmin(self, axis=None, skipna=True):
+def hpat_pandas_series_idxmin(self, axis=None, skipna=None):
     """
     Intel Scalable Dataframe Compiler User Guide
     ********************************************
@@ -4036,25 +4059,48 @@ def hpat_pandas_series_idxmin(self, axis=None, skipna=True):
     if not isinstance(self.data.dtype, types.Number):
         ty_checker.raise_exc(self.data.dtype, 'int, float', 'self.data.dtype')
 
-    if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is True):
+    if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is None):
         ty_checker.raise_exc(skipna, 'bool', 'skipna')
 
     if not (isinstance(axis, types.Omitted) or axis is None):
         ty_checker.raise_exc(axis, 'None', 'axis')
 
-    if isinstance(self.index, types.NoneType) or self.index is None:
-        def hpat_pandas_series_idxmin_impl(self, axis=None, skipna=True):
-            return numpy.argmin(self._data)
-
-        return hpat_pandas_series_idxmin_impl
+    none_index = isinstance(self.index, types.NoneType) or self.index is None
+    if isinstance(self.data, StringArrayType):
+        def hpat_pandas_series_idxmin_str_impl(self, axis=None, skipna=None):
+            # skipna=None (the default) and skipna=True both proceed; only an explicit False is rejected
+            if skipna is not None:
+                if not skipna:
+                    raise ValueError("Method idxmin(). Unsupported parameter 'skipna'=False with str data")
 
-    else:
-        def hpat_pandas_series_idxmin_index_impl(self, axis=None, skipna=True):
-            # no numpy.nanargmin is supported by Numba at this time
             result = numpy.argmin(self._data)
+            if none_index == True:  # noqa
+                return result
+            else:
+                return self._index[int(result)]
+
+        return hpat_pandas_series_idxmin_str_impl
+
+    def hpat_pandas_series_idxmin_impl(self, axis=None, skipna=None):
+        # skipna=None means the argument was left at its default: skip NaN values
+        if skipna is None:
+            _skipna = True
+        else:
+            _skipna = skipna
+
+        # nanargmin skips NaN elements, argmin does not
+        if _skipna:
+            result = numpy_like.nanargmin(self._data)
+        else:
+            result = numpy_like.argmin(self._data)
+
+        # none_index comes from the enclosing overload: position without an index, label with one
+        if none_index == True:  # noqa
+            return result
+        else:
             return self._index[int(result)]
 
-        return hpat_pandas_series_idxmin_index_impl
+    return hpat_pandas_series_idxmin_impl
 
 
 @sdc_overload_method(SeriesType, 'lt')

diff --git a/sdc/functions/numpy_like.py b/sdc/functions/numpy_like.py
@@ -33,6 +33,7 @@
 
 import numba
 import numpy
+import sys
 import pandas
 import numpy as np
 
@@ -42,6 +43,9 @@
 
 import sdc
 from sdc.utilities.sdc_typing_utils import TypeChecker
+from sdc.utilities.utils import (sdc_overload, sdc_register_jitable,
+                                 min_dtype_int_val, max_dtype_int_val, min_dtype_float_val,
+                                 max_dtype_float_val)
 from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size, str_arr_is_na)
 from sdc.utilities.utils import sdc_overload, sdc_register_jitable
 from sdc.utilities.prange_utils import parallel_chunks
@@ -51,6 +55,22 @@ def astype(self, dtype):
     pass
 
 
+def argmin(self):
+    """Stub for the numpy.argmin replacement; its implementation is registered with sdc_overload."""
+
+
+def argmax(self):
+    """Stub for the numpy.argmax replacement; its implementation is registered with sdc_overload."""
+
+
+def nanargmin(self):
+    """Stub for the numpy.nanargmin replacement; its implementation is registered with sdc_overload."""
+
+
+def nanargmax(self):
+    """Stub for the numpy.nanargmax replacement; its implementation is registered with sdc_overload."""
+
+
 def fillna(self, inplace=False, value=None):
     pass
 
@@ -133,7 +153,170 @@ def sdc_astype_number_impl(self, dtype):
 
         return sdc_astype_number_impl
 
-    ty_checker.raise_exc(self.dtype, 'str or type', 'self.dtype')
+
+def sdc_nanarg_overload(reduce_op):
+    def nanarg_impl(self):
+        """
+        Intel Scalable Dataframe Compiler Developer Guide
+        *************************************************
+        Parallel replacement of numpy.nanargmin/numpy.nanargmax.
+        The reduce_op (builtin min or max) of the enclosing factory selects which of the two is built.
+
+        .. only:: developer
+        Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k nanargmin
+        Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k nanargmax
+        """
+
+        # decline the overload for non-arrays before reading array-only attributes
+        if not isinstance(self, types.Array):
+            return None
+
+        ty_checker = TypeChecker("numpy-like 'nanargmin'/'nanargmax'")
+        dtype = self.dtype
+        isnan = get_isnan(dtype)
+        max_int64 = max_dtype_int_val(numpy_support.from_dtype(numpy.int64))
+        if isinstance(dtype, types.Integer):
+            initial_result = {
+                min: max_dtype_int_val(dtype),
+                max: min_dtype_int_val(dtype),
+            }[reduce_op]
+        elif isinstance(dtype, types.Float):
+            initial_result = {
+                min: max_dtype_float_val(dtype),
+                max: min_dtype_float_val(dtype),
+            }[reduce_op]
+
+        if isinstance(dtype, (types.Integer, types.Float)):  # only dtypes that got an initial_result above
+            def sdc_nanargmin_impl(self):
+                chunks = parallel_chunks(len(self))
+                arr_res = numpy.empty(shape=len(chunks), dtype=dtype)  # best value found in each chunk
+                arr_pos = numpy.empty(shape=len(chunks), dtype=numpy.int64)  # position of that value in self
+                for i in prange(len(chunks)):
+                    chunk = chunks[i]
+                    res = initial_result
+                    pos = max_int64  # sentinel: nothing accepted in this chunk yet
+                    for j in range(chunk.start, chunk.stop):
+                        if reduce_op(res, self[j]) != self[j]:  # self[j] is not the min/max of the pair
+                            continue
+                        if isnan(self[j]):
+                            continue
+                        if res == self[j]:
+                            pos = min(pos, j)  # tie: keep the smallest position
+                        else:
+                            pos = j
+                            res = self[j]
+                    arr_res[i] = res
+                    arr_pos[i] = pos
+
+                general_res = initial_result
+                general_pos = max_int64
+                for i in range(len(chunks)):  # sequential merge of the per-chunk results
+                    if reduce_op(general_res, arr_res[i]) != arr_res[i]:
+                        continue
+                    if general_res == arr_res[i]:
+                        general_pos = min(general_pos, arr_pos[i])
+                    else:
+                        general_pos = arr_pos[i]
+                        general_res = arr_res[i]
+
+                return general_pos  # still max_int64 when no element was accepted (e.g. all NaN)
+
+            return sdc_nanargmin_impl
+
+        ty_checker.raise_exc(dtype, 'int, float', 'self.dtype')
+    return nanarg_impl
+
+
+sdc_overload(nanargmin)(sdc_nanarg_overload(min))  # nanargmin: typing function built with the min reduction
+sdc_overload(nanargmax)(sdc_nanarg_overload(max))  # nanargmax: typing function built with the max reduction
+
+
+def sdc_arg_overload(reduce_op):
+    def arg_impl(self):
+        """
+        Intel Scalable Dataframe Compiler Developer Guide
+        *************************************************
+        Parallel replacement of numpy.argmin/numpy.argmax.
+        The reduce_op (builtin min or max) of the enclosing factory selects which of the two is built.
+
+        .. only:: developer
+        Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmin
+        Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmax
+        """
+
+        # decline the overload for non-arrays before reading array-only attributes
+        if not isinstance(self, types.Array):
+            return None
+
+        ty_checker = TypeChecker("numpy-like 'argmin'/'argmax'")
+        dtype = self.dtype
+        isnan = get_isnan(dtype)
+        max_int64 = max_dtype_int_val(numpy_support.from_dtype(numpy.int64))
+        if isinstance(dtype, types.Integer):
+            initial_result = {
+                min: max_dtype_int_val(dtype),
+                max: min_dtype_int_val(dtype),
+            }[reduce_op]
+        elif isinstance(dtype, types.Float):
+            initial_result = {
+                min: max_dtype_float_val(dtype),
+                max: min_dtype_float_val(dtype),
+            }[reduce_op]
+
+        if isinstance(dtype, (types.Integer, types.Float)):  # only dtypes that got an initial_result above
+            def sdc_argmin_impl(self):
+                chunks = parallel_chunks(len(self))
+                arr_res = numpy.empty(shape=len(chunks), dtype=dtype)  # best value found in each chunk
+                arr_pos = numpy.empty(shape=len(chunks), dtype=numpy.int64)  # position of that value in self
+                for i in prange(len(chunks)):
+                    chunk = chunks[i]
+                    res = initial_result
+                    pos = max_int64  # sentinel: nothing accepted in this chunk yet
+                    for j in range(chunk.start, chunk.stop):
+                        if not isnan(self[j]):
+                            if reduce_op(res, self[j]) != self[j]:  # self[j] is not the min/max of the pair
+                                continue
+                            if res == self[j]:
+                                pos = min(pos, j)  # tie: keep the smallest position
+                            else:
+                                pos = j
+                                res = self[j]
+                        else:  # NaN element: it replaces res; the first NaN position is kept
+                            if numpy.isnan(res):
+                                pos = min(pos, j)
+                            else:
+                                pos = j
+                            res = self[j]
+
+                    arr_res[i] = res
+                    arr_pos[i] = pos
+                general_res = initial_result
+                general_pos = max_int64
+                for i in range(len(chunks)):  # sequential merge of the per-chunk results
+                    if not isnan(arr_res[i]):
+                        if reduce_op(general_res, arr_res[i]) != arr_res[i]:
+                            continue
+                        if general_res == arr_res[i]:
+                            general_pos = min(general_pos, arr_pos[i])
+                        else:
+                            general_pos = arr_pos[i]
+                            general_res = arr_res[i]
+                    else:
+                        if numpy.isnan(general_res):
+                            general_pos = min(general_pos, arr_pos[i])
+                        else:
+                            general_pos = arr_pos[i]
+                        general_res = arr_res[i]
+                return general_pos  # still max_int64 when no element was accepted (e.g. empty input)
+
+            return sdc_argmin_impl
+
+        ty_checker.raise_exc(dtype, 'int, float', 'self.dtype')
+    return arg_impl
+
+
+sdc_overload(argmin)(sdc_arg_overload(min))  # argmin: typing function built with the min reduction
+sdc_overload(argmax)(sdc_arg_overload(max))  # argmax: typing function built with the max reduction
 
 
 @sdc_overload(copy)