IntelPython · kozlov-alexey · Apr 6, 2021 · Dec 7, 2020 · Jan 19, 2021 · Jan 19, 2021
diff --git a/sdc/datatypes/common_functions.py b/sdc/datatypes/common_functions.py
diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py
diff --git a/sdc/datatypes/hpat_pandas_groupby_functions.py b/sdc/datatypes/hpat_pandas_groupby_functions.py
@@ -41,12 +41,13 @@
 from numba.core.typing import signature
 from numba import literally
 
-from sdc.datatypes.common_functions import sdc_arrays_argsort, _sdc_asarray, _sdc_take
+from sdc.datatypes.common_functions import sdc_arrays_argsort, _sdc_asarray
 from sdc.datatypes.hpat_pandas_groupby_types import DataFrameGroupByType, SeriesGroupByType
 from sdc.utilities.sdc_typing_utils import TypeChecker, kwsparams2list, sigparams2list
 from sdc.utilities.utils import (sdc_overload, sdc_overload_method, sdc_register_jitable)
 from sdc.hiframes.pd_series_type import SeriesType
 from sdc.str_ext import string_type
+from sdc.functions.numpy_like import take as nplike_take
 
 
 performance_limitation = "This function may reveal slower performance than Pandas* on user system.\
@@ -218,15 +219,15 @@ def _sdc_pandas_groupby_generic_func_codegen(func_name, columns, column_loc,
             f'  column_data_{i} = {df}._data[{type_id}][{col_id}]',
             f'  for j in numpy.arange(res_index_len):',
             f'    idx = argsorted_index[j] if {groupby_param_sort} else j',
-            f'    group_arr_{i} = _sdc_take(column_data_{i}, list({groupby_dict}[group_keys[idx]]))',
+            f'    group_arr_{i} = sdc_take(column_data_{i}, list({groupby_dict}[group_keys[idx]]))',
             f'    group_series_{i} = pandas.Series(group_arr_{i})',
             f'    result_data_{i}[j] = group_series_{i}.{func_name}({extra_impl_params})',
         ]
 
     data = ', '.join(f'\'{column_names[i]}\': result_data_{i}' for i in range(len(columns)))
     func_lines.extend(['\n'.join([
         f'  if {groupby_param_sort}:',
-        f'    res_index = _sdc_take(group_keys, argsorted_index)',
+        f'    res_index = sdc_take(group_keys, argsorted_index)',
         f'  else:',
         f'    res_index = group_keys',
         f'  return pandas.DataFrame({{{data}}}, index=res_index)'
@@ -236,7 +237,7 @@ def _sdc_pandas_groupby_generic_func_codegen(func_name, columns, column_loc,
     global_vars = {'pandas': pandas,
                    'numpy': numpy,
                    '_sdc_asarray': _sdc_asarray,
-                   '_sdc_take': _sdc_take,
+                   'sdc_take': nplike_take,
                    'sdc_arrays_argsort': sdc_arrays_argsort}
 
     return func_text, global_vars
@@ -262,11 +263,11 @@ def _sdc_pandas_series_groupby_generic_func_codegen(func_name, func_params, defa
         f'  result_data = numpy.empty(res_index_len, dtype=res_dtype)',
         f'  for j in numpy.arange(res_index_len):',
         f'    idx = argsorted_index[j] if {groupby_param_sort} else j',
-        f'    group_arr = _sdc_take({series}._data, list({groupby_dict}[group_keys[idx]]))',
+        f'    group_arr = sdc_take({series}._data, list({groupby_dict}[group_keys[idx]]))',
         f'    group_series = pandas.Series(group_arr)',
         f'    result_data[j] = group_series.{func_name}({extra_impl_params})',
         f'  if {groupby_param_sort}:',
-        f'    res_index = _sdc_take(group_keys, argsorted_index)',
+        f'    res_index = sdc_take(group_keys, argsorted_index)',
         f'  else:',
         f'    res_index = group_keys',
         f'  return pandas.Series(data=result_data, index=res_index, name={series}._name)'
@@ -276,7 +277,7 @@ def _sdc_pandas_series_groupby_generic_func_codegen(func_name, func_params, defa
     global_vars = {'pandas': pandas,
                    'numpy': numpy,
                    '_sdc_asarray': _sdc_asarray,
-                   '_sdc_take': _sdc_take,
+                   'sdc_take': nplike_take,
                    'sdc_arrays_argsort': sdc_arrays_argsort}
 
     return func_text, global_vars

diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py
@@ -89,7 +89,7 @@ def hpat_pandas_stringmethods_upper_impl(self):
 from sdc.utilities.utils import sdc_overload_method, sdc_register_jitable
 from sdc.hiframes.api import get_nan_mask
 from sdc.str_arr_ext import str_arr_set_na_by_mask, create_str_arr_from_list
-from sdc.datatypes.common_functions import SDCLimitation
+from sdc.utilities.sdc_typing_utils import SDCLimitation
 
 
 @sdc_overload_method(StringMethodsType, 'center')

diff --git a/sdc/datatypes/indexes/__init__.py b/sdc/datatypes/indexes/__init__.py
@@ -0,0 +1,32 @@
+# *****************************************************************************
+# Copyright (c) 2020, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#     Redistributions in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+# Import order matters: these modules depend on each other, e.g. positional_index_type
+# requires range_index_type to be imported first.
+from .range_index_type import RangeIndexType
+from .positional_index_type import PositionalIndexType
+from .empty_index_type import EmptyIndexType
+from .int64_index_type import Int64IndexType
diff --git a/sdc/datatypes/indexes/empty_index_type.py b/sdc/datatypes/indexes/empty_index_type.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+# *****************************************************************************
+# Copyright (c) 2020, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#     Redistributions in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+from numba import types
+from numba.extending import (
+    models,
+    register_model,
+    make_attribute_wrapper
+)
+
+
+class EmptyIndexType(types.Type):
+    """Numba type of an empty index; ``is_named`` flags whether it has a name."""
+
+    # models the special case of pd.Index([]) whose dtype is 'object';
+    # overload typing functions read this attribute as the index dtype
+    dtype = types.pyobject
+
+    def __init__(self, is_named=False):
+        type_name = 'EmptyIndexType({})'.format(is_named)
+        self.is_named = is_named
+        super().__init__(name=type_name)
+
+
+@register_model(EmptyIndexType)
+class EmptyIndexModel(models.StructModel):
+    """Struct data model of EmptyIndexType holding a single 'name' member."""
+
+    def __init__(self, dmm, fe_type):
+        # a named index stores a unicode string, an unnamed one stores none
+        if fe_type.is_named:
+            name_type = types.unicode_type
+        else:
+            name_type = types.none
+
+        super().__init__(dmm, fe_type, [('name', name_type)])
+
+
+# FIXME_Numba#3372: add into numba.types to allow returning from objmode
+types.EmptyIndexType = EmptyIndexType
+
+
+# expose the 'name' member of the struct model as attribute '_name' of the type
+make_attribute_wrapper(EmptyIndexType, 'name', '_name')
diff --git a/sdc/datatypes/int64_index_type.py → sdc/datatypes/indexes/int64_index_type.py b/sdc/datatypes/int64_index_type.py → sdc/datatypes/indexes/int64_index_type.py
diff --git a/sdc/datatypes/indexes/positional_index_type.py b/sdc/datatypes/indexes/positional_index_type.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+# *****************************************************************************
+# Copyright (c) 2020, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#     Redistributions in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+from numba import types
+from numba.extending import (
+    models,
+    register_model,
+    make_attribute_wrapper
+)
+
+from sdc.datatypes.indexes import RangeIndexType
+
+
+class PositionalIndexType(types.IterableType):
+    """Numba iterable type that wraps a RangeIndexType in its ``data`` attribute."""
+
+    dtype = types.int64
+
+    def __init__(self, is_named=False):
+        type_name = 'PositionalIndexType({})'.format(is_named)
+        self.data = RangeIndexType(is_named)
+        self.is_named = is_named
+        super().__init__(name=type_name)
+
+    @property
+    def iterator_type(self):
+        # iteration is delegated to the wrapped range index type
+        return self.data.iterator_type
+
+
+@register_model(PositionalIndexType)
+class PositionalIndexModel(models.StructModel):
+    """Struct data model of PositionalIndexType holding a single 'data' member."""
+
+    def __init__(self, dmm, fe_type):
+        # the only member is the wrapped index, typed as fe_type.data
+        struct_members = [('data', fe_type.data)]
+        super().__init__(dmm, fe_type, struct_members)
+
+
+# FIXME_Numba#3372: add into numba.types to allow returning from objmode
+types.PositionalIndexType = PositionalIndexType
+
+
+# expose the 'data' member of the struct model as attribute '_data' of the type
+make_attribute_wrapper(PositionalIndexType, 'data', '_data')
diff --git a/sdc/datatypes/range_index_type.py → sdc/datatypes/indexes/range_index_type.py b/sdc/datatypes/range_index_type.py → sdc/datatypes/indexes/range_index_type.py
diff --git a/sdc/extensions/indexes/empty_index_ext.py b/sdc/extensions/indexes/empty_index_ext.py
@@ -0,0 +1,135 @@
+# -*- coding: utf-8 -*-
+# *****************************************************************************
+# Copyright (c) 2019-2020, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#     Redistributions in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import numba
+import numpy as np
+import pandas as pd
+
+from numba import types
+from numba.core import cgutils
+from numba.extending import (NativeValue, intrinsic, box, unbox, )
+from numba.core.typing.templates import signature
+
+from sdc.datatypes.indexes import EmptyIndexType
+from sdc.utilities.sdc_typing_utils import sdc_pandas_index_types
+from sdc.utilities.utils import sdc_overload, sdc_overload_attribute, sdc_overload_method
+from sdc.utilities.sdc_typing_utils import TypeChecker
+
+
+@intrinsic
+def init_empty_index(typingctx, name=None):
+    """Intrinsic constructing a native EmptyIndexType value.
+
+    Parameters
+    ----------
+    typingctx
+        Numba typing context (not used here).
+    name
+        Numba type of the index name; ``None`` or ``types.none`` produce
+        an unnamed index.
+
+    Returns
+    -------
+    tuple
+        ``(signature, codegen)`` where the signature returns
+        ``EmptyIndexType(is_named)`` and takes the single ``name`` argument.
+    """
+    name = types.none if name is None else name
+    is_named = name is not types.none
+
+    def codegen(context, builder, sig, args):
+        name_val, = args
+        # create index struct and store values
+        index_struct = cgutils.create_struct_proxy(
+            sig.return_type)(context, builder)
+
+        if is_named:
+            if isinstance(name, types.StringLiteral):
+                # literal names are materialized as a constant string
+                index_struct.name = numba.cpython.unicode.make_string_from_constant(
+                    context, builder, types.unicode_type, name.literal_value)
+            else:
+                index_struct.name = name_val
+                # the struct now holds the runtime name value, so take a reference;
+                # the signature has exactly one argument, hence sig.args[0]
+                if context.enable_nrt:
+                    context.nrt.incref(builder, sig.args[0], name_val)
+
+        return index_struct._getvalue()
+
+    ret_typ = EmptyIndexType(is_named)
+    sig = signature(ret_typ, name)
+    return sig, codegen
+
+
+@box(EmptyIndexType)
+def box_empty_index(typ, val, c):
+    """Box a native EmptyIndexType value into a ``pandas.Index`` built from an empty list.
+
+    The resulting object gets the stored name when ``typ.is_named`` is set
+    and ``None`` as its name otherwise.
+    """
+
+    mod_name = c.context.insert_const_string(c.builder.module, "pandas")
+    pd_class_obj = c.pyapi.import_module_noblock(mod_name)
+
+    empty_index = cgutils.create_struct_proxy(
+        typ)(c.context, c.builder, val)
+
+    data = c.pyapi.list_new(c.context.get_constant(types.int64, 0))
+    # pandas.Index takes (data, dtype, copy, name, ...) positionally, so dtype and
+    # copy are passed explicitly; otherwise the name would be treated as the dtype
+    dtype = c.pyapi.make_none()
+    copy = c.pyapi.bool_from_bool(
+        c.context.get_constant(types.boolean, False))
+    if typ.is_named:
+        name = c.pyapi.from_native_value(types.unicode_type, empty_index.name)
+    else:
+        name = c.pyapi.make_none()
+
+    res = c.pyapi.call_method(pd_class_obj, "Index", (data, dtype, copy, name))
+
+    # release the temporary Python objects created above
+    c.pyapi.decref(data)
+    c.pyapi.decref(dtype)
+    c.pyapi.decref(copy)
+    c.pyapi.decref(name)
+    c.pyapi.decref(pd_class_obj)
+    return res
+
+
+@unbox(EmptyIndexType)
+def unbox_empty_index(typ, val, c):
+    """Unbox a Python index object into a native EmptyIndexType struct."""
+
+    native_index = cgutils.create_struct_proxy(typ)(c.context, c.builder)
+
+    # the 'name' attribute is read from the object only for named index types
+    if typ.is_named:
+        py_name = c.pyapi.object_getattr_string(val, "name")
+        unboxed_name = numba.cpython.unicode.unbox_unicode_str(
+            types.unicode_type, py_name, c)
+        native_index.name = unboxed_name.value
+        c.pyapi.decref(py_name)
+
+    # report an error if a Python exception is pending
+    error_flag = cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
+    return NativeValue(native_index._getvalue(), is_error=error_flag)
+
+
+@sdc_overload_method(EmptyIndexType, 'take')
+def pd_empty_index_take_overload(self, indexes):
+    """Overload of ``take`` for EmptyIndexType.
+
+    ``indexes`` must be an array, a list or one of sdc_pandas_index_types with
+    an integer dtype; the implementation returns a new empty index created
+    with the name of ``self``.
+    """
+    if not isinstance(self, EmptyIndexType):
+        return None
+
+    _func_name = 'Method take().'
+    ty_checker = TypeChecker(_func_name)
+
+    valid_indexes_types = (types.Array, types.List) + sdc_pandas_index_types
+    is_supported_container = isinstance(indexes, valid_indexes_types)
+    # dtype is inspected only after the container type has been accepted
+    if not is_supported_container or not isinstance(indexes.dtype, types.Integer):
+        ty_checker.raise_exc(indexes, 'array/list of integers or integer index', 'indexes')
+
+    def pd_empty_index_take_impl(self, indexes):
+        return init_empty_index(name=self._name)
+
+    return pd_empty_index_take_impl
+
+
+@sdc_overload(len)
+def pd_empty_index_len_overload(self):
+    """Overload of ``len`` for EmptyIndexType; the implementation always returns 0."""
+    if isinstance(self, EmptyIndexType):
+        def pd_empty_index_len_impl(self):
+            return 0
+
+        return pd_empty_index_len_impl
+
+    # other types are not handled by this overload
+    return None