IntelPython · kozlov-alexey · Jul 29, 2021 · Jun 12, 2021 · Jun 12, 2021 · Jun 13, 2021
diff --git a/sdc/__init__.py b/sdc/__init__.py
@@ -49,6 +49,7 @@
 
 import sdc.extensions.indexes.range_index_ext
 import sdc.extensions.indexes.int64_index_ext
+import sdc.extensions.indexes.multi_index_ext
 
 import sdc.extensions.sdc_hashmap_ext
 

diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -4574,7 +4574,7 @@ def _series_operator_add_str_impl(self, other):
                 else:
                     indexes_join_res = sdc_indexes_join_outer(left_index, right_index)
 
-                # FIXME_Numba#XXXX: remove sdc_fix_indexes_join call at all when issue is fixed
+                # FIXME_Numba#6686: remove sdc_fix_indexes_join call at all when issue is fixed
                 joined_index, left_indexer, right_indexer = sdc_fix_indexes_join(*indexes_join_res)
                 result_size = len(joined_index)
                 result_nan_mask = numpy.zeros(result_size, dtype=numpy.bool_)
@@ -4692,7 +4692,7 @@ def _series_operator_mul_common_impl(self, other):
                 else:
                     indexes_join_res = sdc_indexes_join_outer(left_index, right_index)
 
-                # FIXME_Numba#XXXX: remove sdc_fix_indexes_join call at all when issue is fixed
+                # FIXME_Numba#6686: remove sdc_fix_indexes_join call at all when issue is fixed
                 joined_index, left_indexer, right_indexer = sdc_fix_indexes_join(*indexes_join_res)
                 str_series_operand = self if self_is_string_series == True else other  # noqa
                 str_series_indexer = left_indexer if self_is_string_series == True else right_indexer  # noqa

diff --git a/sdc/datatypes/indexes/__init__.py b/sdc/datatypes/indexes/__init__.py
@@ -30,3 +30,4 @@
 from .positional_index_type import PositionalIndexType
 from .empty_index_type import EmptyIndexType
 from .int64_index_type import Int64IndexType
+from .multi_index_type import MultiIndexType
diff --git a/sdc/datatypes/indexes/multi_index_type.py b/sdc/datatypes/indexes/multi_index_type.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+# *****************************************************************************
+# Copyright (c) 2021, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#     Redistributions in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+from numba import types
+from numba.extending import (
+    models,
+    register_model,
+    make_attribute_wrapper,
+)
+
+
+class MultiIndexIteratorType(types.SimpleIteratorType):
+    """Type of an iterator over an index object; it yields values of the index dtype."""
+
+    def __init__(self, iterable):
+        elem_type = iterable.dtype
+        type_name = "iter[{}->{}],{}".format(iterable, elem_type, iterable.name)
+        self.parent = iterable
+        super().__init__(type_name, elem_type)
+
+
+@register_model(MultiIndexIteratorType)
+class MultiIndexIterModel(models.StructModel):
+    """Struct data model registered for values of MultiIndexIteratorType."""
+
+    def __init__(self, dmm, fe_type):
+        index_ref = ('parent', fe_type.parent)              # reference to the index object
+        counter = ('state', types.CPointer(types.int64))    # iterator state (i.e. counter)
+        super().__init__(dmm, fe_type, [index_ref, counter])
+
+
+class MultiIndexType(types.IterableType):
+    """Numba type of a multi-level index, parameterized by the types of its levels and codes."""
+
+    def __init__(self, levels, codes, is_named=False):
+        self.levels = levels
+        self.codes = codes
+        self.is_named = is_named
+        type_name = 'MultiIndexType({}, {}, {})'.format(levels, codes, is_named)
+        super().__init__(name=type_name)
+
+    @property
+    def iterator_type(self):
+        return MultiIndexIteratorType(self).iterator_type
+
+    @property
+    def dtype(self):
+        level_count = len(self.levels)
+        if isinstance(self.levels, types.UniTuple):
+            per_level = [self.levels.dtype] * level_count
+        else:
+            per_level = self.levels
+        return types.Tuple.from_types([level.dtype for level in per_level])  # tuple of dtypes of all levels
+
+    @property
+    def nlevels(self):
+        return len(self.levels)
+
+    @property
+    def levels_types(self):
+        uniform = isinstance(self.levels, types.UniTuple)
+        return [self.levels.dtype] * self.levels.count if uniform else self.levels
+
+    @property
+    def codes_types(self):
+        uniform = isinstance(self.codes, types.UniTuple)
+        return [self.codes.dtype] * self.codes.count if uniform else self.codes
+
+
+@register_model(MultiIndexType)
+class MultiIndexModel(models.StructModel):
+    """Struct data model for MultiIndexType with members: levels, codes and name."""
+
+    def __init__(self, dmm, fe_type):
+        # TO-DO: change to types.Optional
+        if fe_type.is_named:
+            name_type = types.unicode_type
+        else:
+            name_type = types.none
+
+        fields = [('levels', fe_type.levels), ('codes', fe_type.codes), ('name', name_type)]
+        super().__init__(dmm, fe_type, fields)
+
+
+for _field in ('levels', 'codes', 'name'):  # expose each struct member as a '_'-prefixed attribute
+    make_attribute_wrapper(MultiIndexType, _field, '_' + _field)
+del _field
diff --git a/sdc/datatypes/sdc_typeref.py b/sdc/datatypes/sdc_typeref.py
@@ -0,0 +1,63 @@
+# *****************************************************************************
+# Copyright (c) 2021, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#     Redistributions in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import pandas as pd
+
+from numba.core import types
+from numba.extending import (models, register_model, )
+from numba.core.typing.templates import infer_global
+
+from sdc.extensions.sdc_hashmap_type import ConcurrentDict, ConcurrentDictType
+from sdc.datatypes.indexes import MultiIndexType
+
+
+# FIXME_Numba#6781: due to overlapping of overload_methods for Numba TypeRef
+# we have to use our new SdcTypeRef to type objects created from types.Type
+# (i.e. ConcurrentDict meta-type). This should be removed once it's fixed.
+def sdc_make_new_typeref_class():
+    """Create and return a new, distinct SdcTypeRef type class with an opaque data model."""
+
+    class SdcTypeRef(types.Dummy):
+        """Type of a value that is itself a type (kept in `instance_type`)."""
+
+        def __init__(self, instance_type):
+            self.instance_type = instance_type
+            type_name = 'sdc_typeref[{}]'.format(instance_type)
+            super().__init__(type_name)
+
+    @register_model(SdcTypeRef)
+    class SdcTypeRefModel(models.OpaqueModel):
+        def __init__(self, dmm, fe_type):
+            super().__init__(dmm, fe_type)
+
+    return SdcTypeRef
+
+
+ConcurrentDictTypeRef = sdc_make_new_typeref_class()
+infer_global(ConcurrentDict, ConcurrentDictTypeRef(ConcurrentDictType))
+
+MultiIndexTypeRef = sdc_make_new_typeref_class()
+infer_global(pd.MultiIndex, MultiIndexTypeRef(MultiIndexType))
diff --git a/sdc/extensions/indexes/indexes_generic.py b/sdc/extensions/indexes/indexes_generic.py
@@ -30,12 +30,12 @@
 import pandas as pd
 
 from numba import types
-from numba.typed import Dict
+from numba.typed import Dict, List
 from numba.typed.typedobjectutils import _nonoptional
 
 from sdc.utilities.sdc_typing_utils import sdc_pandas_index_types, sdc_old_index_types
 from sdc.datatypes.indexes import *
-from sdc.utilities.utils import sdc_overload_method, sdc_overload
+from sdc.utilities.utils import sdc_overload
 from sdc.utilities.sdc_typing_utils import (
                         find_index_common_dtype,
                         sdc_indexes_wo_values_cache,
@@ -96,7 +96,9 @@ def sdc_indexes_operator_eq_ovld(self, other):
     # TO-DO: this is for numeric indexes only now, extend to string-index when it's added
     use_self_values = isinstance(self, sdc_pandas_index_types) and not isinstance(self, types.Array)
     use_other_values = isinstance(other, sdc_pandas_index_types) and not isinstance(other, types.Array)
-    one_operand_is_scalar = isinstance(self, types.Number) or isinstance(other, types.Number)
+
+    one_operand_is_scalar = (isinstance(other, sdc_pandas_index_types) and self is other.dtype
+                             or isinstance(self, sdc_pandas_index_types) and other is self.dtype)
 
     def sdc_indexes_operator_eq_impl(self, other):
 
@@ -217,8 +219,8 @@ def pd_fix_indexes_join_overload(joined, indexer1, indexer2):
     """ Wraps pandas index.join() into new function that returns indexers as arrays and not optional(array) """
 
     # This function is simply a workaround for problem with parfor lowering
-    # broken by indexers typed as types.Optional(Array) - FIXME_Numba#XXXX: remove it
-    # in all places whne parfor issue is fixed
+    # broken by indexers typed as types.Optional(Array) - FIXME_Numba#6686: remove it
+    # in all places when parfor issue is fixed
     def pd_fix_indexes_join_impl(joined, indexer1, indexer2):
         if indexer1 is not None:
             _indexer1 = _nonoptional(indexer1)
@@ -282,3 +284,109 @@ def sdc_np_array_overload(A):
 
     if isinstance(A, Int64IndexType):
         return lambda A: A._data
+
+
+def sdc_indexes_take(self, target):
+    """ Stub of index take operation, the implementation is attached via sdc_overload """
+
+
+@sdc_overload(sdc_indexes_take)
+def pd_fix_indexes_take_overload(self, indexes):
+    """ Workaround for the lack of a take() method common to all index types (StringArrayType is
+        one of the index types); returns None if self is not an index type, otherwise the impl """
+
+    # old-style indexes do not support index API, so numpy_like.take is used for them
+    if not isinstance(self, sdc_pandas_index_types):
+        return None
+
+    index_api_supported = not isinstance(self, sdc_old_index_types)
+
+    def pd_fix_indexes_take_impl(self, indexes):
+
+        if index_api_supported == True:  # noqa
+            res = self.take(indexes)
+        else:
+            res = numpy_like.take(self, indexes)
+
+        return res
+
+    return pd_fix_indexes_take_impl
+
+
+def sdc_indexes_rename(index, name):
+    """ Stub of index rename operation, the implementation is attached via sdc_overload """
+
+
+@sdc_overload(sdc_indexes_rename)
+def sdc_index_rename_ovld(index, name):
+    """ Selects implementation that returns index with its name replaced by the given name """
+
+    if not isinstance(index, sdc_pandas_index_types):
+        return None
+
+    if isinstance(index, sdc_old_index_types):
+        def sdc_indexes_rename_stub(index, name):
+            # cannot rename string or float indexes, TO-DO: StringIndexType
+            return index
+        return sdc_indexes_rename_stub
+
+    if isinstance(index, PositionalIndexType):
+        from sdc.extensions.indexes.positional_index_ext import init_positional_index
+
+        def sdc_indexes_rename_impl(index, name):
+            return init_positional_index(len(index), name)
+    elif isinstance(index, RangeIndexType):
+        def sdc_indexes_rename_impl(index, name):
+            return pd.RangeIndex(index.start, index.stop, index.step, name=name)
+    elif isinstance(index, Int64IndexType):
+        def sdc_indexes_rename_impl(index, name):
+            return pd.Int64Index(index, name=name)
+    else:
+        return None
+
+    return sdc_indexes_rename_impl
+
+
+def sdc_indexes_get_name(index):
+    """ Stub of index get-name operation, the implementation is attached via sdc_overload """
+
+
+@sdc_overload(sdc_indexes_get_name)
+def sdc_indexes_get_name_ovld(index):
+    """ Selects implementation returning index.name, or None where the index API is not available """
+
+    if not isinstance(index, sdc_pandas_index_types) or isinstance(index, sdc_old_index_types):
+        def sdc_indexes_get_name_stub(index):
+            # name is not available for string or float indexes, TO-DO: StringIndexType
+            return None
+        return sdc_indexes_get_name_stub
+
+    def sdc_indexes_get_name_impl(index):
+        return index.name
+    return sdc_indexes_get_name_impl
+
+
+def sdc_indexes_build_map_positions(self):
+    """ Stub of building a map of index positions, the implementation is attached via sdc_overload """
+
+
+@sdc_overload(sdc_indexes_build_map_positions)
+def sdc_indexes_build_map_positions_ovld(self):
+    """ Builds a typed Dict mapping each value of self to the typed List of positions where it occurs """
+
+    key_type, positions_type = self.dtype, types.ListType(types.int64)
+
+    def sdc_indexes_build_map_positions_impl(self):
+        positions_map = Dict.empty(key_type, positions_type)
+        for pos in range(len(self)):
+            key = self[pos]
+            known_positions = positions_map.get(key, None)
+            if known_positions is not None:
+                known_positions.append(pos)
+            else:
+                positions_map[key] = List.empty_list(types.int64)
+                positions_map[key].append(pos)
+
+        return positions_map
+
+    return sdc_indexes_build_map_positions_impl