Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@

import sdc.extensions.indexes.range_index_ext
import sdc.extensions.indexes.int64_index_ext
import sdc.extensions.indexes.multi_index_ext

import sdc.extensions.sdc_hashmap_ext

Expand Down
4 changes: 2 additions & 2 deletions sdc/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4574,7 +4574,7 @@ def _series_operator_add_str_impl(self, other):
else:
indexes_join_res = sdc_indexes_join_outer(left_index, right_index)

# FIXME_Numba#XXXX: remove sdc_fix_indexes_join call at all when issue is fixed
# FIXME_Numba#6686: remove sdc_fix_indexes_join call at all when issue is fixed
joined_index, left_indexer, right_indexer = sdc_fix_indexes_join(*indexes_join_res)
result_size = len(joined_index)
result_nan_mask = numpy.zeros(result_size, dtype=numpy.bool_)
Expand Down Expand Up @@ -4692,7 +4692,7 @@ def _series_operator_mul_common_impl(self, other):
else:
indexes_join_res = sdc_indexes_join_outer(left_index, right_index)

# FIXME_Numba#XXXX: remove sdc_fix_indexes_join call at all when issue is fixed
# FIXME_Numba#6686: remove sdc_fix_indexes_join call at all when issue is fixed
joined_index, left_indexer, right_indexer = sdc_fix_indexes_join(*indexes_join_res)
str_series_operand = self if self_is_string_series == True else other # noqa
str_series_indexer = left_indexer if self_is_string_series == True else right_indexer # noqa
Expand Down
1 change: 1 addition & 0 deletions sdc/datatypes/indexes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@
from .positional_index_type import PositionalIndexType
from .empty_index_type import EmptyIndexType
from .int64_index_type import Int64IndexType
from .multi_index_type import MultiIndexType
111 changes: 111 additions & 0 deletions sdc/datatypes/indexes/multi_index_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
# *****************************************************************************
# Copyright (c) 2021, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

from numba import types
from numba.extending import (
models,
register_model,
make_attribute_wrapper,
)


class MultiIndexIteratorType(types.SimpleIteratorType):
    """Numba type of the iterator obtained from a MultiIndexType-like iterable.

    The iterated type is remembered in ``parent`` and the iterator yields
    values of that type's ``dtype``.
    """

    def __init__(self, iterable):
        self.parent = iterable
        item_type = iterable.dtype
        # The type name embeds both the iterable and the yielded type
        type_name = "iter[{}->{}],{}".format(iterable, item_type, iterable.name)
        super().__init__(type_name, item_type)


@register_model(MultiIndexIteratorType)
class MultiIndexIterModel(models.StructModel):
    """Struct data model backing MultiIndexIteratorType values."""

    def __init__(self, dmm, fe_type):
        # reference to the index object
        parent_member = ('parent', fe_type.parent)
        # iterator state (i.e. counter)
        state_member = ('state', types.CPointer(types.int64))
        super().__init__(dmm, fe_type, [parent_member, state_member])


class MultiIndexType(types.IterableType):
    """Numba type describing a multi-level index via its levels and codes.

    ``levels`` and ``codes`` are stored as given; the helper properties below
    treat them as Numba tuple types (a ``types.UniTuple`` is expanded into a
    list of its repeated element type, anything else is returned unchanged).
    ``is_named`` is stored as a flag and is part of the type name.
    """

    def __init__(self, levels, codes, is_named=False):
        self.levels = levels
        self.codes = codes
        self.is_named = is_named
        type_name = 'MultiIndexType({}, {}, {})'.format(levels, codes, is_named)
        super().__init__(name=type_name)

    @staticmethod
    def _member_types(tuple_type):
        """Return per-element types of tuple_type, expanding a UniTuple."""
        if isinstance(tuple_type, types.UniTuple):
            return [tuple_type.dtype] * tuple_type.count

        return tuple_type

    @property
    def iterator_type(self):
        """Type of the iterator produced when iterating over this index."""
        return MultiIndexIteratorType(self).iterator_type

    @property
    def dtype(self):
        """Tuple type built from the ``dtype`` of every level."""
        per_level_dtypes = [level.dtype for level in self.levels_types]
        return types.Tuple.from_types(per_level_dtypes)

    @property
    def nlevels(self):
        """Number of levels, i.e. ``len(self.levels)``."""
        return len(self.levels)

    @property
    def levels_types(self):
        """Types of the individual levels."""
        return self._member_types(self.levels)

    @property
    def codes_types(self):
        """Types of the individual codes."""
        return self._member_types(self.codes)


@register_model(MultiIndexType)
class MultiIndexModel(models.StructModel):
    """Struct data model of MultiIndexType with levels, codes and name fields."""

    def __init__(self, dmm, fe_type):
        # TO-DO: change to types.Optional
        if fe_type.is_named:
            name_type = types.unicode_type
        else:
            name_type = types.none

        struct_members = [
            ('levels', fe_type.levels),
            ('codes', fe_type.codes),
            ('name', name_type),
        ]
        super().__init__(dmm, fe_type, struct_members)


# Map the struct fields 'levels', 'codes' and 'name' of MultiIndexType to the
# attribute names '_levels', '_codes' and '_name' respectively.
make_attribute_wrapper(MultiIndexType, 'levels', '_levels')
make_attribute_wrapper(MultiIndexType, 'codes', '_codes')
make_attribute_wrapper(MultiIndexType, 'name', '_name')
63 changes: 63 additions & 0 deletions sdc/datatypes/sdc_typeref.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# *****************************************************************************
# Copyright (c) 2021, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

import pandas as pd

from numba.core import types
from numba.extending import (models, register_model, )
from numba.core.typing.templates import infer_global

from sdc.extensions.sdc_hashmap_type import ConcurrentDict, ConcurrentDictType
from sdc.datatypes.indexes import MultiIndexType


# FIXME_Numba#6781: due to overlapping of overload_methods for Numba TypeRef
# we have to use our new SdcTypeRef to type objects created from types.Type
# (i.e. ConcurrentDict meta-type). This should be removed once it's fixed.
def sdc_make_new_typeref_class():
    """Define and return a fresh ``SdcTypeRef`` type class.

    Each call creates a new class (together with its registered opaque data
    model), so separate callers obtain distinct typeref classes.
    """

    class SdcTypeRef(types.Dummy):
        """Reference to a type.

        Used when a type is passed as a value.
        """

        def __init__(self, instance_type):
            self.instance_type = instance_type
            type_name = 'sdc_typeref[{}]'.format(instance_type)
            super().__init__(type_name)

    @register_model(SdcTypeRef)
    class SdcTypeRefModel(models.OpaqueModel):
        """Opaque data model registered for the typeref class created above."""

        def __init__(self, dmm, fe_type):
            super().__init__(dmm, fe_type)

    return SdcTypeRef


# Separate typeref classes are created per meta-type: every call of
# sdc_make_new_typeref_class() defines a new SdcTypeRef class.
ConcurrentDictTypeRef = sdc_make_new_typeref_class()
MultiIndexTypeRef = sdc_make_new_typeref_class()

# Register the typing of the globals ConcurrentDict and pd.MultiIndex as
# typeref instances wrapping the corresponding SDC type classes.
infer_global(ConcurrentDict, ConcurrentDictTypeRef(ConcurrentDictType))
infer_global(pd.MultiIndex, MultiIndexTypeRef(MultiIndexType))
118 changes: 113 additions & 5 deletions sdc/extensions/indexes/indexes_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@
import pandas as pd

from numba import types
from numba.typed import Dict
from numba.typed import Dict, List
from numba.typed.typedobjectutils import _nonoptional

from sdc.utilities.sdc_typing_utils import sdc_pandas_index_types, sdc_old_index_types
from sdc.datatypes.indexes import *
from sdc.utilities.utils import sdc_overload_method, sdc_overload
from sdc.utilities.utils import sdc_overload
from sdc.utilities.sdc_typing_utils import (
find_index_common_dtype,
sdc_indexes_wo_values_cache,
Expand Down Expand Up @@ -96,7 +96,9 @@ def sdc_indexes_operator_eq_ovld(self, other):
# TO-DO: this is for numeric indexes only now, extend to string-index when it's added
use_self_values = isinstance(self, sdc_pandas_index_types) and not isinstance(self, types.Array)
use_other_values = isinstance(other, sdc_pandas_index_types) and not isinstance(other, types.Array)
one_operand_is_scalar = isinstance(self, types.Number) or isinstance(other, types.Number)

one_operand_is_scalar = (isinstance(other, sdc_pandas_index_types) and self is other.dtype
or isinstance(self, sdc_pandas_index_types) and other is self.dtype)

def sdc_indexes_operator_eq_impl(self, other):

Expand Down Expand Up @@ -217,8 +219,8 @@ def pd_fix_indexes_join_overload(joined, indexer1, indexer2):
""" Wraps pandas index.join() into new function that returns indexers as arrays and not optional(array) """

# This function is simply a workaround for problem with parfor lowering
# broken by indexers typed as types.Optional(Array) - FIXME_Numba#XXXX: remove it
# in all places whne parfor issue is fixed
# broken by indexers typed as types.Optional(Array) - FIXME_Numba#6686: remove it
# in all places when parfor issue is fixed
def pd_fix_indexes_join_impl(joined, indexer1, indexer2):
if indexer1 is not None:
_indexer1 = _nonoptional(indexer1)
Expand Down Expand Up @@ -282,3 +284,109 @@ def sdc_np_array_overload(A):

if isinstance(A, Int64IndexType):
return lambda A: A._data


def sdc_indexes_take(self, target):
    """Pure-Python placeholder; does nothing and returns None.

    An implementation is attached to this function through sdc_overload.
    """
    return None


@sdc_overload(sdc_indexes_take)
def pd_fix_indexes_take_overload(self, indexes):
    """ Overload of sdc_indexes_take() that picks a take implementation per index type.

    Workaround for the index types not sharing a common take method (StringArrayType
    is one of the index types): old-style index types go through numpy_like.take(),
    all other index types use their own take() method.

    Returns None (i.e. no implementation) when self is not one of sdc_pandas_index_types.
    """

    if not isinstance(self, sdc_pandas_index_types):
        return None

    # Resolved once at typing time and captured by the implementation below
    index_api_supported = not isinstance(self, sdc_old_index_types)

    def pd_fix_indexes_take_impl(self, indexes):

        if index_api_supported == True:  # noqa
            res = self.take(indexes)
        else:
            res = numpy_like.take(self, indexes)

        return res

    return pd_fix_indexes_take_impl


def sdc_indexes_rename(index, name):
    """Pure-Python placeholder; does nothing and returns None.

    An implementation is attached to this function through sdc_overload.
    """
    return None


@sdc_overload(sdc_indexes_rename)
def sdc_index_rename_ovld(index, name):
    """Overload of sdc_indexes_rename() returning an index carrying the given name.

    Old-style index types are returned unchanged. Positional, range and int64
    indexes are rebuilt with the new name. No implementation (None) is provided
    for non-index arguments and for any other index type.
    """

    if not isinstance(index, sdc_pandas_index_types):
        return None

    if isinstance(index, sdc_old_index_types):
        def sdc_indexes_rename_stub(index, name):
            # cannot rename string or float indexes, TO-DO: StringIndexType
            return index
        return sdc_indexes_rename_stub

    if isinstance(index, PositionalIndexType):
        # imported only when this branch is taken, as in the original code
        from sdc.extensions.indexes.positional_index_ext import init_positional_index

        def sdc_indexes_rename_impl(index, name):
            return init_positional_index(len(index), name)
        return sdc_indexes_rename_impl

    if isinstance(index, RangeIndexType):
        def sdc_indexes_rename_impl(index, name):
            return pd.RangeIndex(index.start, index.stop, index.step, name=name)
        return sdc_indexes_rename_impl

    if isinstance(index, Int64IndexType):
        def sdc_indexes_rename_impl(index, name):
            return pd.Int64Index(index, name=name)
        return sdc_indexes_rename_impl

    # remaining index types have no rename implementation
    return None


def sdc_indexes_get_name(index):
    """Pure-Python placeholder; does nothing and returns None.

    An implementation is attached to this function through sdc_overload.
    """
    return None


@sdc_overload(sdc_indexes_get_name)
def sdc_indexes_get_name_ovld(index):
    """Overload of sdc_indexes_get_name() returning the name of an index.

    Index types that are not old-style return their ``name`` attribute; every
    other argument type gets an implementation that returns None.
    """

    supports_name = (isinstance(index, sdc_pandas_index_types)
                     and not isinstance(index, sdc_old_index_types))

    if not supports_name:
        def sdc_indexes_get_name_stub(index):
            # no name to read here (original note mentions string or float
            # indexes), TO-DO: StringIndexType
            return None
        return sdc_indexes_get_name_stub

    def sdc_indexes_get_name_impl(index):
        return index.name
    return sdc_indexes_get_name_impl


def sdc_indexes_build_map_positions(self):
    """Pure-Python placeholder; does nothing and returns None.

    An implementation is attached to this function through sdc_overload.
    """
    return None


@sdc_overload(sdc_indexes_build_map_positions)
def sdc_indexes_build_map_positions_ovld(self):
    """Overload of sdc_indexes_build_map_positions().

    The implementation returns a typed Dict mapping every distinct value of
    ``self`` to a typed List (int64) of the positions at which it occurs, in
    increasing order.
    """

    key_type = self.dtype
    positions_type = types.ListType(types.int64)

    def sdc_indexes_build_map_positions_impl(self):
        positions_by_value = Dict.empty(key_type, positions_type)
        for pos in range(len(self)):
            value = self[pos]
            known_positions = positions_by_value.get(value, None)
            if known_positions is None:
                # first occurrence: start a new positions list for this value
                new_positions = List.empty_list(types.int64)
                new_positions.append(pos)
                positions_by_value[value] = new_positions
            else:
                known_positions.append(pos)

        return positions_by_value

    return sdc_indexes_build_map_positions_impl
Loading