Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ Building on Linux with setuptools

export PYVER=<3.6 or 3.7>
export NUMPYVER=<1.16 or 1.17>
conda create -n sdc-env -q -y -c intel/label/beta -c defaults -c intel -c conda-forge python=$PYVER numpy=$NUMPYVER tbb-devel tbb4py numba=0.52 pandas=1.2.0 pyarrow=2.0.0 gcc_linux-64 gxx_linux-64
conda create -n sdc-env -q -y -c intel/label/beta -c defaults -c intel -c conda-forge python=$PYVER numpy=$NUMPYVER tbb-devel tbb4py numba=0.53.1 pandas=1.2.0 pyarrow=4.0.1 gcc_linux-64 gxx_linux-64
source activate sdc-env
git clone https://github.com/IntelPython/sdc.git
cd sdc
Expand Down Expand Up @@ -123,7 +123,7 @@ Building on Windows with setuptools

set PYVER=<3.6 or 3.7>
set NUMPYVER=<1.16 or 1.17>
conda create -n sdc-env -c intel/label/beta -c defaults -c intel -c conda-forge python=%PYVER% numpy=%NUMPYVER% tbb-devel tbb4py numba=0.52 pandas=1.2.0 pyarrow=2.0.0
conda create -n sdc-env -c intel/label/beta -c defaults -c intel -c conda-forge python=%PYVER% numpy=%NUMPYVER% tbb-devel tbb4py numba=0.53.1 pandas=1.2.0 pyarrow=4.0.1
conda activate sdc-env
set INCLUDE=%INCLUDE%;%CONDA_PREFIX%\Library\include
set LIB=%LIB%;%CONDA_PREFIX%\Library\lib
Expand Down
2 changes: 1 addition & 1 deletion conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{% set NUMBA_VERSION = "==0.53.1" %}
{% set PANDAS_VERSION = "==1.2.0" %}
{% set PYARROW_VERSION = "==2.0.0" %}
{% set PYARROW_VERSION = "==4.0.1" %}

package:
name: sdc
Expand Down
4 changes: 2 additions & 2 deletions docs/source/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ Distribution includes Intel SDC for Python 3.6 and 3.7 for Windows and Linux pla
Intel SDC conda package can be installed using the steps below:
::

> conda create -n sdc_env python=<3.7 or 3.6> pyarrow=2.0.0 pandas=1.2.0 -c anaconda -c conda-forge
> conda create -n sdc_env python=<3.7 or 3.6> pyarrow=4.0.1 pandas=1.2.0 -c anaconda -c conda-forge
> conda activate sdc_env
> conda install sdc -c intel/label/beta -c intel -c defaults -c conda-forge --override-channels

Intel SDC wheel package can be installed using the steps below:
::

> conda create -n sdc_env python=<3.7 or 3.6> pip pyarrow=2.0.0 pandas=1.2.0 -c anaconda -c conda-forge
> conda create -n sdc_env python=<3.7 or 3.6> pip pyarrow=4.0.1 pandas=1.2.0 -c anaconda -c conda-forge
> conda activate sdc_env
> pip install --index-url https://pypi.anaconda.org/intel/label/beta/simple --extra-index-url https://pypi.anaconda.org/intel/simple --extra-index-url https://pypi.org/simple sdc

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
numpy>=1.16
pandas==1.2.0
pyarrow==2.0.0
pyarrow==4.0.1
numba==0.53.1
tbb
tbb-devel
1 change: 1 addition & 0 deletions sdc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@

import sdc.extensions.indexes.range_index_ext
import sdc.extensions.indexes.int64_index_ext
import sdc.extensions.indexes.multi_index_ext

import sdc.extensions.sdc_hashmap_ext

Expand Down
4 changes: 2 additions & 2 deletions sdc/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4574,7 +4574,7 @@ def _series_operator_add_str_impl(self, other):
else:
indexes_join_res = sdc_indexes_join_outer(left_index, right_index)

# FIXME_Numba#XXXX: remove sdc_fix_indexes_join call at all when issue is fixed
# FIXME_Numba#6686: remove sdc_fix_indexes_join call at all when issue is fixed
joined_index, left_indexer, right_indexer = sdc_fix_indexes_join(*indexes_join_res)
result_size = len(joined_index)
result_nan_mask = numpy.zeros(result_size, dtype=numpy.bool_)
Expand Down Expand Up @@ -4692,7 +4692,7 @@ def _series_operator_mul_common_impl(self, other):
else:
indexes_join_res = sdc_indexes_join_outer(left_index, right_index)

# FIXME_Numba#XXXX: remove sdc_fix_indexes_join call at all when issue is fixed
# FIXME_Numba#6686: remove sdc_fix_indexes_join call at all when issue is fixed
joined_index, left_indexer, right_indexer = sdc_fix_indexes_join(*indexes_join_res)
str_series_operand = self if self_is_string_series == True else other # noqa
str_series_indexer = left_indexer if self_is_string_series == True else right_indexer # noqa
Expand Down
1 change: 1 addition & 0 deletions sdc/datatypes/indexes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@
from .positional_index_type import PositionalIndexType
from .empty_index_type import EmptyIndexType
from .int64_index_type import Int64IndexType
from .multi_index_type import MultiIndexType
111 changes: 111 additions & 0 deletions sdc/datatypes/indexes/multi_index_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
# *****************************************************************************
# Copyright (c) 2021, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

from numba import types
from numba.extending import (
models,
register_model,
make_attribute_wrapper,
)


class MultiIndexIteratorType(types.SimpleIteratorType):
    """Numba type of the iterator produced when iterating over a MultiIndex.

    Keeps the iterated index type in ``parent`` and yields values of that
    index type's ``dtype``.
    """

    def __init__(self, iterable):
        element_type = iterable.dtype
        type_name = "iter[{}->{}],{}".format(
            iterable, element_type, iterable.name
        )

        # The iterated index type is kept on the instance; the data model
        # registered for this type reads it via ``fe_type.parent``.
        self.parent = iterable
        super().__init__(type_name, element_type)


@register_model(MultiIndexIteratorType)
class MultiIndexIterModel(models.StructModel):
    """Struct data model registered for ``MultiIndexIteratorType``."""

    def __init__(self, dmm, fe_type):
        index_member = ('parent', fe_type.parent)  # reference to the index object
        state_member = ('state', types.CPointer(types.int64))  # iterator state (i.e. counter)
        super().__init__(dmm, fe_type, [index_member, state_member])


class MultiIndexType(types.IterableType):
    """Numba type of a MultiIndex, described by its levels and codes types.

    ``levels`` and ``codes`` are stored exactly as passed in; the properties
    below treat a ``types.UniTuple`` specially and use any other value as is.
    ``is_named`` is a flag that selects the type of the ``name`` member in the
    data model registered for this type.
    """

    def __init__(self, levels, codes, is_named=False):
        self.levels = levels
        self.codes = codes
        self.is_named = is_named

        type_name = 'MultiIndexType({}, {}, {})'.format(levels, codes, is_named)
        super().__init__(name=type_name)

    @property
    def iterator_type(self):
        """Type of the iterator obtained when iterating over this index."""
        return MultiIndexIteratorType(self).iterator_type

    @property
    def dtype(self):
        """Tuple type built from the ``dtype`` of each level type."""
        per_level_dtypes = [level_type.dtype for level_type in self.levels_types]
        return types.Tuple.from_types(per_level_dtypes)

    @property
    def nlevels(self):
        """Number of levels, i.e. the length of ``levels``."""
        return len(self.levels)

    @property
    def levels_types(self):
        """Types of the individual levels.

        A homogeneous ``UniTuple`` is expanded into a list that repeats its
        element type ``count`` times; anything else is returned unchanged.
        """
        levels = self.levels
        if not isinstance(levels, types.UniTuple):
            return levels

        return [levels.dtype] * levels.count

    @property
    def codes_types(self):
        """Types of the individual codes containers.

        A homogeneous ``UniTuple`` is expanded into a list that repeats its
        element type ``count`` times; anything else is returned unchanged.
        """
        codes = self.codes
        if not isinstance(codes, types.UniTuple):
            return codes

        return [codes.dtype] * codes.count


@register_model(MultiIndexType)
class MultiIndexModel(models.StructModel):
    """Struct data model registered for ``MultiIndexType``.

    Holds three members: ``levels``, ``codes`` and ``name``.
    """

    def __init__(self, dmm, fe_type):
        # The name member is a unicode string only for named indexes.
        # TODO: change to types.Optional
        if fe_type.is_named:
            name_type = types.unicode_type
        else:
            name_type = types.none

        members = [
            ('levels', fe_type.levels),
            ('codes', fe_type.codes),
            ('name', name_type),
        ]
        super().__init__(dmm, fe_type, members)


# Expose the struct members of the MultiIndexType data model as attributes
# under underscore-prefixed names ('levels' -> '_levels', 'codes' -> '_codes',
# 'name' -> '_name'). Per Numba's make_attribute_wrapper these are read-only.
make_attribute_wrapper(MultiIndexType, 'levels', '_levels')
make_attribute_wrapper(MultiIndexType, 'codes', '_codes')
make_attribute_wrapper(MultiIndexType, 'name', '_name')
63 changes: 63 additions & 0 deletions sdc/datatypes/sdc_typeref.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# *****************************************************************************
# Copyright (c) 2021, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

import pandas as pd

from numba.core import types
from numba.extending import (models, register_model, )
from numba.core.typing.templates import infer_global

from sdc.extensions.sdc_hashmap_type import ConcurrentDict, ConcurrentDictType
from sdc.datatypes.indexes import MultiIndexType


# FIXME_Numba#6781: overload_methods defined for Numba's TypeRef overlap,
# so we have to use our own SdcTypeRef classes to type objects created from
# types.Type (e.g. the ConcurrentDict meta-type). Remove this once the issue is fixed.
def sdc_make_new_typeref_class():
    """Define and return a brand-new ``SdcTypeRef`` type class.

    Every call creates a separate class and registers an opaque data model
    for it, so each caller gets its own distinct typeref class.
    """

    class SdcTypeRef(types.Dummy):
        """Reference to a type.

        Used when a type is passed as a value.
        """

        def __init__(self, instance_type):
            self.instance_type = instance_type
            type_name = 'sdc_typeref[{}]'.format(instance_type)
            super().__init__(type_name)

    @register_model(SdcTypeRef)
    class SdcTypeRefModel(models.OpaqueModel):
        """Opaque data model for the ``SdcTypeRef`` class defined above."""

        def __init__(self, dmm, fe_type):
            super().__init__(dmm, fe_type)

    return SdcTypeRef


# Two separate typeref classes, one for each meta-type typed below.
ConcurrentDictTypeRef = sdc_make_new_typeref_class()
MultiIndexTypeRef = sdc_make_new_typeref_class()

# Register the types of the global objects ConcurrentDict and pd.MultiIndex:
# each is typed as an instance of its typeref class wrapping the matching
# SDC type class.
infer_global(ConcurrentDict, ConcurrentDictTypeRef(ConcurrentDictType))
infer_global(pd.MultiIndex, MultiIndexTypeRef(MultiIndexType))
Loading