Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
2 changes: 2 additions & 0 deletions sdc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import sdc.datatypes.hpat_pandas_series_rolling_functions
import sdc.datatypes.hpat_pandas_stringmethods_functions
import sdc.datatypes.hpat_pandas_groupby_functions
import sdc.datatypes.categorical.init
import sdc.datatypes.series.init

import sdc.extensions.indexes.range_index_ext

Expand Down
25 changes: 25 additions & 0 deletions sdc/datatypes/categorical/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# *****************************************************************************
# Copyright (c) 2020, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************
90 changes: 90 additions & 0 deletions sdc/datatypes/categorical/boxing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# *****************************************************************************
# Copyright (c) 2020, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

from numba.extending import box, unbox, NativeValue
from numba.core import boxing
from numba.core.imputils import lower_constant
from numba.np import arrayobj
from numba import types

from . import pandas_support
from .types import (
CategoricalDtypeType,
Categorical,
)


@box(CategoricalDtypeType)
def box_CategoricalDtype(typ, val, c):
    """
    Box a CategoricalDtypeType into a Python-level pandas dtype object.

    The pandas dtype is rebuilt from the Numba type `typ` alone (`val` is
    not read), passed through `serialize_object`, and the object produced
    by `unserialize` is returned.
    """
    pyapi = c.pyapi
    serialized_dtype = pyapi.serialize_object(pandas_support.as_dtype(typ))
    return pyapi.unserialize(serialized_dtype)


@unbox(CategoricalDtypeType)
def unbox_CategoricalDtype(typ, val, c):
    """
    Unbox a CategoricalDtypeType value.

    Neither `typ` nor `val` is inspected: the native representation is the
    context's dummy value wrapped in a NativeValue.
    """
    placeholder = c.context.get_dummy_value()
    return NativeValue(placeholder)


@box(Categorical)
def box_Categorical(typ, val, c):
    """
    Box a native Categorical into a pandas.Categorical object.

    An empty pandas.Categorical is constructed first; its `_dtype` and
    `_codes` attributes are then overwritten with the boxed dtype and the
    boxed codes array. All temporary Python references created along the
    way are decref'ed before the result is returned.
    """
    pyapi = c.pyapi

    # Import pandas and fetch the Categorical constructor.
    module_name = c.context.insert_const_string(c.builder.module, "pandas")
    pd_module = pyapi.import_module_noblock(module_name)

    categorical_cls = pyapi.object_getattr_string(pd_module, "Categorical")

    # Equivalent of calling pandas.Categorical([]).
    no_values = pyapi.list_new(c.context.get_constant(types.intp, 0))
    ctor_args = pyapi.tuple_pack([no_values])
    result = pyapi.call(categorical_cls, ctor_args)

    boxed_dtype = box_CategoricalDtype(typ.pd_dtype, val, c)
    pyapi.object_setattr_string(result, "_dtype", boxed_dtype)

    boxed_codes = boxing.box_array(typ.codes, val, c)
    pyapi.object_setattr_string(result, "_codes", boxed_codes)

    # Release temporaries in reverse order of creation.
    temporaries = (
        boxed_codes,
        boxed_dtype,
        ctor_args,
        no_values,
        categorical_cls,
        pd_module,
    )
    for temporary in temporaries:
        pyapi.decref(temporary)
    return result


@unbox(Categorical)
def unbox_Categorical(typ, val, c):
    """
    Unbox a Python object into a native Categorical.

    Only the `codes` attribute of `val` is read; it is unboxed as an array
    of type `typ.codes`, and the temporary reference to it is released.
    """
    pyapi = c.pyapi
    codes_obj = pyapi.object_getattr_string(val, "codes")
    unboxed = boxing.unbox_array(typ.codes, codes_obj, c)
    pyapi.decref(codes_obj)
    return unboxed


@lower_constant(Categorical)
def constant_Categorical(context, builder, ty, pyval):
    """
    Lower a constant Categorical.

    The constant is built from the codes of `pyval`, using `ty.codes` as
    the array type.
    """
    codes_type = ty.codes
    codes_values = pyval.codes
    return arrayobj.constant_array(context, builder, codes_type, codes_values)
38 changes: 38 additions & 0 deletions sdc/datatypes/categorical/functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# *****************************************************************************
# Copyright (c) 2020, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

from sdc.utilities.utils import sdc_overload_attribute

from .types import CategoricalDtypeType


@sdc_overload_attribute(CategoricalDtypeType, 'ordered')
def pd_CategoricalDtype_categories_overload(self):
    """
    Overload of the `ordered` attribute for CategoricalDtypeType.

    The flag is read once from `self` in this outer function and captured
    by the returned implementation, which simply returns it.
    """
    # NOTE: despite "categories" in the function name, this overloads 'ordered'.
    is_ordered = self.ordered

    def ordered_impl(self):
        return is_ordered

    return ordered_impl
44 changes: 44 additions & 0 deletions sdc/datatypes/categorical/init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# *****************************************************************************
# Copyright (c) 2020, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

"""
Init Numba extension for Pandas Categorical.
"""

from . import types
from . import typeof
from . import models
from . import boxing
from . import pdimpl
from . import rewrites
from . import functions

import numba


# Expose the categorical types as attributes of numba.types so that they can
# be used in objmode.
numba.types.CategoricalDtype = types.CategoricalDtypeType
numba.types.Categorical = types.Categorical
37 changes: 37 additions & 0 deletions sdc/datatypes/categorical/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# *****************************************************************************
# Copyright (c) 2020, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

from numba.extending import models
from numba.extending import register_model

from .types import (
CategoricalDtypeType,
Categorical,
)


# Data models for the categorical types: CategoricalDtypeType is registered
# with OpaqueModel, while Categorical is registered with ArrayModel.
register_model(CategoricalDtypeType)(models.OpaqueModel)
register_model(Categorical)(models.ArrayModel)
63 changes: 63 additions & 0 deletions sdc/datatypes/categorical/pandas_support.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# *****************************************************************************
# Copyright (c) 2020, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

import pandas as pd

from numba import types

from .types import CategoricalDtypeType


def from_dtype(pdtype):
    """
    Convert a Pandas *dtype* instance into the corresponding Numba type.

    Only instances of pd.CategoricalDtype are supported; the categories are
    passed on as a list (or None when the dtype has no categories) together
    with the `ordered` flag.
    NotImplementedError is raised for any other input.
    """
    # TODO: use issubclass
    if not isinstance(pdtype, pd.CategoricalDtype):
        raise NotImplementedError("%r cannot be represented as a Numba type"
                                  % (pdtype,))

    pd_categories = pdtype.categories
    categories = None if pd_categories is None else list(pd_categories)
    return CategoricalDtypeType(categories=categories,
                                ordered=pdtype.ordered)


def as_dtype(nbtype):
    """
    Convert a Numba type into the corresponding Pandas *dtype* instance.

    The type is passed through `types.unliteral` first. Only
    CategoricalDtypeType is supported; its `categories` and `ordered`
    values are forwarded to pd.CategoricalDtype.
    NotImplementedError is raised if no correspondence is known.
    """
    unliteral_type = types.unliteral(nbtype)
    if not isinstance(unliteral_type, CategoricalDtypeType):
        raise NotImplementedError("%r cannot be represented as a Pandas dtype"
                                  % (unliteral_type,))

    return pd.CategoricalDtype(categories=unliteral_type.categories,
                               ordered=unliteral_type.ordered)
Loading