Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions sdc/datatypes/common_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,63 @@
import numpy

from numba import types
from numba.errors import TypingError
from numba.extending import overload
from numba import numpy_support

import sdc
from sdc.str_arr_ext import (string_array_type, num_total_chars, append_string_array_to)


class TypeChecker:
    """
    Validate object type and raise TypingError if the type is invalid, e.g.:
        Method nsmallest(). The object n
         given: bool
         expected: int
    """
    msg_template = '{} The object {}\n given: {}\n expected: {}'

    def __init__(self, func_name):
        """
        Parameters
        ----------
        func_name: :obj:`str`
            name of the function whose argument types are checked;
            it is used as the prefix of the error message
        """
        self.func_name = func_name

    def raise_exc(self, data, expected_types, name=''):
        """
        Raise exception with unified message
        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        expected_types: :obj:`str`
            expected types, inserted verbatim into the exception message
        name: :obj:`str`
            name of the parameter

        Raises
        ------
        TypingError
            always, with a message built from ``msg_template``
        """
        msg = self.msg_template.format(self.func_name, name, data, expected_types)
        raise TypingError(msg)

    @staticmethod
    def _expected_names(accepted_type):
        """
        Build the printable name of the accepted type(s)
        Parameters
        ----------
        accepted_type: :obj:`type` or :obj:`tuple`
            a single type or a (possibly nested) tuple of types

        Returns
        -------
        :obj:`str`
            type name, or comma-separated type names for a tuple
        """
        if isinstance(accepted_type, tuple):
            return ', '.join(TypeChecker._expected_names(ty) for ty in accepted_type)
        # Fall back to str() for objects that carry no __name__ attribute.
        return getattr(accepted_type, '__name__', str(accepted_type))

    def check(self, data, accepted_type, name=''):
        """
        Check data type belongs to specified type
        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        accepted_type: :obj:`type` or :obj:`tuple` of :obj:`type`
            accepted type, or a tuple of accepted types (as for isinstance)
        name: :obj:`str`
            name of the parameter

        Raises
        ------
        TypingError
            if ``data`` is not an instance of ``accepted_type``
        """
        if not isinstance(data, accepted_type):
            # isinstance() accepts tuples, which have no __name__; resolve the
            # names through the helper so the mismatch is reported as TypingError.
            self.raise_exc(data, self._expected_names(accepted_type), name=name)


def has_literal_value(var, value):
'''Used during typing to check that variable var is a Numba literal value equal to value'''

Expand Down
49 changes: 1 addition & 48 deletions sdc/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,59 +40,12 @@

import sdc
import sdc.datatypes.common_functions as common_functions
from sdc.datatypes.common_functions import TypeChecker
from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
from sdc.hiframes.pd_series_ext import SeriesType
from sdc.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars)
from sdc.utils import to_array

class TypeChecker:
    """
    Validate object type and raise TypingError if the type is invalid, e.g.:
        Method nsmallest(). The object n
         given: bool
         expected: int
    """
    msg_template = '{} The object {}\n given: {}\n expected: {}'

    def __init__(self, func_name):
        """
        Parameters
        ----------
        func_name: :obj:`str`
            name of the function whose argument types are checked;
            it is used as the prefix of the error message
        """
        self.func_name = func_name

    def raise_exc(self, data, expected_types, name=''):
        """
        Raise exception with unified message
        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        expected_types: :obj:`str`
            expected types, inserted verbatim into the exception message
        name: :obj:`str`
            name of the parameter

        Raises
        ------
        TypingError
            always, with a message built from ``msg_template``
        """
        msg = self.msg_template.format(self.func_name, name, data, expected_types)
        raise TypingError(msg)

    @staticmethod
    def _expected_names(accepted_type):
        """
        Build the printable name of the accepted type(s)
        Parameters
        ----------
        accepted_type: :obj:`type` or :obj:`tuple`
            a single type or a (possibly nested) tuple of types

        Returns
        -------
        :obj:`str`
            type name, or comma-separated type names for a tuple
        """
        if isinstance(accepted_type, tuple):
            return ', '.join(TypeChecker._expected_names(ty) for ty in accepted_type)
        # Fall back to str() for objects that carry no __name__ attribute.
        return getattr(accepted_type, '__name__', str(accepted_type))

    def check(self, data, accepted_type, name=''):
        """
        Check data type belongs to specified type
        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        accepted_type: :obj:`type` or :obj:`tuple` of :obj:`type`
            accepted type, or a tuple of accepted types (as for isinstance)
        name: :obj:`str`
            name of the parameter

        Raises
        ------
        TypingError
            if ``data`` is not an instance of ``accepted_type``
        """
        if not isinstance(data, accepted_type):
            # isinstance() accepts tuples, which have no __name__; resolve the
            # names through the helper so the mismatch is reported as TypingError.
            self.raise_exc(data, self._expected_names(accepted_type), name=name)


@overload(operator.getitem)
def hpat_pandas_series_getitem(self, idx):
Expand Down
58 changes: 47 additions & 11 deletions sdc/datatypes/hpat_pandas_stringmethods_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,9 @@

@overload_method(StringMethodsType, 'upper')
def hpat_pandas_stringmethods_upper(self):
_func_name = 'Method stringmethods.upper().'

if not isinstance(self, StringMethodsType):
raise TypingError('{} The object must be a pandas.core.strings. Given: {}'.format(_func_name, self))
ty_checker = TypeChecker('Method stringmethods.upper().')
ty_checker.check(self, StringMethodsType)

def hpat_pandas_stringmethods_upper_parallel_impl(self):
from numba.parfor import (init_prange, min_checker, internal_prange)
Expand Down Expand Up @@ -83,16 +82,17 @@ def hpat_pandas_stringmethods_upper_impl(self):

import numba
from numba.extending import overload_method
from numba.errors import TypingError

from sdc.datatypes.common_functions import TypeChecker
from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType


_hpat_pandas_stringmethods_autogen_global_dict = {
'pandas': pandas,
'numpy': numpy,
'numba': numba,
'StringMethodsType': StringMethodsType
'StringMethodsType': StringMethodsType,
'TypeChecker': TypeChecker
}

_hpat_pandas_stringmethods_functions_params = {
Expand Down Expand Up @@ -166,8 +166,8 @@ def hpat_pandas_stringmethods_{methodname}(self{methodparams}):
returns :obj:`pandas.Series` object
\"\"\"

if not isinstance(self, StringMethodsType):
raise TypingError('Method {methodname}(). The object must be a pandas.core.strings. Given: ' % self)
ty_checker = TypeChecker('Method {methodname}().')
ty_checker.check(self, StringMethodsType)

def hpat_pandas_stringmethods_{methodname}_impl(self{methodparams}):
item_count = len(self._data)
Expand All @@ -181,12 +181,48 @@ def hpat_pandas_stringmethods_{methodname}_impl(self{methodparams}):
else:
result[it] = item

return pandas.Series(result, name=self._data._name)
return pandas.Series(result, self._data._index, name=self._data._name)

return hpat_pandas_stringmethods_{methodname}_impl
"""


@overload_method(StringMethodsType, 'isupper')
def hpat_pandas_stringmethods_isupper(self):
    """
    Pandas Series method :meth:`pandas.core.strings.StringMethods.isupper()` implementation.

    Note: only Unicode string elements are supported. Numpy.NaN elements are not supported.

    .. only:: developer

        Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str2str

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg

    Returns
    -------
    :obj:`pandas.Series`
        returns :obj:`pandas.Series` object
    """

    # Reject anything that is not a StringMethodsType (TypeChecker raises TypingError).
    TypeChecker('Method isupper().').check(self, StringMethodsType)

    def hpat_pandas_stringmethods_isupper_impl(self):
        series = self._data
        flags = numpy.empty(len(series), numba.types.boolean)
        # One boolean per element, stored at the element's position.
        for pos, text in enumerate(series._data):
            flags[pos] = text.isupper()

        # Keep the index and name of the underlying series in the result.
        return pandas.Series(flags, series._index, name=series._name)

    return hpat_pandas_stringmethods_isupper_impl


@overload_method(StringMethodsType, 'len')
def hpat_pandas_stringmethods_len(self):
"""
Expand All @@ -209,16 +245,16 @@ def hpat_pandas_stringmethods_len(self):
returns :obj:`pandas.Series` object
"""

if not isinstance(self, StringMethodsType):
raise TypingError('Method len(). The object must be a pandas.core.strings. Given: {}'.format(self))
ty_checker = TypeChecker('Method len().')
ty_checker.check(self, StringMethodsType)

def hpat_pandas_stringmethods_len_impl(self):
item_count = len(self._data)
result = numpy.empty(item_count, numba.types.int64)
for idx, item in enumerate(self._data._data):
result[idx] = len(item)

return pandas.Series(result, name=self._data._name)
return pandas.Series(result, self._data._index, name=self._data._name)

return hpat_pandas_stringmethods_len_impl

Expand Down
3 changes: 2 additions & 1 deletion sdc/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,8 @@ def resolve_head(self, ary, args, kws):
Functions which are still overloaded by HPAT compiler pipeline
"""

str2str_methods_excluded = ['upper', 'len', 'lower', 'lstrip', 'rstrip', 'strip']
str2str_methods_excluded = ['upper', 'isupper', 'len', 'lower',
'lstrip', 'rstrip', 'strip']
"""
Functions which are used from Numba directly by calling from StringMethodsType

Expand Down
2 changes: 1 addition & 1 deletion sdc/hiframes/split_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ def hpat_pandas_spliview_stringmethods_len_impl(self):
for i in range(len(local_data)):
result[i] = len(local_data[i])

return pandas.Series(result, name=self._data._name)
return pandas.Series(result, self._data._index, name=self._data._name)

return hpat_pandas_spliview_stringmethods_len_impl

Expand Down
33 changes: 21 additions & 12 deletions sdc/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import numpy as np
import pyarrow.parquet as pq
import sdc
from itertools import islice, permutations
from itertools import islice, permutations, product
from sdc.tests.test_base import TestCase
from sdc.tests.test_utils import (
count_array_REPs, count_parfor_REPs, count_array_OneDs, get_start_end,
Expand Down Expand Up @@ -2424,23 +2424,31 @@ def test_impl(S1, S2):
hpat_func(S1, S2), test_impl(S1, S2),
err_msg='S1={}\nS2={}'.format(S1, S2))

@skip_numba_jit
def test_series_str_len1(self):
def test_impl(S):
return S.str.len()
hpat_func = self.jit(test_impl)

# TODO: fix issue occurred if name is not assigned
S = pd.Series(['aa', 'abc', 'c', 'cccd'], name='A')
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))
data = ['aa', 'abc', 'c', 'cccd']
indices = [None, [1, 3, 2, 0], data]
names = [None, 'A']
for index, name in product(indices, names):
S = pd.Series(data, index, name=name)
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))

@skip_numba_jit
def test_series_str2str(self):
common_methods = ['lower', 'upper', 'lstrip', 'rstrip', 'strip']
sdc_methods = ['capitalize', 'swapcase', 'title']
common_methods = ['lower', 'upper', 'isupper']
sdc_methods = ['capitalize', 'swapcase', 'title',
'lstrip', 'rstrip', 'strip']
str2str_methods = common_methods[:]

data = [' \tbbCD\t ', 'ABC', ' mCDm\t', 'abc']
indices = [None]
names = [None, 'A']
if sdc.config.config_pipeline_hpat_default:
str2str_methods += sdc_methods
else:
indices += [[1, 3, 2, 0], data]

for method in str2str_methods:
func_lines = ['def test_impl(S):',
Expand All @@ -2449,10 +2457,11 @@ def test_series_str2str(self):
test_impl = _make_func_from_text(func_text)
hpat_func = self.jit(test_impl)

# TODO: fix issue occurred if name is not assigned
S = pd.Series([' \tbbCD\t ', 'ABC', ' mCDm\t', 'abc'], name='A')
pd.testing.assert_series_equal(hpat_func(S), test_impl(S),
check_names=method in common_methods)
check_names = method in common_methods
for index, name in product(indices, names):
S = pd.Series(data, index, name=name)
pd.testing.assert_series_equal(hpat_func(S), test_impl(S),
check_names=check_names)

@skip_sdc_jit('Series.str.<method>() unsupported')
def test_series_str2str_unsupported(self):
Expand Down