diff --git a/sdc/datatypes/common_functions.py b/sdc/datatypes/common_functions.py index b5745e8d1..d359d2c97 100644 --- a/sdc/datatypes/common_functions.py +++ b/sdc/datatypes/common_functions.py @@ -33,6 +33,7 @@ import numpy from numba import types +from numba.errors import TypingError from numba.extending import overload from numba import numpy_support @@ -40,6 +41,55 @@ from sdc.str_arr_ext import (string_array_type, num_total_chars, append_string_array_to) +class TypeChecker: + """ + Validate object type and raise TypingError if the type is invalid, e.g.: + Method nsmallest(). The object n + given: bool + expected: int + """ + msg_template = '{} The object {}\n given: {}\n expected: {}' + + def __init__(self, func_name): + """ + Parameters + ---------- + func_name: :obj:`str` + name of the function where types checking + """ + self.func_name = func_name + + def raise_exc(self, data, expected_types, name=''): + """ + Raise exception with unified message + Parameters + ---------- + data: :obj:`any` + real type of the data + expected_types: :obj:`str` + expected types inserting directly to the exception + name: :obj:`str` + name of the parameter + """ + msg = self.msg_template.format(self.func_name, name, data, expected_types) + raise TypingError(msg) + + def check(self, data, accepted_type, name=''): + """ + Check data type belongs to specified type + Parameters + ---------- + data: :obj:`any` + real type of the data + accepted_type: :obj:`type` + accepted type + name: :obj:`str` + name of the parameter + """ + if not isinstance(data, accepted_type): + self.raise_exc(data, accepted_type.__name__, name=name) + + def has_literal_value(var, value): '''Used during typing to check that variable var is a Numba literal value equal to value''' diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py index de5a48d1c..43fc93d9a 100644 --- a/sdc/datatypes/hpat_pandas_series_functions.py +++ b/sdc/datatypes/hpat_pandas_series_functions.py @@ -40,59 +40,12 @@ import sdc import sdc.datatypes.common_functions as common_functions +from sdc.datatypes.common_functions import TypeChecker from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType from sdc.hiframes.pd_series_ext import SeriesType from sdc.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars) from sdc.utils import to_array -class TypeChecker: - """ - Validate object type and raise TypingError if the type is invalid, e.g.: - Method nsmallest(). The object n - given: bool - expected: int - """ - msg_template = '{} The object {}\n given: {}\n expected: {}' - - def __init__(self, func_name): - """ - Parameters - ---------- - func_name: :obj:`str` - name of the function where types checking - """ - self.func_name = func_name - - def raise_exc(self, data, expected_types, name=''): - """ - Raise exception with unified message - Parameters - ---------- - data: :obj:`any` - real type of the data - expected_types: :obj:`str` - expected types inserting directly to the exception - name: :obj:`str` - name of the parameter - """ - msg = self.msg_template.format(self.func_name, name, data, expected_types) - raise TypingError(msg) - - def check(self, data, accepted_type, name=''): - """ - Check data type belongs to specified type - Parameters - ---------- - data: :obj:`any` - real type of the data - accepted_type: :obj:`type` - accepted type - name: :obj:`str` - name of the parameter - """ - if not isinstance(data, accepted_type): - self.raise_exc(data, accepted_type.__name__, name=name) - @overload(operator.getitem) def hpat_pandas_series_getitem(self, idx): diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index f9cb4d3ef..216c5d751 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -40,10 +40,9 @@ @overload_method(StringMethodsType, 'upper') def hpat_pandas_stringmethods_upper(self): - _func_name = 'Method stringmethods.upper().' - if not isinstance(self, StringMethodsType): - raise TypingError('{} The object must be a pandas.core.strings. Given: {}'.format(_func_name, self)) + ty_checker = TypeChecker('Method stringmethods.upper().') + ty_checker.check(self, StringMethodsType) def hpat_pandas_stringmethods_upper_parallel_impl(self): from numba.parfor import (init_prange, min_checker, internal_prange) @@ -83,8 +82,8 @@ def hpat_pandas_stringmethods_upper_impl(self): import numba from numba.extending import overload_method -from numba.errors import TypingError +from sdc.datatypes.common_functions import TypeChecker from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType @@ -92,7 +91,8 @@ def hpat_pandas_stringmethods_upper_impl(self): 'pandas': pandas, 'numpy': numpy, 'numba': numba, - 'StringMethodsType': StringMethodsType + 'StringMethodsType': StringMethodsType, + 'TypeChecker': TypeChecker } _hpat_pandas_stringmethods_functions_params = { @@ -166,8 +166,8 @@ def hpat_pandas_stringmethods_{methodname}(self{methodparams}): returns :obj:`pandas.Series` object \"\"\" - if not isinstance(self, StringMethodsType): - raise TypingError('Method {methodname}(). The object must be a pandas.core.strings. Given: ' % self) + ty_checker = TypeChecker('Method {methodname}().') + ty_checker.check(self, StringMethodsType) def hpat_pandas_stringmethods_{methodname}_impl(self{methodparams}): item_count = len(self._data) @@ -181,12 +181,48 @@ def hpat_pandas_stringmethods_{methodname}_impl(self{methodparams}): else: result[it] = item - return pandas.Series(result, name=self._data._name) + return pandas.Series(result, self._data._index, name=self._data._name) return hpat_pandas_stringmethods_{methodname}_impl """ +@overload_method(StringMethodsType, 'isupper') +def hpat_pandas_stringmethods_isupper(self): + """ + Pandas Series method :meth:`pandas.core.strings.StringMethods.isupper()` implementation. + + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + + .. only:: developer + + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str2str + + Parameters + ---------- + self: :class:`pandas.core.strings.StringMethods` + input arg + + Returns + ------- + :obj:`pandas.Series` + returns :obj:`pandas.Series` object + """ + + ty_checker = TypeChecker('Method isupper().') + ty_checker.check(self, StringMethodsType) + + def hpat_pandas_stringmethods_isupper_impl(self): + item_count = len(self._data) + result = numpy.empty(item_count, numba.types.boolean) + for idx, item in enumerate(self._data._data): + result[idx] = item.isupper() + + return pandas.Series(result, self._data._index, name=self._data._name) + + return hpat_pandas_stringmethods_isupper_impl + + @overload_method(StringMethodsType, 'len') def hpat_pandas_stringmethods_len(self): """ @@ -209,8 +245,8 @@ def hpat_pandas_stringmethods_len(self): returns :obj:`pandas.Series` object """ - if not isinstance(self, StringMethodsType): - raise TypingError('Method len(). The object must be a pandas.core.strings. Given: {}'.format(self)) + ty_checker = TypeChecker('Method len().') + ty_checker.check(self, StringMethodsType) def hpat_pandas_stringmethods_len_impl(self): item_count = len(self._data) @@ -218,7 +254,7 @@ def hpat_pandas_stringmethods_len_impl(self): for idx, item in enumerate(self._data._data): result[idx] = len(item) - return pandas.Series(result, name=self._data._name) + return pandas.Series(result, self._data._index, name=self._data._name) return hpat_pandas_stringmethods_len_impl diff --git a/sdc/hiframes/pd_series_ext.py b/sdc/hiframes/pd_series_ext.py index 4b510bb92..5ec50ff62 100644 --- a/sdc/hiframes/pd_series_ext.py +++ b/sdc/hiframes/pd_series_ext.py @@ -758,7 +758,8 @@ def resolve_head(self, ary, args, kws): Functions which are still overloaded by HPAT compiler pipeline """ -str2str_methods_excluded = ['upper', 'len', 'lower', 'lstrip', 'rstrip', 'strip'] +str2str_methods_excluded = ['upper', 'isupper', 'len', 'lower', + 'lstrip', 'rstrip', 'strip'] """ Functions which are used from Numba directly by calling from StringMethodsType diff --git a/sdc/hiframes/split_impl.py b/sdc/hiframes/split_impl.py index fd75a2223..53ac5ef27 100644 --- a/sdc/hiframes/split_impl.py +++ b/sdc/hiframes/split_impl.py @@ -478,7 +478,7 @@ def hpat_pandas_spliview_stringmethods_len_impl(self): for i in range(len(local_data)): result[i] = len(local_data[i]) - return pandas.Series(result, name=self._data._name) + return pandas.Series(result, self._data._index, name=self._data._name) return hpat_pandas_spliview_stringmethods_len_impl diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index 05d23fe1c..569ac6f77 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -33,7 +33,7 @@ import numpy as np import pyarrow.parquet as pq import sdc -from itertools import islice, permutations +from itertools import islice, permutations, product from sdc.tests.test_base import TestCase from sdc.tests.test_utils import ( count_array_REPs, count_parfor_REPs, count_array_OneDs, get_start_end, @@ -2424,23 +2424,31 @@ def test_impl(S1, S2): hpat_func(S1, S2), test_impl(S1, S2), err_msg='S1={}\nS2={}'.format(S1, S2)) - @skip_numba_jit def test_series_str_len1(self): def test_impl(S): return S.str.len() hpat_func = self.jit(test_impl) - # TODO: fix issue occurred if name is not assigned - S = pd.Series(['aa', 'abc', 'c', 'cccd'], name='A') - pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) + data = ['aa', 'abc', 'c', 'cccd'] + indices = [None, [1, 3, 2, 0], data] + names = [None, 'A'] + for index, name in product(indices, names): + S = pd.Series(data, index, name=name) + pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) - @skip_numba_jit def test_series_str2str(self): - common_methods = ['lower', 'upper', 'lstrip', 'rstrip', 'strip'] - sdc_methods = ['capitalize', 'swapcase', 'title'] + common_methods = ['lower', 'upper', 'isupper'] + sdc_methods = ['capitalize', 'swapcase', 'title', + 'lstrip', 'rstrip', 'strip'] str2str_methods = common_methods[:] + + data = [' \tbbCD\t ', 'ABC', ' mCDm\t', 'abc'] + indices = [None] + names = [None, 'A'] if sdc.config.config_pipeline_hpat_default: str2str_methods += sdc_methods + else: + indices += [[1, 3, 2, 0], data] for method in str2str_methods: func_lines = ['def test_impl(S):', @@ -2449,10 +2457,11 @@ def test_series_str2str(self): test_impl = _make_func_from_text(func_text) hpat_func = self.jit(test_impl) - # TODO: fix issue occurred if name is not assigned - S = pd.Series([' \tbbCD\t ', 'ABC', ' mCDm\t', 'abc'], name='A') - pd.testing.assert_series_equal(hpat_func(S), test_impl(S), - check_names=method in common_methods) + check_names = method in common_methods + for index, name in product(indices, names): + S = pd.Series(data, index, name=name) + pd.testing.assert_series_equal(hpat_func(S), test_impl(S), + check_names=check_names) @skip_sdc_jit('Series.str.() unsupported') def test_series_str2str_unsupported(self):