From 5dba8733584380943a5a856569d0dcbeaa81d80d Mon Sep 17 00:00:00 2001
From: Denis
Date: Sun, 8 Dec 2019 12:08:53 +0300
Subject: [PATCH 1/2] Implement Series.str.find()

---
Review notes (commentary placed after the three-dash line):
  * `start` and `end` pass the compile-time type check when they are
    int / None / omitted, but the generated implementation raises
    ValueError at run time unless start == 0 and end is None.
  * The docstring's "Test:" command now names test_series_str_find,
    matching the tests added to sdc/tests/test_series.py below.
  * Line breaks, indentation and blank context lines were re-created from
    the hunk line counts. If a context line fails to match the target
    tree, apply with `git apply --ignore-whitespace`.

 .../hpat_pandas_stringmethods_functions.py    | 61 +++++++++++++++++++
 sdc/hiframes/pd_series_ext.py                 |  2 +-
 sdc/tests/test_series.py                      | 53 ++++++++++++++++
 3 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py
index 216c5d751..88554f107 100644
--- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py
+++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py
@@ -82,6 +82,8 @@ def hpat_pandas_stringmethods_upper_impl(self):
 
 import numba
 from numba.extending import overload_method
+from numba.types import (Integer, IntegerLiteral, NoneType,
+                         Omitted, StringLiteral, UnicodeType)
 
 from sdc.datatypes.common_functions import TypeChecker
 from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
@@ -187,6 +189,65 @@ def hpat_pandas_stringmethods_{methodname}_impl(self{methodparams}):
 """
 
 
+@overload_method(StringMethodsType, 'find')
+def hpat_pandas_stringmethods_find(self, sub, start=0, end=None):
+    """
+    Pandas Series method :meth:`pandas.core.strings.StringMethods.find()` implementation.
+
+    Note: Only elements of Unicode type are supported. Numpy.NaN is not supported as an element.
+
+    .. only:: developer
+
+    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_str_find
+
+    Parameters
+    ----------
+    self: :class:`pandas.core.strings.StringMethods`
+        input arg
+    sub: :obj:`str`
+        Substring being searched
+    start: :obj:`int`
+        Left edge index
+        *unsupported*
+    end: :obj:`int`
+        Right edge index
+        *unsupported*
+
+    Returns
+    -------
+    :obj:`pandas.Series`
+        returns :obj:`pandas.Series` object
+    """
+
+    ty_checker = TypeChecker('Method find().')
+    ty_checker.check(self, StringMethodsType)
+
+    if not isinstance(sub, (StringLiteral, UnicodeType)):
+        ty_checker.raise_exc(sub, 'str', 'sub')
+
+    accepted_types = (Integer, IntegerLiteral, NoneType, Omitted)
+    if not isinstance(start, accepted_types) and start != 0:
+        ty_checker.raise_exc(start, 'None, int', 'start')
+
+    if not isinstance(end, accepted_types) and end is not None:
+        ty_checker.raise_exc(end, 'None, int', 'end')
+
+    def hpat_pandas_stringmethods_find_impl(self, sub, start=0, end=None):
+        if start != 0:
+            raise ValueError('Method find(). The object start\n expected: 0')
+        if end is not None:
+            raise ValueError('Method find(). The object end\n expected: None')
+
+        item_count = len(self._data)
+        result = numpy.empty(item_count, numba.types.int64)
+        for idx, item in enumerate(self._data._data):
+            result[idx] = item.find(sub)
+
+        return pandas.Series(result, self._data._index, name=self._data._name)
+
+    return hpat_pandas_stringmethods_find_impl
+
+
 @overload_method(StringMethodsType, 'isupper')
 def hpat_pandas_stringmethods_isupper(self):
     """
diff --git a/sdc/hiframes/pd_series_ext.py b/sdc/hiframes/pd_series_ext.py
index 5ec50ff62..66bf7b6a0 100644
--- a/sdc/hiframes/pd_series_ext.py
+++ b/sdc/hiframes/pd_series_ext.py
@@ -758,7 +758,7 @@ def resolve_head(self, ary, args, kws):
 Functions which are still overloaded by HPAT compiler pipeline
 """
 
-str2str_methods_excluded = ['upper', 'isupper', 'len', 'lower',
+str2str_methods_excluded = ['upper', 'find', 'isupper', 'len', 'lower',
                             'lstrip', 'rstrip', 'strip']
 """
 Functions which are used from Numba directly by calling from StringMethodsType
diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py
index 569ac6f77..89fd0c427 100644
--- a/sdc/tests/test_series.py
+++ b/sdc/tests/test_series.py
@@ -2424,6 +2424,59 @@ def test_impl(S1, S2):
                     hpat_func(S1, S2), test_impl(S1, S2),
                     err_msg='S1={}\nS2={}'.format(S1, S2))
 
+    def test_series_str_find(self):
+        def test_impl(series, sub):
+            return series.str.find(sub)
+        hpat_func = self.jit(test_impl)
+
+        data = test_global_input_data_unicode_kind4
+        subs = [''] + [s[:min(len(s) for s in data)] for s in data] + data
+        indices = [None, list(range(len(data)))[::-1], data[::-1]]
+        names = [None, 'A']
+        for index, name in product(indices, names):
+            series = pd.Series(data, index, name=name)
+            for sub in subs:
+                pd.testing.assert_series_equal(hpat_func(series, sub),
+                                               test_impl(series, sub))
+
+    def test_series_str_find_exception_unsupported_start(self):
+        def test_impl(series, sub, start):
+            return series.str.find(sub, start)
+        hpat_func = self.jit(test_impl)
+
+        series = pd.Series(test_global_input_data_unicode_kind4)
+        msg_tmpl = 'Method {}(). The object {}\n {}'
+
+        with self.assertRaises(TypingError) as raises:
+            hpat_func(series, '', '0')
+        msg = msg_tmpl.format('find', 'start', 'given: unicode_type\n '
+                              'expected: None, int')
+        self.assertIn(msg, str(raises.exception))
+
+        with self.assertRaises(ValueError) as raises:
+            hpat_func(series, '', 1)
+        msg = msg_tmpl.format('find', 'start', 'expected: 0')
+        self.assertIn(msg, str(raises.exception))
+
+    def test_series_str_find_exception_unsupported_end(self):
+        def test_impl(series, sub, start, end):
+            return series.str.find(sub, start, end)
+        hpat_func = self.jit(test_impl)
+
+        series = pd.Series(test_global_input_data_unicode_kind4)
+        msg_tmpl = 'Method {}(). The object {}\n {}'
+
+        with self.assertRaises(TypingError) as raises:
+            hpat_func(series, '', 0, 'None')
+        msg = msg_tmpl.format('find', 'end', 'given: unicode_type\n '
+                              'expected: None, int')
+        self.assertIn(msg, str(raises.exception))
+
+        with self.assertRaises(ValueError) as raises:
+            hpat_func(series, '', 0, 0)
+        msg = msg_tmpl.format('find', 'end', 'expected: None')
+        self.assertIn(msg, str(raises.exception))
+
     def test_series_str_len1(self):
         def test_impl(S):
             return S.str.len()

From 99f8f8465a7394121e535bc29709d85aae1bcdc4 Mon Sep 17 00:00:00 2001
From: Denis
Date: Sun, 8 Dec 2019 13:09:48 +0300
Subject: [PATCH 2/2] Remove IntegerLiteral from check of start/end

IntegerLiteral is a subclass of Integer, so checking Integer already
covers it.
---
 sdc/datatypes/hpat_pandas_stringmethods_functions.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py
index 88554f107..b0a6c3bed 100644
--- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py
+++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py
@@ -82,8 +82,7 @@ def hpat_pandas_stringmethods_upper_impl(self):
 
 import numba
 from numba.extending import overload_method
-from numba.types import (Integer, IntegerLiteral, NoneType,
-                         Omitted, StringLiteral, UnicodeType)
+from numba.types import (Integer, NoneType, Omitted, StringLiteral, UnicodeType)
 
 from sdc.datatypes.common_functions import TypeChecker
 from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
@@ -225,7 +224,7 @@ def hpat_pandas_stringmethods_find(self, sub, start=0, end=None):
     if not isinstance(sub, (StringLiteral, UnicodeType)):
         ty_checker.raise_exc(sub, 'str', 'sub')
 
-    accepted_types = (Integer, IntegerLiteral, NoneType, Omitted)
+    accepted_types = (Integer, NoneType, Omitted)
     if not isinstance(start, accepted_types) and start != 0:
         ty_checker.raise_exc(start, 'None, int', 'start')
 