From 44d40ae7cb65010bc99a992d8a879e1051a7876a Mon Sep 17 00:00:00 2001 From: Denis Date: Sun, 8 Dec 2019 14:14:52 +0300 Subject: [PATCH 1/2] Implement Series.str.startswith() --- .../hpat_pandas_stringmethods_functions.py | 52 +++++++++++++++++++ sdc/hiframes/pd_series_ext.py | 2 +- sdc/tests/test_series.py | 35 +++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index 216c5d751..a8ede9ec1 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -82,6 +82,7 @@ def hpat_pandas_stringmethods_upper_impl(self): import numba from numba.extending import overload_method +from numba.types import (Boolean, NoneType, Omitted, StringLiteral, UnicodeType) from sdc.datatypes.common_functions import TypeChecker from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType @@ -259,6 +260,57 @@ def hpat_pandas_stringmethods_len_impl(self): return hpat_pandas_stringmethods_len_impl +@overload_method(StringMethodsType, 'startswith') +def hpat_pandas_stringmethods_startswith(self, pat, na=None): + """ + Pandas Series method :meth:`pandas.core.strings.StringMethods.startswith()` implementation. + + Note: Only list elements of Unicode type are supported. Numpy.NaN values are not supported as elements. + + .. 
only:: developer + + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_str_startswith + + Parameters + ---------- + self: :class:`pandas.core.strings.StringMethods` + input arg + pat: :obj:`str` + Character sequence + na: :obj:`bool` + Object shown if element tested is not a string + *unsupported* + + Returns + ------- + :obj:`pandas.Series` + returns :obj:`pandas.Series` object + """ + + ty_checker = TypeChecker('Method startswith().') + ty_checker.check(self, StringMethodsType) + + if not isinstance(pat, (StringLiteral, UnicodeType)): + ty_checker.raise_exc(pat, 'str', 'pat') + + if not isinstance(na, (Boolean, NoneType, Omitted)) and na is not None: + ty_checker.raise_exc(na, 'bool', 'na') + + def hpat_pandas_stringmethods_startswith_impl(self, pat, na=None): + if na is not None: + msg = 'Method startswith(). The object na\n expected: None' + raise ValueError(msg) + + item_startswith = len(self._data) + result = numpy.empty(item_startswith, numba.types.boolean) + for idx, item in enumerate(self._data._data): + result[idx] = item.startswith(pat) + + return pandas.Series(result, self._data._index, name=self._data._name) + + return hpat_pandas_stringmethods_startswith_impl + + def _hpat_pandas_stringmethods_autogen(method_name): """" The function generates a function for 'method_name' from source text that is created on the fly. 
diff --git a/sdc/hiframes/pd_series_ext.py b/sdc/hiframes/pd_series_ext.py index 5ec50ff62..cff3712c8 100644 --- a/sdc/hiframes/pd_series_ext.py +++ b/sdc/hiframes/pd_series_ext.py @@ -759,7 +759,7 @@ def resolve_head(self, ary, args, kws): """ str2str_methods_excluded = ['upper', 'isupper', 'len', 'lower', - 'lstrip', 'rstrip', 'strip'] + 'lstrip', 'rstrip', 'startswith', 'strip'] """ Functions which are used from Numba directly by calling from StringMethodsType diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index 569ac6f77..c2542cc28 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -2436,6 +2436,41 @@ def test_impl(S): S = pd.Series(data, index, name=name) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) + def test_series_str_startswith(self): + def test_impl(series, pat): + return series.str.startswith(pat) + + hpat_func = self.jit(test_impl) + + data = test_global_input_data_unicode_kind4 + pats = [''] + [s[:min(len(s) for s in data)] for s in data] + data + indices = [None, list(range(len(data)))[::-1], data[::-1]] + names = [None, 'A'] + for index, name in product(indices, names): + series = pd.Series(data, index, name=name) + for pat in pats: + pd.testing.assert_series_equal(hpat_func(series, pat), + test_impl(series, pat)) + + def test_series_str_startswith_exception_unsupported_na(self): + def test_impl(series, pat, na): + return series.str.startswith(pat, na) + + hpat_func = self.jit(test_impl) + + series = pd.Series(test_global_input_data_unicode_kind4) + msg_tmpl = 'Method startswith(). 
The object na\n {}' + + with self.assertRaises(TypingError) as raises: + hpat_func(series, '', 'None') + msg = msg_tmpl.format('given: unicode_type\n expected: bool') + self.assertIn(msg, str(raises.exception)) + + with self.assertRaises(ValueError) as raises: + hpat_func(series, '', False) + msg = msg_tmpl.format('expected: None') + self.assertIn(msg, str(raises.exception)) + def test_series_str2str(self): common_methods = ['lower', 'upper', 'isupper'] sdc_methods = ['capitalize', 'swapcase', 'title', From ea7464166ac2ed1b0cd87e55c842b07615899505 Mon Sep 17 00:00:00 2001 From: Denis Date: Sun, 8 Dec 2019 14:19:22 +0300 Subject: [PATCH 2/2] Minor fixes in tests for Series.str.find() --- sdc/tests/test_series.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index 23c11517a..edb3cf5e6 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -2445,17 +2445,16 @@ def test_impl(series, sub, start): hpat_func = self.jit(test_impl) series = pd.Series(test_global_input_data_unicode_kind4) - msg_tmpl = 'Method {}(). The object {}\n {}' + msg_tmpl = 'Method find(). The object start\n {}' with self.assertRaises(TypingError) as raises: hpat_func(series, '', '0') - msg = msg_tmpl.format('find', 'start', 'given: unicode_type\n ' - 'expected: None, int') + msg = msg_tmpl.format('given: unicode_type\n expected: None, int') self.assertIn(msg, str(raises.exception)) with self.assertRaises(ValueError) as raises: hpat_func(series, '', 1) - msg = msg_tmpl.format('find', 'start', 'expected: 0') + msg = msg_tmpl.format('expected: 0') self.assertIn(msg, str(raises.exception)) def test_series_str_find_exception_unsupported_end(self): @@ -2464,17 +2463,16 @@ def test_impl(series, sub, start, end): hpat_func = self.jit(test_impl) series = pd.Series(test_global_input_data_unicode_kind4) - msg_tmpl = 'Method {}(). The object {}\n {}' + msg_tmpl = 'Method find(). 
The object end\n {}' with self.assertRaises(TypingError) as raises: hpat_func(series, '', 0, 'None') - msg = msg_tmpl.format('find', 'end', 'given: unicode_type\n ' - 'expected: None, int') + msg = msg_tmpl.format('given: unicode_type\n expected: None, int') self.assertIn(msg, str(raises.exception)) with self.assertRaises(ValueError) as raises: hpat_func(series, '', 0, 0) - msg = msg_tmpl.format('find', 'end', 'expected: None') + msg = msg_tmpl.format('expected: None') self.assertIn(msg, str(raises.exception)) def test_series_str_len1(self):