Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions sdc/datatypes/hpat_pandas_stringmethods_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,57 @@ def hpat_pandas_stringmethods_len_impl(self):
return hpat_pandas_stringmethods_len_impl


@overload_method(StringMethodsType, 'startswith')
def hpat_pandas_stringmethods_startswith(self, pat, na=None):
    """
    Pandas Series method :meth:`pandas.core.strings.StringMethods.startswith()` implementation.

    Note: only Unicode string elements are supported. Numpy.NaN elements are not supported.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_str_startswith

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    pat: :obj:`str`
        Character sequence tested against the start of every element
    na: :obj:`bool`
        Object shown if element tested is not a string
        *unsupported*: any value other than None makes the implementation raise ValueError

    Returns
    -------
    :obj:`pandas.Series`
        returns :obj:`pandas.Series` object of booleans that keeps the index and the name
        of the underlying series
    """

    checker = TypeChecker('Method startswith().')
    checker.check(self, StringMethodsType)

    # Typing stage: `pat` has to be a string type.
    pat_is_string = isinstance(pat, (StringLiteral, UnicodeType))
    if not pat_is_string:
        checker.raise_exc(pat, 'str', 'pat')

    # Typing stage: `na` may be left at its default (plain None) or be typed as
    # a boolean / None / omitted argument; anything else is rejected.
    na_is_acceptable = na is None or isinstance(na, (Boolean, NoneType, Omitted))
    if not na_is_acceptable:
        checker.raise_exc(na, 'bool', 'na')

    def hpat_pandas_stringmethods_startswith_impl(self, pat, na=None):
        # `na` passes the type check above as a bool, but only None is implemented.
        if na is not None:
            raise ValueError('Method startswith(). The object na\n expected: None')

        n_items = len(self._data)
        matches = numpy.empty(n_items, numba.types.boolean)
        for pos, value in enumerate(self._data._data):
            matches[pos] = value.startswith(pat)

        return pandas.Series(matches, self._data._index, name=self._data._name)

    return hpat_pandas_stringmethods_startswith_impl


def _hpat_pandas_stringmethods_autogen(method_name):
""""
The function generates a function for 'method_name' from source text that is created on the fly.
Expand Down
2 changes: 1 addition & 1 deletion sdc/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ def resolve_head(self, ary, args, kws):
"""

str2str_methods_excluded = ['upper', 'endswith', 'find', 'isupper', 'len',
'lower', 'lstrip', 'rstrip', 'strip']
'lower', 'lstrip', 'rstrip', 'startswith', 'strip']
"""
Functions which are used from Numba directly by calling from StringMethodsType

Expand Down
49 changes: 41 additions & 8 deletions sdc/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2480,17 +2480,16 @@ def test_impl(series, sub, start):
hpat_func = self.jit(test_impl)

series = pd.Series(test_global_input_data_unicode_kind4)
msg_tmpl = 'Method {}(). The object {}\n {}'
msg_tmpl = 'Method find(). The object start\n {}'

with self.assertRaises(TypingError) as raises:
hpat_func(series, '', '0')
msg = msg_tmpl.format('find', 'start', 'given: unicode_type\n '
'expected: None, int')
msg = msg_tmpl.format('given: unicode_type\n expected: None, int')
self.assertIn(msg, str(raises.exception))

with self.assertRaises(ValueError) as raises:
hpat_func(series, '', 1)
msg = msg_tmpl.format('find', 'start', 'expected: 0')
msg = msg_tmpl.format('expected: 0')
self.assertIn(msg, str(raises.exception))

def test_series_str_find_exception_unsupported_end(self):
Expand All @@ -2499,17 +2498,16 @@ def test_impl(series, sub, start, end):
hpat_func = self.jit(test_impl)

series = pd.Series(test_global_input_data_unicode_kind4)
msg_tmpl = 'Method {}(). The object {}\n {}'
msg_tmpl = 'Method find(). The object end\n {}'

with self.assertRaises(TypingError) as raises:
hpat_func(series, '', 0, 'None')
msg = msg_tmpl.format('find', 'end', 'given: unicode_type\n '
'expected: None, int')
msg = msg_tmpl.format('given: unicode_type\n expected: None, int')
self.assertIn(msg, str(raises.exception))

with self.assertRaises(ValueError) as raises:
hpat_func(series, '', 0, 0)
msg = msg_tmpl.format('find', 'end', 'expected: None')
msg = msg_tmpl.format('expected: None')
self.assertIn(msg, str(raises.exception))

def test_series_str_len1(self):
Expand All @@ -2524,6 +2522,41 @@ def test_impl(S):
S = pd.Series(data, index, name=name)
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))

def test_series_str_startswith(self):
    """
    Check Series.str.startswith() through self.jit against plain pandas.

    Every combination of index (default, reversed integer positions, reversed
    data values) and series name (None, 'A') is checked with the following
    patterns: the empty string, a prefix of each data element, and each data
    element in full.
    """
    def test_impl(series, pat):
        return series.str.startswith(pat)

    hpat_func = self.jit(test_impl)

    data = test_global_input_data_unicode_kind4
    # Length of the shortest element. It is the same for every element, so it
    # is computed once rather than once per element inside the comprehension.
    # `default=0` keeps the empty-data case working as before.
    shortest_len = min((len(s) for s in data), default=0)
    pats = [''] + [s[:shortest_len] for s in data] + data
    indices = [None, list(range(len(data)))[::-1], data[::-1]]
    names = [None, 'A']
    for index, name in product(indices, names):
        series = pd.Series(data, index, name=name)
        for pat in pats:
            pd.testing.assert_series_equal(hpat_func(series, pat),
                                           test_impl(series, pat))

def test_series_str_startswith_exception_unsupported_na(self):
    """
    Check the errors raised by Series.str.startswith() for unsupported `na` values.

    A string `na` is expected to raise TypingError and a boolean `na` is
    expected to raise ValueError; in both cases the exception text has to
    contain the expected message.
    """
    def test_impl(series, pat, na):
        return series.str.startswith(pat, na)

    jitted = self.jit(test_impl)

    series = pd.Series(test_global_input_data_unicode_kind4)
    msg_tmpl = 'Method startswith(). The object na\n {}'

    # (exception type, `na` argument, message detail) -- checked in this order.
    cases = (
        (TypingError, 'None', 'given: unicode_type\n expected: bool'),
        (ValueError, False, 'expected: None'),
    )
    for exc_type, na_value, detail in cases:
        with self.assertRaises(exc_type) as ctx:
            jitted(series, '', na_value)
        expected = msg_tmpl.format(detail)
        self.assertIn(expected, str(ctx.exception))

def test_series_str2str(self):
common_methods = ['lower', 'upper', 'isupper']
sdc_methods = ['capitalize', 'swapcase', 'title',
Expand Down