Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions examples/series_str_ljust.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# *****************************************************************************
# Copyright (c) 2019, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

import pandas as pd
from numba import njit


@njit
def series_str_ljust():
    """Pad every string of a three-element Series on the right with '*' up to width 5."""
    words = pd.Series(['dog', 'foo', 'bar'])  # Series of 'dog', 'foo', 'bar'

    return words.str.ljust(5, '*')  # Expect series of 'dog**', 'foo**', 'bar**'


print(series_str_ljust())
39 changes: 39 additions & 0 deletions examples/series_str_rjust.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# *****************************************************************************
# Copyright (c) 2019, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

import pandas as pd
from numba import njit


@njit
def series_str_rjust():
    """Pad every string of a three-element Series on the left with '*' up to width 5."""
    words = pd.Series(['dog', 'foo', 'bar'])  # Series of 'dog', 'foo', 'bar'

    return words.str.rjust(5, '*')  # Expect series of '**dog', '**foo', '**bar'


print(series_str_rjust())
144 changes: 144 additions & 0 deletions sdc/datatypes/hpat_pandas_stringmethods_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,150 @@ def hpat_pandas_stringmethods_len_impl(self):
return hpat_pandas_stringmethods_len_impl


@overload_method(StringMethodsType, 'ljust')
def hpat_pandas_stringmethods_ljust(self, width, fillchar=' '):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.ljust

    Examples
    --------
    .. literalinclude:: ../../../examples/series_str_ljust.py
       :language: python
       :lines: 27-
       :caption: Filling right side of strings in the Series with an additional character
       :name: ex_series_str_ljust

    .. code-block:: console

        > python ./series_str_ljust.py
        0    dog**
        1    foo**
        2    bar**
        dtype: object

    .. todo:: Add support of 32-bit Unicode for `str.ljust()`

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.ljust()` implementation.

    Note: Only elements of Unicode (str) type are supported. Numpy.NaN elements are not supported.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_str_just

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    width: :obj:`int`
        Minimum width of resulting string
    fillchar: :obj:`str`
        Additional character for filling, default is whitespace

    Returns
    -------
    :obj:`pandas.Series`
        returns :obj:`pandas.Series` object
    """

    ty_checker = TypeChecker('Method ljust().')
    ty_checker.check(self, StringMethodsType)

    # width has to be typed as an integer
    if not isinstance(width, Integer):
        ty_checker.raise_exc(width, 'int', 'width')

    # fillchar may be omitted, be a string (literal or not), or be the plain default ' '
    accepted_types = (Omitted, StringLiteral, UnicodeType)
    if not isinstance(fillchar, accepted_types) and fillchar != ' ':
        ty_checker.raise_exc(fillchar, 'str', 'fillchar')

    def hpat_pandas_stringmethods_ljust_impl(self, width, fillchar=' '):
        # Pre-size the output list, then fill it element by element
        item_count = len(self._data)
        result = [''] * item_count
        for idx, item in enumerate(self._data._data):
            result[idx] = item.ljust(width, fillchar)

        # The resulting Series keeps the index and the name of the source Series
        return pandas.Series(result, self._data._index, name=self._data._name)

    return hpat_pandas_stringmethods_ljust_impl


@overload_method(StringMethodsType, 'rjust')
def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.rjust

    Examples
    --------
    .. literalinclude:: ../../../examples/series_str_rjust.py
       :language: python
       :lines: 27-
       :caption: Filling left side of strings in the Series with an additional character
       :name: ex_series_str_rjust

    .. code-block:: console

        > python ./series_str_rjust.py
        0    **dog
        1    **foo
        2    **bar
        dtype: object

    .. todo:: Add support of 32-bit Unicode for `str.rjust()`

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.rjust()` implementation.

    Note: Only elements of Unicode (str) type are supported. Numpy.NaN elements are not supported.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_str_just

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    width: :obj:`int`
        Minimum width of resulting string
    fillchar: :obj:`str`
        Additional character for filling, default is whitespace

    Returns
    -------
    :obj:`pandas.Series`
        returns :obj:`pandas.Series` object
    """

    ty_checker = TypeChecker('Method rjust().')
    ty_checker.check(self, StringMethodsType)

    # width has to be typed as an integer
    if not isinstance(width, Integer):
        ty_checker.raise_exc(width, 'int', 'width')

    # fillchar may be omitted, be a string (literal or not), or be the plain default ' '
    accepted_types = (Omitted, StringLiteral, UnicodeType)
    if not isinstance(fillchar, accepted_types) and fillchar != ' ':
        ty_checker.raise_exc(fillchar, 'str', 'fillchar')

    def hpat_pandas_stringmethods_rjust_impl(self, width, fillchar=' '):
        # Pre-size the output list, then fill it element by element
        item_count = len(self._data)
        result = [''] * item_count
        for idx, item in enumerate(self._data._data):
            result[idx] = item.rjust(width, fillchar)

        # The resulting Series keeps the index and the name of the source Series
        return pandas.Series(result, self._data._index, name=self._data._name)

    return hpat_pandas_stringmethods_rjust_impl


@overload_method(StringMethodsType, 'startswith')
def hpat_pandas_stringmethods_startswith(self, pat, na=None):
"""
Expand Down
4 changes: 2 additions & 2 deletions sdc/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,8 +759,8 @@ def resolve_head(self, ary, args, kws):
"""

# Each method name appears exactly once, separated by commas, so that no two
# adjacent string literals are silently concatenated into a bogus entry.
str2str_methods_excluded = [
    'upper', 'center', 'endswith', 'find', 'isupper', 'len', 'ljust',
    'lower', 'lstrip', 'rjust', 'rstrip', 'startswith', 'strip'
]
"""
Functions which are used from Numba directly by calling from StringMethodsType
Expand Down
68 changes: 68 additions & 0 deletions sdc/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,22 @@ def _make_func_use_method_arg1(method):
return _make_func_from_text(func_text)


def ljust_usecase(series, width):
    """Left-justify each string of ``series`` to ``width`` using the default fill character."""
    accessor = series.str
    return accessor.ljust(width)


def ljust_with_fillchar_usecase(series, width, fillchar):
    """Left-justify each string of ``series`` to ``width``, padding with ``fillchar``."""
    accessor = series.str
    return accessor.ljust(width, fillchar)


def rjust_usecase(series, width):
    """Right-justify each string of ``series`` to ``width`` using the default fill character."""
    accessor = series.str
    return accessor.rjust(width)


def rjust_with_fillchar_usecase(series, width, fillchar):
    """Right-justify each string of ``series`` to ``width``, padding with ``fillchar``."""
    accessor = series.str
    return accessor.rjust(width, fillchar)


GLOBAL_VAL = 2


Expand Down Expand Up @@ -2589,6 +2605,58 @@ def test_impl(S):
S = pd.Series(data, index, name=name)
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))

def test_series_str_just_default_fillchar(self):
    """Check jitted ljust/rjust with the default fill character against plain pandas."""
    data = test_global_input_data_unicode_kind1
    # Width exceeds the longest element by 5, so every element gets padded
    width = 5 + max(map(len, data))
    series = pd.Series(data)

    for pyfunc in (ljust_usecase, rjust_usecase):
        cfunc = self.jit(pyfunc)
        jit_result = cfunc(series, width)
        ref_result = pyfunc(series, width)
        pd.testing.assert_series_equal(jit_result, ref_result)

def test_series_str_just(self):
    """Check jitted ljust/rjust with an explicit fill character against plain pandas.

    Every combination of index (default, reversed integer positions, reversed
    data values) and width is run through both use cases.
    """
    data = test_global_input_data_unicode_kind1
    data_lengths = [len(s) for s in data]
    # One width that pads every element and one that pads none of them
    widths = [max(data_lengths) + 5, min(data_lengths)]

    pyfuncs = [ljust_with_fillchar_usecase, rjust_with_fillchar_usecase]
    # Wrapping a use case does not depend on the loop variables below,
    # so do it once per use case instead of once per index/width pair
    compiled = [(pyfunc, self.jit(pyfunc)) for pyfunc in pyfuncs]
    for index in [None, list(range(len(data)))[::-1], data[::-1]]:
        series = pd.Series(data, index, name='A')
        for width, fillchar in product(widths, ['\t']):
            for pyfunc, cfunc in compiled:
                jit_result = cfunc(series, width, fillchar)
                ref_result = pyfunc(series, width, fillchar)
                pd.testing.assert_series_equal(jit_result, ref_result)

def test_series_str_just_exception_unsupported_fillchar(self):
    """Check that an integer fillchar is rejected with a TypingError by ljust/rjust."""
    data = test_global_input_data_unicode_kind1
    width = 5 + max(map(len, data))
    series = pd.Series(data)
    msg_tmpl = 'Method {}(). The object fillchar\n given: int64\n expected: str'

    cases = (
        ('ljust', ljust_with_fillchar_usecase),
        ('rjust', rjust_with_fillchar_usecase),
    )
    for name, pyfunc in cases:
        cfunc = self.jit(pyfunc)
        expected_msg = msg_tmpl.format(name)
        with self.assertRaises(TypingError) as raises:
            cfunc(series, width, 5)
        self.assertIn(expected_msg, str(raises.exception))

def test_series_str_just_exception_unsupported_kind4(self):
    """Pin the SystemError that jitted ljust/rjust raise for the kind4 input data."""
    # NOTE(review): kind4 presumably means 4-byte Unicode strings - confirm against the data definition
    data = test_global_input_data_unicode_kind4
    width = 5 + max(map(len, data))
    series = pd.Series(data)
    expected_msg = 'NULL object passed to Py_BuildValue'

    for pyfunc in (ljust_usecase, rjust_usecase):
        cfunc = self.jit(pyfunc)
        with self.assertRaises(SystemError) as raises:
            cfunc(series, width)
        self.assertIn(expected_msg, str(raises.exception))

def test_series_str_startswith(self):
def test_impl(series, pat):
return series.str.startswith(pat)
Expand Down