From d3554cd2220a4b759a4d1b07be7be812c220b095 Mon Sep 17 00:00:00 2001 From: Denis Date: Mon, 9 Dec 2019 10:03:13 +0300 Subject: [PATCH 1/2] Implement Series.str.ljust()/rjust() --- .../hpat_pandas_stringmethods_functions.py | 94 +++++++++++++++++++ sdc/hiframes/pd_series_ext.py | 4 +- sdc/tests/test_series.py | 68 ++++++++++++++ 3 files changed, 164 insertions(+), 2 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index 2765c0c18..342454f35 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -418,6 +418,100 @@ def hpat_pandas_stringmethods_len_impl(self): return hpat_pandas_stringmethods_len_impl +@overload_method(StringMethodsType, 'ljust') +def hpat_pandas_stringmethods_ljust(self, width, fillchar=' '): + """ + Pandas Series method :meth:`pandas.core.strings.StringMethods.ljust()` implementation. + + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + + .. only:: developer + + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_ljust + + Parameters + ---------- + self: :class:`pandas.core.strings.StringMethods` + input arg + width: :obj:`int` + Minimum width of resulting string + fillchar: :obj:`str` + Additional character for filling, default is whitespace + + Returns + ------- + :obj:`pandas.Series` + returns :obj:`pandas.Series` object + """ + + ty_checker = TypeChecker('Method ljust().') + ty_checker.check(self, StringMethodsType) + + if not isinstance(width, Integer): + ty_checker.raise_exc(width, 'int', 'width') + + accepted_types = (Omitted, StringLiteral, UnicodeType) + if not isinstance(fillchar, accepted_types) and fillchar != ' ': + ty_checker.raise_exc(fillchar, 'str', 'fillchar') + + def hpat_pandas_stringmethods_ljust_impl(self, width, fillchar=' '): + item_count = len(self._data) + result = [''] * item_count + for idx, item in enumerate(self._data._data): + result[idx] = item.ljust(width, fillchar) + + return pandas.Series(result, self._data._index, name=self._data._name) + + return hpat_pandas_stringmethods_ljust_impl + + +@overload_method(StringMethodsType, 'rjust') +def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '): + """ + Pandas Series method :meth:`pandas.core.strings.StringMethods.rjust()` implementation. + + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + + .. only:: developer + + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rjust + + Parameters + ---------- + self: :class:`pandas.core.strings.StringMethods` + input arg + width: :obj:`int` + Minimum width of resulting string + fillchar: :obj:`str` + Additional character for filling, default is whitespace + + Returns + ------- + :obj:`pandas.Series` + returns :obj:`pandas.Series` object + """ + + ty_checker = TypeChecker('Method rjust().') + ty_checker.check(self, StringMethodsType) + + if not isinstance(width, Integer): + ty_checker.raise_exc(width, 'int', 'width') + + accepted_types = (Omitted, StringLiteral, UnicodeType) + if not isinstance(fillchar, accepted_types) and fillchar != ' ': + ty_checker.raise_exc(fillchar, 'str', 'fillchar') + + def hpat_pandas_stringmethods_rjust_impl(self, width, fillchar=' '): + item_count = len(self._data) + result = [''] * item_count + for idx, item in enumerate(self._data._data): + result[idx] = item.rjust(width, fillchar) + + return pandas.Series(result, self._data._index, name=self._data._name) + + return hpat_pandas_stringmethods_rjust_impl + + @overload_method(StringMethodsType, 'startswith') def hpat_pandas_stringmethods_startswith(self, pat, na=None): """ diff --git a/sdc/hiframes/pd_series_ext.py b/sdc/hiframes/pd_series_ext.py index f023aea48..84ea1c01d 100644 --- a/sdc/hiframes/pd_series_ext.py +++ b/sdc/hiframes/pd_series_ext.py @@ -759,8 +759,8 @@ def resolve_head(self, ary, args, kws): """ str2str_methods_excluded = [ - 'upper', 'center', 'endswith', 'find', 'isupper', 'len', - 'lower', 'lstrip', 'rstrip', 'startswith', 'strip' + 'upper', 'center', 'endswith', 'find', 'isupper', 'len', 'ljust', + 'lower', 'lstrip', 'rjust', 'rstrip', 'startswith', 'strip' ] """ Functions which are used from Numba directly by calling from StringMethodsType diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index 0d47f70dd..35b8e6860 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -208,6 +208,22 @@ def _make_func_use_method_arg1(method): return _make_func_from_text(func_text) +def ljust_usecase(series, width): + return series.str.ljust(width) + + +def ljust_with_fillchar_usecase(series, width, fillchar): + return series.str.ljust(width, fillchar) + + +def rjust_usecase(series, width): + return series.str.rjust(width) + + +def rjust_with_fillchar_usecase(series, width, fillchar): + return series.str.rjust(width, fillchar) + + GLOBAL_VAL = 2 @@ -2583,6 +2599,58 @@ def test_impl(S): S = pd.Series(data, index, name=name) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) + def test_series_str_just_default_fillchar(self): + data = test_global_input_data_unicode_kind1 + series = pd.Series(data) + width = max(len(s) for s in data) + 5 + + pyfuncs = [ljust_usecase, rjust_usecase] + for pyfunc in pyfuncs: + cfunc = self.jit(pyfunc) + pd.testing.assert_series_equal(cfunc(series, width), + pyfunc(series, width)) + + def test_series_str_just(self): + data = test_global_input_data_unicode_kind1 + data_lengths = [len(s) for s in data] + widths = [max(data_lengths) + 5, min(data_lengths)] + + pyfuncs = [ljust_with_fillchar_usecase, rjust_with_fillchar_usecase] + for index in [None, list(range(len(data)))[::-1], data[::-1]]: + series = pd.Series(data, index, name='A') + for width, fillchar in product(widths, ['\t']): + for pyfunc in pyfuncs: + cfunc = self.jit(pyfunc) + jit_result = cfunc(series, width, fillchar) + ref_result = pyfunc(series, width, fillchar) + pd.testing.assert_series_equal(jit_result, ref_result) + + def test_series_str_just_exception_unsupported_fillchar(self): + data = test_global_input_data_unicode_kind1 + series = pd.Series(data) + width = max(len(s) for s in data) + 5 + msg_tmpl = 'Method {}(). The object fillchar\n given: int64\n expected: str' + + pyfuncs = [('ljust', ljust_with_fillchar_usecase), + ('rjust', rjust_with_fillchar_usecase)] + for name, pyfunc in pyfuncs: + cfunc = self.jit(pyfunc) + with self.assertRaises(TypingError) as raises: + cfunc(series, width, 5) + self.assertIn(msg_tmpl.format(name), str(raises.exception)) + + def test_series_str_just_exception_unsupported_kind4(self): + data = test_global_input_data_unicode_kind4 + series = pd.Series(data) + width = max(len(s) for s in data) + 5 + msg = 'NULL object passed to Py_BuildValue' + + for pyfunc in [ljust_usecase, rjust_usecase]: + cfunc = self.jit(pyfunc) + with self.assertRaises(SystemError) as raises: + cfunc(series, width) + self.assertIn(msg, str(raises.exception)) + def test_series_str_startswith(self): def test_impl(series, pat): return series.str.startswith(pat) From e302ffc222403cc6cdef5ec0a74e51cc2106a823 Mon Sep 17 00:00:00 2001 From: Denis Date: Mon, 9 Dec 2019 10:53:22 +0300 Subject: [PATCH 2/2] Add examples and documentation for str.ljust/rjust --- examples/series_str_ljust.py | 39 +++++++++++++++ examples/series_str_rjust.py | 39 +++++++++++++++ .../hpat_pandas_stringmethods_functions.py | 50 +++++++++++++++++++ 3 files changed, 128 insertions(+) create mode 100644 examples/series_str_ljust.py create mode 100644 examples/series_str_rjust.py diff --git a/examples/series_str_ljust.py b/examples/series_str_ljust.py new file mode 100644 index 000000000..c79b1b11e --- /dev/null +++ b/examples/series_str_ljust.py @@ -0,0 +1,39 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import pandas as pd +from numba import njit + + +@njit +def series_str_ljust(): + series = pd.Series(['dog', 'foo', 'bar']) # Series of 'dog', 'foo', 'bar' + out_series = series.str.ljust(5, '*') + + return out_series # Expect series of 'dog**', 'foo**', 'bar**' + + +print(series_str_ljust()) diff --git a/examples/series_str_rjust.py b/examples/series_str_rjust.py new file mode 100644 index 000000000..c62e36052 --- /dev/null +++ b/examples/series_str_rjust.py @@ -0,0 +1,39 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import pandas as pd +from numba import njit + + +@njit +def series_str_rjust(): + series = pd.Series(['dog', 'foo', 'bar']) # Series of 'dog', 'foo', 'bar' + out_series = series.str.rjust(5, '*') + + return out_series # Expect series of '**dog', '**foo', '**bar' + + +print(series_str_rjust()) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index 342454f35..ebd77373e 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -421,6 +421,31 @@ def hpat_pandas_stringmethods_len_impl(self): @overload_method(StringMethodsType, 'ljust') def hpat_pandas_stringmethods_ljust(self, width, fillchar=' '): """ + Intel Scalable Dataframe Compiler User Guide + ******************************************** + Pandas API: pandas.Series.str.ljust + + Examples + -------- + .. literalinclude:: ../../../examples/series_str_ljust.py + :language: python + :lines: 27- + :caption: Filling right side of strings in the Series with an additional character + :name: ex_series_str_ljust + + .. code-block:: console + + > python ./series_str_ljust.py + 0 dog** + 1 foo** + 2 bar** + dtype: object + + .. todo:: Add support of 32-bit Unicode for `str.ljust()` + + Intel Scalable Dataframe Compiler Developer Guide + ************************************************* + Pandas Series method :meth:`pandas.core.strings.StringMethods.ljust()` implementation. Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. @@ -468,6 +493,31 @@ def hpat_pandas_stringmethods_ljust_impl(self, width, fillchar=' '): @overload_method(StringMethodsType, 'rjust') def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '): """ + Intel Scalable Dataframe Compiler User Guide + ******************************************** + Pandas API: pandas.Series.str.rjust + + Examples + -------- + .. literalinclude:: ../../../examples/series_str_rjust.py + :language: python + :lines: 27- + :caption: Filling left side of strings in the Series with an additional character + :name: ex_series_str_rjust + + .. code-block:: console + + > python ./series_str_rjust.py + 0 **dog + 1 **foo + 2 **bar + dtype: object + + .. todo:: Add support of 32-bit Unicode for `str.rjust()` + + Intel Scalable Dataframe Compiler Developer Guide + ************************************************* + Pandas Series method :meth:`pandas.core.strings.StringMethods.rjust()` implementation. Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements.