diff --git a/examples/series_str_zfill.py b/examples/series_str_zfill.py new file mode 100644 index 000000000..4475a272e --- /dev/null +++ b/examples/series_str_zfill.py @@ -0,0 +1,39 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ***************************************************************************** + +import pandas as pd +from numba import njit + + +@njit +def series_str_zfill(): + series = pd.Series(['dog', 'foo', 'bar']) # Series of 'dog', 'foo', 'bar' + out_series = series.str.zfill(5) + + return out_series # Expect series of '00dog', '00foo', '00bar' + + +print(series_str_zfill()) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index ebd77373e..9e01a610d 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -613,6 +613,72 @@ def hpat_pandas_stringmethods_startswith_impl(self, pat, na=None): return hpat_pandas_stringmethods_startswith_impl +@overload_method(StringMethodsType, 'zfill') +def hpat_pandas_stringmethods_zfill(self, width): + """ + Intel Scalable Dataframe Compiler User Guide + ******************************************** + Pandas API: pandas.Series.str.zfill + + Examples + -------- + .. literalinclude:: ../../../examples/series_str_zfill.py + :language: python + :lines: 27- + :caption: Pad strings in the Series by prepending '0' characters + :name: ex_series_str_zfill + + .. code-block:: console + + > python ./series_str_zfill.py + 0 00dog + 1 00foo + 2 00bar + dtype: object + + .. todo:: Add support of 32-bit Unicode for `str.zfill()` + + Intel Scalable Dataframe Compiler Developer Guide + ************************************************* + + Pandas Series method :meth:`pandas.core.strings.StringMethods.zfill()` implementation. + + Note: Only elements of Unicode string type are supported. numpy.NaN is not supported as an element. + + .. 
only:: developer + + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_str_zfill + + Parameters + ---------- + self: :class:`pandas.core.strings.StringMethods` + input arg + width: :obj:`int` + Minimum width of resulting string + + Returns + ------- + :obj:`pandas.Series` + returns :obj:`pandas.Series` object + """ + + ty_checker = TypeChecker('Method zfill().') + ty_checker.check(self, StringMethodsType) + + if not isinstance(width, Integer): + ty_checker.raise_exc(width, 'int', 'width') + + def hpat_pandas_stringmethods_zfill_impl(self, width): + item_count = len(self._data) + result = [''] * item_count + for idx, item in enumerate(self._data._data): + result[idx] = item.zfill(width) + + return pandas.Series(result, self._data._index, name=self._data._name) + + return hpat_pandas_stringmethods_zfill_impl + + def _hpat_pandas_stringmethods_autogen(method_name): """" The function generates a function for 'method_name' from source text that is created on the fly. diff --git a/sdc/hiframes/pd_series_ext.py b/sdc/hiframes/pd_series_ext.py index 673015139..e0a173724 100644 --- a/sdc/hiframes/pd_series_ext.py +++ b/sdc/hiframes/pd_series_ext.py @@ -760,7 +760,7 @@ def resolve_head(self, ary, args, kws): str2str_methods_excluded = [ 'upper', 'center', 'endswith', 'find', 'isupper', 'len', 'ljust', - 'lower', 'lstrip', 'rjust', 'rstrip', 'startswith', 'strip' + 'lower', 'lstrip', 'rjust', 'rstrip', 'startswith', 'strip', 'zfill' ] """ Functions which are used from Numba directly by calling from StringMethodsType diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index 7d96eac75..5f4a0611c 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -2692,6 +2692,37 @@ def test_impl(series, pat, na): msg = msg_tmpl.format('expected: None') self.assertIn(msg, str(raises.exception)) + def test_series_str_zfill(self): + def test_impl(series, width): + return series.str.zfill(width) + + hpat_func = self.jit(test_impl) + + data = 
test_global_input_data_unicode_kind1 + data_lengths = [len(s) for s in data] + + for index in [None, list(range(len(data)))[::-1], data[::-1]]: + series = pd.Series(data, index, name='A') + for width in [max(data_lengths) + 5, min(data_lengths)]: + jit_result = hpat_func(series, width) + ref_result = test_impl(series, width) + pd.testing.assert_series_equal(jit_result, ref_result) + + def test_series_str_zfill_exception_unsupported_kind4(self): + def test_impl(series, width): + return series.str.zfill(width) + + hpat_func = self.jit(test_impl) + + data = test_global_input_data_unicode_kind4 + series = pd.Series(data) + width = max(len(s) for s in data) + 5 + + with self.assertRaises(SystemError) as raises: + hpat_func(series, width) + msg = 'NULL object passed to Py_BuildValue' + self.assertIn(msg, str(raises.exception)) + def test_series_str2str(self): common_methods = ['lower', 'upper', 'isupper'] sdc_methods = ['capitalize', 'swapcase', 'title',