From d37fc1c416db460ac552812632a982eff3cafc0a Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Wed, 25 Dec 2019 09:33:49 +0300 Subject: [PATCH 1/5] start impl isspace --- .../hpat_pandas_stringmethods_functions.py | 16 ++++++++++++++++ sdc/tests/test_series.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index 0b8cb39d1..3697f6fa9 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -717,6 +717,22 @@ def _hpat_pandas_stringmethods_autogen(method_name): global_dict_name = 'hpat_pandas_stringmethods_{methodname}'.format(methodname=method_name) return _hpat_pandas_stringmethods_autogen_global_dict[global_dict_name] +@overload_method(StringMethodsType, 'isspace') +def hpat_pandas_stringmethods_isspace(self): + + ty_checker = TypeChecker('Method isspace().') + ty_checker.check(self, StringMethodsType) + + def hpat_pandas_stringmethods_isspace_impl(self): + item_count = len(self._data) + result = numpy.empty(item_count, numba.types.boolean) + for idx, item in enumerate(self._data._data): + result[idx] = item.isspace() + + return pandas.Series(result, self._data._index, name=self._data._name) + + return hpat_pandas_stringmethods_isspace_impl + # _hpat_pandas_stringmethods_autogen_methods = sorted(dir(numba.types.misc.UnicodeType.__getattribute__.__qualname__)) _hpat_pandas_stringmethods_autogen_methods = ['upper', 'lower', 'lstrip', 'rstrip', 'strip'] diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index 5f4a0611c..6f9fa1514 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -224,6 +224,10 @@ def rjust_with_fillchar_usecase(series, width, fillchar): return series.str.rjust(width, fillchar) +def isspace_usecase(series): + return series.str.isspace() + + GLOBAL_VAL = 2 @@ -4905,6 +4909,18 @@ def test_impl(A, B): B = pd.Series(['b', 
'aa', '', 'b', 'o', None, 'oo']) pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False) + def test_series_isspace_str(self): + series = [['', ' ', ' ', ' '], + ['', ' c ', ' b ', ' a '], + ['aaaaaa', 'bb', 'c', ' d'] + ] + + cfunc = self.jit(isspace_usecase) + for ser in series: + S = pd.Series(ser) + pd.testing.assert_series_equal(cfunc(S), isspace_usecase(S)) + + if __name__ == "__main__": unittest.main() From 0a3dd92958295bb3fe8fd92b986218b73ff23292 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Wed, 25 Dec 2019 10:57:27 +0300 Subject: [PATCH 2/5] add example --- examples/series/str/series_str_isspace.py | 39 +++++++++++++++ .../hpat_pandas_stringmethods_functions.py | 47 +++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 examples/series/str/series_str_isspace.py diff --git a/examples/series/str/series_str_isspace.py b/examples/series/str/series_str_isspace.py new file mode 100644 index 000000000..1228de9f3 --- /dev/null +++ b/examples/series/str/series_str_isspace.py @@ -0,0 +1,39 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import pandas as pd +from numba import njit + + +@njit +def series_str_isspace(): + series = pd.Series([' ', ' c ', ' b ', ' a ']) # Series of ' ', ' c ', ' b ', ' a ' + out_series = series.str.isspace() + + return out_series # Expect series of True, False, False, False + + +print(series_str_isspace()) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index 86be31d26..7f3cdd28a 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -959,6 +959,53 @@ def _hpat_pandas_stringmethods_autogen(method_name): @overload_method(StringMethodsType, 'isspace') def hpat_pandas_stringmethods_isspace(self): + """ + Intel Scalable Dataframe Compiler User Guide + ******************************************** + Pandas API: pandas.Series.str.isspace + + Limitations + ----------- + Series elements are expected to be Unicode strings. Elements cannot be NaN. + + Examples + -------- + .. literalinclude:: ../../../examples/series/str/series_str_isspace.py + :language: python + :lines: 27- + :caption: Check if all the characters in the text are whitespaces + :name: ex_series_str_isspace + + .. 
code-block:: console + + > python ./series/str/series_str_isspace.py + 0 True + 1 False + 2 False + 3 False + dtype: bool + + Intel Scalable Dataframe Compiler Developer Guide + ************************************************* + + Pandas Series method :meth:`pandas.core.strings.StringMethods.isspace()` implementation. + + Note: Only list elements of Unicode type are supported. numpy.nan is not supported as an element. + + .. only:: developer + + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_isspace_str + + Parameters + ---------- + self: :class:`pandas.core.strings.StringMethods` + input arg + + Returns + ------- + :obj:`pandas.Series` + returns :obj:`pandas.Series` object + """ ty_checker = TypeChecker('Method isspace().') ty_checker.check(self, StringMethodsType) From a0f35b510eb334cfa709db9594e3674da1ddd7e8 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Wed, 25 Dec 2019 18:04:28 +0300 Subject: [PATCH 3/5] correction doc --- examples/series/str/series_str_isspace.py | 2 +- .../hpat_pandas_stringmethods_functions.py | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/examples/series/str/series_str_isspace.py b/examples/series/str/series_str_isspace.py index 1228de9f3..b4d8d16bd 100644 --- a/examples/series/str/series_str_isspace.py +++ b/examples/series/str/series_str_isspace.py @@ -30,7 +30,7 @@ @njit def series_str_isspace(): - series = pd.Series([' ', ' c ', ' b ', ' a ']) # Series of ' ', ' c ', ' b ', ' a ' + series = pd.Series([' ', ' c ', ' b ', ' a ']) out_series = series.str.isspace() return out_series # Expect series of True, False, False, False diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index 7f3cdd28a..afc7b8a2c 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -957,6 +957,7 @@ def _hpat_pandas_stringmethods_autogen(method_name): 
global_dict_name = 'hpat_pandas_stringmethods_{methodname}'.format(methodname=method_name) return _hpat_pandas_stringmethods_autogen_global_dict[global_dict_name] + @overload_method(StringMethodsType, 'isspace') def hpat_pandas_stringmethods_isspace(self): """ @@ -985,6 +986,26 @@ def hpat_pandas_stringmethods_isspace(self): 3 False dtype: bool + .. seealso:: + :ref:`Series.str.isalpha <pandas.Series.str.isalpha>` + Check whether all characters are alphabetic. + :ref:`Series.str.isnumeric <pandas.Series.str.isnumeric>` + Check whether all characters are numeric. + :ref:`Series.str.isalnum <pandas.Series.str.isalnum>` + Check whether all characters are alphanumeric. + :ref:`Series.str.isdigit <pandas.Series.str.isdigit>` + Check whether all characters are digits. + :ref:`Series.str.isdecimal <pandas.Series.str.isdecimal>` + Check whether all characters are decimal. + :ref:`Series.str.isspace <pandas.Series.str.isspace>` + Check whether all characters are whitespace. + :ref:`Series.str.islower <pandas.Series.str.islower>` + Check whether all characters are lowercase. + :ref:`Series.str.isupper <pandas.Series.str.isupper>` + Check whether all characters are uppercase. + :ref:`Series.str.istitle <pandas.Series.str.istitle>` + Check whether all characters are titlecase. 
+ Intel Scalable Dataframe Compiler Developer Guide ************************************************* From f38387c52119bd9c7790bdec27a6231110a4e00d Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Wed, 25 Dec 2019 18:27:36 +0300 Subject: [PATCH 4/5] skip in SDC_CONFIG_PIPELINE_SDC=1, not supported --- sdc/tests/test_series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index 465cdf6a6..8e3e23595 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -5007,6 +5007,7 @@ def test_impl(A, B): B = pd.Series(['b', 'aa', '', 'b', 'o', None, 'oo']) pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False) + @skip_sdc_jit("Series.str.isspace is not supported yet") def test_series_isspace_str(self): series = [['', ' ', ' ', ' '], ['', ' c ', ' b ', ' a '], From a002429778a1a3072affb22a39bbce3c3af8a106 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 26 Dec 2019 16:20:10 +0300 Subject: [PATCH 5/5] correction doc --- sdc/datatypes/hpat_pandas_stringmethods_functions.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index afc7b8a2c..e2af01c09 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -977,14 +977,8 @@ def hpat_pandas_stringmethods_isspace(self): :caption: Check if all the characters in the text are whitespaces :name: ex_series_str_isspace - .. code-block:: console - - > python ./series/str/series_str_isspace.py - 0 True - 1 False - 2 False - 3 False - dtype: bool + .. command-output:: python ./series/str/series_str_isspace.py + :cwd: ../../../examples .. seealso:: :ref:`Series.str.isalpha <pandas.Series.str.isalpha>`