From 6cffc372aaad460a0663f72f0a15112fb0abdfc8 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Wed, 25 Dec 2019 16:49:51 +0300 Subject: [PATCH 1/7] start Impl --- examples/series/str/series_str_isalnum.py | 39 ++++++++++++ .../hpat_pandas_stringmethods_functions.py | 63 +++++++++++++++++++ sdc/tests/test_series.py | 16 +++++ 3 files changed, 118 insertions(+) create mode 100644 examples/series/str/series_str_isalnum.py diff --git a/examples/series/str/series_str_isalnum.py b/examples/series/str/series_str_isalnum.py new file mode 100644 index 000000000..a89c62bdf --- /dev/null +++ b/examples/series/str/series_str_isalnum.py @@ -0,0 +1,39 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import pandas as pd +from numba import njit + + +@njit +def series_str_isalnum(): + series = pd.Series(['A B', '1.5', '3,000']) # Series of 'A B', '1.5', '3,000' + out_series = series.str.isalnum() + + return out_series # Expect series of True, False, False, False + + +print(series_str_isalnum()) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index 27a65a140..8a9a3178c 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -958,6 +958,69 @@ def _hpat_pandas_stringmethods_autogen(method_name): return _hpat_pandas_stringmethods_autogen_global_dict[global_dict_name] +@overload_method(StringMethodsType, 'isalnum') +def hpat_pandas_stringmethods_isalnum(self): + """ + Intel Scalable Dataframe Compiler User Guide + ******************************************** + Pandas API: pandas.Series.str.isalnum + + Limitations + ----------- + Series elements are expected to be Unicode strings. Elements cannot be NaN. + + Examples + -------- + .. literalinclude:: ../../../examples/series/str/series_str_isalnum.py + :language: python + :lines: 27- + :caption: Check if each word start with an upper case letter + :name: ex_series_str_isalnum + + .. code-block:: console + + > python ./series/str/series_str_isalnum.py + 0 True + 1 False + 2 True + dtype: bool + + Intel Scalable Dataframe Compiler Developer Guide + ************************************************* + + Pandas Series method :meth:`pandas.core.strings.StringMethods.isalnum()` implementation. + + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + + .. only:: developer + + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_isalnum_str + + Parameters + ---------- + self: :class:`pandas.core.strings.StringMethods` + input arg + + Returns + ------- + :obj:`pandas.Series` + returns :obj:`pandas.Series` object + """ + + ty_checker = TypeChecker('Method isalnum().') + ty_checker.check(self, StringMethodsType) + + def hpat_pandas_stringmethods_isalnum_impl(self): + item_count = len(self._data) + result = numpy.empty(item_count, numba.types.boolean) + for idx, item in enumerate(self._data._data): + result[idx] = item.isalnum() + + return pandas.Series(result, self._data._index, name=self._data._name) + + return hpat_pandas_stringmethods_isalnum_impl + + # _hpat_pandas_stringmethods_autogen_methods = sorted(dir(numba.types.misc.UnicodeType.__getattribute__.__qualname__)) _hpat_pandas_stringmethods_autogen_methods = ['upper', 'lower', 'lstrip', 'rstrip', 'strip'] """ diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index ca916880b..529eaf461 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -226,6 +226,10 @@ def rjust_with_fillchar_usecase(series, width, fillchar): return series.str.rjust(width, fillchar) +def isalnum_usecase(series): + return series.str.isalnum() + + GLOBAL_VAL = 2 @@ -5003,6 +5007,18 @@ def test_impl(A, B): B = pd.Series(['b', 'aa', '', 'b', 'o', None, 'oo']) pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False) + def test_series_isalnum_str(self): + series = [['one', 'one1', '1', ''], + ['A B', '1.5', '3,000'], + ['23', '⅕', ''], + ['leopard', 'Golden Eagle', 'SNAKE', ''] + ] + + cfunc = self.jit(isalnum_usecase) + for ser in series: + S = pd.Series(ser) + pd.testing.assert_series_equal(cfunc(S), isalnum_usecase(S)) + if __name__ == "__main__": unittest.main() From afe0970ad3a5e0d0f4f1b58afb25db2f51ce7ac4 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Wed, 25 Dec 2019 17:25:46 +0300 Subject: [PATCH 2/7] add example for isalnum --- examples/series/str/series_str_isalnum.py | 4 ++-- sdc/datatypes/hpat_pandas_stringmethods_functions.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/series/str/series_str_isalnum.py b/examples/series/str/series_str_isalnum.py index a89c62bdf..cca12d39b 100644 --- a/examples/series/str/series_str_isalnum.py +++ b/examples/series/str/series_str_isalnum.py @@ -30,10 +30,10 @@ @njit def series_str_isalnum(): - series = pd.Series(['A B', '1.5', '3,000']) # Series of 'A B', '1.5', '3,000' + series = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) # Series of 'leopard', 'Golden Eagle', 'SNAKE', '' out_series = series.str.isalnum() - return out_series # Expect series of True, False, False, False + return out_series # Expect series of True, False, True, False print(series_str_isalnum()) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index 8a9a3178c..400ceb891 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -974,7 +974,7 @@ def hpat_pandas_stringmethods_isalnum(self): .. literalinclude:: ../../../examples/series/str/series_str_isalnum.py :language: python :lines: 27- - :caption: Check if each word start with an upper case letter + :caption: Check if all the characters in the text are alphanumeric :name: ex_series_str_isalnum .. code-block:: console @@ -983,6 +983,7 @@ def hpat_pandas_stringmethods_isalnum(self): 0 True 1 False 2 True + 3 False dtype: bool Intel Scalable Dataframe Compiler Developer Guide From 898f4e2090b0d63106dc2e836b5f694a04a928af Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Wed, 25 Dec 2019 18:32:15 +0300 Subject: [PATCH 3/7] skip in SDC_CONFIG_PIPELINE_SDC=1, not supported --- sdc/tests/test_series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index 529eaf461..39fc27cc7 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -5007,6 +5007,7 @@ def test_impl(A, B): B = pd.Series(['b', 'aa', '', 'b', 'o', None, 'oo']) pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False) + @skip_sdc_jit("Series.str.isalnum is not supported yet") def test_series_isalnum_str(self): series = [['one', 'one1', '1', ''], ['A B', '1.5', '3,000'], From ddbfc1334c7cac7b14283895eec553777f640939 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 26 Dec 2019 11:17:06 +0300 Subject: [PATCH 4/7] delete comment --- examples/series/str/series_str_isalnum.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/series/str/series_str_isalnum.py b/examples/series/str/series_str_isalnum.py index cca12d39b..f60ac6698 100644 --- a/examples/series/str/series_str_isalnum.py +++ b/examples/series/str/series_str_isalnum.py @@ -30,7 +30,7 @@ @njit def series_str_isalnum(): - series = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) # Series of 'leopard', 'Golden Eagle', 'SNAKE', '' + series = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) out_series = series.str.isalnum() return out_series # Expect series of True, False, True, False From e3613aa53e54c9c8d92297864804257b4f69d220 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 26 Dec 2019 11:52:42 +0300 Subject: [PATCH 5/7] add see also --- .../hpat_pandas_stringmethods_functions.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index 400ceb891..cde7e73c7 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -986,6 +986,26 @@ def hpat_pandas_stringmethods_isalnum(self): 3 False dtype: bool + .. seealso:: + :ref:`Series.str.isalpha ` + Check whether all characters are alphabetic. + :ref:`Series.str.isnumeric ` + Check whether all characters are numeric. + :ref:`Series.str.isalnum ` + Check whether all characters are alphanumeric. + :ref:`Series.str.isdigit ` + Check whether all characters are digits. + :ref:`Series.str.isdecimal ` + Check whether all characters are decimal. + :ref:`Series.str.isspace ` + Check whether all characters are whitespace. + :ref:`Series.str.islower ` + Check whether all characters are lowercase. + :ref:`Series.str.isupper ` + Check whether all characters are uppercase. + :ref:`Series.str.istitle ` + Check whether all characters are titlecase. + Intel Scalable Dataframe Compiler Developer Guide ************************************************* From 073c826f6161d4b40f8ad582f11c16476e40899d Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 26 Dec 2019 16:28:19 +0300 Subject: [PATCH 6/7] correction doc --- sdc/datatypes/hpat_pandas_stringmethods_functions.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index cde7e73c7..bb6b07c58 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -977,14 +977,8 @@ def hpat_pandas_stringmethods_isalnum(self): :caption: Check if all the characters in the text are alphanumeric :name: ex_series_str_isalnum - .. code-block:: console - - > python ./series/str/series_str_isalnum.py - 0 True - 1 False - 2 True - 3 False - dtype: bool + .. command-output:: python ./series/str/series_str_isalnum.py + :cwd: ../../../examples .. seealso:: :ref:`Series.str.isalpha ` From bd80e7ff82fb3ed2bdb50171cd33591400150c10 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Fri, 27 Dec 2019 09:53:36 +0300 Subject: [PATCH 7/7] correction problem code style --- sdc/tests/test_series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index b2de27b95..df65025f4 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -229,6 +229,7 @@ def rjust_with_fillchar_usecase(series, width, fillchar): def istitle_usecase(series): return series.str.istitle() + def isalnum_usecase(series): return series.str.isalnum()