From e3ea2a6d784e0351929e3909697122529eff07ea Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 6 Feb 2020 17:00:13 +0300 Subject: [PATCH 1/5] parallel and add mask for methods: capitalize, casefold, swapcase, title --- .../hpat_pandas_stringmethods_functions.py | 32 +++++++++++++------ sdc/hiframes/api.py | 5 ++- sdc/tests/test_series.py | 32 +++++++++++++++++++ 3 files changed, 59 insertions(+), 10 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index 020f9def3..d05f49b38 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -87,6 +87,8 @@ def hpat_pandas_stringmethods_upper_impl(self): from sdc.utilities.sdc_typing_utils import TypeChecker from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType from sdc.utilities.utils import sdc_overload_method +from sdc.hiframes.api import get_nan_mask +from sdc.str_arr_ext import create_str_arr_from_list, str_arr_set_na_by_mask _hpat_pandas_stringmethods_autogen_global_dict = { 'pandas': pandas, @@ -884,7 +886,7 @@ def _hpat_pandas_stringmethods_autogen(method_name): .. only:: developer - Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_{method_name}_str + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_{method_name}_str Parameters ---------- @@ -1037,10 +1039,13 @@ def hpat_pandas_stringmethods_capitalize(self): ty_checker.check(self, StringMethodsType) def hpat_pandas_stringmethods_capitalize_impl(self): + mask = get_nan_mask(self._data._data) item_count = len(self._data) - result = [''] * item_count + res_list = [''] * item_count for idx in numba.prange(item_count): - result[idx] = self._data._data[idx].capitalize() + res_list[idx] = self._data._data[idx].capitalize() + str_arr = create_str_arr_from_list(res_list) + result = str_arr_set_na_by_mask(str_arr, mask) return pandas.Series(result, self._data._index, name=self._data._name) @@ -1053,10 +1058,13 @@ def hpat_pandas_stringmethods_title(self): ty_checker.check(self, StringMethodsType) def hpat_pandas_stringmethods_title_impl(self): + mask = get_nan_mask(self._data._data) item_count = len(self._data) - result = [''] * item_count + res_list = [''] * item_count for idx in numba.prange(item_count): - result[idx] = self._data._data[idx].title() + res_list[idx] = self._data._data[idx].title() + str_arr = create_str_arr_from_list(res_list) + result = str_arr_set_na_by_mask(str_arr, mask) return pandas.Series(result, self._data._index, name=self._data._name) @@ -1069,10 +1077,13 @@ def hpat_pandas_stringmethods_swapcase(self): ty_checker.check(self, StringMethodsType) def hpat_pandas_stringmethods_swapcase_impl(self): + mask = get_nan_mask(self._data._data) item_count = len(self._data) - result = [''] * item_count + res_list = [''] * item_count for idx in numba.prange(item_count): - result[idx] = self._data._data[idx].swapcase() + res_list[idx] = self._data._data[idx].swapcase() + str_arr = create_str_arr_from_list(res_list) + result = str_arr_set_na_by_mask(str_arr, mask) return pandas.Series(result, self._data._index, name=self._data._name) @@ -1085,10 +1096,13 @@ def hpat_pandas_stringmethods_casefold(self): ty_checker.check(self, StringMethodsType) def hpat_pandas_stringmethods_casefold_impl(self): + mask = get_nan_mask(self._data._data) item_count = len(self._data) - result = [''] * item_count + res_list = [''] * item_count for idx in numba.prange(item_count): - result[idx] = self._data._data[idx].casefold() + res_list[idx] = self._data._data[idx].casefold() + str_arr = create_str_arr_from_list(res_list) + result = str_arr_set_na_by_mask(str_arr, mask) return pandas.Series(result, self._data._index, name=self._data._name) diff --git a/sdc/hiframes/api.py b/sdc/hiframes/api.py index 0df83d576..9dacb9c8f 100644 --- a/sdc/hiframes/api.py +++ b/sdc/hiframes/api.py @@ -570,7 +570,10 @@ def get_nan_mask(arr): def get_nan_mask_overload(arr): def get_nan_mask_via_isna_impl(arr): - return np.array([isna(arr, i) for i in np.arange(len(arr))]) + res = np.empty(len(arr), dtype=np.bool_) + for i in numba.prange(len(arr)): + res[i] = isna(arr, i) + return res if isinstance(arr, types.Array): dtype = arr.dtype diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index 5de53714e..9e0288224 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -3176,6 +3176,14 @@ def test_impl(S): s = pd.Series(test_global_input_data_unicode_kind4) pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) + def test_series_capitalize_str_with_none(self): + def test_impl(S): + return S.str.capitalize() + + sdc_func = self.jit(test_impl) + s = pd.Series(['lower', None, 'CAPITALS', 'this is a sentence', None, 'SwApCaSe', None]) + pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) + def test_series_title_str(self): def test_impl(S): return S.str.title() @@ -3184,6 +3192,14 @@ def test_impl(S): s = pd.Series(test_global_input_data_unicode_kind4) pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) + def test_series_title_str_with_none(self): + def test_impl(S): + return S.str.title() + + sdc_func = self.jit(test_impl) + s = pd.Series(['lower', None, 'CAPITALS', 'this is a sentence', None, 'SwApCaSe', None]) + pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) + def test_series_swapcase_str(self): def test_impl(S): return S.str.swapcase() @@ -3192,6 +3208,14 @@ def test_impl(S): s = pd.Series(test_global_input_data_unicode_kind4) pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) + def test_series_swapcase_str_with_none(self): + def test_impl(S): + return S.str.swapcase() + + sdc_func = self.jit(test_impl) + s = pd.Series(['lower', None, 'CAPITALS', 'this is a sentence', None, 'SwApCaSe', None]) + pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) + def test_series_casefold_str(self): def test_impl(S): return S.str.casefold() @@ -3200,6 +3224,14 @@ def test_impl(S): s = pd.Series(test_global_input_data_unicode_kind4) pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) + def test_series_casefold_str_with_none(self): + def test_impl(S): + return S.str.casefold() + + sdc_func = self.jit(test_impl) + s = pd.Series(['lower', None, 'CAPITALS', 'this is a sentence', None, 'SwApCaSe', None]) + pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) + @sdc_limitation def test_series_append_same_names(self): """SDC discards name""" From a30f1c4269206dfbb9c04cd115c95fda9d67cbb4 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 6 Feb 2020 18:19:26 +0300 Subject: [PATCH 2/5] correct master --- .../hpat_pandas_stringmethods_functions.py | 163 ++---------------- sdc/hiframes/api.py | 5 +- sdc/str_arr_ext.py | 1 - sdc/tests/test_series.py | 33 ---- 4 files changed, 11 insertions(+), 191 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index d05f49b38..6c268ca8f 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -25,28 +25,20 @@ # ***************************************************************************** """ - | :class:`pandas.core.strings.StringMethods` functions and operators implementations in HPAT - .. only:: developer - This is autogenerated sources for all Unicode string functions supported by Numba. Currently tested 45 functions only. List of functions obtained automatically from `numba.types.misc.UnicodeType` class - Example of the generated method (for method upper()): `hpat_pandas_stringmethods_upper_parallel_impl` is paralell version (required additional import mentioned in the body) - @sdc_overload_method(StringMethodsType, 'upper') def hpat_pandas_stringmethods_upper(self): - ty_checker = TypeChecker('Method stringmethods.upper().') ty_checker.check(self, StringMethodsType) - def hpat_pandas_stringmethods_upper_parallel_impl(self): from numba.parfor import (init_prange, min_checker, internal_prange) - init_prange() result = [] item_count = len(self._data) @@ -55,11 +47,8 @@ def hpat_pandas_stringmethods_upper_parallel_impl(self): item = self._data[i] item_method = item.upper() result.append(item_method) - return pandas.Series(result) - return hpat_pandas_stringmethods_upper_parallel_impl - def hpat_pandas_stringmethods_upper_impl(self): result = [] item_count = len(self._data) @@ -67,13 +56,9 @@ def hpat_pandas_stringmethods_upper_impl(self): item = self._data[i] item_method = item.upper() result.append(item_method) - return pandas.Series(result) - return hpat_pandas_stringmethods_upper_impl - Test: python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_split_filter - """ @@ -87,8 +72,6 @@ def hpat_pandas_stringmethods_upper_impl(self): from sdc.utilities.sdc_typing_utils import TypeChecker from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType from sdc.utilities.utils import sdc_overload_method -from sdc.hiframes.api import get_nan_mask -from sdc.str_arr_ext import create_str_arr_from_list, str_arr_set_na_by_mask _hpat_pandas_stringmethods_autogen_global_dict = { 'pandas': pandas, @@ -143,18 +126,14 @@ def hpat_pandas_stringmethods_upper_impl(self): def hpat_pandas_stringmethods_{methodname}(self{methodparams}): \"\"\" Pandas Series method :meth:`pandas.core.strings.StringMethods.{methodname}()` implementation. - Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. - .. only:: developer - Test: python -m sdc.runtests sdc.tests.test_strings.TestStrings.test_str2str python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str2str python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_get python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_replace_noregex python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_split python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_contains_regex - Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -162,30 +141,24 @@ def hpat_pandas_stringmethods_{methodname}(self{methodparams}): other: {methodparams} input arguments decription in https://pandas.pydata.org/pandas-docs/version/0.25/reference/series.html#string-handling - Returns ------- :obj:`pandas.Series` returns :obj:`pandas.Series` object \"\"\" - ty_checker = TypeChecker('Method {methodname}().') ty_checker.check(self, StringMethodsType) - def hpat_pandas_stringmethods_{methodname}_impl(self{methodparams}): item_count = len(self._data) result = [''] * item_count # result = numba.typed.List.empty_list(numba.types.unicode_type) - for it in range(item_count): item = self._data._data[it] if len(item) > 0: result[it] = item.{methodname}({methodparams_call}) else: result[it] = item - return pandas.Series(result, self._data._index, name=self._data._name) - return hpat_pandas_stringmethods_{methodname}_impl """ @@ -196,11 +169,9 @@ def hpat_pandas_stringmethods_center(self, width, fillchar=' '): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.center - Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. - Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_center.py @@ -208,23 +179,15 @@ def hpat_pandas_stringmethods_center(self, width, fillchar=' '): :lines: 27- :caption: Filling left and right side of strings in the Series with an additional character :name: ex_series_str_center - .. command-output:: python ./series/str/series_str_center.py :cwd: ../../../examples - .. todo:: Add support of 32-bit Unicode for `str.center()` - Intel Scalable Dataframe Compiler Developer Guide ************************************************* - Pandas Series method :meth:`pandas.core.strings.StringMethods.center()` implementation. - Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. - .. only:: developer - Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_center - Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -233,7 +196,6 @@ def hpat_pandas_stringmethods_center(self, width, fillchar=' '): Minimum width of resulting string fillchar: :obj:`str` Additional character for filling, default is whitespace - Returns ------- :obj:`pandas.Series` @@ -267,11 +229,9 @@ def hpat_pandas_stringmethods_endswith(self, pat, na=None): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.endswith - Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. - Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_endswith.py @@ -279,14 +239,11 @@ def hpat_pandas_stringmethods_endswith(self, pat, na=None): :lines: 27- :caption: Test if the end of each string element matches a string :name: ex_series_str_endswith - .. command-output:: python ./series/str/series_str_endswith.py :cwd: ../../../examples - .. todo:: - Add support of matching the end of each string by a pattern - Add support of parameter ``na`` - .. seealso:: `str.endswith `_ Python standard library string method. @@ -294,18 +251,12 @@ def hpat_pandas_stringmethods_endswith(self, pat, na=None): Same as endswith, but tests the start of string. :ref:`Series.str.contains ` Tests if string element contains a pattern. - Intel Scalable Dataframe Compiler Developer Guide ************************************************* - Pandas Series method :meth:`pandas.core.strings.StringMethods.endswith()` implementation. - Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. - .. only:: developer - Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_endswith - Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -315,7 +266,6 @@ def hpat_pandas_stringmethods_endswith(self, pat, na=None): na: :obj:`bool` Object shown if element tested is not a string *unsupported* - Returns ------- :obj:`pandas.Series` @@ -352,11 +302,9 @@ def hpat_pandas_stringmethods_find(self, sub, start=0, end=None): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.find - Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. - Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_find.py @@ -364,27 +312,18 @@ def hpat_pandas_stringmethods_find(self, sub, start=0, end=None): :lines: 27- :caption: Return lowest indexes in each strings in the Series :name: ex_series_str_find - .. command-output:: python ./series/str/series_str_find.py :cwd: ../../../examples - .. todo:: Add support of parameters ``start`` and ``end`` - .. seealso:: :ref:`Series.str.rfind ` Return highest indexes in each strings. - Intel Scalable Dataframe Compiler Developer Guide ************************************************* - Pandas Series method :meth:`pandas.core.strings.StringMethods.find()` implementation. - Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. - .. only:: developer - Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_find - Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -397,7 +336,6 @@ def hpat_pandas_stringmethods_find(self, sub, start=0, end=None): end: :obj:`int` Right edge index *unsupported* - Returns ------- :obj:`pandas.Series` @@ -455,11 +393,9 @@ def hpat_pandas_stringmethods_len(self): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.len - Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. - Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_len.py @@ -467,32 +403,23 @@ def hpat_pandas_stringmethods_len(self): :lines: 27- :caption: Compute the length of each element in the Series :name: ex_series_str_len - .. command-output:: python ./series/str/series_str_len.py :cwd: ../../../examples - .. seealso:: `str.len` Python built-in function returning the length of an object. :ref:`Series.size ` Returns the length of the Series. - Intel Scalable Dataframe Compiler Developer Guide ************************************************* - Pandas Series method :meth:`pandas.core.strings.StringMethods.len()` implementation. - Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. - .. only:: developer - Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str_len1 - Parameters ---------- self: :class:`pandas.core.strings.StringMethods` input arg - Returns ------- :obj:`pandas.Series` @@ -519,11 +446,9 @@ def hpat_pandas_stringmethods_ljust(self, width, fillchar=' '): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.ljust - Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. - Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_ljust.py @@ -531,23 +456,15 @@ def hpat_pandas_stringmethods_ljust(self, width, fillchar=' '): :lines: 27- :caption: Filling right side of strings in the Series with an additional character :name: ex_series_str_ljust - .. command-output:: python ./series/str/series_str_ljust.py :cwd: ../../../examples - .. todo:: Add support of 32-bit Unicode for `str.ljust()` - Intel Scalable Dataframe Compiler Developer Guide ************************************************* - Pandas Series method :meth:`pandas.core.strings.StringMethods.ljust()` implementation. - Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. - .. only:: developer - Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_ljust - Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -556,7 +473,6 @@ def hpat_pandas_stringmethods_ljust(self, width, fillchar=' '): Minimum width of resulting string fillchar: :obj:`str` Additional character for filling, default is whitespace - Returns ------- :obj:`pandas.Series` @@ -590,11 +506,9 @@ def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.rjust - Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. - Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_rjust.py @@ -602,23 +516,15 @@ def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '): :lines: 27- :caption: Filling left side of strings in the Series with an additional character :name: ex_series_str_rjust - .. command-output:: python ./series/str/series_str_rjust.py :cwd: ../../../examples - .. todo:: Add support of 32-bit Unicode for `str.rjust()` - Intel Scalable Dataframe Compiler Developer Guide ************************************************* - Pandas Series method :meth:`pandas.core.strings.StringMethods.rjust()` implementation. - Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. - .. only:: developer - Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rjust - Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -627,7 +533,6 @@ def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '): Minimum width of resulting string fillchar: :obj:`str` Additional character for filling, default is whitespace - Returns ------- :obj:`pandas.Series` @@ -661,11 +566,9 @@ def hpat_pandas_stringmethods_startswith(self, pat, na=None): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.startswith - Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. - Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_startswith.py @@ -673,14 +576,11 @@ def hpat_pandas_stringmethods_startswith(self, pat, na=None): :lines: 27- :caption: Test if the start of each string element matches a string :name: ex_series_str_startswith - .. command-output:: python ./series/str/series_str_startswith.py :cwd: ../../../examples - .. todo:: - Add support of matching the start of each string by a pattern - Add support of parameter ``na`` - .. seealso:: `str.startswith `_ Python standard library string method. @@ -688,18 +588,12 @@ def hpat_pandas_stringmethods_startswith(self, pat, na=None): Same as startswith, but tests the end of string. :ref:`Series.str.contains ` Tests if string element contains a pattern. - Intel Scalable Dataframe Compiler Developer Guide ************************************************* - Pandas Series method :meth:`pandas.core.strings.StringMethods.startswith()` implementation. - Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. - .. only:: developer - Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_startswith - Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -709,7 +603,6 @@ def hpat_pandas_stringmethods_startswith(self, pat, na=None): na: :obj:`bool` Object shown if element tested is not a string *unsupported* - Returns ------- :obj:`pandas.Series` @@ -746,11 +639,9 @@ def hpat_pandas_stringmethods_zfill(self, width): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.zfill - Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. - Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_zfill.py @@ -758,12 +649,9 @@ def hpat_pandas_stringmethods_zfill(self, width): :lines: 27- :caption: Pad strings in the Series by prepending '0' characters :name: ex_series_str_zfill - .. command-output:: python ./series/str/series_str_zfill.py :cwd: ../../../examples - .. todo:: Add support of 32-bit Unicode for `str.zfill()` - .. seealso:: :ref:`Series.str.rjust ` Fills the left side of strings with an arbitrary character. @@ -773,25 +661,18 @@ def hpat_pandas_stringmethods_zfill(self, width): Fills the specified sides of strings with an arbitrary character. :ref:`Series.str.center ` Fills boths sides of strings with an arbitrary character. - Intel Scalable Dataframe Compiler Developer Guide ************************************************* - Pandas Series method :meth:`pandas.core.strings.StringMethods.zfill()` implementation. - Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. - .. only:: developer - Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_zfill - Parameters ---------- self: :class:`pandas.core.strings.StringMethods` input arg width: :obj:`int` Minimum width of resulting string - Returns ------- :obj:`pandas.Series` @@ -831,7 +712,6 @@ def _hpat_pandas_stringmethods_autogen(method_name): if len(params) > 0: """ Translate parameters string for method - For example: parameters for split(): ', pat=None, n=-1, expand=False' translate into: 'pat, n, expand' @@ -858,11 +738,9 @@ def _hpat_pandas_stringmethods_autogen(method_name): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.{method_name} - Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. - Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_{method_name}.py @@ -870,29 +748,20 @@ def _hpat_pandas_stringmethods_autogen(method_name): :lines: 27- :caption: {caption} :name: ex_series_str_{method_name} - .. command-output:: python ./series/str/series_str_{method_name}.py :cwd: ../../../examples - .. seealso:: {seealso} - Intel Scalable Dataframe Compiler Developer Guide ************************************************* - Pandas Series method :meth:`pandas.core.strings.StringMethods.{method_name}()` implementation. - Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. - .. only:: developer - - Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_{method_name}_str - + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_{method_name}_str Parameters ---------- self: :class:`pandas.core.strings.StringMethods` input arg - Returns ------- :obj:`pandas.Series` @@ -1039,13 +908,10 @@ def hpat_pandas_stringmethods_capitalize(self): ty_checker.check(self, StringMethodsType) def hpat_pandas_stringmethods_capitalize_impl(self): - mask = get_nan_mask(self._data._data) item_count = len(self._data) - res_list = [''] * item_count + result = [''] * item_count for idx in numba.prange(item_count): - res_list[idx] = self._data._data[idx].capitalize() - str_arr = create_str_arr_from_list(res_list) - result = str_arr_set_na_by_mask(str_arr, mask) + result[idx] = self._data._data[idx].capitalize() return pandas.Series(result, self._data._index, name=self._data._name) @@ -1058,13 +924,10 @@ def hpat_pandas_stringmethods_title(self): ty_checker.check(self, StringMethodsType) def hpat_pandas_stringmethods_title_impl(self): - mask = get_nan_mask(self._data._data) item_count = len(self._data) - res_list = [''] * item_count + result = [''] * item_count for idx in numba.prange(item_count): - res_list[idx] = self._data._data[idx].title() - str_arr = create_str_arr_from_list(res_list) - result = str_arr_set_na_by_mask(str_arr, mask) + result[idx] = self._data._data[idx].title() return pandas.Series(result, self._data._index, name=self._data._name) @@ -1077,13 +940,10 @@ def hpat_pandas_stringmethods_swapcase(self): ty_checker.check(self, StringMethodsType) def hpat_pandas_stringmethods_swapcase_impl(self): - mask = get_nan_mask(self._data._data) item_count = len(self._data) - res_list = [''] * item_count + result = [''] * item_count for idx in numba.prange(item_count): - res_list[idx] = self._data._data[idx].swapcase() - str_arr = create_str_arr_from_list(res_list) - result = str_arr_set_na_by_mask(str_arr, mask) + result[idx] = self._data._data[idx].swapcase() return pandas.Series(result, self._data._index, name=self._data._name) @@ -1096,13 +956,10 @@ def hpat_pandas_stringmethods_casefold(self): ty_checker.check(self, StringMethodsType) def hpat_pandas_stringmethods_casefold_impl(self): - mask = get_nan_mask(self._data._data) item_count = len(self._data) - res_list = [''] * item_count + result = [''] * item_count for idx in numba.prange(item_count): - res_list[idx] = self._data._data[idx].casefold() - str_arr = create_str_arr_from_list(res_list) - result = str_arr_set_na_by_mask(str_arr, mask) + result[idx] = self._data._data[idx].casefold() return pandas.Series(result, self._data._index, name=self._data._name) @@ -1206,4 +1063,4 @@ def hpat_pandas_stringmethods_casefold_impl(self): for method_name in _hpat_pandas_stringmethods_autogen_methods: if not (method_name.startswith('__') or method_name in _hpat_pandas_stringmethods_autogen_exceptions): - sdc_overload_method(StringMethodsType, method_name)(_hpat_pandas_stringmethods_autogen(method_name)) + sdc_overload_method(StringMethodsType, method_name)(_hpat_pandas_stringmethods_autogen(method_name)) \ No newline at end of file diff --git a/sdc/hiframes/api.py b/sdc/hiframes/api.py index 9dacb9c8f..0df83d576 100644 --- a/sdc/hiframes/api.py +++ b/sdc/hiframes/api.py @@ -570,10 +570,7 @@ def get_nan_mask(arr): def get_nan_mask_overload(arr): def get_nan_mask_via_isna_impl(arr): - res = np.empty(len(arr), dtype=np.bool_) - for i in numba.prange(len(arr)): - res[i] = isna(arr, i) - return res + return np.array([isna(arr, i) for i in np.arange(len(arr))]) if isinstance(arr, types.Array): dtype = arr.dtype diff --git a/sdc/str_arr_ext.py b/sdc/str_arr_ext.py index 77eb88cc7..2300109a3 100644 --- a/sdc/str_arr_ext.py +++ b/sdc/str_arr_ext.py @@ -74,7 +74,6 @@ def typer(string_list=None): def iternext_str_array(context, builder, sig, args, result): """ Implementation of iternext() for the StringArrayIterator type - :param context: context descriptor :param builder: llvmlite IR Builder :param sig: iterator signature diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index 9e0288224..9fa18ce6c 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -1325,7 +1325,6 @@ def test_impl(A, n): -3 6 -3 3 dtype: int64 - >>>S.loc[0:-3] 0 6 -3 6 @@ -3176,14 +3175,6 @@ def test_impl(S): s = pd.Series(test_global_input_data_unicode_kind4) pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) - def test_series_capitalize_str_with_none(self): - def test_impl(S): - return S.str.capitalize() - - sdc_func = self.jit(test_impl) - s = pd.Series(['lower', None, 'CAPITALS', 'this is a sentence', None, 'SwApCaSe', None]) - pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) - def test_series_title_str(self): def test_impl(S): return S.str.title() @@ -3192,14 +3183,6 @@ def test_impl(S): s = pd.Series(test_global_input_data_unicode_kind4) pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) - def test_series_title_str_with_none(self): - def test_impl(S): - return S.str.title() - - sdc_func = self.jit(test_impl) - s = pd.Series(['lower', None, 'CAPITALS', 'this is a sentence', None, 'SwApCaSe', None]) - pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) - def test_series_swapcase_str(self): def test_impl(S): return S.str.swapcase() @@ -3208,14 +3191,6 @@ def test_impl(S): s = pd.Series(test_global_input_data_unicode_kind4) pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) - def test_series_swapcase_str_with_none(self): - def test_impl(S): - return S.str.swapcase() - - sdc_func = self.jit(test_impl) - s = pd.Series(['lower', None, 'CAPITALS', 'this is a sentence', None, 'SwApCaSe', None]) - pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) - def test_series_casefold_str(self): def test_impl(S): return S.str.casefold() @@ -3224,14 +3199,6 @@ def test_impl(S): s = pd.Series(test_global_input_data_unicode_kind4) pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) - def test_series_casefold_str_with_none(self): - def test_impl(S): - return S.str.casefold() - - sdc_func = self.jit(test_impl) - s = pd.Series(['lower', None, 'CAPITALS', 'this is a sentence', None, 'SwApCaSe', None]) - pd.testing.assert_series_equal(sdc_func(s), test_impl(s)) - @sdc_limitation def test_series_append_same_names(self): """SDC discards name""" From e8deb2f92bc2fd60a84d005ffe74d4fc17655aec Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 13 Feb 2020 08:49:45 +0300 Subject: [PATCH 3/5] add empty line --- .../hpat_pandas_stringmethods_functions.py | 131 +++++++++++++++++- 1 file changed, 130 insertions(+), 1 deletion(-) diff --git a/sdc/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py index 8ca9717b6..6490f3979 100644 --- a/sdc/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -25,20 +25,28 @@ # ***************************************************************************** """ + | :class:`pandas.core.strings.StringMethods` functions and operators implementations in HPAT + .. only:: developer + This is autogenerated sources for all Unicode string functions supported by Numba. Currently tested 45 functions only. List of functions obtained automatically from `numba.types.misc.UnicodeType` class + Example of the generated method (for method upper()): `hpat_pandas_stringmethods_upper_parallel_impl` is paralell version (required additional import mentioned in the body) + @sdc_overload_method(StringMethodsType, 'upper') def hpat_pandas_stringmethods_upper(self): + ty_checker = TypeChecker('Method stringmethods.upper().') ty_checker.check(self, StringMethodsType) + def hpat_pandas_stringmethods_upper_parallel_impl(self): from numba.parfor import (init_prange, min_checker, internal_prange) + init_prange() result = [] item_count = len(self._data) @@ -47,8 +55,11 @@ def hpat_pandas_stringmethods_upper_parallel_impl(self): item = self._data[i] item_method = item.upper() result.append(item_method) + return pandas.Series(result) + return hpat_pandas_stringmethods_upper_parallel_impl + def hpat_pandas_stringmethods_upper_impl(self): result = [] item_count = len(self._data) @@ -56,9 +67,13 @@ def hpat_pandas_stringmethods_upper_impl(self): item = self._data[i] item_method = item.upper() result.append(item_method) + return pandas.Series(result) + return hpat_pandas_stringmethods_upper_impl + Test: python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_split_filter + """ @@ -128,14 +143,18 @@ def hpat_pandas_stringmethods_upper_impl(self): def hpat_pandas_stringmethods_{methodname}(self{methodparams}): \"\"\" Pandas Series method :meth:`pandas.core.strings.StringMethods.{methodname}()` implementation. + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + .. only:: developer + Test: python -m sdc.runtests sdc.tests.test_strings.TestStrings.test_str2str python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str2str python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_get python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_replace_noregex python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_split python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_contains_regex + Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -143,24 +162,30 @@ def hpat_pandas_stringmethods_{methodname}(self{methodparams}): other: {methodparams} input arguments decription in https://pandas.pydata.org/pandas-docs/version/0.25/reference/series.html#string-handling + Returns ------- :obj:`pandas.Series` returns :obj:`pandas.Series` object \"\"\" + ty_checker = TypeChecker('Method {methodname}().') ty_checker.check(self, StringMethodsType) + def hpat_pandas_stringmethods_{methodname}_impl(self{methodparams}): item_count = len(self._data) result = [''] * item_count # result = numba.typed.List.empty_list(numba.types.unicode_type) + for it in range(item_count): item = self._data._data[it] if len(item) > 0: result[it] = item.{methodname}({methodparams_call}) else: result[it] = item + return pandas.Series(result, self._data._index, name=self._data._name) + return hpat_pandas_stringmethods_{methodname}_impl """ @@ -171,9 +196,11 @@ def hpat_pandas_stringmethods_center(self, width, fillchar=' '): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.center + Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. + Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_center.py @@ -181,15 +208,23 @@ def hpat_pandas_stringmethods_center(self, width, fillchar=' '): :lines: 27- :caption: Filling left and right side of strings in the Series with an additional character :name: ex_series_str_center + .. command-output:: python ./series/str/series_str_center.py :cwd: ../../../examples + .. todo:: Add support of 32-bit Unicode for `str.center()` + Intel Scalable Dataframe Compiler Developer Guide ************************************************* + Pandas Series method :meth:`pandas.core.strings.StringMethods.center()` implementation. + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + .. only:: developer + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_center + Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -198,6 +233,7 @@ def hpat_pandas_stringmethods_center(self, width, fillchar=' '): Minimum width of resulting string fillchar: :obj:`str` Additional character for filling, default is whitespace + Returns ------- :obj:`pandas.Series` @@ -231,9 +267,11 @@ def hpat_pandas_stringmethods_endswith(self, pat, na=None): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.endswith + Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. + Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_endswith.py @@ -241,11 +279,14 @@ def hpat_pandas_stringmethods_endswith(self, pat, na=None): :lines: 27- :caption: Test if the end of each string element matches a string :name: ex_series_str_endswith + .. command-output:: python ./series/str/series_str_endswith.py :cwd: ../../../examples + .. todo:: - Add support of matching the end of each string by a pattern - Add support of parameter ``na`` + .. seealso:: `str.endswith `_ Python standard library string method. @@ -253,12 +294,18 @@ def hpat_pandas_stringmethods_endswith(self, pat, na=None): Same as endswith, but tests the start of string. :ref:`Series.str.contains ` Tests if string element contains a pattern. + Intel Scalable Dataframe Compiler Developer Guide ************************************************* + Pandas Series method :meth:`pandas.core.strings.StringMethods.endswith()` implementation. + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + .. only:: developer + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_endswith + Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -268,6 +315,7 @@ def hpat_pandas_stringmethods_endswith(self, pat, na=None): na: :obj:`bool` Object shown if element tested is not a string *unsupported* + Returns ------- :obj:`pandas.Series` @@ -304,9 +352,11 @@ def hpat_pandas_stringmethods_find(self, sub, start=0, end=None): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.find + Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. + Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_find.py @@ -314,18 +364,27 @@ def hpat_pandas_stringmethods_find(self, sub, start=0, end=None): :lines: 27- :caption: Return lowest indexes in each strings in the Series :name: ex_series_str_find + .. command-output:: python ./series/str/series_str_find.py :cwd: ../../../examples + .. todo:: Add support of parameters ``start`` and ``end`` + .. seealso:: :ref:`Series.str.rfind ` Return highest indexes in each strings. + Intel Scalable Dataframe Compiler Developer Guide ************************************************* + Pandas Series method :meth:`pandas.core.strings.StringMethods.find()` implementation. + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + .. only:: developer + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_find + Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -338,6 +397,7 @@ def hpat_pandas_stringmethods_find(self, sub, start=0, end=None): end: :obj:`int` Right edge index *unsupported* + Returns ------- :obj:`pandas.Series` @@ -395,9 +455,11 @@ def hpat_pandas_stringmethods_len(self): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.len + Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. + Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_len.py @@ -405,23 +467,32 @@ def hpat_pandas_stringmethods_len(self): :lines: 27- :caption: Compute the length of each element in the Series :name: ex_series_str_len + .. command-output:: python ./series/str/series_str_len.py :cwd: ../../../examples + .. seealso:: `str.len` Python built-in function returning the length of an object. :ref:`Series.size ` Returns the length of the Series. + Intel Scalable Dataframe Compiler Developer Guide ************************************************* + Pandas Series method :meth:`pandas.core.strings.StringMethods.len()` implementation. + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + .. only:: developer + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str_len1 + Parameters ---------- self: :class:`pandas.core.strings.StringMethods` input arg + Returns ------- :obj:`pandas.Series` @@ -448,9 +519,11 @@ def hpat_pandas_stringmethods_ljust(self, width, fillchar=' '): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.ljust + Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. + Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_ljust.py @@ -458,15 +531,23 @@ def hpat_pandas_stringmethods_ljust(self, width, fillchar=' '): :lines: 27- :caption: Filling right side of strings in the Series with an additional character :name: ex_series_str_ljust + .. command-output:: python ./series/str/series_str_ljust.py :cwd: ../../../examples + .. todo:: Add support of 32-bit Unicode for `str.ljust()` + Intel Scalable Dataframe Compiler Developer Guide ************************************************* + Pandas Series method :meth:`pandas.core.strings.StringMethods.ljust()` implementation. + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + .. only:: developer + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_ljust + Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -475,6 +556,7 @@ def hpat_pandas_stringmethods_ljust(self, width, fillchar=' '): Minimum width of resulting string fillchar: :obj:`str` Additional character for filling, default is whitespace + Returns ------- :obj:`pandas.Series` @@ -508,9 +590,11 @@ def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.rjust + Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. + Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_rjust.py @@ -518,15 +602,23 @@ def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '): :lines: 27- :caption: Filling left side of strings in the Series with an additional character :name: ex_series_str_rjust + .. command-output:: python ./series/str/series_str_rjust.py :cwd: ../../../examples + .. todo:: Add support of 32-bit Unicode for `str.rjust()` + Intel Scalable Dataframe Compiler Developer Guide ************************************************* + Pandas Series method :meth:`pandas.core.strings.StringMethods.rjust()` implementation. + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + .. only:: developer + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rjust + Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -535,6 +627,7 @@ def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '): Minimum width of resulting string fillchar: :obj:`str` Additional character for filling, default is whitespace + Returns ------- :obj:`pandas.Series` @@ -568,9 +661,11 @@ def hpat_pandas_stringmethods_startswith(self, pat, na=None): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.startswith + Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. + Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_startswith.py @@ -578,11 +673,14 @@ def hpat_pandas_stringmethods_startswith(self, pat, na=None): :lines: 27- :caption: Test if the start of each string element matches a string :name: ex_series_str_startswith + .. command-output:: python ./series/str/series_str_startswith.py :cwd: ../../../examples + .. todo:: - Add support of matching the start of each string by a pattern - Add support of parameter ``na`` + .. seealso:: `str.startswith `_ Python standard library string method. @@ -590,12 +688,18 @@ def hpat_pandas_stringmethods_startswith(self, pat, na=None): Same as startswith, but tests the end of string. :ref:`Series.str.contains ` Tests if string element contains a pattern. + Intel Scalable Dataframe Compiler Developer Guide ************************************************* + Pandas Series method :meth:`pandas.core.strings.StringMethods.startswith()` implementation. + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + .. only:: developer + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_startswith + Parameters ---------- self: :class:`pandas.core.strings.StringMethods` @@ -605,6 +709,7 @@ def hpat_pandas_stringmethods_startswith(self, pat, na=None): na: :obj:`bool` Object shown if element tested is not a string *unsupported* + Returns ------- :obj:`pandas.Series` @@ -641,9 +746,11 @@ def hpat_pandas_stringmethods_zfill(self, width): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.zfill + Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. + Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_zfill.py @@ -651,9 +758,12 @@ def hpat_pandas_stringmethods_zfill(self, width): :lines: 27- :caption: Pad strings in the Series by prepending '0' characters :name: ex_series_str_zfill + .. command-output:: python ./series/str/series_str_zfill.py :cwd: ../../../examples + .. todo:: Add support of 32-bit Unicode for `str.zfill()` + .. seealso:: :ref:`Series.str.rjust ` Fills the left side of strings with an arbitrary character. @@ -663,18 +773,25 @@ def hpat_pandas_stringmethods_zfill(self, width): Fills the specified sides of strings with an arbitrary character. :ref:`Series.str.center ` Fills boths sides of strings with an arbitrary character. + Intel Scalable Dataframe Compiler Developer Guide ************************************************* + Pandas Series method :meth:`pandas.core.strings.StringMethods.zfill()` implementation. + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + .. only:: developer + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_zfill + Parameters ---------- self: :class:`pandas.core.strings.StringMethods` input arg width: :obj:`int` Minimum width of resulting string + Returns ------- :obj:`pandas.Series` @@ -714,6 +831,7 @@ def _hpat_pandas_stringmethods_autogen(method_name): if len(params) > 0: """ Translate parameters string for method + For example: parameters for split(): ', pat=None, n=-1, expand=False' translate into: 'pat, n, expand' @@ -740,9 +858,11 @@ def _hpat_pandas_stringmethods_autogen(method_name): Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.str.{method_name} + Limitations ----------- Series elements are expected to be Unicode strings. Elements cannot be NaN. + Examples -------- .. literalinclude:: ../../../examples/series/str/series_str_{method_name}.py @@ -750,20 +870,29 @@ def _hpat_pandas_stringmethods_autogen(method_name): :lines: 27- :caption: {caption} :name: ex_series_str_{method_name} + .. command-output:: python ./series/str/series_str_{method_name}.py :cwd: ../../../examples + .. seealso:: {seealso} + Intel Scalable Dataframe Compiler Developer Guide ************************************************* + Pandas Series method :meth:`pandas.core.strings.StringMethods.{method_name}()` implementation. + Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements. + .. only:: developer + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_{method_name}_str + Parameters ---------- self: :class:`pandas.core.strings.StringMethods` input arg + Returns ------- :obj:`pandas.Series` @@ -1077,4 +1206,4 @@ def hpat_pandas_stringmethods_casefold_impl(self): for method_name in _hpat_pandas_stringmethods_autogen_methods: if not (method_name.startswith('__') or method_name in _hpat_pandas_stringmethods_autogen_exceptions): - sdc_overload_method(StringMethodsType, method_name)(_hpat_pandas_stringmethods_autogen(method_name)) \ No newline at end of file + sdc_overload_method(StringMethodsType, method_name)(_hpat_pandas_stringmethods_autogen(method_name)) From e89499e377a040a48f4eba8cd17be201c250b9f9 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Wed, 19 Feb 2020 07:58:36 +0300 Subject: [PATCH 4/5] add perf tests for Series.str --- sdc/tests/tests_perf/test_perf_series_str.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sdc/tests/tests_perf/test_perf_series_str.py b/sdc/tests/tests_perf/test_perf_series_str.py index 449b9f250..44b70029d 100644 --- a/sdc/tests/tests_perf/test_perf_series_str.py +++ b/sdc/tests/tests_perf/test_perf_series_str.py @@ -80,6 +80,16 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes TC(name='center', params='1', size=[10 ** 4, 10 ** 5], input_data=test_global_input_data_unicode_kind1), TC(name='endswith', params='"e"', size=[10 ** 4, 10 ** 5]), TC(name='find', params='"e"', size=[10 ** 4, 10 ** 5]), + TC(name='isalnum', size=[10 ** 4, 10 ** 5]), + TC(name='isalpha', size=[10 ** 4, 10 ** 5]), + TC(name='isalnum', size=[10 ** 4, 10 ** 5]), + TC(name='isdecimal', size=[10 ** 4, 10 ** 5]), + TC(name='isdigit', size=[10 ** 4, 10 ** 5]), + TC(name='islower', size=[10 ** 4, 10 ** 5]), + TC(name='isnumeric', size=[10 ** 4, 10 ** 5]), + TC(name='isspace', size=[10 ** 4, 10 ** 5]), + TC(name='istitle', size=[10 ** 4, 10 ** 5]), + TC(name='isupper', size=[10 ** 4, 10 ** 5]), TC(name='len', size=[10 ** 4, 10 ** 5]), TC(name='ljust', params='1', size=[10 ** 4, 10 ** 5], input_data=test_global_input_data_unicode_kind1), TC(name='lower', size=[10 ** 4, 10 ** 5]), From faef00991ca15fbbb1dabad126780580d60ab0c4 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Wed, 19 Feb 2020 11:57:58 +0300 Subject: [PATCH 5/5] delete extension case --- sdc/tests/tests_perf/test_perf_series_str.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdc/tests/tests_perf/test_perf_series_str.py b/sdc/tests/tests_perf/test_perf_series_str.py index 44b70029d..2a25ce8e2 100644 --- a/sdc/tests/tests_perf/test_perf_series_str.py +++ b/sdc/tests/tests_perf/test_perf_series_str.py @@ -82,7 +82,6 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes TC(name='find', params='"e"', size=[10 ** 4, 10 ** 5]), TC(name='isalnum', size=[10 ** 4, 10 ** 5]), TC(name='isalpha', size=[10 ** 4, 10 ** 5]), - TC(name='isalnum', size=[10 ** 4, 10 ** 5]), TC(name='isdecimal', size=[10 ** 4, 10 ** 5]), TC(name='isdigit', size=[10 ** 4, 10 ** 5]), TC(name='islower', size=[10 ** 4, 10 ** 5]),