From ca0ad33765220084ea32772c1b892e0f5e508be6 Mon Sep 17 00:00:00 2001 From: "elena.totmenina" Date: Thu, 20 Feb 2020 16:07:06 +0300 Subject: [PATCH 1/4] wip --- sdc/datatypes/hpat_pandas_series_functions.py | 37 +++++++++++-------- sdc/functions/numpy_like.py | 6 ++- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py index b96464173..c7c548fbf 100644 --- a/sdc/datatypes/hpat_pandas_series_functions.py +++ b/sdc/datatypes/hpat_pandas_series_functions.py @@ -1795,21 +1795,22 @@ def hpat_pandas_series_astype_to_str_impl(self, dtype, copy=True, errors='raise' return pandas.Series(data=data, index=self._index, name=self._name) # Return npytypes.Array from npytypes.Array for astype(types.functions.NumberClass), example - astype(np.int64) + # Return npytypes.Array from npytypes.Array for astype(types.StringLiteral), example - astype('int64') def hpat_pandas_series_astype_numba_impl(self, dtype, copy=True, errors='raise'): - return pandas.Series(data=self._data.astype(dtype), index=self._index, name=self._name) + return pandas.Series(data=numpy_like.astype(self._data, dtype), index=self._index, name=self._name) - # Return npytypes.Array from npytypes.Array for astype(types.StringLiteral), example - astype('int64') - def hpat_pandas_series_astype_literal_type_numba_impl(self, dtype, copy=True, errors='raise'): - return pandas.Series(data=self._data.astype(numpy.dtype(dtype)), index=self._index, name=self._name) + # # Return npytypes.Array from npytypes.Array for astype(types.StringLiteral), example - astype('int64') + # def hpat_pandas_series_astype_literal_type_numba_impl(self, dtype, copy=True, errors='raise'): + # return pandas.Series(data=numpy_like.astype(self._data, dtype), index=self._index, name=self._name) # Return self def hpat_pandas_series_astype_no_modify_impl(self, dtype, copy=True, errors='raise'): return pandas.Series(data=self._data, index=self._index, name=self._name) - if ((isinstance(dtype, types.Function) and dtype.typing_key == str) - or (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')): - return hpat_pandas_series_astype_to_str_impl + # if ((isinstance(dtype, types.Function) and dtype.typing_key == str) + # or (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')): + # return hpat_pandas_series_astype_to_str_impl # Needs Numba astype impl support converting unicode_type to NumberClass and other types if isinstance(self.data, StringArrayType): @@ -1823,16 +1824,22 @@ def hpat_pandas_series_astype_no_modify_impl(self, dtype, copy=True, errors='rai else: raise TypingError(f'Needs Numba astype impl support converting unicode_type to {dtype.literal_value}') - if isinstance(self.data, types.npytypes.Array) and isinstance(dtype, types.functions.NumberClass): + # if ((isinstance(self.data, types.npytypes.Array) and isinstance(dtype, (types.functions.NumberClass, types.StringLiteral))) or + # ((isinstance(dtype, types.Function) and dtype.typing_key == str) or + # (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')) + # ): + if ((isinstance(dtype, types.Function) and dtype.typing_key == str) + or (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')): + print('eeee') return hpat_pandas_series_astype_numba_impl - if isinstance(self.data, types.npytypes.Array) and isinstance(dtype, types.StringLiteral): - try: - literal_value = numpy.dtype(dtype.literal_value) - except: - pass # Will raise the exception later - else: - return hpat_pandas_series_astype_literal_type_numba_impl + # if isinstance(self.data, types.npytypes.Array) and isinstance(dtype, types.StringLiteral): + # try: + # literal_value = numpy.dtype(dtype.literal_value) + # except: + # pass # Will raise the exception later + # else: + # return hpat_pandas_series_astype_literal_type_numba_impl # Raise error if dtype is not supported if errors == 'raise': diff --git a/sdc/functions/numpy_like.py b/sdc/functions/numpy_like.py index 3ac537d4e..b24c67291 100644 --- a/sdc/functions/numpy_like.py +++ b/sdc/functions/numpy_like.py @@ -106,9 +106,11 @@ def sdc_astype_overload(self, dtype): Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k astype """ - + print('qqqqq') + print(dtype) + print(type(dtype)) ty_checker = TypeChecker("numpy-like 'astype'") - if not isinstance(self, types.Array): + if not isinstance(self, (types.Array, StringArrayType)): return None if not isinstance(dtype, (types.functions.NumberClass, types.Function, types.Literal)): From b3e4f9647829f21adec996030b38e48079fa9bb2 Mon Sep 17 00:00:00 2001 From: "elena.totmenina" Date: Thu, 20 Feb 2020 17:34:46 +0300 Subject: [PATCH 2/4] change call in series --- sdc/datatypes/hpat_pandas_series_functions.py | 48 +++---------------- sdc/functions/numpy_like.py | 4 +- sdc/tests/tests_perf/test_perf_series.py | 2 +- 3 files changed, 8 insertions(+), 46 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py index c7c548fbf..91faaaad5 100644 --- a/sdc/datatypes/hpat_pandas_series_functions.py +++ b/sdc/datatypes/hpat_pandas_series_functions.py @@ -1777,43 +1777,20 @@ def hpat_pandas_series_astype(self, dtype, copy=True, errors='raise'): errors in ('raise', 'ignore')): ty_checker.raise_exc(errors, 'str', 'errors') - # Return StringArray for astype(str) or astype('str') - def hpat_pandas_series_astype_to_str_impl(self, dtype, copy=True, errors='raise'): - num_chars = 0 - arr_len = len(self._data) - - # Get total chars for new array - for i in prange(arr_len): - item = self._data[i] - num_chars += len(str(item)) # TODO: check NA - - data = pre_alloc_string_array(arr_len, num_chars) - for i in prange(arr_len): - item = self._data[i] - data[i] = str(item) # TODO: check NA - - return pandas.Series(data=data, index=self._index, name=self._name) - # Return npytypes.Array from npytypes.Array for astype(types.functions.NumberClass), example - astype(np.int64) # Return npytypes.Array from npytypes.Array for astype(types.StringLiteral), example - astype('int64') def hpat_pandas_series_astype_numba_impl(self, dtype, copy=True, errors='raise'): return pandas.Series(data=numpy_like.astype(self._data, dtype), index=self._index, name=self._name) - # # Return npytypes.Array from npytypes.Array for astype(types.StringLiteral), example - astype('int64') - # def hpat_pandas_series_astype_literal_type_numba_impl(self, dtype, copy=True, errors='raise'): - # return pandas.Series(data=numpy_like.astype(self._data, dtype), index=self._index, name=self._name) - # Return self def hpat_pandas_series_astype_no_modify_impl(self, dtype, copy=True, errors='raise'): return pandas.Series(data=self._data, index=self._index, name=self._name) - - # if ((isinstance(dtype, types.Function) and dtype.typing_key == str) - # or (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')): - # return hpat_pandas_series_astype_to_str_impl + str_check = ((isinstance(dtype, types.Function) and dtype.typing_key == str) or + (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')) # Needs Numba astype impl support converting unicode_type to NumberClass and other types - if isinstance(self.data, StringArrayType): + if (isinstance(self.data, StringArrayType) and not str_check): if isinstance(dtype, types.functions.NumberClass) and errors == 'raise': raise TypingError(f'Needs Numba astype impl support converting unicode_type to {dtype}') if isinstance(dtype, types.StringLiteral) and errors == 'raise': @@ -1824,24 +1801,11 @@ def hpat_pandas_series_astype_no_modify_impl(self, dtype, copy=True, errors='rai else: raise TypingError(f'Needs Numba astype impl support converting unicode_type to {dtype.literal_value}') - # if ((isinstance(self.data, types.npytypes.Array) and isinstance(dtype, (types.functions.NumberClass, types.StringLiteral))) or - # ((isinstance(dtype, types.Function) and dtype.typing_key == str) or - # (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')) - # ): - if ((isinstance(dtype, types.Function) and dtype.typing_key == str) - or (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')): - print('eeee') + if ((isinstance(self.data, types.npytypes.Array) and + isinstance(dtype, (types.functions.NumberClass, types.StringLiteral))) or str_check + ): return hpat_pandas_series_astype_numba_impl - # if isinstance(self.data, types.npytypes.Array) and isinstance(dtype, types.StringLiteral): - # try: - # literal_value = numpy.dtype(dtype.literal_value) - # except: - # pass # Will raise the exception later - # else: - # return hpat_pandas_series_astype_literal_type_numba_impl - - # Raise error if dtype is not supported if errors == 'raise': raise TypingError(f'{_func_name} The object must be a supported type. Given dtype: {dtype}') else: diff --git a/sdc/functions/numpy_like.py b/sdc/functions/numpy_like.py index b24c67291..82b2a3d60 100644 --- a/sdc/functions/numpy_like.py +++ b/sdc/functions/numpy_like.py @@ -106,9 +106,7 @@ def sdc_astype_overload(self, dtype): Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k astype """ - print('qqqqq') - print(dtype) - print(type(dtype)) + ty_checker = TypeChecker("numpy-like 'astype'") if not isinstance(self, (types.Array, StringArrayType)): return None diff --git a/sdc/tests/tests_perf/test_perf_series.py b/sdc/tests/tests_perf/test_perf_series.py index bbc2c90ee..6e98fc32b 100644 --- a/sdc/tests/tests_perf/test_perf_series.py +++ b/sdc/tests/tests_perf/test_perf_series.py @@ -68,7 +68,7 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes TC(name='append', size=[10 ** 7], params='other', data_num=2), TC(name='apply', size=[10 ** 7], params='lambda x: x'), TC(name='argsort', size=[10 ** 4]), - TC(name='astype', size=[10 ** 5], call_expr='data.astype(np.int8)', usecase_params='data', + TC(name='astype', size=[10 ** 8], call_expr='data.astype(np.int8)', usecase_params='data', input_data=[test_global_input_data_float64[0]]), TC(name='at', size=[10 ** 7], call_expr='data.at[3]', usecase_params='data'), TC(name='chain_add_and_sum', size=[20 * 10 ** 6, 25 * 10 ** 6, 30 * 10 ** 6], call_expr='(A + B).sum()', From e3e790e05ba6dc58948ea7a68873a588664da87f Mon Sep 17 00:00:00 2001 From: "elena.totmenina" Date: Thu, 20 Feb 2020 17:43:38 +0300 Subject: [PATCH 3/4] pep --- sdc/datatypes/hpat_pandas_series_functions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py index 91faaaad5..0c19c23bb 100644 --- a/sdc/datatypes/hpat_pandas_series_functions.py +++ b/sdc/datatypes/hpat_pandas_series_functions.py @@ -1786,7 +1786,7 @@ def hpat_pandas_series_astype_numba_impl(self, dtype, copy=True, errors='raise') def hpat_pandas_series_astype_no_modify_impl(self, dtype, copy=True, errors='raise'): return pandas.Series(data=self._data, index=self._index, name=self._name) - str_check = ((isinstance(dtype, types.Function) and dtype.typing_key == str) or + str_check = ((isinstance(dtype, types.Function) and dtype.typing_key == str) or (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')) # Needs Numba astype impl support converting unicode_type to NumberClass and other types @@ -1801,9 +1801,9 @@ def hpat_pandas_series_astype_no_modify_impl(self, dtype, copy=True, errors='rai else: raise TypingError(f'Needs Numba astype impl support converting unicode_type to {dtype.literal_value}') - if ((isinstance(self.data, types.npytypes.Array) and + if ((isinstance(self.data, types.npytypes.Array) and isinstance(dtype, (types.functions.NumberClass, types.StringLiteral))) or str_check - ): + ): return hpat_pandas_series_astype_numba_impl if errors == 'raise': From 6484f69367b2d9ee3b6067b016b9162a46c5cb8e Mon Sep 17 00:00:00 2001 From: "elena.totmenina" Date: Thu, 20 Feb 2020 17:58:50 +0300 Subject: [PATCH 4/4] pep+ --- sdc/datatypes/hpat_pandas_series_functions.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py index 0c19c23bb..795647084 100644 --- a/sdc/datatypes/hpat_pandas_series_functions.py +++ b/sdc/datatypes/hpat_pandas_series_functions.py @@ -1801,9 +1801,10 @@ def hpat_pandas_series_astype_no_modify_impl(self, dtype, copy=True, errors='rai else: raise TypingError(f'Needs Numba astype impl support converting unicode_type to {dtype.literal_value}') - if ((isinstance(self.data, types.npytypes.Array) and - isinstance(dtype, (types.functions.NumberClass, types.StringLiteral))) or str_check - ): + data_narr = isinstance(self.data, types.npytypes.Array) + dtype_num_liter = isinstance(dtype, (types.functions.NumberClass, types.StringLiteral)) + + if data_narr and dtype_num_liter or str_check: return hpat_pandas_series_astype_numba_impl if errors == 'raise':