From 7579497fc6f6eb2248ddf9529dfe9419d9e2e3ae Mon Sep 17 00:00:00 2001 From: "Kozlov, Alexey" Date: Wed, 30 Oct 2019 20:22:00 +0300 Subject: [PATCH] Fix for 'appending StringArrays drops NaNs problem' and minor changes --- hpat/datatypes/common_functions.py | 133 ++++++++++++++++++ .../datatypes/hpat_pandas_series_functions.py | 85 +++++------ hpat/hiframes/api.py | 72 +--------- hpat/hiframes/pd_series_ext.py | 2 +- hpat/str_arr_ext.py | 14 ++ hpat/tests/test_series.py | 75 +++++----- 6 files changed, 226 insertions(+), 155 deletions(-) create mode 100644 hpat/datatypes/common_functions.py diff --git a/hpat/datatypes/common_functions.py b/hpat/datatypes/common_functions.py new file mode 100644 index 000000000..3b0fb5fdc --- /dev/null +++ b/hpat/datatypes/common_functions.py @@ -0,0 +1,133 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +""" + +| This file contains internal common functions used in SDC implementation across different files + +""" + +import numpy + +from numba import types +from numba.extending import overload +from numba import numpy_support + +import hpat +from hpat.str_arr_ext import (string_array_type, num_total_chars, append_string_array_to) + + +def has_literal_value(var, value): + '''Used during typing to check that variable var is a Numba literal value equal to value''' + + if not isinstance(var, types.Literal): + return False + + if value is None or isinstance(value, type(bool)): + return var.literal_value is value + else: + return var.literal_value == value + + +def has_python_value(var, value): + '''Used during typing to check that variable var was resolved as Python type and has specific value''' + + if not isinstance(var, type(value)): + return False + + if value is None or isinstance(value, type(bool)): + return var is value + else: + return var == value + + +def hpat_arrays_append(A, B): + pass + + +@overload(hpat_arrays_append) +def hpat_arrays_append_overload(A, B): + '''Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A''' + + if isinstance(A, types.Array): + if isinstance(B, types.Array): + def _append_single_numeric_impl(A, B): + return numpy.concatenate((A, B,)) + + return _append_single_numeric_impl + elif 
isinstance(B, (types.UniTuple, types.List)): + # TODO: this heavily relies on B being a homogeneous tuple/list - find a better way + # to resolve common dtype of heterogeneous sequence of arrays + np_dtypes = [numpy_support.as_dtype(A.dtype), numpy_support.as_dtype(B.dtype.dtype)] + np_common_dtype = numpy.find_common_type([], np_dtypes) + numba_common_dtype = numpy_support.from_dtype(np_common_dtype) + + # TODO: refactor to use numpy.concatenate when Numba supports building a tuple at runtime + def _append_list_numeric_impl(A, B): + + total_length = len(A) + numpy.array([len(arr) for arr in B]).sum() + new_data = numpy.empty(total_length, numba_common_dtype) + + stop = len(A) + new_data[:stop] = A + for arr in B: + start = stop + stop = start + len(arr) + new_data[start:stop] = arr + return new_data + + return _append_list_numeric_impl + + elif A == string_array_type: + if B == string_array_type: + def _append_single_string_array_impl(A, B): + total_size = len(A) + len(B) + total_chars = num_total_chars(A) + num_total_chars(B) + new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars) + + pos = 0 + pos += append_string_array_to(new_data, pos, A) + pos += append_string_array_to(new_data, pos, B) + + return new_data + + return _append_single_string_array_impl + elif (isinstance(B, (types.UniTuple, types.List)) and B.dtype == string_array_type): + def _append_list_string_array_impl(A, B): + array_list = [A] + list(B) + total_size = numpy.array([len(arr) for arr in array_list]).sum() + total_chars = numpy.array([num_total_chars(arr) for arr in array_list]).sum() + + new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars) + + pos = 0 + pos += append_string_array_to(new_data, pos, A) + for arr in B: + pos += append_string_array_to(new_data, pos, arr) + + return new_data + + return _append_list_string_array_impl diff --git a/hpat/datatypes/hpat_pandas_series_functions.py b/hpat/datatypes/hpat_pandas_series_functions.py index 
3d6691645..f77131698 100644 --- a/hpat/datatypes/hpat_pandas_series_functions.py +++ b/hpat/datatypes/hpat_pandas_series_functions.py @@ -39,11 +39,11 @@ from numba import types import hpat +import hpat.datatypes.common_functions as common_functions from hpat.hiframes.pd_series_ext import SeriesType from hpat.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars) from hpat.utils import to_array - class TypeChecker: """ Validate object type and raise TypingError if the type is invalid, e.g.: @@ -759,26 +759,26 @@ def hpat_pandas_series_append(self, to_append, ignore_index=False, verify_integr .. only:: developer - Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_append* + Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_append* Parameters ----------- self: :obj:`pandas.Series` - input series + input series to_append : :obj:`pandas.Series` object or :obj:`list` or :obj:`set` - Series (or list or tuple of Series) to append with self + Series (or list or tuple of Series) to append with self ignore_index: :obj:`bool`, default False - If True, do not use the index labels. - Supported as literal value only + If True, do not use the index labels. + Supported as literal value only verify_integrity: :obj:`bool`, default False - If True, raise Exception on creating index with duplicates. - *unsupported* + If True, raise Exception on creating index with duplicates. + *unsupported* Returns ------- :obj:`pandas.Series` - returns :obj:`pandas.Series` object - Concatenated Series + returns :obj:`pandas.Series` object + Concatenated Series """ @@ -807,53 +807,38 @@ def hpat_pandas_series_append(self, to_append, ignore_index=False, verify_integr '{} Unsupported parameters. 
Given verify_integrity: {}'.format(_func_name, verify_integrity)) # ignore_index value has to be known at compile time to select between implementations with different signatures - if ((isinstance(ignore_index, types.Literal) and ignore_index.literal_value is True) - or (isinstance(ignore_index, bool) and ignore_index is True)): - # implementations that ignore series index - if isinstance(to_append, SeriesType): - def hpat_pandas_series_append_single_impl(self, to_append, ignore_index=False, verify_integrity=False): + ignore_index_is_false = (common_functions.has_literal_value(ignore_index, False) + or common_functions.has_python_value(ignore_index, False) + or isinstance(ignore_index, types.Omitted)) + to_append_is_series = isinstance(to_append, SeriesType) + + if ignore_index_is_false: + def hpat_pandas_series_append_impl(self, to_append, ignore_index=False, verify_integrity=False): + if to_append_is_series == True: # noqa + new_data = common_functions.hpat_arrays_append(self._data, to_append._data) + new_index = common_functions.hpat_arrays_append(self.index, to_append.index) + else: + data_arrays_to_append = [series._data for series in to_append] + index_arrays_to_append = [series.index for series in to_append] + new_data = common_functions.hpat_arrays_append(self._data, data_arrays_to_append) + new_index = common_functions.hpat_arrays_append(self.index, index_arrays_to_append) - new_data = hpat.hiframes.api._append(self._data, to_append._data) - new_index = numpy.arange(len(self._data) + len(to_append._data)) - return pandas.Series(new_data, new_index) + return pandas.Series(new_data, new_index) - return hpat_pandas_series_append_single_impl + return hpat_pandas_series_append_impl - elif isinstance(to_append, (types.UniTuple, types.List)): - def hpat_pandas_series_append_list_impl(self, to_append, ignore_index=False, verify_integrity=False): + else: + def hpat_pandas_series_append_ignore_index_impl(self, to_append, ignore_index=False, verify_integrity=False): 
+ if to_append_is_series == True: # noqa + new_data = common_functions.hpat_arrays_append(self._data, to_append._data) + else: arrays_to_append = [series._data for series in to_append] - sum_of_sizes = numpy.array([len(arr) for arr in arrays_to_append]).sum() - new_data = hpat.hiframes.api._append(self._data, arrays_to_append) - new_index = numpy.arange(len(self._data) + sum_of_sizes) - return pandas.Series(new_data, new_index) - - return hpat_pandas_series_append_list_impl - - elif ((isinstance(ignore_index, types.Literal) and ignore_index.literal_value is False) - or (isinstance(ignore_index, bool) and ignore_index is False) - or isinstance(ignore_index, types.Omitted)): - # implementations that handle series index (ignore_index is False) - if isinstance(to_append, SeriesType): - def hpat_pandas_series_append_single_impl(self, to_append, ignore_index=False, verify_integrity=False): - - new_data = hpat.hiframes.api._append(self._data, to_append._data) - new_index = hpat.hiframes.api._append(self.index, to_append.index) - return pandas.Series(new_data, new_index) - - return hpat_pandas_series_append_single_impl - - elif isinstance(to_append, (types.UniTuple, types.List)): - def hpat_pandas_series_append_list_impl(self, to_append, ignore_index=False, verify_integrity=False): - - data_arrays_to_append = [series._data for series in to_append] - index_arrays_to_append = [series.index for series in to_append] + new_data = common_functions.hpat_arrays_append(self._data, arrays_to_append) - new_data = hpat.hiframes.api._append(self._data, data_arrays_to_append) - new_index = hpat.hiframes.api._append(self.index, index_arrays_to_append) - return pandas.Series(new_data, new_index) + return pandas.Series(new_data, None) - return hpat_pandas_series_append_list_impl + return hpat_pandas_series_append_ignore_index_impl @overload_method(SeriesType, 'copy') diff --git a/hpat/hiframes/api.py b/hpat/hiframes/api.py index ce9b9db72..8c4495d61 100644 --- a/hpat/hiframes/api.py +++ 
b/hpat/hiframes/api.py @@ -56,7 +56,12 @@ from hpat.utils import _numba_to_c_type_map, unliteral_all from hpat.str_ext import string_type, list_string_array_type from hpat.set_ext import build_set -from hpat.str_arr_ext import (StringArrayType, string_array_type, is_str_arr_typ, cp_str_list_to_array, num_total_chars) +from hpat.str_arr_ext import ( + StringArrayType, + string_array_type, + is_str_arr_typ, + num_total_chars, + append_string_array_to) from hpat.hiframes.pd_timestamp_ext import (pandas_timestamp_type, datetime_date_type, set_df_datetime_date_lower) from hpat.hiframes.pd_series_ext import ( SeriesType, @@ -1790,68 +1795,3 @@ def _analyze_op_pair_first(self, scope, equiv_set, expr): numba.array_analysis.ArrayAnalysis._analyze_op_pair_first = _analyze_op_pair_first - - -def _append(A, B): - return None - - -@overload(_append) -def _append_overload(A, B): - '''Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A''' - - if isinstance(A, types.Array): - if isinstance(B, types.Array): - def _append_single_numeric_impl(A, B): - return np.concatenate((A, B,)) - - return _append_single_numeric_impl - elif isinstance(B, (types.UniTuple, types.List)): - # TODO: this heavily relies on B being a homogeneous tuple/list - find a better way - # to resolve common dtype of heterogeneous sequence of arrays - np_dtypes = [numpy_support.as_dtype(A.dtype), numpy_support.as_dtype(B.dtype.dtype)] - np_common_dtype = np.find_common_type([], np_dtypes) - numba_common_dtype = numpy_support.from_dtype(np_common_dtype) - - # TODO: refactor to use np.concatenate when Numba supports building a tuple at runtime - def _append_list_numeric_impl(A, B): - - total_length = len(A) + np.array([len(arr) for arr in B]).sum() - new_data = np.empty(total_length, numba_common_dtype) - - stop = len(A) - new_data[:stop] = A - for arr in B: - start = stop - stop = start + len(arr) - new_data[start:stop] = arr - return new_data - - return 
_append_list_numeric_impl - - elif A == string_array_type: - if B == string_array_type: - def _append_single_string_array_impl(A, B): - total_size = len(A) + len(B) - total_chars = num_total_chars(A) + num_total_chars(B) - new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars) - - list_of_strings = list(A) + list(B) - hpat.str_arr_ext.cp_str_list_to_array(new_data, list_of_strings) - return new_data - - return _append_single_string_array_impl - elif (isinstance(B, (types.UniTuple, types.List)) and B.dtype == string_array_type): - def _append_list_string_array_impl(A, B): - array_list = [A] + list(B) - total_size = np.array([len(arr) for arr in array_list]).sum() - total_chars = np.array([num_total_chars(arr) for arr in array_list]).sum() - - new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars) - list_of_strings = list(A) - for arr in B: - list_of_strings.extend(list(arr)) - hpat.str_arr_ext.cp_str_list_to_array(new_data, list_of_strings) - return new_data - - return _append_list_string_array_impl diff --git a/hpat/hiframes/pd_series_ext.py b/hpat/hiframes/pd_series_ext.py index 7daa610e2..3074bbe74 100644 --- a/hpat/hiframes/pd_series_ext.py +++ b/hpat/hiframes/pd_series_ext.py @@ -1288,7 +1288,7 @@ def hpat_pandas_series_ctor_impl(data=None, index=None, dtype=None, name=None, c '''' use binop here as otherwise Numba's dead branch pruning doesn't work TODO: replace with 'if not is_index_none' when resolved ''' - if is_index_none == False: + if is_index_none == False: # noqa fix_index = hpat.hiframes.api.fix_df_array(index) else: fix_index = index diff --git a/hpat/str_arr_ext.py b/hpat/str_arr_ext.py index 2c5ab4714..b4b33eb70 100644 --- a/hpat/str_arr_ext.py +++ b/hpat/str_arr_ext.py @@ -1419,3 +1419,17 @@ def lower_glob(context, builder, sig, args): # context.nrt.decref(builder, ty, ret) return impl_ret_new_ref(context, builder, typ, ret) + + +@numba.njit(no_cpython_wrapper=True) +def append_string_array_to(result, pos, 
A): + # precondition: result is allocated with the size enough to contain A + i, j = 0, pos + for str in A: + result[j] = str + if str_arr_is_na(A, i): + hpat.str_arr_ext.str_arr_set_na(result, j) + i += 1 + j += 1 + + return i diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index eea18cdf1..3f0d90cb9 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -1405,7 +1405,6 @@ def test_impl(S): S2 = S1.copy() pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, 'No support of axis argument in old-style Series.dropna() impl') def test_series_dropna_axis1(self): @@ -2114,13 +2113,13 @@ def test_series_str2str(self): @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "Old-style append implementation doesn't handle ignore_index argument") def test_series_append_single_ignore_index(self): - '''Verify Series.append() concatenates Series with other single Series ignoring index''' + '''Verify Series.append() concatenates Series with other single Series ignoring indexes''' def test_impl(S, other): return S.append(other, ignore_index=True) hpat_func = hpat.jit(test_impl) - dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0]], - 'string': [['a', 'b', 'q'], ['d', 'e']]} + dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0, -1]], + 'string': [['a', None, 'bbbb', ''], ['dd', None, '', 'e', 'ttt']]} for dtype, data_list in dtype_to_data.items(): with self.subTest(series_dtype=dtype, concatenated_data=data_list): @@ -2130,14 +2129,14 @@ def test_impl(S, other): @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "Old-style append implementation doesn't handle ignore_index argument") def test_series_append_list_ignore_index(self): - '''Verify Series.append() concatenates Series with list of other Series ignoring index''' + '''Verify Series.append() concatenates Series with list of other Series ignoring indexes''' def test_impl(S1, S2, S3): 
return S1.append([S2, S3], ignore_index=True) hpat_func = hpat.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]} if not hpat.config.config_pipeline_hpat_default: - dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']] + dtype_to_data['string'] = [['a', None, ''], ['d', None], ['']] for dtype, data_list in dtype_to_data.items(): with self.subTest(series_dtype=dtype, concatenated_data=data_list): @@ -2146,14 +2145,14 @@ def test_impl(S1, S2, S3): @unittest.skip('BUG: Pandas 0.25.1 Series.append() doesn\'t support tuple as appending values') def test_series_append_tuple_ignore_index(self): - '''Verify Series.append() concatenates Series with tuple of other Series ignoring index''' + '''Verify Series.append() concatenates Series with tuple of other Series ignoring indexes''' def test_impl(S1, S2, S3): return S1.append((S2, S3, ), ignore_index=True) hpat_func = hpat.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]} if not hpat.config.config_pipeline_hpat_default: - dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']] + dtype_to_data['string'] = [['a', None, ''], ['d', None], ['']] for dtype, data_list in dtype_to_data.items(): with self.subTest(series_dtype=dtype, concatenated_data=data_list): @@ -2163,14 +2162,14 @@ def test_impl(S1, S2, S3): @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_single_index_default(self): - '''Verify Series.append() concatenates Series with other single Series supporting default index''' + '''Verify Series.append() concatenates Series with other single Series respecting default indexes''' def test_impl(S, other): return S.append(other) hpat_func = hpat.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0]]} if not hpat.config.config_pipeline_hpat_default: - dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e']] + dtype_to_data['string'] = [['a', None, 
'bbbb', ''], ['dd', None, '', 'e']] for dtype, data_list in dtype_to_data.items(): with self.subTest(series_dtype=dtype, concatenated_data=data_list): @@ -2180,7 +2179,7 @@ def test_impl(S, other): @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_list_index_default(self): - '''Verify Series.append() concatenates Series with list of other Series ignoring index''' + '''Verify Series.append() concatenates Series with list of other Series respecting default indexes''' def test_impl(S1, S2, S3): return S1.append([S2, S3]) hpat_func = hpat.jit(test_impl) @@ -2196,7 +2195,7 @@ def test_impl(S1, S2, S3): @unittest.skip('BUG: Pandas 0.25.1 Series.append() doesn\'t support tuple as appending values') def test_series_append_tuple_index_default(self): - '''Verify Series.append() concatenates Series with tuple of other Series ignoring index''' + '''Verify Series.append() concatenates Series with tuple of other Series respecting default indexes''' def test_impl(S1, S2, S3): return S1.append((S2, S3, )) hpat_func = hpat.jit(test_impl) @@ -2213,15 +2212,15 @@ def test_impl(S1, S2, S3): @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_single_index_int(self): - '''Verify Series.append() concatenates Series with other single Series supporting default index''' + '''Verify Series.append() concatenates Series with other single Series respecting integer indexes''' def test_impl(S, other): return S.append(other) hpat_func = hpat.jit(test_impl) - dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0]]} + dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0, -1]]} if not hpat.config.config_pipeline_hpat_default: - dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e']] - indexes = [[1, 2, 3], [7, 8]] + dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e', 
'ttt']] + indexes = [[1, 2, 3, 4], [7, 8, 11, 3, 4]] for dtype, data_list in dtype_to_data.items(): with self.subTest(series_dtype=dtype, concatenated_data=data_list): @@ -2231,15 +2230,15 @@ def test_impl(S, other): @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_list_index_int(self): - '''Verify Series.append() concatenates Series with list of other Series ignoring index''' + '''Verify Series.append() concatenates Series with list of other Series respecting integer indexes''' def test_impl(S1, S2, S3): return S1.append([S2, S3]) hpat_func = hpat.jit(test_impl) - dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]} + dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0], [-1.0]]} if not hpat.config.config_pipeline_hpat_default: - dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']] - indexes = [[1, 2, 3], [7, 8], [11]] + dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e'], ['ttt']] + indexes = [[1, 2, 3, 4], [7, 8, 11, 3], [4]] for dtype, data_list in dtype_to_data.items(): with self.subTest(series_dtype=dtype, concatenated_data=data_list): @@ -2248,15 +2247,15 @@ def test_impl(S1, S2, S3): @unittest.skip('BUG: Pandas 0.25.1 Series.append() doesn\'t support tuple as appending values') def test_series_append_tuple_index_int(self): - '''Verify Series.append() concatenates Series with tuple of other Series ignoring index''' + '''Verify Series.append() concatenates Series with tuple of other Series respecting integer indexes''' def test_impl(S1, S2, S3): return S1.append((S2, S3, )) hpat_func = hpat.jit(test_impl) - dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]} + dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0], [-1.0]]} if not hpat.config.config_pipeline_hpat_default: - dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']] - indexes = [[1, 2, 3], [7, 8], 
[11]] + dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e'], ['ttt']] + indexes = [[1, 2, 3, 4], [7, 8, 11, 3], [4]] for dtype, data_list in dtype_to_data.items(): with self.subTest(series_dtype=dtype, concatenated_data=data_list): @@ -2266,15 +2265,15 @@ def test_impl(S1, S2, S3): @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_single_index_str(self): - '''Verify Series.append() concatenates Series with other single Series supporting default index''' + '''Verify Series.append() concatenates Series with other single Series respecting string indexes''' def test_impl(S, other): return S.append(other) hpat_func = hpat.jit(test_impl) - dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0]]} + dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0, -1.0]]} if not hpat.config.config_pipeline_hpat_default: - dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e']] - indexes = [['a', 'bb', 'ccc'], ['q', 't']] + dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e', 'ttt']] + indexes = [['a', 'bb', 'ccc', 'dddd'], ['a1', 'a2', 'a3', 'a4', 'a5']] for dtype, data_list in dtype_to_data.items(): with self.subTest(series_dtype=dtype, concatenated_data=data_list): @@ -2284,15 +2283,15 @@ def test_impl(S, other): @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_list_index_str(self): - '''Verify Series.append() concatenates Series with list of other Series ignoring index''' + '''Verify Series.append() concatenates Series with list of other Series respecting string indexes''' def test_impl(S1, S2, S3): return S1.append([S2, S3]) hpat_func = hpat.jit(test_impl) - dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]} + dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0], [-1.0]]} if not 
hpat.config.config_pipeline_hpat_default: - dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']] - indexes = [['a', 'bb', 'ccc'], ['q', 't'], ['dd']] + dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e'], ['ttt']] + indexes = [['a', 'bb', 'ccc', 'dddd'], ['q', 't', 'a', 'x'], ['dd']] for dtype, data_list in dtype_to_data.items(): with self.subTest(series_dtype=dtype, concatenated_data=data_list): @@ -2301,15 +2300,15 @@ def test_impl(S1, S2, S3): @unittest.skip('BUG: Pandas 0.25.1 Series.append() doesn\'t support tuple as appending values') def test_series_append_tuple_index_str(self): - '''Verify Series.append() concatenates Series with tuple of other Series ignoring index''' + '''Verify Series.append() concatenates Series with tuple of other Series respecting string indexes''' def test_impl(S1, S2, S3): return S1.append((S2, S3, )) hpat_func = hpat.jit(test_impl) - dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]} + dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0], [-1.0]]} if not hpat.config.config_pipeline_hpat_default: - dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']] - indexes = [['a', 'bb', 'ccc'], ['q', 't'], ['dd']] + dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e'], ['ttt']] + indexes = [['a', 'bb', 'ccc', 'dddd'], ['q', 't', 'a', 'x'], ['dd']] for dtype, data_list in dtype_to_data.items(): with self.subTest(series_dtype=dtype, concatenated_data=data_list): @@ -2346,7 +2345,7 @@ def test_impl(S, other, param): @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") - def test_series_append_single_dtype_mismatch(self): + def test_series_append_single_dtype_promotion(self): '''Verify Series.append() implementation handles appending single Series with different dtypes''' def test_impl(S, other): return S.append(other) @@ -2358,7 +2357,7 @@ def test_impl(S, other): 
@unittest.skipIf(hpat.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") - def test_series_append_list_dtype_mismatch(self): + def test_series_append_list_dtype_promotion(self): '''Verify Series.append() implementation handles appending list of Series with different dtypes''' def test_impl(S1, S2, S3): return S1.append([S2, S3])