diff --git a/hpat/datatypes/common_functions.py b/hpat/datatypes/common_functions.py
new file mode 100644
index 000000000..28ea7678a
--- /dev/null
+++ b/hpat/datatypes/common_functions.py
@@ -0,0 +1,133 @@
+# *****************************************************************************
+# Copyright (c) 2019, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+"""
+
+| This file contains internal common functions used in SDC implementation across different files
+
+"""
+
+import numpy
+
+from numba import types
+from numba.extending import overload
+from numba import numpy_support
+
+import hpat
+from hpat.str_arr_ext import (string_array_type, num_total_chars, append_string_array_to)
+
+
+def has_literal_value(var, value):
+    '''Used during typing to check that variable var is a Numba literal value equal to value'''
+
+    if not isinstance(var, types.Literal):
+        return False
+
+    if value is None or isinstance(value, bool):
+        return var.literal_value is value
+    else:
+        return var.literal_value == value
+
+
+def has_python_value(var, value):
+    '''Used during typing to check that variable var was resolved as Python type and has specific value'''
+
+    if not isinstance(var, type(value)):
+        return False
+
+    if value is None or isinstance(value, bool):
+        return var is value
+    else:
+        return var == value
+
+
+def hpat_arrays_append(A, B):
+    return None
+
+
+@overload(hpat_arrays_append)
+def hpat_arrays_append_overload(A, B):
+    '''Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A'''
+
+    if isinstance(A, types.Array):
+        if isinstance(B, types.Array):
+            def _append_single_numeric_impl(A, B):
+                return numpy.concatenate((A, B,))
+
+            return _append_single_numeric_impl
+        elif isinstance(B, (types.UniTuple, types.List)):
+            # TODO: this heavily relies on B being a homogeneous tuple/list - find a better way
+            # to resolve common dtype of heterogeneous sequence of arrays
+            np_dtypes = [numpy_support.as_dtype(A.dtype), numpy_support.as_dtype(B.dtype.dtype)]
+            np_common_dtype = numpy.find_common_type([], np_dtypes)
+            numba_common_dtype = numpy_support.from_dtype(np_common_dtype)
+
+            # TODO: refactor to use numpy.concatenate when Numba supports building a tuple at runtime
+            def _append_list_numeric_impl(A, B):
+
+                total_length = len(A) + numpy.array([len(arr) for arr in B]).sum()
+                new_data = numpy.empty(total_length, numba_common_dtype)
+
+                stop = len(A)
+                new_data[:stop] = A
+                for arr in B:
+                    start = stop
+                    stop = start + len(arr)
+                    new_data[start:stop] = arr
+                return new_data
+
+            return _append_list_numeric_impl
+
+    elif A == string_array_type:
+        if B == string_array_type:
+            def _append_single_string_array_impl(A, B):
+                total_size = len(A) + len(B)
+                total_chars = num_total_chars(A) + num_total_chars(B)
+                new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars)
+
+                pos = 0
+                pos += append_string_array_to(new_data, pos, A)
+                pos += append_string_array_to(new_data, pos, B)
+
+                return new_data
+
+            return _append_single_string_array_impl
+        elif (isinstance(B, (types.UniTuple, types.List)) and B.dtype == string_array_type):
+            def _append_list_string_array_impl(A, B):
+                array_list = [A] + list(B)
+                total_size = numpy.array([len(arr) for arr in array_list]).sum()
+                total_chars = numpy.array([num_total_chars(arr) for arr in array_list]).sum()
+
+                new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars)
+
+                pos = 0
+                pos += append_string_array_to(new_data, pos, A)
+                for arr in B:
+                    pos += append_string_array_to(new_data, pos, arr)
+
+                return new_data
+
+            return _append_list_string_array_impl
diff --git a/hpat/datatypes/hpat_pandas_series_functions.py b/hpat/datatypes/hpat_pandas_series_functions.py
index e374656b6..1dff26b99 100644
--- a/hpat/datatypes/hpat_pandas_series_functions.py
+++ b/hpat/datatypes/hpat_pandas_series_functions.py
@@ -23,6 +23,7 @@
 # OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # *****************************************************************************
+from hpat.datatypes import common_functions

 """
 | :class:`pandas.Series` functions and operators implementations in HPAT
@@ -39,6 +40,7 @@
 from numba import types

 import hpat
+import hpat.datatypes.common_functions as common_functions
 from hpat.hiframes.pd_series_ext import SeriesType
 from hpat.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars)
 from hpat.utils import to_array
@@ -759,26 +761,26 @@ def hpat_pandas_series_append(self, to_append, ignore_index=False, verify_integr

     .. only:: developer

-            Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_append*
+        Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_append*

     Parameters
     -----------
     self: :obj:`pandas.Series`
-            input series
+        input series
     to_append : :obj:`pandas.Series` object or :obj:`list` or :obj:`set`
-            Series (or list or tuple of Series) to append with self
+        Series (or list or tuple of Series) to append with self
     ignore_index: :obj:`bool`, default False
-            If True, do not use the index labels.
-            Supported as literal value only
+        If True, do not use the index labels.
+        Supported as literal value only
     verify_integrity: :obj:`bool`, default False
-            If True, raise Exception on creating index with duplicates.
-            *unsupported*
+        If True, raise Exception on creating index with duplicates.
+        *unsupported*

     Returns
     -------
     :obj:`pandas.Series`
-            returns :obj:`pandas.Series` object
-            Concatenated Series
+        returns :obj:`pandas.Series` object
+        Concatenated Series
     """

@@ -807,53 +809,38 @@ def hpat_pandas_series_append(self, to_append, ignore_index=False, verify_integr
             '{} Unsupported parameters. Given verify_integrity: {}'.format(_func_name, verify_integrity))

     # ignore_index value has to be known at compile time to select between implementations with different signatures
-    if ((isinstance(ignore_index, types.Literal) and ignore_index.literal_value is True)
-            or (isinstance(ignore_index, bool) and ignore_index is True)):
-        # implementations that ignore series index
-        if isinstance(to_append, SeriesType):
-            def hpat_pandas_series_append_single_impl(self, to_append, ignore_index=False, verify_integrity=False):
+    ignore_index_is_false = (common_functions.has_literal_value(ignore_index, False)
+                             or common_functions.has_python_value(ignore_index, False)
+                             or isinstance(ignore_index, types.Omitted))
+    to_append_is_series = isinstance(to_append, SeriesType)
+
+    if ignore_index_is_false:
+        def hpat_pandas_series_append_impl(self, to_append, ignore_index=False, verify_integrity=False):
+            if to_append_is_series == True:  # noqa
+                new_data = common_functions.hpat_arrays_append(self._data, to_append._data)
+                new_index = common_functions.hpat_arrays_append(self.index, to_append.index)
+            else:
+                data_arrays_to_append = [series._data for series in to_append]
+                index_arrays_to_append = [series.index for series in to_append]
+                new_data = common_functions.hpat_arrays_append(self._data, data_arrays_to_append)
+                new_index = common_functions.hpat_arrays_append(self.index, index_arrays_to_append)

-                new_data = hpat.hiframes.api._append(self._data, to_append._data)
-                new_index = numpy.arange(len(self._data) + len(to_append._data))
-                return pandas.Series(new_data, new_index)
+            return pandas.Series(new_data, new_index)

-            return hpat_pandas_series_append_single_impl
+        return hpat_pandas_series_append_impl

-        elif isinstance(to_append, (types.UniTuple, types.List)):
-            def hpat_pandas_series_append_list_impl(self, to_append, ignore_index=False, verify_integrity=False):
+    else:
+        def hpat_pandas_series_append_ignore_index_impl(self, to_append, ignore_index=False, verify_integrity=False):
+            if to_append_is_series == True:  # noqa
+                new_data = common_functions.hpat_arrays_append(self._data, to_append._data)
+            else:
                 arrays_to_append = [series._data for series in to_append]
-                sum_of_sizes = numpy.array([len(arr) for arr in arrays_to_append]).sum()
-                new_data = hpat.hiframes.api._append(self._data, arrays_to_append)
-                new_index = numpy.arange(len(self._data) + sum_of_sizes)
-                return pandas.Series(new_data, new_index)
-
-            return hpat_pandas_series_append_list_impl
-
-    elif ((isinstance(ignore_index, types.Literal) and ignore_index.literal_value is False)
-            or (isinstance(ignore_index, bool) and ignore_index is False)
-            or isinstance(ignore_index, types.Omitted)):
-        # implementations that handle series index (ignore_index is False)
-        if isinstance(to_append, SeriesType):
-            def hpat_pandas_series_append_single_impl(self, to_append, ignore_index=False, verify_integrity=False):
-
-                new_data = hpat.hiframes.api._append(self._data, to_append._data)
-                new_index = hpat.hiframes.api._append(self.index, to_append.index)
-                return pandas.Series(new_data, new_index)
-
-            return hpat_pandas_series_append_single_impl
-
-        elif isinstance(to_append, (types.UniTuple, types.List)):
-            def hpat_pandas_series_append_list_impl(self, to_append, ignore_index=False, verify_integrity=False):
-
-                data_arrays_to_append = [series._data for series in to_append]
-                index_arrays_to_append = [series.index for series in to_append]
+                new_data = common_functions.hpat_arrays_append(self._data, arrays_to_append)

-                new_data = hpat.hiframes.api._append(self._data, data_arrays_to_append)
-                new_index = hpat.hiframes.api._append(self.index, index_arrays_to_append)
-                return pandas.Series(new_data, new_index)
+            return pandas.Series(new_data, None)

-            return hpat_pandas_series_append_list_impl
+        return hpat_pandas_series_append_ignore_index_impl


 @overload_method(SeriesType, 'copy')
diff --git a/hpat/hiframes/api.py b/hpat/hiframes/api.py
index be763f705..f14e0dc76 100644
--- a/hpat/hiframes/api.py
+++ b/hpat/hiframes/api.py
@@ -29,7 +29,12 @@
 from hpat.utils import _numba_to_c_type_map, unliteral_all
 from hpat.str_ext import string_type, list_string_array_type
 from hpat.set_ext import build_set
-from hpat.str_arr_ext import (StringArrayType, string_array_type, is_str_arr_typ, cp_str_list_to_array, num_total_chars)
+from hpat.str_arr_ext import (
+    StringArrayType,
+    string_array_type,
+    is_str_arr_typ,
+    num_total_chars,
+    append_string_array_to)
 from hpat.hiframes.pd_timestamp_ext import (pandas_timestamp_type, datetime_date_type, set_df_datetime_date_lower)
 from hpat.hiframes.pd_series_ext import (
     SeriesType,
@@ -1763,68 +1768,3 @@ def _analyze_op_pair_first(self, scope, equiv_set, expr):


 numba.array_analysis.ArrayAnalysis._analyze_op_pair_first = _analyze_op_pair_first
-
-
-def _append(A, B):
-    return None
-
-
-@overload(_append)
-def _append_overload(A, B):
-    '''Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A'''
-
-    if isinstance(A, types.Array):
-        if isinstance(B, types.Array):
-            def _append_single_numeric_impl(A, B):
-                return np.concatenate((A, B,))
-
-            return _append_single_numeric_impl
-        elif isinstance(B, (types.UniTuple, types.List)):
-            # TODO: this heavily relies on B being a homogeneous tuple/list - find a better way
-            # to resolve common dtype of heterogeneous sequence of arrays
-            np_dtypes = [numpy_support.as_dtype(A.dtype), numpy_support.as_dtype(B.dtype.dtype)]
-            np_common_dtype = np.find_common_type([], np_dtypes)
-            numba_common_dtype = numpy_support.from_dtype(np_common_dtype)
-
-            # TODO: refactor to use np.concatenate when Numba supports building a tuple at runtime
-            def _append_list_numeric_impl(A, B):
-
-                total_length = len(A) + np.array([len(arr) for arr in B]).sum()
-                new_data = np.empty(total_length, numba_common_dtype)
-
-                stop = len(A)
-                new_data[:stop] = A
-                for arr in B:
-                    start = stop
-                    stop = start + len(arr)
-                    new_data[start:stop] = arr
-                return new_data
-
-            return _append_list_numeric_impl
-
-    elif A == string_array_type:
-        if B == string_array_type:
-            def _append_single_string_array_impl(A, B):
-                total_size = len(A) + len(B)
-                total_chars = num_total_chars(A) + num_total_chars(B)
-                new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars)
-
-                list_of_strings = list(A) + list(B)
-                hpat.str_arr_ext.cp_str_list_to_array(new_data, list_of_strings)
-                return new_data
-
-            return _append_single_string_array_impl
-        elif (isinstance(B, (types.UniTuple, types.List)) and B.dtype == string_array_type):
-            def _append_list_string_array_impl(A, B):
-                array_list = [A] + list(B)
-                total_size = np.array([len(arr) for arr in array_list]).sum()
-                total_chars = np.array([num_total_chars(arr) for arr in array_list]).sum()
-
-                new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars)
-                list_of_strings = list(A)
-                for arr in B:
-                    list_of_strings.extend(list(arr))
-                hpat.str_arr_ext.cp_str_list_to_array(new_data, list_of_strings)
-                return new_data
-
-            return _append_list_string_array_impl
diff --git a/hpat/str_arr_ext.py b/hpat/str_arr_ext.py
index 9e729892a..6085304af 100644
--- a/hpat/str_arr_ext.py
+++ b/hpat/str_arr_ext.py
@@ -1392,3 +1392,17 @@ def lower_glob(context, builder, sig, args):

     # context.nrt.decref(builder, ty, ret)
     return impl_ret_new_ref(context, builder, typ, ret)
+
+
+@numba.njit(no_cpython_wrapper=True)
+def append_string_array_to(result, pos, A):
+    # precondition: result is allocated with the size enough to contain A
+    i, j = 0, pos
+    for str in A:
+        result[j] = str
+        if str_arr_is_na(A, i):
+            hpat.str_arr_ext.str_arr_set_na(result, j)
+        i += 1
+        j += 1
+
+    return i
diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py
index 67c665fec..660424d19 100644
--- a/hpat/tests/test_series.py
+++ b/hpat/tests/test_series.py
@@ -1378,7 +1378,6 @@ def test_impl(S):
         S2 = S1.copy()
         pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2))

-
     @unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                      'No support of axis argument in old-style Series.dropna() impl')
     def test_series_dropna_axis1(self):
@@ -2087,13 +2086,13 @@ def test_series_str2str(self):
     @unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                      "Old-style append implementation doesn't handle ignore_index argument")
     def test_series_append_single_ignore_index(self):
-        '''Verify Series.append() concatenates Series with other single Series ignoring index'''
+        '''Verify Series.append() concatenates Series with other single Series ignoring indexes'''
         def test_impl(S, other):
             return S.append(other, ignore_index=True)
         hpat_func = hpat.jit(test_impl)

-        dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0]],
-                         'string': [['a', 'b', 'q'], ['d', 'e']]}
+        dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0, -1]],
+                         'string': [['a', None, 'bbbb', ''], ['dd', None, '', 'e', 'ttt']]}

         for dtype, data_list in dtype_to_data.items():
             with self.subTest(series_dtype=dtype, concatenated_data=data_list):
@@ -2103,14 +2102,14 @@ def test_impl(S, other):
     @unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                      "Old-style append implementation doesn't handle ignore_index argument")
     def test_series_append_list_ignore_index(self):
-        '''Verify Series.append() concatenates Series with list of other Series ignoring index'''
+        '''Verify Series.append() concatenates Series with list of other Series ignoring indexes'''
         def test_impl(S1, S2, S3):
             return S1.append([S2, S3], ignore_index=True)
         hpat_func = hpat.jit(test_impl)

         dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]}
         if not hpat.config.config_pipeline_hpat_default:
-            dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']]
+            dtype_to_data['string'] = [['a', None, ''], ['d', None], ['']]

         for dtype, data_list in dtype_to_data.items():
             with self.subTest(series_dtype=dtype, concatenated_data=data_list):
@@ -2119,14 +2118,14 @@ def test_impl(S1, S2, S3):

     @unittest.skip('BUG: Pandas 0.25.1 Series.append() doesn\'t support tuple as appending values')
     def test_series_append_tuple_ignore_index(self):
-        '''Verify Series.append() concatenates Series with tuple of other Series ignoring index'''
+        '''Verify Series.append() concatenates Series with tuple of other Series ignoring indexes'''
         def test_impl(S1, S2, S3):
             return S1.append((S2, S3, ), ignore_index=True)
         hpat_func = hpat.jit(test_impl)

         dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]}
         if not hpat.config.config_pipeline_hpat_default:
-            dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']]
+            dtype_to_data['string'] = [['a', None, ''], ['d', None], ['']]

         for dtype, data_list in dtype_to_data.items():
             with self.subTest(series_dtype=dtype, concatenated_data=data_list):
@@ -2136,14 +2135,14 @@ def test_impl(S1, S2, S3):
     @unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                      "BUG: old-style append implementation doesn't handle series index")
     def test_series_append_single_index_default(self):
-        '''Verify Series.append() concatenates Series with other single Series supporting default index'''
+        '''Verify Series.append() concatenates Series with other single Series respecting default indexes'''
         def test_impl(S, other):
             return S.append(other)
         hpat_func = hpat.jit(test_impl)

         dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0]]}
         if not hpat.config.config_pipeline_hpat_default:
-            dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e']]
+            dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e']]

         for dtype, data_list in dtype_to_data.items():
             with self.subTest(series_dtype=dtype, concatenated_data=data_list):
@@ -2153,7 +2152,7 @@ def test_impl(S, other):
     @unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                      "BUG: old-style append implementation doesn't handle series index")
     def test_series_append_list_index_default(self):
-        '''Verify Series.append() concatenates Series with list of other Series ignoring index'''
+        '''Verify Series.append() concatenates Series with list of other Series respecting default indexes'''
         def test_impl(S1, S2, S3):
             return S1.append([S2, S3])
         hpat_func = hpat.jit(test_impl)
@@ -2169,7 +2168,7 @@ def test_impl(S1, S2, S3):

     @unittest.skip('BUG: Pandas 0.25.1 Series.append() doesn\'t support tuple as appending values')
     def test_series_append_tuple_index_default(self):
-        '''Verify Series.append() concatenates Series with tuple of other Series ignoring index'''
+        '''Verify Series.append() concatenates Series with tuple of other Series respecting default indexes'''
         def test_impl(S1, S2, S3):
             return S1.append((S2, S3, ))
         hpat_func = hpat.jit(test_impl)
@@ -2186,15 +2185,15 @@ def test_impl(S1, S2, S3):
     @unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                      "BUG: old-style append implementation doesn't handle series index")
     def test_series_append_single_index_int(self):
-        '''Verify Series.append() concatenates Series with other single Series supporting default index'''
+        '''Verify Series.append() concatenates Series with other single Series respecting integer indexes'''
         def test_impl(S, other):
             return S.append(other)
         hpat_func = hpat.jit(test_impl)

-        dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0]]}
+        dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0, -1]]}
         if not hpat.config.config_pipeline_hpat_default:
-            dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e']]
-        indexes = [[1, 2, 3], [7, 8]]
+            dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e', 'ttt']]
+        indexes = [[1, 2, 3, 4], [7, 8, 11, 3, 4]]

         for dtype, data_list in dtype_to_data.items():
             with self.subTest(series_dtype=dtype, concatenated_data=data_list):
@@ -2204,15 +2203,15 @@ def test_impl(S, other):
     @unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                      "BUG: old-style append implementation doesn't handle series index")
     def test_series_append_list_index_int(self):
-        '''Verify Series.append() concatenates Series with list of other Series ignoring index'''
+        '''Verify Series.append() concatenates Series with list of other Series respecting integer indexes'''
         def test_impl(S1, S2, S3):
             return S1.append([S2, S3])
         hpat_func = hpat.jit(test_impl)

-        dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]}
+        dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0], [-1.0]]}
         if not hpat.config.config_pipeline_hpat_default:
-            dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']]
-        indexes = [[1, 2, 3], [7, 8], [11]]
+            dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e'], ['ttt']]
+        indexes = [[1, 2, 3, 4], [7, 8, 11, 3], [4]]

         for dtype, data_list in dtype_to_data.items():
             with self.subTest(series_dtype=dtype, concatenated_data=data_list):
@@ -2221,15 +2220,15 @@ def test_impl(S1, S2, S3):

     @unittest.skip('BUG: Pandas 0.25.1 Series.append() doesn\'t support tuple as appending values')
     def test_series_append_tuple_index_int(self):
-        '''Verify Series.append() concatenates Series with tuple of other Series ignoring index'''
+        '''Verify Series.append() concatenates Series with tuple of other Series respecting integer indexes'''
         def test_impl(S1, S2, S3):
             return S1.append((S2, S3, ))
         hpat_func = hpat.jit(test_impl)

-        dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]}
+        dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0], [-1.0]]}
         if not hpat.config.config_pipeline_hpat_default:
-            dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']]
-        indexes = [[1, 2, 3], [7, 8], [11]]
+            dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e'], ['ttt']]
+        indexes = [[1, 2, 3, 4], [7, 8, 11, 3], [4]]

         for dtype, data_list in dtype_to_data.items():
             with self.subTest(series_dtype=dtype, concatenated_data=data_list):
@@ -2239,15 +2238,15 @@ def test_impl(S1, S2, S3):
     @unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                      "BUG: old-style append implementation doesn't handle series index")
     def test_series_append_single_index_str(self):
-        '''Verify Series.append() concatenates Series with other single Series supporting default index'''
+        '''Verify Series.append() concatenates Series with other single Series respecting string indexes'''
         def test_impl(S, other):
             return S.append(other)
         hpat_func = hpat.jit(test_impl)

-        dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0]]}
+        dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0, -1.0]]}
         if not hpat.config.config_pipeline_hpat_default:
-            dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e']]
-        indexes = [['a', 'bb', 'ccc'], ['q', 't']]
+            dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e', 'ttt']]
+        indexes = [['a', 'bb', 'ccc', 'dddd'], ['a1', 'a2', 'a3', 'a4', 'a5']]

         for dtype, data_list in dtype_to_data.items():
             with self.subTest(series_dtype=dtype, concatenated_data=data_list):
@@ -2257,15 +2256,15 @@ def test_impl(S, other):
     @unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                      "BUG: old-style append implementation doesn't handle series index")
     def test_series_append_list_index_str(self):
-        '''Verify Series.append() concatenates Series with list of other Series ignoring index'''
+        '''Verify Series.append() concatenates Series with list of other Series respecting string indexes'''
         def test_impl(S1, S2, S3):
             return S1.append([S2, S3])
         hpat_func = hpat.jit(test_impl)

-        dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]}
+        dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0], [-1.0]]}
         if not hpat.config.config_pipeline_hpat_default:
-            dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']]
-        indexes = [['a', 'bb', 'ccc'], ['q', 't'], ['dd']]
+            dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e'], ['ttt']]
+        indexes = [['a', 'bb', 'ccc', 'dddd'], ['q', 't', 'a', 'x'], ['dd']]

         for dtype, data_list in dtype_to_data.items():
             with self.subTest(series_dtype=dtype, concatenated_data=data_list):
@@ -2274,15 +2273,15 @@ def test_impl(S1, S2, S3):

     @unittest.skip('BUG: Pandas 0.25.1 Series.append() doesn\'t support tuple as appending values')
     def test_series_append_tuple_index_str(self):
-        '''Verify Series.append() concatenates Series with tuple of other Series ignoring index'''
+        '''Verify Series.append() concatenates Series with tuple of other Series respecting string indexes'''
         def test_impl(S1, S2, S3):
             return S1.append((S2, S3, ))
         hpat_func = hpat.jit(test_impl)

-        dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]}
+        dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0], [-1.0]]}
         if not hpat.config.config_pipeline_hpat_default:
-            dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']]
-        indexes = [['a', 'bb', 'ccc'], ['q', 't'], ['dd']]
+            dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e'], ['ttt']]
+        indexes = [['a', 'bb', 'ccc', 'dddd'], ['q', 't', 'a', 'x'], ['dd']]

         for dtype, data_list in dtype_to_data.items():
             with self.subTest(series_dtype=dtype, concatenated_data=data_list):
@@ -2319,7 +2318,7 @@ def test_impl(S, other, param):

     @unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                      "BUG: old-style append implementation doesn't handle series index")
-    def test_series_append_single_dtype_mismatch(self):
+    def test_series_append_single_dtype_promotion(self):
         '''Verify Series.append() implementation handles appending single Series with different dtypes'''
         def test_impl(S, other):
             return S.append(other)
@@ -2331,7 +2330,7 @@ def test_impl(S, other):

     @unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                      "BUG: old-style append implementation doesn't handle series index")
-    def test_series_append_list_dtype_mismatch(self):
+    def test_series_append_list_dtype_promotion(self):
         '''Verify Series.append() implementation handles appending list of Series with different dtypes'''
         def test_impl(S1, S2, S3):
             return S1.append([S2, S3])
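
Usage sketch (illustrative only, not part of the patch): how the new-style Series.append overload added above is expected to be exercised under hpat.jit, mirroring the tests in this diff. The function names and data values below are examples, not code from the repository.

import numpy as np
import pandas as pd
import hpat


@hpat.jit
def append_keep_indexes(S, other):
    # default ignore_index=False: the result carries both original indexes
    return S.append(other)


@hpat.jit
def append_drop_indexes(S, other):
    # ignore_index is supported as a literal value only; True resets the result index
    return S.append(other, ignore_index=True)


S1 = pd.Series([-2., 3., 9.1, np.nan], index=[1, 2, 3, 4])
S2 = pd.Series([-2., 5.0], index=[7, 8])

print(append_keep_indexes(S1, S2))   # index: 1 2 3 4 7 8
print(append_drop_indexes(S1, S2))   # index: 0 1 2 3 4 5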