-
Notifications
You must be signed in to change notification settings - Fork 61
Fix new-style implementation of Series.append() method #262
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# ***************************************************************************** | ||
# Copyright (c) 2019, Intel Corporation All rights reserved. | ||
# | ||
# Redistribution and use in source and binary forms, with or without | ||
# modification, are permitted provided that the following conditions are met: | ||
# | ||
# Redistributions of source code must retain the above copyright notice, | ||
# this list of conditions and the following disclaimer. | ||
# | ||
# Redistributions in binary form must reproduce the above copyright notice, | ||
# this list of conditions and the following disclaimer in the documentation | ||
# and/or other materials provided with the distribution. | ||
# | ||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, | ||
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR | ||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | ||
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | ||
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | ||
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, | ||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
# ***************************************************************************** | ||
|
||
""" | ||
|
||
| This file contains internal common functions used in SDC implementation across different files | ||
|
||
""" | ||
|
||
import numpy | ||
|
||
from numba import types | ||
from numba.extending import overload | ||
from numba import numpy_support | ||
|
||
import hpat | ||
from hpat.str_arr_ext import (string_array_type, num_total_chars, append_string_array_to) | ||
|
||
|
||
def has_literal_value(var, value):
    '''Used during typing to check that variable var is a Numba literal value equal to value

    Parameters
    ----------
    var
        Object received by an overload at typing time.
    value
        Python value the literal is expected to hold.

    Returns
    -------
    bool
        False if var is not an instance of types.Literal, otherwise the result
        of comparing var.literal_value with value.
    '''

    if not isinstance(var, types.Literal):
        return False

    # None and booleans are compared by identity, so that e.g. a literal 0 is
    # not accepted as False (0 == False holds in Python, 0 is False does not).
    if value is None or isinstance(value, bool):
        return var.literal_value is value

    return var.literal_value == value
|
||
|
||
def has_python_value(var, value):
    '''Used during typing to check that variable var was resolved as Python type and has specific value

    Parameters
    ----------
    var
        Object received by an overload at typing time.
    value
        Python value var is expected to be.

    Returns
    -------
    bool
        False if var is not an instance of type(value), otherwise the result
        of comparing var with value.
    '''

    if not isinstance(var, type(value)):
        return False

    # None and booleans are singletons, so they are compared by identity;
    # everything else is compared by equality.
    if value is None or isinstance(value, bool):
        return var is value

    return var == value
|
||
|
||
def hpat_arrays_append(A, B):
    '''Stub that serves as the registration target for the overloads of the append operation.

    The pure-Python body is intentionally empty and returns None. The actual
    implementations are provided via @overload, so that the same append
    operation can have different overloads for the various underlying data
    types (numpy arrays, string arrays).
    '''
|
||
|
||
@overload(hpat_arrays_append)
def hpat_arrays_append_overload(A, B):
    '''Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A

    Selects an implementation from the types of A and B:

    * A is types.Array, B is types.Array: numpy.concatenate of the two;
    * A is types.Array, B is a homogeneous tuple/list of types.Array: copy into
      a newly allocated array of the common dtype of A and the elements of B;
    * A is string_array_type, B is string_array_type: copy both into a
      pre-allocated string array;
    * A is string_array_type, B is a homogeneous tuple/list of string_array_type:
      copy A and every element of B into a pre-allocated string array.

    For any other combination no implementation is returned (None).
    '''

    if isinstance(A, types.Array):
        if isinstance(B, types.Array):
            def _append_single_numeric_impl(A, B):
                return numpy.concatenate((A, B,))

            return _append_single_numeric_impl

        # The element type is checked before B.dtype.dtype is read, in the same way
        # the string branch below checks B.dtype, so that sequences of non-array
        # elements simply get no implementation from this branch.
        elif isinstance(B, (types.UniTuple, types.List)) and isinstance(B.dtype, types.Array):
            # TODO: this heavily relies on B being a homogeneous tuple/list - find a better way
            # to resolve common dtype of heterogeneous sequence of arrays
            np_dtypes = [numpy_support.as_dtype(A.dtype), numpy_support.as_dtype(B.dtype.dtype)]
            np_common_dtype = numpy.find_common_type([], np_dtypes)
            numba_common_dtype = numpy_support.from_dtype(np_common_dtype)

            # TODO: refactor to use numpy.concatenate when Numba supports building a tuple at runtime
            def _append_list_numeric_impl(A, B):

                total_length = len(A) + numpy.array([len(arr) for arr in B]).sum()
                new_data = numpy.empty(total_length, numba_common_dtype)

                # A goes first, then every array of B is copied right after the previous one
                stop = len(A)
                new_data[:stop] = A
                for arr in B:
                    start = stop
                    stop = start + len(arr)
                    new_data[start:stop] = arr
                return new_data

            return _append_list_numeric_impl

    elif A == string_array_type:
        if B == string_array_type:
            def _append_single_string_array_impl(A, B):
                total_size = len(A) + len(B)
                total_chars = num_total_chars(A) + num_total_chars(B)
                new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars)

                # append_string_array_to returns the number of copied elements
                pos = 0
                pos += append_string_array_to(new_data, pos, A)
                pos += append_string_array_to(new_data, pos, B)

                return new_data

            return _append_single_string_array_impl
        elif (isinstance(B, (types.UniTuple, types.List)) and B.dtype == string_array_type):
            def _append_list_string_array_impl(A, B):
                array_list = [A] + list(B)
                total_size = numpy.array([len(arr) for arr in array_list]).sum()
                total_chars = numpy.array([num_total_chars(arr) for arr in array_list]).sum()

                new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars)

                pos = 0
                pos += append_string_array_to(new_data, pos, A)
                for arr in B:
                    pos += append_string_array_to(new_data, pos, arr)

                return new_data

            return _append_list_string_array_impl

    # no implementation for this combination of argument types
    return None
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,11 +39,11 @@ | |
from numba import types | ||
|
||
import hpat | ||
import hpat.datatypes.common_functions as common_functions | ||
from hpat.hiframes.pd_series_ext import SeriesType | ||
from hpat.str_arr_ext import StringArrayType | ||
from hpat.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars) | ||
from hpat.utils import to_array | ||
|
||
|
||
class TypeChecker: | ||
""" | ||
Validate object type and raise TypingError if the type is invalid, e.g.: | ||
|
@@ -753,37 +753,92 @@ def hpat_pandas_series_isin_impl(self, values): | |
|
||
|
||
@overload_method(SeriesType, 'append')
def hpat_pandas_series_append(self, to_append, ignore_index=False, verify_integrity=False):
    """
    Pandas Series method :meth:`pandas.Series.append` implementation.

    .. only:: developer

       Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_append*

    Parameters
    -----------
    self: :obj:`pandas.Series`
        input series
    to_append : :obj:`pandas.Series` object or :obj:`list` or :obj:`tuple`
        Series (or list or tuple of Series) to append with self
    ignore_index: :obj:`bool`, default False
        If True, do not use the index labels.
        Supported as literal value only
    verify_integrity: :obj:`bool`, default False
        If True, raise Exception on creating index with duplicates.
        *unsupported*

    Returns
    -------
    :obj:`pandas.Series`
        returns :obj:`pandas.Series` object
        Concatenated Series

    Raises
    ------
    TypingError
        If self is not a Series, if to_append is neither a Series nor a list/tuple of Series,
        if ignore_index is not a literal Boolean constant, or if verify_integrity is given.

    """

    _func_name = 'Method append().'

    if not isinstance(self, SeriesType):
        raise TypingError(
            '{} The object must be a pandas.series. Given self: {}'.format(_func_name, self))

    if not (isinstance(to_append, SeriesType)
            or (isinstance(to_append, (types.UniTuple, types.List)) and isinstance(to_append.dtype, SeriesType))):
        # Adjacent string literals are used instead of a backslash continuation inside the
        # literal, which would pull the indentation of the continued line into the message.
        raise TypingError(
            ('{} The argument must be a pandas.series or list/tuple of pandas.series. '
             'Given to_append: {}').format(_func_name, to_append))

    # currently we will always raise this in the end, i.e. if no impl was found
    # TODO: find a way to stop compilation early and not proceed with unliteral step
    if not ((isinstance(ignore_index, types.Literal) and isinstance(ignore_index, types.Boolean))
            or isinstance(ignore_index, types.Omitted)
            or ignore_index is False):
        raise TypingError(
            '{} The ignore_index must be a literal Boolean constant. Given: {}'.format(_func_name, ignore_index))

    if not (verify_integrity is False or isinstance(verify_integrity, types.Omitted)):
        raise TypingError(
            '{} Unsupported parameters. Given verify_integrity: {}'.format(_func_name, verify_integrity))

    # ignore_index value has to be known at compile time to select between implementations with different signatures
    ignore_index_is_false = (common_functions.has_literal_value(ignore_index, False)
                             or common_functions.has_python_value(ignore_index, False)
                             or isinstance(ignore_index, types.Omitted))
    to_append_is_series = isinstance(to_append, SeriesType)

    if ignore_index_is_false:
        def hpat_pandas_series_append_impl(self, to_append, ignore_index=False, verify_integrity=False):
            # the explicit comparison is intentional: with a plain 'if to_append_is_series'
            # Numba's dead branch pruning does not eliminate the dead branch
            if to_append_is_series == True:  # noqa
                new_data = common_functions.hpat_arrays_append(self._data, to_append._data)
                new_index = common_functions.hpat_arrays_append(self.index, to_append.index)
            else:
                data_arrays_to_append = [series._data for series in to_append]
                index_arrays_to_append = [series.index for series in to_append]
                new_data = common_functions.hpat_arrays_append(self._data, data_arrays_to_append)
                new_index = common_functions.hpat_arrays_append(self.index, index_arrays_to_append)

            return pandas.Series(new_data, new_index)

        return hpat_pandas_series_append_impl

    else:
        def hpat_pandas_series_append_ignore_index_impl(self, to_append, ignore_index=False, verify_integrity=False):

            # the explicit comparison is intentional: with a plain 'if to_append_is_series'
            # Numba's dead branch pruning does not eliminate the dead branch
            if to_append_is_series == True:  # noqa
                new_data = common_functions.hpat_arrays_append(self._data, to_append._data)
            else:
                arrays_to_append = [series._data for series in to_append]
                new_data = common_functions.hpat_arrays_append(self._data, arrays_to_append)

            # index labels are not used: None is passed as the index of the result
            return pandas.Series(new_data, None)

        return hpat_pandas_series_append_ignore_index_impl
|
||
|
||
@overload_method(SeriesType, 'copy') | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1288,7 +1288,7 @@ def hpat_pandas_series_ctor_impl(data=None, index=None, dtype=None, name=None, c | |
|
||
'''' use binop here as otherwise Numba's dead branch pruning doesn't work | ||
TODO: replace with 'if not is_index_none' when resolved ''' | ||
if is_index_none == False: | ||
if is_index_none == False: # noqa | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fix it according to PEP |
||
fix_index = hpat.hiframes.api.fix_df_array(index) | ||
else: | ||
fix_index = index | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1419,3 +1419,17 @@ def lower_glob(context, builder, sig, args): | |
# context.nrt.decref(builder, ty, ret) | ||
|
||
return impl_ret_new_ref(context, builder, typ, ret) | ||
|
||
|
||
@numba.njit(no_cpython_wrapper=True)
def append_string_array_to(result, pos, A):
    '''Copy the elements of A into result starting at position pos, keeping NA marks

    Precondition: result is allocated with the size enough to contain A.

    Returns the number of elements taken from A.
    '''

    copied = 0
    for item in A:
        dst = pos + copied
        result[dst] = item
        # an element that is NA in A is marked as NA in result as well
        if str_arr_is_na(A, copied):
            hpat.str_arr_ext.str_arr_set_na(result, dst)
        copied += 1

    return copied
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is not easy to understand what this function does from its name. Please add a docstring.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added.