Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Commit

Permalink
Fix new-style implementation of Series.append() method
Browse files Browse the repository at this point in the history
  • Loading branch information
kozlov-alexey committed Nov 5, 2019
1 parent 6f36f5b commit 6b14375
Show file tree
Hide file tree
Showing 3 changed files with 400 additions and 28 deletions.
98 changes: 84 additions & 14 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

import hpat
from hpat.hiframes.pd_series_ext import SeriesType
from hpat.str_arr_ext import StringArrayType
from hpat.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars)
from hpat.utils import to_array


Expand Down Expand Up @@ -700,37 +700,107 @@ def hpat_pandas_series_isin_impl(self, values):


@overload_method(SeriesType, 'append')
def hpat_pandas_series_append(self, to_append):
def hpat_pandas_series_append(self, to_append, ignore_index=False, verify_integrity=False):
"""
Pandas Series method :meth:`pandas.Series.append` implementation.
.. only:: developer
Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_append1
Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_append*
Parameters
-----------
to_append : :obj:`pandas.Series` object
input argument
ignore_index:
*unsupported*
verify_integrity:
*unsupported*
self: :obj:`pandas.Series`
input series
to_append : :obj:`pandas.Series` object or :obj:`list` or :obj:`tuple`
Series (or list or tuple of Series) to append to self
ignore_index: :obj:`bool`, default False
If True, do not use the index labels.
Supported as literal value only
verify_integrity: :obj:`bool`, default False
If True, raise Exception on creating index with duplicates.
*unsupported*
Returns
-------
:obj:`pandas.Series`
returns :obj:`pandas.Series` object
Concatenated Series
"""

_func_name = 'Method append().'

if not isinstance(self, SeriesType) or not isinstance(to_append, SeriesType):
if not isinstance(self, SeriesType):
raise TypingError(
'{} The object must be a pandas.series. Given self: {}'.format(_func_name, self))

if not (isinstance(to_append, SeriesType)
or (isinstance(to_append, (types.UniTuple, types.List)) and isinstance(to_append.dtype, SeriesType))):
raise TypingError(
'{} The argument must be a pandas.series or list/tuple of pandas.series. \
Given to_append: {}'.format(_func_name, to_append))

# currently we will always raise this in the end, i.e. if no impl was found
# TODO: find a way to stop compilation early and not proceed with unliteral step
if not (isinstance(ignore_index, types.Literal) and isinstance(ignore_index, types.Boolean)
or isinstance(ignore_index, types.Omitted)
or ignore_index is False):
raise TypingError(
'{} The ignore_index must be a literal Boolean constant. Given: {}'.format(_func_name, ignore_index))

if not (verify_integrity is False or isinstance(verify_integrity, types.Omitted)):
raise TypingError(
'{} The object must be a pandas.series. Given self: {}, to_append: {}'.format(_func_name, self, to_append))
'{} Unsupported parameters. Given verify_integrity: {}'.format(_func_name, verify_integrity))

# ignore_index value has to be known at compile time to select between implementations with different signatures
if ((isinstance(ignore_index, types.Literal) and ignore_index.literal_value is True)
or (isinstance(ignore_index, bool) and ignore_index is True)):
# implementations that ignore series index
if isinstance(to_append, SeriesType):
def hpat_pandas_series_append_single_impl(self, to_append, ignore_index=False, verify_integrity=False):

new_data = hpat.hiframes.api._append(self._data, to_append._data)
new_index = numpy.arange(len(self._data) + len(to_append._data))
return pandas.Series(new_data, new_index)

return hpat_pandas_series_append_single_impl

elif isinstance(to_append, (types.UniTuple, types.List)):
def hpat_pandas_series_append_list_impl(self, to_append, ignore_index=False, verify_integrity=False):

arrays_to_append = [series._data for series in to_append]
sum_of_sizes = numpy.array([len(arr) for arr in arrays_to_append]).sum()
new_data = hpat.hiframes.api._append(self._data, arrays_to_append)
new_index = numpy.arange(len(self._data) + sum_of_sizes)
return pandas.Series(new_data, new_index)

return hpat_pandas_series_append_list_impl

elif ((isinstance(ignore_index, types.Literal) and ignore_index.literal_value is False)
or (isinstance(ignore_index, bool) and ignore_index is False)
or isinstance(ignore_index, types.Omitted)):
# implementations that handle series index (ignore_index is False)
if isinstance(to_append, SeriesType):
def hpat_pandas_series_append_single_impl(self, to_append, ignore_index=False, verify_integrity=False):

new_data = hpat.hiframes.api._append(self._data, to_append._data)
new_index = hpat.hiframes.api._append(self.index, to_append.index)
return pandas.Series(new_data, new_index)

return hpat_pandas_series_append_single_impl

elif isinstance(to_append, (types.UniTuple, types.List)):
def hpat_pandas_series_append_list_impl(self, to_append, ignore_index=False, verify_integrity=False):

data_arrays_to_append = [series._data for series in to_append]
index_arrays_to_append = [series.index for series in to_append]

def hpat_pandas_series_append_impl(self, to_append):
return pandas.Series(self._data + to_append._data)
new_data = hpat.hiframes.api._append(self._data, data_arrays_to_append)
new_index = hpat.hiframes.api._append(self.index, index_arrays_to_append)
return pandas.Series(new_data, new_index)

return hpat_pandas_series_append_impl
return hpat_pandas_series_append_list_impl


@overload_method(SeriesType, 'copy')
Expand Down
69 changes: 67 additions & 2 deletions hpat/hiframes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import numba
from numba import ir, ir_utils
from numba import types, cgutils
from numba import numpy_support, types, cgutils
from numba.ir_utils import require, mk_unique_var
import numba.array_analysis
from numba.typing import signature
Expand All @@ -29,7 +29,7 @@
from hpat.utils import _numba_to_c_type_map, unliteral_all
from hpat.str_ext import string_type, list_string_array_type
from hpat.set_ext import build_set
from hpat.str_arr_ext import (StringArrayType, string_array_type, is_str_arr_typ)
from hpat.str_arr_ext import (StringArrayType, string_array_type, is_str_arr_typ, cp_str_list_to_array, num_total_chars)
from hpat.hiframes.pd_timestamp_ext import (pandas_timestamp_type, datetime_date_type, set_df_datetime_date_lower)
from hpat.hiframes.pd_series_ext import (
SeriesType,
Expand Down Expand Up @@ -1763,3 +1763,68 @@ def _analyze_op_pair_first(self, scope, equiv_set, expr):


numba.array_analysis.ArrayAnalysis._analyze_op_pair_first = _analyze_op_pair_first


def _append(A, B):
return None


@overload(_append)
def _append_overload(A, B):
    """Select a typed implementation of ``_append`` from the Numba types of ``A`` and ``B``.

    Parameters
    ----------
    A : Numba type
        ``types.Array`` or ``string_array_type`` - the array that is appended to
    B : Numba type
        either a single array of the same kind as ``A``, or a ``types.UniTuple`` /
        ``types.List`` whose element type is such an array

    Returns
    -------
    function or None
        The implementation matching the argument types. ``None`` is returned
        (implicitly) when no combination matches, i.e. this overload does not
        provide an implementation for the given types.
    """

    if isinstance(A, types.Array):
        if isinstance(B, types.Array):
            def _append_single_numeric_impl(A, B):
                return np.concatenate((A, B,))

            return _append_single_numeric_impl

        # Guard the element type before reading B.dtype.dtype below: a list/tuple of
        # non-array elements has no nested dtype and must simply not match here.
        elif isinstance(B, (types.UniTuple, types.List)) and isinstance(B.dtype, types.Array):
            # TODO: this heavily relies on B being a homogeneous tuple/list - find a better way
            # to resolve common dtype of heterogeneous sequence of arrays
            # NOTE(review): np.find_common_type is deprecated in recent NumPy releases
            # (np.promote_types is the suggested replacement) - confirm before upgrading NumPy.
            np_dtypes = [numpy_support.as_dtype(A.dtype), numpy_support.as_dtype(B.dtype.dtype)]
            np_common_dtype = np.find_common_type([], np_dtypes)
            numba_common_dtype = numpy_support.from_dtype(np_common_dtype)

            # TODO: refactor to use np.concatenate when Numba supports building a tuple at runtime
            def _append_list_numeric_impl(A, B):

                # allocate the result once, then copy A and every array of B into its slice
                total_length = len(A) + np.array([len(arr) for arr in B]).sum()
                new_data = np.empty(total_length, numba_common_dtype)

                stop = len(A)
                new_data[:stop] = A
                for arr in B:
                    start = stop
                    stop = start + len(arr)
                    new_data[start:stop] = arr
                return new_data

            return _append_list_numeric_impl

    elif A == string_array_type:
        if B == string_array_type:
            def _append_single_string_array_impl(A, B):
                # pre-allocate for the combined element count and character count
                total_size = len(A) + len(B)
                total_chars = num_total_chars(A) + num_total_chars(B)
                new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars)

                list_of_strings = list(A) + list(B)
                hpat.str_arr_ext.cp_str_list_to_array(new_data, list_of_strings)
                return new_data

            return _append_single_string_array_impl

        elif (isinstance(B, (types.UniTuple, types.List)) and B.dtype == string_array_type):
            def _append_list_string_array_impl(A, B):
                # sizes are summed over A followed by every array in B
                array_list = [A] + list(B)
                total_size = np.array([len(arr) for arr in array_list]).sum()
                total_chars = np.array([num_total_chars(arr) for arr in array_list]).sum()

                new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars)
                list_of_strings = list(A)
                for arr in B:
                    list_of_strings.extend(list(arr))
                hpat.str_arr_ext.cp_str_list_to_array(new_data, list_of_strings)
                return new_data

            return _append_list_string_array_impl
Loading

0 comments on commit 6b14375

Please sign in to comment.