diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
index 36f629a20..836b73844 100644
--- a/sdc/datatypes/hpat_pandas_series_functions.py
+++ b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -42,6 +42,7 @@
 from numba import (types, numpy_support, cgutils)
 from numba.typed import Dict
 from numba import prange
+from numba.targets.arraymath import get_isnan
 
 import sdc
 import sdc.datatypes.common_functions as common_functions
@@ -5019,21 +5020,13 @@ def hpat_pandas_series_fillna(self, value=None, method=None, axis=None, inplace=
             raise TypingError('{} Not implemented when Series dtype is {} and\
                              inplace={}'.format(_func_name, self.dtype, inplace))
 
-        elif isinstance(self.dtype, (types.Integer, types.Boolean)):
-            def hpat_pandas_series_no_nan_fillna_impl(self, value=None, method=None, axis=None, inplace=False,
-                                                      limit=None, downcast=None):
-                # no NaNs in series of Integers or Booleans
-                return None
-
-            return hpat_pandas_series_no_nan_fillna_impl
         else:
             def hpat_pandas_series_fillna_impl(self, value=None, method=None, axis=None, inplace=False,
                                                limit=None, downcast=None):
-                na_data_arr = sdc.hiframes.api.get_nan_mask(self._data)
-                self._data[na_data_arr] = value
-                return None
+                return numpy_like.fillna(self._data, inplace=inplace, value=value)
 
             return hpat_pandas_series_fillna_impl
+
     else:
         # non inplace implementations, copy array, fill the NA/NaN and return a new Series
         if isinstance(self.dtype, types.UnicodeType):
@@ -5041,39 +5034,26 @@ def hpat_pandas_series_fillna_impl(self, value=None, method=None, axis=None, inp
             # (can be called directly when it's index handling is fixed)
             def hpat_pandas_series_str_fillna_impl(self, value=None, method=None, axis=None, inplace=False,
                                                    limit=None, downcast=None):
-
-                n = len(self._data)
-                num_chars = 0
-                # get total chars in new array
-                for i in prange(n):
-                    s = self._data[i]
-                    if sdc.hiframes.api.isna(self._data, i):
-                        num_chars += len(value)
-                    else:
-                        num_chars += len(s)
-
-                filled_data = pre_alloc_string_array(n, num_chars)
-                for i in prange(n):
-                    if sdc.hiframes.api.isna(self._data, i):
-                        filled_data[i] = value
-                    else:
-                        filled_data[i] = self._data[i]
-                return pandas.Series(data=filled_data, index=self._index, name=self._name)
+                return pandas.Series(data=numpy_like.fillna(self._data, inplace=inplace, value=value),
+                                     index=self._index,
+                                     name=self._name)
 
             return hpat_pandas_series_str_fillna_impl
         elif isinstance(self.dtype, (types.Integer, types.Boolean)):
             def hpat_pandas_series_no_nan_fillna_impl(self, value=None, method=None, axis=None, inplace=False,
                                                       limit=None, downcast=None):
-                return pandas.Series(data=numpy.copy(self._data), index=self._index, name=self._name)
+                return pandas.Series(data=numpy_like.fillna(self._data, inplace=inplace, value=value),
+                                     index=self._index,
+                                     name=self._name)
 
             return hpat_pandas_series_no_nan_fillna_impl
         else:
             def hpat_pandas_series_fillna_impl(self, value=None, method=None, axis=None, inplace=False,
                                                limit=None, downcast=None):
-                na_data_arr = sdc.hiframes.api.get_nan_mask(self._data)
-                filled_data = numpy.copy(self._data)
-                filled_data[na_data_arr] = value
-                return pandas.Series(data=filled_data, index=self._index, name=self._name)
+                filled_data = numpy_like.fillna(self._data, inplace=inplace, value=value)
+                return pandas.Series(data=filled_data,
+                                     index=self._index,
+                                     name=self._name)
 
             return hpat_pandas_series_fillna_impl
 
diff --git a/sdc/functions/numpy_like.py b/sdc/functions/numpy_like.py
index 9e74a76bd..4078cf555 100644
--- a/sdc/functions/numpy_like.py
+++ b/sdc/functions/numpy_like.py
@@ -48,6 +48,10 @@ def astype(self, dtype):
     pass
 
 
+def fillna(self, inplace=False, value=None):
+    pass
+
+
 def copy(self):
     pass
 
@@ -315,6 +319,81 @@ def sdc_nansum_number_impl(self):
         return gen_sum_bool_impl()
 
 
+@sdc_overload(fillna)
+def sdc_fillna_overload(self, inplace=False, value=None):
+    """
+    Intel Scalable Dataframe Compiler Developer Guide
+    *************************************************
+    Parallel replacement of fillna.
+    .. only:: developer
+       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k fillna
+    """
+    if not isinstance(self, (types.Array, StringArrayType)):
+        return None
+
+    dtype = self.dtype
+    isnan = get_isnan(dtype)
+    if (
+        (isinstance(inplace, types.Literal) and inplace.literal_value == True) or  # noqa
+        (isinstance(inplace, bool) and inplace == True)  # noqa
+    ):
+        if isinstance(dtype, (types.Integer, types.Boolean)):
+            def sdc_fillna_inplace_int_impl(self, inplace=False, value=None):
+                return None
+
+            return sdc_fillna_inplace_int_impl
+
+        def sdc_fillna_inplace_float_impl(self, inplace=False, value=None):
+            length = len(self)
+            for i in prange(length):
+                if isnan(self[i]):
+                    self[i] = value
+            return None
+
+        return sdc_fillna_inplace_float_impl
+
+    else:
+        if isinstance(self.dtype, types.UnicodeType):
+            def sdc_fillna_str_impl(self, inplace=False, value=None):
+                n = len(self)
+                num_chars = 0
+                # get total chars in new array
+                for i in prange(n):
+                    s = self[i]
+                    if sdc.hiframes.api.isna(self, i):
+                        num_chars += len(value)
+                    else:
+                        num_chars += len(s)
+
+                filled_data = pre_alloc_string_array(n, num_chars)
+                for i in prange(n):
+                    if sdc.hiframes.api.isna(self, i):
+                        filled_data[i] = value
+                    else:
+                        filled_data[i] = self[i]
+                return filled_data
+
+            return sdc_fillna_str_impl
+
+        if isinstance(dtype, (types.Integer, types.Boolean)):
+            def sdc_fillna_int_impl(self, inplace=False, value=None):
+                return copy(self)
+
+            return sdc_fillna_int_impl
+
+        def sdc_fillna_impl(self, inplace=False, value=None):
+            length = len(self)
+            filled_data = numpy.empty(length, dtype=dtype)
+            for i in prange(length):
+                if isnan(self[i]):
+                    filled_data[i] = value
+                else:
+                    filled_data[i] = self[i]
+            return filled_data
+
+        return sdc_fillna_impl
+
+
 def nanmin(a):
     pass
 