Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 13 additions & 33 deletions sdc/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from numba import (types, numpy_support, cgutils)
from numba.typed import Dict
from numba import prange
from numba.targets.arraymath import get_isnan

import sdc
import sdc.datatypes.common_functions as common_functions
Expand Down Expand Up @@ -5019,61 +5020,40 @@ def hpat_pandas_series_fillna(self, value=None, method=None, axis=None, inplace=
raise TypingError('{} Not implemented when Series dtype is {} and\
inplace={}'.format(_func_name, self.dtype, inplace))

elif isinstance(self.dtype, (types.Integer, types.Boolean)):
def hpat_pandas_series_no_nan_fillna_impl(self, value=None, method=None, axis=None, inplace=False,
limit=None, downcast=None):
# no NaNs in series of Integers or Booleans
return None

return hpat_pandas_series_no_nan_fillna_impl
else:
def hpat_pandas_series_fillna_impl(self, value=None, method=None, axis=None, inplace=False,
limit=None, downcast=None):
na_data_arr = sdc.hiframes.api.get_nan_mask(self._data)
self._data[na_data_arr] = value
return None
return numpy_like.fillna(self._data, inplace=inplace, value=value)

return hpat_pandas_series_fillna_impl

else:
# non inplace implementations, copy array, fill the NA/NaN and return a new Series
if isinstance(self.dtype, types.UnicodeType):
# For StringArrayType implementation is taken from _series_fillna_str_alloc_impl
# (can be called directly when it's index handling is fixed)
def hpat_pandas_series_str_fillna_impl(self, value=None, method=None, axis=None,
inplace=False, limit=None, downcast=None):

n = len(self._data)
num_chars = 0
# get total chars in new array
for i in prange(n):
s = self._data[i]
if sdc.hiframes.api.isna(self._data, i):
num_chars += len(value)
else:
num_chars += len(s)

filled_data = pre_alloc_string_array(n, num_chars)
for i in prange(n):
if sdc.hiframes.api.isna(self._data, i):
filled_data[i] = value
else:
filled_data[i] = self._data[i]
return pandas.Series(data=filled_data, index=self._index, name=self._name)
return pandas.Series(data=numpy_like.fillna(self._data, inplace=inplace, value=value),
index=self._index,
name=self._name)

return hpat_pandas_series_str_fillna_impl

elif isinstance(self.dtype, (types.Integer, types.Boolean)):
def hpat_pandas_series_no_nan_fillna_impl(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None):
return pandas.Series(data=numpy.copy(self._data), index=self._index, name=self._name)
return pandas.Series(data=numpy_like.fillna(self._data, inplace=inplace, value=value),
index=self._index,
name=self._name)

return hpat_pandas_series_no_nan_fillna_impl

else:
def hpat_pandas_series_fillna_impl(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None):
na_data_arr = sdc.hiframes.api.get_nan_mask(self._data)
filled_data = numpy.copy(self._data)
filled_data[na_data_arr] = value
return pandas.Series(data=filled_data, index=self._index, name=self._name)
filled_data = numpy_like.fillna(self._data, inplace=inplace, value=value)
return pandas.Series(data=filled_data,
index=self._index,
name=self._name)

return hpat_pandas_series_fillna_impl

Expand Down
79 changes: 79 additions & 0 deletions sdc/functions/numpy_like.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ def astype(self, dtype):
pass


def fillna(self, inplace=False, value=None):
    """
    Placeholder for the array-level ``fillna`` operation.

    The body is intentionally empty: called as plain Python it returns None.
    The working implementation is the one registered for this name with
    ``@sdc_overload(fillna)`` in this module.
    """


def copy(self):
    """
    Placeholder for the array-level ``copy`` operation.

    The body is intentionally empty: called as plain Python it returns None.
    NOTE(review): presumably its implementation is registered through an
    overload elsewhere in this module (not visible here) - confirm.
    """

Expand Down Expand Up @@ -315,6 +319,81 @@ def sdc_nansum_number_impl(self):
return gen_sum_bool_impl()


@sdc_overload(fillna)
def sdc_fillna_overload(self, inplace=False, value=None):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of fillna.

    Picks one implementation depending on the type of ``self`` and on
    ``inplace``:

    * ``self`` is neither ``types.Array`` nor ``StringArrayType``: no
      implementation is provided (None is returned from the overload).
    * ``inplace`` is true, dtype is Integer/Boolean: the implementation
      returns None and leaves the array untouched.
    * ``inplace`` is true, any other dtype: every element for which
      ``isnan`` holds is overwritten with ``value``; returns None.
    * otherwise, dtype is ``types.UnicodeType``: a new string array is built
      in which elements reported by ``sdc.hiframes.api.isna`` are replaced
      with ``value``.
    * otherwise, dtype is Integer/Boolean: ``copy(self)`` is returned.
    * otherwise: a new array of the same dtype is returned with every
      element for which ``isnan`` holds replaced with ``value``.

    .. only:: developer
        Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k fillna
    """
    supported_arrays = (types.Array, StringArrayType)
    if not isinstance(self, supported_arrays):
        return None

    dtype = self.dtype
    isnan = get_isnan(dtype)
    nan_free_dtypes = (types.Integer, types.Boolean)

    # in-place mode is requested either through a literal type carrying True
    # or through a plain Python bool equal to True
    fill_in_place = (
        (isinstance(inplace, types.Literal) and inplace.literal_value == True) or  # noqa
        (isinstance(inplace, bool) and inplace == True)  # noqa
    )

    if fill_in_place:
        if isinstance(dtype, nan_free_dtypes):
            def sdc_fillna_inplace_int_impl(self, inplace=False, value=None):
                return None

            return sdc_fillna_inplace_int_impl

        def sdc_fillna_inplace_float_impl(self, inplace=False, value=None):
            size = len(self)
            for idx in prange(size):
                if isnan(self[idx]):
                    self[idx] = value
            return None

        return sdc_fillna_inplace_float_impl

    # everything below builds and returns a new array
    if isinstance(dtype, types.UnicodeType):
        def sdc_fillna_str_impl(self, inplace=False, value=None):
            count = len(self)

            # first pass: number of characters the result has to hold
            total_chars = 0
            for idx in prange(count):
                item = self[idx]
                if not sdc.hiframes.api.isna(self, idx):
                    total_chars += len(item)
                else:
                    total_chars += len(value)

            # second pass: fill the pre-allocated string array
            result = pre_alloc_string_array(count, total_chars)
            for idx in prange(count):
                if not sdc.hiframes.api.isna(self, idx):
                    result[idx] = self[idx]
                else:
                    result[idx] = value
            return result

        return sdc_fillna_str_impl

    if isinstance(dtype, nan_free_dtypes):
        def sdc_fillna_int_impl(self, inplace=False, value=None):
            return copy(self)

        return sdc_fillna_int_impl

    def sdc_fillna_impl(self, inplace=False, value=None):
        size = len(self)
        result = numpy.empty(size, dtype=dtype)
        for idx in prange(size):
            if not isnan(self[idx]):
                result[idx] = self[idx]
            else:
                result[idx] = value
        return result

    return sdc_fillna_impl


def nanmin(a):
    """
    Placeholder for the array-level ``nanmin`` operation.

    The body is intentionally empty: called as plain Python it returns None.
    NOTE(review): presumably its implementation is registered through an
    overload elsewhere in this module (not visible here) - confirm.
    """

Expand Down