diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
index 658d6460c..146572e24 100644
--- a/sdc/datatypes/hpat_pandas_series_functions.py
+++ b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -4933,14 +4933,22 @@ def hpat_pandas_series_dropna(self, axis=0, inplace=False):
     if not (inplace is False or isinstance(inplace, types.Omitted)):
         ty_checker.raise_exc(inplace, 'bool', 'inplace')
 
-    def hpat_pandas_series_dropna_impl(self, axis=0, inplace=False):
-        # generate Series index if needed by using SeriesType.index (i.e. not self._index)
-        na_data_arr = sdc.hiframes.api.get_nan_mask(self._data)
-        data = self._data[~na_data_arr]
-        index = self.index[~na_data_arr]
-        return pandas.Series(data, index, self._name)
+    if isinstance(self.data.dtype, types.Number) and isinstance(self.index, (types.Number, types.NoneType)):
+        def hpat_pandas_series_dropna_impl(self, axis=0, inplace=False):
+            index = self.index
+            return numpy_like.dropna(self._data, index, self._name)
+
+        return hpat_pandas_series_dropna_impl
+
+    else:
+        def hpat_pandas_series_dropna_str_impl(self, axis=0, inplace=False):
+            # generate Series index if needed by using SeriesType.index (i.e. not self._index)
+            na_data_arr = sdc.hiframes.api.get_nan_mask(self._data)
+            data = self._data[~na_data_arr]
+            index = self.index[~na_data_arr]
+            return pandas.Series(data, index, self._name)
 
-    return hpat_pandas_series_dropna_impl
+        return hpat_pandas_series_dropna_str_impl
 
 
 @sdc_overload_method(SeriesType, 'fillna')
diff --git a/sdc/functions/numpy_like.py b/sdc/functions/numpy_like.py
index cbdb43904..266520c13 100644
--- a/sdc/functions/numpy_like.py
+++ b/sdc/functions/numpy_like.py
@@ -33,6 +33,7 @@
 
 import numba
 import numpy
+import pandas
 import numpy as np
 
 from numba import types, jit, prange, numpy_support, literally
@@ -43,6 +44,7 @@
 from sdc.utilities.sdc_typing_utils import TypeChecker
 from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size, str_arr_is_na)
 from sdc.utilities.utils import sdc_overload, sdc_register_jitable
+from sdc.utilities.prange_utils import parallel_chunks
 
 
 def astype(self, dtype):
@@ -475,5 +477,66 @@ def nanprod_impl(a):
     return nanprod_impl
 
 
+def dropna(arr, idx, name):
+    """Stub whose compiled implementation is registered by ``dropna_overload``."""
+    pass
+
+
+@sdc_overload(dropna)
+def dropna_overload(arr, idx, name):
+    """Return an implementation of ``dropna`` that filters out NaN values.
+
+    The implementation keeps every position ``j`` where ``isnan(arr[j])`` is
+    false and returns ``pandas.Series(kept_data, kept_index, name)``.
+
+    arr:  data array; ``arr.dtype`` selects the NaN check (via ``get_isnan``)
+          and the dtype of the result data.
+    idx:  index values read at the same positions as ``arr``; ``idx.dtype``
+          is the dtype of the result index.
+    name: passed through unchanged as the name of the resulting Series.
+    """
+    dtype = arr.dtype
+    dtype_idx = idx.dtype
+    isnan = get_isnan(dtype)
+
+    def dropna_impl(arr, idx, name):
+        chunks = parallel_chunks(len(arr))
+        arr_len = numpy.empty(len(chunks), dtype=numpy.int64)
+        length = 0
+
+        # Pass 1: count how many values each chunk keeps.
+        for i in prange(len(chunks)):
+            chunk = chunks[i]
+            res = 0
+            for j in range(chunk.start, chunk.stop):
+                if not isnan(arr[j]):
+                    res += 1
+            length += res
+            arr_len[i] = res
+
+        # Exclusive prefix sum: chunk i writes starting at the number of values
+        # kept by chunks 0..i-1. Computed once up front rather than re-summing
+        # arr_len[0:i] inside every iteration of the second loop.
+        offsets = numpy.cumsum(arr_len) - arr_len
+
+        result_data = numpy.empty(shape=length, dtype=dtype)
+        result_index = numpy.empty(shape=length, dtype=dtype_idx)
+
+        # Pass 2: each chunk copies its kept values into its own output range.
+        for i in prange(len(chunks)):
+            chunk = chunks[i]
+            current_pos = offsets[i]
+
+            for j in range(chunk.start, chunk.stop):
+                if not isnan(arr[j]):
+                    result_data[current_pos] = arr[j]
+                    result_index[current_pos] = idx[j]
+                    current_pos += 1
+
+        return pandas.Series(result_data, result_index, name)
+
+    return dropna_impl
+
+
 def nanmean(a):
     pass