Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions sdc/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4933,14 +4933,22 @@ def hpat_pandas_series_dropna(self, axis=0, inplace=False):
if not (inplace is False or isinstance(inplace, types.Omitted)):
ty_checker.raise_exc(inplace, 'bool', 'inplace')

def hpat_pandas_series_dropna_impl(self, axis=0, inplace=False):
# generate Series index if needed by using SeriesType.index (i.e. not self._index)
na_data_arr = sdc.hiframes.api.get_nan_mask(self._data)
data = self._data[~na_data_arr]
index = self.index[~na_data_arr]
return pandas.Series(data, index, self._name)
if isinstance(self.data.dtype, types.Number) and isinstance(self.index, (types.Number, types.NoneType)):
def hpat_pandas_series_dropna_impl(self, axis=0, inplace=False):
index = self.index
return numpy_like.dropna(self._data, index, self._name)

return hpat_pandas_series_dropna_impl

else:
def hpat_pandas_series_dropna_str_impl(self, axis=0, inplace=False):
# generate Series index if needed by using SeriesType.index (i.e. not self._index)
na_data_arr = sdc.hiframes.api.get_nan_mask(self._data)
data = self._data[~na_data_arr]
index = self.index[~na_data_arr]
return pandas.Series(data, index, self._name)

return hpat_pandas_series_dropna_impl
return hpat_pandas_series_dropna_str_impl


@sdc_overload_method(SeriesType, 'fillna')
Expand Down
45 changes: 45 additions & 0 deletions sdc/functions/numpy_like.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

import numba
import numpy
import pandas
import numpy as np

from numba import types, jit, prange, numpy_support, literally
Expand All @@ -43,6 +44,7 @@
from sdc.utilities.sdc_typing_utils import TypeChecker
from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size, str_arr_is_na)
from sdc.utilities.utils import sdc_overload, sdc_register_jitable
from sdc.utilities.prange_utils import parallel_chunks


def astype(self, dtype):
Expand Down Expand Up @@ -475,6 +477,49 @@ def nanprod_impl(a):
return nanprod_impl


def dropna(arr, idx, name):
    """Placeholder for the NaN-dropping routine.

    This pure-Python body does nothing and returns None; the implementation
    used under compilation is the one registered for this function through
    ``sdc_overload(dropna)``.
    """


@sdc_overload(dropna)
def dropna_overload(arr, idx, name):
    """Overload of ``dropna`` that builds a Series without the NaN entries.

    Parameters (as types, at typing time)
    -------------------------------------
    arr : array-like type exposing ``dtype``
        Data values to filter.
    idx : array-like type exposing ``dtype``
        Index labels aligned element-wise with ``arr``.
    name
        Forwarded unchanged as the third argument of ``pandas.Series``.

    Returns
    -------
    The implementation function ``dropna_impl(arr, idx, name)``, which returns
    ``pandas.Series(result_data, result_index, name)`` containing only the
    positions where the NaN check on ``arr`` is false.
    """
    dtype = arr.dtype
    dtype_idx = idx.dtype
    # NOTE(review): get_isnan is defined elsewhere in this module; presumably it
    # returns a NaN predicate specialised for ``dtype`` -- confirm.
    isnan = get_isnan(dtype)

    def dropna_impl(arr, idx, name):
        # Two-pass scheme: first count the kept elements per chunk, then copy
        # them into pre-sized output arrays at each chunk's own offset, so the
        # chunks never write to the same output positions.
        chunks = parallel_chunks(len(arr))
        arr_len = numpy.empty(len(chunks), dtype=numpy.int64)
        length = 0

        # Pass 1: per-chunk count of non-NaN values; ``length`` accumulates the
        # total across chunks and ``arr_len[i]`` keeps the count of chunk i.
        for i in prange(len(chunks)):
            chunk = chunks[i]
            res = 0
            for j in range(chunk.start, chunk.stop):
                if not isnan(arr[j]):
                    res += 1
            length += res
            arr_len[i] = res

        result_data = numpy.empty(shape=length, dtype=dtype)
        result_index = numpy.empty(shape=length, dtype=dtype_idx)

        # Pass 2: chunk i writes starting at the sum of the counts of all
        # preceding chunks.
        for i in prange(len(chunks)):
            chunk = chunks[i]
            new_start = int(sum(arr_len[0:i]))
            current_pos = new_start

            for j in range(chunk.start, chunk.stop):
                if not isnan(arr[j]):
                    result_data[current_pos] = arr[j]
                    result_index[current_pos] = idx[j]
                    current_pos += 1

        return pandas.Series(result_data, result_index, name)

    return dropna_impl


def nanmean(a):
pass

Expand Down