Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Commit

Permalink
Merge 4cc8172 into db43e32
Browse files Browse the repository at this point in the history
  • Loading branch information
kozlov-alexey committed Oct 25, 2019
2 parents db43e32 + 4cc8172 commit ba16f30
Show file tree
Hide file tree
Showing 4 changed files with 240 additions and 35 deletions.
46 changes: 46 additions & 0 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2274,3 +2274,49 @@ def hpat_pandas_series_median_impl(self, axis=None, skipna=True, level=None, num
return numpy.median(self._data)

return hpat_pandas_series_median_impl


@overload_method(SeriesType, 'dropna')
def hpat_pandas_series_dropna(self, axis=0, inplace=False):
    """
    Pandas Series method :meth:`pandas.Series.dropna` implementation.

    .. only:: developer

       Tests: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_dropna*

    Parameters
    ----------
    self: :obj:`pandas.Series`
        input series
    axis: :obj:`int` or :obj:`string` {0 or `index`}, default 0
        Only its type is validated; the implementation does not use it,
        since there is only one axis to drop values from.
    inplace: :obj:`bool`, default False
        If True, do operation inplace and return None.
        *unsupported*

    Returns
    -------
    :obj:`pandas.Series`
        new :obj:`pandas.Series` object holding the entries of `self` that are not NA,
        together with the matching index entries and the original name.

    Raises
    ------
    TypingError
        If `self` is not a series, if `axis` has an unexpected type,
        or if `inplace` is given as anything other than False.
    """

    _func_name = 'Method dropna().'

    if not isinstance(self, SeriesType):
        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

    # axis may arrive as a Numba type (typed or omitted argument) or as the plain default value 0
    accepted_axis_types = (types.Integer, types.StringLiteral, types.UnicodeType, types.Omitted)
    axis_is_accepted = isinstance(axis, accepted_axis_types) or axis == 0
    if not axis_is_accepted:
        raise TypingError('{} The axis must be an Integer or String. Given: {}'.format(_func_name, axis))

    # only the default (inplace=False) is supported, whether passed as a value or omitted
    if inplace is not False and not isinstance(inplace, types.Omitted):
        raise TypingError('{} Unsupported parameters. Given inplace: {}'.format(_func_name, inplace))

    def hpat_pandas_series_dropna_impl(self, axis=0, inplace=False):
        # self.index (i.e. not self._index) is used so that the Series index is generated if needed
        not_na_mask = ~hpat.hiframes.api.get_nan_mask(self._data)
        kept_data = self._data[not_na_mask]
        kept_index = self.index[not_na_mask]
        return pandas.Series(kept_data, kept_index, self._name)

    return hpat_pandas_series_dropna_impl
26 changes: 26 additions & 0 deletions hpat/hiframes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
alloc_pre_shuffle_metadata)
from hpat.hiframes.join import write_send_buff
from hpat.hiframes.split_impl import string_array_split_view_type
from numba.errors import TypingError

# XXX: used in agg func output to avoid mutating filter, agg, join, etc.
# TODO: fix type inferrer and remove this
Expand Down Expand Up @@ -533,6 +534,31 @@ def isna_overload(arr, i):
return lambda arr, i: False


def get_nan_mask(arr):
    """
    Pure-Python stub: return an all-False boolean mask with one element per item of `arr`.

    The type-specific implementations are supplied by the Numba overload
    registered for this function; this body only builds a mask of the right length.

    Parameters
    ----------
    arr:
        any object supporting ``len()``

    Returns
    -------
    :obj:`numpy.ndarray`
        array of ``numpy.bool_`` with ``len(arr)`` elements, all False.
    """
    mask_length = len(arr)
    return np.zeros(mask_length, dtype=np.bool_)


@overload(get_nan_mask)
def get_nan_mask_overload(arr):
    """
    Numba overload of :func:`get_nan_mask` selecting an implementation from the type of `arr`.

    Parameters
    ----------
    arr:
        Numba type of the array-like argument

    Returns
    -------
    callable
        implementation returning a boolean mask for `arr`:

        * ``types.Array`` of floats: ``np.isnan(arr)``
        * ``types.Array`` of booleans or integers: all-False mask
        * ``types.Array`` of datetime64/timedelta64, and any non-``types.Array``
          type (e.g. StringArrayType): element-wise ``isna(arr, i)``

    Raises
    ------
    TypingError
        If `arr` is a ``types.Array`` with any other dtype.
    """

    # prefix of the TypingError message below; it has to be defined here,
    # otherwise formatting the message fails with NameError
    _func_name = 'Function get_nan_mask().'

    def get_nan_mask_via_isna_impl(arr):
        return np.array([isna(arr, i) for i in np.arange(len(arr))])

    if isinstance(arr, types.Array):
        dtype = arr.dtype
        if isinstance(dtype, types.Float):
            return lambda arr: np.isnan(arr)
        elif isinstance(dtype, (types.Boolean, types.Integer)):
            # these dtypes are treated as never holding NA values
            return lambda arr: np.zeros(len(arr), np.bool_)
        elif isinstance(dtype, (types.NPDatetime, types.NPTimedelta)):
            return get_nan_mask_via_isna_impl
        else:
            raise TypingError('{} Not implemented for arrays with dtype: {}'.format(_func_name, dtype))
    else:
        # for StringArrayType and other cases rely on isna implementation
        return get_nan_mask_via_isna_impl


@numba.njit
def min_heapify(arr, n, start, cmp_f):
min_ind = start
Expand Down
3 changes: 2 additions & 1 deletion hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,7 @@ def resolve_fillna(self, ary, args, kws):
out = types.none
return signature(out, *args)

# PR135. This needs to be commented out (for new-style impl to be called)
@bound_function("series.dropna")
def resolve_dropna(self, ary, args, kws):
out = ary
Expand Down Expand Up @@ -994,7 +995,7 @@ def generic_expand_cumulative_series(self, args, kws):
'resolve_cumsum',
'resolve_shift', 'resolve_sum', 'resolve_copy', 'resolve_mean',
'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique',
'resolve_prod', 'resolve_count']
'resolve_prod', 'resolve_count', 'resolve_dropna']

# use ArrayAttribute for attributes not defined in SeriesAttribute
for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items():
Expand Down
200 changes: 166 additions & 34 deletions hpat/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1078,25 +1078,159 @@ def test_impl(S):
pd.testing.assert_series_equal(hpat_func(S),
test_impl(S), check_names=False)

def test_series_dropna_float1(self):
def test_impl(A):
return A.dropna().values
@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
'No support of axis argument in old-style Series.dropna() impl')
def test_series_dropna_axis1(self):
'''Verifies Series.dropna() implementation handles 'index' as axis argument'''
def test_impl(S):
return S.dropna(axis='index')
hpat_func = hpat.jit(test_impl)

S1 = pd.Series([1.0, 2.0, np.nan, 1.0])
S1 = pd.Series([1.0, 2.0, np.nan, 1.0, np.inf])
S2 = S1.copy()
np.testing.assert_array_equal(hpat_func(S1), test_impl(S2))
pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2))

def test_series_dropna_str1(self):
def test_impl(A):
return A.dropna().values
@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
'No support of axis argument in old-style Series.dropna() impl')
def test_series_dropna_axis2(self):
'''Verifies Series.dropna() implementation handles 0 as axis argument'''
def test_impl(S):
return S.dropna(axis=0)
hpat_func = hpat.jit(test_impl)

S1 = pd.Series(['aa', 'b', None, 'ccc'])
S1 = pd.Series([1.0, 2.0, np.nan, 1.0, np.inf])
S2 = S1.copy()
np.testing.assert_array_equal(hpat_func(S1), test_impl(S2))
pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2))

@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                 'No support of axis argument in old-style Series.dropna() impl')
def test_series_dropna_axis3(self):
    '''Checks that Series.dropna() accepts a correct axis value passed as a non-literal argument'''
    def test_impl(series, axis):
        return series.dropna(axis=axis)
    jitted_func = hpat.jit(test_impl)

    jit_input = pd.Series([1.0, 2.0, np.nan, 1.0, np.inf])
    ref_input = jit_input.copy()
    # both spellings of the only supported axis
    for axis_value in (0, 'index'):
        actual = jitted_func(jit_input, axis_value)
        expected = test_impl(ref_input, axis_value)
        pd.testing.assert_series_equal(actual, expected)

@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                 'BUG: old-style dropna impl returns series without index')
def test_series_dropna_float_index1(self):
    '''Checks Series.dropna() on float series with default index, one per entry of the shared float64 inputs'''
    def test_impl(series):
        return series.dropna()
    jitted_func = hpat.jit(test_impl)

    for values in test_global_input_data_float64:
        jit_input = pd.Series(values)
        ref_input = jit_input.copy()
        actual = jitted_func(jit_input)
        expected = test_impl(ref_input)
        pd.testing.assert_series_equal(actual, expected)

@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                 'BUG: old-style dropna impl returns series without index')
def test_series_dropna_float_index2(self):
    '''Checks Series.dropna() on a float series (with NaN and inf) that has a string index'''
    def test_impl(series):
        return series.dropna()
    jitted_func = hpat.jit(test_impl)

    jit_input = pd.Series([1.0, 2.0, np.nan, 1.0, np.inf], ['a', 'b', 'c', 'd', 'e'])
    ref_input = jit_input.copy()
    actual = jitted_func(jit_input)
    expected = test_impl(ref_input)
    pd.testing.assert_series_equal(actual, expected)

@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                 'BUG: old-style dropna impl returns series without index')
def test_series_dropna_str_index1(self):
    '''Checks Series.dropna() on a series of strings (with None and an empty string) with default index'''
    def test_impl(series):
        return series.dropna()
    jitted_func = hpat.jit(test_impl)

    jit_input = pd.Series(['aa', 'b', None, 'cccd', ''])
    ref_input = jit_input.copy()
    actual = jitted_func(jit_input)
    expected = test_impl(ref_input)
    pd.testing.assert_series_equal(actual, expected)

@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                 'BUG: old-style dropna impl returns series without index')
def test_series_dropna_str_index2(self):
    '''Checks Series.dropna() on a series of strings (with None and an empty string) with string index'''
    def test_impl(series):
        return series.dropna()
    jitted_func = hpat.jit(test_impl)

    jit_input = pd.Series(['aa', 'b', None, 'cccd', ''], ['a', 'b', 'c', 'd', 'e'])
    ref_input = jit_input.copy()
    actual = jitted_func(jit_input)
    expected = test_impl(ref_input)
    pd.testing.assert_series_equal(actual, expected)

@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                 'BUG: old-style dropna impl returns series without index')
def test_series_dropna_str_index3(self):
    '''Checks Series.dropna() on a series of strings (with None and an empty string) with integer index'''
    def test_impl(series):
        return series.dropna()
    jitted_func = hpat.jit(test_impl)

    jit_input = pd.Series(['aa', 'b', None, 'cccd', ''], index=[1, 2, 5, 7, 10])
    ref_input = jit_input.copy()
    actual = jitted_func(jit_input)
    expected = test_impl(ref_input)
    pd.testing.assert_series_equal(actual, expected)

@unittest.skip('BUG: old-style dropna impl returns series without index, in new-style inplace is unsupported')
def test_series_dropna_float_inplace_no_index1(self):
    '''Checks Series.dropna(inplace=True) on a float series with default index by returning the modified series'''
    def test_impl(series):
        series.dropna(inplace=True)
        return series
    jitted_func = hpat.jit(test_impl)

    jit_input = pd.Series([1.0, 2.0, np.nan, 1.0, np.inf])
    ref_input = jit_input.copy()
    actual = jitted_func(jit_input)
    expected = test_impl(ref_input)
    pd.testing.assert_series_equal(actual, expected)

@unittest.skip('TODO: add reflection support and check method return value')
def test_series_dropna_float_inplace_no_index2(self):
    '''Checks that Series.dropna(inplace=True) returns None and its effect is visible in the caller's float series'''
    def test_impl(series):
        return series.dropna(inplace=True)
    jitted_func = hpat.jit(test_impl)

    jit_input = pd.Series([1.0, 2.0, np.nan, 1.0, np.inf])
    ref_input = jit_input.copy()
    jit_result = jitted_func(jit_input)
    self.assertIsNone(jit_result)
    ref_result = test_impl(ref_input)
    self.assertIsNone(ref_result)
    # both inputs are expected to have been modified in place in the same way
    pd.testing.assert_series_equal(jit_input, ref_input)

@unittest.skip('BUG: old-style dropna impl returns series without index, in new-style inplace is unsupported')
def test_series_dropna_str_inplace_no_index1(self):
    '''Checks Series.dropna(inplace=True) on a series of strings
       with default index by returning the modified series
    '''
    def test_impl(series):
        series.dropna(inplace=True)
        return series
    jitted_func = hpat.jit(test_impl)

    jit_input = pd.Series(['aa', 'b', None, 'cccd', ''])
    ref_input = jit_input.copy()
    actual = jitted_func(jit_input)
    expected = test_impl(ref_input)
    pd.testing.assert_series_equal(actual, expected)

@unittest.skip('TODO: add reflection support and check method return value')
def test_series_dropna_str_inplace_no_index2(self):
    '''Checks that Series.dropna(inplace=True) returns None and its effect is visible in the caller's string series'''
    def test_impl(series):
        return series.dropna(inplace=True)
    jitted_func = hpat.jit(test_impl)

    jit_input = pd.Series(['aa', 'b', None, 'cccd', ''])
    ref_input = jit_input.copy()
    jit_result = jitted_func(jit_input)
    self.assertIsNone(jit_result)
    ref_result = test_impl(ref_input)
    self.assertIsNone(ref_result)
    # both inputs are expected to have been modified in place in the same way
    pd.testing.assert_series_equal(jit_input, ref_input)

def test_series_dropna_str_parallel1(self):
'''Verifies Series.dropna() distributed work for series of strings with default index'''
def test_impl(A):
B = A.dropna()
return (B == 'gg').sum()
Expand All @@ -1106,46 +1240,44 @@ def test_impl(A):
start, end = get_start_end(len(S1))
# TODO: gatherv
self.assertEqual(hpat_func(S1[start:end]), test_impl(S1))
self.assertEqual(count_array_REPs(), 0)
self.assertEqual(count_parfor_REPs(), 0)
self.assertTrue(count_array_OneDs() > 0)

def test_series_dropna_float_inplace1(self):
def test_impl(A):
A.dropna(inplace=True)
return A.values
hpat_func = hpat.jit(test_impl)

S1 = pd.Series([1.0, 2.0, np.nan, 1.0])
S2 = S1.copy()
np.testing.assert_array_equal(hpat_func(S1), test_impl(S2))

def test_series_dropna_str_inplace1(self):
def test_impl(A):
A.dropna(inplace=True)
return A.values
@unittest.skip('AssertionError: Series are different\n'
'Series length are different\n'
'[left]: 3, Int64Index([0, 1, 2], dtype=\'int64\')\n'
'[right]: 2, Int64Index([1, 2], dtype=\'int64\')')
def test_series_dropna_dt_no_index1(self):
'''Verifies Series.dropna() implementation for datetime series with default index'''
def test_impl(S):
return S.dropna()
hpat_func = hpat.jit(test_impl)

S1 = pd.Series(['aa', 'b', None, 'ccc'])
S1 = pd.Series([pd.NaT, pd.Timestamp('1970-12-01'), pd.Timestamp('2012-07-25')])
S2 = S1.copy()
np.testing.assert_array_equal(hpat_func(S1), test_impl(S2))
pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2))

@unittest.skip('Unsupported functionality: failed to handle index')
def test_series_dropna_index_str(self):
def test_series_dropna_bool_no_index1(self):
'''Verifies Series.dropna() implementation for bool series with default index'''
def test_impl(S):
return S.dropna()

hpat_func = hpat.jit(test_impl)

S1 = pd.Series(['aa', 'b', None, 'ccc'], index=['a', 'b', 'c', 'd'])
S1 = pd.Series([True, False, False, True])
S2 = S1.copy()
pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2))

@unittest.skip('Unsupported functionality: failed to handle index')
def test_series_dropna_index_int(self):
@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
'BUG: old-style dropna impl returns series without index')
def test_series_dropna_int_no_index1(self):
'''Verifies Series.dropna() implementation for integer series with default index'''
def test_impl(S):
return S.dropna()

hpat_func = hpat.jit(test_impl)

S1 = pd.Series(['aa', 'b', None, 'ccc'], index=[1, 2, 5, 7])
n = 11
S1 = pd.Series(np.arange(n, dtype=np.int64))
S2 = S1.copy()
pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2))

Expand Down

0 comments on commit ba16f30

Please sign in to comment.