Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Implement series.var() in new style #220

Merged
merged 5 commits into from
Oct 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,86 @@ def hpat_pandas_series_values_impl(self):
return hpat_pandas_series_values_impl


@overload_method(SeriesType, 'var')
def hpat_pandas_series_var(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

skipna=True?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By default skipna=None in Pandas interface.

"""
Pandas Series method :meth:`pandas.Series.var` implementation.

.. only:: developer
Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_var
Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_var_unboxing
Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_var_str
Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_var_unsupported_params

Parameters
----------
self: :obj:`pandas.Series`
input series
axis: :obj:`int`, :obj:`str`
Axis along which the operation acts
0/None/'index' - row-wise operation
1/'columns' - column-wise operation
*unsupported*
skipna: :obj:`bool`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect skipna=True

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By default skipna=None in Pandas interface. So we did the same.

exclude NA/null values
level: :obj:`int`, :obj:`str`
If the axis is a MultiIndex (hierarchical),
count along a particular level, collapsing into a scalar
*unsupported*
ddof: :obj:`int`
Delta Degrees of Freedom.
The divisor used in calculations is N - ddof,
where N represents the number of elements.
numeric_only: :obj:`bool`
Include only float, int, boolean columns.
If None, will attempt to use everything, then use only numeric data.
Not implemented for Series.
*unsupported*

Returns
-------
:obj:`scalar`
returns :obj:`scalar`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It can return scalar or Series

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure we have to add series here, because series is returned in case of level specified, but currently we don't support parameter level. So scalar is always returned in our case. Do you still think I need to add series here?

"""

_func_name = 'Method var().'

# The receiver has to be a Series type.
if not isinstance(self, SeriesType):
    raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

# Only numeric element types are accepted.
series_dtype = self.data.dtype
if not isinstance(series_dtype, types.Number):
    msg = '{} The object must be a number. Given self.data.dtype: {}'
    raise TypingError(msg.format(_func_name, series_dtype))

# skipna may be left out, be None, or be a boolean.
skipna_accepted = skipna is None or isinstance(skipna, (types.Omitted, types.Boolean, types.NoneType))
if not skipna_accepted:
    raise TypingError('{} The object must be a boolean. Given skipna: {}'.format(_func_name, skipna))

# ddof may be left out or be an integer (plain Python int or a typed integer).
ddof_accepted = isinstance(ddof, (types.Omitted, int, types.Integer))
if not ddof_accepted:
    raise TypingError('{} The object must be an integer. Given ddof: {}'.format(_func_name, ddof))

# axis, level and numeric_only are accepted only when omitted or None.
unsupported_params = (('axis', axis), ('level', level), ('numeric_only', numeric_only))
for name, arg in unsupported_params:
    if arg is None or isinstance(arg, (types.Omitted, types.NoneType)):
        continue
    raise TypingError('{} Unsupported parameters. Given {}: {}'.format(_func_name, name, arg))

def hpat_pandas_series_var_impl(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None):
    # A skipna of None is treated as True.
    if skipna is None:
        skipna = True

    if not skipna:
        # NaN values are kept: use every element of the underlying array.
        total_count = len(self._data)
        if total_count <= ddof:
            return numpy.nan

        # var() divides by N; rescale so that the divisor becomes N - ddof.
        return self._data.var() * total_count / (total_count - ddof)

    # NaN values are skipped: only non-NaN elements count towards N.
    nan_count = numpy.sum(numpy.isnan(self._data))
    valid_count = len(self._data) - nan_count
    if valid_count <= ddof:
        return numpy.nan

    # nanvar() divides by N; rescale so that the divisor becomes N - ddof.
    return numpy.nanvar(self._data) * valid_count / (valid_count - ddof)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find it hard to believe that numpy has no such functionality

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Numpy implementation has ddof=0 by default, but Pandas has ddof=1 by default. So I had to make this extra change to align the parameter between these implementations.


return hpat_pandas_series_var_impl


@overload_attribute(SeriesType, 'index')
def hpat_pandas_series_index(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,7 +992,7 @@ def generic_expand_cumulative_series(self, args, kws):

# TODO: add itemsize, strides, etc. when removed from Pandas
# NOTE(review): presumably the attribute/resolver names that must not be
# forwarded to the underlying array for Series -- confirm against the code
# that consumes this list. Each name is listed exactly once.
_not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten',
                           'resolve_cumsum', 'resolve_var',
                           'resolve_shift', 'resolve_sum', 'resolve_copy', 'resolve_mean',
                           'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique',
                           'resolve_prod', 'resolve_count', 'resolve_dropna']
Expand Down
62 changes: 62 additions & 0 deletions hpat/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2911,6 +2911,68 @@ def test_series_nunique_param1_impl(S, dropna):
result_param1 = hpat_func_param1(S, param1)
self.assertEqual(result_param1, result_param1_ref)

def test_series_var(self):
    # Series built inside the function: mixes NaN, zero, a negative value
    # and the smallest positive subnormal double.
    def pyfunc():
        values = pd.Series([1.0, np.nan, -1.0, 0.0, 5e-324])
        return values.var()

    cfunc = hpat.jit(pyfunc)
    expected = pyfunc()
    actual = cfunc()
    np.testing.assert_equal(expected, actual)

def test_series_var_unboxing(self):
    # Series passed in as an argument, so it goes through unboxing.
    def pyfunc(series):
        return series.var()

    cfunc = hpat.jit(pyfunc)
    # The trailing [] adds an empty Series to the numeric inputs.
    datasets = test_global_input_data_numeric + [[]]
    for data in datasets:
        series = pd.Series(data)
        expected = pyfunc(series)
        actual = cfunc(series)
        np.testing.assert_equal(expected, actual)

@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                 'Series.var() parameters "ddof" and "skipna" unsupported')
def test_series_var_full(self):
    # Runs every (ddof, skipna) combination over every numeric input and
    # an empty Series.
    def pyfunc(series, skipna, ddof):
        return series.var(skipna=skipna, ddof=ddof)

    cfunc = hpat.jit(pyfunc)
    # Same iteration order as nested loops: ddof outer, skipna inner.
    combinations = [(ddof, skipna) for ddof in [0, 1] for skipna in [True, False]]
    for data in test_global_input_data_numeric + [[]]:
        series = pd.Series(data)
        for ddof, skipna in combinations:
            ref_result = pyfunc(series, skipna=skipna, ddof=ddof)
            result = cfunc(series, skipna=skipna, ddof=ddof)
            np.testing.assert_equal(ref_result, result)

def test_series_var_str(self):
    # A Series built from the unicode test data must be rejected with a
    # TypingError that names the offending dtype.
    def pyfunc(series):
        return series.var()

    cfunc = hpat.jit(pyfunc)
    text_series = pd.Series(test_global_input_data_unicode_kind4)
    msg = 'Method var(). The object must be a number. Given self.data.dtype: {}'
    expected_fragment = msg.format(types.unicode_type)

    with self.assertRaises(TypingError) as raises:
        cfunc(text_series)
    self.assertIn(expected_fragment, str(raises.exception))

def test_series_var_unsupported_params(self):
    # axis, level and numeric_only must each raise a TypingError when given
    # a value other than None.
    def pyfunc(series, axis, level, numeric_only):
        return series.var(axis=axis, level=level, numeric_only=numeric_only)

    cfunc = hpat.jit(pyfunc)
    series = pd.Series(test_global_input_data_float64[0])
    msg = 'Method var(). Unsupported parameters. Given {}: {}'

    # (parameter name, offending value, type name expected in the message)
    cases = [
        ('axis', 1, 'int'),
        ('level', 1, 'int'),
        ('numeric_only', True, 'bool'),
    ]
    for param_name, bad_value, type_name in cases:
        # Only the parameter under test is set; the other two stay None.
        kwargs = {'axis': None, 'level': None, 'numeric_only': None}
        kwargs[param_name] = bad_value
        with self.assertRaises(TypingError) as raises:
            cfunc(series, **kwargs)
        self.assertIn(msg.format(param_name, type_name), str(raises.exception))

def test_series_count(self):
def test_series_count_impl(S):
return S.count()
Expand Down