Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Commit

Permalink
Implement series.var() in new style
Browse files Browse the repository at this point in the history
  • Loading branch information
densmirn committed Oct 15, 2019
1 parent e45cc92 commit f323e5b
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 2 deletions.
73 changes: 73 additions & 0 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,79 @@ def hpat_pandas_series_values_impl(self):
return hpat_pandas_series_values_impl


@overload_method(SeriesType, 'var')
def hapt_pandas_series_var(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None):
    """
    Pandas Series method :meth:`pandas.Series.var` implementation.

    .. only:: developer

       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_var
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_var_unboxing
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_var_str
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_var_unsupported_params

    Parameters
    ----------
    self: :obj:`pandas.Series`
        input series
    axis: :obj:`int`, :obj:`str`
        Axis along which the operation acts
        0/None - row-wise operation
        1      - column-wise operation
        *unsupported*
    skipna: :obj:`bool`
        exclude NA/null values (treated as True when None)
    level: :obj:`int`, :obj:`str`
        If the axis is a MultiIndex (hierarchical),
        count along a particular level, collapsing into a scalar
        *unsupported*
    ddof: :obj:`int`
        Delta Degrees of Freedom.
        The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    numeric_only: :obj:`bool`
        Include only float, int, boolean columns.
        If None, will attempt to use everything, then use only numeric data.
        Not implemented for Series.
        *unsupported*

    Returns
    -------
    :obj:`scalar`
        returns :obj:`scalar`; NaN when the number of elements taken into
        account does not exceed ``ddof`` (the divisor N - ddof would not be
        positive)

    Raises
    ------
    TypingError
        if ``self`` is not a numeric series, ``skipna`` is not a boolean,
        ``ddof`` is not an integer, or any of the unsupported parameters
        (``axis``, ``level``, ``numeric_only``) is given a non-None value
    """

    _func_name = 'Method var().'

    # The checks below run at typing time: arguments are Numba types, except
    # for defaults that may still arrive as plain Python values.
    if not isinstance(self, SeriesType):
        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

    if not isinstance(self.dtype, types.Number):
        raise TypingError('{} The object must be a number. Given self.dtype: {}'.format(_func_name, self.dtype))

    if not isinstance(skipna, (types.Omitted, types.Boolean, types.NoneType)) and skipna is not None:
        raise TypingError('{} The object must be a boolean. Given skipna: {}'.format(_func_name, skipna))

    if not isinstance(ddof, (types.Omitted, int, types.Integer)):
        raise TypingError('{} The object must be an integer. Given ddof: {}'.format(_func_name, ddof))

    for name, arg in [('axis', axis), ('level', level), ('numeric_only', numeric_only)]:
        if not isinstance(arg, (types.Omitted, types.NoneType)) and arg is not None:
            raise TypingError('{} Unsupported parameters. Given {}: {}'.format(_func_name, name, arg))

    def hpat_pandas_series_var_impl(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None):
        if skipna is None:
            skipna = True

        if skipna:
            # Number of non-NaN elements: only these contribute to the variance.
            valuable_length = len(self._data) - numpy.sum(numpy.isnan(self._data))
            # Without this guard the rescaling below divides by zero (or by a
            # negative number); pandas reports NaN for such inputs.
            if valuable_length <= ddof:
                return numpy.nan
            # numpy.nanvar uses divisor N; rescale it to divisor N - ddof.
            return numpy.nanvar(self._data) * valuable_length / (valuable_length - ddof)

        if len(self._data) <= ddof:
            return numpy.nan
        # ndarray.var() uses divisor N; rescale it to divisor N - ddof.
        return self._data.var() * len(self._data) / (len(self._data) - ddof)

    return hpat_pandas_series_var_impl


@overload_attribute(SeriesType, 'index')
def hpat_pandas_series_index(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion hpat/hiframes/hiframes_typed.py
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,7 @@ def parse_impl(data):

def _run_call_series(self, assign, lhs, rhs, series_var, func_name):
# single arg functions
if func_name in ('sum', 'count', 'mean', 'var', 'min', 'max', 'prod'):
if func_name in ('sum', 'count', 'mean', 'min', 'max', 'prod'):
if rhs.args or rhs.kws:
raise ValueError("HPAT pipeline does not support arguments for Series.{}()".format(func_name))

Expand Down
2 changes: 1 addition & 1 deletion hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,7 +988,7 @@ def generic_expand_cumulative_series(self, args, kws):

# TODO: add itemsize, strides, etc. when removed from Pandas
_not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten',
'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique']
'resolve_take', 'resolve_var', 'resolve_max', 'resolve_min', 'resolve_nunique']

# use ArrayAttribute for attributes not defined in SeriesAttribute
for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items():
Expand Down
54 changes: 54 additions & 0 deletions hpat/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
from hpat.tests.test_utils import (
count_array_REPs, count_parfor_REPs, count_array_OneDs, get_start_end)
from hpat.tests.gen_test_data import ParquetGenerator
from numba import types
from numba.config import IS_32BITS
from numba.errors import TypingError


_cov_corr_series = [(pd.Series(x), pd.Series(y)) for x, y in [
Expand Down Expand Up @@ -2099,6 +2101,58 @@ def test_series_nunique_param1_impl(S, dropna):
result_param1 = hpat_func_param1(S, param1)
self.assertEqual(result_param1, result_param1_ref)

def test_series_var(self):
    """Compare jitted Series.var() with pandas on a float series holding a NaN."""
    # The body of pyfunc is what gets compiled, so it is kept as is.
    def pyfunc():
        series = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9])
        return series.var()

    jitted = hpat.jit(pyfunc)
    expected = pyfunc()
    actual = jitted()
    np.testing.assert_equal(expected, actual)

def test_series_var_unboxing(self):
    """Compare jitted and pandas Series.var() for every ddof/skipna pair tried."""
    def pyfunc(series, skipna, ddof):
        return series.var(skipna=skipna, ddof=ddof)

    jitted = hpat.jit(pyfunc)
    data = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9])
    # Same combinations, in the same order, as nesting skipna inside ddof.
    cases = [(0, True), (0, False), (1, True), (1, False)]
    for ddof, skipna in cases:
        expected = pyfunc(data, skipna=skipna, ddof=ddof)
        actual = jitted(data, skipna=skipna, ddof=ddof)
        np.testing.assert_equal(expected, actual)

def test_series_var_str(self):
    """Expect a TypingError naming the dtype when var() is jitted for a string series."""
    def pyfunc(series):
        return series.var()

    jitted = hpat.jit(pyfunc)
    strings = pd.Series(['test', 'series', 'var', 'str'])
    template = 'Method var(). The object must be a number. Given self.dtype: {}'
    expected_msg = template.format(types.unicode_type)

    with self.assertRaises(TypingError) as raises:
        jitted(strings)

    self.assertIn(expected_msg, str(raises.exception))

def test_series_var_unsupported_params(self):
    """Expect a TypingError for each non-None axis, level and numeric_only argument."""
    def pyfunc(series, axis, level, numeric_only):
        return series.var(axis=axis, level=level, numeric_only=numeric_only)

    jitted = hpat.jit(pyfunc)
    data = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9])
    template = 'Method var(). Unsupported parameters. Given {}: {}'

    # (offending parameter, type name expected in the message, keyword arguments)
    cases = [
        ('axis', 'int', dict(axis=1, level=None, numeric_only=None)),
        ('level', 'int', dict(axis=None, level=1, numeric_only=None)),
        ('numeric_only', 'bool', dict(axis=None, level=None, numeric_only=True)),
    ]
    for param, type_name, kwargs in cases:
        with self.assertRaises(TypingError) as raises:
            jitted(data, **kwargs)
        self.assertIn(template.format(param, type_name), str(raises.exception))


if __name__ == "__main__":
unittest.main()

0 comments on commit f323e5b

Please sign in to comment.