Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Add tests for Series arithmetic and comparison methods and fix Series.div #114

Merged
merged 11 commits into from
Aug 13, 2019
4 changes: 2 additions & 2 deletions hpat/hiframes/hiframes_typed.py
Original file line number Diff line number Diff line change
Expand Up @@ -1018,8 +1018,8 @@ def func(A, B):
data = self._get_series_data(series_var, nodes)
return self._replace_func(func, [data], pre_nodes=nodes)

if func_name in explicit_binop_funcs.values():
binop_map = {v: _binop_to_str[k] for k, v in explicit_binop_funcs.items()}
if func_name in explicit_binop_funcs.keys():
binop_map = {k: _binop_to_str[v] for k, v in explicit_binop_funcs.items()}
func_text = "def _binop_impl(A, B):\n"
func_text += " return A {} B\n".format(binop_map[func_name])

Expand Down
30 changes: 15 additions & 15 deletions hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -977,28 +977,28 @@ def array_attribute_attachment(self, ary):


explicit_binop_funcs = {
operator.add: 'add',
operator.sub: 'sub',
operator.mul: 'mul',
operator.truediv: 'div',
operator.truediv: 'truediv',
operator.floordiv: 'floordiv',
operator.mod: 'mod',
operator.pow: 'pow',
operator.lt: 'lt',
operator.gt: 'gt',
operator.le: 'le',
operator.ge: 'ge',
operator.ne: 'ne',
operator.eq: 'eq',
'add': operator.add,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think it will be faster? Or why did you change the order?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the order was changed because there were two values with the same key in the original explicit_binop_funcs:
operator.truediv: 'div',
operator.truediv: 'truediv',

Hence iterating over (key, value) pairs didn't work and only one (truediv) Series method was generated, so Series.div didn't work at all - it failed with:
numba.errors.TypingError: Failed in hpat mode pipeline (step: nopython frontend)
Unknown attribute 'div' of type series(int64, array(int64, 1d, C), none, False)

'sub': operator.sub,
'mul': operator.mul,
'div': operator.truediv,
'truediv': operator.truediv,
'floordiv': operator.floordiv,
'mod': operator.mod,
'pow': operator.pow,
'lt': operator.lt,
'gt': operator.gt,
'le': operator.le,
'ge': operator.ge,
'ne': operator.ne,
'eq': operator.eq,
}


def ex_binop_generic(self, args, kws):
    """Resolve an explicit binary-operator Series method.

    Prepends ``self.this`` to the call's positional ``args`` and delegates
    to ``SeriesOpUfuncs.generic`` with ``kws`` passed through unchanged.
    """
    operands = (self.this,) + args
    return SeriesOpUfuncs.generic(self, operands, kws)


for op, fname in explicit_binop_funcs.items():
for fname, op in explicit_binop_funcs.items():
install_series_method(op, fname, ex_binop_generic)


Expand Down
156 changes: 91 additions & 65 deletions hpat/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,32 @@
]]


def _make_func_from_text(func_text, func_name='test_impl'):
loc_vars = {}
exec(func_text, {}, loc_vars)
test_impl = loc_vars[func_name]
return test_impl


def _make_func_use_binop1(operator):
    """Build ``test_impl(A, B)`` that returns ``A <operator> B``.

    ``operator`` is the operator's source text (e.g. ``'+'`` or ``'<='``); it
    is substituted verbatim into the generated function body.
    """
    template = (
        "def test_impl(A, B):\n"
        " return A {} B\n"
    )
    return _make_func_from_text(template.format(operator))


def _make_func_use_binop2(operator):
    """Build ``test_impl(A, B)`` that runs ``A <operator> B`` and returns ``A``.

    ``operator`` is substituted verbatim into a standalone statement, and the
    generated function then returns ``A``. The statement's value is not
    assigned anywhere, so the result of a plain binary operator such as
    ``'+'`` is discarded; pass an augmented-assignment operator such as
    ``'+='`` when the returned ``A`` is meant to carry the result.
    """
    source_lines = [
        "def test_impl(A, B):\n",
        " A {} B\n".format(operator),
        " return A\n",
    ]
    return _make_func_from_text("".join(source_lines))


def _make_func_use_method_arg1(method):
    """Build ``test_impl(A, B)`` that returns ``A.<method>(B)``.

    ``method`` is the method name as a string (e.g. ``'add'``); it is
    substituted verbatim into the generated function body.
    """
    template = (
        "def test_impl(A, B):\n"
        " return A.{}(B)\n"
    )
    return _make_func_from_text(template.format(method))


GLOBAL_VAL = 2


Expand Down Expand Up @@ -417,69 +443,60 @@ def test_impl(A):
hpat_func(S), test_impl(S).reset_index(drop=True))

def test_series_op1(self):
def test_impl(A, i):
return A + A
hpat_func = hpat.jit(test_impl)
arithmetic_binops = ('+', '-', '*', '/', '//', '%', '**')
for operator in arithmetic_binops:
test_impl = _make_func_use_binop1(operator)
hpat_func = hpat.jit(test_impl)

n = 11
df = pd.DataFrame({'A': np.arange(n)})
pd.testing.assert_series_equal(hpat_func(df.A, 0),
test_impl(df.A, 0), check_names=False)
n = 11
df = pd.DataFrame({'A': np.arange(1, n), 'B': np.ones(n - 1)})
pd.testing.assert_series_equal(hpat_func(df.A, df.B), test_impl(df.A, df.B), check_names=False)

@unittest.skip('AssertionError - fix needed\n'
'Attribute "dtype" are different\n'
'[left]: int64\n'
'[right]: int32\n')
def test_series_op2(self):
def test_impl(A, i):
return A+i
hpat_func = hpat.jit(test_impl)
arithmetic_binops = ('+', '-', '*', '/', '//', '%', '**')

n = 11
df = pd.DataFrame({'A': np.arange(n)})
pd.testing.assert_series_equal(hpat_func(df.A, 1),
test_impl(df.A, 1), check_names=False)
for operator in arithmetic_binops:
test_impl = _make_func_use_binop1(operator)
hpat_func = hpat.jit(test_impl)

n = 11
df = pd.DataFrame({'A': np.arange(1, n)})
pd.testing.assert_series_equal(hpat_func(df.A, 1), test_impl(df.A, 1), check_names=False)

def test_series_op3(self):
def test_impl(A, i):
A += i
return A
hpat_func = hpat.jit(test_impl)
arithmetic_binops = ('+', '-', '*', '/', '//', '%', '**')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please extract duplicated code into separate function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shssf Sorry, I don't understand what duplication has to be removed. Do you mean combining all tests for arithmetic operations (i.e. op1 - op3) into one single test?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not into one test, but these tests share a lot of similar code. The idea is to move this code into a separate function which could be called from these tests.


n = 11
df = pd.DataFrame({'A': np.arange(n)})
pd.testing.assert_series_equal(hpat_func(df.A.copy(), 1),
test_impl(df.A, 1), check_names=False)
for operator in arithmetic_binops:
test_impl = _make_func_use_binop2(operator)
hpat_func = hpat.jit(test_impl)

n = 11
df = pd.DataFrame({'A': np.arange(1, n), 'B': np.ones(n - 1)})
pd.testing.assert_series_equal(hpat_func(df.A, df.B), test_impl(df.A, df.B), check_names=False)

def test_series_op4(self):
def test_impl(A):
return A.add(A)
hpat_func = hpat.jit(test_impl)
arithmetic_binops = ('+', '-', '*', '/', '//', '%', '**')

n = 11
A = pd.Series(np.arange(n))
pd.testing.assert_series_equal(hpat_func(A), test_impl(A))
for operator in arithmetic_binops:
test_impl = _make_func_use_binop2(operator)
hpat_func = hpat.jit(test_impl)

n = 11
df = pd.DataFrame({'A': np.arange(1, n)})
pd.testing.assert_series_equal(hpat_func(df.A, 1), test_impl(df.A, 1), check_names=False)

def test_series_op5(self):
def test_impl(A):
return A.pow(A)
hpat_func = hpat.jit(test_impl)
arithmetic_methods = ('add', 'sub', 'mul', 'div', 'truediv', 'floordiv', 'mod', 'pow')

n = 11
A = pd.Series(np.arange(n))
pd.testing.assert_series_equal(hpat_func(A), test_impl(A))
for method in arithmetic_methods:
test_impl = _make_func_use_method_arg1(method)
hpat_func = hpat.jit(test_impl)

def test_series_op6(self):
def test_impl(A, B):
return A.eq(B)
hpat_func = hpat.jit(test_impl)
n = 11
df = pd.DataFrame({'A': np.arange(1, n), 'B': np.ones(n - 1)})
pd.testing.assert_series_equal(hpat_func(df.A, df.B), test_impl(df.A, df.B), check_names=False)

n = 11
A = pd.Series(np.arange(n))
B = pd.Series(np.arange(n)**2)
pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B))

def test_series_op7(self):
def test_series_op6(self):
def test_impl(A):
return -A
hpat_func = hpat.jit(test_impl)
Expand All @@ -488,6 +505,30 @@ def test_impl(A):
A = pd.Series(np.arange(n))
pd.testing.assert_series_equal(hpat_func(A), test_impl(A))

def test_series_op7(self):
comparison_binops = ('<', '>', '<=', '>=', '!=', '==')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to also check the `a=b` case and verify that it properly throws an exception?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I didn't think of it. But as far as I understand, it won't test HPAT anyway; it will test Python, because SyntaxError will be raised before we attempt to jit-compile anything. Am I wrong?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know. I am just proposing the idea of implementing additional checks. It is OK if you think it is not applicable here.


for operator in comparison_binops:
test_impl = _make_func_use_binop1(operator)
hpat_func = hpat.jit(test_impl)

n = 11
A = pd.Series(np.arange(n))
B = pd.Series(np.arange(n)**2)
pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_names=False)

def test_series_op8(self):
    """Check the Series comparison methods against the uncompiled result.

    For each of ``lt``, ``gt``, ``le``, ``ge``, ``ne`` and ``eq`` a function
    returning ``A.<method>(B)`` is generated, compiled with ``hpat.jit``, and
    its output is compared with the output of the same function run without
    compilation.
    """
    comparison_methods = ('lt', 'gt', 'le', 'ge', 'ne', 'eq')

    for method in comparison_methods:
        # subTest labels a failure with the method name and lets the
        # remaining methods run instead of stopping at the first failure.
        with self.subTest(method=method):
            test_impl = _make_func_use_method_arg1(method)
            hpat_func = hpat.jit(test_impl)

            n = 11
            A = pd.Series(np.arange(n))
            B = pd.Series(np.arange(n)**2)
            pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_names=False)

def test_series_inplace_binop_array(self):
def test_impl(A, B):
A += B
Expand All @@ -499,10 +540,6 @@ def test_impl(A, B):
B = pd.Series(np.ones(n))
np.testing.assert_array_equal(hpat_func(A.copy(), B), test_impl(A, B))

@unittest.skip('AssertionError - fix needed\n'
'Attribute "dtype" are different\n'
'[left]: int64\n'
'[right]: int32\n')
def test_series_fusion1(self):
def test_impl(A, B):
return A + B + 1
Expand All @@ -514,10 +551,6 @@ def test_impl(A, B):
pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B))
self.assertEqual(count_parfor_REPs(), 1)

@unittest.skip('AssertionError - fix needed\n'
'Attribute "dtype" are different\n'
'[left]: int64\n'
'[right]: int32\n')
def test_series_fusion2(self):
# make sure getting data var avoids incorrect single def assumption
def test_impl(A, B):
Expand Down Expand Up @@ -1016,7 +1049,7 @@ def test_impl(S):
return S.abs()
hpat_func = hpat.jit(test_impl)

S = pd.Series([np.nan, -2., 3.])
S = pd.Series([np.nan, -2., 3., 0.5E-01, 0xFF, 0o7, 0b101])
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))

@unittest.skip('AssertionError - fix needed\n'
Expand Down Expand Up @@ -1059,20 +1092,13 @@ def test_impl(S):
S = pd.Series(['aa', 'abc', 'c', 'cccd'])
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))

@unittest.skip('numba.errors.LoweringError - fix needed\n'
'Failed in hpat mode pipeline'
'(step: nopython mode backend)\n'
'str_overload() takes 1 positional argument '
'but 2 were given\n')
def test_series_str2str(self):
str2str_methods = ('capitalize', 'lower', 'lstrip', 'rstrip',
'strip', 'swapcase', 'title', 'upper')
for method in str2str_methods:
func_text = "def test_impl(S):\n"
func_text += " return S.str.{}()\n".format(method)
loc_vars = {}
exec(func_text, {}, loc_vars)
test_impl = loc_vars['test_impl']
test_impl = _make_func_from_text(func_text)
hpat_func = hpat.jit(test_impl)

S = pd.Series([' \tbbCD\t ', 'ABC', ' mCDm\t', 'abc'])
Expand Down