From d9f587ef47f6899d189f068b63f74a8583bebdc4 Mon Sep 17 00:00:00 2001 From: Denis Date: Wed, 16 Oct 2019 09:57:54 +0300 Subject: [PATCH 1/3] Implement Series.nsmallest() in new style --- .../datatypes/hpat_pandas_series_functions.py | 103 ++++++- hpat/hiframes/api.py | 4 +- hpat/hiframes/pd_series_ext.py | 7 + hpat/tests/test_series.py | 271 +++++++++++++----- 4 files changed, 310 insertions(+), 75 deletions(-) diff --git a/hpat/datatypes/hpat_pandas_series_functions.py b/hpat/datatypes/hpat_pandas_series_functions.py index 3929b4117..1df479416 100644 --- a/hpat/datatypes/hpat_pandas_series_functions.py +++ b/hpat/datatypes/hpat_pandas_series_functions.py @@ -35,7 +35,7 @@ import pandas from numba.errors import TypingError -from numba.extending import (types, overload, overload_method, overload_attribute) +from numba.extending import overload, overload_method, overload_attribute from numba import types import hpat @@ -139,6 +139,103 @@ def hpat_pandas_series_iloc_impl(self): return hpat_pandas_series_iloc_impl +@overload_method(SeriesType, 'nsmallest') +def hpat_pandas_series_nsmallest(self, n=5, keep='first'): + """ + Pandas Series method :meth:`pandas.Series.nsmallest` implementation. + + .. only:: developer + Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_nsmallest* + + n: :obj:`int`, default 5 + Return this many ascending sorted values. + keep: :obj:`str`, default 'first' + When there are duplicate values that cannot all fit in a Series of n elements: + first : return the first n occurrences in order of appearance. + last : return the last n occurrences in reverse order of appearance. + all : keep all occurrences. This can result in a Series of size larger than n. + *unsupported* + + Returns + ------- + :obj:`series` + returns :obj:`series` + """ + + _func_name = 'Method nsmallest().' 
+ + if not isinstance(self, SeriesType): + raise TypingError('{} The object\n given: {}\n expected: {}'.format(_func_name, self, 'series')) + + if not isinstance(n, (types.Omitted, int, types.Integer)): + raise TypingError('{} The object n\n given: {}\n expected: {}'.format(_func_name, n, 'int')) + + if not isinstance(keep, (types.Omitted, str, types.UnicodeType, types.StringLiteral)): + raise TypingError('{} The object keep\n given: {}\n expected: {}'.format(_func_name, keep, 'str')) + + def hpat_pandas_series_nsmallest_impl(self, n=5, keep='first'): + if keep != 'first': + raise ValueError("Method nsmallest(). Unsupported parameter. Given 'keep' != 'first'") + + # mergesort is used for stable sorting of repeated values + indices = self._data.argsort(kind='mergesort')[:max(n, 0)] + + return self.take(indices) + + return hpat_pandas_series_nsmallest_impl + + +@overload_method(SeriesType, 'nlargest') +def hpat_pandas_series_nlargest(self, n=5, keep='first'): + """ + Pandas Series method :meth:`pandas.Series.nlargest` implementation. + + .. only:: developer + Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_nlargest* + + Parameters + ---------- + self: :obj:`pandas.Series` + input series + n: :obj:`int`, default 5 + Return this many descending sorted values. + keep: :obj:`str`, default 'first' + When there are duplicate values that cannot all fit in a Series of n elements: + first : return the first n occurrences in order of appearance. + last : return the last n occurrences in reverse order of appearance. + all : keep all occurrences. This can result in a Series of size larger than n. + *unsupported* + + Returns + ------- + :obj:`series` + returns :obj:`series` + """ + + _func_name = 'Method nlargest().' 
+ + if not isinstance(self, SeriesType): + raise TypingError('{} The object\n given: {}\n expected: {}'.format(_func_name, self, 'series')) + + if not isinstance(n, (types.Omitted, int, types.Integer)): + raise TypingError('{} The object n\n given: {}\n expected: {}'.format(_func_name, n, 'int')) + + if not isinstance(keep, (types.Omitted, str, types.UnicodeType, types.StringLiteral)): + raise TypingError('{} The object keep\n given: {}\n expected: {}'.format(_func_name, keep, 'str')) + + def hpat_pandas_series_nlargest_impl(self, n=5, keep='first'): + if keep != 'first': + raise ValueError("Method nlargest(). Unsupported parameter. Given 'keep' != 'first'") + + # data: [0, 1, -1, 1, 0] -> [1, 1, 0, 0, -1] + # index: [0, 1, 2, 3, 4] -> [1, 3, 0, 4, 2] (not [3, 1, 4, 0, 2]) + indices = (-self._data - 1).argsort(kind='mergesort')[:max(n, 0)] + + return self.take(indices) + + return hpat_pandas_series_nlargest_impl + + @overload_attribute(SeriesType, 'shape') def hpat_pandas_series_shape(self): """ @@ -1185,8 +1282,8 @@ def hpat_pandas_series_take(self, indices, axis=0, is_copy=False): if not isinstance(self, SeriesType): raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self)) - if not isinstance(indices, types.List): - raise TypingError('{} The indices must be a List. Given: {}'.format(_func_name, indices)) + if not isinstance(indices, (types.List, types.Array)): + raise TypingError('{} The indices must be an array-like. Given: {}'.format(_func_name, indices)) if not (isinstance(axis, (types.Integer, types.Omitted)) or axis == 0): raise TypingError('{} The axis must be an Integer. Currently unsupported. 
Given: {}'.format(_func_name, axis)) diff --git a/hpat/hiframes/api.py b/hpat/hiframes/api.py index 84de317d1..32cc7f6f6 100644 --- a/hpat/hiframes/api.py +++ b/hpat/hiframes/api.py @@ -585,13 +585,13 @@ def select_k_nonan_overload(A, m, k): dtype = A.dtype if isinstance(dtype, types.Integer): # ints don't have nans - return lambda A, m, k: (A[:k].copy(), k) + return lambda A, m, k: (A[:max(k, 0)].copy(), k) assert isinstance(dtype, types.Float) def select_k_nonan_float(A, m, k): # select the first k elements but ignore NANs - min_heap_vals = np.empty(k, A.dtype) + min_heap_vals = np.empty(max(k, 0), A.dtype) i = 0 ind = 0 while i < m and ind < k: diff --git a/hpat/hiframes/pd_series_ext.py b/hpat/hiframes/pd_series_ext.py index 8bd3f6a9b..c312eee77 100644 --- a/hpat/hiframes/pd_series_ext.py +++ b/hpat/hiframes/pd_series_ext.py @@ -1000,6 +1000,8 @@ def generic_expand_cumulative_series(self, args, kws): if not hpat.config.config_pipeline_hpat_default: _not_series_array_attrs.append('resolve_std') +_non_hpat_pipeline_attrs = ['resolve_nsmallest', 'resolve_nlargest'] + # use ArrayAttribute for attributes not defined in SeriesAttribute for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items(): if (attr.startswith('resolve_') @@ -1007,6 +1009,11 @@ def generic_expand_cumulative_series(self, args, kws): and attr not in _not_series_array_attrs): setattr(SeriesAttribute, attr, func) +# remove some attributes from SeriesAttribute for non-hpat pipeline +if not hpat.config.config_pipeline_hpat_default: + for attr in _non_hpat_pipeline_attrs: + if attr in SeriesAttribute.__dict__: + delattr(SeriesAttribute, attr) # PR135. 
This needs to be commented out @infer_global(operator.getitem) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index e576e4d10..8bb71383f 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- - +import string import unittest import platform import pandas as pd @@ -73,6 +73,15 @@ ] +def gen_srand_array(size, nchars=8): + """Generate array of strings of specified size based on [a-zA-Z] + [0-9]""" + accepted_chars = list(string.ascii_letters + string.digits) + rands_chars = np.array(accepted_chars, dtype=(np.str_, 1)) + + np.random.seed(100) + return np.random.choice(rands_chars, size=nchars * size).view((np.str_, nchars)) + + def _make_func_from_text(func_text, func_name='test_impl'): loc_vars = {} exec(func_text, {}, loc_vars) @@ -2213,35 +2222,35 @@ def test_impl(S): S = pd.Series([pd.NaT, pd.Timestamp('1970-12-01'), pd.Timestamp('2012-07-25')]) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) - def test_series_nlargest1(self): - def test_impl(S): - return S.nlargest(4) - hpat_func = hpat.jit(test_impl) - - m = 100 - np.random.seed(0) - S = pd.Series(np.random.randint(-30, 30, m)) - np.testing.assert_array_equal(hpat_func(S).values, test_impl(S).values) - - def test_series_nlargest_default1(self): - def test_impl(S): - return S.nlargest() + def test_series_nlargest(self): + def test_impl(): + series = pd.Series([1., np.nan, -1., 0., min_float64, max_float64]) + return series.nlargest(4) hpat_func = hpat.jit(test_impl) - m = 100 - np.random.seed(0) - S = pd.Series(np.random.randint(-30, 30, m)) - np.testing.assert_array_equal(hpat_func(S).values, test_impl(S).values) + if hpat.config.config_pipeline_hpat_default: + np.testing.assert_array_equal(test_impl(), hpat_func()) + else: + pd.testing.assert_series_equal(test_impl(), hpat_func()) - def test_series_nlargest_nan1(self): - def test_impl(S): - return S.nlargest(4) + def test_series_nlargest_unboxing(self): + def test_impl(series, 
n): + return series.nlargest(n) hpat_func = hpat.jit(test_impl) - S = pd.Series([1.0, np.nan, 3.0, 2.0, np.nan, 4.0]) - np.testing.assert_array_equal(hpat_func(S).values, test_impl(S).values) + for data in test_global_input_data_numeric + [[]]: + series = pd.Series(data * 3) + for n in range(-1, 10): + ref_result = test_impl(series, n) + jit_result = hpat_func(series, n) + if hpat.config.config_pipeline_hpat_default: + np.testing.assert_array_equal(ref_result, jit_result) + else: + pd.testing.assert_series_equal(ref_result, jit_result) - def test_series_nlargest_parallel1(self): + @unittest.skipIf(not hpat.config.config_pipeline_hpat_default, + 'Series.nlargest() parallelism unsupported') + def test_series_nlargest_parallel(self): # create `kde.parquet` file ParquetGenerator.gen_kde_pq() @@ -2251,56 +2260,117 @@ def test_impl(): return S.nlargest(4) hpat_func = hpat.jit(test_impl) - np.testing.assert_array_equal(hpat_func().values, test_impl().values) + if hpat.config.config_pipeline_hpat_default: + np.testing.assert_array_equal(test_impl(), hpat_func()) + else: + pd.testing.assert_series_equal(test_impl(), hpat_func()) + self.assertEqual(count_parfor_REPs(), 0) + self.assertTrue(count_array_OneDs() > 0) - @unittest.skip('Unsupported functionality: failed to handle index') - def test_series_nlargest_index_str(self): - def test_impl(S): - return S.nlargest(4) + @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + 'Series.nlargest() parameter keep unsupported') + def test_series_nlargest_full(self): + def test_impl(series, n, keep): + return series.nlargest(n, keep) hpat_func = hpat.jit(test_impl) - S = pd.Series([73, 21, 10005, 5, 1], index=['a', 'b', 'c', 'd', 'e']) - np.testing.assert_array_equal(hpat_func(S).values, test_impl(S).values) + keep = 'first' + for data in test_global_input_data_numeric + [[]]: + series = pd.Series(data * 3) + for n in range(-1, 10): + ref_result = test_impl(series, n, keep) + jit_result = hpat_func(series, n, keep) + 
pd.testing.assert_series_equal(ref_result, jit_result) - @unittest.skip('Unsupported functionality: failed to handle index') - def test_series_nlargest_index_int(self): - def test_impl(S): - return S.nlargest(4) + @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + 'Series.nlargest() index unsupported') + def test_series_nlargest_index(self): + def test_impl(series, n): + return series.nlargest(n) + hpat_func = hpat.jit(test_impl) + + # TODO: check data == [] when index fixed + for data in test_global_input_data_numeric: + data *= 3 + for index in [gen_srand_array(len(data)), range(len(data))]: + series = pd.Series(data, index) + for n in range(-1, 10): + ref_result = test_impl(series, n) + jit_result = hpat_func(series, n) + pd.testing.assert_series_equal(ref_result, jit_result) + @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + 'Series.nlargest() types validation unsupported') + def test_series_nlargest_typing(self): + _func_name = 'Method nlargest().' + + def test_impl(series, n, keep): + return series.nlargest(n, keep) hpat_func = hpat.jit(test_impl) - S = pd.Series([73, 21, 10005, 5, 1], index=[2, 3, 4, 5, 6]) - np.testing.assert_array_equal(hpat_func(S).values, test_impl(S).values) + series = pd.Series(test_global_input_data_float64[0]) + for n, ntype in [(True, types.boolean), (None, types.none), + (0.1, 'float64'), ('n', types.unicode_type)]: + with self.assertRaises(TypingError) as raises: + hpat_func(series, n=n, keep='first') + msg = '{} The object n\n given: {}\n expected: int' + self.assertIn(msg.format(_func_name, ntype), str(raises.exception)) + + for keep, dtype in [(True, types.boolean), (None, types.none), + (0.1, 'float64'), (1, 'int64')]: + with self.assertRaises(TypingError) as raises: + hpat_func(series, n=5, keep=keep) + msg = '{} The object keep\n given: {}\n expected: str' + self.assertIn(msg.format(_func_name, dtype), str(raises.exception)) - def test_series_nsmallest1(self): - def test_impl(S): - return S.nsmallest(4) 
+ @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + 'Series.nlargest() values validation unsupported') + def test_series_nlargest_unsupported(self): + msg = "Method nlargest(). Unsupported parameter. Given 'keep' != 'first'" + + def test_impl(series, n, keep): + return series.nlargest(n, keep) hpat_func = hpat.jit(test_impl) - m = 100 - np.random.seed(0) - S = pd.Series(np.random.randint(-30, 30, m)) - np.testing.assert_array_equal(hpat_func(S).values, test_impl(S).values) + series = pd.Series(test_global_input_data_float64[0]) + for keep in ['last', 'all', '']: + with self.assertRaises(ValueError) as raises: + hpat_func(series, n=5, keep=keep) + self.assertIn(msg, str(raises.exception)) - def test_series_nsmallest_default1(self): - def test_impl(S): - return S.nsmallest() + with self.assertRaises(ValueError) as raises: + hpat_func(series, n=5, keep='last') + self.assertIn(msg, str(raises.exception)) + + def test_series_nsmallest(self): + def test_impl(): + series = pd.Series([1., np.nan, -1., 0., min_float64, max_float64]) + return series.nsmallest(4) hpat_func = hpat.jit(test_impl) - m = 100 - np.random.seed(0) - S = pd.Series(np.random.randint(-30, 30, m)) - np.testing.assert_array_equal(hpat_func(S).values, test_impl(S).values) + if hpat.config.config_pipeline_hpat_default: + np.testing.assert_array_equal(test_impl(), hpat_func()) + else: + pd.testing.assert_series_equal(test_impl(), hpat_func()) - def test_series_nsmallest_nan1(self): - def test_impl(S): - return S.nsmallest(4) + def test_series_nsmallest_unboxing(self): + def test_impl(series, n): + return series.nsmallest(n) hpat_func = hpat.jit(test_impl) - S = pd.Series([1.0, np.nan, 3.0, 2.0, np.nan, 4.0]) - np.testing.assert_array_equal(hpat_func(S).values, test_impl(S).values) + for data in test_global_input_data_numeric + [[]]: + series = pd.Series(data * 3) + for n in range(-1, 10): + ref_result = test_impl(series, n) + jit_result = hpat_func(series, n) + if 
hpat.config.config_pipeline_hpat_default: + np.testing.assert_array_equal(ref_result, jit_result) + else: + pd.testing.assert_series_equal(ref_result, jit_result) - def test_series_nsmallest_parallel1(self): + @unittest.skipIf(not hpat.config.config_pipeline_hpat_default, + 'Series.nsmallest() parallelism unsupported') + def test_series_nsmallest_parallel(self): # create `kde.parquet` file ParquetGenerator.gen_kde_pq() @@ -2310,26 +2380,87 @@ def test_impl(): return S.nsmallest(4) hpat_func = hpat.jit(test_impl) - np.testing.assert_array_equal(hpat_func().values, test_impl().values) + if hpat.config.config_pipeline_hpat_default: + np.testing.assert_array_equal(test_impl(), hpat_func()) + else: + pd.testing.assert_series_equal(test_impl(), hpat_func()) + self.assertEqual(count_parfor_REPs(), 0) + self.assertTrue(count_array_OneDs() > 0) - @unittest.skip('Unsupported functionality: failed to handle index') - def test_series_nsmallest_index_str(self): - def test_impl(S): - return S.nsmallest(3) + @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + 'Series.nsmallest() parameter keep unsupported') + def test_series_nsmallest_full(self): + def test_impl(series, n, keep): + return series.nsmallest(n, keep) hpat_func = hpat.jit(test_impl) - S = pd.Series([41, 32, 33, 4, 5], index=['a', 'b', 'c', 'd', 'e']) - np.testing.assert_array_equal(hpat_func(S).values, test_impl(S).values) + keep = 'first' + for data in test_global_input_data_numeric + [[]]: + series = pd.Series(data * 3) + for n in range(-1, 10): + ref_result = test_impl(series, n, keep) + jit_result = hpat_func(series, n, keep) + pd.testing.assert_series_equal(ref_result, jit_result) + + @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + 'Series.nsmallest() index unsupported') + def test_series_nsmallest_index(self): + def test_impl(series, n): + return series.nsmallest(n) + hpat_func = hpat.jit(test_impl) + + # TODO: check data == [] when index fixed + for data in test_global_input_data_numeric: 
+ data *= 3 + for index in [gen_srand_array(len(data)), range(len(data))]: + series = pd.Series(data, index) + for n in range(-1, 10): + ref_result = test_impl(series, n) + jit_result = hpat_func(series, n) + pd.testing.assert_series_equal(ref_result, jit_result) - @unittest.skip('Unsupported functionality: failed to handle index') - def test_series_nsmallest_index_int(self): - def test_impl(S): - return S.nsmallest(3) + @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + 'Series.nsmallest() types validation unsupported') + def test_series_nsmallest_typing(self): + _func_name = 'Method nsmallest().' + + def test_impl(series, n, keep): + return series.nsmallest(n, keep) + hpat_func = hpat.jit(test_impl) + + series = pd.Series(test_global_input_data_float64[0]) + for n, ntype in [(True, types.boolean), (None, types.none), + (0.1, 'float64'), ('n', types.unicode_type)]: + with self.assertRaises(TypingError) as raises: + hpat_func(series, n=n, keep='first') + msg = '{} The object n\n given: {}\n expected: int' + self.assertIn(msg.format(_func_name, ntype), str(raises.exception)) + + for keep, dtype in [(True, types.boolean), (None, types.none), + (0.1, 'float64'), (1, 'int64')]: + with self.assertRaises(TypingError) as raises: + hpat_func(series, n=5, keep=keep) + msg = '{} The object keep\n given: {}\n expected: str' + self.assertIn(msg.format(_func_name, dtype), str(raises.exception)) + @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + 'Series.nsmallest() values validation unsupported') + def test_series_nsmallest_unsupported(self): + msg = "Method nsmallest(). Unsupported parameter. 
Given 'keep' != 'first'" + + def test_impl(series, n, keep): + return series.nsmallest(n, keep) hpat_func = hpat.jit(test_impl) - S = pd.Series([41, 32, 33, 4, 5], index=[1, 2, 3, 4, 5]) - np.testing.assert_array_equal(hpat_func(S).values, test_impl(S).values) + series = pd.Series(test_global_input_data_float64[0]) + for keep in ['last', 'all', '']: + with self.assertRaises(ValueError) as raises: + hpat_func(series, n=5, keep=keep) + self.assertIn(msg, str(raises.exception)) + + with self.assertRaises(ValueError) as raises: + hpat_func(series, n=5, keep='last') + self.assertIn(msg, str(raises.exception)) def test_series_head1(self): def test_impl(S): From 84355effd44bb742c9e2d052dbef05a7cf31c77a Mon Sep 17 00:00:00 2001 From: Denis Date: Fri, 1 Nov 2019 10:34:22 +0300 Subject: [PATCH 2/3] Replace rand chararray generator with strlist one --- .../datatypes/hpat_pandas_series_functions.py | 1 + hpat/tests/test_series.py | 52 ++++++++++++------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/hpat/datatypes/hpat_pandas_series_functions.py b/hpat/datatypes/hpat_pandas_series_functions.py index 1df479416..8189c491e 100644 --- a/hpat/datatypes/hpat_pandas_series_functions.py +++ b/hpat/datatypes/hpat_pandas_series_functions.py @@ -229,6 +229,7 @@ def hpat_pandas_series_nlargest_impl(self, n=5, keep='first'): # data: [0, 1, -1, 1, 0] -> [1, 1, 0, 0, -1] # index: [0, 1, 2, 3, 4] -> [1, 3, 0, 4, 2] (not [3, 1, 4, 0, 2]) + # subtract 1 to ensure reverse ordering at boundaries indices = (-self._data - 1).argsort(kind='mergesort')[:max(n, 0)] return self.take(indices) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index 8bb71383f..05e2ea4e7 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -6,6 +6,7 @@ import numpy as np import pyarrow.parquet as pq import hpat +from itertools import islice, permutations from hpat.tests.test_utils import ( count_array_REPs, count_parfor_REPs, count_array_OneDs, get_start_end) from 
hpat.tests.gen_test_data import ParquetGenerator @@ -73,13 +74,12 @@ ] -def gen_srand_array(size, nchars=8): - """Generate array of strings of specified size based on [a-zA-Z] + [0-9]""" - accepted_chars = list(string.ascii_letters + string.digits) - rands_chars = np.array(accepted_chars, dtype=(np.str_, 1)) +def gen_strlist(size, nchars=8): + """Generate list of strings of specified size based on [a-zA-Z] + [0-9]""" + accepted_chars = string.ascii_letters + string.digits + generated_chars = islice(permutations(accepted_chars, nchars), size) - np.random.seed(100) - return np.random.choice(rands_chars, size=nchars * size).view((np.str_, nchars)) + return [''.join(chars) for chars in generated_chars] def _make_func_from_text(func_text, func_name='test_impl'): @@ -2282,25 +2282,31 @@ def test_impl(series, n, keep): jit_result = hpat_func(series, n, keep) pd.testing.assert_series_equal(ref_result, jit_result) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, - 'Series.nlargest() index unsupported') def test_series_nlargest_index(self): def test_impl(series, n): return series.nlargest(n) hpat_func = hpat.jit(test_impl) - # TODO: check data == [] when index fixed + # TODO: check data == [] after index is fixed for data in test_global_input_data_numeric: data *= 3 - for index in [gen_srand_array(len(data)), range(len(data))]: + # TODO: add integer index not equal to range after index is fixed + indexes = [range(len(data))] + if not hpat.config.config_pipeline_hpat_default: + indexes.append(gen_strlist(len(data))) + + for index in indexes: series = pd.Series(data, index) for n in range(-1, 10): ref_result = test_impl(series, n) jit_result = hpat_func(series, n) - pd.testing.assert_series_equal(ref_result, jit_result) + if hpat.config.config_pipeline_hpat_default: + np.testing.assert_array_equal(ref_result, jit_result) + else: + pd.testing.assert_series_equal(ref_result, jit_result) @unittest.skipIf(hpat.config.config_pipeline_hpat_default, - 'Series.nlargest() 
types validation unsupported') + 'Series.nlargest() does not raise an exception') def test_series_nlargest_typing(self): _func_name = 'Method nlargest().' @@ -2324,7 +2330,7 @@ def test_impl(series, n, keep): self.assertIn(msg.format(_func_name, dtype), str(raises.exception)) @unittest.skipIf(hpat.config.config_pipeline_hpat_default, - 'Series.nlargest() values validation unsupported') + 'Series.nlargest() does not raise an exception') def test_series_nlargest_unsupported(self): msg = "Method nlargest(). Unsupported parameter. Given 'keep' != 'first'" @@ -2402,25 +2408,31 @@ def test_impl(series, n, keep): jit_result = hpat_func(series, n, keep) pd.testing.assert_series_equal(ref_result, jit_result) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, - 'Series.nsmallest() index unsupported') def test_series_nsmallest_index(self): def test_impl(series, n): return series.nsmallest(n) hpat_func = hpat.jit(test_impl) - # TODO: check data == [] when index fixed + # TODO: check data == [] after index is fixed for data in test_global_input_data_numeric: data *= 3 - for index in [gen_srand_array(len(data)), range(len(data))]: + # TODO: add integer index not equal to range after index is fixed + indexes = [range(len(data))] + if not hpat.config.config_pipeline_hpat_default: + indexes.append(gen_strlist(len(data))) + + for index in indexes: series = pd.Series(data, index) for n in range(-1, 10): ref_result = test_impl(series, n) jit_result = hpat_func(series, n) - pd.testing.assert_series_equal(ref_result, jit_result) + if hpat.config.config_pipeline_hpat_default: + np.testing.assert_array_equal(ref_result, jit_result) + else: + pd.testing.assert_series_equal(ref_result, jit_result) @unittest.skipIf(hpat.config.config_pipeline_hpat_default, - 'Series.nsmallest() types validation unsupported') + 'Series.nsmallest() does not raise an exception') def test_series_nsmallest_typing(self): _func_name = 'Method nsmallest().' 
@@ -2444,7 +2456,7 @@ def test_impl(series, n, keep): self.assertIn(msg.format(_func_name, dtype), str(raises.exception)) @unittest.skipIf(hpat.config.config_pipeline_hpat_default, - 'Series.nsmallest() values validation unsupported') + 'Series.nsmallest() does not raise an exception') def test_series_nsmallest_unsupported(self): msg = "Method nsmallest(). Unsupported parameter. Given 'keep' != 'first'" From 0e0cf7ef51dfe30e7464c3bda0f996a28fea2361 Mon Sep 17 00:00:00 2001 From: Denis Date: Mon, 4 Nov 2019 14:41:33 +0300 Subject: [PATCH 3/3] Minor changes in tests for nsmallest/nlargest --- hpat/tests/test_series.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index 05e2ea4e7..746e5ee11 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -2289,14 +2289,14 @@ def test_impl(series, n): # TODO: check data == [] after index is fixed for data in test_global_input_data_numeric: - data *= 3 + data_duplicated = data * 3 # TODO: add integer index not equal to range after index is fixed - indexes = [range(len(data))] + indexes = [range(len(data_duplicated))] if not hpat.config.config_pipeline_hpat_default: - indexes.append(gen_strlist(len(data))) + indexes.append(gen_strlist(len(data_duplicated))) for index in indexes: - series = pd.Series(data, index) + series = pd.Series(data_duplicated, index) for n in range(-1, 10): ref_result = test_impl(series, n) jit_result = hpat_func(series, n) @@ -2415,14 +2415,14 @@ def test_impl(series, n): # TODO: check data == [] after index is fixed for data in test_global_input_data_numeric: - data *= 3 + data_duplicated = data * 3 # TODO: add integer index not equal to range after index is fixed - indexes = [range(len(data))] + indexes = [range(len(data_duplicated))] if not hpat.config.config_pipeline_hpat_default: - indexes.append(gen_strlist(len(data))) + indexes.append(gen_strlist(len(data_duplicated))) for index in indexes: - 
series = pd.Series(data, index) + series = pd.Series(data_duplicated, index) for n in range(-1, 10): ref_result = test_impl(series, n) jit_result = hpat_func(series, n)