From 71aaafb8d18969d1171ec9a84df796aab6072699 Mon Sep 17 00:00:00 2001 From: akharche Date: Thu, 13 Feb 2020 10:53:17 +0300 Subject: [PATCH 1/3] Add Boolean handling to Series.value_counts --- sdc/datatypes/hpat_pandas_series_functions.py | 2 +- sdc/tests/test_series.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py index 36f629a20..12b8b6763 100644 --- a/sdc/datatypes/hpat_pandas_series_functions.py +++ b/sdc/datatypes/hpat_pandas_series_functions.py @@ -1369,7 +1369,7 @@ def hpat_pandas_series_value_counts_str_impl( return hpat_pandas_series_value_counts_str_impl - elif isinstance(self.dtype, types.Number): + elif isinstance(self.dtype, (types.Number, types.Boolean)): series_dtype = self.dtype def hpat_pandas_series_value_counts_number_impl( diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index b562be57d..6f49f6527 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -2590,6 +2590,23 @@ def test_impl(S): result = hpat_func(S).sort_index() pd.testing.assert_series_equal(result, result_ref) + @skip_sdc_jit + def test_series_value_counts_boolean(self): + def test_impl(S): + return S.value_counts(sort=True) + + input_data = [[True, False, True, True, False]] + + hpat_func = self.jit(test_impl) + + for data_to_test in input_data: + with self.subTest(series_data=data_to_test): + S = pd.Series(data_to_test) + # use sort_index() due to possible different order of values with the same counts in results + result_ref = test_impl(S).sort_index() + result = hpat_func(S).sort_index() + pd.testing.assert_series_equal(result, result_ref) + @skip_sdc_jit('Bug in old-style value_counts implementation for ascending param support') def test_series_value_counts_sort(self): def test_impl(S, value): From c3e1cf514f7bb9dde6551322d2ed6027b187327f Mon Sep 17 00:00:00 2001 From: akharche Date: Thu, 13 Feb 2020 13:45:07 +0300 Subject: [PATCH 2/3] Refactor test --- sdc/tests/test_series.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index 6f49f6527..0f5c1aaed 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -2590,22 +2590,20 @@ def test_impl(S): result = hpat_func(S).sort_index() pd.testing.assert_series_equal(result, result_ref) - @skip_sdc_jit + @skip_sdc_jit('Fails to compile with latest Numba') def test_series_value_counts_boolean(self): def test_impl(S): - return S.value_counts(sort=True) + return S.value_counts() - input_data = [[True, False, True, True, False]] + input_data = [True, False, True, True, False] - hpat_func = self.jit(test_impl) + sdc_func = self.jit(test_impl) - for data_to_test in input_data: - with self.subTest(series_data=data_to_test): - S = pd.Series(data_to_test) - # use sort_index() due to possible different order of values with the same counts in results - result_ref = test_impl(S).sort_index() - result = hpat_func(S).sort_index() - pd.testing.assert_series_equal(result, result_ref) + S = pd.Series(input_data) + # use sort_index() due to possible different order of values with the same counts in results + result_ref = test_impl(S).sort_index() + result = sdc_func(S).sort_index() + pd.testing.assert_series_equal(result, result_ref) @skip_sdc_jit('Bug in old-style value_counts implementation for ascending param support') def test_series_value_counts_sort(self): From b0e32edab42f5a6400d1c088744077da50330b8b Mon Sep 17 00:00:00 2001 From: akharche Date: Thu, 13 Feb 2020 16:19:04 +0300 Subject: [PATCH 3/3] Remove index sorting --- sdc/tests/test_series.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py index 0f5c1aaed..5e08b9c7b 100644 --- a/sdc/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -2600,9 +2600,8 @@ def test_impl(S): sdc_func = self.jit(test_impl) S = pd.Series(input_data) - # use sort_index() due to possible different order of values with the same counts in results - result_ref = test_impl(S).sort_index() - result = sdc_func(S).sort_index() + result_ref = test_impl(S) + result = sdc_func(S) pd.testing.assert_series_equal(result, result_ref) @skip_sdc_jit('Bug in old-style value_counts implementation for ascending param support')