From 9ca079742600fced0fe5239a4cf1a53851fdacdc Mon Sep 17 00:00:00 2001 From: "elena.totmenina" Date: Mon, 10 Feb 2020 18:54:12 +0300 Subject: [PATCH 1/3] Impl isnan/notnan numpy-like --- sdc/functions/numpy_like.py | 89 ++++++++++++++++++++++++- sdc/tests/test_sdc_numpy.py | 46 +++++++++++++ sdc/tests/tests_perf/test_perf_numpy.py | 5 ++ 3 files changed, 139 insertions(+), 1 deletion(-) diff --git a/sdc/functions/numpy_like.py b/sdc/functions/numpy_like.py index 2ac971abb..8c63b331a 100644 --- a/sdc/functions/numpy_like.py +++ b/sdc/functions/numpy_like.py @@ -36,10 +36,11 @@ from numba import types, jit, prange, numpy_support, literally from numba.errors import TypingError +from numba.targets.arraymath import get_isnan import sdc from sdc.utilities.sdc_typing_utils import TypeChecker -from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size) +from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size, str_arr_is_na) from sdc.utilities.utils import sdc_overload, sdc_register_jitable @@ -47,6 +48,14 @@ def astype(self, dtype): pass +def isnan(self): + pass + + +def notnan(self): + pass + + @sdc_overload(astype) def sdc_astype_overload(self, dtype): """ @@ -106,3 +115,81 @@ def sdc_astype_number_impl(self, dtype): return sdc_astype_number_impl ty_checker.raise_exc(self.dtype, 'str or type', 'self.dtype') + + +@sdc_overload(notnan) +def sdc_isnan_overload(self): + """ + Intel Scalable Dataframe Compiler Developer Guide + ************************************************* + Parallel replacement of numpy.notnan. + .. only:: developer + Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k notnan + """ + + if not isinstance(self, types.Array): + return None + + dtype = self.dtype + isnan = get_isnan(dtype) + if isinstance(dtype, types.Integer): + def sdc_notnan_int_impl(self): + length = len(self) + res = numpy.ones(shape=length, dtype=numpy.bool_) + + return res + + return sdc_notnan_int_impl + + if isinstance(dtype, types.Float): + def sdc_notnan_float_impl(self): + length = len(self) + res = numpy.ones(shape=length, dtype=numpy.bool_) + for i in prange(length): + if isnan(self[i]): + res[i] = False + + return res + + return sdc_notnan_float_impl + + ty_checker.raise_exc(dtype, 'int or float', 'self.dtype') + + +@sdc_overload(isnan) +def sdc_isnan_overload(self): + """ + Intel Scalable Dataframe Compiler Developer Guide + ************************************************* + Parallel replacement of numpy.isnan. + .. only:: developer + Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k isnan + """ + + if not isinstance(self, types.Array): + return None + + dtype = self.dtype + isnan = get_isnan(dtype) + if isinstance(dtype, types.Integer): + def sdc_isnan_int_impl(self): + length = len(self) + res = numpy.zeros(shape=length, dtype=numpy.bool_) + + return res + + return sdc_isnan_int_impl + + if isinstance(dtype, types.Float): + def sdc_isnan_float_impl(self): + length = len(self) + res = numpy.zeros(shape=length, dtype=numpy.bool_) + for i in prange(length): + if isnan(self[i]): + res[i] = True + + return res + + return sdc_isnan_float_impl + + ty_checker.raise_exc(dtype, 'int or float', 'self.dtype') diff --git a/sdc/tests/test_sdc_numpy.py b/sdc/tests/test_sdc_numpy.py index 348f23b04..bbf1fb19b 100644 --- a/sdc/tests/test_sdc_numpy.py +++ b/sdc/tests/test_sdc_numpy.py @@ -142,5 +142,51 @@ def sdc_impl(a, t): with self.subTest(data=case, type=type_): np.testing.assert_array_equal(sdc_func(a, type_), ref_impl(a, type_)) + def test_isnan(self): + def ref_impl(a): + return np.isnan(a) + + def sdc_impl(a): + return numpy_like.isnan(a) + + sdc_func = self.jit(sdc_impl) + + cases = [[5, 2, 0, 333, -4], [3.3, 5.4, np.nan, 7.9, np.nan]] + for case in cases: + a = np.array(case) + with self.subTest(data=case): + np.testing.assert_array_equal(sdc_func(a), ref_impl(a)) + + @unittest.skip('Needs provide String Array boxing') + def test_isnan_str(self): + def ref_impl(a): + return np.isnan(a) + + def sdc_impl(a): + return numpy_like.isnan(a) + + sdc_func = self.jit(sdc_impl) + + cases = [['a', 'cc', np.nan], ['se', None, 'vvv']] + for case in cases: + a = np.array(case) + with self.subTest(data=case): + np.testing.assert_array_equal(sdc_func(a), ref_impl(a)) + + def test_notnan(self): + def ref_impl(a): + return np.invert(np.isnan(a)) + + def sdc_impl(a): + return numpy_like.notnan(a) + + sdc_func = self.jit(sdc_impl) + + cases = [[5, 2, 0, 333, -4], [3.3, 5.4, np.nan, 7.9, np.nan]] + for case in cases: + a = np.array(case) + with self.subTest(data=case): + np.testing.assert_array_equal(sdc_func(a), ref_impl(a)) + if __name__ == "__main__": unittest.main() diff --git a/sdc/tests/tests_perf/test_perf_numpy.py b/sdc/tests/tests_perf/test_perf_numpy.py index 8df13f315..11a1ef27f 100644 --- a/sdc/tests/tests_perf/test_perf_numpy.py +++ b/sdc/tests/tests_perf/test_perf_numpy.py @@ -99,6 +99,11 @@ def _test_case(self, cases, name, total_data_length, data_num=1, input_data=test CE(type_='Numba', code='data.astype(np.int64)', jitted=True), CE(type_='SDC', code='sdc.functions.numpy_like.astype(data, np.int64)', jitted=True), ], usecase_params='data'), + TC(name='isnan', size=[10 ** 7], call_expr=[ + CE(type_='Python', code='np.isnan(data)', jitted=False), + CE(type_='Numba', code='np.isnan(data)', jitted=True), + CE(type_='SDC', code='sdc.functions.numpy_like.isnan(data)', jitted=True), + ], usecase_params='data'), ] generate_test_cases(cases, TestFunctions, 'function') From f55d858d273d9e555dce90e0e279c403ebb40b5f Mon Sep 17 00:00:00 2001 From: "elena.totmenina" Date: Mon, 10 Feb 2020 19:53:46 +0300 Subject: [PATCH 2/3] fix optimization --- sdc/functions/numpy_like.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sdc/functions/numpy_like.py b/sdc/functions/numpy_like.py index 8c63b331a..7115103e5 100644 --- a/sdc/functions/numpy_like.py +++ b/sdc/functions/numpy_like.py @@ -144,10 +144,9 @@ def sdc_notnan_int_impl(self): if isinstance(dtype, types.Float): def sdc_notnan_float_impl(self): length = len(self) - res = numpy.ones(shape=length, dtype=numpy.bool_) + res = numpy.empty(shape=length, dtype=numpy.bool_) for i in prange(length): - if isnan(self[i]): - res[i] = False + res[i] = isnan(self[i]) return res @@ -183,10 +182,9 @@ def sdc_isnan_int_impl(self): if isinstance(dtype, types.Float): def sdc_isnan_float_impl(self): length = len(self) - res = numpy.zeros(shape=length, dtype=numpy.bool_) + res = numpy.empty(shape=length, dtype=numpy.bool_) for i in prange(length): - if isnan(self[i]): - res[i] = True + res[i] = isnan(self[i]) return res From 19e4d839ceb3a13c7c1b11f9757b5bd4ca8705c9 Mon Sep 17 00:00:00 2001 From: "elena.totmenina" Date: Mon, 10 Feb 2020 20:05:57 +0300 Subject: [PATCH 3/3] fix notnan --- sdc/functions/numpy_like.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdc/functions/numpy_like.py b/sdc/functions/numpy_like.py index dea6c30b4..6660c139b 100644 --- a/sdc/functions/numpy_like.py +++ b/sdc/functions/numpy_like.py @@ -154,7 +154,7 @@ def sdc_notnan_float_impl(self): length = len(self) res = numpy.empty(shape=length, dtype=numpy.bool_) for i in prange(length): - res[i] = isnan(self[i]) + res[i] = not isnan(self[i]) return res