diff --git a/sdc/functions/numpy_like.py b/sdc/functions/numpy_like.py
index f589084b8..6660c139b 100644
--- a/sdc/functions/numpy_like.py
+++ b/sdc/functions/numpy_like.py
@@ -40,7 +40,7 @@
 
 import sdc
 from sdc.utilities.sdc_typing_utils import TypeChecker
-from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size)
+from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size, str_arr_is_na)
 from sdc.utilities.utils import sdc_overload, sdc_register_jitable
 
 
@@ -48,6 +48,14 @@ def astype(self, dtype):
     pass
 
 
+def isnan(self):
+    pass
+
+
+def notnan(self):
+    pass
+
+
 def sum(self):
     pass
 
@@ -117,6 +125,94 @@ def sdc_astype_number_impl(self, dtype):
     ty_checker.raise_exc(self.dtype, 'str or type', 'self.dtype')
 
 
+@sdc_overload(notnan)
+def sdc_notnan_overload(self):
+    """
+    Intel Scalable Dataframe Compiler Developer Guide
+    *************************************************
+    Parallel element-wise "is not NaN" mask, equivalent to ``~numpy.isnan(self)``.
+
+    Integer arrays cannot hold NaN, so the result is all ``True``;
+    float arrays are checked element by element in a ``prange`` loop.
+
+    .. only:: developer
+       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k notnan
+    """
+
+    if not isinstance(self, types.Array):
+        return None
+
+    # Defined locally so the fall-through below reports through TypeChecker, not an undefined name.
+    ty_checker = TypeChecker("numpy-like 'notnan'")
+    dtype = self.dtype
+    isnan = get_isnan(dtype)
+    if isinstance(dtype, types.Integer):
+        def sdc_notnan_int_impl(self):
+            length = len(self)
+            res = numpy.ones(shape=length, dtype=numpy.bool_)
+
+            return res
+
+        return sdc_notnan_int_impl
+
+    if isinstance(dtype, types.Float):
+        def sdc_notnan_float_impl(self):
+            length = len(self)
+            res = numpy.empty(shape=length, dtype=numpy.bool_)
+            for i in prange(length):
+                res[i] = not isnan(self[i])
+
+            return res
+
+        return sdc_notnan_float_impl
+
+    ty_checker.raise_exc(dtype, 'int or float', 'self.dtype')
+
+
+@sdc_overload(isnan)
+def sdc_isnan_overload(self):
+    """
+    Intel Scalable Dataframe Compiler Developer Guide
+    *************************************************
+    Parallel replacement of numpy.isnan.
+
+    Integer arrays cannot hold NaN, so the result is all ``False``;
+    float arrays are checked element by element in a ``prange`` loop.
+
+    .. only:: developer
+       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k isnan
+    """
+
+    if not isinstance(self, types.Array):
+        return None
+
+    # Defined locally so the fall-through below reports through TypeChecker, not an undefined name.
+    ty_checker = TypeChecker("numpy-like 'isnan'")
+    dtype = self.dtype
+    isnan = get_isnan(dtype)
+    if isinstance(dtype, types.Integer):
+        def sdc_isnan_int_impl(self):
+            length = len(self)
+            res = numpy.zeros(shape=length, dtype=numpy.bool_)
+
+            return res
+
+        return sdc_isnan_int_impl
+
+    if isinstance(dtype, types.Float):
+        def sdc_isnan_float_impl(self):
+            length = len(self)
+            res = numpy.empty(shape=length, dtype=numpy.bool_)
+            for i in prange(length):
+                res[i] = isnan(self[i])
+
+            return res
+
+        return sdc_isnan_float_impl
+
+    ty_checker.raise_exc(dtype, 'int or float', 'self.dtype')
+
+
 @sdc_overload(sum)
 def sdc_sum_overload(self):
     """
diff --git a/sdc/tests/test_sdc_numpy.py b/sdc/tests/test_sdc_numpy.py
index 5113e8975..2ad3e4938 100644
--- a/sdc/tests/test_sdc_numpy.py
+++ b/sdc/tests/test_sdc_numpy.py
@@ -142,12 +142,43 @@ def sdc_impl(a, t):
                 with self.subTest(data=case, type=type_):
                     np.testing.assert_array_equal(sdc_func(a, type_), ref_impl(a, type_))
 
-    def test_nansum(self):
+    def test_isnan(self):
         def ref_impl(a):
-            return np.nansum(a)
+            return np.isnan(a)
 
         def sdc_impl(a):
-            return numpy_like.nansum(a)
+            return numpy_like.isnan(a)
+
+        sdc_func = self.jit(sdc_impl)
+
+        cases = [[5, 2, 0, 333, -4], [3.3, 5.4, np.nan, 7.9, np.nan]]
+        for case in cases:
+            a = np.array(case)
+            with self.subTest(data=case):
+                np.testing.assert_array_equal(sdc_func(a), ref_impl(a))
+
+    @unittest.skip('Needs provide String Array boxing')
+    def test_isnan_str(self):
+        def ref_impl(a):
+            return np.isnan(a)
+
+        def sdc_impl(a):
+            return numpy_like.isnan(a)
+
+        sdc_func = self.jit(sdc_impl)
+
+        cases = [['a', 'cc', np.nan], ['se', None, 'vvv']]
+        for case in cases:
+            a = np.array(case)
+            with self.subTest(data=case):
+                np.testing.assert_array_equal(sdc_func(a), ref_impl(a))
+
+    def test_notnan(self):
+        def ref_impl(a):
+            return np.invert(np.isnan(a))
+
+        def sdc_impl(a):
+            return numpy_like.notnan(a)
 
         sdc_func = self.jit(sdc_impl)
 
@@ -172,5 +203,20 @@ def sdc_impl(a):
             with self.subTest(data=case):
                 np.testing.assert_array_equal(sdc_func(a), ref_impl(a))
 
+    def test_nansum(self):
+        def ref_impl(a):
+            return np.nansum(a)
+
+        def sdc_impl(a):
+            return numpy_like.nansum(a)
+
+        sdc_func = self.jit(sdc_impl)
+
+        cases = [[5, 2, 0, 333, -4], [3.3, 5.4, np.nan, 7.9, np.nan]]
+        for case in cases:
+            a = np.array(case)
+            with self.subTest(data=case):
+                np.testing.assert_array_equal(sdc_func(a), ref_impl(a))
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/sdc/tests/tests_perf/test_perf_numpy.py b/sdc/tests/tests_perf/test_perf_numpy.py
index df0d5042c..0b718d15d 100644
--- a/sdc/tests/tests_perf/test_perf_numpy.py
+++ b/sdc/tests/tests_perf/test_perf_numpy.py
@@ -99,6 +99,11 @@ def _test_case(self, cases, name, total_data_length, data_num=1, input_data=test
         CE(type_='Numba', code='data.astype(np.int64)', jitted=True),
         CE(type_='SDC', code='sdc.functions.numpy_like.astype(data, np.int64)', jitted=True),
     ], usecase_params='data'),
+    TC(name='isnan', size=[10 ** 7], call_expr=[
+        CE(type_='Python', code='np.isnan(data)', jitted=False),
+        CE(type_='Numba', code='np.isnan(data)', jitted=True),
+        CE(type_='SDC', code='sdc.functions.numpy_like.isnan(data)', jitted=True),
+    ], usecase_params='data'),
     TC(name='nansum', size=[10 ** 7], call_expr=[
         CE(type_='Python', code='np.nansum(data)', jitted=False),
         CE(type_='SDC', code='sdc.functions.numpy_like.nansum(data)', jitted=True),