Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 85 additions & 1 deletion sdc/functions/numpy_like.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,22 @@

import sdc
from sdc.utilities.sdc_typing_utils import TypeChecker
from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size)
from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size, str_arr_is_na)
from sdc.utilities.utils import sdc_overload, sdc_register_jitable


def astype(self, dtype):
    """Stub with no Python-level behaviour; a direct call returns None.

    NOTE(review): presumably the compiled implementation is attached to this
    name through ``sdc_overload`` elsewhere in the module -- confirm.
    """


def isnan(self):
    """Stub that serves as the target of ``@sdc_overload(isnan)``.

    It has no Python-level behaviour of its own; a direct call returns None.
    """


def notnan(self):
    """Stub that serves as the target of ``@sdc_overload(notnan)``.

    It has no Python-level behaviour of its own; a direct call returns None.
    """


def sum(self):
    """Stub that serves as the target of ``@sdc_overload(sum)``.

    It has no Python-level behaviour of its own; a direct call returns None.
    """

Expand Down Expand Up @@ -117,6 +125,82 @@ def sdc_astype_number_impl(self, dtype):
ty_checker.raise_exc(self.dtype, 'str or type', 'self.dtype')


@sdc_overload(notnan)
def sdc_notnan_overload(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel element-wise negation of numpy.isnan.

    Returns an implementation producing a boolean array with one entry per
    input element: True where the element is not NaN.  Returns None (no
    implementation offered) when ``self`` is not a ``types.Array``.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k notnan
    """

    if not isinstance(self, types.Array):
        return None

    dtype = self.dtype
    isnan = get_isnan(dtype)
    if isinstance(dtype, types.Integer):
        def sdc_notnan_int_impl(self):
            # Integer arrays get an all-True mask without inspecting values.
            length = len(self)
            res = numpy.ones(shape=length, dtype=numpy.bool_)

            return res

        return sdc_notnan_int_impl

    if isinstance(dtype, types.Float):
        def sdc_notnan_float_impl(self):
            length = len(self)
            res = numpy.empty(shape=length, dtype=numpy.bool_)
            # Each iteration writes only its own slot, so prange is usable.
            for i in prange(length):
                res[i] = not isnan(self[i])

            return res

        return sdc_notnan_float_impl

    # Bind the checker locally: no ty_checker binding is visible in this
    # function, so the raise below would otherwise depend on a module-level name.
    ty_checker = TypeChecker("numpy-like 'notnan'")
    ty_checker.raise_exc(dtype, 'int or float', 'self.dtype')


@sdc_overload(isnan)
def sdc_isnan_overload(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.isnan.

    Returns an implementation producing a boolean array with one entry per
    input element: True where the element is NaN.  Returns None (no
    implementation offered) when ``self`` is not a ``types.Array``.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k isnan
    """

    if not isinstance(self, types.Array):
        return None

    dtype = self.dtype
    isnan = get_isnan(dtype)
    if isinstance(dtype, types.Integer):
        def sdc_isnan_int_impl(self):
            # Integer arrays get an all-False mask without inspecting values.
            length = len(self)
            res = numpy.zeros(shape=length, dtype=numpy.bool_)

            return res

        return sdc_isnan_int_impl

    if isinstance(dtype, types.Float):
        def sdc_isnan_float_impl(self):
            length = len(self)
            res = numpy.empty(shape=length, dtype=numpy.bool_)
            # Each iteration writes only its own slot, so prange is usable.
            for i in prange(length):
                res[i] = isnan(self[i])

            return res

        return sdc_isnan_float_impl

    # Bind the checker locally: no ty_checker binding is visible in this
    # function, so the raise below would otherwise depend on a module-level name.
    ty_checker = TypeChecker("numpy-like 'isnan'")
    ty_checker.raise_exc(dtype, 'int or float', 'self.dtype')


@sdc_overload(sum)
def sdc_sum_overload(self):
"""
Expand Down
52 changes: 49 additions & 3 deletions sdc/tests/test_sdc_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,43 @@ def sdc_impl(a, t):
with self.subTest(data=case, type=type_):
np.testing.assert_array_equal(sdc_func(a, type_), ref_impl(a, type_))

def test_isnan(self):
    """Check numpy_like.isnan against np.isnan on an int and a float array."""
    def ref_impl(a):
        return np.isnan(a)

    def sdc_impl(a):
        return numpy_like.isnan(a)

    sdc_func = self.jit(sdc_impl)

    # One integer case and one float case that contains NaN values.
    cases = [[5, 2, 0, 333, -4], [3.3, 5.4, np.nan, 7.9, np.nan]]
    for case in cases:
        a = np.array(case)
        with self.subTest(data=case):
            np.testing.assert_array_equal(sdc_func(a), ref_impl(a))

@unittest.skip('Needs provide String Array boxing')
def test_isnan_str(self):
    """Compare numpy_like.isnan with np.isnan on string data (currently skipped)."""
    # NOTE(review): np.isnan may not accept string arrays, so the reference
    # side probably needs revisiting before this test is enabled -- confirm.
    def ref_impl(a):
        return np.isnan(a)

    def sdc_impl(a):
        return numpy_like.isnan(a)

    sdc_func = self.jit(sdc_impl)

    for case in (['a', 'cc', np.nan], ['se', None, 'vvv']):
        a = np.array(case)
        with self.subTest(data=case):
            expected = ref_impl(a)
            np.testing.assert_array_equal(sdc_func(a), expected)

def test_notnan(self):
def ref_impl(a):
return np.invert(np.isnan(a))

def sdc_impl(a):
return numpy_like.notnan(a)

sdc_func = self.jit(sdc_impl)

Expand All @@ -172,5 +203,20 @@ def sdc_impl(a):
with self.subTest(data=case):
np.testing.assert_array_equal(sdc_func(a), ref_impl(a))

def test_nansum(self):
    """Check numpy_like.nansum against np.nansum on an int and a float array."""
    def ref_impl(a):
        return np.nansum(a)

    def sdc_impl(a):
        return numpy_like.nansum(a)

    sdc_func = self.jit(sdc_impl)

    int_case = [5, 2, 0, 333, -4]
    float_case = [3.3, 5.4, np.nan, 7.9, np.nan]
    for case in (int_case, float_case):
        a = np.array(case)
        with self.subTest(data=case):
            expected = ref_impl(a)
            np.testing.assert_array_equal(sdc_func(a), expected)

# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
5 changes: 5 additions & 0 deletions sdc/tests/tests_perf/test_perf_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ def _test_case(self, cases, name, total_data_length, data_num=1, input_data=test
CE(type_='Numba', code='data.astype(np.int64)', jitted=True),
CE(type_='SDC', code='sdc.functions.numpy_like.astype(data, np.int64)', jitted=True),
], usecase_params='data'),
TC(name='isnan', size=[10 ** 7], call_expr=[
CE(type_='Python', code='np.isnan(data)', jitted=False),
CE(type_='Numba', code='np.isnan(data)', jitted=True),
CE(type_='SDC', code='sdc.functions.numpy_like.isnan(data)', jitted=True),
], usecase_params='data'),
TC(name='nansum', size=[10 ** 7], call_expr=[
CE(type_='Python', code='np.nansum(data)', jitted=False),
CE(type_='SDC', code='sdc.functions.numpy_like.nansum(data)', jitted=True),
Expand Down