diff --git a/hpat/distributed_api.py b/hpat/distributed_api.py index 87b8776e0..f34963dbb 100644 --- a/hpat/distributed_api.py +++ b/hpat/distributed_api.py @@ -103,12 +103,14 @@ def gather_scalar(data): # pragma: no cover c_gather_scalar = types.ExternalFunction("c_gather_scalar", types.void(types.voidptr, types.voidptr, types.int32)) + # TODO: test @overload(gather_scalar) def gather_scalar_overload(val): assert isinstance(val, (types.Integer, types.Float)) # TODO: other types like boolean typ_val = _numba_to_c_type_map[val] + func_text = ( "def gather_scalar_impl(val):\n" " n_pes = hpat.distributed_api.get_size()\n" @@ -159,7 +161,6 @@ def gatherv_impl(data): displs = np.empty(1, np.int32) if rank == MPI_ROOT: displs = hpat.hiframes.join.calc_disp(recv_counts) - # print(rank, n_loc, n_total, recv_counts, displs) c_gatherv( data.ctypes, np.int32(n_loc), @@ -195,15 +196,14 @@ def gatherv_str_arr_impl(data): # displacements all_data = StringArray(['']) # dummy arrays on non-root PEs - displs = np.empty(1, np.int32) - displs_char = np.empty(1, np.int32) + displs = np.empty(n_loc, np.int32) + displs_char = np.empty(n_loc, np.int32) if rank == MPI_ROOT: all_data = pre_alloc_string_array(n_total, n_total_char) displs = hpat.hiframes.join.calc_disp(recv_counts) displs_char = hpat.hiframes.join.calc_disp(recv_counts_char) - # print(rank, n_loc, n_total, recv_counts, displs) offset_ptr = get_offset_ptr(all_data) data_ptr = get_data_ptr(all_data) c_gatherv( @@ -330,30 +330,36 @@ def const_slice_getitem(arr, slice_index, start, count): @overload(const_slice_getitem) def const_slice_getitem_overload(arr, slice_index, start, count): + # slice_index start/stop contain absolute indexes of the slice in input array + # start/count define part of the array processed by this processor + + # TODO: should this also handle slices not starting from zero? 
if arr == string_array_type: reduce_op = Reduce_Type.Sum.value def getitem_str_impl(arr, slice_index, start, count): rank = hpat.distributed_api.get_rank() k = slice_index.stop + # get total characters for allocation n_chars = np.uint64(0) - if k > count: + if k > start: + # if slice end is beyond the start of this subset we have to send our elements my_end = min(count, max(k - start, 0)) my_arr = arr[:my_end] - my_arr = hpat.distributed_api.gatherv(my_arr) - n_chars = hpat.distributed_api.dist_reduce( - num_total_chars(my_arr), np.int32(reduce_op)) - if rank == 0: - out_arr = my_arr else: - if rank == 0: - my_arr = arr[:k] - n_chars = num_total_chars(my_arr) - out_arr = my_arr - n_chars = bcast_scalar(n_chars) + my_arr = arr[:0] + + # get the total number of chars in our array, then gather all arrays into one + # and compute total number of chars in all arrays + n_chars = num_total_chars(my_arr) + my_arr = hpat.distributed_api.gatherv(my_arr) + n_chars = hpat.distributed_api.dist_reduce(n_chars, np.int32(reduce_op)) + if rank != 0: out_arr = pre_alloc_string_array(k, n_chars) + else: + out_arr = my_arr # actual communication hpat.distributed_api.bcast(out_arr) @@ -364,17 +370,23 @@ def getitem_str_impl(arr, slice_index, start, count): def getitem_impl(arr, slice_index, start, count): rank = hpat.distributed_api.get_rank() k = slice_index.stop + out_arr = np.empty(k, arr.dtype) - if k > count: + my_arr = arr[:0] + if k > start: + # if slice end is beyond the start of this subset we have to send our elements my_end = min(count, max(k - start, 0)) my_arr = arr[:my_end] - my_arr = hpat.distributed_api.gatherv(my_arr) - if rank == 0: - print(my_arr) - out_arr = my_arr else: - if rank == 0: - out_arr = arr[:k] + my_arr = arr[:0] + + # gather all subsets from all processors + my_arr = hpat.distributed_api.gatherv(my_arr) + + if rank == 0: + out_arr = my_arr + + # actual communication hpat.distributed_api.bcast(out_arr) return out_arr diff --git a/hpat/tests/test_basic.py 
b/hpat/tests/test_basic.py index ff486ca30..a0e8cd342 100644 --- a/hpat/tests/test_basic.py +++ b/hpat/tests/test_basic.py @@ -6,12 +6,14 @@ import hpat import random from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, - count_parfor_OneDs, count_array_OneDs, count_array_OneD_Vars, - dist_IR_contains, get_rank, get_start_end) + count_parfor_OneDs, count_array_OneDs, count_array_OneD_Vars, + dist_IR_contains, get_rank, get_start_end) + def get_np_state_ptr(): return numba._helperlib.rnd_get_np_state_ptr() + def _copy_py_state(r, ptr): """ Copy state of Python random *r* to Numba state *ptr*. @@ -67,7 +69,7 @@ def test_impl(N): def test_setitem1(self): def test_impl(N): - A = np.arange(10)+1.0 + A = np.arange(10) + 1.0 A[0] = 30 return A.sum() @@ -79,7 +81,7 @@ def test_impl(N): def test_setitem2(self): def test_impl(N): - A = np.arange(10)+1.0 + A = np.arange(10) + 1.0 A[0:4] = 30 return A.sum() @@ -147,7 +149,7 @@ def test_impl(N): def test_whole_slice(self): def test_impl(N): X = np.ones((N, 4)) - X[:,3] = (X[:,3]) / (np.max(X[:,3]) - np.min(X[:,3])) + X[:, 3] = (X[:, 3]) / (np.max(X[:, 3]) - np.min(X[:, 3])) return X.sum() hpat_func = hpat.jit(test_impl) @@ -170,10 +172,12 @@ def test_impl(N): def test_assert(self): # make sure assert in an inlined function works + def g(a): - assert a==0 + assert a == 0 hpat_g = hpat.jit(g) + def f(): hpat_g(0) @@ -199,8 +203,9 @@ def test_reduce(self): funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax'] for (dtype, func) in itertools.product(dtypes, funcs): # loc allreduce doesn't support int64 on windows - if (sys.platform.startswith('win') and dtype=='int64' - and func in ['argmin', 'argmax']): + if (sys.platform.startswith('win') + and dtype == 'int64' + and func in ['argmin', 'argmax']): continue func_text = """def f(n): A = np.arange(0, n, 1, np.{}) @@ -222,8 +227,9 @@ def test_reduce2(self): funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax'] for (dtype, func) in itertools.product(dtypes, 
funcs): # loc allreduce doesn't support int64 on windows - if (sys.platform.startswith('win') and dtype=='int64' - and func in ['argmin', 'argmax']): + if (sys.platform.startswith('win') + and dtype == 'int64' + and func in ['argmin', 'argmax']): continue func_text = """def f(A): return A.{}() @@ -232,7 +238,7 @@ def test_reduce2(self): exec(func_text, {'np': np}, loc_vars) test_impl = loc_vars['f'] - hpat_func = hpat.jit(locals={'A:input':'distributed'})(test_impl) + hpat_func = hpat.jit(locals={'A:input': 'distributed'})(test_impl) n = 21 start, end = get_start_end(n) np.random.seed(0) @@ -248,8 +254,9 @@ def test_reduce_filter1(self): funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax'] for (dtype, func) in itertools.product(dtypes, funcs): # loc allreduce doesn't support int64 on windows - if (sys.platform.startswith('win') and dtype=='int64' - and func in ['argmin', 'argmax']): + if (sys.platform.startswith('win') + and dtype == 'int64' + and func in ['argmin', 'argmax']): continue func_text = """def f(A): A = A[A>5] @@ -259,7 +266,7 @@ def test_reduce_filter1(self): exec(func_text, {'np': np}, loc_vars) test_impl = loc_vars['f'] - hpat_func = hpat.jit(locals={'A:input':'distributed'})(test_impl) + hpat_func = hpat.jit(locals={'A:input': 'distributed'})(test_impl) n = 21 start, end = get_start_end(n) np.random.seed(0) @@ -273,7 +280,7 @@ def test_reduce_filter1(self): def test_array_reduce(self): binops = ['+=', '*=', '+=', '*=', '|=', '|='] dtypes = ['np.float32', 'np.float32', 'np.float64', 'np.float64', 'np.int32', 'np.int64'] - for (op,typ) in zip(binops,dtypes): + for (op, typ) in zip(binops, dtypes): func_text = """def f(n): A = np.arange(0, 10, 1, {}) B = np.arange(0 + 3, 10 + 3, 1, {}) @@ -310,7 +317,7 @@ def test_impl(N): def test_dist_return_tuple(self): def test_impl(N): A = np.arange(N) - B = np.arange(N)+1.5 + B = np.arange(N) + 1.5 return A, B hpat_func = hpat.jit(locals={'A:return': 'distributed', @@ -339,7 +346,7 @@ def test_impl(A): def 
test_rebalance(self): def test_impl(N): A = np.arange(n) - B = A[A>10] + B = A[A > 10] C = hpat.distributed_api.rebalance_array(B) return C.sum() @@ -356,7 +363,7 @@ def test_impl(N): def test_rebalance_loop(self): def test_impl(N): A = np.arange(n) - B = A[A>10] + B = A[A > 10] s = 0 for i in range(3): s += B.sum() @@ -479,5 +486,6 @@ def test_rhs(arr_len): A, B, _ = hpat_func3(arr_len) np.testing.assert_allclose(A, B) + if __name__ == "__main__": unittest.main() diff --git a/hpat/tests/test_d4p.py b/hpat/tests/test_d4p.py index 5d786f2f8..c84a1acb6 100644 --- a/hpat/tests/test_d4p.py +++ b/hpat/tests/test_d4p.py @@ -33,7 +33,9 @@ def train_impl(n, d): def prdct_impl(n, d, model): w = np.ones((n, d), dtype=np.double) - 22.5 algo = d4p.logistic_regression_prediction( - 2, resultsToCompute="computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities") + 2, + resultsToCompute="computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities" + ) return algo.compute(w, model) train_hpat = hpat.jit(train_impl) diff --git a/hpat/tests/test_dataframe.py b/hpat/tests/test_dataframe.py index 816373c58..a388dad7d 100644 --- a/hpat/tests/test_dataframe.py +++ b/hpat/tests/test_dataframe.py @@ -6,7 +6,8 @@ import numba import hpat -from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, count_array_OneDs, dist_IR_contains, get_start_end) +from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, + count_array_OneDs, dist_IR_contains, get_start_end) from hpat.tests.gen_test_data import ParquetGenerator @@ -49,8 +50,6 @@ def test_impl(A, B, c): c = 2 pd.testing.assert_series_equal(hpat_func(A, B, c), test_impl(A, B, c)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_unbox1(self): def test_impl(df): return df.A @@ -105,14 +104,6 @@ def test_impl(df): df = pd.DataFrame({'A': ['aa', 'bb', 'cc']}) pd.testing.assert_frame_equal(hpat_func(df), 
test_impl(df)) - @unittest.skip('Assertion Error - fix needed\n' - 'Not equal to tolerance rtol=1e-07, atol=0\n' - 'Mismatch: 100%\n' - 'Max absolute difference: 3.\n' - 'Max relative difference: 0.27272727\n' - 'x: array(8.)\n' - 'y: array(11.)\n' - 'NUMA_PES=3 build') def test_box_dist_return(self): def test_impl(n): df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)}) @@ -165,8 +156,6 @@ def test_impl(n): self.assertEqual(count_parfor_REPs(), 0) self.assertEqual(count_parfor_OneDs(), 1) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_column_list_getitem1(self): def test_impl(df): return df[['A', 'C']] @@ -177,8 +166,6 @@ def test_impl(df): {'A': np.arange(n), 'B': np.ones(n), 'C': np.random.ranf(n)}) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_filter1(self): def test_impl(n): df = pd.DataFrame({'A': np.arange(n) + n, 'B': np.arange(n)**2}) @@ -191,8 +178,6 @@ def test_impl(n): self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_filter2(self): def test_impl(n): df = pd.DataFrame({'A': np.arange(n) + n, 'B': np.arange(n)**2}) @@ -205,8 +190,6 @@ def test_impl(n): self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_filter3(self): def test_impl(n): df = pd.DataFrame({'A': np.arange(n) + n, 'B': np.arange(n)**2}) @@ -275,8 +258,6 @@ def test_impl(df): df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) np.testing.assert_array_equal(hpat_func(df), test_impl(df)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_iat1(self): def test_impl(n): df = pd.DataFrame({'B': np.ones(n), 'A': np.arange(n) + n}) @@ -400,8 +381,6 @@ def test_impl(df, n): test_impl(df2, n) pd.testing.assert_frame_equal(df1, df2) - @unittest.skip('Error - fix 
needed\n' - 'NUMA_PES=3 build') def test_set_column_bool1(self): def test_impl(df): df['C'] = df['A'][df['B']] @@ -456,8 +435,6 @@ def test_impl(df): df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)}) np.testing.assert_array_equal(hpat_func(df), test_impl(df)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_df_values_parallel1(self): def test_impl(n): df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)}) @@ -690,8 +667,6 @@ def test_impl(n): n = 11 pd.testing.assert_frame_equal(hpat_func(n), test_impl(n)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_pct_change1(self): def test_impl(n): df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1}) @@ -701,8 +676,6 @@ def test_impl(n): n = 11 pd.testing.assert_frame_equal(hpat_func(n), test_impl(n)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_mean1(self): # TODO: non-numeric columns should be ignored automatically def test_impl(n): @@ -713,8 +686,6 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_std1(self): # TODO: non-numeric columns should be ignored automatically def test_impl(n): @@ -725,8 +696,6 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_var1(self): # TODO: non-numeric columns should be ignored automatically def test_impl(n): @@ -737,8 +706,6 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_max1(self): # TODO: non-numeric columns should be ignored automatically def test_impl(n): @@ -749,8 +716,6 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_min1(self): # TODO: non-numeric columns should 
be ignored automatically def test_impl(n): @@ -761,8 +726,6 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_sum1(self): # TODO: non-numeric columns should be ignored automatically def test_impl(n): @@ -773,8 +736,6 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_prod1(self): # TODO: non-numeric columns should be ignored automatically def test_impl(n): @@ -967,8 +928,6 @@ def test_impl(df, df2, df3): pd.testing.assert_frame_equal( hpat_func(df, df2, df3), test_impl(df, df2, df3)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_concat_columns1(self): def test_impl(S1, S2): return pd.concat([S1, S2], axis=1) @@ -981,8 +940,6 @@ def test_impl(S1, S2): hpat_func(S1, S2), test_impl(S1, S2).rename(columns={0: '0', 1: '1'})) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_var_rename(self): # tests df variable replacement in hiframes_untyped where inlining # can cause extra assignments and definition handling errors diff --git a/hpat/tests/test_groupby.py b/hpat/tests/test_groupby.py index 658d18e5d..42ed98056 100644 --- a/hpat/tests/test_groupby.py +++ b/hpat/tests/test_groupby.py @@ -305,8 +305,6 @@ def test_impl(n): self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_muti_hiframes_node_filter_agg(self): def test_impl(df, cond): df2 = df[cond] @@ -343,8 +341,6 @@ def test_impl(df): # np.testing.assert_array_equal(hpat_func(df), test_impl(df)) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_pivot(self): def test_impl(df): pt = df.pivot_table(index='A', columns='C', values='D', aggfunc='sum') @@ -356,8 +352,6 @@ def test_impl(df): 
self.assertEqual( set(hpat_func(_pivot_df1)[1]), set(test_impl(_pivot_df1)[1])) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_pivot_parallel(self): def test_impl(): df = pd.read_parquet("pivot2.pq") @@ -380,8 +374,6 @@ def test_impl(df): self.assertEqual( set(hpat_func(_pivot_df1)[1]), set(test_impl(_pivot_df1)[1])) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_crosstab_parallel1(self): def test_impl(): df = pd.read_parquet("pivot2.pq") diff --git a/hpat/tests/test_join.py b/hpat/tests/test_join.py index 3905b6c42..8744a02d6 100644 --- a/hpat/tests/test_join.py +++ b/hpat/tests/test_join.py @@ -16,8 +16,6 @@ class TestJoin(unittest.TestCase): - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_join1(self): def test_impl(n): df1 = pd.DataFrame({'key1': np.arange(n) + 3, 'A': np.arange(n) + 1.0}) @@ -33,8 +31,6 @@ def test_impl(n): n = 11111 self.assertEqual(hpat_func(n), test_impl(n)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_join1_seq(self): def test_impl(df1, df2): df3 = df1.merge(df2, left_on='key1', right_on='key2') @@ -71,8 +67,6 @@ def test_impl(): hpat_func = hpat.jit(test_impl) self.assertEqual(set(hpat_func()), set(test_impl())) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_join_mutil_seq1(self): def test_impl(df1, df2): return df1.merge(df2, on=['A', 'B']) @@ -88,8 +82,6 @@ def test_impl(df1, df2): pd.testing.assert_frame_equal(hpat_func(df1, df2), test_impl(df1, df2)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_join_mutil_parallel1(self): def test_impl(A1, B1, C1, A2, B2, D2): df1 = pd.DataFrame({'A': A1, 'B': B1, 'C': C1}) @@ -129,8 +121,6 @@ def test_impl(A1, B1, C1, A2, B2, D2): p_res = test_impl(p_A1, p_B1, p_C1, p_A2, p_B2, p_D2) self.assertEqual(h_res, p_res) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_join_left_parallel1(self): """ """ diff --git a/hpat/tests/test_ml.py 
b/hpat/tests/test_ml.py index d6f513cda..526d5c9e8 100644 --- a/hpat/tests/test_ml.py +++ b/hpat/tests/test_ml.py @@ -12,8 +12,6 @@ class TestML(unittest.TestCase): - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_logistic_regression(self): def test_impl(n, d): iterations = 3 @@ -32,8 +30,6 @@ def test_impl(n, d): self.assertEqual(count_array_OneDs(), 3) self.assertEqual(count_parfor_OneDs(), 3) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_logistic_regression_acc(self): def test_impl(N, D): iterations = 3 @@ -55,8 +51,6 @@ def test_impl(N, D): self.assertEqual(count_array_OneDs(), 3) self.assertEqual(count_parfor_OneDs(), 4) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_linear_regression(self): def test_impl(N, D): p = 2 @@ -76,8 +70,6 @@ def test_impl(N, D): self.assertEqual(count_array_OneDs(), 5) self.assertEqual(count_parfor_OneDs(), 3) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_kde(self): def test_impl(n): X = np.ones(n) diff --git a/hpat/tests/test_rolling.py b/hpat/tests/test_rolling.py index 728e8425f..bc60c383a 100644 --- a/hpat/tests/test_rolling.py +++ b/hpat/tests/test_rolling.py @@ -2,6 +2,7 @@ import itertools import os import pandas as pd +import platform import numpy as np import numba import hpat @@ -87,8 +88,6 @@ def test_impl(df, w, c): df = pd.DataFrame({'B': np.arange(n)}) pd.testing.assert_frame_equal(hpat_func(df, w, c), test_impl(df, w, c)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_fixed_parallel1(self): def test_impl(n, w, center): df = pd.DataFrame({'B': np.arange(n)}) @@ -108,8 +107,6 @@ def test_impl(n, w, center): self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_fixed_parallel_apply1(self): def test_impl(n, w, center): df = pd.DataFrame({'B': np.arange(n)}) @@ -225,8 +222,7 @@ def test_variable_apply2(self): 
df = pd.DataFrame({'B': np.arange(n), 'time': time}) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') + @unittest.skipIf(platform.system() == 'Windows', "ValueError: time must be monotonic") def test_variable_parallel1(self): wins = ('2s',) sizes = (121,) @@ -250,8 +246,7 @@ def test_variable_parallel1(self): self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') + @unittest.skipIf(platform.system() == 'Windows', "ValueError: time must be monotonic") def test_variable_apply_parallel1(self): wins = ('2s',) sizes = (121,) @@ -302,8 +297,6 @@ def apply_test_impl(S, w, c): pd.testing.assert_series_equal(hpat_func(S1, *args), apply_test_impl(S1, *args)) pd.testing.assert_series_equal(hpat_func(S2, *args), apply_test_impl(S2, *args)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_series_cov1(self): # test series rolling functions # all functions except apply @@ -328,8 +321,6 @@ def test_impl2(S, S2, w, c): pd.testing.assert_series_equal(hpat_func(*args), test_impl2(*args)) pd.testing.assert_series_equal(hpat_func(*args), test_impl2(*args)) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_df_cov1(self): # test series rolling functions # all functions except apply diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index 1ed6fec58..2c0cf38a7 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -64,9 +64,6 @@ def _make_func_use_method_arg1(method): class TestSeries(unittest.TestCase): - @unittest.skip('AssertionError - fix needed\n' - '122 != 1\n' - 'NUMA_PES=3 build') def test_create1(self): def test_impl(): df = pd.DataFrame({'A': [1, 2, 3]}) @@ -75,9 +72,6 @@ def test_impl(): self.assertEqual(hpat_func(), test_impl()) - @unittest.skip('AssertionError - fix needed\n' - '122 != 1\n' - 'NUMA_PES=3 build') def test_create2(self): def 
test_impl(n): df = pd.DataFrame({'A': np.arange(n)}) @@ -595,7 +589,6 @@ def test_impl(): pd.testing.assert_series_equal(hpat_func(), test_impl()) - @unittest.skip("ERROR: Segmentation fault on the second launch (using HPAT_REPEAT_TEST_NUMBER=2)") def test_series_list_str_unbox1(self): def test_impl(A): return A.iloc[0] @@ -635,7 +628,7 @@ def test_impl(): one, two, three = hpat_func() self.assertTrue(isinstance(one, np.ndarray)) - self.assertTrue(isinstance(two, np.ndarray)) + self.assertTrue(isinstance(two, np.ndarray)) self.assertTrue(isinstance(three, np.ndarray)) @unittest.skip("needs empty_like typing fix in npydecl.py") @@ -778,7 +771,7 @@ def test_impl(S): def test_series_sum2(self): def test_impl(S): - return (S+S).sum() + return (S + S).sum() hpat_func = hpat.jit(test_impl) S = pd.Series([np.nan, 2., 3.]) @@ -800,9 +793,6 @@ def test_impl(S): S = pd.Series([np.nan, np.nan]) self.assertEqual(hpat_func(S), test_impl(S)) - @unittest.skip('AssertionError - fix needed\n' - '5 != 2\n' - 'NUMA_PES=3 build') def test_series_count1(self): def test_impl(S): return S.count() @@ -857,9 +847,6 @@ def test_impl(S): S = pd.Series(['AA', 'BB', 'C', 'AA', 'C', 'AA']) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) - @unittest.skip('AssertionError - fix needed\n' - '61 != 110\n' - 'NUMA_PES=3 build') def test_series_dist_input1(self): def test_impl(S): return S.max() @@ -872,6 +859,30 @@ def test_impl(S): self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) + def test_series_dist_input2(self): + def test_impl(S): + return S.max() + hpat_func = hpat.jit(distributed={'S'})(test_impl) + + n = 111 + S = pd.Series(np.arange(n), 1 + np.arange(n)) + start, end = get_start_end(n) + self.assertEqual(hpat_func(S[start:end]), test_impl(S)) + self.assertEqual(count_array_REPs(), 0) + self.assertEqual(count_parfor_REPs(), 0) + + def test_series_dist_input3(self): + def test_impl(S): + return S.max() + hpat_func = 
hpat.jit(distributed={'S'})(test_impl) + + n = 111 + S = pd.Series(np.arange(n), ['abc{}'.format(id) for id in range(n)]) + start, end = get_start_end(n) + self.assertEqual(hpat_func(S[start:end]), test_impl(S)) + self.assertEqual(count_array_REPs(), 0) + self.assertEqual(count_parfor_REPs(), 0) + def test_series_tuple_input1(self): def test_impl(s_tup): return s_tup[0].max() @@ -879,7 +890,7 @@ def test_impl(s_tup): n = 111 S = pd.Series(np.arange(n)) - S2 = pd.Series(np.arange(n)+1.0) + S2 = pd.Series(np.arange(n) + 1.0) s_tup = (S, 1, S2) self.assertEqual(hpat_func(s_tup), test_impl(s_tup)) @@ -891,7 +902,7 @@ def test_impl(s_tup): n = 111 S = pd.Series(np.arange(n)) - S2 = pd.Series(np.arange(n)+1.0) + S2 = pd.Series(np.arange(n) + 1.0) start, end = get_start_end(n) s_tup = (S, 1, S2) h_s_tup = (S[start:end], 1, S2[start:end]) @@ -916,7 +927,7 @@ def test_impl(S1, S2): def test_series_map1(self): def test_impl(S): - return S.map(lambda a: 2*a) + return S.map(lambda a: 2 * a) hpat_func = hpat.jit(test_impl) S = pd.Series([1.0, 2., 3., 4., 5.]) @@ -932,7 +943,7 @@ def test_impl(S): def test_series_map_tup1(self): def test_impl(S): - return S.map(lambda a: (a, 2*a)) + return S.map(lambda a: (a, 2 * a)) hpat_func = hpat.jit(test_impl) S = pd.Series([1.0, 2., 3., 4., 5.]) @@ -940,7 +951,7 @@ def test_impl(S): def test_series_map_tup_map1(self): def test_impl(S): - A = S.map(lambda a: (a, 2*a)) + A = S.map(lambda a: (a, 2 * a)) return A.map(lambda a: a[1]) hpat_func = hpat.jit(test_impl) @@ -949,7 +960,7 @@ def test_impl(S): def test_series_combine(self): def test_impl(S1, S2): - return S1.combine(S2, lambda a, b: 2*a + b) + return S1.combine(S2, lambda a, b: 2 * a + b) hpat_func = hpat.jit(test_impl) S1 = pd.Series([1.0, 2., 3., 4., 5.]) @@ -958,7 +969,7 @@ def test_impl(S1, S2): def test_series_combine_float3264(self): def test_impl(S1, S2): - return S1.combine(S2, lambda a, b: 2*a + b) + return S1.combine(S2, lambda a, b: 2 * a + b) hpat_func = hpat.jit(test_impl) 
S1 = pd.Series([np.float64(1), np.float64(2), @@ -969,7 +980,7 @@ def test_impl(S1, S2): def test_series_combine_assert1(self): def test_impl(S1, S2): - return S1.combine(S2, lambda a, b: 2*a + b) + return S1.combine(S2, lambda a, b: 2 * a + b) hpat_func = hpat.jit(test_impl) S1 = pd.Series([1, 2, 3]) @@ -979,7 +990,7 @@ def test_impl(S1, S2): def test_series_combine_assert2(self): def test_impl(S1, S2): - return S1.combine(S2, lambda a, b: 2*a + b) + return S1.combine(S2, lambda a, b: 2 * a + b) hpat_func = hpat.jit(test_impl) S1 = pd.Series([6., 21., 3., 5.]) @@ -989,7 +1000,7 @@ def test_impl(S1, S2): def test_series_combine_integer(self): def test_impl(S1, S2): - return S1.combine(S2, lambda a, b: 2*a + b, 16) + return S1.combine(S2, lambda a, b: 2 * a + b, 16) hpat_func = hpat.jit(test_impl) S1 = pd.Series([1, 2, 3, 4, 5]) @@ -998,7 +1009,7 @@ def test_impl(S1, S2): def test_series_combine_different_types(self): def test_impl(S1, S2): - return S1.combine(S2, lambda a, b: 2*a + b) + return S1.combine(S2, lambda a, b: 2 * a + b) hpat_func = hpat.jit(test_impl) S1 = pd.Series([6.1, 21.2, 3.3, 5.4, 6.7]) @@ -1007,7 +1018,7 @@ def test_impl(S1, S2): def test_series_combine_integer_samelen(self): def test_impl(S1, S2): - return S1.combine(S2, lambda a, b: 2*a + b) + return S1.combine(S2, lambda a, b: 2 * a + b) hpat_func = hpat.jit(test_impl) S1 = pd.Series([1, 2, 3, 4, 5]) @@ -1016,7 +1027,7 @@ def test_impl(S1, S2): def test_series_combine_samelen(self): def test_impl(S1, S2): - return S1.combine(S2, lambda a, b: 2*a + b) + return S1.combine(S2, lambda a, b: 2 * a + b) hpat_func = hpat.jit(test_impl) S1 = pd.Series([1.0, 2., 3., 4., 5.]) @@ -1025,7 +1036,7 @@ def test_impl(S1, S2): def test_series_combine_value(self): def test_impl(S1, S2): - return S1.combine(S2, lambda a, b: 2*a + b, 1237.56) + return S1.combine(S2, lambda a, b: 2 * a + b, 1237.56) hpat_func = hpat.jit(test_impl) S1 = pd.Series([1.0, 2., 3., 4., 5.]) @@ -1034,7 +1045,7 @@ def test_impl(S1, S2): 
def test_series_combine_value_samelen(self): def test_impl(S1, S2): - return S1.combine(S2, lambda a, b: 2*a + b, 1237.56) + return S1.combine(S2, lambda a, b: 2 * a + b, 1237.56) hpat_func = hpat.jit(test_impl) S1 = pd.Series([1.0, 2., 3., 4., 5.]) @@ -1043,7 +1054,7 @@ def test_impl(S1, S2): def test_series_apply1(self): def test_impl(S): - return S.apply(lambda a: 2*a) + return S.apply(lambda a: 2 * a) hpat_func = hpat.jit(test_impl) S = pd.Series([1.0, 2., 3., 4., 5.]) @@ -1057,11 +1068,6 @@ def test_impl(S): S = pd.Series([np.nan, -2., 3., 0.5E-01, 0xFF, 0o7, 0b101]) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) - @unittest.skip('AssertionError - fix needed\n' - 'Arrays are not almost equal to 7 decimals\n' - 'ACTUAL: 4.166666666666667\n' - 'DESIRED: 12.5\n' - 'NUMA_PES=3 build') def test_series_cov1(self): def test_impl(S1, S2): return S1.cov(S2) @@ -1073,11 +1079,6 @@ def test_impl(S1, S2): hpat_func(S1, S2), test_impl(S1, S2), err_msg='S1={}\nS2={}'.format(S1, S2)) - @unittest.skip('AssertionError - fix needed\n' - 'Arrays are not almost equal to 7 decimals\n' - 'ACTUAL: 0.9539980920057239\n' - 'DESIRED: 1.0\n' - 'NUMA_PES=3 build') def test_series_corr1(self): def test_impl(S1, S2): return S1.corr(S2) @@ -1194,14 +1195,6 @@ def test_impl(S): S = pd.Series([1.0, np.nan, 3.0, 2.0, np.nan, 4.0]) np.testing.assert_array_equal(hpat_func(S).values, test_impl(S).values) - @unittest.skip('AssertionError - fix needed\n' - 'Arrays are not equal\n' - 'Mismatch: 100%\n' - 'Max absolute difference: 0.04361003\n' - 'Max relative difference: 9.04840049\n' - 'x: array([0.04843 , 0.05106 , 0.057625, 0.0671 ])\n' - 'y: array([0.00482 , 0.04843 , 0.05106 , 0.057625])\n' - 'NUMA_PES=3 build') def test_series_nlargest_parallel1(self): # create `kde.parquet` file ParquetGenerator.gen_kde_pq() @@ -1242,14 +1235,6 @@ def test_impl(S): S = pd.Series([1.0, np.nan, 3.0, 2.0, np.nan, 4.0]) np.testing.assert_array_equal(hpat_func(S).values, test_impl(S).values) - 
@unittest.skip('AssertionError - fix needed\n' - 'Arrays are not equal\n' - 'Mismatch: 50%\n' - 'Max absolute difference: 0.01813261\n' - 'Max relative difference: 0.50757593\n' - 'x: array([0.007431, 0.024095, 0.035724, 0.053857])\n' - 'y: array([0.007431, 0.024095, 0.031374, 0.035724])\n' - 'NUMA_PES=3 build') def test_series_nsmallest_parallel1(self): # create `kde.parquet` file ParquetGenerator.gen_kde_pq() @@ -1302,48 +1287,71 @@ def test_impl(): def test_series_head_index2(self): def test_impl(): - S = pd.Series([6, 9, 2, 3, 6, 4, 5], - ['a', 'ab', 'abc', 'c', 'f', 'hh', '']) + S = pd.Series([6, 9, 2, 3, 6, 4, 5], ['a', 'ab', 'abc', 'c', 'f', 'hh', '']) return S.head(3) hpat_func = hpat.jit(test_impl) pd.testing.assert_series_equal(hpat_func(), test_impl()) - @unittest.skip( - '''Skipped as it corrupts memmory and causes failures of other tests - while running with NUM_PES=3 and at least TestSeries and TestBasic suites together. - Exact commands to reproduce: - mpiexec -n 3 python -W ignore -u -m unittest -v $SUITES $SUITES - where SUITES="hpat.tests.TestBasic hpat.tests.TestSeries" - Test failures occur on the second suite run only. - Exact errors: - 1. Segmentation fault in TestBasic.test_rebalance - 2. FAIL in TestBasic.test_astype with following error message: - test_astype (hpat.tests.test_basic.TestBasic) ... 
- Fatal error in MPI_Allreduce: Message truncated, error stack: - MPI_Allreduce(907)..................: MPI_Allreduce(sbuf=0x7ffe3b734128, rbuf=0x7ffe3b734120, count=1, - MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD) failed - MPIR_Allreduce_impl(764)............: - MPIR_Allreduce_intra(238)...........: - MPIR_Reduce_impl(1070)..............: - MPIR_Reduce_intra(878)..............: - MPIR_Reduce_binomial(186)...........: - MPIC_Recv(353)......................: - MPIDI_CH3U_Request_unpack_uebuf(568): Message truncated; 40 bytes received but buffer size is 8 - MPIR_Allreduce_intra(268)...........: - MPIR_Bcast_impl(1452)...............: - MPIR_Bcast(1476)....................: - MPIR_Bcast_intra(1287)..............: - MPIR_Bcast_binomial(310)............: Failure during collective - Fatal error in MPI_Allreduce: Other MPI error, error stack''' - ) + @unittest.skip('Failed due to lack of Int64Index support. Error:' + 'Series.index values are different (66.66667 %)' + '[left]: RangeIndex(start=0, stop=3, step=1)' + '[right]: Int64Index([8, 1, 6], dtype=\'int64\')') + def test_series_head_index3(self): + def test_impl(S): + return S.head(3) + hpat_func = hpat.jit(test_impl) + + S = pd.Series([6, 9, 2, 3, 6, 4, 5], [8, 1, 6, 0, 9, 1, 3]) + pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) + + def test_series_head_index4(self): + def test_impl(S): + return S.head(3) + hpat_func = hpat.jit(test_impl) + + S = pd.Series([6, 9, 2, 4, 6, 4, 5], ['a', 'ab', 'abc', 'c', 'f', 'hh', '']) + pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) + + @unittest.skip('numba.errors.TypingError - fix needed\n' + 'Failed in hpat mode pipeline' + '(step: convert to distributed)\n' + 'Invalid use of Function()' + 'with argument(s) of type(s): (none)\n') + def test_series_head_parallel1(self): + def test_impl(S): + return S.head(7) + + hpat_func = hpat.jit(distributed={'S'})(test_impl) + + # need to test different lengths, as head's size is fixed and implementation + # depends on 
relation of size of the data per processor to output data size + for n in range(1, 5): + S = pd.Series(['a', 'ab', 'abc', 'c', 'f', 'hh', ''] * n) + start, end = get_start_end(len(S)) + pd.testing.assert_series_equal(hpat_func(S[start:end]), test_impl(S)) + self.assertTrue(count_array_OneDs() > 0) + + @unittest.skip('Failed due to lack of Int64Index support. Error:' + 'Series.index values are different (66.66667 %)' + '[left]: RangeIndex(start=0, stop=3, step=1)' + '[right]: Int64Index([8, 1, 6], dtype=\'int64\')') def test_series_head_index_parallel1(self): def test_impl(S): return S.head(3) hpat_func = hpat.jit(distributed={'S'})(test_impl) - S = pd.Series([6, 9, 2, 3, 6, 4, 5], - ['a', 'ab', 'abc', 'c', 'f', 'hh', '']) + S = pd.Series([6, 9, 2, 3, 6, 4, 5], [8, 1, 6, 0, 9, 1, 3]) + start, end = get_start_end(len(S)) + pd.testing.assert_series_equal(hpat_func(S[start:end]), test_impl(S)) + self.assertTrue(count_array_OneDs() > 0) + + def test_series_head_index_parallel2(self): + def test_impl(S): + return S.head(4) + hpat_func = hpat.jit(distributed={'S'})(test_impl) + + S = pd.Series([6, 9, 2, 3, 6, 4, 5], ['a', 'ab', 'abc', 'c', 'f', 'hh', '']) start, end = get_start_end(len(S)) pd.testing.assert_series_equal(hpat_func(S[start:end]), test_impl(S)) self.assertTrue(count_array_OneDs() > 0) @@ -1369,9 +1377,6 @@ def test_impl(S): S = pd.Series(np.random.ranf(m)) self.assertEqual(hpat_func(S), test_impl(S)) - @unittest.skip('AssertionError - fix needed\n' - 'nan != 0.45894510159707225\n' - 'NUMA_PES=3 build') def test_series_median_parallel1(self): # create `kde.parquet` file ParquetGenerator.gen_kde_pq() @@ -1476,7 +1481,7 @@ def test_impl(): hpat_func = hpat.jit(test_impl) np.testing.assert_array_equal(hpat_func(), test_impl()) - @unittest.skip("Enabel after fixing distributed for get_series_index") + @unittest.skip("Enable after fixing distributed for get_series_index") def test_series_index3(self): def test_impl(): A = pd.Series([1, 2, 3]) diff --git 
a/hpat/tests/test_strings.py b/hpat/tests/test_strings.py index 50229646c..fa7f824d4 100644 --- a/hpat/tests/test_strings.py +++ b/hpat/tests/test_strings.py @@ -13,7 +13,7 @@ from hpat.tests.gen_test_data import ParquetGenerator -class TestString(unittest.TestCase): +class TestStrings(unittest.TestCase): def test_pass_return(self): def test_impl(_str): @@ -207,10 +207,8 @@ def test_impl(ds): ) ds, rs = hpat_func(df.B) gc.collect() - self.assertTrue(isinstance(ds, pd.Series) and - isinstance(rs, pd.Series)) - self.assertTrue(ds[0] == 'one' and ds[2] == 'three' and - rs[0] and rs[2] == False) + self.assertTrue(isinstance(ds, pd.Series) and isinstance(rs, pd.Series)) + self.assertTrue(ds[0] == 'one' and ds[2] == 'three' and rs[0] and not rs[2]) def test_string_array_bool_getitem(self): def test_impl(): @@ -222,8 +220,6 @@ def test_impl(): self.assertEqual(hpat_func(), True) - @unittest.skip('Error - fix needed\n' - 'NUMA_PES=3 build') def test_string_NA_box(self): # create `example.parquet` file ParquetGenerator.gen_pq_test()