@@ -41,6 +41,8 @@ DPCTLSyclEventRef dpnp_all_c(DPCTLSyclQueueRef q_ref,
4141 const size_t size,
4242 const DPCTLEventVectorRef dep_event_vec_ref)
4343{
44+ static_assert (std::is_same_v<_ResultType, bool >, " Boolean result type is required" );
45+
4446 // avoid warning unused variable
4547 (void )dep_event_vec_ref;
4648
@@ -52,38 +54,50 @@ DPCTLSyclEventRef dpnp_all_c(DPCTLSyclQueueRef q_ref,
5254 }
5355
5456 sycl::queue q = *(reinterpret_cast <sycl::queue*>(q_ref));
55- sycl::event event;
5657
57- DPNPC_ptr_adapter<_DataType> input1_ptr (q_ref, array1_in, size);
58- DPNPC_ptr_adapter<_ResultType> result1_ptr (q_ref, result1, 1 , true , true );
59- const _DataType* array_in = input1_ptr.get_ptr ();
60- _ResultType* result = result1_ptr.get_ptr ();
58+ const _DataType* array_in = static_cast <const _DataType*>(array1_in);
59+ bool * result = static_cast <bool *>(result1);
6160
62- result[ 0 ] = true ;
61+ auto fill_event = q. fill (result, true , 1 ) ;
6362
6463 if (!size)
6564 {
66- return event_ref;
65+ event_ref = reinterpret_cast <DPCTLSyclEventRef>(&fill_event);
66+ return DPCTLEvent_Copy (event_ref);
6767 }
6868
69- sycl::range<1 > gws (size);
70- auto kernel_parallel_for_func = [=](sycl::id<1 > global_id) {
71- size_t i = global_id[0 ];
69+ constexpr size_t lws = 64 ;
70+ constexpr size_t vec_sz = 8 ;
71+
72+ auto gws_range = sycl::range<1 >(((size + lws * vec_sz - 1 ) / (lws * vec_sz)) * lws);
73+ auto lws_range = sycl::range<1 >(lws);
74+ sycl::nd_range<1 > gws (gws_range, lws_range);
7275
73- if (!array_in[i])
76+ auto kernel_parallel_for_func = [=](sycl::nd_item<1 > nd_it) {
77+ auto gr = nd_it.get_group ();
78+ const auto max_gr_size = gr.get_max_local_range ()[0 ];
79+ const size_t start =
80+ vec_sz * (nd_it.get_group (0 ) * nd_it.get_local_range (0 ) + gr.get_group_id ()[0 ] * max_gr_size);
81+ const size_t end = sycl::min (start + vec_sz * max_gr_size, size);
82+
83+ // each work-item reduces over "vec_sz" elements in the input array
84+ bool local_reduction = sycl::joint_none_of (
85+ gr, &array_in[start], &array_in[end], [&](_DataType elem) { return elem == static_cast <_DataType>(0 ); });
86+
87+ if (gr.leader () && (local_reduction == false ))
7488 {
7589 result[0 ] = false ;
7690 }
7791 };
7892
7993 auto kernel_func = [&](sycl::handler& cgh) {
94+ cgh.depends_on (fill_event);
8095 cgh.parallel_for <class dpnp_all_c_kernel <_DataType, _ResultType>>(gws, kernel_parallel_for_func);
8196 };
8297
83- event = q.submit (kernel_func);
98+ auto event = q.submit (kernel_func);
8499
85100 event_ref = reinterpret_cast <DPCTLSyclEventRef>(&event);
86-
87101 return DPCTLEvent_Copy (event_ref);
88102}
89103
@@ -98,6 +112,7 @@ void dpnp_all_c(const void* array1_in, void* result1, const size_t size)
98112 size,
99113 dep_event_vec_ref);
100114 DPCTLEvent_WaitAndThrow (event_ref);
115+ DPCTLEvent_Delete (event_ref);
101116}
102117
103118template <typename _DataType, typename _ResultType>
@@ -751,6 +766,8 @@ void func_map_init_logic(func_map_t& fmap)
751766 fmap[DPNPFuncName::DPNP_FN_ALL_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void *)dpnp_all_ext_c<int64_t , bool >};
752767 fmap[DPNPFuncName::DPNP_FN_ALL_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void *)dpnp_all_ext_c<float , bool >};
753768 fmap[DPNPFuncName::DPNP_FN_ALL_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void *)dpnp_all_ext_c<double , bool >};
769+ fmap[DPNPFuncName::DPNP_FN_ALL_EXT][eft_C64][eft_C64] = {eft_C64, (void *)dpnp_all_ext_c<std::complex <float >, bool >};
770+ fmap[DPNPFuncName::DPNP_FN_ALL_EXT][eft_C128][eft_C128] = {eft_C128, (void *)dpnp_all_ext_c<std::complex <double >, bool >};
754771
755772 fmap[DPNPFuncName::DPNP_FN_ALLCLOSE][eft_INT][eft_INT] = {eft_BLN,
756773 (void *)dpnp_allclose_default_c<int32_t , int32_t , bool >};
0 commit comments