@@ -123,3 +123,306 @@ TEST_F(TestQueueSubmit, CheckSubmitRange_saxpy)
123123 DPCTLDevice_Delete (DRef);
124124 DPCTLDeviceSelector_Delete (DSRef);
125125}
126+
127+ namespace
128+ {
129+
130+ template <typename T> class populate_a ;
131+
132+ template <typename T> class populate_b ;
133+
134+ template <typename T, typename scT> class add_a_and_b ;
135+
136+ template <typename T> struct kernel_arg_t
137+ {
138+ static constexpr DPCTLKernelArgType value = DPCTL_VOID_PTR;
139+ };
140+
141+ /*
142+ template <>
143+ struct kernel_arg_t<char> {
144+ static constexpr DPCTLKernelArgType value = DPCTL_CHAR;
145+ };
146+
147+ template <>
148+ struct kernel_arg_t<signed char> {
149+ static constexpr DPCTLKernelArgType value = DPCTL_SIGNED_CHAR;
150+ };
151+
152+ template <>
153+ struct kernel_arg_t<unsigned char> {
154+ static constexpr DPCTLKernelArgType value = DPCTL_UNSIGNED_CHAR;
155+ };
156+ */
157+ template <> struct kernel_arg_t <short >
158+ {
159+ static constexpr DPCTLKernelArgType value = DPCTL_SHORT;
160+ };
161+
162+ template <> struct kernel_arg_t <int >
163+ {
164+ static constexpr DPCTLKernelArgType value = DPCTL_INT;
165+ };
166+
167+ template <> struct kernel_arg_t <unsigned int >
168+ {
169+ static constexpr DPCTLKernelArgType value = DPCTL_UNSIGNED_INT;
170+ };
171+
172+ template <> struct kernel_arg_t <long >
173+ {
174+ static constexpr DPCTLKernelArgType value = DPCTL_LONG;
175+ };
176+
177+ template <> struct kernel_arg_t <unsigned long >
178+ {
179+ static constexpr DPCTLKernelArgType value = DPCTL_UNSIGNED_LONG;
180+ };
181+
182+ template <> struct kernel_arg_t <long long >
183+ {
184+ static constexpr DPCTLKernelArgType value = DPCTL_LONG_LONG;
185+ };
186+
187+ template <> struct kernel_arg_t <unsigned long long >
188+ {
189+ static constexpr DPCTLKernelArgType value = DPCTL_UNSIGNED_LONG_LONG;
190+ };
191+
192+ /*
193+ template <>
194+ struct kernel_arg_t<size_t> {
195+ static constexpr DPCTLKernelArgType value = DPCTL_SIZE_T;
196+ };
197+ */
198+
199+ template <> struct kernel_arg_t <float >
200+ {
201+ static constexpr DPCTLKernelArgType value = DPCTL_FLOAT;
202+ };
203+
204+ template <> struct kernel_arg_t <double >
205+ {
206+ static constexpr DPCTLKernelArgType value = DPCTL_DOUBLE;
207+ };
208+
209+ /*
210+ template <>
211+ struct kernel_arg_t<long double> {
212+ static constexpr DPCTLKernelArgType value = DPCTL_LONG_DOUBLE;
213+ };
214+ */
215+
216+ #ifdef USE_FUNCTOR
217+ template <typename name, class kernelFunc >
218+ auto make_cgh_function (int n, kernelFunc func)
219+ {
220+ auto Kernel = [&](sycl::handler &cgh) {
221+ cgh.parallel_for <name>(sycl::range<1 >(n), func);
222+ };
223+ return Kernel;
224+ };
225+
226+ template <typename Ty, typename scT> struct Add
227+ {
228+ const Ty *in1, *in2;
229+ Ty *out;
230+ scT val;
231+ Add (const Ty *a, const Ty *b, Ty *c, scT d) : in1(a), in2(b), out(c), val(d)
232+ {
233+ }
234+ void operator ()(sycl::id<1 > myId) const
235+ {
236+ auto gid = myId[0 ];
237+ out[gid] = in1[gid] + val * in2[gid];
238+ return ;
239+ }
240+ };
241+
242+ template <typename T> struct PopulateA
243+ {
244+ T *out;
245+ PopulateA (T *a) : out(a) {}
246+ void operator ()(sycl::id<1 > myId) const
247+ {
248+ auto gid = myId[0 ];
249+ out[gid] = T (1 );
250+ return ;
251+ };
252+ };
253+
254+ template <typename T> struct PopulateB
255+ {
256+ T *out;
257+ PopulateB (T *b) : out(b) {}
258+ void operator ()(sycl::id<1 > myId) const
259+ {
260+ auto gid = myId[0 ];
261+ out[gid] = T (gid);
262+ return ;
263+ };
264+ };
265+ #endif
266+
267+ template <typename T, typename scT>
268+ bool common_submit_range_fn (sycl::queue &q, size_t n, scT val)
269+ {
270+ sycl::program program (q.get_context ());
271+
272+ T *a = sycl::malloc_device<T>(n, q);
273+ T *b = sycl::malloc_device<T>(n, q);
274+ T *c = sycl::malloc_device<T>(n, q);
275+ T *d = sycl::malloc_device<T>(n, q);
276+
277+ #ifdef USE_FUNCTOR
278+ auto popa_fn = make_cgh_function<class populate_a <T>>(n, PopulateA<T>(a));
279+ #else
280+ auto popa_fn = [&](sycl::handler &cgh) {
281+ cgh.parallel_for <populate_a<T>>(
282+ n, [=](sycl::id<1 > idx) { a[idx[0 ]] = T (1 ); });
283+ };
284+ #endif
285+ std::cout << q.get_context ().get_platform ().get_backend () << std::endl;
286+ sycl::event popa_ev = q.submit (popa_fn);
287+
288+ #ifdef USE_FUNCTOR
289+ auto popb_fn = make_cgh_function<class populate_b <T>>(n, PopulateB<T>(b));
290+ #else
291+ auto popb_fn = [&](sycl::handler &cgh) {
292+ cgh.parallel_for <populate_b<T>>(
293+ n, [=](sycl::id<1 > idx) { b[idx[0 ]] = T (idx[0 ]); });
294+ };
295+ #endif
296+ sycl::event popb_ev = q.submit (popb_fn);
297+
298+ #ifdef USE_FUNCTOR
299+ auto add_fn = make_cgh_function<class add_a_and_b <T, scT>>(
300+ n, Add<T, scT>(a, b, c, val));
301+ #else
302+ auto add_fn = [&](sycl::handler &cgh) {
303+ cgh.depends_on ({popa_ev, popb_ev});
304+ cgh.parallel_for <add_a_and_b<T, scT>>(n, [=](sycl::id<1 > idx) {
305+ const auto gid = idx[0 ];
306+ const T va = a[gid];
307+ const T vb = b[gid];
308+ c[gid] = va + val * vb;
309+ });
310+ };
311+ #endif
312+
313+ sycl::event add_ev = q.submit (add_fn);
314+ add_ev.wait_and_throw ();
315+
316+ program.build_with_kernel_type <add_a_and_b<T, scT>>();
317+ auto kern = program.get_kernel <add_a_and_b<T, scT>>();
318+
319+ DPCTLSyclQueueRef QRef = reinterpret_cast <DPCTLSyclQueueRef>(&q);
320+ DPCTLSyclKernelRef KRef = reinterpret_cast <DPCTLSyclKernelRef>(&kern);
321+ DPCTLSyclEventRef PopAERef = reinterpret_cast <DPCTLSyclEventRef>(&popa_ev);
322+ DPCTLSyclEventRef PopBERef = reinterpret_cast <DPCTLSyclEventRef>(&popb_ev);
323+
324+ void *args2[4 ] = {reinterpret_cast <void *>(a), reinterpret_cast <void *>(b),
325+ reinterpret_cast <void *>(d),
326+ reinterpret_cast <void *>(&val)};
327+ DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_VOID_PTR,
328+ DPCTL_VOID_PTR,
329+ kernel_arg_t <scT>::value};
330+ size_t Range[] = {n};
331+ DPCTLSyclEventRef events[2 ] = {PopAERef, PopBERef};
332+ auto ERef = DPCTLQueue_SubmitRange (KRef, QRef, args2, addKernelArgTypes, 4 ,
333+ Range, 1 , events, 2 );
334+ DPCTLQueue_Wait (QRef);
335+ DPCTLEvent_Delete (ERef);
336+
337+ T *host_data = new T[n];
338+ q.memcpy (host_data, d, n * sizeof (T));
339+ q.wait_and_throw ();
340+
341+ bool worked = true ;
342+ for (size_t i = 0 ; i < n; ++i) {
343+ worked = worked && (host_data[i] == T (1 ) + val * T (i));
344+ }
345+
346+ sycl::free (a, q);
347+ sycl::free (b, q);
348+ sycl::free (c, q);
349+ sycl::free (d, q);
350+
351+ return worked;
352+ };
353+
354+ } // end of anonymous namespace
355+
356+ struct TestQueueSubmitRange : public ::testing::Test
357+ {
358+ sycl::queue q;
359+ size_t n_elems = 512 ;
360+
361+ TestQueueSubmitRange () : q(sycl::default_selector{}) {}
362+ ~TestQueueSubmitRange () {}
363+ };
364+
365+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeInt)
366+ {
367+ bool worked = false ;
368+ worked = common_submit_range_fn<int , int >(q, n_elems, int (-1 ));
369+ EXPECT_TRUE (worked);
370+ }
371+
372+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeUnsignedInt)
373+ {
374+ bool worked = false ;
375+ worked = common_submit_range_fn<unsigned int , unsigned int >(q, n_elems, 2 );
376+ EXPECT_TRUE (worked);
377+ }
378+
379+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeShort)
380+ {
381+ bool worked = false ;
382+ worked = common_submit_range_fn<short , short >(q, n_elems, short (-1 ));
383+ EXPECT_TRUE (worked);
384+ }
385+
386+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeLong)
387+ {
388+ bool worked = false ;
389+ worked = common_submit_range_fn<long , long >(q, n_elems, -1 );
390+ EXPECT_TRUE (worked);
391+ }
392+
393+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeUnsignedLong)
394+ {
395+ bool worked = false ;
396+ worked =
397+ common_submit_range_fn<unsigned long , unsigned long >(q, n_elems, 2 );
398+ EXPECT_TRUE (worked);
399+ }
400+
401+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeLongLong)
402+ {
403+ bool worked = false ;
404+ worked = common_submit_range_fn<long long , long long >(q, n_elems, -1 );
405+ EXPECT_TRUE (worked);
406+ }
407+
408+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeUnsignedLongLong)
409+ {
410+ bool worked = false ;
411+ worked = common_submit_range_fn<unsigned long long , unsigned long long >(
412+ q, n_elems, 2 );
413+ EXPECT_TRUE (worked);
414+ }
415+
416+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeFloat)
417+ {
418+ bool worked = false ;
419+ worked = common_submit_range_fn<float , float >(q, n_elems, 0.5 );
420+ EXPECT_TRUE (worked);
421+ }
422+
423+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeDouble)
424+ {
425+ bool worked = false ;
426+ worked = common_submit_range_fn<double , double >(q, n_elems, 0.5 );
427+ EXPECT_TRUE (worked);
428+ }
0 commit comments