diff --git a/examples/cython/sycl_buffer/use_sycl_buffer.cpp b/examples/cython/sycl_buffer/use_sycl_buffer.cpp index c49e8b3204..ace6a594ba 100644 --- a/examples/cython/sycl_buffer/use_sycl_buffer.cpp +++ b/examples/cython/sycl_buffer/use_sycl_buffer.cpp @@ -1,108 +1,111 @@ -#include #include "use_sycl_buffer.h" -#include #include "dpctl_sycl_types.h" +#include +#include -int -c_columnwise_total(DPCTLSyclQueueRef q_ref, size_t n, size_t m, double *mat, double *ct) { +int c_columnwise_total(DPCTLSyclQueueRef q_ref, + size_t n, + size_t m, + double *mat, + double *ct) +{ sycl::queue q = *(reinterpret_cast(q_ref)); - sycl::buffer mat_buffer = sycl::buffer(mat, sycl::range<1>(n * m)); + sycl::buffer mat_buffer = + sycl::buffer(mat, sycl::range<1>(n * m)); sycl::buffer ct_buffer = sycl::buffer(ct, sycl::range<1>(m)); double *ones = reinterpret_cast(malloc(n * sizeof(double))); { - sycl::buffer ones_buffer = sycl::buffer(ones, sycl::range<1>(n)); - - try { - auto ev = q.submit([&](sycl::handler &cgh) { - auto ones_acc = ones_buffer.get_access(cgh); - cgh.fill(ones_acc, double(1.0)); - }); - - ev.wait_and_throw(); - } - catch (sycl::exception const& e) { - std::cout << "\t\tCaught synchronous SYCL exception during fill:\n" - << e.what() << std::endl << "OpenCL status: " << e.get_cl_code() << std::endl; - goto cleanup; - } - - try { - oneapi::mkl::blas::row_major::gemv( - q, - oneapi::mkl::transpose::trans, - n, m, double(1.0), mat_buffer, m, - ones_buffer, 1, - double(0.0), ct_buffer, 1); - q.wait(); - } - catch (sycl::exception const &e) { - std::cout << "\t\tCaught synchronous SYCL exception during GEMV:\n" - << e.what() << std::endl << "OpenCL status: " << e.get_cl_code() << std::endl; - goto cleanup; - } + sycl::buffer ones_buffer = + sycl::buffer(ones, sycl::range<1>(n)); + + try { + auto ev = q.submit([&](sycl::handler &cgh) { + auto ones_acc = + ones_buffer.get_access(cgh); + cgh.fill(ones_acc, double(1.0)); + }); + + ev.wait_and_throw(); + } catch (sycl::exception const &e) { + std::cout << "\t\tCaught synchronous SYCL exception during fill:\n" + << e.what() << std::endl + << "OpenCL status: " << e.get_cl_code() << std::endl; + goto cleanup; + } + + try { + oneapi::mkl::blas::row_major::gemv( + q, oneapi::mkl::transpose::trans, n, m, double(1.0), mat_buffer, + m, ones_buffer, 1, double(0.0), ct_buffer, 1); + q.wait(); + } catch (sycl::exception const &e) { + std::cout << "\t\tCaught synchronous SYCL exception during GEMV:\n" + << e.what() << std::endl + << "OpenCL status: " << e.get_cl_code() << std::endl; + goto cleanup; + } } - + free(ones); return 0; - cleanup: +cleanup: free(ones); return -1; } -inline size_t upper_multiple(size_t n, size_t wg) { return wg * ((n + wg - 1)/wg); } +inline size_t upper_multiple(size_t n, size_t wg) +{ + return wg * ((n + wg - 1) / wg); +} -int -c_columnwise_total_no_mkl(DPCTLSyclQueueRef q_ref, size_t n, size_t m, double *mat, double *ct) { +int c_columnwise_total_no_mkl(DPCTLSyclQueueRef q_ref, + size_t n, + size_t m, + double *mat, + double *ct) +{ sycl::queue q = *(reinterpret_cast(q_ref)); - sycl::buffer mat_buffer = sycl::buffer(mat, sycl::range<2>(n, m)); + sycl::buffer mat_buffer = + sycl::buffer(mat, sycl::range<2>(n, m)); sycl::buffer ct_buffer = sycl::buffer(ct, sycl::range<1>(m)); - auto e = q.submit( - [&](sycl::handler &h) { - sycl::accessor ct_acc {ct_buffer, h, sycl::write_only}; - h.parallel_for( - sycl::range<1>(m), - [=](sycl::id<1> i){ - ct_acc[i] = 0.0; - }); - }); + auto e = q.submit([&](sycl::handler &h) { + sycl::accessor ct_acc{ct_buffer, h, sycl::write_only}; + h.parallel_for(sycl::range<1>(m), + [=](sycl::id<1> i) { ct_acc[i] = 0.0; }); + }); constexpr size_t wg = 256; - auto e2 = q.submit( - [&](sycl::handler &h) { - - sycl::accessor mat_acc {mat_buffer, h, sycl::read_only}; - sycl::accessor ct_acc {ct_buffer, h}; - h.depends_on(e); - - sycl::range<2> global {upper_multiple(n, wg), m}; - sycl::range<2> local {wg, 1}; - - h.parallel_for( - sycl::nd_range<2>(global, local), - [=](sycl::nd_item<2> it) { - size_t i = it.get_global_id(0); - size_t j = it.get_global_id(1); - double group_sum = sycl::ONEAPI::reduce( - it.get_group(), - (i < n) ? mat_acc[it.get_global_id()] : 0.0, - std::plus() - ); - if (it.get_local_id(0) == 0) { - sycl::ONEAPI::atomic_ref< - double, - sycl::ONEAPI::memory_order::relaxed, - sycl::ONEAPI::memory_scope::system, - sycl::access::address_space::global_space>(ct_acc[j]) += group_sum; - } - }); - }); + auto e2 = q.submit([&](sycl::handler &h) { + sycl::accessor mat_acc{mat_buffer, h, sycl::read_only}; + sycl::accessor ct_acc{ct_buffer, h}; + h.depends_on(e); + + sycl::range<2> global{upper_multiple(n, wg), m}; + sycl::range<2> local{wg, 1}; + + h.parallel_for( + sycl::nd_range<2>(global, local), [=](sycl::nd_item<2> it) { + size_t i = it.get_global_id(0); + size_t j = it.get_global_id(1); + double group_sum = sycl::ONEAPI::reduce( + it.get_group(), (i < n) ? mat_acc[it.get_global_id()] : 0.0, + std::plus()); + if (it.get_local_id(0) == 0) { + sycl::ONEAPI::atomic_ref< + double, sycl::ONEAPI::memory_order::relaxed, + sycl::ONEAPI::memory_scope::system, + sycl::access::address_space::global_space>(ct_acc[j]) += + group_sum; + } + }); + }); e2.wait_and_throw(); return 0; diff --git a/examples/cython/sycl_buffer/use_sycl_buffer.h b/examples/cython/sycl_buffer/use_sycl_buffer.h index 51ef63eab9..66bfe756bc 100644 --- a/examples/cython/sycl_buffer/use_sycl_buffer.h +++ b/examples/cython/sycl_buffer/use_sycl_buffer.h @@ -1,7 +1,13 @@ -#include #include "dpctl_sycl_types.h" +#include -extern int c_columnwise_total( - DPCTLSyclQueueRef q, size_t n, size_t m, double *mat, double *ct); -extern int c_columnwise_total_no_mkl( - DPCTLSyclQueueRef q, size_t n, size_t m, double *mat, double *ct); +extern int c_columnwise_total(DPCTLSyclQueueRef q, + size_t n, + size_t m, + double *mat, + double *ct); +extern int c_columnwise_total_no_mkl(DPCTLSyclQueueRef q, + size_t n, + size_t m, + double *mat, + double *ct); diff --git a/examples/cython/sycl_direct_linkage/sycl_function.cpp b/examples/cython/sycl_direct_linkage/sycl_function.cpp index ad48580aaf..f38896adf0 100644 --- a/examples/cython/sycl_direct_linkage/sycl_function.cpp +++ b/examples/cython/sycl_direct_linkage/sycl_function.cpp @@ -1,51 +1,55 @@ -#include #include "sycl_function.hpp" -#include #include "mkl.h" +#include +#include -int c_columnwise_total(cl::sycl::queue &q, size_t n, size_t m, double *mat, double *ct) { - sycl::buffer mat_buffer = sycl::buffer(mat, sycl::range<1>(n * m)); +int c_columnwise_total(cl::sycl::queue &q, + size_t n, + size_t m, + double *mat, + double *ct) +{ + sycl::buffer mat_buffer = + sycl::buffer(mat, sycl::range<1>(n * m)); sycl::buffer ct_buffer = sycl::buffer(ct, sycl::range<1>(m)); double *ones = reinterpret_cast(malloc(n * sizeof(double))); { - sycl::buffer ones_buffer = sycl::buffer(ones, sycl::range<1>(n)); + sycl::buffer ones_buffer = + sycl::buffer(ones, sycl::range<1>(n)); - try { - auto ev = q.submit([&](sycl::handler &cgh) { - auto ones_acc = ones_buffer.get_access(cgh); - cgh.fill(ones_acc, double(1.0)); - }); - - ev.wait_and_throw(); - } - catch (sycl::exception const& e) { - std::cout << "\t\tCaught synchronous SYCL exception during fill:\n" - << e.what() << std::endl << "OpenCL status: " << e.get_cl_code() << std::endl; - goto cleanup; - } + try { + auto ev = q.submit([&](sycl::handler &cgh) { + auto ones_acc = + ones_buffer.get_access(cgh); + cgh.fill(ones_acc, double(1.0)); + }); - try { - oneapi::mkl::blas::row_major::gemv( - q, - oneapi::mkl::transpose::trans, - n, m, double(1.0), mat_buffer, m, - ones_buffer, 1, - double(0.0), ct_buffer, 1); - q.wait(); - } - catch (sycl::exception const &e) { - std::cout << "\t\tCaught synchronous SYCL exception during GEMV:\n" - << e.what() << std::endl << "OpenCL status: " << e.get_cl_code() << std::endl; - goto cleanup; - } + ev.wait_and_throw(); + } catch (sycl::exception const &e) { + std::cout << "\t\tCaught synchronous SYCL exception during fill:\n" + << e.what() << std::endl + << "OpenCL status: " << e.get_cl_code() << std::endl; + goto cleanup; + } + + try { + oneapi::mkl::blas::row_major::gemv( + q, oneapi::mkl::transpose::trans, n, m, double(1.0), mat_buffer, + m, ones_buffer, 1, double(0.0), ct_buffer, 1); + q.wait(); + } catch (sycl::exception const &e) { + std::cout << "\t\tCaught synchronous SYCL exception during GEMV:\n" + << e.what() << std::endl + << "OpenCL status: " << e.get_cl_code() << std::endl; + goto cleanup; + } } - + free(ones); return 0; - cleanup: +cleanup: free(ones); return -1; } - diff --git a/examples/cython/sycl_direct_linkage/sycl_function.hpp b/examples/cython/sycl_direct_linkage/sycl_function.hpp index 51e5e8474b..88ca1e2712 100644 --- a/examples/cython/sycl_direct_linkage/sycl_function.hpp +++ b/examples/cython/sycl_direct_linkage/sycl_function.hpp @@ -1,3 +1,7 @@ #include -int c_columnwise_total(cl::sycl::queue&, size_t n, size_t m, double *mat, double *ct); +int c_columnwise_total(cl::sycl::queue &, + size_t n, + size_t m, + double *mat, + double *ct); diff --git a/examples/cython/usm_memory/sycl_blackscholes.cpp b/examples/cython/usm_memory/sycl_blackscholes.cpp index 759e863fda..798716ccaf 100644 --- a/examples/cython/usm_memory/sycl_blackscholes.cpp +++ b/examples/cython/usm_memory/sycl_blackscholes.cpp @@ -1,11 +1,10 @@ +#include "sycl_blackscholes.hpp" +#include "dpctl_sycl_types.h" #include #include #include -#include "dpctl_sycl_types.h" -#include "sycl_blackscholes.hpp" -template -class black_scholes_kernel; +template class black_scholes_kernel; constexpr int n_params = 5; constexpr int n_params_next_pow2 = 8; @@ -19,153 +18,200 @@ constexpr int VOLATILITY = 4; constexpr int CALL = 0; constexpr int PUT = 1; -template -extern void cpp_blackscholes(DPCTLSyclQueueRef q_ptr, size_t n_opts, T* params, T* callput) { +template +extern void +cpp_blackscholes(DPCTLSyclQueueRef q_ptr, size_t n_opts, T *params, T *callput) +{ using data_t = T; sycl::queue q = *(reinterpret_cast(q_ptr)); auto ctx = q.get_context(); { - sycl::usm::alloc params_type = sycl::get_pointer_type(params, ctx); - if (params_type != sycl::usm::alloc::shared) { - throw std::runtime_error("Input option_params to cpp_blackscholes is not a USM-shared pointer."); - } + sycl::usm::alloc params_type = sycl::get_pointer_type(params, ctx); + if (params_type != sycl::usm::alloc::shared) { + throw std::runtime_error("Input option_params to cpp_blackscholes " + "is not a USM-shared pointer."); + } } { - sycl::usm::alloc callput_type = sycl::get_pointer_type(callput, ctx); - if (callput_type != sycl::usm::alloc::shared) { - throw std::runtime_error("Input callput to cpp_blackscholes is not a USM-shared pointer."); - } + sycl::usm::alloc callput_type = sycl::get_pointer_type(callput, ctx); + if (callput_type != sycl::usm::alloc::shared) { + throw std::runtime_error("Input callput to cpp_blackscholes is not " + "a USM-shared pointer."); + } } - auto e = q.submit( - [&](sycl::handler &cgh){ + auto e = q.submit([&](sycl::handler &cgh) { + data_t zero = data_t(0), one = data_t(1), two = data_t(2); + data_t quarter = one / data_t(4); + data_t half = one / two; - data_t zero = data_t(0), one = data_t(1), two = data_t(2); - data_t quarter = one / data_t(4); - data_t half = one / two; - - cgh.parallel_for>( + cgh.parallel_for>( sycl::range<1>(n_opts), - [=](sycl::id<1> idx) { - const size_t i = n_params * idx[0]; - const data_t opt_price = params[i + PRICE]; - const data_t opt_strike = params[i + STRIKE]; - const data_t opt_maturity = params[i + MATURITY]; - const data_t opt_rate = params[i + RATE]; - const data_t opt_volatility = params[i + VOLATILITY]; - data_t a, b, c, y, z, e, d1, d1c, d2, d2c, w1, w2; - data_t mr = -opt_rate, sig_sig_two = two * opt_volatility * opt_volatility; - - a = cl::sycl::log( opt_price / opt_strike ); - b = opt_maturity * mr; - z = opt_maturity * sig_sig_two; - - c = quarter * z; - e = cl::sycl::exp( b ); - y = cl::sycl::rsqrt( z ); - - a = b - a; - w1 = ( a - c ) * y; - w2 = ( a + c ) * y; - - if (w1 < zero) { - d1 = cl::sycl::erfc(w1) * half; - d1c = one - d1; - } else { - d1c = cl::sycl::erfc(-w1) * half; - d1 = one - d1c; - } - if (w2 < zero) { - d2 = cl::sycl::erfc(w2) * half; - d2c = one - d2; - } else { - d2c = cl::sycl::erfc(-w2) * half; - d2 = one - d2c; - } - - e *= opt_strike; - data_t call_price = opt_price * d1 - e * d2; - data_t put_price = e * d2c - opt_price * d1c; - - const size_t callput_i = n_prices * idx[0]; - callput[callput_i + CALL] = call_price; - callput[callput_i + PUT ] = put_price; - }); - }); + [=](sycl::id<1> idx) + { + const size_t i = n_params * idx[0]; + const data_t opt_price = params[i + PRICE]; + const data_t opt_strike = params[i + STRIKE]; + const data_t opt_maturity = params[i + MATURITY]; + const data_t opt_rate = params[i + RATE]; + const data_t opt_volatility = params[i + VOLATILITY]; + data_t a, b, c, y, z, e, d1, d1c, d2, d2c, w1, w2; + data_t mr = -opt_rate, + sig_sig_two = two * opt_volatility * opt_volatility; + + a = cl::sycl::log(opt_price / opt_strike); + b = opt_maturity * mr; + z = opt_maturity * sig_sig_two; + + c = quarter * z; + e = cl::sycl::exp(b); + y = cl::sycl::rsqrt(z); + + a = b - a; + w1 = (a - c) * y; + w2 = (a + c) * y; + + if (w1 < zero) { + d1 = cl::sycl::erfc(w1) * half; + d1c = one - d1; + } + else { + d1c = cl::sycl::erfc(-w1) * half; + d1 = one - d1c; + } + if (w2 < zero) { + d2 = cl::sycl::erfc(w2) * half; + d2c = one - d2; + } + else { + d2c = cl::sycl::erfc(-w2) * half; + d2 = one - d2c; + } + + e *= opt_strike; + data_t call_price = opt_price * d1 - e * d2; + data_t put_price = e * d2c - opt_price * d1c; + + const size_t callput_i = n_prices * idx[0]; + callput[callput_i + CALL] = call_price; + callput[callput_i + PUT] = put_price; + }); + }); e.wait_and_throw(); return; } -template -void cpp_populate_params(DPCTLSyclQueueRef q_ptr, size_t n_opts, T* params, T pl, T ph, T sl, T sh, T tl, T th, T rl, T rh, T vl, T vh, int seed) { +template +void cpp_populate_params(DPCTLSyclQueueRef q_ptr, + size_t n_opts, + T *params, + T pl, + T ph, + T sl, + T sh, + T tl, + T th, + T rl, + T rh, + T vl, + T vh, + int seed) +{ sycl::queue q = *(reinterpret_cast(q_ptr)); auto ctx = q.get_context(); { - sycl::usm::alloc params_type = sycl::get_pointer_type(params, ctx); - if (params_type != sycl::usm::alloc::shared) { - throw std::runtime_error("Input option_params to cpp_blackscholes is not a USM-shared pointer."); - } + sycl::usm::alloc params_type = sycl::get_pointer_type(params, ctx); + if (params_type != sycl::usm::alloc::shared) { + throw std::runtime_error("Input option_params to cpp_blackscholes " + "is not a USM-shared pointer."); + } } - sycl::event e = q.submit( - [&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::range<1>(n_opts), - [=](sycl::item<1> idx) { - size_t i = n_params * idx.get_id(0); - size_t j = n_params_next_pow2 * idx.get_id(0); - - // create engine to sample 5 parameters per workers - oneapi::mkl::rng::device::philox4x32x10 engine(seed, j); - oneapi::mkl::rng::device::uniform distr; - - sycl::vec res = oneapi::mkl::rng::device::generate(distr, engine); - - { - const int pos = PRICE; - auto u = res[pos]; - params[i + pos] = pl * u + ph * (T(1)-u); - } - { - const int pos = STRIKE; - auto u = res[pos]; - params[i + pos] = sl * u + sh * (T(1)-u); - } - { - const int pos = MATURITY; - auto u = res[pos]; - params[i + pos] = tl * u + th * (T(1)-u); - } - { - const int pos = RATE; - auto u = res[pos]; - params[i + pos] = rl * u + rh * (T(1)-u); - } - { - const int pos = VOLATILITY; - auto u = res[pos]; - params[i + pos] = vl * u + vh * (T(1)-u); - } - }); - }); + sycl::event e = q.submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>(n_opts), [=](sycl::item<1> idx) { + size_t i = n_params * idx.get_id(0); + size_t j = n_params_next_pow2 * idx.get_id(0); + + // create engine to sample 5 parameters per workers + oneapi::mkl::rng::device::philox4x32x10 engine( + seed, j); + oneapi::mkl::rng::device::uniform distr; + + sycl::vec res = + oneapi::mkl::rng::device::generate(distr, engine); + + { + const int pos = PRICE; + auto u = res[pos]; + params[i + pos] = pl * u + ph * (T(1) - u); + } + { + const int pos = STRIKE; + auto u = res[pos]; + params[i + pos] = sl * u + sh * (T(1) - u); + } + { + const int pos = MATURITY; + auto u = res[pos]; + params[i + pos] = tl * u + th * (T(1) - u); + } + { + const int pos = RATE; + auto u = res[pos]; + params[i + pos] = rl * u + rh * (T(1) - u); + } + { + const int pos = VOLATILITY; + auto u = res[pos]; + params[i + pos] = vl * u + vh * (T(1) - u); + } + }); + }); e.wait_and_throw(); } // instantation for object files to not be empty -template void cpp_blackscholes(DPCTLSyclQueueRef q_ptr, size_t n_opts, double* params, double* callput); -template void cpp_blackscholes(DPCTLSyclQueueRef q_ptr, size_t n_opts, float* params, float* callput); - - -template void cpp_populate_params(DPCTLSyclQueueRef q_ptr, size_t n_opts, double* params, - double pl, double ph, double sl, double sh, double tl, double th, - double rl, double rh, double vl, double vh, int seed); -template void cpp_populate_params(DPCTLSyclQueueRef q_ptr, size_t n_opts, float* params, - float pl, float ph, float sl, float sh, float tl, float th, - float rl, float rh, float vl, float vh, int seed); +template void cpp_blackscholes(DPCTLSyclQueueRef q_ptr, + size_t n_opts, + double *params, + double *callput); +template void cpp_blackscholes(DPCTLSyclQueueRef q_ptr, + size_t n_opts, + float *params, + float *callput); + +template void cpp_populate_params(DPCTLSyclQueueRef q_ptr, + size_t n_opts, + double *params, + double pl, + double ph, + double sl, + double sh, + double tl, + double th, + double rl, + double rh, + double vl, + double vh, + int seed); +template void cpp_populate_params(DPCTLSyclQueueRef q_ptr, + size_t n_opts, + float *params, + float pl, + float ph, + float sl, + float sh, + float tl, + float th, + float rl, + float rh, + float vl, + float vh, + int seed); diff --git a/examples/cython/usm_memory/sycl_blackscholes.hpp b/examples/cython/usm_memory/sycl_blackscholes.hpp index bf3fbf849c..7a2d48e0b8 100644 --- a/examples/cython/usm_memory/sycl_blackscholes.hpp +++ b/examples/cython/usm_memory/sycl_blackscholes.hpp @@ -1,10 +1,22 @@ -#include #include "dpctl_sycl_types.h" +#include -template -extern void cpp_blackscholes(DPCTLSyclQueueRef q, size_t n_opts, T* params, T* callput); +template +extern void +cpp_blackscholes(DPCTLSyclQueueRef q, size_t n_opts, T *params, T *callput); -template -extern void cpp_populate_params(DPCTLSyclQueueRef q, size_t n_opts, T* params, - T pl, T ph, T sl, T sh, T tl, T th, T rl, T rh, T vl, T vh, - int seed); +template +extern void cpp_populate_params(DPCTLSyclQueueRef q, + size_t n_opts, + T *params, + T pl, + T ph, + T sl, + T sh, + T tl, + T th, + T rl, + T rh, + T vl, + T vh, + int seed);