IntelPython · antonwolfy · Jun 1, 2023 · May 2, 2023 · May 31, 2023 · May 31, 2023
@@ -17,6 +17,7 @@ jobs:
 
     env:
       python-ver: '3.9'
+      CHANNELS: '-c dppy/label/dev -c intel -c conda-forge --override-channels'
 
     steps:
       - name: Cancel Previous Runs
@@ -74,10 +75,10 @@ jobs:
       - name: Install dpnp dependencies
         run: |
           conda install dpctl mkl-devel-dpcpp onedpl-devel tbb-devel dpcpp_linux-64 \
-              cmake cython pytest ninja scikit-build -c dppy/label/dev -c intel -c conda-forge
+              cmake cython pytest ninja scikit-build sysroot_linux-64">=2.28" ${{ env.CHANNELS }}
 
       - name: Install cuPy dependencies
-        run: conda install -c conda-forge cupy cudatoolkit=10.0
+        run: conda install cupy cudatoolkit=10.0
 
       - name: Conda info
         run: conda info

@@ -15,6 +15,7 @@ jobs:
 
     env:
       python-ver: '3.10'
+      CHANNELS: '-c dppy/label/dev -c intel -c conda-forge --override-channels'
 
     steps:
       - name: Cancel Previous Runs
@@ -34,15 +35,14 @@ jobs:
           python-version: ${{ env.python-ver }}
           miniconda-version: 'latest'
           activate-environment: 'coverage'
-          channels: intel, conda-forge
 
       - name: Install Lcov
         run: |
           sudo apt-get install lcov
       - name: Install dpnp dependencies
         run: |
           conda install cython llvm cmake scikit-build ninja pytest pytest-cov coverage[toml] \
-              dppy/label/dev::dpctl dpcpp_linux-64  mkl-devel-dpcpp  tbb-devel  onedpl-devel
+              dpctl dpcpp_linux-64 sysroot_linux-64">=2.28" mkl-devel-dpcpp tbb-devel onedpl-devel ${{ env.CHANNELS }}
       - name: Conda info
         run: |
           conda info

@@ -57,6 +57,9 @@ set(CYTHON_FLAGS "-t -w \"${CMAKE_SOURCE_DIR}\"")
 find_package(Cython REQUIRED)
 find_package(Dpctl REQUIRED)
 
+message(STATUS "Dpctl_INCLUDE_DIRS=" ${Dpctl_INCLUDE_DIRS})
+message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR})
+
 if(WIN32)
     string(CONCAT WARNING_FLAGS
         "-Wall "

@@ -45,6 +45,7 @@ target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_
 target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
 
 target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
+target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
 
 if (WIN32)
   target_compile_options(${python_module_name} PRIVATE

@@ -26,7 +26,12 @@
 
 #include <pybind11/pybind11.h>
 
+// dpctl tensor headers
+#include "utils/memory_overlap.hpp"
+#include "utils/type_utils.hpp"
+
 #include "heevd.hpp"
+#include "types_matrix.hpp"
 
 #include "dpnp_utils.hpp"
 
@@ -42,19 +47,34 @@ namespace lapack
 
 namespace mkl_lapack = oneapi::mkl::lapack;
 namespace py = pybind11;
+namespace type_utils = dpctl::tensor::type_utils;
+
+typedef sycl::event (*heevd_impl_fn_ptr_t)(sycl::queue,
+                                           const oneapi::mkl::job,
+                                           const oneapi::mkl::uplo,
+                                           const std::int64_t,
+                                           char*,
+                                           char*,
+                                           std::vector<sycl::event>&,
+                                           const std::vector<sycl::event>&);
+
+static heevd_impl_fn_ptr_t heevd_dispatch_table[dpctl_td_ns::num_types][dpctl_td_ns::num_types];
 
 template <typename T, typename RealT>
-static sycl::event call_heevd(sycl::queue exec_q,
+static sycl::event heevd_impl(sycl::queue exec_q,
                               const oneapi::mkl::job jobz,
                               const oneapi::mkl::uplo upper_lower,
                               const std::int64_t n,
-                              T* a,
-                              RealT* w,
+                              char* in_a,
+                              char* out_w,
                               std::vector<sycl::event>& host_task_events,
                               const std::vector<sycl::event>& depends)
 {
-    validate_type_for_device<T>(exec_q);
-    validate_type_for_device<RealT>(exec_q);
+    type_utils::validate_type_for_device<T>(exec_q);
+    type_utils::validate_type_for_device<RealT>(exec_q);
+
+    T* a = reinterpret_cast<T*>(in_a);
+    RealT* w = reinterpret_cast<RealT*>(out_w);
 
     const std::int64_t lda = std::max<size_t>(1UL, n);
     const std::int64_t scratchpad_size = mkl_lapack::heevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
@@ -163,13 +183,11 @@ std::pair<sycl::event, sycl::event> heevd(sycl::queue exec_q,
         throw py::value_error("Execution queue is not compatible with allocation queues");
     }
 
-    // check that arrays do not overlap, and concurrent access is safe.
-    // TODO: need to be exposed by DPCTL headers
-    // auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
-    // if (overlap(eig_vecs, eig_vals))
-    // {
-    //     throw py::value_error("Arrays index overlapping segments of memory");
-    // }
+    auto const& overlap = dpctl::tensor::overlap::MemoryOverlap();
+    if (overlap(eig_vecs, eig_vals))
+    {
+        throw py::value_error("Arrays with eigenvectors and eigenvalues are overlapping segments of memory");
+    }
 
     bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
     bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
@@ -182,38 +200,51 @@ std::pair<sycl::event, sycl::event> heevd(sycl::queue exec_q,
         throw py::value_error("An array with output eigenvalues must be C-contiguous");
     }
 
-    int eig_vecs_typenum = eig_vecs.get_typenum();
-    int eig_vals_typenum = eig_vals.get_typenum();
-    auto const& dpctl_capi = dpctl::detail::dpctl_capi::get();
+    auto array_types = dpctl_td_ns::usm_ndarray_types();
+    int eig_vecs_type_id = array_types.typenum_to_lookup_id(eig_vecs.get_typenum());
+    int eig_vals_type_id = array_types.typenum_to_lookup_id(eig_vals.get_typenum());
 
-    sycl::event heevd_ev;
-    std::vector<sycl::event> host_task_events;
+    heevd_impl_fn_ptr_t heevd_fn = heevd_dispatch_table[eig_vecs_type_id][eig_vals_type_id];
+    if (heevd_fn == nullptr)
+    {
+        throw py::value_error("No heevd implementation defined for a pair of type for eigenvectors and eigenvalues");
+    }
+
+    char* eig_vecs_data = eig_vecs.get_data();
+    char* eig_vals_data = eig_vals.get_data();
 
     const std::int64_t n = eig_vecs_shape[0];
     const oneapi::mkl::job jobz_val = static_cast<oneapi::mkl::job>(jobz);
     const oneapi::mkl::uplo uplo_val = static_cast<oneapi::mkl::uplo>(upper_lower);
 
-    if ((eig_vecs_typenum == dpctl_capi.UAR_CDOUBLE_) && (eig_vals_typenum == dpctl_capi.UAR_DOUBLE_))
-    {
-        std::complex<double>* a = reinterpret_cast<std::complex<double>*>(eig_vecs.get_data());
-        double* w = reinterpret_cast<double*>(eig_vals.get_data());
+    std::vector<sycl::event> host_task_events;
+    sycl::event heevd_ev =
+        heevd_fn(exec_q, jobz_val, uplo_val, n, eig_vecs_data, eig_vals_data, host_task_events, depends);
 
-        heevd_ev = call_heevd(exec_q, jobz_val, uplo_val, n, a, w, host_task_events, depends);
-    }
-    else if ((eig_vecs_typenum == dpctl_capi.UAR_CFLOAT_) && (eig_vals_typenum == dpctl_capi.UAR_FLOAT_))
-    {
-        std::complex<float>* a = reinterpret_cast<std::complex<float>*>(eig_vecs.get_data());
-        float* w = reinterpret_cast<float*>(eig_vals.get_data());
+    sycl::event args_ev = dpctl::utils::keep_args_alive(exec_q, {eig_vecs, eig_vals}, host_task_events);
+    return std::make_pair(args_ev, heevd_ev);
+}
 
-        heevd_ev = call_heevd(exec_q, jobz_val, uplo_val, n, a, w, host_task_events, depends);
-    }
-    else
+template <typename fnT, typename T, typename RealT>
+struct HeevdContigFactory
+{
+    fnT get()
     {
-        throw py::value_error("Unexpected types of either eigenvectors or eigenvalues");
+        if constexpr (types::HeevdTypePairSupportFactory<T, RealT>::is_defined)
+        {
+            return heevd_impl<T, RealT>;
+        }
+        else
+        {
+            return nullptr;
+        }
     }
+};
 
-    sycl::event args_ev = dpctl::utils::keep_args_alive(exec_q, {eig_vecs, eig_vals}, host_task_events);
-    return std::make_pair(args_ev, heevd_ev);
+void init_heevd_dispatch_table(void)
+{
+    dpctl_td_ns::DispatchTableBuilder<heevd_impl_fn_ptr_t, HeevdContigFactory, dpctl_td_ns::num_types> contig;
+    contig.populate_dispatch_table(heevd_dispatch_table);
 }
 }
 }

@@ -45,6 +45,8 @@ namespace lapack
                                                      dpctl::tensor::usm_ndarray eig_vecs,
                                                      dpctl::tensor::usm_ndarray eig_vals,
                                                      const std::vector<sycl::event>& depends);
+
+    extern void init_heevd_dispatch_table(void);
 }
 }
 }

@@ -33,25 +33,45 @@
 #include "heevd.hpp"
 #include "syevd.hpp"
 
+namespace lapack_ext = dpnp::backend::ext::lapack;
 namespace py = pybind11;
 
+// populate dispatch vectors of the LAPACK extension (currently syevd only)
+void init_dispatch_vectors(void)
+{
+    lapack_ext::init_syevd_dispatch_vector();
+}
+
+// populate dispatch tables of the LAPACK extension (currently heevd only)
+void init_dispatch_tables(void)
+{
+    lapack_ext::init_heevd_dispatch_table();
+}
+
 PYBIND11_MODULE(_lapack_impl, m)
 {
+    init_dispatch_vectors();
+    init_dispatch_tables();
+
     m.def("_heevd",
-          &dpnp::backend::ext::lapack::heevd,
+          &lapack_ext::heevd,
           "Call `heevd` from OneMKL LAPACK library to return "
           "the eigenvalues and eigenvectors of a complex Hermitian matrix",
           py::arg("sycl_queue"),
-          py::arg("jobz"), py::arg("upper_lower"),
-          py::arg("eig_vecs"), py::arg("eig_vals"),
+          py::arg("jobz"),
+          py::arg("upper_lower"),
+          py::arg("eig_vecs"),
+          py::arg("eig_vals"),
           py::arg("depends") = py::list());
 
     m.def("_syevd",
-          &dpnp::backend::ext::lapack::syevd,
+          &lapack_ext::syevd,
           "Call `syevd` from OneMKL LAPACK library to return "
           "the eigenvalues and eigenvectors of a real symmetric matrix",
           py::arg("sycl_queue"),
-          py::arg("jobz"), py::arg("upper_lower"),
-          py::arg("eig_vecs"), py::arg("eig_vals"),
+          py::arg("jobz"),
+          py::arg("upper_lower"),
+          py::arg("eig_vecs"),
+          py::arg("eig_vals"),
           py::arg("depends") = py::list());
 }