Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ install(DIRECTORY
FILES_MATCHING REGEX "\\.h(pp)?$"
)

# Locate Python (Development.Module component) ahead of enabling pybind11.
find_package(
    Python
    REQUIRED
    COMPONENTS Development.Module
)

# Define CMAKE_INSTALL_xxx: LIBDIR, INCLUDEDIR
include(GNUInstallDirs)

Expand Down
2 changes: 1 addition & 1 deletion dpctl/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
find_package(Python REQUIRED COMPONENTS Development.Module NumPy)
find_package(Python REQUIRED COMPONENTS NumPy)

# -t is to only Cythonize sources with timestamps newer than existing CXX files (if present)
# -w is to set working directory (and correctly set __pyx_f[] array of filenames)
Expand Down
1 change: 1 addition & 0 deletions dpctl/tensor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ set(_accumulator_sources
)
# Sources of the _tensor_accumulation_impl extension: the module entry point,
# the accumulators translation unit, and everything in _accumulator_sources.
set(_tensor_accumulation_impl_sources
    "${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/tensor_accumulation.cpp"
    "${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/accumulators.cpp"
    ${_accumulator_sources}
)

Expand Down
9 changes: 4 additions & 5 deletions dpctl/tensor/_copy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import dpctl.utils
from dpctl.tensor._data_types import _get_dtype
from dpctl.tensor._device import normalize_queue_device
from dpctl.tensor._tensor_accumulation_impl import mask_positions
from dpctl.tensor._type_utils import _dtype_supported_by_device_impl

from ._numpy_helper import normalize_axis_index
Expand Down Expand Up @@ -792,7 +793,7 @@ def _extract_impl(ary, ary_mask, axis=0):
exec_q = cumsum.sycl_queue
_manager = dpctl.utils.SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
mask_count = ti.mask_positions(
mask_count = mask_positions(
ary_mask, cumsum, sycl_queue=exec_q, depends=dep_evs
)
dst_shape = ary.shape[:pp] + (mask_count,) + ary.shape[pp + mask_nd :]
Expand Down Expand Up @@ -828,9 +829,7 @@ def _nonzero_impl(ary):
)
_manager = dpctl.utils.SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
mask_count = ti.mask_positions(
ary, cumsum, sycl_queue=exec_q, depends=dep_evs
)
mask_count = mask_positions(ary, cumsum, sycl_queue=exec_q, depends=dep_evs)
indexes_dt = ti.default_device_index_type(exec_q.sycl_device)
indexes = dpt.empty(
(ary.ndim, mask_count),
Expand Down Expand Up @@ -1050,7 +1049,7 @@ def _place_impl(ary, ary_mask, vals, axis=0):
exec_q = cumsum.sycl_queue
_manager = dpctl.utils.SequentialOrderManager[exec_q]
dep_ev = _manager.submitted_events
mask_count = ti.mask_positions(
mask_count = mask_positions(
ary_mask, cumsum, sycl_queue=exec_q, depends=dep_ev
)
expected_vals_shape = (
Expand Down
5 changes: 2 additions & 3 deletions dpctl/tensor/_indexing_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import dpctl.tensor as dpt
import dpctl.tensor._tensor_impl as ti
import dpctl.utils
from dpctl.tensor._tensor_accumulation_impl import mask_positions

from ._copy_utils import (
_extract_impl,
Expand Down Expand Up @@ -413,9 +414,7 @@ def place(arr, mask, vals):
cumsum = dpt.empty(mask.size, dtype="i8", sycl_queue=exec_q)
_manager = dpctl.utils.SequentialOrderManager[exec_q]
deps_ev = _manager.submitted_events
nz_count = ti.mask_positions(
mask, cumsum, sycl_queue=exec_q, depends=deps_ev
)
nz_count = mask_positions(mask, cumsum, sycl_queue=exec_q, depends=deps_ev)
if nz_count == 0:
return
if vals.size == 0:
Expand Down
5 changes: 3 additions & 2 deletions dpctl/tensor/_manipulation_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import dpctl.tensor as dpt
import dpctl.tensor._tensor_impl as ti
import dpctl.utils as dputils
from dpctl.tensor._tensor_accumulation_impl import _cumsum_1d

from ._copy_utils import _broadcast_strides
from ._numpy_helper import normalize_axis_index, normalize_axis_tuple
Expand Down Expand Up @@ -908,7 +909,7 @@ def repeat(x, repeats, /, *, axis=None):
sycl_queue=exec_q,
)
# _cumsum_1d synchronizes so `depends` ends here safely
res_axis_size = ti._cumsum_1d(
res_axis_size = _cumsum_1d(
rep_buf, cumsum, sycl_queue=exec_q, depends=[copy_ev]
)
if axis is not None:
Expand Down Expand Up @@ -940,7 +941,7 @@ def repeat(x, repeats, /, *, axis=None):
usm_type=usm_type,
sycl_queue=exec_q,
)
res_axis_size = ti._cumsum_1d(
res_axis_size = _cumsum_1d(
repeats, cumsum, sycl_queue=exec_q, depends=dep_evs
)
if axis is not None:
Expand Down
2 changes: 1 addition & 1 deletion dpctl/tensor/_set_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
_get_shape,
_validate_dtype,
)
from ._tensor_accumulation_impl import mask_positions
from ._tensor_elementwise_impl import _not_equal, _subtract
from ._tensor_impl import (
_copy_usm_ndarray_into_usm_ndarray,
Expand All @@ -34,7 +35,6 @@
_linspace_step,
_take,
default_device_index_type,
mask_positions,
)
from ._tensor_sorting_impl import (
_argsort_ascending,
Expand Down
17 changes: 17 additions & 0 deletions dpctl/tensor/libtensor/source/tensor_accumulation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,29 @@
//===----------------------------------------------------------------------===//

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include "accumulators.hpp"
#include "accumulators/accumulators_common.hpp"

namespace py = pybind11;

namespace py_int = dpctl::tensor::py_internal;

using py_int::py_cumsum_1d;
using py_int::py_mask_positions;

// Entry point of the `_tensor_accumulation_impl` Python extension module.
PYBIND11_MODULE(_tensor_accumulation_impl, m)
{
    // Populate the mask_positions and cumsum_1d dispatch vectors first,
    // ahead of registering any functions on the module.
    py_int::populate_mask_positions_dispatch_vectors();
    py_int::populate_cumsum_1d_dispatch_vectors();

    // Register the accumulator functions on this module.
    py_int::init_accumulator_functions(m);

    // mask_positions(mask, cumsum, sycl_queue, depends=[])
    m.def("mask_positions", &py_int::py_mask_positions, "",
          py::arg("mask"), py::arg("cumsum"), py::arg("sycl_queue"),
          py::arg("depends") = py::list());

    // _cumsum_1d(src, cumsum, sycl_queue, depends=[])
    m.def("_cumsum_1d", &py_int::py_cumsum_1d, "",
          py::arg("src"), py::arg("cumsum"), py::arg("sycl_queue"),
          py::arg("depends") = py::list());
}
14 changes: 2 additions & 12 deletions dpctl/tensor/libtensor/source/tensor_ctors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,12 @@ using dpctl::tensor::py_internal::usm_ndarray_put;
using dpctl::tensor::py_internal::usm_ndarray_take;

using dpctl::tensor::py_internal::py_extract;
using dpctl::tensor::py_internal::py_mask_positions;
// py_mask_positions is bound as `mask_positions` in _tensor_accumulation_impl.
using dpctl::tensor::py_internal::py_nonzero;
using dpctl::tensor::py_internal::py_place;

/* ================= Repeat ====================*/
using dpctl::tensor::py_internal::py_cumsum_1d;
// py_cumsum_1d is bound as `_cumsum_1d` in _tensor_accumulation_impl.
using dpctl::tensor::py_internal::py_repeat_by_scalar;
using dpctl::tensor::py_internal::py_repeat_by_sequence;

Expand Down Expand Up @@ -158,9 +158,6 @@ void init_dispatch_vectors(void)
populate_masked_extract_dispatch_vectors();
populate_masked_place_dispatch_vectors();

populate_mask_positions_dispatch_vectors();

populate_cumsum_1d_dispatch_vectors();
init_repeat_dispatch_vectors();

init_clip_dispatch_vectors();
Expand Down Expand Up @@ -402,13 +399,6 @@ PYBIND11_MODULE(_tensor_impl, m)
py::arg("dst"), py::arg("k") = 0, py::arg("sycl_queue"),
py::arg("depends") = py::list());

m.def("mask_positions", &py_mask_positions, "", py::arg("mask"),
py::arg("cumsum"), py::arg("sycl_queue"),
py::arg("depends") = py::list());

m.def("_cumsum_1d", &py_cumsum_1d, "", py::arg("src"), py::arg("cumsum"),
py::arg("sycl_queue"), py::arg("depends") = py::list());

m.def("_extract", &py_extract, "", py::arg("src"), py::arg("cumsum"),
py::arg("axis_start"), py::arg("axis_end"), py::arg("dst"),
py::arg("sycl_queue"), py::arg("depends") = py::list());
Expand Down
Loading