Skip to content

Commit

Permalink
Prevent reallocation of aligned memory (#572)
Browse files Browse the repository at this point in the history
* Allow aligned alloc to be more useful

* Auto update version

* Remove comments

* Fix testing failures

* Remove redundant functions from lightning.gpu (LG) & lightning.kokkos (LK)

* Re-add removed import

* Auto update version

* Auto update version

* Trigger CI

* Ensure dtype conversion happens when using a lower-precision state vector

* Remove unused imports

* Auto update version

* Trigger CI

* Auto update version

* Trigger CI

* Re-add missing block to lightning.qubit (LQ)

* Auto update version

* Trigger CI

* Add TODO and update changelog

* Ensure the pip-installed clang-format is on the PATH

---------

Co-authored-by: Dev version update bot <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
mlxd and github-actions[bot] committed Jan 4, 2024
1 parent dfad9d7 commit 3c648f7
Show file tree
Hide file tree
Showing 11 changed files with 58 additions and 44 deletions.
3 changes: 3 additions & 0 deletions .github/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@

### Improvements

* Ensure aligned memory is used for the NumPy arrays backing the state vector, without reallocations.
[(#572)](https://github.com/PennyLaneAI/pennylane-lightning/pull/572)

* Unify error messages for unsupported observables in shot-based measurements to better support Catalyst.
[(#577)](https://github.com/PennyLaneAI/pennylane-lightning/pull/577)

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ jobs:
uses: actions/checkout@v3

- name: Install dependencies
run: sudo apt update && sudo apt -y install python3 python3-pip && python3 -m pip install -r requirements-dev.txt
run: sudo apt update && sudo apt -y install python3 python3-pip && python -m pip install pip~=22.0 && python3 -m pip install -r requirements-dev.txt

- name: Run formatter
run: PATH=$PATH:$(dirname $(which python3)) ./bin/format --check ./pennylane_lightning/core/src
run: PATH=$PATH:$(dirname $(which python3))/ ./bin/format --check ./pennylane_lightning/core/src

tidy-cpp:
strategy:
Expand Down
2 changes: 1 addition & 1 deletion pennylane_lightning/core/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
Version number (major.minor.patch[-label])
"""

__version__ = "0.34.0-dev26"
__version__ = "0.34.0-dev27"
21 changes: 13 additions & 8 deletions pennylane_lightning/core/src/bindings/Bindings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,12 @@ auto getNumpyArrayAlignment(const py::array &numpyArray) -> CPUMemoryModel {
* @return Numpy array
*/
template <typename T>
auto alignedNumpyArray(CPUMemoryModel memory_model, size_t size) -> py::array {
auto alignedNumpyArray(CPUMemoryModel memory_model, size_t size,
bool zeroInit = false) -> py::array {
using Pennylane::Util::alignedAlloc;
if (getAlignment<T>(memory_model) > alignof(std::max_align_t)) {
void *ptr =
alignedAlloc(getAlignment<T>(memory_model), sizeof(T) * size);
void *ptr = alignedAlloc(getAlignment<T>(memory_model),
sizeof(T) * size, zeroInit);
auto capsule = py::capsule(ptr, &Util::alignedFree);
return py::array{py::dtype::of<T>(), {size}, {sizeof(T)}, ptr, capsule};
}
Expand All @@ -172,20 +173,24 @@ auto alignedNumpyArray(CPUMemoryModel memory_model, size_t size) -> py::array {
* @param size Size of the array to create
* @param dt Pybind11's datatype object
*/
auto allocateAlignedArray(size_t size, const py::dtype &dt,
                          bool zeroInit = false) -> py::array {
    // Dispatches on the requested NumPy dtype and forwards to the matching
    // alignedNumpyArray<T> instantiation.
    //
    // size     Number of elements in the returned one-dimensional array.
    // dt       Requested element type; float, double and their std::complex
    //          counterparts are the only accepted values.
    // zeroInit Passed through unchanged to alignedNumpyArray<T>; defaults to
    //          false so existing two-argument callers keep working.
    //
    // Throws py::type_error for any other dtype.
    //
    // TODO: Move memset operations to here to reduce zeroInit pass-throughs.
    const auto memory_model = bestCPUMemoryModel();

    if (dt.is(py::dtype::of<float>())) {
        return alignedNumpyArray<float>(memory_model, size, zeroInit);
    }
    if (dt.is(py::dtype::of<double>())) {
        return alignedNumpyArray<double>(memory_model, size, zeroInit);
    }
    if (dt.is(py::dtype::of<std::complex<float>>())) {
        return alignedNumpyArray<std::complex<float>>(memory_model, size,
                                                      zeroInit);
    }
    if (dt.is(py::dtype::of<std::complex<double>>())) {
        return alignedNumpyArray<std::complex<double>>(memory_model, size,
                                                       zeroInit);
    }
    throw py::type_error("Unsupported datatype.");
}
Expand Down
16 changes: 11 additions & 5 deletions pennylane_lightning/core/src/utils/Memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <memory>
#include <new>
Expand All @@ -35,23 +36,28 @@ namespace Pennylane::Util {
* @param bytes Number of bytes to allocate
* @return Pointer to the allocated memory
*/
inline auto alignedAlloc(uint32_t alignment, size_t bytes,
                         bool zero_init = false) -> void * {
    // zero_init: when true, the whole (rounded-up) allocation is cleared to
    // zero bytes before it is returned. Defaults to false so existing
    // two-argument callers are unaffected.
    //
    // Returns nullptr when the underlying allocator fails.

    // Round the request up to the next multiple of `alignment`; the value is
    // passed to std::aligned_alloc below, which expects such a size.
    if (bytes % alignment != 0) {
        bytes = alignment * (bytes / alignment + 1);
    }
    void *p = nullptr;

#if defined(__clang__) && defined(__APPLE__)
    /*
     * We use `posix_memalign` for MacOS as Mac does not support
     * `std::aligned_alloc` properly yet (even in MacOS 10.15).
     */
    if (posix_memalign(&p, alignment, bytes) != 0) {
        // Do not rely on what the call left in `p` when it reports failure.
        p = nullptr;
    }
#elif defined(_MSC_VER)
    p = _aligned_malloc(bytes, alignment);
#else
    p = std::aligned_alloc(alignment, bytes);
#endif
    // Only clear memory that was actually obtained: passing a null pointer
    // to memset is undefined behaviour.
    if (zero_init && p != nullptr) {
        std::memset(p, 0, bytes);
    }
    return p;
}

/**
Expand Down
11 changes: 0 additions & 11 deletions pennylane_lightning/lightning_gpu/lightning_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,7 @@

try:
from pennylane_lightning.lightning_gpu_ops import (
allocate_aligned_array,
backend_info,
best_alignment,
get_alignment,
StateVectorC128,
StateVectorC64,
MeasurementsC128,
Expand Down Expand Up @@ -331,14 +328,6 @@ def _asarray(arr, dtype=None):
if not dtype:
dtype = arr.dtype

# We allocate a new aligned memory and copy data to there if alignment
# or dtype mismatches
# Note that get_alignment does not necessarily return CPUMemoryModel(Unaligned) even for
# numpy allocated memory as the memory location happens to be aligned.
if int(get_alignment(arr)) < int(best_alignment()) or arr.dtype != dtype:
new_arr = allocate_aligned_array(arr.size, np.dtype(dtype)).reshape(arr.shape)
np.copyto(new_arr, arr)
arr = new_arr
return arr

# pylint disable=missing-function-docstring
Expand Down
8 changes: 4 additions & 4 deletions pennylane_lightning/lightning_kokkos/lightning_kokkos.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@
from pennylane_lightning.lightning_kokkos_ops import (
allocate_aligned_array,
backend_info,
best_alignment,
get_alignment,
InitializationSettings,
MeasurementsC128,
MeasurementsC64,
Expand Down Expand Up @@ -232,8 +230,10 @@ def _asarray(arr, dtype=None):
# or dtype mismatches
# Note that get_alignment does not necessarily return CPUMemoryModel(Unaligned) even for
# numpy allocated memory as the memory location happens to be aligned.
if int(get_alignment(arr)) < int(best_alignment()) or arr.dtype != dtype:
new_arr = allocate_aligned_array(arr.size, np.dtype(dtype)).reshape(arr.shape)
if arr.dtype != dtype:
new_arr = allocate_aligned_array(arr.size, np.dtype(dtype), False).reshape(
arr.shape
)
np.copyto(new_arr, arr)
arr = new_arr
return arr
Expand Down
16 changes: 11 additions & 5 deletions pennylane_lightning/lightning_qubit/lightning_qubit.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ def __init__( # pylint: disable=too-many-arguments
# state as an array of dimension [2]*wires.
self._state = self._create_basis_state(0)
self._pre_rotated_state = self._state
self._c_dtype = c_dtype

self._batch_obs = batch_obs
self._mcmc = mcmc
Expand Down Expand Up @@ -259,8 +260,13 @@ def _asarray(arr, dtype=None):
# Note that get_alignment does not necessarily return CPUMemoryModel(Unaligned) even for
# numpy allocated memory, as the memory location may happen to be aligned.
if int(get_alignment(arr)) < int(best_alignment()) or arr.dtype != dtype:
new_arr = allocate_aligned_array(arr.size, np.dtype(dtype)).reshape(arr.shape)
np.copyto(new_arr, arr)
new_arr = allocate_aligned_array(arr.size, np.dtype(dtype), False).reshape(
arr.shape
)
if len(arr.shape):
new_arr[:] = arr
else:
np.copyto(new_arr, arr)
arr = new_arr
return arr

Expand All @@ -273,17 +279,17 @@ def _create_basis_state(self, index):
representing the statevector of the basis state
Note: This function does not support broadcasted inputs yet.
"""
state = np.zeros(2**self.num_wires, dtype=np.complex128)
state = allocate_aligned_array(2**self.num_wires, np.dtype(self.C_DTYPE), True)
state[index] = 1
state = self._asarray(state, dtype=self.C_DTYPE)
return self._reshape(state, [2] * self.num_wires)

def reset(self):
"""Reset the device"""
super().reset()

# init the state vector to |00..0>
self._state = self._create_basis_state(0)
if not self.state[0] == 1.0 + 0j:
self._state = self._create_basis_state(0)
self._pre_rotated_state = self._state

@property
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pip==23.0
pip~=22.0
git+https://github.com/PennyLaneAI/pennylane.git@master
ninja
flaky
Expand Down
2 changes: 1 addition & 1 deletion tests/test_adjoint_jacobian.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ def test_provide_starting_state(self, tol, dev):

dM1 = dev.adjoint_jacobian(tape)

if device_name != "lightning.gpu":
if device_name == "lightning.kokkos":
dev._pre_rotated_state = dev.state_vector # necessary for lightning.kokkos

qml.execute([tape], dev, None)
Expand Down
17 changes: 11 additions & 6 deletions tests/test_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,19 @@
try:
from pennylane_lightning.lightning_qubit_ops import allocate_aligned_array
except (ImportError, ModuleNotFoundError):
try:
from pennylane_lightning.lightning_kokkos_ops import allocate_aligned_array
except (ImportError, ModuleNotFoundError):
pytest.skip("No binary module found. Skipping.", allow_module_level=True)
pytest.skip("No binary module found. Skipping.", allow_module_level=True)


@pytest.mark.skipif(not ld._CPP_BINARY_AVAILABLE, reason="Lightning binary required")
@pytest.mark.parametrize("dt", [np.dtype(np.complex64), np.dtype(np.complex128)])
def test_allocate_aligned_array(dt):
arr = allocate_aligned_array(1024, dt)
def test_allocate_aligned_array_unset(dt):
arr = allocate_aligned_array(1024, dt, False)
assert arr.dtype == dt


@pytest.mark.skipif(not ld._CPP_BINARY_AVAILABLE, reason="Lightning binary required")
@pytest.mark.parametrize("dt", [np.dtype(np.complex64), np.dtype(np.complex128)])
def test_allocate_aligned_array_set(dt):
arr = allocate_aligned_array(1024, dt, True)
assert arr.dtype == dt
assert np.all(arr == 0)

0 comments on commit 3c648f7

Please sign in to comment.