diff --git a/CMakeLists.txt b/CMakeLists.txt index 26b182be..a1956e0f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,8 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) option(BUILD_UCM_STORE "build ucm store module." ON) option(BUILD_UCM_SPARSE "build ucm sparse module." ON) option(BUILD_UNIT_TESTS "build all unit test suits." OFF) -option(BUILD_NUMA "build numactl library" OFF) +option(BUILD_NUMA "build numactl library." OFF) +option(DOWNLOAD_DEPENDENCE "download dependence by cmake." ON) set(RUNTIME_ENVIRONMENT "simu" CACHE STRING "runtime: simu, ascend, musa or cuda.") execute_process(COMMAND git rev-parse HEAD OUTPUT_VARIABLE UCM_COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/ucm/CMakeLists.txt b/ucm/CMakeLists.txt index af0712f1..0d4579d5 100644 --- a/ucm/CMakeLists.txt +++ b/ucm/CMakeLists.txt @@ -1,3 +1,4 @@ +add_subdirectory(shared) if(BUILD_UCM_STORE) add_subdirectory(store) endif() diff --git a/ucm/shared/CMakeLists.txt b/ucm/shared/CMakeLists.txt new file mode 100644 index 00000000..471602fb --- /dev/null +++ b/ucm/shared/CMakeLists.txt @@ -0,0 +1,4 @@ +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +add_subdirectory(vendor) +add_subdirectory(trans) +add_subdirectory(test) diff --git a/ucm/shared/__init__.py b/ucm/shared/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ucm/shared/test/CMakeLists.txt b/ucm/shared/test/CMakeLists.txt new file mode 100644 index 00000000..07241d81 --- /dev/null +++ b/ucm/shared/test/CMakeLists.txt @@ -0,0 +1,11 @@ +if(BUILD_UNIT_TESTS) + include(GoogleTest) + file(GLOB_RECURSE UCMSHARED_TEST_SOURCE_FILES "./case/*.cc") + add_executable(ucmshared.test ${UCMSHARED_TEST_SOURCE_FILES}) + target_include_directories(ucmshared.test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/case) + target_link_libraries(ucmshared.test PRIVATE + trans + gtest_main gtest mockcpp + ) + gtest_discover_tests(ucmshared.test) +endif() diff --git a/ucm/shared/test/case/trans/trans_test.cc b/ucm/shared/test/case/trans/trans_test.cc new file mode 100644 index 00000000..38c55a4f --- /dev/null +++ b/ucm/shared/test/case/trans/trans_test.cc @@ -0,0 +1,93 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#include +#include "trans/device.h" + +class UCTransUnitTest : public ::testing::Test {}; + +TEST_F(UCTransUnitTest, CopyDataWithCE) +{ + const auto ok = UC::Trans::Status::OK(); + constexpr int32_t deviceId = 0; + constexpr size_t size = 36 * 1024; + constexpr size_t number = 64 * 61; + UC::Trans::Device device; + ASSERT_EQ(device.Setup(deviceId), ok); + auto buffer = device.MakeBuffer(); + auto stream = device.MakeStream(); + auto hPtr1 = buffer->MakeHostBuffer(size * number); + ASSERT_NE(hPtr1, nullptr); + ASSERT_EQ(buffer->MakeDeviceBuffers(size, number), ok); + std::vector> ptrHolder; + ptrHolder.reserve(number); + void* dPtrArr[number]; + for (size_t i = 0; i < number; i++) { + *(size_t*)(((char*)hPtr1.get()) + size * i) = i; + auto ptr = buffer->GetDeviceBuffer(size); + dPtrArr[i] = ptr.get(); + ptrHolder.emplace_back(ptr); + } + auto hPtr2 = buffer->MakeHostBuffer(size * number); + ASSERT_NE(hPtr2, nullptr); + ASSERT_EQ(stream->HostToDeviceAsync(hPtr1.get(), dPtrArr, size, number), ok); + ASSERT_EQ(stream->DeviceToHostAsync(dPtrArr, hPtr2.get(), size, number), ok); + ASSERT_EQ(stream->Synchronized(), ok); + for (size_t i = 0; i < number; i++) { + ASSERT_EQ(*(size_t*)(((char*)hPtr2.get()) + size * i), i); + } +} + +TEST_F(UCTransUnitTest, CopyDataWithSM) +{ + const auto ok = UC::Trans::Status::OK(); + constexpr int32_t deviceId = 0; + constexpr size_t size = 36 * 1024; + constexpr size_t number = 64 * 61; + UC::Trans::Device device; + ASSERT_EQ(device.Setup(deviceId), ok); + auto buffer = device.MakeBuffer(); + auto stream = device.MakeSMStream(); + auto hPtr1 = buffer->MakeHostBuffer(size * number); + ASSERT_NE(hPtr1, nullptr); + ASSERT_EQ(buffer->MakeDeviceBuffers(size, number), ok); + std::vector> ptrHolder; + ptrHolder.reserve(number); + void* dPtrArr[number]; + for (size_t i = 0; i < number; i++) { + *(size_t*)(((char*)hPtr1.get()) + size * i) = i; + auto ptr = buffer->GetDeviceBuffer(size); + dPtrArr[i] = ptr.get(); + ptrHolder.emplace_back(ptr); + } + auto dPtrArrOnDev = buffer->MakeDeviceBuffer(sizeof(dPtrArr)); + ASSERT_EQ(stream->HostToDevice((void*)dPtrArr, dPtrArrOnDev.get(), sizeof(dPtrArr)), ok); + auto hPtr2 = buffer->MakeHostBuffer(size * number); + ASSERT_NE(hPtr2, nullptr); + ASSERT_EQ(stream->HostToDeviceAsync(hPtr1.get(), (void**)dPtrArrOnDev.get(), size, number), ok); + ASSERT_EQ(stream->DeviceToHostAsync((void**)dPtrArrOnDev.get(), hPtr2.get(), size, number), ok); + ASSERT_EQ(stream->Synchronized(), ok); + for (size_t i = 0; i < number; i++) { + ASSERT_EQ(*(size_t*)(((char*)hPtr2.get()) + size * i), i); + } +} diff --git a/ucm/shared/test/example/trans/trans_on_cuda_example.py b/ucm/shared/test/example/trans/trans_on_cuda_example.py new file mode 100644 index 00000000..12e5f9e3 --- /dev/null +++ b/ucm/shared/test/example/trans/trans_on_cuda_example.py @@ -0,0 +1,149 @@ +# -*- coding: utf-8 -*- +# +# MIT License +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +import time +from functools import wraps + +import cupy +import numpy as np + +from ucm.shared.trans import ucmtrans + + +def test_wrap(func): + @wraps(func) + def wrapper(*args, **kwargs): + print(f"========>> Running in {func.__name__}:") + result = func(*args, **kwargs) + print() + return result + + return wrapper + + +def make_host_memory(size, number, dtype, fill=False): + host = cupy.cuda.alloc_pinned_memory(size * number) + host_np = np.frombuffer(host, dtype=dtype) + if fill: + fixed_len = min(1024, number) + host_np[:fixed_len] = np.arange(fixed_len, dtype=dtype) + print("make:", host_np.shape, host_np.itemsize, host_np) + return host + + +def compare(host1, host2, dtype): + host1_np = np.frombuffer(host1, dtype=dtype) + host2_np = np.frombuffer(host2, dtype=dtype) + print("compare[1]:", host1_np.shape, host1_np.itemsize, host1_np) + print("compare[2]:", host2_np.shape, host2_np.itemsize, host2_np) + return np.array_equal(host1_np, host2_np) + + +@test_wrap +def trans_with_ce(d, size, number, dtype): + s = d.MakeStream() + host1 = make_host_memory(size, number, dtype, True) + device = [cupy.empty(size, dtype=np.uint8) for _ in range(number)] + device_ptr = np.array([d.data.ptr for d in device], dtype=np.uint64) + host2 = make_host_memory(size, number, dtype) + tp = time.perf_counter() + s.HostToDeviceScatter(host1.ptr, device_ptr, size, number) + s.DeviceToHostGather(device_ptr, host2.ptr, size, number) + cost = time.perf_counter() - tp + print(f"cost: {cost}s") + print(f"bandwidth: {size * number / cost / 1e9}GB/s") + assert compare(host1, host2, dtype) + + +@test_wrap +def trans_with_sm(d, size, number, dtype): + s = d.MakeSMStream() + host1 = make_host_memory(size, number, dtype, True) + device = [cupy.empty(size, dtype=np.uint8) for _ in range(number)] + device_ptr = np.array([d.data.ptr for d in device], dtype=np.uint64) + device_ptr_cupy = cupy.empty(number, dtype=np.uint64) + device_ptr_cupy.set(device_ptr) + host2 = make_host_memory(size, number, dtype) + tp = time.perf_counter() + s.HostToDeviceScatter(host1.ptr, device_ptr_cupy.data.ptr, size, number) + s.DeviceToHostGather(device_ptr_cupy.data.ptr, host2.ptr, size, number) + cost = time.perf_counter() - tp + print(f"cost: {cost}s") + print(f"bandwidth: {size * number / cost / 1e9}GB/s") + assert compare(host1, host2, dtype) + + +@test_wrap +def trans_with_ce_async(d, size, number, dtype): + s = d.MakeStream() + host1 = make_host_memory(size, number, dtype, True) + device = [cupy.empty(size, dtype=np.uint8) for _ in range(number)] + device_ptr = np.array([d.data.ptr for d in device], dtype=np.uint64) + host2 = make_host_memory(size, number, dtype) + tp = time.perf_counter() + s.HostToDeviceScatterAsync(host1.ptr, device_ptr, size, number) + s.DeviceToHostGatherAsync(device_ptr, host2.ptr, size, number) + s.Synchronized() + cost = time.perf_counter() - tp + print(f"cost: {cost}s") + print(f"bandwidth: {size * number / cost / 1e9}GB/s") + assert compare(host1, host2, dtype) + + +@test_wrap +def trans_with_sm_async(d, size, number, dtype): + s = d.MakeSMStream() + host1 = make_host_memory(size, number, dtype, True) + device = [cupy.empty(size, dtype=np.uint8) for _ in range(number)] + device_ptr = np.array([d.data.ptr for d in device], dtype=np.uint64) + device_ptr_cupy = cupy.empty(number, dtype=np.uint64) + device_ptr_cupy.set(device_ptr) + host2 = make_host_memory(size, number, dtype) + tp = time.perf_counter() + s.HostToDeviceScatterAsync(host1.ptr, device_ptr_cupy.data.ptr, size, number) + s.DeviceToHostGatherAsync(device_ptr_cupy.data.ptr, host2.ptr, size, number) + s.Synchronized() + cost = time.perf_counter() - tp + print(f"cost: {cost}s") + print(f"bandwidth: {size * number / cost / 1e9}GB/s") + assert compare(host1, host2, dtype) + + +def main(): + device_id = 0 + size = 36 * 1024 + number = 61 * 64 + dtype = np.float16 + print(f"ucmtrans: {ucmtrans.commit_id}-{ucmtrans.build_type}") + cupy.cuda.Device(device_id).use() + d = ucmtrans.Device() + d.Setup(device_id) + trans_with_ce(d, size, number, dtype) + trans_with_sm(d, size, number, dtype) + trans_with_ce_async(d, size, number, dtype) + trans_with_sm_async(d, size, number, dtype) + + +if __name__ == "__main__": + main() diff --git a/ucm/shared/trans/CMakeLists.txt b/ucm/shared/trans/CMakeLists.txt new file mode 100644 index 00000000..a6528ae1 --- /dev/null +++ b/ucm/shared/trans/CMakeLists.txt @@ -0,0 +1,14 @@ +if(RUNTIME_ENVIRONMENT STREQUAL "ascend") + add_subdirectory(ascend) +endif() +if(RUNTIME_ENVIRONMENT STREQUAL "cuda") + add_subdirectory(cuda) +endif() +if(RUNTIME_ENVIRONMENT STREQUAL "simu") + add_subdirectory(simu) +endif() + +file(GLOB_RECURSE UCMTRANS_CPY_SOURCE_FILES "./cpy/*.cc") +pybind11_add_module(ucmtrans ${UCMTRANS_CPY_SOURCE_FILES}) +target_link_libraries(ucmtrans PRIVATE trans) +set_target_properties(ucmtrans PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/ucm/shared/trans/__init__.py b/ucm/shared/trans/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ucm/shared/trans/buffer.h b/ucm/shared/trans/buffer.h new file mode 100644 index 00000000..a7b03f6d --- /dev/null +++ b/ucm/shared/trans/buffer.h @@ -0,0 +1,51 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#ifndef UNIFIEDCACHE_TRANS_BUFFER_H +#define UNIFIEDCACHE_TRANS_BUFFER_H + +#include +#include "status.h" + +namespace UC::Trans { + +class Buffer { +public: + virtual ~Buffer() = default; + + virtual std::shared_ptr MakeDeviceBuffer(size_t size) = 0; + virtual Status MakeDeviceBuffers(size_t size, size_t number) = 0; + virtual std::shared_ptr GetDeviceBuffer(size_t size) = 0; + + virtual std::shared_ptr MakeHostBuffer(size_t size) = 0; + virtual Status MakeHostBuffers(size_t size, size_t number) = 0; + virtual std::shared_ptr GetHostBuffer(size_t size) = 0; + + virtual Status RegisterHostBuffer(void* ptr, size_t size) = 0; + virtual void UnregisterHostBuffer(void* ptr) = 0; + virtual void* GetHostPtrOnDevice(void* ptr) = 0; +}; + +} // namespace UC::Trans + +#endif diff --git a/ucm/shared/trans/cpy/trans.py.cc b/ucm/shared/trans/cpy/trans.py.cc new file mode 100644 index 00000000..952fc8e6 --- /dev/null +++ b/ucm/shared/trans/cpy/trans.py.cc @@ -0,0 +1,192 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#include +#include +#include "trans/device.h" + +namespace py = pybind11; + +namespace UC::Trans { + +using Ptr = uintptr_t; +using PtrArray = py::array_t; + +inline void ThrowIfFailed(const Status& s) +{ + if (s.Failure()) [[unlikely]] { throw std::runtime_error{s.ToString()}; } +} + +inline void DeviceToHost(Stream& self, Ptr src, Ptr dst, size_t size) +{ + ThrowIfFailed(self.DeviceToHost((void*)src, (void*)dst, size)); +} + +inline void DeviceToHostBatch(Stream& self, py::object src, py::object dst, size_t size, + size_t number) +{ + if (py::isinstance(src)) { + auto device = static_cast(src.cast().request().ptr); + auto host = static_cast(dst.cast().request().ptr); + ThrowIfFailed(self.DeviceToHost(device, host, size, number)); + } else { + auto device = static_cast((void*)src.cast()); + auto host = static_cast((void*)dst.cast()); + ThrowIfFailed(self.DeviceToHost(device, host, size, number)); + } +} + +inline void DeviceToHostGather(Stream& self, py::object src, Ptr dst, size_t size, size_t number) +{ + if (py::isinstance(src)) { + auto device = static_cast(src.cast().request().ptr); + ThrowIfFailed(self.DeviceToHost(device, (void*)dst, size, number)); + } else { + auto device = static_cast((void*)src.cast()); + ThrowIfFailed(self.DeviceToHost(device, (void*)dst, size, number)); + } +} + +inline void DeviceToHostAsync(Stream& self, Ptr src, Ptr dst, size_t size) +{ + ThrowIfFailed(self.DeviceToHostAsync((void*)src, (void*)dst, size)); +} + +inline void DeviceToHostBatchAsync(Stream& self, py::object src, py::object dst, size_t size, + size_t number) +{ + if (py::isinstance(src)) { + auto device = static_cast(src.cast().request().ptr); + auto host = static_cast(dst.cast().request().ptr); + ThrowIfFailed(self.DeviceToHostAsync(device, host, size, number)); + } else { + auto device = static_cast((void*)src.cast()); + auto host = static_cast((void*)dst.cast()); + ThrowIfFailed(self.DeviceToHostAsync(device, host, size, number)); + } +} + +inline void DeviceToHostGatherAsync(Stream& self, py::object src, Ptr dst, size_t size, + size_t number) +{ + if (py::isinstance(src)) { + auto device = static_cast(src.cast().request().ptr); + ThrowIfFailed(self.DeviceToHostAsync(device, (void*)dst, size, number)); + } else { + auto device = static_cast((void*)src.cast()); + ThrowIfFailed(self.DeviceToHostAsync(device, (void*)dst, size, number)); + } +} + +inline void HostToDevice(Stream& self, Ptr src, Ptr dst, size_t size) +{ + ThrowIfFailed(self.HostToDevice((void*)src, (void*)dst, size)); +} + +inline void HostToDeviceBatch(Stream& self, py::object src, py::object dst, size_t size, + size_t number) +{ + if (py::isinstance(src)) { + auto host = static_cast(src.cast().request().ptr); + auto device = static_cast(dst.cast().request().ptr); + ThrowIfFailed(self.HostToDevice(host, device, size, number)); + } else { + auto host = static_cast((void*)src.cast()); + auto device = static_cast((void*)dst.cast()); + ThrowIfFailed(self.HostToDevice(host, device, size, number)); + } +} + +inline void HostToDeviceScatter(Stream& self, Ptr src, py::object dst, size_t size, size_t number) +{ + if (py::isinstance(dst)) { + auto device = static_cast(dst.cast().request().ptr); + ThrowIfFailed(self.HostToDevice((void*)src, device, size, number)); + } else { + auto device = static_cast((void*)dst.cast()); + ThrowIfFailed(self.HostToDevice((void*)src, device, size, number)); + } +} + +inline void HostToDeviceAsync(Stream& self, Ptr src, Ptr dst, size_t size) +{ + ThrowIfFailed(self.HostToDeviceAsync((void*)src, (void*)dst, size)); +} + +inline void HostToDeviceBatchAsync(Stream& self, py::object src, py::object dst, size_t size, + size_t number) +{ + if (py::isinstance(src)) { + auto host = static_cast(src.cast().request().ptr); + auto device = static_cast(dst.cast().request().ptr); + ThrowIfFailed(self.HostToDeviceAsync(host, device, size, number)); + } else { + auto host = static_cast((void*)src.cast()); + auto device = static_cast((void*)dst.cast()); + ThrowIfFailed(self.HostToDeviceAsync(host, device, size, number)); + } +} + +inline void HostToDeviceScatterAsync(Stream& self, Ptr src, py::object dst, size_t size, + size_t number) +{ + if (py::isinstance(dst)) { + auto device = static_cast(dst.cast().request().ptr); + ThrowIfFailed(self.HostToDeviceAsync((void*)src, device, size, number)); + } else { + auto device = static_cast((void*)dst.cast()); + ThrowIfFailed(self.HostToDeviceAsync((void*)src, device, size, number)); + } +} + +} // namespace UC::Trans + +PYBIND11_MODULE(ucmtrans, m) +{ + using namespace UC::Trans; + m.attr("project") = UCM_PROJECT_NAME; + m.attr("version") = UCM_PROJECT_VERSION; + m.attr("commit_id") = UCM_COMMIT_ID; + m.attr("build_type") = UCM_BUILD_TYPE; + + auto s = py::class_>(m, "Stream"); + s.def("DeviceToHost", &DeviceToHost); + s.def("DeviceToHostBatch", &DeviceToHostBatch); + s.def("DeviceToHostGather", &DeviceToHostGather); + s.def("DeviceToHostAsync", &DeviceToHostAsync); + s.def("DeviceToHostBatchAsync", &DeviceToHostBatchAsync); + s.def("DeviceToHostGatherAsync", &DeviceToHostGatherAsync); + s.def("HostToDevice", &HostToDevice); + s.def("HostToDeviceBatch", &HostToDeviceBatch); + s.def("HostToDeviceScatter", &HostToDeviceScatter); + s.def("HostToDeviceAsync", &HostToDeviceAsync); + s.def("HostToDeviceBatchAsync", &HostToDeviceBatchAsync); + s.def("HostToDeviceScatterAsync", &HostToDeviceScatterAsync); + s.def("Synchronized", [](Stream& self) { ThrowIfFailed(self.Synchronized()); }); + + auto d = py::class_(m, "Device"); + d.def(py::init<>()); + d.def("Setup", [](Device& self, int32_t deviceId) { ThrowIfFailed(self.Setup(deviceId)); }); + d.def("MakeStream", &Device::MakeStream); + d.def("MakeSMStream", &Device::MakeSMStream); +} diff --git a/ucm/shared/trans/cuda/CMakeLists.txt b/ucm/shared/trans/cuda/CMakeLists.txt new file mode 100644 index 00000000..b98de998 --- /dev/null +++ b/ucm/shared/trans/cuda/CMakeLists.txt @@ -0,0 +1,23 @@ +set(CUDA_ROOT "/usr/local/cuda/" CACHE PATH "Path to CUDA root directory") +set(CMAKE_CUDA_COMPILER ${CUDA_ROOT}/bin/nvcc) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90) +enable_language(CUDA) +add_library(kernel OBJECT cuda_sm_kernel.cu) +target_compile_options(kernel PRIVATE + --diag-suppress=128 --diag-suppress=2417 --diag-suppress=2597 + -Wall -fPIC +) +add_library(trans STATIC + cuda_device.cc + cuda_buffer.cc + cuda_stream.cc + cuda_sm_stream.cc +) +target_include_directories(trans PUBLIC ${CUDA_ROOT}/include) +target_link_directories(trans PUBLIC ${CUDA_ROOT}/lib64) +target_link_libraries(trans PUBLIC + fmt + cudart + nvidia-ml + kernel +) diff --git a/ucm/shared/trans/cuda/cuda_buffer.cc b/ucm/shared/trans/cuda/cuda_buffer.cc new file mode 100644 index 00000000..c003fc21 --- /dev/null +++ b/ucm/shared/trans/cuda/cuda_buffer.cc @@ -0,0 +1,62 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#include "cuda_buffer.h" +#include + +namespace UC::Trans { + +std::shared_ptr CudaBuffer::MakeDeviceBuffer(size_t size) +{ + void* device = nullptr; + auto ret = cudaMalloc(&device, size); + if (ret == cudaSuccess) { return std::shared_ptr(device, cudaFree); } + return nullptr; +} + +std::shared_ptr CudaBuffer::MakeHostBuffer(size_t size) +{ + void* host = nullptr; + auto ret = cudaMallocHost(&host, size); + if (ret == cudaSuccess) { return std::shared_ptr(host, cudaFreeHost); } + return nullptr; +} + +Status CudaBuffer::RegisterHostBuffer(void* ptr, size_t size) +{ + auto ret = cudaHostRegister(ptr, size, cudaHostRegisterDefault); + if (ret == cudaSuccess) { return Status::OK(); } + return Status{ret, cudaGetErrorString(ret)}; +} + +void CudaBuffer::UnregisterHostBuffer(void* ptr) { cudaHostUnregister(ptr); } + +void* CudaBuffer::GetHostPtrOnDevice(void* ptr) +{ + void* device = nullptr; + auto ret = cudaHostGetDevicePointer(&device, ptr, 0); + if (ret == cudaSuccess) { return nullptr; } + return device; +} + +} // namespace UC::Trans diff --git a/ucm/shared/trans/cuda/cuda_buffer.h b/ucm/shared/trans/cuda/cuda_buffer.h new file mode 100644 index 00000000..14a1a7d8 --- /dev/null +++ b/ucm/shared/trans/cuda/cuda_buffer.h @@ -0,0 +1,43 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#ifndef UNIFIEDCACHE_TRANS_CUDA_BUFFER_H +#define UNIFIEDCACHE_TRANS_CUDA_BUFFER_H + +#include "trans/detail/reserved_buffer.h" + +namespace UC::Trans { + +class CudaBuffer : public ReservedBuffer { +public: + std::shared_ptr MakeDeviceBuffer(size_t size) override; + std::shared_ptr MakeHostBuffer(size_t size) override; + + Status RegisterHostBuffer(void* ptr, size_t size) override; + void UnregisterHostBuffer(void* ptr) override; + void* GetHostPtrOnDevice(void* ptr) override; +}; + +} // namespace UC::Trans + +#endif diff --git a/ucm/shared/trans/cuda/cuda_device.cc b/ucm/shared/trans/cuda/cuda_device.cc new file mode 100644 index 00000000..86132e99 --- /dev/null +++ b/ucm/shared/trans/cuda/cuda_device.cc @@ -0,0 +1,82 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#include +#include +#include "cuda_buffer.h" +#include "cuda_sm_stream.h" +#include "cuda_stream.h" +#include "trans/device.h" + +namespace UC::Trans { + +static void SetCpuAffinity(int32_t deviceId) +{ + nvmlDevice_t device; + auto ret = nvmlDeviceGetHandleByIndex(deviceId, &device); + if (ret != NVML_SUCCESS) { return; } + nvmlDeviceSetCpuAffinity(device); +} + +Status Device::Setup(int32_t deviceId) +{ + auto ret = cudaSetDevice(deviceId); + if (ret != cudaSuccess) { return Status{ret, cudaGetErrorString(ret)}; } + SetCpuAffinity(deviceId); + return Status::OK(); +} + +std::unique_ptr Device::MakeStream() +{ + std::unique_ptr stream = nullptr; + try { + stream = std::make_unique(); + } catch (...) { + return nullptr; + } + if (stream->Setup().Success()) { return stream; } + return nullptr; +} + +std::unique_ptr Device::MakeSMStream() +{ + std::unique_ptr stream = nullptr; + try { + stream = std::make_unique(); + } catch (...) { + return nullptr; + } + if (stream->Setup().Success()) { return stream; } + return nullptr; +} + +std::unique_ptr Device::MakeBuffer() +{ + try { + return std::make_unique(); + } catch (...) { + return nullptr; + } +} + +} // namespace UC::Trans diff --git a/ucm/shared/trans/cuda/cuda_sm_kernel.cu b/ucm/shared/trans/cuda/cuda_sm_kernel.cu new file mode 100644 index 00000000..fa71c61e --- /dev/null +++ b/ucm/shared/trans/cuda/cuda_sm_kernel.cu @@ -0,0 +1,116 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#include +#include "cuda_sm_kernel.h" + +namespace UC::Trans { + +#define CUDA_TRANS_UNIT_SIZE (sizeof(uint4) * 2) +#define CUDA_TRANS_BLOCK_NUMBER (32) +#define CUDA_TRANS_BLOCK_SIZE (256) +#define CUDA_TRANS_THREAD_NUMBER (CUDA_TRANS_BLOCK_NUMBER * CUDA_TRANS_BLOCK_SIZE) + +inline __device__ void CudaCopyUnit(const uint8_t* __restrict__ src, + volatile uint8_t* __restrict__ dst) +{ + uint4 lo, hi; + asm volatile("ld.global.cs.v4.b32 {%0,%1,%2,%3}, [%4];" + : "=r"(lo.x), "=r"(lo.y), "=r"(lo.z), "=r"(lo.w) + : "l"(src)); + asm volatile("ld.global.cs.v4.b32 {%0,%1,%2,%3}, [%4+16];" + : "=r"(hi.x), "=r"(hi.y), "=r"(hi.z), "=r"(hi.w) + : "l"(src)); + asm volatile("st.volatile.global.v4.b32 [%0], {%1,%2,%3,%4};" + : + : "l"(dst), "r"(lo.x), "r"(lo.y), "r"(lo.z), "r"(lo.w)); + asm volatile("st.volatile.global.v4.b32 [%0+16], {%1,%2,%3,%4};" + : + : "l"(dst), "r"(hi.x), "r"(hi.y), "r"(hi.z), "r"(hi.w)); +} + +__global__ void CudaCopyKernel(const void** src, void** dst, size_t size, size_t num) +{ + auto length = size * num; + auto offset = (blockIdx.x * blockDim.x + threadIdx.x) * CUDA_TRANS_UNIT_SIZE; + while (offset + CUDA_TRANS_UNIT_SIZE <= length) { + auto idx = offset / size; + auto off = offset % size; + auto host = ((const uint8_t*)src[idx]) + off; + auto device = ((uint8_t*)dst[idx]) + off; + CudaCopyUnit(host, device); + offset += CUDA_TRANS_THREAD_NUMBER * CUDA_TRANS_UNIT_SIZE; + } +} + +__global__ void CudaCopyKernel(const void** src, void* dst, size_t size, size_t num) +{ + auto length = size * num; + auto offset = (blockIdx.x * blockDim.x + threadIdx.x) * CUDA_TRANS_UNIT_SIZE; + while (offset + CUDA_TRANS_UNIT_SIZE <= length) { + auto idx = offset / size; + auto off = offset % size; + auto host = ((const uint8_t*)src[idx]) + off; + auto device = ((uint8_t*)dst) + offset; + CudaCopyUnit(host, device); + offset += CUDA_TRANS_THREAD_NUMBER * CUDA_TRANS_UNIT_SIZE; + } +} + +__global__ void CudaCopyKernel(const void* src, void** dst, size_t size, size_t num) +{ + auto length = size * num; + auto offset = (blockIdx.x * blockDim.x + threadIdx.x) * CUDA_TRANS_UNIT_SIZE; + while (offset + CUDA_TRANS_UNIT_SIZE <= length) { + auto idx = offset / size; + auto off = offset % size; + auto host = ((const uint8_t*)src) + offset; + auto device = ((uint8_t*)dst[idx]) + off; + CudaCopyUnit(host, device); + offset += CUDA_TRANS_THREAD_NUMBER * CUDA_TRANS_UNIT_SIZE; + } +} + +cudaError_t CudaSMCopyAsync(void* src[], void* dst[], size_t size, size_t number, + cudaStream_t stream) +{ + CudaCopyKernel<<>>(src, dst, size, + number); + return cudaGetLastError(); +} + +cudaError_t CudaSMCopyAsync(void* src[], void* dst, size_t size, size_t number, cudaStream_t stream) +{ + CudaCopyKernel<<>>( + (const void**)src, dst, size, number); + return cudaGetLastError(); +} + +cudaError_t CudaSMCopyAsync(void* src, void* dst[], size_t size, size_t number, cudaStream_t stream) +{ + CudaCopyKernel<<>>(src, dst, size, + number); + return cudaGetLastError(); +} + +} // namespace UC::Trans diff --git a/ucm/shared/trans/cuda/cuda_sm_kernel.h b/ucm/shared/trans/cuda/cuda_sm_kernel.h new file mode 100644 index 00000000..a161c82e --- /dev/null +++ b/ucm/shared/trans/cuda/cuda_sm_kernel.h @@ -0,0 +1,41 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#ifndef UNIFIEDCACHE_TRANS_CUDA_SM_KERNEL_H +#define UNIFIEDCACHE_TRANS_CUDA_SM_KERNEL_H + +#include +#include + +namespace UC::Trans { + +cudaError_t CudaSMCopyAsync(void* src[], void* dst[], size_t size, size_t number, + cudaStream_t stream); +cudaError_t CudaSMCopyAsync(void* src[], void* dst, size_t size, size_t number, + cudaStream_t stream); +cudaError_t CudaSMCopyAsync(void* src, void* dst[], size_t size, size_t number, + cudaStream_t stream); + +} // namespace UC::Trans + +#endif diff --git a/ucm/shared/trans/cuda/cuda_sm_stream.cc b/ucm/shared/trans/cuda/cuda_sm_stream.cc new file mode 100644 index 00000000..f2f34172 --- /dev/null +++ b/ucm/shared/trans/cuda/cuda_sm_stream.cc @@ -0,0 +1,57 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#include "cuda_sm_stream.h" +#include "cuda_sm_kernel.h" + +namespace UC::Trans { + +Status CudaSmStream::DeviceToHostAsync(void* device[], void* host[], size_t size, size_t number) +{ + auto ret = CudaSMCopyAsync(device, host, size, number, stream_); + if (ret != cudaSuccess) [[unlikely]] { return Status{ret, cudaGetErrorString(ret)}; } + return Status::OK(); +} + +Status CudaSmStream::DeviceToHostAsync(void* device[], void* host, size_t size, size_t number) +{ + auto ret = CudaSMCopyAsync(device, host, size, number, stream_); + if (ret != cudaSuccess) [[unlikely]] { return Status{ret, cudaGetErrorString(ret)}; } + return Status::OK(); +} + +Status CudaSmStream::HostToDeviceAsync(void* host[], void* device[], size_t size, size_t number) +{ + auto ret = CudaSMCopyAsync(host, device, size, number, stream_); + if (ret != cudaSuccess) [[unlikely]] { return Status{ret, cudaGetErrorString(ret)}; } + return Status::OK(); +} + +Status CudaSmStream::HostToDeviceAsync(void* host, void* device[], size_t size, size_t number) +{ + auto ret = CudaSMCopyAsync(host, device, size, number, stream_); + if (ret != cudaSuccess) [[unlikely]] { return Status{ret, cudaGetErrorString(ret)}; } + return Status::OK(); +} + +} // namespace UC::Trans diff --git a/ucm/shared/trans/cuda/cuda_sm_stream.h b/ucm/shared/trans/cuda/cuda_sm_stream.h new file mode 100644 index 00000000..ab9817dc --- /dev/null +++ b/ucm/shared/trans/cuda/cuda_sm_stream.h @@ -0,0 +1,41 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#ifndef UNIFIEDCACHE_TRANS_CUDA_SM_STREAM_H +#define UNIFIEDCACHE_TRANS_CUDA_SM_STREAM_H + +#include "cuda_stream.h" + +namespace UC::Trans { + +class CudaSmStream : public CudaStream { +public: + Status DeviceToHostAsync(void* device[], void* host[], size_t size, size_t number) override; + Status DeviceToHostAsync(void* device[], void* host, size_t size, size_t number) override; + Status HostToDeviceAsync(void* host[], void* device[], size_t size, size_t number) override; + Status HostToDeviceAsync(void* host, void* device[], size_t size, size_t number) override; +}; + +} // namespace UC::Trans + +#endif diff --git a/ucm/shared/trans/cuda/cuda_stream.cc b/ucm/shared/trans/cuda/cuda_stream.cc new file mode 100644 index 00000000..103dee53 --- /dev/null +++ b/ucm/shared/trans/cuda/cuda_stream.cc @@ -0,0 +1,158 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#include "cuda_stream.h" + +namespace UC::Trans { + +Status CudaStream::Setup() +{ + auto ret = cudaStreamCreate(&stream_); + if (ret != cudaSuccess) { return Status{ret, cudaGetErrorString(ret)}; } + return Status::OK(); +} + +Status CudaStream::DeviceToHost(void* device, void* host, size_t size) +{ + auto ret = cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost); + if (ret != cudaSuccess) [[unlikely]] { return Status{ret, cudaGetErrorString(ret)}; } + return Status::OK(); +} + +Status CudaStream::DeviceToHost(void* device[], void* host[], size_t size, size_t number) +{ + auto s = DeviceToHostAsync(device, host, size, number); + if (s.Failure()) [[unlikely]] { return s; } + return Synchronized(); +} + +Status CudaStream::DeviceToHost(void* device[], void* host, size_t size, size_t number) +{ + auto s = DeviceToHostAsync(device, host, size, number); + if (s.Failure()) [[unlikely]] { return s; } + return Synchronized(); +} + +Status CudaStream::DeviceToHostAsync(void* device, void* host, size_t size) +{ + auto ret = cudaMemcpyAsync(host, device, size, cudaMemcpyDeviceToHost, stream_); + if (ret != cudaSuccess) [[unlikely]] { return Status{ret, cudaGetErrorString(ret)}; } + return Status::OK(); +} + +Status CudaStream::DeviceToHostAsync(void* device[], void* host[], size_t size, size_t number) +{ + for (size_t i = 0; i < number; i++) { + auto s = DeviceToHostAsync(device[i], host[i], size); + if (s.Failure()) [[unlikely]] { return s; } + } + return Status::OK(); +} + +Status CudaStream::DeviceToHostAsync(void* device[], void* host, size_t size, size_t number) +{ + for (size_t i = 0; i < number; i++) { + auto pHost = (void*)(((int8_t*)host) + size * i); + auto s = DeviceToHostAsync(device[i], pHost, size); + if (s.Failure()) [[unlikely]] { return s; } + } + return Status::OK(); +} + +Status CudaStream::HostToDevice(void* host, void* device, size_t size) +{ + auto ret = cudaMemcpy(device, host, size, cudaMemcpyHostToDevice); + if (ret != cudaSuccess) [[unlikely]] { return Status{ret, cudaGetErrorString(ret)}; } + return Status::OK(); +} + +Status CudaStream::HostToDevice(void* host[], void* device[], size_t size, size_t number) +{ + auto s = HostToDeviceAsync(host, device, size, number); + if (s.Failure()) [[unlikely]] { return s; } + return Synchronized(); +} + +Status CudaStream::HostToDevice(void* host, void* device[], size_t size, size_t number) +{ + auto s = HostToDeviceAsync(host, device, size, number); + if (s.Failure()) [[unlikely]] { return s; } + return Synchronized(); +} + +Status CudaStream::HostToDeviceAsync(void* host, void* device, size_t size) +{ + auto ret = cudaMemcpyAsync(device, host, size, cudaMemcpyHostToDevice, stream_); + if (ret != cudaSuccess) [[unlikely]] { return Status{ret, cudaGetErrorString(ret)}; } + return Status::OK(); +} + +Status CudaStream::HostToDeviceAsync(void* host[], void* device[], size_t size, size_t number) +{ + for (size_t i = 0; i < number; i++) { + auto s = HostToDeviceAsync(host[i], device[i], size); + if (s.Failure()) [[unlikely]] { return s; } + } + return Status::OK(); +} + +Status Trans::CudaStream::HostToDeviceAsync(void* host, void* device[], size_t size, size_t number) +{ + for (size_t i = 0; i < number; i++) { + auto pHost = (void*)(((int8_t*)host) + size * i); + auto s = HostToDeviceAsync(pHost, device[i], size); + if (s.Failure()) [[unlikely]] { return s; } + } + return Status::OK(); +} + +using Closure = std::function; + +static void Trampoline(cudaStream_t stream, cudaError_t err, void* data) +{ + (void)stream; + auto c = static_cast(data); + (*c)(err == cudaSuccess); + delete c; +} + +Status Trans::CudaStream::AppendCallback(std::function cb) +{ + auto c = new (std::nothrow) Closure{std::move(cb)}; + if (!c) [[unlikely]] { return Status::Error("out of memory for appending callback"); } + auto ret = cudaStreamAddCallback(stream_, Trampoline, c, 0); + if (ret != cudaSuccess) [[unlikely]] { + delete c; + return Status{ret, cudaGetErrorString(ret)}; + } + return Status::OK(); +} + +Status Trans::CudaStream::Synchronized() +{ + auto ret = cudaStreamSynchronize(stream_); + if (ret != cudaSuccess) [[unlikely]] { return Status{ret, cudaGetErrorString(ret)}; } + return Status::OK(); +} + +} // namespace UC::Trans diff --git a/ucm/shared/trans/cuda/cuda_stream.h b/ucm/shared/trans/cuda/cuda_stream.h new file mode 100644 index 00000000..d327285d --- /dev/null +++ b/ucm/shared/trans/cuda/cuda_stream.h @@ -0,0 +1,59 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#ifndef UNIFIEDCACHE_TRANS_CUDA_STREAM_H +#define UNIFIEDCACHE_TRANS_CUDA_STREAM_H + +#include +#include "trans/stream.h" + +namespace UC::Trans { + +class CudaStream : public Stream { +protected: + cudaStream_t stream_; + +public: + Status Setup() override; + + Status DeviceToHost(void* device, void* host, size_t size) override; + Status DeviceToHost(void* device[], void* host[], size_t size, size_t number) override; + Status DeviceToHost(void* device[], void* host, size_t size, size_t number) override; + Status DeviceToHostAsync(void* device, void* host, size_t size) override; + Status DeviceToHostAsync(void* device[], void* host[], size_t size, size_t number) override; + Status DeviceToHostAsync(void* device[], void* host, size_t size, size_t number) override; + + Status HostToDevice(void* host, void* device, size_t size) override; + Status HostToDevice(void* host[], void* device[], size_t size, size_t number) override; + Status HostToDevice(void* host, void* device[], size_t size, size_t number) override; + Status HostToDeviceAsync(void* host, void* device, size_t size) override; + Status HostToDeviceAsync(void* host[], void* device[], size_t size, size_t number) override; + Status HostToDeviceAsync(void* host, void* device[], size_t size, size_t number) override; + + Status AppendCallback(std::function cb) override; + Status Synchronized() override; +}; + +} // namespace UC::Trans + +#endif diff --git a/ucm/shared/trans/detail/indexer.h b/ucm/shared/trans/detail/indexer.h new file mode 100644 index 00000000..dc6833df --- /dev/null +++ b/ucm/shared/trans/detail/indexer.h @@ -0,0 +1,98 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#ifndef UNIFIEDCACHE_TRANS_INDEXER_H +#define UNIFIEDCACHE_TRANS_INDEXER_H + +#include +#include +#include +#include + +namespace UC::Trans { + +class Indexer { +public: + using Index = uint32_t; + static constexpr Index npos = std::numeric_limits::max(); + +private: + struct Node { + Index idx; + Index next; + }; + struct Pointer { + Index slot; + uint32_t ver; + }; + static_assert(sizeof(Pointer) == 8, "Pointer must be 64-bit"); + +public: + void Setup(const Index capacity) noexcept + { + this->capacity_ = capacity; + this->nodes_.resize(capacity + 1); + for (Index slot = 1; slot <= capacity; slot++) { + this->nodes_[slot].idx = slot - 1; + this->nodes_[slot].next = slot + 1; + } + this->nodes_[capacity].next = 0; + this->pointer_.store({1, 0}); + } + Index Acquire() noexcept + { + for (;;) { + auto ptr = this->pointer_.load(std::memory_order_acquire); + if (ptr.slot == 0) { return npos; } + auto next = this->nodes_[ptr.slot].next; + Pointer desired{next, ptr.ver + 1}; + if (this->pointer_.compare_exchange_weak(ptr, desired, std::memory_order_release, + std::memory_order_relaxed)) { + return this->nodes_[ptr.slot].idx; + } + } + } + void Release(const Index idx) noexcept + { + if (idx >= this->capacity_) { return; } + auto slot = idx + 1; + for (;;) { + auto ptr = this->pointer_.load(std::memory_order_acquire); + this->nodes_[slot].next = ptr.slot; + Pointer desired{slot, ptr.ver + 1}; + if (this->pointer_.compare_exchange_weak(ptr, desired, std::memory_order_release, + std::memory_order_relaxed)) { + return; + } + } + } + +private: + Index capacity_; + std::vector nodes_; + alignas(64) std::atomic pointer_; +}; + +} // namespace UC::Trans + +#endif diff --git a/ucm/shared/trans/detail/reserved_buffer.h b/ucm/shared/trans/detail/reserved_buffer.h new file mode 100644 index 00000000..98eba320 --- /dev/null +++ b/ucm/shared/trans/detail/reserved_buffer.h @@ -0,0 +1,99 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#ifndef UNIFIEDCACHE_TRANS_RESERVED_BUFFER_H +#define UNIFIEDCACHE_TRANS_RESERVED_BUFFER_H + +#include +#include "indexer.h" +#include "trans/buffer.h" + +namespace UC::Trans { + +class ReservedBuffer : public Buffer { + struct { + Indexer indexer; + std::shared_ptr buffers; + size_t size; + } hostBuffers_, deviceBuffers_; + + template + static std::shared_ptr GetBufferFrom(Buffers& buffers) + { + auto pos = buffers.indexer.Acquire(); + if (pos != buffers.indexer.npos) { + auto addr = static_cast(buffers.buffers.get()); + auto ptr = static_cast(addr + buffers.size * pos); + return std::shared_ptr(ptr, + [&buffers, pos](void*) { buffers.indexer.Release(pos); }); + } + return nullptr; + } + +public: + Status MakeDeviceBuffers(size_t size, size_t number) override + { + auto totalSize = size * number; + auto buffers = this->MakeDeviceBuffer(totalSize); + if (!buffers) { + return Status::Error(fmt::format("out of memory({}) on device", totalSize)); + } + this->deviceBuffers_.size = size; + this->deviceBuffers_.buffers = buffers; + this->deviceBuffers_.indexer.Setup(number); + return Status::OK(); + } + + std::shared_ptr GetDeviceBuffer(size_t size) override + { + if (size <= this->deviceBuffers_.size) { + auto buffer = GetBufferFrom(this->deviceBuffers_); + if (buffer) { return buffer; } + } + return this->MakeDeviceBuffer(size); + } + + Status MakeHostBuffers(size_t size, size_t number) override + { + auto totalSize = size * number; + auto buffers = this->MakeHostBuffer(totalSize); + if (!buffers) { return Status::Error(fmt::format("out of memory({}) on host", totalSize)); } + this->hostBuffers_.size = size; + this->hostBuffers_.buffers = buffers; + this->hostBuffers_.indexer.Setup(number); + return Status::OK(); + } + + std::shared_ptr GetHostBuffer(size_t size) override + { + if (size <= this->hostBuffers_.size) { + auto buffer = GetBufferFrom(this->hostBuffers_); + if (buffer) { return buffer; } + } + return this->MakeDeviceBuffer(size); + } +}; + +} // namespace UC::Trans + +#endif diff --git a/ucm/shared/trans/device.h b/ucm/shared/trans/device.h new file mode 100644 index 00000000..a6801c8a --- /dev/null +++ b/ucm/shared/trans/device.h @@ -0,0 +1,42 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#ifndef UNIFIEDCACHE_TRANS_DEVICE_H +#define UNIFIEDCACHE_TRANS_DEVICE_H + +#include "buffer.h" +#include "stream.h" + +namespace UC::Trans { + +class Device { +public: + Status Setup(int32_t deviceId); + std::unique_ptr MakeStream(); + std::unique_ptr MakeSMStream(); + std::unique_ptr MakeBuffer(); +}; + +} // namespace UC::Trans + +#endif diff --git a/ucm/shared/trans/simu/CMakeLists.txt b/ucm/shared/trans/simu/CMakeLists.txt new file mode 100644 index 00000000..9404eead --- /dev/null +++ b/ucm/shared/trans/simu/CMakeLists.txt @@ -0,0 +1,8 @@ +add_library(trans STATIC + simu_device.cc + simu_buffer.cc + simu_stream.cc +) +target_link_libraries(trans PUBLIC + fmt +) diff --git a/ucm/shared/trans/simu/simu_buffer.cc b/ucm/shared/trans/simu/simu_buffer.cc new file mode 100644 index 00000000..2dbdac40 --- /dev/null +++ b/ucm/shared/trans/simu/simu_buffer.cc @@ -0,0 +1,76 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#include "simu_buffer.h" +#include +#include +#include +#include "trans/buffer.h" + +namespace UC::Trans { + +static void* AllocMemory(size_t size, int8_t initVal) +{ + auto ptr = malloc(size); + if (!ptr) { return nullptr; } + std::memset(ptr, initVal, size); + return ptr; +} + +static void FreeMemory(void* ptr) { free(ptr); } + +template +static std::shared_ptr GetBuffer(Buffers& buffers) +{ + auto pos = buffers.indexer.Acquire(); + if (pos != buffers.indexer.npos) { + auto addr = static_cast(buffers.buffers.get()); + auto ptr = static_cast(addr + buffers.size * pos); + return std::shared_ptr(ptr, [&buffers, pos](void*) { buffers.indexer.Release(pos); }); + } + return nullptr; +} + +std::shared_ptr SimuBuffer::MakeDeviceBuffer(size_t size) +{ + constexpr int8_t deviceInitVal = 0xd; + auto device = AllocMemory(size, deviceInitVal); + if (!device) { return nullptr; } + return std::shared_ptr(device, FreeMemory); +} + +std::shared_ptr SimuBuffer::MakeHostBuffer(size_t size) +{ + constexpr int8_t hostInitVal = 0xa; + auto device = AllocMemory(size, hostInitVal); + if (!device) { return nullptr; } + return std::shared_ptr(device, FreeMemory); +} + +Status SimuBuffer::RegisterHostBuffer(void* ptr, size_t size) { return Status::OK(); } + +void SimuBuffer::UnregisterHostBuffer(void* ptr) {} + +void* SimuBuffer::GetHostPtrOnDevice(void* ptr) { return ptr; } + +} // namespace UC::Trans diff --git a/ucm/shared/trans/simu/simu_buffer.h b/ucm/shared/trans/simu/simu_buffer.h new file mode 100644 index 00000000..67dccd08 --- /dev/null +++ b/ucm/shared/trans/simu/simu_buffer.h @@ -0,0 +1,43 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#ifndef UNIFIEDCACHE_TRANS_SIMU_BUFFER_H +#define UNIFIEDCACHE_TRANS_SIMU_BUFFER_H + +#include "trans/detail/reserved_buffer.h" + +namespace UC::Trans { + +class SimuBuffer : public ReservedBuffer { +public: + std::shared_ptr MakeDeviceBuffer(size_t size) override; + std::shared_ptr MakeHostBuffer(size_t size) override; + + Status RegisterHostBuffer(void* ptr, size_t size) override; + void UnregisterHostBuffer(void* ptr) override; + void* GetHostPtrOnDevice(void* ptr) override; +}; + +} // namespace UC::Trans + +#endif diff --git a/ucm/shared/trans/simu/simu_device.cc b/ucm/shared/trans/simu/simu_device.cc new file mode 100644 index 00000000..351be42e --- /dev/null +++ b/ucm/shared/trans/simu/simu_device.cc @@ -0,0 +1,60 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#include +#include "simu_buffer.h" +#include "simu_stream.h" +#include "trans/device.h" + +namespace UC::Trans { + +Status Device::Setup(int32_t deviceId) +{ + if (deviceId < 0) { return Status::Error(fmt::format("invalid device id({})", deviceId)); } + return Status::OK(); +} + +std::unique_ptr Device::MakeStream() +{ + std::unique_ptr stream = nullptr; + try { + stream = std::make_unique(); + } catch (...) { + return nullptr; + } + if (stream->Setup().Success()) { return stream; } + return nullptr; +} + +std::unique_ptr Device::MakeSMStream() { return MakeStream(); } + +std::unique_ptr Device::MakeBuffer() +{ + try { + return std::make_unique(); + } catch (...) { + return nullptr; + } +} + +} // namespace UC::Trans diff --git a/ucm/shared/trans/simu/simu_stream.cc b/ucm/shared/trans/simu/simu_stream.cc new file mode 100644 index 00000000..0d6efaa5 --- /dev/null +++ b/ucm/shared/trans/simu/simu_stream.cc @@ -0,0 +1,175 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#include "simu_stream.h" +#include + +namespace UC::Trans { + +void SimuStream::AsyncWorker() +{ + for (;;) { + std::unique_lock lock{this->mutex_}; + this->condition_.wait(lock, [this] { return this->stop_ || !this->tasks_.empty(); }); + if (this->stop_) { return; } + if (this->tasks_.empty()) { continue; } + auto task = std::move(this->tasks_.front()); + this->tasks_.pop_front(); + lock.unlock(); + task(); + } +} + +void SimuStream::EnqueueTask(std::function task) +{ + std::lock_guard lock{this->mutex_}; + this->tasks_.emplace_back(std::move(task)); + this->condition_.notify_one(); +} + +SimuStream::~SimuStream() +{ + { + std::lock_guard lock{this->mutex_}; + this->stop_ = true; + this->condition_.notify_all(); + } + if (this->thread_.joinable()) { this->thread_.join(); } +} + +Status SimuStream::Setup() +{ + this->thread_ = std::thread{&SimuStream::AsyncWorker, this}; + return Status::OK(); +} + +Status SimuStream::DeviceToHost(void* device, void* host, size_t size) +{ + std::memcpy(host, device, size); + return Status::OK(); +} + +Status SimuStream::DeviceToHost(void* device[], void* host[], size_t size, size_t number) +{ + for (size_t i = 0; i < number; i++) { + auto s = this->DeviceToHost(device[i], host[i], size); + if (s.Failure()) { return s; } + } + return Status::OK(); +} + +Status SimuStream::DeviceToHost(void* device[], void* host, size_t size, size_t number) +{ + for (size_t i = 0; i < number; i++) { + auto pDevice = device[i]; + auto pHost = (void*)(((int8_t*)host) + size * i); + auto s = this->DeviceToHost(pDevice, pHost, size); + if (s.Failure()) { return s; } + } + return Status::OK(); +} + +Status SimuStream::DeviceToHostAsync(void* device, void* host, size_t size) +{ + this->EnqueueTask([=] { this->DeviceToHost(device, host, size); }); + return Status::OK(); +} + +Status SimuStream::DeviceToHostAsync(void* device[], void* host[], size_t size, size_t number) +{ + this->EnqueueTask([=] { this->DeviceToHost(device, host, size, number); }); + return Status::OK(); +} + +Status SimuStream::DeviceToHostAsync(void* device[], void* host, size_t size, size_t number) +{ + this->EnqueueTask([=] { this->DeviceToHost(device, host, size, number); }); + return Status::OK(); +} + +Status SimuStream::HostToDevice(void* host, void* device, size_t size) +{ + std::memcpy(device, host, size); + return Status::OK(); +} + +Status SimuStream::HostToDevice(void* host[], void* device[], size_t size, size_t number) +{ + for (size_t i = 0; i < number; i++) { + auto s = this->HostToDevice(host[i], device[i], size); + if (s.Failure()) { return s; } + } + return Status::OK(); +} + +Status SimuStream::HostToDevice(void* host, void* device[], size_t size, size_t number) +{ + for (size_t i = 0; i < number; i++) { + auto pHost = (void*)(((int8_t*)host) + size * i); + auto pDevice = device[i]; + auto s = this->HostToDevice(pHost, pDevice, size); + if (s.Failure()) { return s; } + } + return Status::OK(); +} + +Status SimuStream::HostToDeviceAsync(void* host, void* device, size_t size) +{ + this->EnqueueTask([=] { this->HostToDevice(host, device, size); }); + return Status::OK(); +} + +Status SimuStream::HostToDeviceAsync(void* host[], void* device[], size_t size, size_t number) +{ + this->EnqueueTask([=] { this->HostToDevice(host, device, size, number); }); + return Status::OK(); +} + +Status SimuStream::HostToDeviceAsync(void* host, void* device[], size_t size, size_t number) +{ + this->EnqueueTask([=] { this->HostToDevice(host, device, size, number); }); + return Status::OK(); +} + +Status SimuStream::AppendCallback(std::function cb) +{ + this->EnqueueTask([=] { cb(true); }); + return Status::OK(); +} + +Status SimuStream::Synchronized() +{ + std::mutex mutex; + std::condition_variable cv; + bool finish = false; + this->EnqueueTask([&] { + std::lock_guard lock{mutex}; + finish = true; + cv.notify_one(); + }); + std::unique_lock lock{mutex}; + cv.wait(lock, [&] { return finish; }); + return Status::OK(); +} + +} // namespace UC::Trans diff --git a/ucm/shared/trans/simu/simu_stream.h b/ucm/shared/trans/simu/simu_stream.h new file mode 100644 index 00000000..57028d97 --- /dev/null +++ b/ucm/shared/trans/simu/simu_stream.h @@ -0,0 +1,70 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#ifndef UNIFIEDCACHE_TRANS_SIMU_STREAM_H +#define UNIFIEDCACHE_TRANS_SIMU_STREAM_H + +#include +#include +#include +#include +#include +#include "trans/stream.h" + +namespace UC::Trans { + +class SimuStream : public Stream { + std::thread thread_; + std::list> tasks_; + std::mutex mutex_; + std::condition_variable condition_; + bool stop_{false}; + + void AsyncWorker(); + void EnqueueTask(std::function task); + +public: + ~SimuStream() override; + Status Setup() override; + + Status DeviceToHost(void* device, void* host, size_t size) override; + Status DeviceToHost(void* device[], void* host[], size_t size, size_t number) override; + Status DeviceToHost(void* device[], void* host, size_t size, size_t number) override; + Status DeviceToHostAsync(void* device, void* host, size_t size) override; + Status DeviceToHostAsync(void* device[], void* host[], size_t size, size_t number) override; + Status DeviceToHostAsync(void* device[], void* host, size_t size, size_t number) override; + + Status HostToDevice(void* host, void* device, size_t size) override; + Status HostToDevice(void* host[], void* device[], size_t size, size_t number) override; + Status HostToDevice(void* host, void* device[], size_t size, size_t number) override; + Status HostToDeviceAsync(void* host, void* device, size_t size) override; + Status HostToDeviceAsync(void* host[], void* device[], size_t size, size_t number) override; + Status HostToDeviceAsync(void* host, void* device[], size_t size, size_t number) override; + + Status AppendCallback(std::function cb) override; + Status Synchronized() override; +}; + +} // namespace UC::Trans + +#endif diff --git a/ucm/shared/trans/status.h b/ucm/shared/trans/status.h new file mode 100644 index 00000000..cab27179 --- /dev/null +++ b/ucm/shared/trans/status.h @@ -0,0 +1,54 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#ifndef UNIFIEDCACHE_TRANS_STATUS_H +#define UNIFIEDCACHE_TRANS_STATUS_H + +#include +#include + +namespace UC::Trans { + +class Status { + static constexpr int32_t OK_ = 0; + static constexpr int32_t ERROR_ = -1; + int32_t code_; + std::string message_; + explicit Status(int32_t code) : code_(code) {} + +public: + bool operator==(const Status& other) const noexcept { return code_ == other.code_; } + bool operator!=(const Status& other) const noexcept { return !(*this == other); } + std::string ToString() const { return fmt::format("({}) {}", code_, message_); } + constexpr bool Success() const noexcept { return code_ == OK_; } + constexpr bool Failure() const noexcept { return !Success(); } + +public: + Status(int32_t code, std::string message) : code_{code}, message_{std::move(message)} {} + static Status OK() { return Status{OK_}; } + static Status Error(std::string message) { return {ERROR_, std::move(message)}; } +}; + +} // namespace UC::Trans + +#endif diff --git a/ucm/shared/trans/stream.h b/ucm/shared/trans/stream.h new file mode 100644 index 00000000..3cb0c368 --- /dev/null +++ b/ucm/shared/trans/stream.h @@ -0,0 +1,57 @@ +/** + * MIT License + * + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * */ +#ifndef UNIFIEDCACHE_TRANS_STREAM_H +#define UNIFIEDCACHE_TRANS_STREAM_H + +#include +#include "status.h" + +namespace UC::Trans { + +class Stream { +public: + virtual ~Stream() = default; + virtual Status Setup() = 0; + + virtual Status DeviceToHost(void* device, void* host, size_t size) = 0; + virtual Status DeviceToHost(void* device[], void* host[], size_t size, size_t number) = 0; + virtual Status DeviceToHost(void* device[], void* host, size_t size, size_t number) = 0; + virtual Status DeviceToHostAsync(void* device, void* host, size_t size) = 0; + virtual Status DeviceToHostAsync(void* device[], void* host[], size_t size, size_t number) = 0; + virtual Status DeviceToHostAsync(void* device[], void* host, size_t size, size_t number) = 0; + + virtual Status HostToDevice(void* host, void* device, size_t size) = 0; + virtual Status HostToDevice(void* host[], void* device[], size_t size, size_t number) = 0; + virtual Status HostToDevice(void* host, void* device[], size_t size, size_t number) = 0; + virtual Status HostToDeviceAsync(void* host, void* device, size_t size) = 0; + virtual Status HostToDeviceAsync(void* host[], void* device[], size_t size, size_t number) = 0; + virtual Status HostToDeviceAsync(void* host, void* device[], size_t size, size_t number) = 0; + + virtual Status AppendCallback(std::function cb) = 0; + virtual Status Synchronized() = 0; +}; + +} // namespace UC::Trans + +#endif diff --git a/ucm/sparse/vendor/CMakeLists.txt b/ucm/shared/vendor/CMakeLists.txt similarity index 85% rename from ucm/sparse/vendor/CMakeLists.txt rename to ucm/shared/vendor/CMakeLists.txt index 67b7f493..10d813cc 100644 --- a/ucm/sparse/vendor/CMakeLists.txt +++ b/ucm/shared/vendor/CMakeLists.txt @@ -8,6 +8,6 @@ function(EnableDept name url tag) endfunction() include(FetchContent) -EnableDept(pybind11 https://github.com/pybind/pybind11.git v2.13.6) EnableDept(fmt https://github.com/fmtlib/fmt.git 11.2.0) EnableDept(spdlog https://github.com/gabime/spdlog.git v1.15.3) +EnableDept(pybind11 https://github.com/pybind/pybind11.git v3.0.1) diff --git a/ucm/sparse/CMakeLists.txt b/ucm/sparse/CMakeLists.txt index a0033323..8e39e358 100644 --- a/ucm/sparse/CMakeLists.txt +++ b/ucm/sparse/CMakeLists.txt @@ -1,4 +1,3 @@ -add_subdirectory(vendor) add_subdirectory(esa) add_subdirectory(gsa) add_subdirectory(kvcomp) diff --git a/ucm/store/CMakeLists.txt b/ucm/store/CMakeLists.txt index c58db90b..d183155e 100644 --- a/ucm/store/CMakeLists.txt +++ b/ucm/store/CMakeLists.txt @@ -1,8 +1,6 @@ -option(DOWNLOAD_DEPENDENCE "download dependence by cmake." ON) set(LOGGER_BACKEND "spdlog" CACHE STRING "backend: spdlog or flux.") include_directories(.) -add_subdirectory(vendor) add_subdirectory(infra) add_subdirectory(device) add_subdirectory(nfsstore) diff --git a/ucm/store/vendor/CMakeLists.txt b/ucm/store/vendor/CMakeLists.txt deleted file mode 100644 index 15f11539..00000000 --- a/ucm/store/vendor/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -function(EnableDept name url tag) - if(DOWNLOAD_DEPENDENCE) - FetchContent_Declare(${name} GIT_REPOSITORY ${url} GIT_TAG ${tag} GIT_SHALLOW TRUE) - FetchContent_MakeAvailable(${name}) - else() - add_subdirectory(${name}) - endif() -endfunction() - -include(FetchContent) -EnableDept(fmt https://github.com/fmtlib/fmt.git 11.2.0) -if(LOGGER_BACKEND STREQUAL "spdlog") - EnableDept(spdlog https://github.com/gabime/spdlog.git v1.15.3) -endif() -EnableDept(pybind11 https://github.com/pybind/pybind11.git v2.13.6)