From bd6ad6b8a903633a39de7dbb669f4ae17784dcb3 Mon Sep 17 00:00:00 2001 From: kilinchange Date: Wed, 3 Jan 2024 17:30:03 +0800 Subject: [PATCH 1/6] =?UTF-8?q?feat(hardware):=20=E5=AE=9E=E7=8E=B0=20mlu?= =?UTF-8?q?=20=E7=A1=AC=E4=BB=B6=E7=9B=B8=E5=85=B3=E7=9A=84=E5=87=BD?= =?UTF-8?q?=E6=95=B0=EF=BC=9B=E6=94=B9=E5=8F=98=E7=BC=96=E8=AF=91=E6=96=B9?= =?UTF-8?q?=E5=BC=8F=E6=8C=89=E7=85=A7=E4=BB=A5=E7=A1=AC=E4=BB=B6=E5=90=8D?= =?UTF-8?q?=E7=A7=B0=E5=91=BD=E5=90=8D=E7=9A=84=E7=9B=AE=E5=BD=95=E5=90=8D?= =?UTF-8?q?=E5=8C=BA=E5=88=86=E6=98=AF=E5=90=A6=E9=9C=80=E8=A6=81=E7=BC=96?= =?UTF-8?q?=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 5 +++ Makefile | 1 + src/02hardware/CMakeLists.txt | 12 +++++-- src/02hardware/include/hardware/device.h | 2 ++ src/02hardware/include/hardware/devices/mlu.h | 19 ++++++++++++ src/02hardware/src/devices/cpu/memory.cc | 10 +++--- src/02hardware/src/devices/cpu/memory.hh | 10 +++--- src/02hardware/src/devices/mlu/device.cc | 27 ++++++++++++++++ src/02hardware/src/devices/mlu/functions.cc | 19 ++++++++++++ src/02hardware/src/devices/mlu/functions.hh | 24 ++++++++++++++ src/02hardware/src/devices/mlu/memory.cc | 31 +++++++++++++++++++ src/02hardware/src/devices/mlu/memory.hh | 18 +++++++++++ src/02hardware/src/devices/nvidia/device.cc | 12 ++----- 13 files changed, 167 insertions(+), 23 deletions(-) create mode 100644 src/02hardware/include/hardware/devices/mlu.h create mode 100644 src/02hardware/src/devices/mlu/device.cc create mode 100644 src/02hardware/src/devices/mlu/functions.cc create mode 100644 src/02hardware/src/devices/mlu/functions.hh create mode 100644 src/02hardware/src/devices/mlu/memory.cc create mode 100644 src/02hardware/src/devices/mlu/memory.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index 5521ed552..45116732d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,7 @@ message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION}) option(ABSL_PROPAGATE_CXX_STD "Abseil need this option" ON) option(USE_CUDA "Support Nvidia GPU" OFF) option(USE_KUNLUN "Support Baidu Kunlunxin" OFF) +option(USE_BANG "Support Hanwuji MLU" OFF) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -41,6 +42,10 @@ if(USE_KUNLUN) message(STATUS "KUNLUN_HOME: ${KUNLUN_HOME}") endif() +if (USE_BANG) + add_compile_definitions(USE_BANG) +endif() + add_compile_options(-march=native) # this will cause error in some machine add_compile_options(-mtune=native) add_compile_options(-Wall) diff --git a/Makefile b/Makefile index cf3e402ce..571515f5b 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ TYPE ?= Debug CUDA ?= OFF KUNLUN ?= OFF +BANG ?= OFF CMAKE_EXTRA = # CMAKE_EXTRA += -DCMAKE_CXX_COMPILER= diff --git a/src/02hardware/CMakeLists.txt b/src/02hardware/CMakeLists.txt index ece758395..1e38c5e2e 100644 --- a/src/02hardware/CMakeLists.txt +++ b/src/02hardware/CMakeLists.txt @@ -2,12 +2,18 @@ cmake_minimum_required(VERSION 3.12 FATAL_ERROR) project(hardware VERSION 0.0.0 LANGUAGES CXX) message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION}) +# Source files +file(GLOB HARDWARE_SRC src/*.cc src/*.cpp src/devices/cpu/*.cc) + if(USE_CUDA) - file(GLOB_RECURSE HARDWARE_CUDA_SRC src/*.cu) + file(GLOB_RECURSE HARDWARE_CUDA_SRC src/devices/nvidia/*.cu src/devices/nvidia/*.cc) +endif() + +if(USE_BANG) + file(GLOB_RECURSE HARDWARE_BANG_SRC src/devices/mlu/*.cc) endif() -file(GLOB_RECURSE HARDWARE_SRC src/*.cc src/*.cpp) -add_library(hardware STATIC ${HARDWARE_SRC} ${HARDWARE_CUDA_SRC}) +add_library(hardware STATIC ${HARDWARE_SRC} ${HARDWARE_CUDA_SRC} ${HARDWARE_BANG_SRC}) target_link_libraries(hardware PUBLIC common) target_include_directories(hardware PUBLIC include) diff --git a/src/02hardware/include/hardware/device.h b/src/02hardware/include/hardware/device.h index 5809fcf94..cb65a2730 100644 --- a/src/02hardware/include/hardware/device.h +++ b/src/02hardware/include/hardware/device.h @@ -11,6 +11,8 @@ namespace refactor::hardware { enum class Type : int32_t { Cpu, Nvidia, + Mlu, + Kunlun, }; protected: diff --git a/src/02hardware/include/hardware/devices/mlu.h b/src/02hardware/include/hardware/devices/mlu.h new file mode 100644 index 000000000..aa3b76529 --- /dev/null +++ b/src/02hardware/include/hardware/devices/mlu.h @@ -0,0 +1,19 @@ +#ifndef HARDWARE_DEVICES_MLU_H +#define HARDWARE_DEVICES_MLU_H + +#include "../device.h" + +namespace refactor::hardware { + + class Mlu final : public Device { + public: + explicit Mlu(int32_t card); + void setContext() const noexcept final; + Type type() const noexcept final { + return Type::Mlu; + } + }; + +}// namespace refactor::hardware + +#endif// HARDWARE_DEVICES_MLU_H diff --git a/src/02hardware/src/devices/cpu/memory.cc b/src/02hardware/src/devices/cpu/memory.cc index 4db56e98e..dbd64f51e 100644 --- a/src/02hardware/src/devices/cpu/memory.cc +++ b/src/02hardware/src/devices/cpu/memory.cc @@ -5,19 +5,19 @@ namespace refactor::hardware { using M = CpuMemory; - void *M::malloc(size_t size) noexcept { + void *M::malloc(size_t size) { return std::malloc(size); } - void M::free(void *ptr) noexcept { + void M::free(void *ptr) { std::free(ptr); } - void *M::copyHD(void *dst, void const *src, size_t bytes) const noexcept { + void *M::copyHD(void *dst, void const *src, size_t bytes) const { return std::memcpy(dst, src, bytes); } - void *M::copyDH(void *dst, void const *src, size_t bytes) const noexcept { + void *M::copyDH(void *dst, void const *src, size_t bytes) const { return std::memcpy(dst, src, bytes); } - void *M::copyDD(void *dst, void const *src, size_t bytes) const noexcept { + void *M::copyDD(void *dst, void const *src, size_t bytes) const { return std::memcpy(dst, src, bytes); } diff --git a/src/02hardware/src/devices/cpu/memory.hh b/src/02hardware/src/devices/cpu/memory.hh index 5bd3a1dda..d1681b24d 100644 --- a/src/02hardware/src/devices/cpu/memory.hh +++ b/src/02hardware/src/devices/cpu/memory.hh @@ -6,11 +6,11 @@ namespace refactor::hardware { class CpuMemory final : public Memory { - void *malloc(size_t) noexcept final; - void free(void *) noexcept final; - void *copyHD(void *dst, void const *src, size_t bytes) const noexcept final; - void *copyDH(void *dst, void const *src, size_t bytes) const noexcept final; - void *copyDD(void *dst, void const *src, size_t bytes) const noexcept final; + void *malloc(size_t) final; + void free(void *) final; + void *copyHD(void *dst, void const *src, size_t bytes) const final; + void *copyDH(void *dst, void const *src, size_t bytes) const final; + void *copyDD(void *dst, void const *src, size_t bytes) const final; }; }// namespace refactor::hardware diff --git a/src/02hardware/src/devices/mlu/device.cc b/src/02hardware/src/devices/mlu/device.cc new file mode 100644 index 000000000..87b6150db --- /dev/null +++ b/src/02hardware/src/devices/mlu/device.cc @@ -0,0 +1,27 @@ +#include "functions.cc" +#include "hardware/devices/mlu.h" +#include "hardware/mem_pool.h" +#include "memory.hh" + +namespace refactor::hardware { + + static Arc bangMemory(int32_t card) { + ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card); + setDevice(card); + auto [free, total] = getMemInfo(); + auto size = std::min(free, std::max(5ul << 30, total * 4 / 5)); + fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}", + card, free, total, size); + return std::make_shared( + std::make_shared(), + size, + 256ul); + } + + Mlu::Mlu(int32_t card) : Device(card, bangMemory(card)) {} + + void Mlu::setContext() const noexcept { + setDevice(_card); + } + +}// namespace refactor::hardware diff --git a/src/02hardware/src/devices/mlu/functions.cc b/src/02hardware/src/devices/mlu/functions.cc new file mode 100644 index 000000000..28918bcec --- /dev/null +++ b/src/02hardware/src/devices/mlu/functions.cc @@ -0,0 +1,19 @@ +#include "functions.hh" + +namespace refactor::hardware { + + int getDeviceCount() { + int deviceCount; + BANG_ASSERT(cnrtGetDeviceCount(&deviceCount)); + return deviceCount; + } + void setDevice(int device) { + BANG_ASSERT(cnrtSetDevice(device)); + } + MemInfo getMemInfo() { + MemInfo memInfo; + BANG_ASSERT(cudaMemGetInfo(&memInfo.free, &memInfo.total)); + return memInfo; + } + +}// namespace refactor::hardware diff --git a/src/02hardware/src/devices/mlu/functions.hh b/src/02hardware/src/devices/mlu/functions.hh new file mode 100644 index 000000000..cb5a561c2 --- /dev/null +++ b/src/02hardware/src/devices/mlu/functions.hh @@ -0,0 +1,24 @@ +#ifndef HARDWARE_DEVICES_MLU_FUNCTIONS_CUH +#define HARDWARE_DEVICES_MLU_FUNCTIONS_CUH + +#include "common.h" + +#define BANG_ASSERT(STATUS) \ + if (auto status = (STATUS); status != CNRT_RET_SUCCESS) { \ + RUNTIME_ERROR(fmt::format("bang failed on \"" #STATUS "\" with \"{}\" ({})", \ + cnrtGetErrorStr(status), (int) status)); \ + } + +namespace refactor::hardware { + + struct MemInfo { + size_t free, total; + }; + + int getDeviceCount(); + void setDevice(int device); + MemInfo getMemInfo(); + +}// namespace refactor::hardware + +#endif// HARDWARE_DEVICES_NVIDIA_FUNCTIONS_CUH diff --git a/src/02hardware/src/devices/mlu/memory.cc b/src/02hardware/src/devices/mlu/memory.cc new file mode 100644 index 000000000..81b3c626a --- /dev/null +++ b/src/02hardware/src/devices/mlu/memory.cc @@ -0,0 +1,31 @@ +#include "memory.hh" +#include "functions.hh" + +namespace refactor::hardware { + using M = MluMemory; + + void *M::malloc(size_t size) { + void *ptr; + BANG_ASSERT(cnrtMalloc(&ptr, size)); + return ptr; + } + void M::free(void *ptr) { + BANG_ASSERT(cnrtFree(ptr)); + } + void *M::copyHD(void *dst, void const *src, size_t bytes) const { + BANG_ASSERT(cnrtMemcpy(dst, const_cast(src), bytes, + CNRT_MEM_TRANS_DIR_HOST2DEV)) + return dst; + } + void *M::copyDH(void *dst, void const *src, size_t bytes) const { + BANG_ASSERT(cnrtMemcpy(dst, const_cast(src), bytes, + CNRT_MEM_TRANS_DIR_DEV2HOST)); + return dst; + } + void *M::copyDD(void *dst, void const *src, size_t bytes) const { + BANG_ASSERT(cnrtMemcpy(dst, const_cast(src), bytes, + CNRT_MEM_TRANS_DIR_PEER2PEER)); + return dst; + } + +}// namespace refactor::hardware diff --git a/src/02hardware/src/devices/mlu/memory.hh b/src/02hardware/src/devices/mlu/memory.hh new file mode 100644 index 000000000..85ec39887 --- /dev/null +++ b/src/02hardware/src/devices/mlu/memory.hh @@ -0,0 +1,18 @@ +#ifndef HARDWARE_DEVICES_MLU_MEMORY_CUH +#define HARDWARE_DEVICES_MLU_MEMORY_CUH + +#include "hardware/memory.h" + +namespace refactor::hardware { + + class MluMemory final : public Memory { + void *malloc(size_t) final; + void free(void *) final; + void *copyHD(void *dst, void const *src, size_t bytes) const final; + void *copyDH(void *dst, void const *src, size_t bytes) const final; + void *copyDD(void *dst, void const *src, size_t bytes) const final; + }; + +}// namespace refactor::hardware + +#endif// HARDWARE_DEVICES_MLU_MEMORY_HH diff --git a/src/02hardware/src/devices/nvidia/device.cc b/src/02hardware/src/devices/nvidia/device.cc index 1ae5b2244..403921cba 100644 --- a/src/02hardware/src/devices/nvidia/device.cc +++ b/src/02hardware/src/devices/nvidia/device.cc @@ -1,14 +1,11 @@ -#include "hardware/devices/nvidia.h" +#include "functions.cuh" +#include "hardware/devices/nvidia.h" #include "hardware/mem_pool.h" -#ifdef USE_CUDA -#include "functions.cuh" #include "memory.cuh" -#endif namespace refactor::hardware { static Arc cudaMemory(int32_t card) { -#ifdef USE_CUDA ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card); setDevice(card); auto [free, total] = getMemInfo(); @@ -19,17 +16,12 @@ namespace refactor::hardware { std::make_shared(), size, 256ul); -#else - RUNTIME_ERROR("CUDA is not enabled"); -#endif } Nvidia::Nvidia(int32_t card) : Device(card, cudaMemory(card)) {} void Nvidia::setContext() const noexcept { -#ifdef USE_CUDA setDevice(_card); -#endif } }// namespace refactor::hardware From 02939c95f27459146cf8036c5d66156201ab9383 Mon Sep 17 00:00:00 2001 From: kilinchange Date: Thu, 4 Jan 2024 17:03:15 +0800 Subject: [PATCH 2/6] =?UTF-8?q?feat:=20=E5=9C=A8=20cmake=20=E4=B8=AD?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=20mlu=20=E7=9A=84=E7=9B=B8=E5=85=B3=E7=8E=AF?= =?UTF-8?q?=E5=A2=83=E7=BC=96=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 29 +++++++++++++++++++++++++++++ Makefile | 2 +- src/04kernel/CMakeLists.txt | 3 +++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45116732d..6125281c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,35 @@ endif() if (USE_BANG) add_compile_definitions(USE_BANG) + include_directories(src/kernels/mlu/include) + + # Neuware Evironment + if ((NOT DEFINED NEUWARE_HOME) AND (NOT DEFINED ENV{NEUWARE_HOME})) + message(FATAL_ERROR "NEUWARE_HOME is not defined from cmake or env") + elseif (DEFINED NEUWARE_HOME) + set(NEUWARE_HOME ${NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development") + else() + set(NEUWARE_HOME $ENV{NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development") + endif() + message(STATUS "NEUWARE_HOME: ${NEUWARE_HOME}") + + # cnrt cndrv cnnl + include_directories("${NEUWARE_HOME}/include") + find_library(CAMBRICON_CNNL libcnnl.so "${NEUWARE_HOME}/lib64") + find_library(CAMBRICON_CNRT libcnrt.so "${NEUWARE_HOME}/lib64") + find_library(CAMBRICON_CNDRV libcndrv.so "${NEUWARE_HOME}/lib64") + find_library(CAMBRICON_CNCL libcncl.so "${NEUWARE_HOME}/lib64") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lstdc++ -Wall -Werror") + + if ((NOT DEFINED TARGET_CPU_ARCH) AND (NOT DEFINED ENV{TARGET_CPU_ARCH})) + execute_process(COMMAND uname -m OUTPUT_VARIABLE _uname_m OUTPUT_STRIP_TRAILING_WHITESPACE) + set(TARGET_CPU_ARCH "${_uname_m}" CACHE STRING "Target CPU ARCH") + elseif(DEFINED TARGET_CPU_ARCH) + set(TARGET_CPU_ARCH ${TARGET_CPU_ARCH} CACHE STRING "Target CPU ARCH") + else() + set(TARGET_CPU_ARCH $ENV{TARGET_CPU_ARCH} CACHE STRING "Target CPU ARCH") + endif() + message(STATUS "TARGET_CPU_ARCH: ${TARGET_CPU_ARCH}") endif() add_compile_options(-march=native) # this will cause error in some machine diff --git a/Makefile b/Makefile index 571515f5b..482087658 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ CMAKE_EXTRA = build: mkdir -p build - cmake -Bbuild -DCMAKE_BUILD_TYPE=$(TYPE) -DUSE_CUDA=$(CUDA) -DUSE_KUNLUN=$(KUNLUN) $(CMAKE_EXTRA) + cmake -Bbuild -DCMAKE_BUILD_TYPE=$(TYPE) -DUSE_CUDA=$(CUDA) -DUSE_KUNLUN=$(KUNLUN) -DUSE_BANG=$(BANG) $(CMAKE_EXTRA) make -j -C build install-python: build diff --git a/src/04kernel/CMakeLists.txt b/src/04kernel/CMakeLists.txt index f09a9fedf..e8564af44 100644 --- a/src/04kernel/CMakeLists.txt +++ b/src/04kernel/CMakeLists.txt @@ -35,6 +35,9 @@ if(USE_KUNLUN) find_library(KUNLUN_DNN libxpuapi.so ${KUNLUN_HOME}/XTDK/shlib) target_link_libraries(kernel PUBLIC ${KUNLUN_RT} ${KUNLUN_DNN}) endif() +if(USE_BANG) + target_link_libraries(kernel ${CAMBRICON_CNCL} ${CAMBRICON_CNNL} ${CAMBRICON_CNRT} ${CAMBRICON_CNDRV} stdc++) +endif() file(GLOB_RECURSE KERNEL_TEST test/*.cpp) if(KERNEL_TEST) From 7f82d7449d827746efa27d86e8693a2800189c6f Mon Sep 17 00:00:00 2001 From: kilinchange Date: Fri, 5 Jan 2024 03:34:51 +0000 Subject: [PATCH 3/6] =?UTF-8?q?fix:=20=E5=8E=BB=E6=8E=89=20cncl=20?= =?UTF-8?q?=E7=BC=96=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 3 +-- src/02hardware/src/devices/mlu/functions.cc | 6 +++--- src/02hardware/src/devices/mlu/functions.hh | 1 + src/04kernel/CMakeLists.txt | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6125281c6..ba918e3b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,8 +61,7 @@ if (USE_BANG) find_library(CAMBRICON_CNNL libcnnl.so "${NEUWARE_HOME}/lib64") find_library(CAMBRICON_CNRT libcnrt.so "${NEUWARE_HOME}/lib64") find_library(CAMBRICON_CNDRV libcndrv.so "${NEUWARE_HOME}/lib64") - find_library(CAMBRICON_CNCL libcncl.so "${NEUWARE_HOME}/lib64") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lstdc++ -Wall -Werror") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lstdc++ -Wall") if ((NOT DEFINED TARGET_CPU_ARCH) AND (NOT DEFINED ENV{TARGET_CPU_ARCH})) execute_process(COMMAND uname -m OUTPUT_VARIABLE _uname_m OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/src/02hardware/src/devices/mlu/functions.cc b/src/02hardware/src/devices/mlu/functions.cc index 28918bcec..d8f30d0fe 100644 --- a/src/02hardware/src/devices/mlu/functions.cc +++ b/src/02hardware/src/devices/mlu/functions.cc @@ -3,16 +3,16 @@ namespace refactor::hardware { int getDeviceCount() { - int deviceCount; + unsigned deviceCount; BANG_ASSERT(cnrtGetDeviceCount(&deviceCount)); - return deviceCount; + return static_cast(deviceCount); } void setDevice(int device) { BANG_ASSERT(cnrtSetDevice(device)); } MemInfo getMemInfo() { MemInfo memInfo; - BANG_ASSERT(cudaMemGetInfo(&memInfo.free, &memInfo.total)); + BANG_ASSERT(cnrtMemGetInfo(&memInfo.free, &memInfo.total)); return memInfo; } diff --git a/src/02hardware/src/devices/mlu/functions.hh b/src/02hardware/src/devices/mlu/functions.hh index cb5a561c2..0244e01f0 100644 --- a/src/02hardware/src/devices/mlu/functions.hh +++ b/src/02hardware/src/devices/mlu/functions.hh @@ -1,6 +1,7 @@ #ifndef HARDWARE_DEVICES_MLU_FUNCTIONS_CUH #define HARDWARE_DEVICES_MLU_FUNCTIONS_CUH +#include "cnrt.h" #include "common.h" #define BANG_ASSERT(STATUS) \ diff --git a/src/04kernel/CMakeLists.txt b/src/04kernel/CMakeLists.txt index e8564af44..1349193e5 100644 --- a/src/04kernel/CMakeLists.txt +++ b/src/04kernel/CMakeLists.txt @@ -36,7 +36,7 @@ if(USE_KUNLUN) target_link_libraries(kernel PUBLIC ${KUNLUN_RT} ${KUNLUN_DNN}) endif() if(USE_BANG) - target_link_libraries(kernel ${CAMBRICON_CNCL} ${CAMBRICON_CNNL} ${CAMBRICON_CNRT} ${CAMBRICON_CNDRV} stdc++) + target_link_libraries(kernel PUBLIC ${CAMBRICON_CNNL} ${CAMBRICON_CNRT} ${CAMBRICON_CNDRV}) endif() file(GLOB_RECURSE KERNEL_TEST test/*.cpp) From 0ebf34c4ee54945cd1539cf434933a6ec6455bf5 Mon Sep 17 00:00:00 2001 From: Zhang Bolun Date: Fri, 26 Jan 2024 15:45:30 +0800 Subject: [PATCH 4/6] fix: format CMakeLists.txt --- CMakeLists.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ba918e3b4..fe305d124 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,11 +48,11 @@ if (USE_BANG) # Neuware Evironment if ((NOT DEFINED NEUWARE_HOME) AND (NOT DEFINED ENV{NEUWARE_HOME})) - message(FATAL_ERROR "NEUWARE_HOME is not defined from cmake or env") + message(FATAL_ERROR "NEUWARE_HOME is not defined from cmake or env") elseif (DEFINED NEUWARE_HOME) - set(NEUWARE_HOME ${NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development") + set(NEUWARE_HOME ${NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development") else() - set(NEUWARE_HOME $ENV{NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development") + set(NEUWARE_HOME $ENV{NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development") endif() message(STATUS "NEUWARE_HOME: ${NEUWARE_HOME}") @@ -64,12 +64,12 @@ if (USE_BANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lstdc++ -Wall") if ((NOT DEFINED TARGET_CPU_ARCH) AND (NOT DEFINED ENV{TARGET_CPU_ARCH})) - execute_process(COMMAND uname -m OUTPUT_VARIABLE _uname_m OUTPUT_STRIP_TRAILING_WHITESPACE) - set(TARGET_CPU_ARCH "${_uname_m}" CACHE STRING "Target CPU ARCH") + execute_process(COMMAND uname -m OUTPUT_VARIABLE _uname_m OUTPUT_STRIP_TRAILING_WHITESPACE) + set(TARGET_CPU_ARCH "${_uname_m}" CACHE STRING "Target CPU ARCH") elseif(DEFINED TARGET_CPU_ARCH) - set(TARGET_CPU_ARCH ${TARGET_CPU_ARCH} CACHE STRING "Target CPU ARCH") + set(TARGET_CPU_ARCH ${TARGET_CPU_ARCH} CACHE STRING "Target CPU ARCH") else() - set(TARGET_CPU_ARCH $ENV{TARGET_CPU_ARCH} CACHE STRING "Target CPU ARCH") + set(TARGET_CPU_ARCH $ENV{TARGET_CPU_ARCH} CACHE STRING "Target CPU ARCH") endif() message(STATUS "TARGET_CPU_ARCH: ${TARGET_CPU_ARCH}") endif() From cc19556002d1bd5971dd43c22b5c0c5be659f016 Mon Sep 17 00:00:00 2001 From: kilinchange Date: Fri, 26 Jan 2024 16:13:01 +0800 Subject: [PATCH 5/6] feat(hardware): add cnnl --- src/02hardware/CMakeLists.txt | 8 ++------ src/02hardware/src/device_manager.cpp | 4 +++- src/02hardware/src/devices/mlu/device.cc | 8 ++++++-- src/02hardware/src/devices/mlu/functions.cc | 2 ++ src/02hardware/src/devices/mlu/functions.hh | 5 ++++- src/02hardware/src/devices/mlu/memory.cc | 2 ++ src/02hardware/src/devices/nvidia/device.cc | 6 +++++- 7 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/02hardware/CMakeLists.txt b/src/02hardware/CMakeLists.txt index 1e38c5e2e..b42ef6327 100644 --- a/src/02hardware/CMakeLists.txt +++ b/src/02hardware/CMakeLists.txt @@ -3,14 +3,10 @@ project(hardware VERSION 0.0.0 LANGUAGES CXX) message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION}) # Source files -file(GLOB HARDWARE_SRC src/*.cc src/*.cpp src/devices/cpu/*.cc) +file(GLOB_RECURSE HARDWARE_SRC src/*.cc src/*.cpp) if(USE_CUDA) - file(GLOB_RECURSE HARDWARE_CUDA_SRC src/devices/nvidia/*.cu src/devices/nvidia/*.cc) -endif() - -if(USE_BANG) - file(GLOB_RECURSE HARDWARE_BANG_SRC src/devices/mlu/*.cc) + file(GLOB_RECURSE HARDWARE_CUDA_SRC src/devices/nvidia/*.cu) endif() add_library(hardware STATIC ${HARDWARE_SRC} ${HARDWARE_CUDA_SRC} ${HARDWARE_BANG_SRC}) diff --git a/src/02hardware/src/device_manager.cpp b/src/02hardware/src/device_manager.cpp index bcfab3bb8..c3ed1b994 100644 --- a/src/02hardware/src/device_manager.cpp +++ b/src/02hardware/src/device_manager.cpp @@ -1,5 +1,6 @@ -#include "hardware/device_manager.h" +#include "hardware/device_manager.h" #include "hardware/devices/cpu.h" +#include "hardware/devices/mlu.h" #include "hardware/devices/nvidia.h" namespace refactor::hardware::device { @@ -37,6 +38,7 @@ namespace refactor::hardware::device { using T = Device::Type; // clang-format off auto device = type == T::Nvidia ? std::make_shared(card) + : type == T::Mlu ? std::make_shared(card) : UNREACHABLEX(Arc, ""); // clang-format on auto [kind, ok] = DEVICES.try_emplace(static_cast(type)); diff --git a/src/02hardware/src/devices/mlu/device.cc b/src/02hardware/src/devices/mlu/device.cc index 87b6150db..ea1f6affd 100644 --- a/src/02hardware/src/devices/mlu/device.cc +++ b/src/02hardware/src/devices/mlu/device.cc @@ -1,4 +1,4 @@ -#include "functions.cc" +#include "functions.hh" #include "hardware/devices/mlu.h" #include "hardware/mem_pool.h" #include "memory.hh" @@ -6,16 +6,20 @@ namespace refactor::hardware { static Arc bangMemory(int32_t card) { +#ifdef USE_BANG ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card); setDevice(card); auto [free, total] = getMemInfo(); auto size = std::min(free, std::max(5ul << 30, total * 4 / 5)); - fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}", + fmt::println("initializing Cambricon MLU {}, memory {} / {}, alloc {}", card, free, total, size); return std::make_shared( std::make_shared(), size, 256ul); +#else + return nullptr; +#endif } Mlu::Mlu(int32_t card) : Device(card, bangMemory(card)) {} diff --git a/src/02hardware/src/devices/mlu/functions.cc b/src/02hardware/src/devices/mlu/functions.cc index d8f30d0fe..bedea0458 100644 --- a/src/02hardware/src/devices/mlu/functions.cc +++ b/src/02hardware/src/devices/mlu/functions.cc @@ -2,6 +2,7 @@ namespace refactor::hardware { +#ifdef USE_BANG int getDeviceCount() { unsigned deviceCount; BANG_ASSERT(cnrtGetDeviceCount(&deviceCount)); @@ -15,5 +16,6 @@ namespace refactor::hardware { BANG_ASSERT(cnrtMemGetInfo(&memInfo.free, &memInfo.total)); return memInfo; } +#endif }// namespace refactor::hardware diff --git a/src/02hardware/src/devices/mlu/functions.hh b/src/02hardware/src/devices/mlu/functions.hh index 0244e01f0..f12faab4b 100644 --- a/src/02hardware/src/devices/mlu/functions.hh +++ b/src/02hardware/src/devices/mlu/functions.hh @@ -1,14 +1,17 @@ #ifndef HARDWARE_DEVICES_MLU_FUNCTIONS_CUH #define HARDWARE_DEVICES_MLU_FUNCTIONS_CUH -#include "cnrt.h" #include "common.h" +#ifdef USE_BANG +#include "cnrt.h" + #define BANG_ASSERT(STATUS) \ if (auto status = (STATUS); status != CNRT_RET_SUCCESS) { \ RUNTIME_ERROR(fmt::format("bang failed on \"" #STATUS "\" with \"{}\" ({})", \ cnrtGetErrorStr(status), (int) status)); \ } +#endif namespace refactor::hardware { diff --git a/src/02hardware/src/devices/mlu/memory.cc b/src/02hardware/src/devices/mlu/memory.cc index 81b3c626a..55550314a 100644 --- a/src/02hardware/src/devices/mlu/memory.cc +++ b/src/02hardware/src/devices/mlu/memory.cc @@ -2,6 +2,7 @@ #include "functions.hh" namespace refactor::hardware { +#ifdef USE_BANG using M = MluMemory; void *M::malloc(size_t size) { @@ -27,5 +28,6 @@ namespace refactor::hardware { CNRT_MEM_TRANS_DIR_PEER2PEER)); return dst; } +#endif }// namespace refactor::hardware diff --git a/src/02hardware/src/devices/nvidia/device.cc b/src/02hardware/src/devices/nvidia/device.cc index 403921cba..0f0eb5f68 100644 --- a/src/02hardware/src/devices/nvidia/device.cc +++ b/src/02hardware/src/devices/nvidia/device.cc @@ -1,4 +1,4 @@ -#include "functions.cuh" +#include "functions.cuh" #include "hardware/devices/nvidia.h" #include "hardware/mem_pool.h" #include "memory.cuh" @@ -6,6 +6,7 @@ namespace refactor::hardware { static Arc cudaMemory(int32_t card) { +#ifdef USE_CUDA ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card); setDevice(card); auto [free, total] = getMemInfo(); @@ -16,6 +17,9 @@ namespace refactor::hardware { std::make_shared(), size, 256ul); +#else + return nullptr; +#endif } Nvidia::Nvidia(int32_t card) : Device(card, cudaMemory(card)) {} From b3b7d09e2c064d85fc29c84f2051f5f657d5115c Mon Sep 17 00:00:00 2001 From: kilinchange Date: Fri, 26 Jan 2024 16:50:48 +0800 Subject: [PATCH 6/6] fix(hardware): fix file encoding bug and include bug --- src/02hardware/src/device_manager.cpp | 2 +- src/02hardware/src/devices/nvidia/device.cc | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/02hardware/src/device_manager.cpp b/src/02hardware/src/device_manager.cpp index c3ed1b994..15ae3b901 100644 --- a/src/02hardware/src/device_manager.cpp +++ b/src/02hardware/src/device_manager.cpp @@ -1,4 +1,4 @@ -#include "hardware/device_manager.h" +#include "hardware/device_manager.h" #include "hardware/devices/cpu.h" #include "hardware/devices/mlu.h" #include "hardware/devices/nvidia.h" diff --git a/src/02hardware/src/devices/nvidia/device.cc b/src/02hardware/src/devices/nvidia/device.cc index e298d378a..7b25cc1f8 100644 --- a/src/02hardware/src/devices/nvidia/device.cc +++ b/src/02hardware/src/devices/nvidia/device.cc @@ -1,5 +1,4 @@ -#include "functions.cuh" -#include "hardware/devices/nvidia.h" +#include "hardware/devices/nvidia.h" #include "hardware/mem_pool.h" #ifdef USE_CUDA