Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION})
# Build switches. ABSL_PROPAGATE_CXX_STD is an Abseil setting and defaults to ON;
# every accelerator backend below is opt-in and defaults to OFF.
option(ABSL_PROPAGATE_CXX_STD "Abseil need this option" ON)
option(USE_CUDA "Support Nvidia GPU" OFF)
option(USE_KUNLUN "Support Baidu Kunlunxin" OFF)
# USE_BANG enables the Cambricon MLU section of this file (the `if (USE_BANG)` block).
option(USE_BANG "Support Hanwuji MLU" OFF)

# Build everything as C++20 and refuse to fall back to an older standard.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
Expand Down Expand Up @@ -41,6 +42,38 @@ if(USE_KUNLUN)
message(STATUS "KUNLUN_HOME: ${KUNLUN_HOME}")
endif()

if (USE_BANG)
  # Cambricon MLU (BANG) backend: define the USE_BANG macro for all sources and
  # locate the Neuware SDK libraries (cnnl / cnrt / cndrv).
  add_compile_definitions(USE_BANG)
  include_directories(src/kernels/mlu/include)

  # Neuware environment. A CMake variable (-DNEUWARE_HOME=...) takes precedence
  # over the NEUWARE_HOME environment variable; one of the two is mandatory.
  # The values are quoted so that an empty value or a path containing ';' is
  # still passed to set() as a single argument.
  if ((NOT DEFINED NEUWARE_HOME) AND (NOT DEFINED ENV{NEUWARE_HOME}))
    message(FATAL_ERROR "NEUWARE_HOME is not defined from cmake or env")
  elseif (DEFINED NEUWARE_HOME)
    set(NEUWARE_HOME "${NEUWARE_HOME}" CACHE PATH "NEUWARE_HOME directory for Cambricon Neuware development")
  else()
    set(NEUWARE_HOME "$ENV{NEUWARE_HOME}" CACHE PATH "NEUWARE_HOME directory for Cambricon Neuware development")
  endif()
  message(STATUS "NEUWARE_HOME: ${NEUWARE_HOME}")

  # cnrt cndrv cnnl
  include_directories("${NEUWARE_HOME}/include")
  # HINTS makes the SDK directory win over the default system locations, so the
  # libraries that get linked match the headers added just above.
  find_library(CAMBRICON_CNNL NAMES libcnnl.so HINTS "${NEUWARE_HOME}/lib64")
  find_library(CAMBRICON_CNRT NAMES libcnrt.so HINTS "${NEUWARE_HOME}/lib64")
  find_library(CAMBRICON_CNDRV NAMES libcndrv.so HINTS "${NEUWARE_HOME}/lib64")
  # Fail here with a precise message instead of letting a *-NOTFOUND value reach
  # target_link_libraries() in a subdirectory.
  foreach (neuware_lib_var CAMBRICON_CNNL CAMBRICON_CNRT CAMBRICON_CNDRV)
    if (NOT ${neuware_lib_var})
      message(FATAL_ERROR
        "${neuware_lib_var}: library not found (searched ${NEUWARE_HOME}/lib64 and the default system paths)")
    endif()
  endforeach()
  # NOTE(review): -lstdc++ is a link-time flag riding along in the compile
  # flags; it is kept unchanged so the resulting command lines stay the same.
  string(APPEND CMAKE_CXX_FLAGS " -lstdc++ -Wall")

  # Target CPU architecture: CMake variable first, then the environment
  # variable, otherwise whatever `uname -m` reports on the build host.
  if ((NOT DEFINED TARGET_CPU_ARCH) AND (NOT DEFINED ENV{TARGET_CPU_ARCH}))
    execute_process(COMMAND uname -m OUTPUT_VARIABLE _uname_m OUTPUT_STRIP_TRAILING_WHITESPACE)
    set(TARGET_CPU_ARCH "${_uname_m}" CACHE STRING "Target CPU ARCH")
  elseif (DEFINED TARGET_CPU_ARCH)
    set(TARGET_CPU_ARCH "${TARGET_CPU_ARCH}" CACHE STRING "Target CPU ARCH")
  else()
    set(TARGET_CPU_ARCH "$ENV{TARGET_CPU_ARCH}" CACHE STRING "Target CPU ARCH")
  endif()
  message(STATUS "TARGET_CPU_ARCH: ${TARGET_CPU_ARCH}")
endif()

# -march=native / -mtune=native tune the build for the machine that runs the
# compiler and cause errors on some machines. They stay enabled by default and
# can be switched off with -DUSE_NATIVE_ARCH=OFF.
option(USE_NATIVE_ARCH "Compile with -march=native -mtune=native" ON)
if (USE_NATIVE_ARCH)
  add_compile_options(-march=native)
  add_compile_options(-mtune=native)
endif()
add_compile_options(-Wall)
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
TYPE ?= Debug
CUDA ?= OFF
KUNLUN ?= OFF
BANG ?= OFF

CMAKE_EXTRA =
# CMAKE_EXTRA += -DCMAKE_CXX_COMPILER=

build:
mkdir -p build
cmake -Bbuild -DCMAKE_BUILD_TYPE=$(TYPE) -DUSE_CUDA=$(CUDA) -DUSE_KUNLUN=$(KUNLUN) $(CMAKE_EXTRA)
cmake -Bbuild -DCMAKE_BUILD_TYPE=$(TYPE) -DUSE_CUDA=$(CUDA) -DUSE_KUNLUN=$(KUNLUN) -DUSE_BANG=$(BANG) $(CMAKE_EXTRA)
make -j -C build

install-python: build
Expand Down
2 changes: 2 additions & 0 deletions src/02hardware/include/hardware/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ namespace refactor::hardware {
// Kind of hardware a device represents.
// The enumerators have no explicit initializers, so they take consecutive
// values starting at 0; reordering them changes each kind's numeric value.
enum class Type : int32_t {
Cpu,
// Nvidia GPU.
Nvidia,
// Cambricon MLU.
Mlu,
// Baidu Kunlunxin.
Kunlun,
};

protected:
Expand Down
19 changes: 19 additions & 0 deletions src/02hardware/include/hardware/devices/mlu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#ifndef HARDWARE_DEVICES_MLU_H
#define HARDWARE_DEVICES_MLU_H

#include "../device.h"

namespace refactor::hardware {

// Device implementation for a Cambricon MLU card; reports itself as `Type::Mlu`.
class Mlu final : public Device {
public:
// Creates the device object for the card with index `card`.
explicit Mlu(int32_t card);
// Selects this device's card for subsequent runtime calls.
void setContext() const noexcept final;
// Identifies this device as an MLU.
Type type() const noexcept final {
return Type::Mlu;
}
};

}// namespace refactor::hardware

#endif// HARDWARE_DEVICES_MLU_H
2 changes: 2 additions & 0 deletions src/02hardware/src/device_manager.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "hardware/device_manager.h"
#include "hardware/devices/cpu.h"
#include "hardware/devices/mlu.h"
#include "hardware/devices/nvidia.h"

namespace refactor::hardware::device {
Expand Down Expand Up @@ -37,6 +38,7 @@ namespace refactor::hardware::device {
using T = Device::Type;
// clang-format off
auto device = type == T::Nvidia ? std::make_shared<Nvidia>(card)
: type == T::Mlu ? std::make_shared<Mlu>(card)
: UNREACHABLEX(Arc<Device>, "");
// clang-format on
auto [kind, ok] = DEVICES.try_emplace(static_cast<int32_t>(type));
Expand Down
10 changes: 5 additions & 5 deletions src/02hardware/src/devices/cpu/memory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@
namespace refactor::hardware {
using M = CpuMemory;

void *M::malloc(size_t size) noexcept {
void *M::malloc(size_t size) {
return std::malloc(size);
}
void M::free(void *ptr) noexcept {
void M::free(void *ptr) {
std::free(ptr);
}
void *M::copyHD(void *dst, void const *src, size_t bytes) const noexcept {
void *M::copyHD(void *dst, void const *src, size_t bytes) const {
return std::memcpy(dst, src, bytes);
}
void *M::copyDH(void *dst, void const *src, size_t bytes) const noexcept {
void *M::copyDH(void *dst, void const *src, size_t bytes) const {
return std::memcpy(dst, src, bytes);
}
void *M::copyDD(void *dst, void const *src, size_t bytes) const noexcept {
void *M::copyDD(void *dst, void const *src, size_t bytes) const {
return std::memcpy(dst, src, bytes);
}

Expand Down
10 changes: 5 additions & 5 deletions src/02hardware/src/devices/cpu/memory.hh
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
namespace refactor::hardware {

class CpuMemory final : public Memory {
void *malloc(size_t) noexcept final;
void free(void *) noexcept final;
void *copyHD(void *dst, void const *src, size_t bytes) const noexcept final;
void *copyDH(void *dst, void const *src, size_t bytes) const noexcept final;
void *copyDD(void *dst, void const *src, size_t bytes) const noexcept final;
void *malloc(size_t) final;
void free(void *) final;
void *copyHD(void *dst, void const *src, size_t bytes) const final;
void *copyDH(void *dst, void const *src, size_t bytes) const final;
void *copyDD(void *dst, void const *src, size_t bytes) const final;
};

}// namespace refactor::hardware
Expand Down
31 changes: 31 additions & 0 deletions src/02hardware/src/devices/mlu/device.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "functions.hh"
#include "hardware/devices/mlu.h"
#include "hardware/mem_pool.h"
#include "memory.hh"

namespace refactor::hardware {

// Builds the memory manager for MLU card `card`: a MemPool layered on top of
// MluMemory. Returns nullptr when the build has no BANG support.
static Arc<Memory> bangMemory(int32_t card) {
#ifndef USE_BANG
    return nullptr;
#else
    ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card);
    setDevice(card);

    auto const info = getMemInfo();
    // Aim for 4/5 of the card's total memory, but never less than 5 GiB ...
    auto const wanted = std::max(5ul << 30, info.total * 4 / 5);
    // ... and never more than what is currently free.
    auto size = std::min(info.free, wanted);
    fmt::println("initializing Cambricon MLU {}, memory {} / {}, alloc {}",
                 card, info.free, info.total, size);

    return std::make_shared<MemPool>(std::make_shared<MluMemory>(), size, 256ul);
#endif
}

// Constructs the device for MLU card `card`. Its memory manager comes from
// bangMemory(card), which is nullptr when the build has no BANG support.
Mlu::Mlu(int32_t card) : Device(card, bangMemory(card)) {}

// Makes this device's card the current MLU device.
// setDevice() is only defined when USE_BANG is set, so without it this is a
// no-op rather than a reference to a symbol that does not exist.
// NOTE(review): setDevice() reports failures through BANG_ASSERT; if that
// throws, the noexcept here turns the failure into std::terminate. Confirm
// this is intended.
void Mlu::setContext() const noexcept {
#ifdef USE_BANG
    setDevice(_card);
#endif
}

}// namespace refactor::hardware
21 changes: 21 additions & 0 deletions src/02hardware/src/devices/mlu/functions.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include "functions.hh"

namespace refactor::hardware {

#ifdef USE_BANG
// Returns the number of devices reported by cnrtGetDeviceCount.
// A failing CNRT call is reported through BANG_ASSERT.
int getDeviceCount() {
unsigned deviceCount;
BANG_ASSERT(cnrtGetDeviceCount(&deviceCount));
// CNRT fills in an unsigned count; it is returned as int to match this function's signature.
return static_cast<int>(deviceCount);
}
// Selects device `device` through cnrtSetDevice.
// A failing CNRT call is reported through BANG_ASSERT.
void setDevice(int device) {
BANG_ASSERT(cnrtSetDevice(device));
}
// Queries cnrtMemGetInfo and returns the free and total amounts it reports.
// NOTE(review): presumably refers to the device last selected with setDevice() - confirm.
// A failing CNRT call is reported through BANG_ASSERT.
MemInfo getMemInfo() {
MemInfo memInfo;
BANG_ASSERT(cnrtMemGetInfo(&memInfo.free, &memInfo.total));
return memInfo;
}
#endif

}// namespace refactor::hardware
28 changes: 28 additions & 0 deletions src/02hardware/src/devices/mlu/functions.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Thin helpers around the Cambricon runtime (CNRT) used by the MLU device code.
#ifndef HARDWARE_DEVICES_MLU_FUNCTIONS_CUH
#define HARDWARE_DEVICES_MLU_FUNCTIONS_CUH

#include "common.h"

#ifdef USE_BANG
#include "cnrt.h"

// Evaluates the CNRT call STATUS once; if the result is not CNRT_RET_SUCCESS,
// raises RUNTIME_ERROR with the stringified call text, cnrtGetErrorStr() and
// the numeric status code.
// NOTE(review): this expands to a bare `if` statement, so when used as the
// unbraced body of an if/else the `else` can attach to the macro's `if`.
// Call it inside braces.
#define BANG_ASSERT(STATUS) \
if (auto status = (STATUS); status != CNRT_RET_SUCCESS) { \
RUNTIME_ERROR(fmt::format("bang failed on \"" #STATUS "\" with \"{}\" ({})", \
cnrtGetErrorStr(status), (int) status)); \
}
#endif

namespace refactor::hardware {

// Free and total device memory as reported by getMemInfo() (presumably in bytes - confirm).
struct MemInfo {
size_t free, total;
};

// NOTE: these functions are declared unconditionally, but the definitions in
// functions.cc exist only when USE_BANG is defined.
int getDeviceCount();
void setDevice(int device);
MemInfo getMemInfo();

}// namespace refactor::hardware

#endif// HARDWARE_DEVICES_MLU_FUNCTIONS_CUH
33 changes: 33 additions & 0 deletions src/02hardware/src/devices/mlu/memory.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include "memory.hh"
#include "functions.hh"

namespace refactor::hardware {
#ifdef USE_BANG
using M = MluMemory;

// Allocates `size` bytes with cnrtMalloc and returns the resulting pointer.
// A failing CNRT call is reported through BANG_ASSERT.
void *M::malloc(size_t size) {
// Filled in by cnrtMalloc below.
void *ptr;
BANG_ASSERT(cnrtMalloc(&ptr, size));
return ptr;
}
// Releases `ptr` with cnrtFree.
// A failing CNRT call is reported through BANG_ASSERT.
void M::free(void *ptr) {
BANG_ASSERT(cnrtFree(ptr));
}
// Copies `bytes` bytes from `src` to `dst` with cnrtMemcpy in the host-to-device
// direction and returns `dst`.
// The const_cast is there because `src` is const here while cnrtMemcpy is
// called with a non-const pointer.
// A failing CNRT call is reported through BANG_ASSERT.
void *M::copyHD(void *dst, void const *src, size_t bytes) const {
    // Terminated with ';' like the sibling copy functions, so the statement
    // does not depend on BANG_ASSERT expanding to a braced block.
    BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
                           CNRT_MEM_TRANS_DIR_HOST2DEV));
    return dst;
}
// Copies `bytes` bytes from `src` to `dst` with cnrtMemcpy in the device-to-host
// direction and returns `dst`.
// A failing CNRT call is reported through BANG_ASSERT.
void *M::copyDH(void *dst, void const *src, size_t bytes) const {
BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
CNRT_MEM_TRANS_DIR_DEV2HOST));
return dst;
}
// Copies `bytes` bytes from `src` to `dst` with cnrtMemcpy and returns `dst`.
// NOTE(review): the direction passed is PEER2PEER; confirm against the CNRT
// documentation that this is the right direction when both pointers live on
// the same device.
// A failing CNRT call is reported through BANG_ASSERT.
void *M::copyDD(void *dst, void const *src, size_t bytes) const {
BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
CNRT_MEM_TRANS_DIR_PEER2PEER));
return dst;
}
#endif

}// namespace refactor::hardware
18 changes: 18 additions & 0 deletions src/02hardware/src/devices/mlu/memory.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef HARDWARE_DEVICES_MLU_MEMORY_CUH
#define HARDWARE_DEVICES_MLU_MEMORY_CUH

#include "hardware/memory.h"

namespace refactor::hardware {

// Memory implementation for Cambricon MLU devices.
// All overrides are private (no access specifier is given), so they are
// presumably reached only through the Memory base interface.
class MluMemory final : public Memory {
// Allocates a block of the given size.
void *malloc(size_t) final;
// Releases a block.
void free(void *) final;
// Copy host -> device; returns `dst`.
void *copyHD(void *dst, void const *src, size_t bytes) const final;
// Copy device -> host; returns `dst`.
void *copyDH(void *dst, void const *src, size_t bytes) const final;
// Copy device -> device; returns `dst`.
void *copyDD(void *dst, void const *src, size_t bytes) const final;
};

}// namespace refactor::hardware

#endif// HARDWARE_DEVICES_MLU_MEMORY_CUH
2 changes: 1 addition & 1 deletion src/02hardware/src/devices/nvidia/device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ namespace refactor::hardware {
size,
alignment);
#else
RUNTIME_ERROR("CUDA is not enabled");
return nullptr;
#endif
}

Expand Down
3 changes: 3 additions & 0 deletions src/04kernel/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ if(USE_KUNLUN)
find_library(KUNLUN_DNN libxpuapi.so ${KUNLUN_HOME}/XTDK/shlib)
target_link_libraries(kernel PUBLIC ${KUNLUN_RT} ${KUNLUN_DNN})
endif()
# Cambricon MLU backend: link the Neuware libraries that the top-level
# CMakeLists.txt located with find_library() (cnnl, cnrt, cndrv).
if(USE_BANG)
    target_link_libraries(kernel
        PUBLIC
            ${CAMBRICON_CNNL}
            ${CAMBRICON_CNRT}
            ${CAMBRICON_CNDRV})
endif()

file(GLOB_RECURSE KERNEL_TEST test/*.cpp)
if(KERNEL_TEST)
Expand Down