Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ Please check all the platforms and/or backends this PR affects (i.e., code is to
- [ ] NVIDIA GPU
- [ ] Iluvatar GPU
- [ ] MetaX GPU
- [ ] Moore Threads GPU
- [ ] Cambricon MLU

### Backend
Expand Down Expand Up @@ -102,6 +103,7 @@ See `CONTRIBUTING.md` § Pull Requests for the official testing requirements and
- [ ] NVIDIA GPU
- [ ] Iluvatar GPU
- [ ] MetaX GPU
- [ ] Moore Threads GPU
- [ ] Cambricon MLU

### Test Involved Backend
Expand Down
67 changes: 67 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
option(WITH_NVIDIA "Enable NVIDIA GPU support" OFF)
option(WITH_ILUVATAR "Enable ILUVATAR GPU support" OFF)
option(WITH_METAX "Enable MetaX GPU support" OFF)
option(WITH_MOORE "Enable Moore GPU support" OFF)
option(WITH_CAMBRICON "Enable Cambricon MLU support" OFF)

set(WITH_CPU ON CACHE INTERNAL "CPU backend is always enabled")
Expand Down Expand Up @@ -114,6 +115,38 @@ if(AUTO_DETECT_DEVICES)
endif()
endif()

# MThreads (Moore)
# Auto-detection turns `WITH_MOORE` on when any of the following holds:
#   1. `MUSA_ROOT`, `MUSA_HOME` or `MUSA_PATH` is set to a non-empty value,
#   2. a `/dev/mtgpu*` device node exists,
#   3. `mthreads-gmi -L` can be run and exits with status 0.
set(MOORE_FOUND FALSE)

file(GLOB MOORE_DEV_FILES "/dev/mtgpu*")

# An environment variable that is defined but empty is treated as unset, which
# matches how the `WITH_MOORE` configuration step resolves the SDK location.
foreach(_moore_env MUSA_ROOT MUSA_HOME MUSA_PATH)
  if(NOT "$ENV{${_moore_env}}" STREQUAL "")
    set(MOORE_FOUND TRUE)
    break()
  endif()
endforeach()

if(NOT MOORE_FOUND AND MOORE_DEV_FILES)
  set(MOORE_FOUND TRUE)
endif()

# Only fall back to running the management tool when the cheaper checks failed.
if(NOT MOORE_FOUND)
  find_program(MOORE_SMI_PATH mthreads-gmi)

  if(MOORE_SMI_PATH)
    execute_process(
      COMMAND "${MOORE_SMI_PATH}" -L
      RESULT_VARIABLE SMI_RESULT
      OUTPUT_QUIET
      ERROR_QUIET
    )
    if(SMI_RESULT EQUAL 0)
      set(MOORE_FOUND TRUE)
    endif()
  endif()
endif()

if(MOORE_FOUND)
  set(WITH_MOORE ON)
  message(STATUS "Auto-detected Moore environment.")
else()
  message(STATUS "Moore environment not detected.")
endif()

# Cambricon
set(CAMBRICON_FOUND FALSE)

Expand Down Expand Up @@ -248,6 +281,40 @@ if(WITH_METAX)
find_library(MACA_RUNTIME_LIB NAMES mcruntime HINTS "${MACA_PATH}/lib" REQUIRED)
endif()

if(WITH_MOORE)
  # Locate the MUSA SDK: the first non-empty environment variable among
  # `MUSA_ROOT`, `MUSA_HOME` and `MUSA_PATH` wins, with `/usr/local/musa` as the
  # fallback. `scripts/devices/mcc_wrapper.sh` resolves the SDK from the same
  # environment variables when it runs, so they should not change between
  # configure and build.
  set(MUSA_ROOT "")
  foreach(_musa_env MUSA_ROOT MUSA_HOME MUSA_PATH)
    if(NOT "$ENV{${_musa_env}}" STREQUAL "")
      set(MUSA_ROOT "$ENV{${_musa_env}}")
      break()
    endif()
  endforeach()

  if(NOT MUSA_ROOT AND EXISTS "/usr/local/musa")
    set(MUSA_ROOT "/usr/local/musa")
  endif()

  if(NOT MUSA_ROOT)
    message(FATAL_ERROR "`WITH_MOORE` is `ON` but `MUSA_ROOT`/`MUSA_HOME`/`MUSA_PATH` is not set and `/usr/local/musa` was not found.")
  endif()

  if(NOT EXISTS "${MUSA_ROOT}/bin/mcc")
    message(FATAL_ERROR "Could not find `mcc` under `${MUSA_ROOT}/bin`.")
  endif()

  # Route C and C++ compilation through the wrapper script, which filters the
  # command line before invoking `mcc`.
  get_filename_component(MCC_WRAPPER_ABS "${CMAKE_CURRENT_SOURCE_DIR}/scripts/devices/mcc_wrapper.sh" ABSOLUTE)
  if(NOT EXISTS "${MCC_WRAPPER_ABS}")
    # Fail with a clear message instead of letting `file(CHMOD)` report it.
    message(FATAL_ERROR "Could not find the `mcc` wrapper script at `${MCC_WRAPPER_ABS}`.")
  endif()
  # The script must be executable to be usable as a compiler.
  file(CHMOD "${MCC_WRAPPER_ABS}"
    PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
  set(CMAKE_C_COMPILER "${MCC_WRAPPER_ABS}")
  set(CMAKE_CXX_COMPILER "${MCC_WRAPPER_ABS}")

  # Directory-scoped on purpose: targets created after this point pick up the
  # MUSA headers and library directory.
  include_directories("${MUSA_ROOT}/include")
  link_directories("${MUSA_ROOT}/lib")

  # `REQUIRED` makes the configure step fail if any library is missing.
  find_library(MUSA_LIB NAMES musa HINTS "${MUSA_ROOT}/lib" REQUIRED)
  find_library(MUSART_LIB NAMES musart HINTS "${MUSA_ROOT}/lib" REQUIRED)
  find_library(MUBLAS_LIB NAMES mublas HINTS "${MUSA_ROOT}/lib" REQUIRED)
endif()

if(WITH_CAMBRICON)
set(NEUWARE_HOME $ENV{NEUWARE_HOME})

Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ cmake .. -DWITH_NVIDIA=ON -DWITH_OMPI=ON
| `WITH_NVIDIA` | Enable NVIDIA GPU support | `OFF` |
| `WITH_ILUVATAR` | Enable Iluvatar GPU support | `OFF` |
| `WITH_METAX` | Enable MetaX GPU support | `OFF` |
| `WITH_MOORE` | Enable Moore Threads GPU support | `OFF` |
| `WITH_CAMBRICON` | Enable Cambricon MLU support | `OFF` |
| `WITH_CPU` | CPU support (always enabled) | `ON` (internal, not user‑settable) |
| **Backend (Communication) Options** |||
Expand Down Expand Up @@ -282,6 +283,7 @@ export LD_LIBRARY_PATH=${INFINI_INSTALL}/lib:$LD_LIBRARY_PATH
| **NVIDIA** | Full | Requires CUDA Toolkit. |
| **Iluvatar** | Full | Requires Iluvatar CoreX SDK. |
| **MetaX** | Full | Requires MACA SDK and `MACA_PATH` (default `/opt/maca`) to be set. |
| **Moore Threads** | Full | Requires MUSA SDK. Its location is read from `MUSA_ROOT`, `MUSA_HOME`, or `MUSA_PATH`; if none is set, `/usr/local/musa` is used. |
| **Cambricon** | Full | Requires CNToolKit and `NEUWARE_HOME` to be set. |

</details>
Expand Down
5 changes: 5 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ foreach(source_file ${EXAMPLE_SOURCES})
target_link_libraries(${example_name} PRIVATE ${MACA_RUNTIME_LIB})
endif()

# Moore: link the example against the MUSA runtime library (`MUSART_LIB`) and
# pass `-x musa` to the compiler (presumably so the example sources are
# compiled as MUSA code -- confirm against the `mcc` documentation).
if(WITH_MOORE)
target_link_libraries(${example_name} PRIVATE ${MUSART_LIB})
target_compile_options(${example_name} PRIVATE "-x" "musa")
endif()

if(WITH_CAMBRICON)
target_link_libraries(${example_name} PRIVATE ${CAMBRICON_RUNTIME_LIB})
endif()
Expand Down
48 changes: 48 additions & 0 deletions scripts/devices/mcc_wrapper.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
# Compiler wrapper around `mcc`.
#
# 1. Filters out flags unsupported by `mcc`: `-pthread`, `-B <dir>` and
#    `-B<dir>` are dropped; everything else is forwarded unchanged.
# 2. When `g++` is available, prepends its libstdc++ header directories and,
#    for link steps, the directory containing `libstdc++.so`.
# 3. Replaces itself with `${MUSA_ROOT_DIR}/bin/mcc`, where the SDK root is the
#    first non-empty value of `MUSA_ROOT`, `MUSA_HOME`, `MUSA_PATH`, falling
#    back to `/usr/local/musa`.
ARGS=()
skip_next=0   # 1 => the current argument is the operand of a dropped `-B`.
linking=1     # Cleared when `-c`, `-E` or `-S` shows this is not a link step.
for arg in "$@"; do
  if [ "$skip_next" -eq 1 ]; then
    skip_next=0
    continue
  fi
  case "$arg" in
    -c|-E|-S)
      linking=0
      ARGS+=("$arg")
      ;;
    -pthread)
      ;;
    -B)
      skip_next=1
      ;;
    -B*)
      ;;
    *)
      ARGS+=("$arg")
      ;;
  esac
done

MUSA_ROOT_DIR="${MUSA_ROOT:-${MUSA_HOME:-${MUSA_PATH:-/usr/local/musa}}}"

if command -v g++ >/dev/null 2>&1; then
  GXX_MAJOR="$(g++ -dumpversion | cut -d. -f1)"
  # Ask g++ for its target triplet instead of assuming an x86_64 host, so the
  # architecture-specific header directory is also right on other hosts.
  # Fall back to the previous hard-coded value if g++ reports nothing.
  GXX_TRIPLET="$(g++ -dumpmachine 2>/dev/null)"
  GXX_TRIPLET="${GXX_TRIPLET:-x86_64-linux-gnu}"
  if [ -d "/usr/include/c++/${GXX_MAJOR}" ]; then
    ARGS=(
      "-isystem" "/usr/include/c++/${GXX_MAJOR}"
      "-isystem" "/usr/include/${GXX_TRIPLET}/c++/${GXX_MAJOR}"
      "-isystem" "/usr/include/c++/${GXX_MAJOR}/backward"
      "${ARGS[@]}"
    )
  fi

  # The library search path is only relevant when linking.
  STDCPP_LIB="$(g++ -print-file-name=libstdc++.so)"
  if [ "$linking" -eq 1 ] && [ -f "${STDCPP_LIB}" ]; then
    ARGS=("-L$(dirname "${STDCPP_LIB}")" "${ARGS[@]}")
  fi
fi

exec "${MUSA_ROOT_DIR}/bin/mcc" "${ARGS[@]}"
5 changes: 5 additions & 0 deletions scripts/icclrun_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ def ensure_launcher_exists(self):
'grep -l "9999" /sys/bus/pci/devices/*/vendor >/dev/null 2>&1'
)

elif n_type == "moore":
condition = (
'[ -c "/dev/mtgpu.0" ] || [ -x "$(command -v mthreads-gmi)" ]'
)

elif n_type == "cambricon":
condition = (
'[ -n "${NEUWARE_HOME}" ] || command -v cnmon >/dev/null 2>&1'
Expand Down
2 changes: 2 additions & 0 deletions scripts/run_wrapper.sh
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ if [ -c "/dev/nvidia0" ] || [ -x "$(command -v nvidia-smi)" ]; then
ARCH="nvidia"
elif grep -l "9999" /sys/bus/pci/devices/*/vendor >/dev/null 2>&1 || [ -d "/opt/maca" ]; then
ARCH="metax"
elif [ -c "/dev/mtgpu.0" ] || [ -x "$(command -v mthreads-gmi)" ]; then
ARCH="moore"
elif [ -n "${NEUWARE_HOME}" ] || [ -x "$(command -v cnmon)" ]; then
ARCH="cambricon"
else
Expand Down
23 changes: 23 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,29 @@ if(WITH_METAX)
target_compile_options(infiniccl PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-x maca>)
endif()

# Mthreads (Moore)
# Adds the Moore backend to `infiniccl`: registers the device, compiles the
# shared `cuda/` sources together with the `moore/` ones, and links the MUSA
# libraries located by the top-level CMakeLists.txt.
if(WITH_MOORE)
  list(APPEND DEVICE_LIST "moore")

  set(MOORE_PATTERNS
    "cuda/*.cc"
    "cuda/*.cpp"
    "moore/*.cc"
    "moore/*.cpp"
    "moore/*.mu"
  )
  file(GLOB MOORE_SOURCES ${MOORE_PATTERNS})

  # Mark every matched file (including `.mu`) as CXX so the `-x musa` option
  # below, which is gated on the CXX language, applies to it.
  set_source_files_properties(${MOORE_SOURCES} PROPERTIES LANGUAGE CXX)

  target_sources(infiniccl PRIVATE ${MOORE_SOURCES})

  target_include_directories(infiniccl PRIVATE "${MUSA_ROOT}/include")
  target_link_libraries(infiniccl PRIVATE ${MUSA_LIB} ${MUSART_LIB} ${MUBLAS_LIB})
  # Quoted so the generator expression stays a single argument; the `;` makes
  # it expand to the two options `-x` and `musa`.
  target_compile_options(infiniccl PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:-x;musa>")
endif()

# Cambricon
if(WITH_CAMBRICON)
list(APPEND DEVICE_LIST "cambricon")
Expand Down
5 changes: 5 additions & 0 deletions src/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,11 @@ struct DevicePriority<Device::Type::kMetax> {
static constexpr int value = 5;
};

// Priority assigned to the Moore device type. It uses the same value (5) as
// the adjacent MetaX and Cambricon specializations; how `value` is consumed
// is defined outside this view.
template <>
struct DevicePriority<Device::Type::kMoore> {
static constexpr int value = 5;
};

template <>
struct DevicePriority<Device::Type::kCambricon> {
static constexpr int value = 5;
Expand Down
26 changes: 26 additions & 0 deletions src/moore/data_type_.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#ifndef INFINI_CCL_MOORE_DATA_TYPE__H_
#define INFINI_CCL_MOORE_DATA_TYPE__H_

// clang-format off
#include <musa_bf16.h>
#include <musa_fp16.h>
// clang-format on

#include "data_type_impl.h"
#include "moore/device_.h"

namespace infini::ccl {

// Maps `DataType::kFloat16` on Moore devices to `half` (presumably provided
// by <musa_fp16.h> -- confirm).
template <>
struct TypeMap<Device::Type::kMoore, DataType::kFloat16> {
using type = half;
};

// Maps `DataType::kBFloat16` on Moore devices to `__mt_bfloat16` (presumably
// provided by <musa_bf16.h> -- confirm).
template <>
struct TypeMap<Device::Type::kMoore, DataType::kBFloat16> {
using type = __mt_bfloat16;
};

} // namespace infini::ccl

#endif // INFINI_CCL_MOORE_DATA_TYPE__H_
13 changes: 13 additions & 0 deletions src/moore/device_.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#ifndef INFINI_CCL_MOORE_DEVICE__H_
#define INFINI_CCL_MOORE_DEVICE__H_

#include "device.h"

namespace infini::ccl {

// Flags the Moore device type as enabled by deriving the `DeviceEnabled`
// specialization from `std::true_type`.
// NOTE(review): `std::true_type` needs <type_traits>; this header appears to
// rely on "device.h" to include it -- confirm.
template <>
struct DeviceEnabled<Device::Type::kMoore> : std::true_type {};

} // namespace infini::ccl

#endif // INFINI_CCL_MOORE_DEVICE__H_
62 changes: 62 additions & 0 deletions src/moore/runtime_.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#ifndef INFINI_CCL_MOORE_RUNTIME_H_
#define INFINI_CCL_MOORE_RUNTIME_H_

#include <utility>

// clang-format off
#include <musa_runtime.h>
// clang-format on

#include "cuda/runtime_.h"
#include "logging.h"
#include "moore/device_.h"
#include "return_status_impl.h"

namespace infini::ccl {

// Runtime bindings for the Moore device type, expressed in terms of the MUSA
// runtime API. The struct derives from `CudaRuntime` parameterized on itself;
// `CudaRuntime` is not defined in this file (presumably it comes from
// "cuda/runtime_.h" -- confirm).
template <>
struct Runtime<Device::Type::kMoore>
: CudaRuntime<Runtime<Device::Type::kMoore>> {
// Stream handle type used by this runtime.
using Stream = musaStream_t;

static constexpr Device::Type kDeviceType = Device::Type::kMoore;

// Converts a MUSA status into a `ReturnStatus`. Any value other than
// `musaSuccess` is logged via `musaGetErrorString` and reported as `err_code`
// (default `ReturnStatus::kSystemError`); otherwise `kSuccess` is returned.
static constexpr auto Check =
[](auto status, ReturnStatus err_code = ReturnStatus::kSystemError) {
if (status != musaSuccess) {
LOG(musaGetErrorString(static_cast<musaError_t>(status)));
return err_code;
}
return ReturnStatus::kSuccess;
};

// Forwards all arguments to `musaMalloc`. A forwarding lambda is used here
// rather than the function itself (presumably because `musaMalloc` is
// overloaded -- confirm).
static constexpr auto Malloc = [](auto &&...args) {
return musaMalloc(std::forward<decltype(args)>(args)...);
};

static constexpr auto Memcpy = musaMemcpy;

// Forwards all arguments to `musaFree`.
static constexpr auto Free = [](auto &&...args) {
return musaFree(std::forward<decltype(args)>(args)...);
};

// Copy-direction constants passed to `Memcpy`.
static constexpr auto MemcpyHostToDevice = musaMemcpyHostToDevice;

static constexpr auto MemcpyDeviceToHost = musaMemcpyDeviceToHost;

static constexpr auto Memset = musaMemset;

static constexpr auto SetDevice = musaSetDevice;

// Forwards all arguments to `musaDeviceSynchronize`.
static constexpr auto DeviceSynchronize = [](auto &&...args) {
return musaDeviceSynchronize(std::forward<decltype(args)>(args)...);
};

static constexpr auto StreamSynchronize = musaStreamSynchronize;
};

// Compile-time check of the specialization. `Validate()` is not defined in
// this file (presumably inherited from `CudaRuntime` -- confirm what it
// verifies).
static_assert(Runtime<Device::Type::kMoore>::Validate());

} // namespace infini::ccl

#endif // INFINI_CCL_MOORE_RUNTIME_H_
Loading