From 2019c098f1da02b16710d9399c6951140a83909d Mon Sep 17 00:00:00 2001 From: Zimin Li Date: Tue, 2 Jun 2026 14:19:49 +0800 Subject: [PATCH 1/5] feat: support moore threads platform - add mthreads support for the build system - add mthreads support for the runtime and specializations under `src/moore` - update `icclrun_logic.py` to generate mthreads dispatch logic - add the mcc wrapper script --- CMakeLists.txt | 67 ++++++++++++++++++++++++++++++++++ examples/CMakeLists.txt | 5 +++ scripts/devices/mcc_wrapper.sh | 48 ++++++++++++++++++++++++ scripts/icclrun_logic.py | 3 ++ scripts/run_wrapper.sh | 2 + src/CMakeLists.txt | 23 ++++++++++++ src/device.h | 5 +++ src/moore/data_type_.h | 26 +++++++++++++ src/moore/device_.h | 13 +++++++ src/moore/runtime_.h | 62 +++++++++++++++++++++++++++++++ 10 files changed, 254 insertions(+) create mode 100755 scripts/devices/mcc_wrapper.sh mode change 100755 => 100644 scripts/run_wrapper.sh create mode 100644 src/moore/data_type_.h create mode 100644 src/moore/device_.h create mode 100644 src/moore/runtime_.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 10d27c9..743780b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) option(WITH_NVIDIA "Enable NVIDIA GPU support" OFF) option(WITH_ILUVATAR "Enable ILUVATAR GPU support" OFF) option(WITH_METAX "Enable MetaX GPU support" OFF) +option(WITH_MOORE "Enable Moore GPU support" OFF) option(WITH_CAMBRICON "Enable Cambricon MLU support" OFF) set(WITH_CPU ON CACHE INTERNAL "CPU backend is always enabled") @@ -114,6 +115,38 @@ if(AUTO_DETECT_DEVICES) endif() endif() + # MThreads (Moore) + set(MOORE_FOUND FALSE) + + file(GLOB MOORE_DEV_FILES "/dev/mtgpu*") + + if(DEFINED ENV{MUSA_ROOT} OR DEFINED ENV{MUSA_HOME} OR DEFINED ENV{MUSA_PATH}) + set(MOORE_FOUND TRUE) + elseif(MOORE_DEV_FILES) + set(MOORE_FOUND TRUE) + elseif(NOT MOORE_FOUND) + find_program(MOORE_SMI_PATH mthreads-gmi) + + if(MOORE_SMI_PATH) + execute_process( + COMMAND 
${MOORE_SMI_PATH} -L + RESULT_VARIABLE SMI_RESULT + OUTPUT_QUIET + ERROR_QUIET + ) + if(SMI_RESULT EQUAL 0) + set(MOORE_FOUND TRUE) + endif() + endif() + endif() + + if(MOORE_FOUND) + set(WITH_MOORE ON) + message(STATUS "Auto-detected Moore environment.") + else() + message(STATUS "Moore environment not detected.") + endif() + # Cambricon set(CAMBRICON_FOUND FALSE) @@ -248,6 +281,40 @@ if(WITH_METAX) find_library(MACA_RUNTIME_LIB NAMES mcruntime HINTS "${MACA_PATH}/lib" REQUIRED) endif() +if(WITH_MOORE) + set(MUSA_ROOT "") + foreach(_musa_env MUSA_ROOT MUSA_HOME MUSA_PATH) + if(NOT MUSA_ROOT AND DEFINED ENV{${_musa_env}} AND NOT "$ENV{${_musa_env}}" STREQUAL "") + set(MUSA_ROOT "$ENV{${_musa_env}}") + endif() + endforeach() + + if(NOT MUSA_ROOT AND EXISTS "/usr/local/musa") + set(MUSA_ROOT "/usr/local/musa") + endif() + + if(NOT MUSA_ROOT) + message(FATAL_ERROR "`WITH_MOORE` is `ON` but `MUSA_ROOT`/`MUSA_HOME`/`MUSA_PATH` is not set and `/usr/local/musa` was not found.") + endif() + + if(NOT EXISTS "${MUSA_ROOT}/bin/mcc") + message(FATAL_ERROR "Could not find `mcc` under `${MUSA_ROOT}/bin`.") + endif() + + get_filename_component(MCC_WRAPPER_ABS "${CMAKE_CURRENT_SOURCE_DIR}/scripts/devices/mcc_wrapper.sh" ABSOLUTE) + file(CHMOD "${MCC_WRAPPER_ABS}" + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) + set(CMAKE_C_COMPILER "${MCC_WRAPPER_ABS}") + set(CMAKE_CXX_COMPILER "${MCC_WRAPPER_ABS}") + + include_directories("${MUSA_ROOT}/include") + link_directories("${MUSA_ROOT}/lib") + + find_library(MUSA_LIB NAMES musa HINTS "${MUSA_ROOT}/lib" REQUIRED) + find_library(MUSART_LIB NAMES musart HINTS "${MUSA_ROOT}/lib" REQUIRED) + find_library(MUBLAS_LIB NAMES mublas HINTS "${MUSA_ROOT}/lib" REQUIRED) +endif() + if(WITH_CAMBRICON) set(NEUWARE_HOME $ENV{NEUWARE_HOME}) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 836df62..1524711 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -16,6 
+16,11 @@ foreach(source_file ${EXAMPLE_SOURCES}) target_link_libraries(${example_name} PRIVATE ${MACA_RUNTIME_LIB}) endif() + if(WITH_MOORE) + target_link_libraries(${example_name} PRIVATE ${MUSART_LIB}) + target_compile_options(${example_name} PRIVATE "-x" "musa") + endif() + if(WITH_CAMBRICON) target_link_libraries(${example_name} PRIVATE ${CAMBRICON_RUNTIME_LIB}) endif() diff --git a/scripts/devices/mcc_wrapper.sh b/scripts/devices/mcc_wrapper.sh new file mode 100755 index 0000000..29ce5cd --- /dev/null +++ b/scripts/devices/mcc_wrapper.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Filter out flags unsupported by `mcc`. +ARGS=() +skip_next=0 +linking=1 +for arg in "$@"; do + if [ $skip_next -eq 1 ]; then + skip_next=0 + continue + fi + case "$arg" in + -c|-E|-S) + linking=0 + ARGS+=("$arg") + ;; + -pthread) + ;; + -B) + skip_next=1 + ;; + -B*) + ;; + *) + ARGS+=("$arg") + ;; + esac +done + +MUSA_ROOT_DIR="${MUSA_ROOT:-${MUSA_HOME:-${MUSA_PATH:-/usr/local/musa}}}" + +if command -v g++ >/dev/null 2>&1; then + GXX_MAJOR="$(g++ -dumpversion | cut -d. 
-f1)" + if [ -d "/usr/include/c++/${GXX_MAJOR}" ]; then + ARGS=( + "-isystem" "/usr/include/c++/${GXX_MAJOR}" + "-isystem" "/usr/include/x86_64-linux-gnu/c++/${GXX_MAJOR}" + "-isystem" "/usr/include/c++/${GXX_MAJOR}/backward" + "${ARGS[@]}" + ) + fi + + STDCPP_LIB="$(g++ -print-file-name=libstdc++.so)" + if [ $linking -eq 1 ] && [ -f "${STDCPP_LIB}" ]; then + ARGS=("-L$(dirname "${STDCPP_LIB}")" "${ARGS[@]}") + fi +fi + +exec "${MUSA_ROOT_DIR}/bin/mcc" "${ARGS[@]}" diff --git a/scripts/icclrun_logic.py b/scripts/icclrun_logic.py index 622c851..e6766ee 100644 --- a/scripts/icclrun_logic.py +++ b/scripts/icclrun_logic.py @@ -124,6 +124,9 @@ def ensure_launcher_exists(self): 'grep -l "9999" /sys/bus/pci/devices/*/vendor >/dev/null 2>&1' ) + elif n_type == "moore": + condition = '[ -c "/dev/mtgpu.0" ] || [ -x "$(command -v mthreads-gmi)" ]' + elif n_type == "cambricon": condition = ( '[ -n "${NEUWARE_HOME}" ] || command -v cnmon >/dev/null 2>&1' diff --git a/scripts/run_wrapper.sh b/scripts/run_wrapper.sh old mode 100755 new mode 100644 index 8f1ec49..d9a748f --- a/scripts/run_wrapper.sh +++ b/scripts/run_wrapper.sh @@ -13,6 +13,8 @@ if [ -c "/dev/nvidia0" ] || [ -x "$(command -v nvidia-smi)" ]; then ARCH="nvidia" elif grep -l "9999" /sys/bus/pci/devices/*/vendor >/dev/null 2>&1 || [ -d "/opt/maca" ]; then ARCH="metax" +elif [ -c "/dev/mtgpu.0" ] || [ -x "$(command -v mthreads-gmi)" ]; then + ARCH="moore" elif [ -n "${NEUWARE_HOME}" ] || [ -x "$(command -v cnmon)" ]; then ARCH="cambricon" else diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f76cba2..2a3dfb1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -98,6 +98,29 @@ if(WITH_METAX) target_compile_options(infiniccl PRIVATE $<$:-x maca>) endif() +# Mthreads (Moore) +if(WITH_MOORE) + list(APPEND DEVICE_LIST "moore") + + set(MOORE_PATTERNS + "cuda/*.cc" + "cuda/*.cpp" + "moore/*.cc" + "moore/*.cpp" + "moore/*.mu" + ) + file(GLOB MOORE_SOURCES ${MOORE_PATTERNS}) + + 
set_source_files_properties(${MOORE_SOURCES} PROPERTIES LANGUAGE CXX) + + # target_compile_options(infiniccl PRIVATE "-x" "musa") + target_sources(infiniccl PRIVATE ${MOORE_SOURCES}) + + target_include_directories(infiniccl PRIVATE "${MUSA_ROOT}/include") + target_link_libraries(infiniccl PRIVATE ${MUSA_LIB} ${MUSART_LIB} ${MUBLAS_LIB}) + target_compile_options(infiniccl PRIVATE $<$:-x musa>) +endif() + # Cambricon if(WITH_CAMBRICON) list(APPEND DEVICE_LIST "cambricon") diff --git a/src/device.h b/src/device.h index be1a089..f877f03 100644 --- a/src/device.h +++ b/src/device.h @@ -145,6 +145,11 @@ struct DevicePriority { static constexpr int value = 5; }; +template <> +struct DevicePriority { + static constexpr int value = 5; +}; + template <> struct DevicePriority { static constexpr int value = 5; diff --git a/src/moore/data_type_.h b/src/moore/data_type_.h new file mode 100644 index 0000000..395f6d0 --- /dev/null +++ b/src/moore/data_type_.h @@ -0,0 +1,26 @@ +#ifndef INFINI_CCL_MOORE_DATA_TYPE__H_ +#define INFINI_CCL_MOORE_DATA_TYPE__H_ + +// clang-format off +#include +#include +// clang-format on + +#include "data_type_impl.h" +#include "moore/device_.h" + +namespace infini::ccl { + +template <> +struct TypeMap { + using type = half; +}; + +template <> +struct TypeMap { + using type = __mt_bfloat16; +}; + +} // namespace infini::ccl + +#endif // INFINI_CCL_MOORE_DATA_TYPE__H_ diff --git a/src/moore/device_.h b/src/moore/device_.h new file mode 100644 index 0000000..6757c06 --- /dev/null +++ b/src/moore/device_.h @@ -0,0 +1,13 @@ +#ifndef INFINI_CCL_MOORE_DEVICE__H_ +#define INFINI_CCL_MOORE_DEVICE__H_ + +#include "device.h" + +namespace infini::ccl { + +template <> +struct DeviceEnabled : std::true_type {}; + +} // namespace infini::ccl + +#endif // INFINI_CCL_MOORE_DEVICE__H_ diff --git a/src/moore/runtime_.h b/src/moore/runtime_.h new file mode 100644 index 0000000..4f3b3e8 --- /dev/null +++ b/src/moore/runtime_.h @@ -0,0 +1,62 @@ +#ifndef 
INFINI_CCL_MOORE_RUNTIME_H_ +#define INFINI_CCL_MOORE_RUNTIME_H_ + +#include + +// clang-format off +#include +// clang-format on + +#include "cuda/runtime_.h" +#include "logging.h" +#include "moore/device_.h" +#include "return_status_impl.h" + +namespace infini::ccl { + +template <> +struct Runtime + : CudaRuntime> { + using Stream = musaStream_t; + + static constexpr Device::Type kDeviceType = Device::Type::kMoore; + + static constexpr auto Check = + [](auto status, ReturnStatus err_code = ReturnStatus::kSystemError) { + if (status != musaSuccess) { + LOG(musaGetErrorString(static_cast(status))); + return err_code; + } + return ReturnStatus::kSuccess; + }; + + static constexpr auto Malloc = [](auto &&...args) { + return musaMalloc(std::forward(args)...); + }; + + static constexpr auto Memcpy = musaMemcpy; + + static constexpr auto Free = [](auto &&...args) { + return musaFree(std::forward(args)...); + }; + + static constexpr auto MemcpyHostToDevice = musaMemcpyHostToDevice; + + static constexpr auto MemcpyDeviceToHost = musaMemcpyDeviceToHost; + + static constexpr auto Memset = musaMemset; + + static constexpr auto SetDevice = musaSetDevice; + + static constexpr auto DeviceSynchronize = [](auto &&...args) { + return musaDeviceSynchronize(std::forward(args)...); + };; + + static constexpr auto StreamSynchronize = musaStreamSynchronize; +}; + +static_assert(Runtime::Validate()); + +} // namespace infini::ccl + +#endif // INFINI_CCL_MOORE_RUNTIME_H_ From 490904f8bccd8af9f2b180e9532e374fee3aec24 Mon Sep 17 00:00:00 2001 From: Zimin Li Date: Thu, 4 Jun 2026 11:13:22 +0800 Subject: [PATCH 2/5] docs: update `.github/pull_request_template.md` to include Moore Threads as a supported platform --- .github/pull_request_template.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 36d0894..5493e1e 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -56,6 +56,7 @@ 
Please check all the platforms and/or backends this PR affects (i.e., code is to - [ ] NVIDIA GPU - [ ] Iluvatar GPU - [ ] MetaX GPU +- [ ] Moore Threads GPU - [ ] Cambricon MLU ### Backend @@ -102,6 +103,7 @@ See `CONTRIBUTING.md` § Pull Requests for the official testing requirements and - [ ] NVIDIA GPU - [ ] Iluvatar GPU - [ ] MetaX GPU +- [ ] Moore Threads GPU - [ ] Cambricon MLU ### Test Involved Backend From 23fc8f7a5f7d51ea36ae4793a10474c0eade4693 Mon Sep 17 00:00:00 2001 From: Zimin Li Date: Thu, 4 Jun 2026 11:37:11 +0800 Subject: [PATCH 3/5] docs: update `README.md` to include Moore Threads as a supported platform/runtime --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 9cdf5af..bf89d4b 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,7 @@ cmake .. -DWITH_NVIDIA=ON -DWITH_OMPI=ON | `WITH_NVIDIA` | Enable NVIDIA GPU support | `OFF` | | `WITH_ILUVATAR` | Enable Iluvatar GPU support | `OFF` | | `WITH_METAX` | Enable MetaX GPU support | `OFF` | +| `WITH_MOORE` | Enable Moore Threads GPU support | `OFF` | | `WITH_CAMBRICON` | Enable Cambricon MLU support | `OFF` | | `WITH_CPU` | CPU support (always enabled) | `ON` (internal, not user‑settable) | | **Backend (Communication) Options** ||| @@ -282,6 +283,7 @@ export LD_LIBRARY_PATH=${INFINI_INSTALL}/lib:$LD_LIBRARY_PATH | **NVIDIA** | Full | Requires CUDA Toolkit. | | **Iluvatar** | Full | Requires Iluvatar CoreX SDK. | | **MetaX** | Full | Requires MACA SDK and `MACA_PATH` (default `/opt/maca`) to be set. | +| **Moore Threads** | Full | Requires MUSA SDK; set `MUSA_ROOT`, `MUSA_HOME`, or `MUSA_PATH` (checked in that order), or install the SDK at the default `/usr/local/musa`. | | **Cambricon** | Full | Requires CNToolKit and `NEUWARE_HOME` to be set. 
| From aa944842f761de903a8d3e6f48fadb55b55a6a62 Mon Sep 17 00:00:00 2001 From: Zimin Li Date: Thu, 4 Jun 2026 07:10:18 +0000 Subject: [PATCH 4/5] style: clang-format `src/moore/device_.h` and `src/moore/runtime_.h` --- src/moore/device_.h | 2 +- src/moore/runtime_.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/moore/device_.h b/src/moore/device_.h index 6757c06..35004f5 100644 --- a/src/moore/device_.h +++ b/src/moore/device_.h @@ -10,4 +10,4 @@ struct DeviceEnabled : std::true_type {}; } // namespace infini::ccl -#endif // INFINI_CCL_MOORE_DEVICE__H_ +#endif // INFINI_CCL_MOORE_DEVICE__H_ diff --git a/src/moore/runtime_.h b/src/moore/runtime_.h index 4f3b3e8..7043d2f 100644 --- a/src/moore/runtime_.h +++ b/src/moore/runtime_.h @@ -50,7 +50,7 @@ struct Runtime static constexpr auto DeviceSynchronize = [](auto &&...args) { return musaDeviceSynchronize(std::forward(args)...); - };; + }; static constexpr auto StreamSynchronize = musaStreamSynchronize; }; From 435a999dd61b2e262107a363b3238259692bcdb7 Mon Sep 17 00:00:00 2001 From: Zimin Li Date: Thu, 4 Jun 2026 07:13:50 +0000 Subject: [PATCH 5/5] style: ruff format `scripts/icclrun_logic.py` --- scripts/icclrun_logic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/icclrun_logic.py b/scripts/icclrun_logic.py index e6766ee..4b4015c 100644 --- a/scripts/icclrun_logic.py +++ b/scripts/icclrun_logic.py @@ -125,7 +125,9 @@ def ensure_launcher_exists(self): ) elif n_type == "moore": - condition = '[ -c "/dev/mtgpu.0" ] || [ -x "$(command -v mthreads-gmi)" ]' + condition = ( + '[ -c "/dev/mtgpu.0" ] || [ -x "$(command -v mthreads-gmi)" ]' + ) elif n_type == "cambricon": condition = (