diff --git a/src/infiniop/devices/handle.cc b/src/infiniop/devices/handle.cc index f9e74c429..76ff650c9 100644 --- a/src/infiniop/devices/handle.cc +++ b/src/infiniop/devices/handle.cc @@ -15,7 +15,7 @@ #include "ascend/ascend_handle.h" #endif #ifdef ENABLE_MOORE_API -#include "musa/musa_handle.h" +#include "moore/moore_handle.h" #endif #ifdef ENABLE_KUNLUN_API #include "kunlun/kunlun_handle.h" @@ -54,7 +54,7 @@ __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) { CREATE(INFINI_DEVICE_ASCEND, ascend); #endif #ifdef ENABLE_MOORE_API - CREATE(INFINI_DEVICE_MOORE, musa); + CREATE(INFINI_DEVICE_MOORE, moore); #endif #ifdef ENABLE_KUNLUN_API CREATE(INFINI_DEVICE_KUNLUN, kunlun); @@ -94,7 +94,7 @@ __C infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle) { DELETE(INFINI_DEVICE_ASCEND, ascend); #endif #ifdef ENABLE_MOORE_API - DELETE(INFINI_DEVICE_MOORE, musa); + DELETE(INFINI_DEVICE_MOORE, moore); #endif #ifdef ENABLE_KUNLUN_API DELETE(INFINI_DEVICE_KUNLUN, kunlun); diff --git a/src/infiniop/devices/musa/common_musa.h b/src/infiniop/devices/moore/moore_common.h similarity index 93% rename from src/infiniop/devices/musa/common_musa.h rename to src/infiniop/devices/moore/moore_common.h index 9fded1151..03ff2aef6 100644 --- a/src/infiniop/devices/musa/common_musa.h +++ b/src/infiniop/devices/moore/moore_common.h @@ -1,6 +1,6 @@ #include "../../../utils.h" #include "../pool.h" -#include "musa_handle.h" +#include "moore_handle.h" #include #include #include @@ -10,7 +10,7 @@ #define CHECK_MUBLAS(API) CHECK_INTERNAL(API, MUBLAS_STATUS_SUCCESS) #define CHECK_MUDNN(API) CHECK_INTERNAL((int)API, (int)::musa::dnn::Status::SUCCESS) -namespace device::musa { +namespace device::moore { class Handle::Internal { Pool> mublas_handles; @@ -39,4 +39,4 @@ class Handle::Internal { int gridSizeZ() const; }; -} // namespace device::musa +} // namespace device::moore diff --git a/src/infiniop/devices/musa/musa_handle.cc b/src/infiniop/devices/moore/moore_handle.cc similarity index 96% rename from src/infiniop/devices/musa/musa_handle.cc rename to src/infiniop/devices/moore/moore_handle.cc index 7e7b5dcba..ac9a95680 100644 --- a/src/infiniop/devices/musa/musa_handle.cc +++ b/src/infiniop/devices/moore/moore_handle.cc @@ -1,6 +1,6 @@ -#include "common_musa.h" +#include "moore_common.h" -namespace device::musa { +namespace device::moore { Handle::Handle(infiniDevice_t device, int device_id) : InfiniopHandle{device, device_id}, _internal(std::make_shared(device_id)) {} @@ -67,4 +67,4 @@ infiniStatus_t Handle::create(InfiniopHandle **handle_ptr, int device_id) { return INFINI_STATUS_SUCCESS; } -} // namespace device::musa +} // namespace device::moore diff --git a/src/infiniop/devices/musa/musa_handle.h b/src/infiniop/devices/moore/moore_handle.h similarity index 70% rename from src/infiniop/devices/musa/musa_handle.h rename to src/infiniop/devices/moore/moore_handle.h index 5e79a5ff5..52157dd84 100644 --- a/src/infiniop/devices/musa/musa_handle.h +++ b/src/infiniop/devices/moore/moore_handle.h @@ -1,10 +1,10 @@ -#ifndef __INFINIOP_MUSA_HANDLE_H__ -#define __INFINIOP_MUSA_HANDLE_H__ +#ifndef __INFINIOP_MOORE_HANDLE_H__ +#define __INFINIOP_MOORE_HANDLE_H__ #include "../../handle.h" #include -namespace device::musa { +namespace device::moore { struct Handle : public InfiniopHandle { Handle(int device_id); class Internal; @@ -20,6 +20,6 @@ struct Handle : public InfiniopHandle { std::shared_ptr _internal; }; -} // namespace device::musa +} // namespace device::moore -#endif // __INFINIOP_MUSA_HANDLE_H__ +#endif // __INFINIOP_MOORE_HANDLE_H__ diff --git a/src/infiniop/devices/musa/musa_kernel_common.h b/src/infiniop/devices/moore/moore_kernel_common.h similarity index 86% rename from src/infiniop/devices/musa/musa_kernel_common.h rename to src/infiniop/devices/moore/moore_kernel_common.h index 633f28af4..b75c1d60e 100644 --- a/src/infiniop/devices/musa/musa_kernel_common.h +++ b/src/infiniop/devices/moore/moore_kernel_common.h @@ -1,20 +1,20 @@ -#define INFINIOP_MUSA_KERNEL __global__ void +#define INFINIOP_MOORE_KERNEL __global__ void #include #include // Posible maximum number of threads per block for MUSA architectures // Used for picking correct kernel launch configuration -#define MUSA_BLOCK_SIZE_2048 2048 -#define MUSA_BLOCK_SIZE_1024 1024 -#define MUSA_BLOCK_SIZE_512 512 +#define MOORE_BLOCK_SIZE_2048 2048 +#define MOORE_BLOCK_SIZE_1024 1024 +#define MOORE_BLOCK_SIZE_512 512 -#define CHECK_MUSA(API) CHECK_INTERNAL(API, musaSuccess) +#define CHECK_MOORE(API) CHECK_INTERNAL(API, musaSuccess) using musa_bfloat16 = mt_bfloat16; using musa_bfloat162 = mt_bfloat162; -namespace device::musa { +namespace device::moore { // return the memory offset of original tensor, given the flattened index of broadcasted tensor __forceinline__ __device__ __host__ size_t @@ -45,7 +45,7 @@ indexToOffset( } return res; } -} // namespace device::musa +} // namespace device::moore __forceinline__ __device__ float exp_(const float val) { diff --git a/src/infiniop/ops/gemm/moore/gemm_moore.h b/src/infiniop/ops/gemm/moore/gemm_moore.h new file mode 100644 index 000000000..1fe0e8171 --- /dev/null +++ b/src/infiniop/ops/gemm/moore/gemm_moore.h @@ -0,0 +1,8 @@ +#ifndef __GEMM_MOORE_H__ +#define __GEMM_MOORE_H__ + +#include "../gemm.h" + +DESCRIPTOR(moore) + +#endif // __GEMM_MOORE_H__ diff --git a/src/infiniop/ops/gemm/moore/gemm_moore.mu b/src/infiniop/ops/gemm/moore/gemm_moore.mu new file mode 100644 index 000000000..2d22f3720 --- /dev/null +++ b/src/infiniop/ops/gemm/moore/gemm_moore.mu @@ -0,0 +1,125 @@ +#include "../../../devices/moore/moore_common.h" +#include "../../../devices/moore/moore_handle.h" +#include "gemm_moore.h" + +namespace op::gemm::moore { + +struct Descriptor::Opaque { + std::shared_ptr internal; +}; + +Descriptor::~Descriptor() { + delete _opaque; +} + +infiniStatus_t Descriptor::create( + infiniopHandle_t handle_, + Descriptor **desc_ptr, + infiniopTensorDescriptor_t c_desc, + infiniopTensorDescriptor_t a_desc, + infiniopTensorDescriptor_t b_desc) { + auto handle = reinterpret_cast(handle_); + auto dtype = c_desc->dtype(); + + CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_BF16); + + auto result = MatmulInfo::create(c_desc, a_desc, b_desc, MatrixLayout::COL_MAJOR); + CHECK_RESULT(result); + + *desc_ptr = new Descriptor( + dtype, result.take(), 0, + new Opaque{handle->internal()}, + handle->device, handle->device_id); + return INFINI_STATUS_SUCCESS; +} + +infiniStatus_t Descriptor::calculate( + void *workspace, + size_t workspace_size, + void *c, + float beta, + const void *a, + const void *b, + float alpha, + void *stream) const { + + musaDataType a_type, b_type, c_type; + mublasComputeType_t compute_type; + + // MUSA's GEMM operations require that the scalar values alpha and beta have the same data type as the matrices. + // This ensures correct computation during the muBLAS GEMM operation. + // Declare half-precision variables to handle F16 types. + half alpha_h, beta_h; + + // Initialize generic void pointers for alpha and beta. + // They point to the original float values + // It will be used directly when the GEMM operation is performed with F32 data. + const void *p_alpha = α + const void *p_beta = β + + switch (_dtype) { + case INFINI_DTYPE_F16: + a_type = b_type = c_type = MUSA_R_16F; + compute_type = MUBLAS_COMPUTE_16F; + + // Convert alpha/beta to half-precision and update the pointers. + alpha_h = __float2half(alpha); + beta_h = __float2half(beta); + p_alpha = &alpha_h; + p_beta = &beta_h; + + break; + case INFINI_DTYPE_BF16: + a_type = b_type = c_type = MUSA_R_16BF; + compute_type = MUBLAS_COMPUTE_32F; + break; + case INFINI_DTYPE_F32: + a_type = b_type = c_type = MUSA_R_32F; + compute_type = MUBLAS_COMPUTE_32F_FAST_TF32; + break; + + default: + return INFINI_STATUS_BAD_TENSOR_DTYPE; + } + + if (_info.is_transed) { + std::swap(a, b); + } + + auto op_a = _info.a_matrix.row_stride == 1 ? MUBLAS_OP_N : MUBLAS_OP_T; + auto op_b = _info.b_matrix.row_stride == 1 ? MUBLAS_OP_N : MUBLAS_OP_T; + + CHECK_STATUS(_opaque->internal->useMublas( + (musaStream_t)stream, + [&](mublasHandle_t handle) { + CHECK_MUBLAS( + mublasGemmStridedBatchedEx( + handle, + op_a, + op_b, + static_cast(_info.m), + static_cast(_info.n), + static_cast(_info.k), + p_alpha, + a, + a_type, + static_cast(_info.a_matrix.ld()), + _info.a_matrix.stride, + b, + b_type, + static_cast(_info.b_matrix.ld()), + _info.b_matrix.stride, + p_beta, + c, + c_type, + static_cast(_info.c_matrix.ld()), + _info.c_matrix.stride, + static_cast(_info.batch), + compute_type, + MUBLAS_GEMM_DEFAULT)); + return INFINI_STATUS_SUCCESS; + })); + return INFINI_STATUS_SUCCESS; +} + +} // namespace op::gemm::moore diff --git a/src/infiniop/ops/gemm/musa/gemm_musa.h b/src/infiniop/ops/gemm/musa/gemm_musa.h deleted file mode 100644 index bba9644e0..000000000 --- a/src/infiniop/ops/gemm/musa/gemm_musa.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __GEMM_MUSA_H__ -#define __GEMM_MUSA_H__ - -#include "../gemm.h" - -DESCRIPTOR(musa) - -#endif // __GEMM_MUSA_H__ diff --git a/src/infiniop/ops/gemm/musa/gemm_musa.mu b/src/infiniop/ops/gemm/musa/gemm_musa.mu deleted file mode 100644 index c9127894b..000000000 --- a/src/infiniop/ops/gemm/musa/gemm_musa.mu +++ /dev/null @@ -1,121 +0,0 @@ -#include "../../../devices/musa/common_musa.h" -#include "../../../devices/musa/musa_handle.h" -#include "gemm_musa.h" - -namespace op::gemm::musa { - -struct Descriptor::Opaque { - std::shared_ptr internal; -}; - -Descriptor::~Descriptor() { - delete _opaque; -} - -infiniStatus_t Descriptor::create( - infiniopHandle_t handle_, - Descriptor **desc_ptr, - infiniopTensorDescriptor_t c_desc, - infiniopTensorDescriptor_t a_desc, - infiniopTensorDescriptor_t b_desc) { - auto handle = reinterpret_cast(handle_); - auto dtype = c_desc->dtype(); - - CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32); - - auto result = MatmulInfo::create(c_desc, a_desc, b_desc, MatrixLayout::COL_MAJOR); - CHECK_RESULT(result); - - *desc_ptr = new Descriptor( - dtype, result.take(), 0, - new Opaque{handle->internal()}, - handle->device, handle->device_id); - return INFINI_STATUS_SUCCESS; -} - -template -infiniStatus_t calculate( - const MatmulInfo &info, - std::shared_ptr &_internal, - void *c, - float beta, - const void *a, - const void *b, - float alpha, - void *stream) { - - musaDataType a_type, b_type, c_type; - mublasComputeType_t compute_type; - Tdata alpha_, beta_; - - if constexpr (std::is_same::value) { - alpha_ = __float2half(alpha); - beta_ = __float2half(beta); - a_type = b_type = c_type = MUSA_R_16F; - compute_type = MUBLAS_COMPUTE_16F; - } else { - alpha_ = alpha; - beta_ = beta; - a_type = b_type = c_type = MUSA_R_32F; - compute_type = MUBLAS_COMPUTE_32F_FAST_TF32; - } - - if (info.is_transed) { - std::swap(a, b); - } - - auto op_a = info.a_matrix.row_stride == 1 ? MUBLAS_OP_N : MUBLAS_OP_T; - auto op_b = info.b_matrix.row_stride == 1 ? MUBLAS_OP_N : MUBLAS_OP_T; - - CHECK_STATUS(_internal->useMublas( - (musaStream_t)stream, - [&](mublasHandle_t handle) { - CHECK_MUBLAS( - mublasGemmStridedBatchedEx( - handle, - op_a, - op_b, - static_cast(info.m), - static_cast(info.n), - static_cast(info.k), - &alpha_, - a, - a_type, - static_cast(info.a_matrix.ld()), - info.a_matrix.stride, - b, - b_type, - static_cast(info.b_matrix.ld()), - info.b_matrix.stride, - &beta_, - c, - c_type, - static_cast(info.c_matrix.ld()), - info.c_matrix.stride, - static_cast(info.batch), - compute_type, - MUBLAS_GEMM_DEFAULT)); - return INFINI_STATUS_SUCCESS; - })); - return INFINI_STATUS_SUCCESS; -} - -infiniStatus_t Descriptor::calculate(void *workspace, - size_t workspace_size, - void *c, - float beta, - const void *a, - const void *b, - float alpha, - void *stream) const { - switch (_dtype) { - case INFINI_DTYPE_F16: - return musa::calculate(_info, _opaque->internal, c, beta, a, b, alpha, stream); - case INFINI_DTYPE_F32: - return musa::calculate(_info,_opaque->internal, c, beta, a, b, alpha, stream); - default: - return INFINI_STATUS_BAD_TENSOR_DTYPE; - } -} - -} // namespace op::gemm::musa diff --git a/src/infiniop/ops/gemm/operator.cc b/src/infiniop/ops/gemm/operator.cc index c8ecd5501..2b1b28c81 100644 --- a/src/infiniop/ops/gemm/operator.cc +++ b/src/infiniop/ops/gemm/operator.cc @@ -18,7 +18,7 @@ #include "metax/gemm_metax.h" #endif #ifdef ENABLE_MOORE_API -#include "musa/gemm_musa.h" +#include "moore/gemm_moore.h" #endif #ifdef ENABLE_KUNLUN_API #include "kunlun/gemm_kunlun.h" @@ -61,7 +61,7 @@ __C infiniStatus_t infiniopCreateGemmDescriptor( CREATE(INFINI_DEVICE_METAX, metax); #endif #ifdef ENABLE_MOORE_API - CREATE(INFINI_DEVICE_MOORE, musa); + CREATE(INFINI_DEVICE_MOORE, moore); #endif #ifdef ENABLE_KUNLUN_API @@ -106,7 +106,7 @@ infiniopGetGemmWorkspaceSize( GET(INFINI_DEVICE_METAX, metax); #endif #ifdef ENABLE_MOORE_API - GET(INFINI_DEVICE_MOORE, musa); + GET(INFINI_DEVICE_MOORE, moore); #endif #ifdef ENABLE_KUNLUN_API GET(INFINI_DEVICE_KUNLUN, kunlun); @@ -158,7 +158,7 @@ __C infiniStatus_t infiniopGemm( CALCULATE(INFINI_DEVICE_METAX, metax); #endif #ifdef ENABLE_MOORE_API - CALCULATE(INFINI_DEVICE_MOORE, musa); + CALCULATE(INFINI_DEVICE_MOORE, moore); #endif #ifdef ENABLE_KUNLUN_API CALCULATE(INFINI_DEVICE_KUNLUN, kunlun); @@ -200,7 +200,7 @@ infiniopDestroyGemmDescriptor(infiniopGemmDescriptor_t desc) { DELETE(INFINI_DEVICE_METAX, metax); #endif #ifdef ENABLE_MOORE_API - DELETE(INFINI_DEVICE_MOORE, musa); + DELETE(INFINI_DEVICE_MOORE, moore); #endif #ifdef ENABLE_KUNLUN_API DELETE(INFINI_DEVICE_KUNLUN, kunlun); diff --git a/src/infiniop/ops/rms_norm/moore/rms_norm_moore.h b/src/infiniop/ops/rms_norm/moore/rms_norm_moore.h new file mode 100644 index 000000000..f515c1392 --- /dev/null +++ b/src/infiniop/ops/rms_norm/moore/rms_norm_moore.h @@ -0,0 +1,8 @@ +#ifndef __RMS_NORM_MOORE_H__ +#define __RMS_NORM_MOORE_H__ + +#include "../rms_norm.h" + +DESCRIPTOR(moore) + +#endif diff --git a/src/infiniop/ops/rms_norm/musa/rms_norm_musa.mu b/src/infiniop/ops/rms_norm/moore/rms_norm_moore.mu similarity index 79% rename from src/infiniop/ops/rms_norm/musa/rms_norm_musa.mu rename to src/infiniop/ops/rms_norm/moore/rms_norm_moore.mu index 360122b0b..81cea64c2 100644 --- a/src/infiniop/ops/rms_norm/musa/rms_norm_musa.mu +++ b/src/infiniop/ops/rms_norm/moore/rms_norm_moore.mu @@ -1,7 +1,7 @@ -#include "../../../devices/musa/common_musa.h" -#include "rms_norm_musa.h" +#include "../../../devices/moore/moore_common.h" +#include "rms_norm_moore.h" -#include "../../../devices/musa/musa_kernel_common.h" +#include "../../../devices/moore/moore_kernel_common.h" #include #include "../../../reduce/cuda/reduce.cuh" @@ -9,7 +9,7 @@ #include "../cuda/kernel.cuh" template -INFINIOP_MUSA_KERNEL rmsnormKernel( +INFINIOP_MOORE_KERNEL rmsnormKernel( Tdata *__restrict__ y, ptrdiff_t stride_y, const Tdata *__restrict__ x, @@ -20,10 +20,10 @@ INFINIOP_MUSA_KERNEL rmsnormKernel( rmsnormBlock(y, stride_y, x, stride_x, w, dim, epsilon); } -namespace op::rms_norm::musa { +namespace op::rms_norm::moore { struct Descriptor::Opaque { - std::shared_ptr internal; + std::shared_ptr internal; }; Descriptor::~Descriptor() { @@ -47,7 +47,7 @@ infiniStatus_t Descriptor::create( } *desc_ptr = new Descriptor( - new Opaque{reinterpret_cast(handle)->internal()}, + new Opaque{reinterpret_cast(handle)->internal()}, std::move(info), 0, handle->device, handle->device_id); @@ -109,15 +109,15 @@ infiniStatus_t Descriptor::calculate( auto musa_stream = reinterpret_cast(stream); // launch kernel with different block sizes - if (_opaque->internal->maxThreadsPerBlock() == MUSA_BLOCK_SIZE_1024) { - CHECK_STATUS(launchKernel(batch_size, dim, y, _info.atype, stride_y, x, stride_x, w, _info.wtype, _info.epsilon, musa_stream)); - } else if (_opaque->internal->maxThreadsPerBlock() == MUSA_BLOCK_SIZE_512) { - CHECK_STATUS(launchKernel(batch_size, dim, y, _info.atype, stride_y, x, stride_x, w, _info.wtype, _info.epsilon, musa_stream)); - } else if (_opaque->internal->maxThreadsPerBlock() == MUSA_BLOCK_SIZE_2048) { - CHECK_STATUS(launchKernel(batch_size, dim, y, _info.atype, stride_y, x, stride_x, w, _info.wtype, _info.epsilon, musa_stream)); + if (_opaque->internal->maxThreadsPerBlock() == MOORE_BLOCK_SIZE_1024) { + CHECK_STATUS(launchKernel(batch_size, dim, y, _info.atype, stride_y, x, stride_x, w, _info.wtype, _info.epsilon, musa_stream)); + } else if (_opaque->internal->maxThreadsPerBlock() == MOORE_BLOCK_SIZE_512) { + CHECK_STATUS(launchKernel(batch_size, dim, y, _info.atype, stride_y, x, stride_x, w, _info.wtype, _info.epsilon, musa_stream)); + } else if (_opaque->internal->maxThreadsPerBlock() == MOORE_BLOCK_SIZE_2048) { + CHECK_STATUS(launchKernel(batch_size, dim, y, _info.atype, stride_y, x, stride_x, w, _info.wtype, _info.epsilon, musa_stream)); } else { return INFINI_STATUS_DEVICE_ARCHITECTURE_NOT_SUPPORTED; } return INFINI_STATUS_SUCCESS; } -} // namespace op::rms_norm::musa +} // namespace op::rms_norm::moore diff --git a/src/infiniop/ops/rms_norm/musa/rms_norm_musa.h b/src/infiniop/ops/rms_norm/musa/rms_norm_musa.h deleted file mode 100644 index caa18ab03..000000000 --- a/src/infiniop/ops/rms_norm/musa/rms_norm_musa.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __RMS_NORM_MUSA_H__ -#define __RMS_NORM_MUSA_H__ - -#include "../rms_norm.h" - -DESCRIPTOR(musa) - -#endif diff --git a/src/infiniop/ops/rms_norm/operator.cc b/src/infiniop/ops/rms_norm/operator.cc index 12bd4d2e3..b93233f8b 100644 --- a/src/infiniop/ops/rms_norm/operator.cc +++ b/src/infiniop/ops/rms_norm/operator.cc @@ -15,7 +15,7 @@ #include "metax/rms_norm_metax.cuh" #endif #ifdef ENABLE_MOORE_API -#include "musa/rms_norm_musa.h" +#include "moore/rms_norm_moore.h" #endif #ifdef ENABLE_KUNLUN_API #include "kunlun/rms_norm_kunlun.h" @@ -64,7 +64,7 @@ __C infiniStatus_t infiniopCreateRMSNormDescriptor( CREATE(INFINI_DEVICE_METAX, metax); #endif #ifdef ENABLE_MOORE_API - CREATE(INFINI_DEVICE_MOORE, musa); + CREATE(INFINI_DEVICE_MOORE, moore); #endif } @@ -105,7 +105,7 @@ __C infiniStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t d GET(INFINI_DEVICE_METAX, metax); #endif #ifdef ENABLE_MOORE_API - GET(INFINI_DEVICE_MOORE, musa); + GET(INFINI_DEVICE_MOORE, moore); #endif } @@ -147,7 +147,7 @@ __C infiniStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *works CALCULATE(INFINI_DEVICE_METAX, metax); #endif #ifdef ENABLE_MOORE_API - CALCULATE(INFINI_DEVICE_MOORE, musa); + CALCULATE(INFINI_DEVICE_MOORE, moore); #endif } @@ -188,7 +188,7 @@ __C infiniStatus_t infiniopDestroyRMSNormDescriptor(infiniopRMSNormDescriptor_t DESTROY(INFINI_DEVICE_METAX, metax); #endif #ifdef ENABLE_MOORE_API - DESTROY(INFINI_DEVICE_MOORE, musa); + DESTROY(INFINI_DEVICE_MOORE, moore); #endif } diff --git a/src/infinirt/infinirt.cc b/src/infinirt/infinirt.cc index d57841532..1e60f72be 100644 --- a/src/infinirt/infinirt.cc +++ b/src/infinirt/infinirt.cc @@ -6,7 +6,7 @@ #include "cuda/infinirt_cuda.cuh" #include "kunlun/infinirt_kunlun.h" #include "metax/infinirt_metax.h" -#include "musa/infinirt_musa.h" +#include "moore/infinirt_moore.h" thread_local infiniDevice_t CURRENT_DEVICE_TYPE = INFINI_DEVICE_CPU; thread_local int CURRENT_DEVICE_ID = 0; diff --git a/src/infinirt/musa/infinirt_musa.cc b/src/infinirt/moore/infinirt_moore.cc similarity index 99% rename from src/infinirt/musa/infinirt_musa.cc rename to src/infinirt/moore/infinirt_moore.cc index a3db63024..e805958d5 100644 --- a/src/infinirt/musa/infinirt_musa.cc +++ b/src/infinirt/moore/infinirt_moore.cc @@ -1,4 +1,4 @@ -#include "infinirt_musa.h" +#include "infinirt_moore.h" #include "../../utils.h" #include #include diff --git a/src/infinirt/musa/infinirt_musa.h b/src/infinirt/moore/infinirt_moore.h similarity index 100% rename from src/infinirt/musa/infinirt_musa.h rename to src/infinirt/moore/infinirt_moore.h diff --git a/xmake.lua b/xmake.lua index a4b195532..4c0ca1385 100644 --- a/xmake.lua +++ b/xmake.lua @@ -119,7 +119,7 @@ option_end() if has_config("moore-gpu") then add_defines("ENABLE_MOORE_API") - includes("xmake/musa.lua") + includes("xmake/moore.lua") end -- 海光 diff --git a/xmake/musa.lua b/xmake/moore.lua similarity index 91% rename from xmake/musa.lua rename to xmake/moore.lua index c84afaa4c..c350a6e5c 100644 --- a/xmake/musa.lua +++ b/xmake/moore.lua @@ -42,8 +42,8 @@ target("infiniop-moore") set_languages("cxx17") set_warnings("all", "error") add_cxflags("-lstdc++", "-fPIC", "-Wno-comment") - add_files("../src/infiniop/devices/musa/*.cc") - add_files("../src/infiniop/ops/*/musa/*.mu", {rule = "mu"}) + add_files("../src/infiniop/devices/moore/*.cc") + add_files("../src/infiniop/ops/*/moore/*.mu", {rule = "mu"}) target_end() target("infinirt-moore") @@ -53,5 +53,5 @@ target("infinirt-moore") add_deps("infini-utils") set_warnings("all", "error") add_cxflags("-lstdc++", "-fPIC") - add_files("../src/infinirt/musa/*.cc") + add_files("../src/infinirt/moore/*.cc") target_end()