From 507be07e7cc3ea7a1ff777c0c73e24ab5d0620a3 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Fri, 11 Jul 2025 16:37:00 +0800 Subject: [PATCH] =?UTF-8?q?issue/291/style:=20=E6=89=80=E6=9C=89=20maca=20?= =?UTF-8?q?=E6=94=B9=E4=B8=BA=20metax?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- src/infiniccl/infiniccl.cc | 8 ++-- src/infiniccl/maca/infiniccl_maca.h | 12 ------ .../infiniccl_metax.cc} | 6 +-- src/infiniccl/metax/infiniccl_metax.h | 12 ++++++ src/infiniop/devices/handle.cc | 6 +-- .../common_maca.h => metax/metax_common.h} | 6 +-- .../maca_handle.cc => metax/metax_handle.cc} | 6 +-- .../maca_handle.h => metax/metax_handle.h} | 10 ++--- .../metax_kernel_common.h} | 14 +++---- .../elementwise_metax.h} | 32 +++++++------- .../elementwise_metax_api.h} | 42 +++++++++---------- .../metax/causal_softmax_metax.maca | 18 ++++---- src/infiniop/ops/gemm/metax/gemm_metax.cc | 8 ++-- src/infiniop/ops/gemm/metax/gemm_metax.h | 6 +-- .../metax/random_sample_kernel.h | 12 +++--- .../random_sample/metax/random_sample_metax.h | 6 +-- .../metax/random_sample_metax.maca | 10 ++--- .../ops/rearrange/metax/rearrange_kernel.h | 10 ++--- .../ops/rearrange/metax/rearrange_metax.h | 6 +-- .../ops/rearrange/metax/rearrange_metax.maca | 22 +++++----- .../ops/rms_norm/metax/rms_norm_metax.cuh | 6 +-- .../ops/rms_norm/metax/rms_norm_metax.maca | 26 ++++++------ src/infiniop/ops/rms_norm/operator.cc | 8 ++-- src/infiniop/ops/rope/metax/rope_metax.h | 6 +-- src/infiniop/ops/rope/metax/rope_metax.maca | 14 +++---- src/infiniop/ops/swiglu/metax/swiglu_metax.h | 10 ++--- .../ops/swiglu/metax/swiglu_metax.maca | 8 ++-- src/infinirt/infinirt.cc | 4 +- .../infinirt_metax.cc} | 6 +-- .../infinirt_metax.h} | 4 +- xmake/metax.lua | 6 +-- 31 files changed, 175 insertions(+), 175 deletions(-) delete mode 100644 src/infiniccl/maca/infiniccl_maca.h rename src/infiniccl/{maca/infiniccl_maca.cc => metax/infiniccl_metax.cc} (96%) create mode 100644 src/infiniccl/metax/infiniccl_metax.h rename src/infiniop/devices/{maca/common_maca.h => metax/metax_common.h} (92%) rename src/infiniop/devices/{maca/maca_handle.cc => metax/metax_handle.cc} (97%) rename src/infiniop/devices/{maca/maca_handle.h => metax/metax_handle.h} (70%) rename src/infiniop/devices/{maca/maca_kernel_common.h => metax/metax_kernel_common.h} (83%) rename src/infiniop/elementwise/{maca/elementwise_maca.h => metax/elementwise_metax.h} (90%) rename src/infiniop/elementwise/{maca/elementwise_maca_api.h => metax/elementwise_metax_api.h} (69%) rename src/infinirt/{maca/infinirt_maca.cc => metax/infinirt_metax.cc} (97%) rename src/infinirt/{maca/infinirt_maca.h => metax/infinirt_metax.h} (77%) diff --git a/src/infiniccl/infiniccl.cc b/src/infiniccl/infiniccl.cc index 32160dfac..ce2c0d32c 100644 --- a/src/infiniccl/infiniccl.cc +++ b/src/infiniccl/infiniccl.cc @@ -2,7 +2,7 @@ #include "./ascend/infiniccl_ascend.h" #include "./cuda/infiniccl_cuda.h" -#include "./maca/infiniccl_maca.h" +#include "./metax/infiniccl_metax.h" __C infiniStatus_t infinicclCommInitAll( infiniDevice_t device_type, @@ -17,7 +17,7 @@ __C infiniStatus_t infinicclCommInitAll( switch (device_type) { COMM_INIT_ALL(INFINI_DEVICE_NVIDIA, cuda) COMM_INIT_ALL(INFINI_DEVICE_ASCEND, ascend) - COMM_INIT_ALL(INFINI_DEVICE_METAX, maca) + COMM_INIT_ALL(INFINI_DEVICE_METAX, metax) default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; } @@ -37,7 +37,7 @@ __C infiniStatus_t infinicclCommDestroy(infinicclComm_t comm) { switch (comm->device_type) { COMM_DESTROY(INFINI_DEVICE_NVIDIA, cuda) COMM_DESTROY(INFINI_DEVICE_ASCEND, ascend) - COMM_DESTROY(INFINI_DEVICE_METAX, maca) + COMM_DESTROY(INFINI_DEVICE_METAX, metax) default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; @@ -65,7 +65,7 @@ __C infiniStatus_t infinicclAllReduce( switch (comm->device_type) { ALL_REDUCE(INFINI_DEVICE_NVIDIA, cuda) ALL_REDUCE(INFINI_DEVICE_ASCEND, ascend) - ALL_REDUCE(INFINI_DEVICE_METAX, maca) + ALL_REDUCE(INFINI_DEVICE_METAX, metax) default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; diff --git a/src/infiniccl/maca/infiniccl_maca.h b/src/infiniccl/maca/infiniccl_maca.h deleted file mode 100644 index 3fe932531..000000000 --- a/src/infiniccl/maca/infiniccl_maca.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef INFINICCL_MACA_H_ -#define INFINICCL_MACA_H_ - -#include "../infiniccl_impl.h" - -#if defined(ENABLE_METAX_API) && defined(ENABLE_CCL) -INFINICCL_DEVICE_API_IMPL(maca) -#else -INFINICCL_DEVICE_API_NOOP(maca) -#endif - -#endif /* INFINICCL_MACA_H_ */ diff --git a/src/infiniccl/maca/infiniccl_maca.cc b/src/infiniccl/metax/infiniccl_metax.cc similarity index 96% rename from src/infiniccl/maca/infiniccl_maca.cc rename to src/infiniccl/metax/infiniccl_metax.cc index ffb03f096..04b91dea9 100644 --- a/src/infiniccl/maca/infiniccl_maca.cc +++ b/src/infiniccl/metax/infiniccl_metax.cc @@ -1,4 +1,4 @@ -#include "infiniccl_maca.h" +#include "infiniccl_metax.h" #include "../../utils.h" @@ -51,7 +51,7 @@ inline hcclComm_t getHcclComm(infinicclComm_t comm) { return static_cast(comm->comm); } -namespace infiniccl::maca { +namespace infiniccl::metax { infiniStatus_t commInitAll( infinicclComm_t *comms, @@ -92,4 +92,4 @@ infiniStatus_t allReduce( return INFINI_STATUS_SUCCESS; } -} // namespace infiniccl::maca +} // namespace infiniccl::metax diff --git a/src/infiniccl/metax/infiniccl_metax.h b/src/infiniccl/metax/infiniccl_metax.h new file mode 100644 index 000000000..cd6b90a40 --- /dev/null +++ b/src/infiniccl/metax/infiniccl_metax.h @@ -0,0 +1,12 @@ +#ifndef INFINICCL_METAX_H_ +#define INFINICCL_METAX_H_ + +#include "../infiniccl_impl.h" + +#if defined(ENABLE_METAX_API) && defined(ENABLE_CCL) +INFINICCL_DEVICE_API_IMPL(metax) +#else +INFINICCL_DEVICE_API_NOOP(metax) +#endif + +#endif /* INFINICCL_METAX_H_ */ diff --git a/src/infiniop/devices/handle.cc b/src/infiniop/devices/handle.cc index 163c7bebc..ae7a32cf0 100644 --- a/src/infiniop/devices/handle.cc +++ b/src/infiniop/devices/handle.cc @@ -21,7 +21,7 @@ #include "kunlun/kunlun_handle.h" #endif #ifdef ENABLE_METAX_API -#include "maca/maca_handle.h" +#include "metax/metax_handle.h" #endif __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) { @@ -57,7 +57,7 @@ __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) { CREATE(INFINI_DEVICE_KUNLUN, kunlun); #endif #ifdef ENABLE_METAX_API - CREATE(INFINI_DEVICE_METAX, maca); + CREATE(INFINI_DEVICE_METAX, metax); #endif default: @@ -94,7 +94,7 @@ __C infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle) { DELETE(INFINI_DEVICE_KUNLUN, kunlun); #endif #ifdef ENABLE_METAX_API - DELETE(INFINI_DEVICE_METAX, maca); + DELETE(INFINI_DEVICE_METAX, metax); #endif default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; diff --git a/src/infiniop/devices/maca/common_maca.h b/src/infiniop/devices/metax/metax_common.h similarity index 92% rename from src/infiniop/devices/maca/common_maca.h rename to src/infiniop/devices/metax/metax_common.h index cb463d69b..225ce74e1 100644 --- a/src/infiniop/devices/maca/common_maca.h +++ b/src/infiniop/devices/metax/metax_common.h @@ -1,6 +1,6 @@ #include "../../../utils.h" #include "../pool.h" -#include "maca_handle.h" +#include "metax_handle.h" #include #include #include @@ -8,7 +8,7 @@ #define CHECK_MCBLAS(API) CHECK_INTERNAL(API, HCBLAS_STATUS_SUCCESS) #define CHECK_MCDNN(API) CHECK_INTERNAL(API, HCDNN_STATUS_SUCCESS) -namespace device::maca { +namespace device::metax { class Handle::Internal { Pool mcblas_handles; @@ -39,4 +39,4 @@ class Handle::Internal { hcdnnDataType_t getHcdnnDtype(infiniDtype_t dt); -} // namespace device::maca +} // namespace device::metax diff --git a/src/infiniop/devices/maca/maca_handle.cc b/src/infiniop/devices/metax/metax_handle.cc similarity index 97% rename from src/infiniop/devices/maca/maca_handle.cc rename to src/infiniop/devices/metax/metax_handle.cc index 916d36415..d56561c02 100644 --- a/src/infiniop/devices/maca/maca_handle.cc +++ b/src/infiniop/devices/metax/metax_handle.cc @@ -1,6 +1,6 @@ -#include "common_maca.h" +#include "metax_common.h" -namespace device::maca { +namespace device::metax { Handle::Handle(infiniDevice_t device, int device_id) : InfiniopHandle{device, device_id}, _internal(std::make_shared(device_id)) {} @@ -83,4 +83,4 @@ infiniStatus_t Handle::create(InfiniopHandle **handle_ptr, int device_id) { return INFINI_STATUS_SUCCESS; } -} // namespace device::maca +} // namespace device::metax diff --git a/src/infiniop/devices/maca/maca_handle.h b/src/infiniop/devices/metax/metax_handle.h similarity index 70% rename from src/infiniop/devices/maca/maca_handle.h rename to src/infiniop/devices/metax/metax_handle.h index 18b8edddb..ff01150f9 100644 --- a/src/infiniop/devices/maca/maca_handle.h +++ b/src/infiniop/devices/metax/metax_handle.h @@ -1,10 +1,10 @@ -#ifndef __INFINIOP_MACA_HANDLE_H__ -#define __INFINIOP_MACA_HANDLE_H__ +#ifndef __INFINIOP_METAX_HANDLE_H__ +#define __INFINIOP_METAX_HANDLE_H__ #include "../../handle.h" #include -namespace device::maca { +namespace device::metax { struct Handle : public InfiniopHandle { Handle(int device_id); class Internal; @@ -20,6 +20,6 @@ struct Handle : public InfiniopHandle { std::shared_ptr _internal; }; -} // namespace device::maca +} // namespace device::metax -#endif // __INFINIOP_MACA_HANDLE_H__ +#endif // __INFINIOP_METAX_HANDLE_H__ diff --git a/src/infiniop/devices/maca/maca_kernel_common.h b/src/infiniop/devices/metax/metax_kernel_common.h similarity index 83% rename from src/infiniop/devices/maca/maca_kernel_common.h rename to src/infiniop/devices/metax/metax_kernel_common.h index 5e2a7a254..4ad0130f1 100644 --- a/src/infiniop/devices/maca/maca_kernel_common.h +++ b/src/infiniop/devices/metax/metax_kernel_common.h @@ -1,16 +1,16 @@ -#define INFINIOP_MACA_KERNEL __global__ void +#define INFINIOP_METAX_KERNEL __global__ void -// Posible maximum number of threads per block for MACA architectures +// Posible maximum number of threads per block for METAX architectures // Used for picking correct kernel launch configuration -#define MACA_BLOCK_SIZE_1024 1024 -#define MACA_BLOCK_SIZE_512 512 +#define METAX_BLOCK_SIZE_1024 1024 +#define METAX_BLOCK_SIZE_512 512 -#define CHECK_MACA(API) CHECK_INTERNAL(API, hcSuccess) +#define CHECK_METAX(API) CHECK_INTERNAL(API, hcSuccess) using cuda_bfloat16 = hpcc_bfloat16; using cuda_bfloat162 = hpcc_bfloat162; -namespace device::maca { +namespace device::metax { // return the memory offset of original tensor, given the flattened index of broadcasted tensor __forceinline__ __device__ __host__ size_t @@ -41,7 +41,7 @@ indexToOffset( } return res; } -} // namespace device::maca +} // namespace device::metax __forceinline__ __device__ float exp_(const float val) { diff --git a/src/infiniop/elementwise/maca/elementwise_maca.h b/src/infiniop/elementwise/metax/elementwise_metax.h similarity index 90% rename from src/infiniop/elementwise/maca/elementwise_maca.h rename to src/infiniop/elementwise/metax/elementwise_metax.h index 970f14e3b..aa662ec15 100644 --- a/src/infiniop/elementwise/maca/elementwise_maca.h +++ b/src/infiniop/elementwise/metax/elementwise_metax.h @@ -1,12 +1,12 @@ -#ifndef __INFINIOP_ELEMENTWISE_MACA_H__ -#define __INFINIOP_ELEMENTWISE_MACA_H__ +#ifndef __INFINIOP_ELEMENTWISE_METAX_H__ +#define __INFINIOP_ELEMENTWISE_METAX_H__ #include "../../../utils.h" -#include "../../devices/maca/common_maca.h" -#include "../../devices/maca/maca_kernel_common.h" -#include "elementwise_maca_api.h" +#include "../../devices/metax/metax_common.h" +#include "../../devices/metax/metax_kernel_common.h" +#include "elementwise_metax_api.h" -namespace op::elementwise::maca { +namespace op::elementwise::metax { template __device__ __forceinline__ const T *typedInputPtr(const void *ptr) { return reinterpret_cast(ptr); @@ -14,7 +14,7 @@ __device__ __forceinline__ const T *typedInputPtr(const void *ptr) { __device__ __forceinline__ size_t getOutputIndex(size_t idx, bool is_contiguous, size_t ndim, const size_t *shape, const ptrdiff_t *strides) { - return is_contiguous ? idx : device::maca::indexToOffset(idx, ndim, shape, strides); + return is_contiguous ? idx : device::metax::indexToOffset(idx, ndim, shape, strides); } struct InputIndexer { @@ -30,8 +30,8 @@ struct InputIndexer { return input_contiguous[input_id] ? idx : (input_broadcasted[input_id] - ? device::maca::indexToReducedOffset(idx, ndim, output_strides, input_strides + input_id * ndim) - : device::maca::indexToOffset(idx, ndim, input_shapes + input_id * ndim, input_strides + input_id * ndim)); + ? device::metax::indexToReducedOffset(idx, ndim, output_strides, input_strides + input_id * ndim) + : device::metax::indexToOffset(idx, ndim, input_shapes + input_id * ndim, input_strides + input_id * ndim)); } }; @@ -41,7 +41,7 @@ __device__ __forceinline__ void unpackInputsAndApply(F &&f, std::index_sequence< } template -INFINIOP_MACA_KERNEL elementwiseKernel( +INFINIOP_METAX_KERNEL elementwiseKernel( size_t output_size, size_t ndim, bool output_contiguous, @@ -72,7 +72,7 @@ INFINIOP_MACA_KERNEL elementwiseKernel( } template -INFINIOP_MACA_KERNEL elementwiseKernel( +INFINIOP_METAX_KERNEL elementwiseKernel( size_t output_size, size_t ndim, bool output_contiguous, @@ -102,9 +102,9 @@ INFINIOP_MACA_KERNEL elementwiseKernel( } struct DeviceImpl::Opaque { - std::shared_ptr internal; + std::shared_ptr internal; - Opaque(const std::shared_ptr &internal) + Opaque(const std::shared_ptr &internal) : internal(internal) {} template @@ -159,8 +159,8 @@ struct DeviceImpl::Opaque { const int8_t *d_meta_start = reinterpret_cast(workspace) + input_arr_size; // copy the input pointer array and meta to device - CHECK_MACA(hcMemcpyAsync(workspace, h_inputs_arr, input_arr_size, hcMemcpyHostToDevice, stream)); - CHECK_MACA(hcMemcpyAsync((void *)d_meta_start, info_meta_start, info.getMetaMemSize(), hcMemcpyHostToDevice, stream)); + CHECK_METAX(hcMemcpyAsync(workspace, h_inputs_arr, input_arr_size, hcMemcpyHostToDevice, stream)); + CHECK_METAX(hcMemcpyAsync((void *)d_meta_start, info_meta_start, info.getMetaMemSize(), hcMemcpyHostToDevice, stream)); // offset/assign the pointers d_inputs_arr = reinterpret_cast(workspace); @@ -259,6 +259,6 @@ infiniStatus_t DeviceImpl::calculate(const op::elementwise::ElementwiseInfo &inf std::forward(args)...); } -} // namespace op::elementwise::maca +} // namespace op::elementwise::metax #endif diff --git a/src/infiniop/elementwise/maca/elementwise_maca_api.h b/src/infiniop/elementwise/metax/elementwise_metax_api.h similarity index 69% rename from src/infiniop/elementwise/maca/elementwise_maca_api.h rename to src/infiniop/elementwise/metax/elementwise_metax_api.h index e257cc6f8..b59c14da5 100644 --- a/src/infiniop/elementwise/maca/elementwise_maca_api.h +++ b/src/infiniop/elementwise/metax/elementwise_metax_api.h @@ -1,9 +1,9 @@ -#ifndef __INFINIOP_ELEMENTWISE_MACA_API_H__ -#define __INFINIOP_ELEMENTWISE_MACA_API_H__ +#ifndef __INFINIOP_ELEMENTWISE_METAX_API_H__ +#define __INFINIOP_ELEMENTWISE_METAX_API_H__ #include "../elementwise.h" -namespace op::elementwise::maca { +namespace op::elementwise::metax { class DeviceImpl final { struct Opaque; @@ -37,23 +37,23 @@ class DeviceImpl final { void *stream, Args &&...args); }; -} // namespace op::elementwise::maca -#define CREATE_ELEMENTWISE_MACA_DESCRIPTOR(HANDLE, DTYPE, OUT_DESC, INPUT_DESC_VEC) \ - \ - auto info_result = op::elementwise::ElementwiseInfo::create(OUT_DESC, INPUT_DESC_VEC); \ - CHECK_RESULT(info_result); \ - auto info = info_result.take(); \ - auto workspace_size = info.getMetaMemSize() + info.getInputSize() * sizeof(void *); \ - \ - auto device_impl_result = op::elementwise::maca::DeviceImpl::create(HANDLE->internal()); \ - CHECK_RESULT(device_impl_result); \ - \ - *desc_ptr = new Descriptor( \ - DTYPE, \ - std::move(info), \ - std::move(device_impl_result.take()), \ - workspace_size, \ - HANDLE->device, \ +} // namespace op::elementwise::metax +#define CREATE_ELEMENTWISE_METAX_DESCRIPTOR(HANDLE, DTYPE, OUT_DESC, INPUT_DESC_VEC) \ + \ + auto info_result = op::elementwise::ElementwiseInfo::create(OUT_DESC, INPUT_DESC_VEC); \ + CHECK_RESULT(info_result); \ + auto info = info_result.take(); \ + auto workspace_size = info.getMetaMemSize() + info.getInputSize() * sizeof(void *); \ + \ + auto device_impl_result = op::elementwise::metax::DeviceImpl::create(HANDLE->internal()); \ + CHECK_RESULT(device_impl_result); \ + \ + *desc_ptr = new Descriptor( \ + DTYPE, \ + std::move(info), \ + std::move(device_impl_result.take()), \ + workspace_size, \ + HANDLE->device, \ HANDLE->device_id); -#endif // __INFINIOP_ELEMENTWISE_MACA_API_H__ +#endif // __INFINIOP_ELEMENTWISE_METAX_API_H__ diff --git a/src/infiniop/ops/causal_softmax/metax/causal_softmax_metax.maca b/src/infiniop/ops/causal_softmax/metax/causal_softmax_metax.maca index e23130923..670666ef4 100644 --- a/src/infiniop/ops/causal_softmax/metax/causal_softmax_metax.maca +++ b/src/infiniop/ops/causal_softmax/metax/causal_softmax_metax.maca @@ -1,15 +1,15 @@ -#include "../../../devices/maca/common_maca.h" +#include "../../../devices/metax/metax_common.h" #include "causal_softmax_metax.h" #include -#include "../../../devices/maca/maca_kernel_common.h" +#include "../../../devices/metax/metax_kernel_common.h" #include "../../../reduce/cuda/reduce.cuh" #include "../cuda/kernel.cuh" template -INFINIOP_MACA_KERNEL causalSoftmax( +INFINIOP_METAX_KERNEL causalSoftmax( Tdata *y, const Tdata *x, size_t batch, size_t height, size_t width, ptrdiff_t y_stride_b, ptrdiff_t y_stride_h, @@ -20,7 +20,7 @@ INFINIOP_MACA_KERNEL causalSoftmax( namespace op::causal_softmax::metax { struct Descriptor::Opaque { - std::shared_ptr internal; + std::shared_ptr internal; }; Descriptor::~Descriptor() { @@ -35,7 +35,7 @@ infiniStatus_t Descriptor::create( auto info = CausalSoftmaxInfo::create(y_desc, x_desc); CHECK_RESULT(info); *desc_ptr = new Descriptor( - new Opaque{reinterpret_cast(handle)->internal()}, + new Opaque{reinterpret_cast(handle)->internal()}, info.take(), 0, handle->device, handle->device_id); return INFINI_STATUS_SUCCESS; } @@ -76,12 +76,12 @@ infiniStatus_t Descriptor::calculate(void *workspace, size_t workspace_size, const void *x, void *stream_) const { hcStream_t stream = (hcStream_t)stream_; - if (_opaque->internal->maxThreadsPerBlock() == MACA_BLOCK_SIZE_1024) { - CHECK_STATUS(launchKernel( + if (_opaque->internal->maxThreadsPerBlock() == METAX_BLOCK_SIZE_1024) { + CHECK_STATUS(launchKernel( y, x, _info.dtype, _info.batch_size, _info.seq_len, _info.total_seq_len, _info.y_stride_b, _info.y_stride_i, _info.x_stride_b, _info.x_stride_i, stream)); - } else if (_opaque->internal->maxThreadsPerBlock() == MACA_BLOCK_SIZE_512) { - CHECK_STATUS(launchKernel( + } else if (_opaque->internal->maxThreadsPerBlock() == METAX_BLOCK_SIZE_512) { + CHECK_STATUS(launchKernel( y, x, _info.dtype, _info.batch_size, _info.seq_len, _info.total_seq_len, _info.y_stride_b, _info.y_stride_i, _info.x_stride_b, _info.x_stride_i, stream)); } else { diff --git a/src/infiniop/ops/gemm/metax/gemm_metax.cc b/src/infiniop/ops/gemm/metax/gemm_metax.cc index 65b16b077..9d45099dc 100644 --- a/src/infiniop/ops/gemm/metax/gemm_metax.cc +++ b/src/infiniop/ops/gemm/metax/gemm_metax.cc @@ -1,11 +1,11 @@ #include "gemm_metax.h" -#include "../../../devices/maca/common_maca.h" -#include "../../../devices/maca/maca_handle.h" +#include "../../../devices/metax/metax_common.h" +#include "../../../devices/metax/metax_handle.h" namespace op::gemm::metax { struct Descriptor::Opaque { - std::shared_ptr internal; + std::shared_ptr internal; }; Descriptor::~Descriptor() { @@ -18,7 +18,7 @@ infiniStatus_t Descriptor::create( infiniopTensorDescriptor_t c_desc, infiniopTensorDescriptor_t a_desc, infiniopTensorDescriptor_t b_desc) { - auto handle = reinterpret_cast(handle_); + auto handle = reinterpret_cast(handle_); auto dtype = c_desc->dtype(); CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_BF16); diff --git a/src/infiniop/ops/gemm/metax/gemm_metax.h b/src/infiniop/ops/gemm/metax/gemm_metax.h index 947fb6e83..42f651c77 100644 --- a/src/infiniop/ops/gemm/metax/gemm_metax.h +++ b/src/infiniop/ops/gemm/metax/gemm_metax.h @@ -1,8 +1,8 @@ -#ifndef __GEMM_MACA_H__ -#define __GEMM_MACA_H__ +#ifndef __GEMM_METAX_H__ +#define __GEMM_METAX_H__ #include "../gemm.h" DESCRIPTOR(metax) -#endif // __GEMM_MACA_H__ +#endif // __GEMM_METAX_H__ diff --git a/src/infiniop/ops/random_sample/metax/random_sample_kernel.h b/src/infiniop/ops/random_sample/metax/random_sample_kernel.h index 011580673..a0e6ba2b3 100644 --- a/src/infiniop/ops/random_sample/metax/random_sample_kernel.h +++ b/src/infiniop/ops/random_sample/metax/random_sample_kernel.h @@ -1,4 +1,4 @@ -#include "../../../devices/maca/maca_kernel_common.h" +#include "../../../devices/metax/metax_kernel_common.h" #include "infinicore.h" #include #include @@ -62,7 +62,7 @@ utils::Result calculateWorkspace(size_t n_) { const auto n = static_cast(n_); size_t argmax; - CHECK_MACA(argMax_( + CHECK_METAX(argMax_( nullptr, nullptr, n, nullptr, argmax, nullptr)); @@ -77,7 +77,7 @@ utils::Result calculateWorkspace(size_t n_) { size_random += align256(sizeof(Tidx) * n); // cub device api size_t size_radix_sort; - CHECK_MACA((radixSort( + CHECK_METAX((radixSort( nullptr, size_radix_sort, nullptr, nullptr, nullptr, nullptr, @@ -85,7 +85,7 @@ utils::Result calculateWorkspace(size_t n_) { nullptr))); size_t size_inclusive_sum; - CHECK_MACA(inclusiveSum( + CHECK_METAX(inclusiveSum( nullptr, size_inclusive_sum, nullptr, n, nullptr)); @@ -233,7 +233,7 @@ struct Algo { auto grid = (n + block - 1) / block; // sort fillIndices<<>>(indices, n); - CHECK_MACA(radixSort( + CHECK_METAX(radixSort( workspace_, workspace_size, logits, sorted, indices, indices_out, @@ -243,7 +243,7 @@ struct Algo { partialSoftmaxKernel<<>>(sorted, n, temperature); setSoftmaxMaxKernel<<<1, 1, 0, stream>>>(sorted); // sum - CHECK_MACA(inclusiveSum( + CHECK_METAX(inclusiveSum( workspace_, workspace, sorted, n, stream)); diff --git a/src/infiniop/ops/random_sample/metax/random_sample_metax.h b/src/infiniop/ops/random_sample/metax/random_sample_metax.h index cc961479e..a6df5c95f 100644 --- a/src/infiniop/ops/random_sample/metax/random_sample_metax.h +++ b/src/infiniop/ops/random_sample/metax/random_sample_metax.h @@ -1,8 +1,8 @@ -#ifndef __RANDOM_SAMPLE_MACA_H__ -#define __RANDOM_SAMPLE_MACA_H__ +#ifndef __RANDOM_SAMPLE_METAX_H__ +#define __RANDOM_SAMPLE_METAX_H__ #include "../random_sample.h" DESCRIPTOR(metax) -#endif // __RANDOM_SAMPLE_MACA_H__ +#endif // __RANDOM_SAMPLE_METAX_H__ diff --git a/src/infiniop/ops/random_sample/metax/random_sample_metax.maca b/src/infiniop/ops/random_sample/metax/random_sample_metax.maca index e61c1b0a8..eed593ed8 100644 --- a/src/infiniop/ops/random_sample/metax/random_sample_metax.maca +++ b/src/infiniop/ops/random_sample/metax/random_sample_metax.maca @@ -1,5 +1,5 @@ -#include "../../../devices/maca/common_maca.h" -#include "../../../devices/maca/maca_handle.h" +#include "../../../devices/metax/metax_common.h" +#include "../../../devices/metax/metax_handle.h" #include "../info.h" #include "random_sample_kernel.h" #include "random_sample_metax.h" @@ -7,7 +7,7 @@ namespace op::random_sample::metax { struct Descriptor::Opaque { - std::shared_ptr internal; + std::shared_ptr internal; }; Descriptor::~Descriptor() { @@ -19,7 +19,7 @@ infiniStatus_t Descriptor::create( Descriptor **desc_ptr, infiniopTensorDescriptor_t result_desc, infiniopTensorDescriptor_t probs_desc) { - auto handle = reinterpret_cast(handle_); + auto handle = reinterpret_cast(handle_); auto result = RandomSampleInfo::create(result_desc, probs_desc); CHECK_RESULT(result); @@ -100,4 +100,4 @@ infiniStatus_t Descriptor::calculate( return INFINI_STATUS_SUCCESS; } -} // namespace op::random_sample::maca +} // namespace op::random_sample::metax diff --git a/src/infiniop/ops/rearrange/metax/rearrange_kernel.h b/src/infiniop/ops/rearrange/metax/rearrange_kernel.h index 51d10ba20..7e431543d 100644 --- a/src/infiniop/ops/rearrange/metax/rearrange_kernel.h +++ b/src/infiniop/ops/rearrange/metax/rearrange_kernel.h @@ -1,8 +1,8 @@ -#ifndef __REARRANGE_MACA_KERNEL_H__ -#define __REARRANGE_MACA_KERNEL_H__ +#ifndef __REARRANGE_METAX_KERNEL_H__ +#define __REARRANGE_METAX_KERNEL_H__ -#include "../../../devices/maca/common_maca.h" -#include "../../../devices/maca/maca_kernel_common.h" +#include "../../../devices/metax/metax_common.h" +#include "../../../devices/metax/metax_kernel_common.h" #define ARRAY_TYPE_STRIDE ptrdiff_t #define ARRAY_TYPE_SIZE size_t @@ -328,4 +328,4 @@ utils::Result getRearrangeKernel(const RearrangeParams ¶ms) { return utils::Result(kernel_func); } -#endif // __REARRANGE_MACA_KERNEL_H__ +#endif // __REARRANGE_METAX_KERNEL_H__ diff --git a/src/infiniop/ops/rearrange/metax/rearrange_metax.h b/src/infiniop/ops/rearrange/metax/rearrange_metax.h index 93a45bda3..e3f2ff52d 100644 --- a/src/infiniop/ops/rearrange/metax/rearrange_metax.h +++ b/src/infiniop/ops/rearrange/metax/rearrange_metax.h @@ -1,8 +1,8 @@ -#ifndef __REARRANGE_MACA_H__ -#define __REARRANGE_MACA_H__ +#ifndef __REARRANGE_METAX_H__ +#define __REARRANGE_METAX_H__ #include "../rearrange.h" DESCRIPTOR(metax) -#endif // __REARRANGE_MACA_H__ +#endif // __REARRANGE_METAX_H__ diff --git a/src/infiniop/ops/rearrange/metax/rearrange_metax.maca b/src/infiniop/ops/rearrange/metax/rearrange_metax.maca index 9149d0c5c..de9db762f 100644 --- a/src/infiniop/ops/rearrange/metax/rearrange_metax.maca +++ b/src/infiniop/ops/rearrange/metax/rearrange_metax.maca @@ -10,7 +10,7 @@ namespace op::rearrange::metax { struct Descriptor::Opaque { - std::shared_ptr internal; + std::shared_ptr internal; }; Descriptor::~Descriptor() { @@ -47,7 +47,7 @@ infiniStatus_t Descriptor::create( *desc_ptr = new Descriptor( std::move(*meta), - new Opaque{reinterpret_cast(handle)->internal()}, + new Opaque{reinterpret_cast(handle)->internal()}, handle->device, handle->device_id); return INFINI_STATUS_SUCCESS; } @@ -429,18 +429,18 @@ infiniStatus_t launchKernel( infiniStatus_t Descriptor::calculate( void *y, const void *x, - void *stream) const { + void *stream_) const { - auto maca_stream = reinterpret_cast(stream); + auto stream = reinterpret_cast(stream_); // 如果没有维度,直接进行内存拷贝 if (_meta.ndim() == 0) { - auto err = hcMemcpyAsync(y, x, _meta.unit(), hcMemcpyDeviceToDevice, maca_stream); + auto err = hcMemcpyAsync(y, x, _meta.unit(), hcMemcpyDeviceToDevice, stream); if (err != hcSuccess) { return INFINI_STATUS_INTERNAL_ERROR; } - CHECK_OR_RETURN(hcMemcpyAsync(y, x, _meta.unit(), hcMemcpyDeviceToDevice, maca_stream) == hcSuccess, + CHECK_OR_RETURN(hcMemcpyAsync(y, x, _meta.unit(), hcMemcpyDeviceToDevice, stream) == hcSuccess, INFINI_STATUS_INTERNAL_ERROR); return INFINI_STATUS_SUCCESS; } @@ -449,7 +449,7 @@ infiniStatus_t Descriptor::calculate( int max_threads = _opaque->internal->maxThreadsPerBlock(); // 准备参数 - auto params_result = prepareRearrangeParams(_meta, std::min(MACA_BLOCK_SIZE_1024, max_threads)); + auto params_result = prepareRearrangeParams(_meta, std::min(METAX_BLOCK_SIZE_1024, max_threads)); CHECK_RESULT(params_result); auto params = params_result.take(); @@ -469,10 +469,10 @@ infiniStatus_t Descriptor::calculate( size_t block_size = params.block_len_total; - if (block_size <= MACA_BLOCK_SIZE_512) { - status = launchKernel(y, x, grid_size, params, _meta.unit(), maca_stream); - } else if (block_size <= MACA_BLOCK_SIZE_1024) { - status = launchKernel(y, x, grid_size, params, _meta.unit(), maca_stream); + if (block_size <= METAX_BLOCK_SIZE_512) { + status = launchKernel(y, x, grid_size, params, _meta.unit(), stream); + } else if (block_size <= METAX_BLOCK_SIZE_1024) { + status = launchKernel(y, x, grid_size, params, _meta.unit(), stream); } else { return INFINI_STATUS_DEVICE_ARCHITECTURE_NOT_SUPPORTED; } diff --git a/src/infiniop/ops/rms_norm/metax/rms_norm_metax.cuh b/src/infiniop/ops/rms_norm/metax/rms_norm_metax.cuh index 913e8e3ba..f74a8f444 100644 --- a/src/infiniop/ops/rms_norm/metax/rms_norm_metax.cuh +++ b/src/infiniop/ops/rms_norm/metax/rms_norm_metax.cuh @@ -1,8 +1,8 @@ -#ifndef __RMS_NORM_MACA_CUH__ -#define __RMS_NORM_MACA_CUH__ +#ifndef __RMS_NORM_METAX_CUH__ +#define __RMS_NORM_METAX_CUH__ #include "../rms_norm.h" -DESCRIPTOR(maca) +DESCRIPTOR(metax) #endif diff --git a/src/infiniop/ops/rms_norm/metax/rms_norm_metax.maca b/src/infiniop/ops/rms_norm/metax/rms_norm_metax.maca index 2e76303f3..e4b632264 100644 --- a/src/infiniop/ops/rms_norm/metax/rms_norm_metax.maca +++ b/src/infiniop/ops/rms_norm/metax/rms_norm_metax.maca @@ -1,7 +1,7 @@ -#include "../../../devices/maca/common_maca.h" +#include "../../../devices/metax/metax_common.h" #include "rms_norm_metax.cuh" -#include "../../../devices/maca/maca_kernel_common.h" +#include "../../../devices/metax/metax_kernel_common.h" #include #include "../../../reduce/cuda/reduce.cuh" @@ -9,7 +9,7 @@ #include "../cuda/kernel.cuh" template -INFINIOP_MACA_KERNEL rmsnormKernel( +INFINIOP_METAX_KERNEL rmsnormKernel( Tdata *__restrict__ y, ptrdiff_t stride_y, const Tdata *__restrict__ x, @@ -20,10 +20,10 @@ INFINIOP_MACA_KERNEL rmsnormKernel( rmsnormBlock(y, stride_y, x, stride_x, w, dim, epsilon); } -namespace op::rms_norm::maca { +namespace op::rms_norm::metax { struct Descriptor::Opaque { - std::shared_ptr internal; + std::shared_ptr internal; }; Descriptor::~Descriptor() { @@ -47,7 +47,7 @@ infiniStatus_t Descriptor::create( } *desc_ptr = new Descriptor( - new Opaque{reinterpret_cast(handle)->internal()}, + new Opaque{reinterpret_cast(handle)->internal()}, std::move(info), 0, handle->device, handle->device_id); @@ -62,10 +62,10 @@ infiniStatus_t launchKernel( const void *x, ptrdiff_t stride_x, const void *w, infiniDtype_t wtype, float epsilon, - hcStream_t maca_stream) { + hcStream_t stream) { #define LAUNCH_KERNEL(Tdata, Tweight, Tcompute) \ - rmsnormKernel<<>>( \ + rmsnormKernel<<>>( \ reinterpret_cast(y), \ stride_y, \ reinterpret_cast(x), \ @@ -96,7 +96,7 @@ infiniStatus_t launchKernel( infiniStatus_t Descriptor::calculate( void *workspace, size_t workspace_size, void *y, const void *x, const void *w, - void *stream) const { + void *stream_) const { if (workspace_size < _workspace_size) { return INFINI_STATUS_INSUFFICIENT_WORKSPACE; @@ -106,14 +106,14 @@ infiniStatus_t Descriptor::calculate( auto stride_y = _info.y_strides[0]; auto dim = _info.dim(); uint32_t batch_size = static_cast(_info.shape[0]); - auto maca_stream = reinterpret_cast(stream); + auto stream = reinterpret_cast(stream_); // launch kernel with different block sizes - if (_opaque->internal->maxThreadsPerBlock() == MACA_BLOCK_SIZE_1024) { - CHECK_STATUS(launchKernel(batch_size, dim, y, _info.atype, stride_y, x, stride_x, w, _info.wtype, _info.epsilon, maca_stream)); + if (_opaque->internal->maxThreadsPerBlock() == METAX_BLOCK_SIZE_1024) { + CHECK_STATUS(launchKernel(batch_size, dim, y, _info.atype, stride_y, x, stride_x, w, _info.wtype, _info.epsilon, stream)); } else { return INFINI_STATUS_DEVICE_ARCHITECTURE_NOT_SUPPORTED; } return INFINI_STATUS_SUCCESS; } -} // namespace op::rms_norm::maca +} // namespace op::rms_norm::metax diff --git a/src/infiniop/ops/rms_norm/operator.cc b/src/infiniop/ops/rms_norm/operator.cc index 9c7fbab27..bb188c51d 100644 --- a/src/infiniop/ops/rms_norm/operator.cc +++ b/src/infiniop/ops/rms_norm/operator.cc @@ -58,7 +58,7 @@ __C infiniStatus_t infiniopCreateRMSNormDescriptor( CREATE(INFINI_DEVICE_ASCEND, ascend); #endif #ifdef ENABLE_METAX_API - CREATE(INFINI_DEVICE_METAX, maca); + CREATE(INFINI_DEVICE_METAX, metax); #endif #ifdef ENABLE_MOORE_API CREATE(INFINI_DEVICE_MOORE, musa); @@ -96,7 +96,7 @@ __C infiniStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t d GET(INFINI_DEVICE_ASCEND, ascend); #endif #ifdef ENABLE_METAX_API - GET(INFINI_DEVICE_METAX, maca); + GET(INFINI_DEVICE_METAX, metax); #endif #ifdef ENABLE_MOORE_API GET(INFINI_DEVICE_MOORE, musa); @@ -135,7 +135,7 @@ __C infiniStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *works CALCULATE(INFINI_DEVICE_ASCEND, ascend); #endif #ifdef ENABLE_METAX_API - CALCULATE(INFINI_DEVICE_METAX, maca); + CALCULATE(INFINI_DEVICE_METAX, metax); #endif #ifdef ENABLE_MOORE_API CALCULATE(INFINI_DEVICE_MOORE, musa); @@ -173,7 +173,7 @@ __C infiniStatus_t infiniopDestroyRMSNormDescriptor(infiniopRMSNormDescriptor_t DESTROY(INFINI_DEVICE_ASCEND, ascend); #endif #ifdef ENABLE_METAX_API - DESTROY(INFINI_DEVICE_METAX, maca); + DESTROY(INFINI_DEVICE_METAX, metax); #endif #ifdef ENABLE_MOORE_API DESTROY(INFINI_DEVICE_MOORE, musa); diff --git a/src/infiniop/ops/rope/metax/rope_metax.h b/src/infiniop/ops/rope/metax/rope_metax.h index 40908f092..543e5c42d 100644 --- a/src/infiniop/ops/rope/metax/rope_metax.h +++ b/src/infiniop/ops/rope/metax/rope_metax.h @@ -1,8 +1,8 @@ -#ifndef __INFINIOP_ROPE_MACA_H__ -#define __INFINIOP_ROPE_MACA_H__ +#ifndef __INFINIOP_ROPE_METAX_H__ +#define __INFINIOP_ROPE_METAX_H__ #include "../rope.h" DESCRIPTOR(metax) -#endif // __INFINIOP_ROPE_MACA_H__ +#endif // __INFINIOP_ROPE_METAX_H__ diff --git a/src/infiniop/ops/rope/metax/rope_metax.maca b/src/infiniop/ops/rope/metax/rope_metax.maca index d7a3b9e82..b4373ebbd 100644 --- a/src/infiniop/ops/rope/metax/rope_metax.maca +++ b/src/infiniop/ops/rope/metax/rope_metax.maca @@ -1,12 +1,12 @@ -#include "../../../devices/maca/common_maca.h" +#include "../../../devices/metax/metax_common.h" #include "rope_metax.h" -#include "../../../devices/maca/maca_kernel_common.h" +#include "../../../devices/metax/metax_kernel_common.h" #include "../cuda/kernel.cuh" template -INFINIOP_MACA_KERNEL ropeThreadPerItemKernel( +INFINIOP_METAX_KERNEL ropeThreadPerItemKernel( Tdata *y_, const Tdata *x_, const Tindex *__restrict__ pos_ids, @@ -28,7 +28,7 @@ INFINIOP_MACA_KERNEL ropeThreadPerItemKernel( namespace op::rope::metax { struct Descriptor::Opaque { - std::shared_ptr internal; + std::shared_ptr internal; }; Descriptor::~Descriptor() { @@ -44,7 +44,7 @@ infiniStatus_t Descriptor::create( infiniopTensorDescriptor_t sin_desc, infiniopTensorDescriptor_t cos_desc) { - auto handle = reinterpret_cast(handle_); + auto handle = reinterpret_cast(handle_); auto info = RoPEInfo::createRoPEInfo(y_desc, x_desc, pos_desc, sin_desc, cos_desc); CHECK_RESULT(info); @@ -53,7 +53,7 @@ infiniStatus_t Descriptor::create( *desc_ptr = new Descriptor( info.take(), 0, - new Opaque{reinterpret_cast(handle)->internal()}, + new Opaque{reinterpret_cast(handle)->internal()}, handle->device, handle->device_id); @@ -141,4 +141,4 @@ infiniStatus_t Descriptor::calculate( #undef ROPE_TYPE #undef CALCULATE_ROPE -} // namespace op::rope::maca +} // namespace op::rope::metax diff --git a/src/infiniop/ops/swiglu/metax/swiglu_metax.h b/src/infiniop/ops/swiglu/metax/swiglu_metax.h index 830275fb8..608134fa8 100644 --- a/src/infiniop/ops/swiglu/metax/swiglu_metax.h +++ b/src/infiniop/ops/swiglu/metax/swiglu_metax.h @@ -1,8 +1,8 @@ -#ifndef __SWIGLU_MACA_API_H__ -#define __SWIGLU_MACA_API_H__ +#ifndef __SWIGLU_METAX_API_H__ +#define __SWIGLU_METAX_API_H__ -#include "../../../elementwise/maca/elementwise_maca_api.h" +#include "../../../elementwise/metax/elementwise_metax_api.h" -ELEMENTWISE_DESCRIPTOR(swiglu, metax, maca) +ELEMENTWISE_DESCRIPTOR(swiglu, metax, metax) -#endif // __SWIGLU_MACA_API_H__ +#endif // __SWIGLU_METAX_API_H__ diff --git a/src/infiniop/ops/swiglu/metax/swiglu_metax.maca b/src/infiniop/ops/swiglu/metax/swiglu_metax.maca index cc511f467..b0fabb039 100644 --- a/src/infiniop/ops/swiglu/metax/swiglu_metax.maca +++ b/src/infiniop/ops/swiglu/metax/swiglu_metax.maca @@ -1,6 +1,6 @@ #include "swiglu_metax.h" -#include "../../../elementwise/maca/elementwise_maca.h" +#include "../../../elementwise/metax/elementwise_metax.h" #include "../cuda/kernel.cuh" @@ -14,7 +14,7 @@ infiniStatus_t Descriptor::create( infiniopTensorDescriptor_t out_desc, std::vector input_desc_vec) { - auto handle = reinterpret_cast(handle_); + auto handle = reinterpret_cast(handle_); auto dtype = out_desc->dtype(); const auto &up_desc = input_desc_vec.at(0); @@ -26,8 +26,8 @@ infiniStatus_t Descriptor::create( CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_BF16, INFINI_DTYPE_F32, INFINI_DTYPE_F64); CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape); - // create MACA elementwise descriptor - CREATE_ELEMENTWISE_MACA_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec) + // create METAX elementwise descriptor + CREATE_ELEMENTWISE_METAX_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec) return INFINI_STATUS_SUCCESS; } diff --git a/src/infinirt/infinirt.cc b/src/infinirt/infinirt.cc index cf107503c..f85b26d14 100644 --- a/src/infinirt/infinirt.cc +++ b/src/infinirt/infinirt.cc @@ -5,7 +5,7 @@ #include "cpu/infinirt_cpu.h" #include "cuda/infinirt_cuda.cuh" #include "kunlun/infinirt_kunlun.h" -#include "maca/infinirt_maca.h" +#include "metax/infinirt_metax.h" #include "musa/infinirt_musa.h" thread_local infiniDevice_t CURRENT_DEVICE_TYPE = INFINI_DEVICE_CPU; @@ -62,7 +62,7 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_ _status = infinirt::ascend::API PARAMS; \ break; \ case INFINI_DEVICE_METAX: \ - _status = infinirt::maca::API PARAMS; \ + _status = infinirt::metax::API PARAMS; \ break; \ case INFINI_DEVICE_MOORE: \ _status = infinirt::musa::API PARAMS; \ diff --git a/src/infinirt/maca/infinirt_maca.cc b/src/infinirt/metax/infinirt_metax.cc similarity index 97% rename from src/infinirt/maca/infinirt_maca.cc rename to src/infinirt/metax/infinirt_metax.cc index 82cf72157..362a7d7ca 100644 --- a/src/infinirt/maca/infinirt_maca.cc +++ b/src/infinirt/metax/infinirt_metax.cc @@ -1,11 +1,11 @@ -#include "infinirt_maca.h" +#include "infinirt_metax.h" #include "../../utils.h" #include #include #define CHECK_MACART(RT_API) CHECK_INTERNAL(RT_API, hcSuccess) -namespace infinirt::maca { +namespace infinirt::metax { infiniStatus_t getDeviceCount(int *count) { CHECK_MACART(hcGetDeviceCount(count)); return INFINI_STATUS_SUCCESS; @@ -124,4 +124,4 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) { CHECK_MACART(hcFreeAsync(ptr, (hcStream_t)stream)); return INFINI_STATUS_SUCCESS; } -} // namespace infinirt::maca +} // namespace infinirt::metax diff --git a/src/infinirt/maca/infinirt_maca.h b/src/infinirt/metax/infinirt_metax.h similarity index 77% rename from src/infinirt/maca/infinirt_maca.h rename to src/infinirt/metax/infinirt_metax.h index 2e03c2eec..045fc3b7a 100644 --- a/src/infinirt/maca/infinirt_maca.h +++ b/src/infinirt/metax/infinirt_metax.h @@ -2,12 +2,12 @@ #define __INFINIRT_MACA_H__ #include "../infinirt_impl.h" -namespace infinirt::maca { +namespace infinirt::metax { #ifdef ENABLE_METAX_API INFINIRT_DEVICE_API_IMPL #else INFINIRT_DEVICE_API_NOOP #endif -} // namespace infinirt::maca +} // namespace infinirt::metax #endif // __INFINIRT_MACA_H__ diff --git a/xmake/metax.lua b/xmake/metax.lua index 48a0a8562..358ada57c 100644 --- a/xmake/metax.lua +++ b/xmake/metax.lua @@ -34,7 +34,7 @@ target("infiniop-metax") set_languages("cxx17") set_warnings("all", "error") add_cxflags("-lstdc++", "-fPIC", "-Wno-defaulted-function-deleted", "-Wno-strict-aliasing") - add_files("../src/infiniop/devices/maca/*.cc", "../src/infiniop/ops/*/metax/*.cc") + add_files("../src/infiniop/devices/metax/*.cc", "../src/infiniop/ops/*/metax/*.cc") add_files("../src/infiniop/ops/*/metax/*.maca", {rule = "maca"}) target_end() @@ -45,7 +45,7 @@ target("infinirt-metax") add_deps("infini-utils") set_warnings("all", "error") add_cxflags("-lstdc++ -fPIC") - add_files("../src/infinirt/maca/*.cc") + add_files("../src/infinirt/metax/*.cc") target_end() target("infiniccl-metax") @@ -58,7 +58,7 @@ target("infiniccl-metax") end if has_config("ccl") then add_links("libhccl.so") - add_files("../src/infiniccl/maca/*.cc") + add_files("../src/infiniccl/metax/*.cc") end set_languages("cxx17")