Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions asnumpy/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@
"minimum",
"fmax",
"fmin",
"relu",
"gelu",
"pareto",
"rayleigh",
"normal",
Expand Down
30 changes: 30 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# AsNumpy项目函数样例说明
样例调用本项目的函数,和Numpy的同功能函数用numpy.allclose进行结果对比,并输出运行时间,以此来展现AsNumpy的准确性和性能

## 已实现样例
| 文件名 | 功能描述 |
| :--- | :-- |
| [01_add](01_add.py) | 用asnumpy.add和numpy.add分别对输入数组 x1 和 x2 执行逐元素加法运算并对比结果,并计算它们的运行时间 |
| [02_exp2](02_exp2.py) | 用asnumpy.exp2和numpy.exp2分别对输入数组 x 的每个元素计算 2 的幂并对比结果,并计算它们的运行时间 |
| [03_multiply](03_multiply.py) | 用asnumpy.multiply和numpy.multiply分别对输入数组 x1 和 x2 执行逐元素乘法运算并对比结果,并计算它们的运行时间 |
| [04_all](04_all.py) | 用asnumpy.all和numpy.all分别对输入数组 x 执行对输入数组执行逻辑与归约操作,判断所有元素是否均为 True并对比结果,并计算它们的运行时间 |
| [05_divide](05_divide.py) | 用asnumpy.divide和numpy.divide分别对输入数组 x1 和 x2 执行逐元素除法并对比结果,并计算它们的运行时间 |

## 下一步预期实现样例
| 函数名 | 预期功能描述 |
| :--- | :-- |
| sinh | 用asnumpy.sinh和numpy.sinh分别对输入数组 x 逐元素计算双曲正弦并对比结果,并计算它们的运行时间 |
| real | 用asnumpy.real和numpy.real分别逐元素输出 x 的实数部分并对比结果,并计算它们的运行时间 |
| square | 用asnumpy.square和numpy.square分别逐元素计算 x 的平方并对比结果,并计算它们的运行时间 |
| sinc | 用asnumpy.sinc和numpy.sinc分别对输入数组 x 逐元素计算 sinc 函数并对比结果,并计算它们的运行时间 |
| gcd | 用asnumpy.gcd和numpy.gcd分别对输入数组 x1 和 x2 逐元素计算最大公约数并对比结果,并计算它们的运行时间 |
| around | 用asnumpy.around和numpy.around分别逐元素将 x 四舍五入到指定小数位数并对比结果,并计算它们的运行时间 |
| cumsum | 用asnumpy.cumsum和numpy.cumsum分别逐元素计算 x 沿给定轴的元素的累积和并对比结果,并计算它们的运行时间 |
| arcsin | 用asnumpy.arcsin和numpy.arcsin分别对 x 进行逐元素的反正弦计算并对比结果,并计算它们的运行时间 |
| reciprocal | 用asnumpy.reciprocal和numpy.reciprocal分别对 x 计算每个元素的倒数并对比结果,并计算它们的运行时间 |
| binomial | 用asnumpy.binomial从二项分布中抽取足够多的随机样本,并用卡方检验验证样本是否符合该分布,并计算运行时间 |

## 更新说明
| 时间 | 更新事项 |
| :--- | :-- |
| 2025/10/14 | 新增AsNumpy项目函数样例说明 |
3 changes: 3 additions & 0 deletions include/asnumpy/math/miscellaneous.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,7 @@ NPUArray Fmax(const NPUArray& x1, const NPUArray& x2, std::optional<py::dtype> d

NPUArray Fmin(const NPUArray& x1, const NPUArray& x2, std::optional<py::dtype> dtype = std::nullopt);

/// @brief Element-wise Rectified Linear Unit: max(0, x).
/// @param x Input array.
/// @param dtype Optional output dtype; defaults to the input's dtype.
/// @return New NPUArray holding the activated values.
NPUArray Relu(const NPUArray& x, std::optional<py::dtype> dtype = std::nullopt);

/// @brief Element-wise Gaussian Error Linear Unit activation.
/// @param x Input array.
/// @param dtype Optional output dtype; defaults to the input's dtype.
/// @return New NPUArray holding the activated values.
NPUArray Gelu(const NPUArray& x, std::optional<py::dtype> dtype = std::nullopt);
}
2 changes: 2 additions & 0 deletions python/bind_math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ void bind_miscellaneous(py::module_& math){
math.def("minimum", &Minimum, py::arg("x1"), py::arg("x2"), py::arg("dtype") = py::none());
math.def("fmax", &Fmax, py::arg("x1"), py::arg("x2"), py::arg("dtype") = py::none());
math.def("fmin", &Fmin, py::arg("x1"), py::arg("x2"), py::arg("dtype") = py::none());
math.def("relu", &Relu, py::arg("x"), py::arg("dtype") = py::none());
math.def("gelu", &Gelu, py::arg("x"), py::arg("dtype") = py::none());
}

void bind_arithmetic_operations(py::module_& math) {
Expand Down
68 changes: 68 additions & 0 deletions src/math/miscellaneous.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include <aclnnop/aclnn_convolution.h>
#include <aclnnop/aclnn_clamp.h>
#include <aclnnop/aclnn_pow.h>
#include <aclnnop/aclnn_relu.h>
#include <aclnnop/aclnn_gelu.h>
#include <aclnnop/aclnn_nan_to_num.h>
#include <aclnnop/aclnn_abs.h>
#include <aclnnop/aclnn_sign.h>
Expand Down Expand Up @@ -750,4 +752,70 @@ NPUArray Fmin(const NPUArray& x1, const NPUArray& x2, std::optional<py::dtype> d
return out;
}


/**
* @brief Compute element-wise Rectified Linear Unit (ReLU).
*
* Applies ReLU activation function element-wise: max(0, x).
* Equivalent to numpy.maximum(x, 0).
*
* @param x Input array.
* @param dtype Optional target numpy dtype for the output array. If not provided, uses input dtype.
* @return NPUArray Array with element-wise ReLU values.
* @throws std::runtime_error If ACL operation or memory allocation fails.
*/
/**
 * @brief Compute element-wise Rectified Linear Unit (ReLU).
 *
 * Applies the ReLU activation function element-wise: max(0, x).
 * Equivalent to numpy.maximum(x, 0).
 *
 * @param x Input array.
 * @param dtype Optional target numpy dtype for the output array. If not provided, uses input dtype.
 * @return NPUArray Array with element-wise ReLU values.
 * @throws std::runtime_error If ACL operation or memory allocation fails.
 */
NPUArray Relu(const NPUArray& x, std::optional<py::dtype> dtype) {
    py::dtype out_dtype = dtype.has_value() ? dtype.value() : x.dtype;
    // NOTE(review): no explicit cast is performed when out_dtype != x.dtype —
    // assumes the aclnn op accepts the dtype pair; verify against other ops in this file.
    auto out = NPUArray(x.shape, out_dtype);

    // Two-phase aclnn call: query workspace size first, then execute.
    uint64_t workspaceSize = 0;
    aclOpExecutor* executor = nullptr;
    auto error = aclnnReluGetWorkspaceSize(x.tensorPtr, out.tensorPtr, &workspaceSize, &executor);
    CheckGetWorkspaceSizeAclnnStatus(error);

    void* workspaceAddr = nullptr;
    if (workspaceSize > 0) {
        error = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST);
        CheckMallocAclnnStatus(error);
    }

    // Launch on the default stream (nullptr) and wait for the device to finish.
    const auto execStatus = aclnnRelu(workspaceAddr, workspaceSize, executor, nullptr);
    const auto syncStatus = aclrtSynchronizeDevice();

    // Free the workspace BEFORE the status checks: the Check* helpers throw on
    // failure, and checking first (as the original did) leaked the workspace
    // whenever the kernel launch or the device sync failed.
    if (workspaceAddr) aclrtFree(workspaceAddr);
    CheckAclnnStatus(execStatus, "aclnnRelu error");
    CheckSynchronizeDeviceAclnnStatus(syncStatus);
    return out;
}


/**
* @brief Compute element-wise Gaussian Error Linear Unit (GELU).
*
* Applies GELU activation function element-wise: GELU(x) = x * Φ(x)
* where Φ(x) is the cumulative distribution function of the standard normal distribution.
*
* GELU is commonly used in models like BERT and GPT. It provides smoother gradients
* compared to ReLU and incorporates probabilistic properties.
*
* @param x Input array.
* @param dtype Optional target numpy dtype for the output array. If not provided, uses input dtype.
* @return NPUArray Array with element-wise GELU values.
* @throws std::runtime_error If ACL operation or memory allocation fails.
*/
/**
 * @brief Compute element-wise Gaussian Error Linear Unit (GELU).
 *
 * Applies the GELU activation function element-wise: GELU(x) = x * Φ(x),
 * where Φ(x) is the cumulative distribution function of the standard normal
 * distribution. Commonly used in transformer models (BERT, GPT); smoother
 * than ReLU.
 *
 * @param x Input array.
 * @param dtype Optional target numpy dtype for the output array. If not provided, uses input dtype.
 * @return NPUArray Array with element-wise GELU values.
 * @throws std::runtime_error If ACL operation or memory allocation fails.
 */
NPUArray Gelu(const NPUArray& x, std::optional<py::dtype> dtype) {
    py::dtype out_dtype = dtype.has_value() ? dtype.value() : x.dtype;
    // NOTE(review): no explicit cast is performed when out_dtype != x.dtype —
    // assumes the aclnn op accepts the dtype pair; verify against other ops in this file.
    auto out = NPUArray(x.shape, out_dtype);

    // Two-phase aclnn call: query workspace size first, then execute.
    uint64_t workspaceSize = 0;
    aclOpExecutor* executor = nullptr;
    auto error = aclnnGeluGetWorkspaceSize(x.tensorPtr, out.tensorPtr, &workspaceSize, &executor);
    CheckGetWorkspaceSizeAclnnStatus(error);

    void* workspaceAddr = nullptr;
    if (workspaceSize > 0) {
        error = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST);
        CheckMallocAclnnStatus(error);
    }

    // Launch on the default stream (nullptr) and wait for the device to finish.
    const auto execStatus = aclnnGelu(workspaceAddr, workspaceSize, executor, nullptr);
    const auto syncStatus = aclrtSynchronizeDevice();

    // Free the workspace BEFORE the status checks: the Check* helpers throw on
    // failure, and checking first (as the original did) leaked the workspace
    // whenever the kernel launch or the device sync failed.
    if (workspaceAddr) aclrtFree(workspaceAddr);
    CheckAclnnStatus(execStatus, "aclnnGelu error");
    CheckSynchronizeDeviceAclnnStatus(syncStatus);
    return out;
}
}
12 changes: 12 additions & 0 deletions test/test_math/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@
ap.fabs,
UNARY_TEST_CASES
),
(
"relu",
lambda x: np.maximum(x, 0), # NumPy 没有内置 relu,用 maximum 模拟
ap.relu,
UNARY_TEST_CASES
),
(
"gelu",
lambda x: x * 0.5 * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * np.power(x, 3)))), # GELU 近似公式
ap.gelu,
UNARY_TEST_CASES
),
]

# 双操作数函数注册表 (函数名, numpy函数, asnumpy函数, 测试用例列表)
Expand Down