diff --git a/paddle/phi/kernels/sparse/activation_grad_kernel.h b/paddle/phi/kernels/sparse/activation_grad_kernel.h
deleted file mode 100644
index f9e5bc3c37fc5..0000000000000
--- a/paddle/phi/kernels/sparse/activation_grad_kernel.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include "paddle/phi/kernels/sparse/utils.h"
-
-namespace phi {
-namespace sparse {
-
-DECLARE_SPARSE_UNARY_GRAD_KERNEL(Relu)
-
-}  // namespace sparse
-}  // namespace phi
diff --git a/paddle/phi/kernels/sparse/activation_kernel.cc b/paddle/phi/kernels/sparse/activation_kernel.cc
deleted file mode 100644
index f67d6533adc20..0000000000000
--- a/paddle/phi/kernels/sparse/activation_kernel.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/phi/kernels/sparse/activation_kernel.h"
-
-#include "paddle/phi/kernels/sparse/utils.h"
-
-DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(tanh, TanhKernel)
-
-// NOTE: the following code is to bypass the restriction of Paddle
-// kernel registration mechanism. Do NOT refactor them unless you
-// know what you are doing.
-// If you want to implement any new kernel, please follow the above
-// `tanh`, do NOT follow the following `relu`.
-DEFINE_SPARSE_UNARY_KERNEL(ReluKernel)
-
-PD_REGISTER_KERNEL(sparse_coo_relu,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::sparse::SparseCooReluKernel,
-                   float,
-                   double) {
-  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
-}
-PD_REGISTER_KERNEL(sparse_csr_relu,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::sparse::SparseCsrReluKernel,
-                   float,
-                   double) {
-  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
-}
-
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-PD_REGISTER_KERNEL(sparse_coo_relu,
-                   GPU,
-                   ALL_LAYOUT,
-                   phi::sparse::SparseCooReluKernel,
-                   float,
-                   double,
-                   phi::dtype::float16) {
-  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
-}
-
-PD_REGISTER_KERNEL(sparse_csr_relu,
-                   GPU,
-                   ALL_LAYOUT,
-                   phi::sparse::SparseCsrReluKernel,
-                   float,
-                   double,
-                   phi::dtype::float16) {
-  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
-}
-#endif
diff --git a/paddle/phi/kernels/sparse/activation_kernel.h b/paddle/phi/kernels/sparse/activation_kernel.h
deleted file mode 100644
index 03622f5c9f77d..0000000000000
--- a/paddle/phi/kernels/sparse/activation_kernel.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include "paddle/phi/core/dense_tensor.h"
-#include "paddle/phi/core/sparse_coo_tensor.h"
-#include "paddle/phi/core/sparse_csr_tensor.h"
-#include "paddle/phi/kernels/activation_kernel.h"
-#include "paddle/phi/kernels/empty_kernel.h"
-#include "paddle/phi/kernels/sparse/utils.h"
-
-namespace phi {
-namespace sparse {
-
-DECLARE_SPARSE_UNARY_KERNEL(Relu)
-
-template <typename T, typename Context>
-SparseCooTensor SparseRelu(const Context& dev_ctx, const SparseCooTensor& x) {
-  DenseTensor indices, values;
-  SparseCooTensor coo(indices, values, x.dims());
-  SparseCooReluKernel<T, Context>(dev_ctx, x, &coo);
-  return coo;
-}
-
-}  // namespace sparse
-}  // namespace phi
diff --git a/paddle/phi/kernels/sparse/math_grad_kernel.cc b/paddle/phi/kernels/sparse/math_grad_kernel.cc
deleted file mode 100644
index 5bc39673d0f44..0000000000000
--- a/paddle/phi/kernels/sparse/math_grad_kernel.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/phi/kernels/sparse/math_grad_kernel.h"
-
-#include "paddle/phi/kernels/activation_grad_kernel.h"
-#include "paddle/phi/kernels/sparse/utils.h"
-
-DEFINE_AND_REGISTER_SPARSE_UNARY_GRAD_KERNEL(sin_grad, SinGradKernel)
-
-// NOTE: the following code is to bypass the restriction of Paddle
-// kernel registration mechanism. Do NOT refactor them unless you
-// know what you are doing.
-// If you want to implement any new kernel, please follow the above
-// `sin_grad`, do NOT follow the following `sqrt_grad`.
-DEFINE_SPARSE_UNARY_GRAD_KERNEL(SqrtGradKernel)
-
-PD_REGISTER_KERNEL(sparse_coo_sqrt_grad,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::sparse::SparseCooSqrtGradKernel,
-                   float,
-                   double) {
-  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
-}
-PD_REGISTER_KERNEL(sparse_csr_sqrt_grad,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::sparse::SparseCsrSqrtGradKernel,
-                   float,
-                   double) {
-  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
-}
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-PD_REGISTER_KERNEL(sparse_coo_sqrt_grad,
-                   GPU,
-                   ALL_LAYOUT,
-                   phi::sparse::SparseCooSqrtGradKernel,
-                   float,
-                   double,
-                   phi::dtype::float16) {
-  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
-}
-
-PD_REGISTER_KERNEL(sparse_csr_sqrt_grad,
-                   GPU,
-                   ALL_LAYOUT,
-                   phi::sparse::SparseCsrSqrtGradKernel,
-                   float,
-                   double,
-                   phi::dtype::float16) {
-  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
-}
-#endif
diff --git a/paddle/phi/kernels/sparse/math_grad_kernel.h b/paddle/phi/kernels/sparse/math_grad_kernel.h
deleted file mode 100644
index 0b6a9ce26af76..0000000000000
--- a/paddle/phi/kernels/sparse/math_grad_kernel.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "paddle/phi/kernels/sparse/utils.h"
-
-namespace phi {
-namespace sparse {
-
-DECLARE_SPARSE_UNARY_GRAD_KERNEL(Sqrt)
-DECLARE_SPARSE_UNARY_GRAD_KERNEL(Sin)
-
-}  // namespace sparse
-}  // namespace phi
diff --git a/paddle/phi/kernels/sparse/math_kernel.cc b/paddle/phi/kernels/sparse/math_kernel.cc
deleted file mode 100644
index 9706730e0119d..0000000000000
--- a/paddle/phi/kernels/sparse/math_kernel.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/phi/kernels/sparse/math_kernel.h"
-
-#include "paddle/phi/kernels/activation_kernel.h"
-#include "paddle/phi/kernels/sparse/utils.h"
-
-DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(sin, SinKernel)
-
-// NOTE: the following code is to bypass the restriction of Paddle
-// kernel registration mechanism. Do NOT refactor them unless you
-// know what you are doing.
-// If you want to implement any new kernel, please follow the above
-// `sin`, do NOT follow the following `sqrt`.
-DEFINE_SPARSE_UNARY_KERNEL(SqrtKernel)
-
-PD_REGISTER_KERNEL(sparse_coo_sqrt,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::sparse::SparseCooSqrtKernel,
-                   float,
-                   double) {
-  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
-}
-PD_REGISTER_KERNEL(sparse_csr_sqrt,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::sparse::SparseCsrSqrtKernel,
-                   float,
-                   double) {
-  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
-}
-
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-PD_REGISTER_KERNEL(sparse_coo_sqrt,
-                   GPU,
-                   ALL_LAYOUT,
-                   phi::sparse::SparseCooSqrtKernel,
-                   float,
-                   double,
-                   phi::dtype::float16) {
-  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
-}
-
-PD_REGISTER_KERNEL(sparse_csr_sqrt,
-                   GPU,
-                   ALL_LAYOUT,
-                   phi::sparse::SparseCsrSqrtKernel,
-                   float,
-                   double,
-                   phi::dtype::float16) {
-  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
-}
-#endif
diff --git a/paddle/phi/kernels/sparse/unary_grad_kernel.cc b/paddle/phi/kernels/sparse/unary_grad_kernel.cc
new file mode 100644
index 0000000000000..1fd3ef2711299
--- /dev/null
+++ b/paddle/phi/kernels/sparse/unary_grad_kernel.cc
@@ -0,0 +1,183 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/sparse/unary_grad_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/sparse_coo_tensor.h"
+#include "paddle/phi/core/sparse_csr_tensor.h"
+#include "paddle/phi/kernels/activation_grad_kernel.h"
+#include "paddle/phi/kernels/copy_kernel.h"
+#include "paddle/phi/kernels/empty_kernel.h"
+
+#define DEFINE_SPARSE_UNARY_GRAD_KERNEL(DenseKernelFunc) \
+  namespace phi { \
+  namespace sparse { \
+ \
+  template <typename T, typename Context> \
+  void SparseCoo##DenseKernelFunc(const Context& dev_ctx, \
+                                  const SparseCooTensor& x_or_out, \
+                                  const SparseCooTensor& out_grad, \
+                                  SparseCooTensor* x_grad) { \
+    DenseTensor non_zero_indices = \
+        phi::EmptyLike<T, Context>(dev_ctx, x_or_out.non_zero_indices()); \
+    DenseTensor non_zero_elements = \
+        phi::EmptyLike<T, Context>(dev_ctx, x_or_out.non_zero_elements()); \
+    phi::Copy(dev_ctx, \
+              x_or_out.non_zero_indices(), \
+              dev_ctx.GetPlace(), \
+              false, \
+              &non_zero_indices); \
+    phi::DenseKernelFunc<T, Context>(dev_ctx, \
+                                     x_or_out.non_zero_elements(), \
+                                     out_grad.non_zero_elements(), \
+                                     &non_zero_elements); \
+    x_grad->SetMember( \
+        non_zero_indices, non_zero_elements, x_or_out.dims(), true); \
+  } \
+ \
+  template <typename T, typename Context> \
+  void SparseCsr##DenseKernelFunc(const Context& dev_ctx, \
+                                  const SparseCsrTensor& x_or_out, \
+                                  const SparseCsrTensor& out_grad, \
+                                  SparseCsrTensor* out) { \
+    DenseTensor non_zero_crows = \
+        phi::EmptyLike<T, Context>(dev_ctx, x_or_out.non_zero_crows()); \
+    DenseTensor non_zero_cols = \
+        phi::EmptyLike<T, Context>(dev_ctx, x_or_out.non_zero_cols()); \
+    DenseTensor non_zero_elements = \
+        phi::EmptyLike<T, Context>(dev_ctx, x_or_out.non_zero_elements()); \
+    phi::Copy(dev_ctx, \
+              x_or_out.non_zero_crows(), \
+              dev_ctx.GetPlace(), \
+              false, \
+              &non_zero_crows); \
+    phi::Copy(dev_ctx, \
+              x_or_out.non_zero_cols(), \
+              dev_ctx.GetPlace(), \
+              false, \
+              &non_zero_cols); \
+    phi::DenseKernelFunc<T, Context>(dev_ctx, \
+                                     x_or_out.non_zero_elements(), \
+                                     out_grad.non_zero_elements(), \
+                                     &non_zero_elements); \
+    out->SetMember( \
+        non_zero_crows, non_zero_cols, non_zero_elements, x_or_out.dims()); \
+  } \
+  } \
+  }
+
+#define REGISTER_CPU_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc) \
+  PD_REGISTER_KERNEL(sparse_coo_##kernel_name, \
+                     CPU, \
+                     ALL_LAYOUT, \
+                     phi::sparse::SparseCoo##DenseKernelFunc, \
+                     float, \
+                     double) { \
+    kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); \
+  } \
+  PD_REGISTER_KERNEL(sparse_csr_##kernel_name, \
+                     CPU, \
+                     ALL_LAYOUT, \
+                     phi::sparse::SparseCsr##DenseKernelFunc, \
+                     float, \
+                     double) { \
+    kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); \
+  }
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#define REGISTER_GPU_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc) \
+  PD_REGISTER_KERNEL(sparse_coo_##kernel_name, \
+                     GPU, \
+                     ALL_LAYOUT, \
+                     phi::sparse::SparseCoo##DenseKernelFunc, \
+                     float, \
+                     double, \
+                     phi::dtype::float16) { \
+    kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); \
+  } \
+ \
+  PD_REGISTER_KERNEL(sparse_csr_##kernel_name, \
+                     GPU, \
+                     ALL_LAYOUT, \
+                     phi::sparse::SparseCsr##DenseKernelFunc, \
+                     float, \
+                     double, \
+                     phi::dtype::float16) { \
+    kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); \
+  }
#else
+// This macro definition is empty when GPU is disabled
+#define REGISTER_GPU_SPARSE_UNARY_KERNEL(sparse_kernel_name, DenseKernelFunc)
+#endif
+
+#define REGISTER_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc) \
+  REGISTER_CPU_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc) \
+  REGISTER_GPU_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc)
+
+#define DEFINE_AND_REGISTER_SPARSE_UNARY_GRAD_KERNEL(kernel_name, \
+                                                     DenseKernelFunc) \
+  DEFINE_SPARSE_UNARY_GRAD_KERNEL(DenseKernelFunc) \
+  REGISTER_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc)
+
+// NOTE: the following code is to bypass the restriction of Paddle
+// kernel registration mechanism. Do NOT refactor them unless you
+// know what you are doing.
+// If you want to implement any new kernel, please follow `sin_grad`,
+// `tanh_grad` etc, do NOT follow the following `relu_grad`.
+DEFINE_SPARSE_UNARY_GRAD_KERNEL(ReluGradKernel)
+
+PD_REGISTER_KERNEL(sparse_coo_relu_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCooReluGradKernel,
+                   float,
+                   double) {
+  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
+}
+PD_REGISTER_KERNEL(sparse_csr_relu_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCsrReluGradKernel,
+                   float,
+                   double) {
+  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
+}
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PD_REGISTER_KERNEL(sparse_coo_relu_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCooReluGradKernel,
+                   float,
+                   double,
+                   phi::dtype::float16) {
+  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
+}
+
+PD_REGISTER_KERNEL(sparse_csr_relu_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCsrReluGradKernel,
+                   float,
+                   double,
+                   phi::dtype::float16) {
+  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
+}
+#endif
+
+DEFINE_AND_REGISTER_SPARSE_UNARY_GRAD_KERNEL(sin_grad, SinGradKernel)
+DEFINE_AND_REGISTER_SPARSE_UNARY_GRAD_KERNEL(sqrt_grad, SqrtGradKernel)
+DEFINE_AND_REGISTER_SPARSE_UNARY_GRAD_KERNEL(tanh_grad, TanhGradKernel)
diff --git a/paddle/phi/kernels/sparse/unary_grad_kernel.h b/paddle/phi/kernels/sparse/unary_grad_kernel.h
new file mode 100644
index 0000000000000..24ea4fee1a4fd
--- /dev/null
+++ b/paddle/phi/kernels/sparse/unary_grad_kernel.h
@@ -0,0 +1,41 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/sparse_coo_tensor.h"
+#include "paddle/phi/core/sparse_csr_tensor.h"
+
+#define DECLARE_SPARSE_UNARY_GRAD_KERNEL(name) \
+  template <typename T, typename Context> \
+  void SparseCoo##name##GradKernel(const Context& dev_ctx, \
+                                   const SparseCooTensor& x, \
+                                   const SparseCooTensor& out_grad, \
+                                   SparseCooTensor* x_grad); \
+ \
+  template <typename T, typename Context> \
+  void SparseCsr##name##GradKernel(const Context& dev_ctx, \
+                                   const SparseCsrTensor& x, \
+                                   const SparseCsrTensor& out_grad, \
+                                   SparseCsrTensor* x_grad);
+
+namespace phi {
+namespace sparse {
+
+DECLARE_SPARSE_UNARY_GRAD_KERNEL(Relu)
+DECLARE_SPARSE_UNARY_GRAD_KERNEL(Sqrt)
+DECLARE_SPARSE_UNARY_GRAD_KERNEL(Sin)
+
+}  // namespace sparse
+}  // namespace phi
diff --git a/paddle/phi/kernels/sparse/unary_kernel.cc b/paddle/phi/kernels/sparse/unary_kernel.cc
new file mode 100644
index 0000000000000..97dd8f0d67c16
--- /dev/null
+++ b/paddle/phi/kernels/sparse/unary_kernel.cc
@@ -0,0 +1,171 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/sparse/unary_kernel.h"
+
+#include "paddle/phi/kernels/activation_kernel.h"
+#include "paddle/phi/kernels/sparse/utils.h"
+
+#define DEFINE_SPARSE_UNARY_KERNEL(DenseKernelFunc) \
+  namespace phi { \
+  namespace sparse { \
+ \
+  template <typename T, typename Context> \
+  void SparseCoo##DenseKernelFunc(const Context& dev_ctx, \
+                                  const SparseCooTensor& x, \
+                                  SparseCooTensor* out) { \
+    DenseTensor non_zero_indices = \
+        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_indices()); \
+    DenseTensor non_zero_elements = \
+        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements()); \
+    phi::Copy(dev_ctx, \
+              x.non_zero_indices(), \
+              dev_ctx.GetPlace(), \
+              false, \
+              &non_zero_indices); \
+    phi::DenseKernelFunc<T, Context>( \
+        dev_ctx, x.non_zero_elements(), &non_zero_elements); \
+    out->SetMember(non_zero_indices, non_zero_elements, x.dims(), true); \
+  } \
+ \
+  template <typename T, typename Context> \
+  void SparseCsr##DenseKernelFunc(const Context& dev_ctx, \
+                                  const SparseCsrTensor& x, \
+                                  SparseCsrTensor* out) { \
+    DenseTensor non_zero_crows = \
+        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_crows()); \
+    DenseTensor non_zero_cols = \
+        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_cols()); \
+    DenseTensor non_zero_elements = \
+        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements()); \
+    phi::Copy(dev_ctx, \
+              x.non_zero_crows(), \
+              dev_ctx.GetPlace(), \
+              false, \
+              &non_zero_crows); \
+    phi::Copy(dev_ctx, \
+              x.non_zero_cols(), \
+              dev_ctx.GetPlace(), \
+              false, \
+              &non_zero_cols); \
+    phi::DenseKernelFunc<T, Context>( \
+        dev_ctx, x.non_zero_elements(), &non_zero_elements); \
+    out->SetMember( \
+        non_zero_crows, non_zero_cols, non_zero_elements, x.dims()); \
+  } \
+  } \
+  }
+
+#define REGISTER_CPU_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc) \
+  PD_REGISTER_KERNEL(sparse_coo_##kernel_name, \
+                     CPU, \
+                     ALL_LAYOUT, \
+                     phi::sparse::SparseCoo##DenseKernelFunc, \
+                     float, \
+                     double) { \
+    kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); \
+  } \
+  PD_REGISTER_KERNEL(sparse_csr_##kernel_name, \
+                     CPU, \
+                     ALL_LAYOUT, \
+                     phi::sparse::SparseCsr##DenseKernelFunc, \
+                     float, \
+                     double) { \
+    kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); \
+  }
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#define REGISTER_GPU_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc) \
+  PD_REGISTER_KERNEL(sparse_coo_##kernel_name, \
+                     GPU, \
+                     ALL_LAYOUT, \
+                     phi::sparse::SparseCoo##DenseKernelFunc, \
+                     float, \
+                     double, \
+                     phi::dtype::float16) { \
+    kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); \
+  } \
+ \
+  PD_REGISTER_KERNEL(sparse_csr_##kernel_name, \
+                     GPU, \
+                     ALL_LAYOUT, \
+                     phi::sparse::SparseCsr##DenseKernelFunc, \
+                     float, \
+                     double, \
+                     phi::dtype::float16) { \
+    kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); \
+  }
+#else
+// This macro definition is empty when GPU is disabled
+#define REGISTER_GPU_SPARSE_UNARY_KERNEL(sparse_kernel_name, DenseKernelFunc)
+#endif
+
+#define REGISTER_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc) \
+  REGISTER_CPU_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc) \
+  REGISTER_GPU_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc)
+
+#define DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc) \
+  DEFINE_SPARSE_UNARY_KERNEL(DenseKernelFunc) \
+  REGISTER_SPARSE_UNARY_KERNEL(kernel_name, DenseKernelFunc)
+
+// NOTE: the following code is to bypass the restriction of Paddle
+// kernel registration mechanism. Do NOT refactor them unless you
+// know what you are doing.
+// If you want to implement any new kernel, please follow `sin`,
+// `tanh` etc, do NOT follow `sqrt`.
+DEFINE_SPARSE_UNARY_KERNEL(SqrtKernel)
+
+PD_REGISTER_KERNEL(sparse_coo_sqrt,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCooSqrtKernel,
+                   float,
+                   double) {
+  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
+}
+PD_REGISTER_KERNEL(sparse_csr_sqrt,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCsrSqrtKernel,
+                   float,
+                   double) {
+  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
+}
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PD_REGISTER_KERNEL(sparse_coo_sqrt,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCooSqrtKernel,
+                   float,
+                   double,
+                   phi::dtype::float16) {
+  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
+}
+
+PD_REGISTER_KERNEL(sparse_csr_sqrt,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCsrSqrtKernel,
+                   float,
+                   double,
+                   phi::dtype::float16) {
+  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
+}
+
+#endif
+
+DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(sin, SinKernel)
+DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(tanh, TanhKernel)
+DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(relu, ReluKernel)
diff --git a/paddle/phi/kernels/sparse/math_kernel.h b/paddle/phi/kernels/sparse/unary_kernel.h
similarity index 51%
rename from paddle/phi/kernels/sparse/math_kernel.h
rename to paddle/phi/kernels/sparse/unary_kernel.h
index b48f04f7a0f0f..2aa056eec4360 100644
--- a/paddle/phi/kernels/sparse/math_kernel.h
+++ b/paddle/phi/kernels/sparse/unary_kernel.h
@@ -19,13 +19,31 @@
 #include "paddle/phi/core/sparse_csr_tensor.h"
 #include "paddle/phi/kernels/activation_kernel.h"
 #include "paddle/phi/kernels/empty_kernel.h"
-#include "paddle/phi/kernels/sparse/utils.h"
+
+#define DECLARE_SPARSE_UNARY_KERNEL(name) \
+  template <typename T, typename Context> \
+  void SparseCoo##name##Kernel( \
+      const Context& dev_ctx, const SparseCooTensor& x, SparseCooTensor* out); \
+ \
+  template <typename T, typename Context> \
+  void SparseCsr##name##Kernel( \
+      const Context& dev_ctx, const SparseCsrTensor& x, SparseCsrTensor* out);
 
 namespace phi {
 namespace sparse {
 
+DECLARE_SPARSE_UNARY_KERNEL(Relu)
 DECLARE_SPARSE_UNARY_KERNEL(Sqrt)
 DECLARE_SPARSE_UNARY_KERNEL(Sin)
 
+template <typename T, typename Context>
+SparseCooTensor SparseRelu(const Context& dev_ctx, const SparseCooTensor& x) {
+  DenseTensor indices, values;
+  SparseCooTensor coo(indices, values, x.dims());
+  SparseCooReluKernel<T, Context>(dev_ctx, x, &coo);
+  return coo;
+}
+
+
 }  // namespace sparse
 }  // namespace phi
diff --git a/paddle/phi/kernels/sparse/utils.h b/paddle/phi/kernels/sparse/utils.h
index 9c19219e8518e..9b8d9459561bf 100644
--- a/paddle/phi/kernels/sparse/utils.h
+++ b/paddle/phi/kernels/sparse/utils.h
@@ -20,189 +20,3 @@
 #include "paddle/phi/kernels/copy_kernel.h"
 #include "paddle/phi/kernels/empty_kernel.h"
 
-#define DECLARE_SPARSE_UNARY_KERNEL(name) \
-  template <typename T, typename Context> \
-  void SparseCoo##name##Kernel( \
-      const Context& dev_ctx, const SparseCooTensor& x, SparseCooTensor* out); \
- \
-  template <typename T, typename Context> \
-  void SparseCsr##name##Kernel( \
-      const Context& dev_ctx, const SparseCsrTensor& x, SparseCsrTensor* out);
-
-#define DECLARE_SPARSE_UNARY_GRAD_KERNEL(name) \
-  template <typename T, typename Context> \
-  void SparseCoo##name##GradKernel(const Context& dev_ctx, \
-                                   const SparseCooTensor& x, \
-                                   const SparseCooTensor& out_grad, \
-                                   SparseCooTensor* x_grad); \
- \
-  template <typename T, typename Context> \
-  void SparseCsr##name##GradKernel(const Context& dev_ctx, \
-                                   const SparseCsrTensor& x, \
-                                   const SparseCsrTensor& out_grad, \
-                                   SparseCsrTensor* x_grad);
-
-#define DEFINE_SPARSE_UNARY_KERNEL(dense_kernel_func) \
-  namespace phi { \
-  namespace sparse { \
- \
-  template <typename T, typename Context> \
-  void SparseCoo##dense_kernel_func(const Context& dev_ctx, \
-                                    const SparseCooTensor& x, \
-                                    SparseCooTensor* out) { \
-    DenseTensor non_zero_indices = \
-        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_indices()); \
-    DenseTensor non_zero_elements = \
-        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements()); \
-    phi::Copy(dev_ctx, \
-              x.non_zero_indices(), \
-              dev_ctx.GetPlace(), \
-              false, \
-              &non_zero_indices); \
-    phi::dense_kernel_func<T, Context>( \
-        dev_ctx, x.non_zero_elements(), &non_zero_elements); \
-    out->SetMember(non_zero_indices, non_zero_elements, x.dims(), true); \
-  } \
- \
-  template <typename T, typename Context> \
-  void SparseCsr##dense_kernel_func(const Context& dev_ctx, \
-                                    const SparseCsrTensor& x, \
-                                    SparseCsrTensor* out) { \
-    DenseTensor non_zero_crows = \
-        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_crows()); \
-    DenseTensor non_zero_cols = \
-        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_cols()); \
-    DenseTensor non_zero_elements = \
-        phi::EmptyLike<T, Context>(dev_ctx, x.non_zero_elements()); \
-    phi::Copy(dev_ctx, \
-              x.non_zero_crows(), \
-              dev_ctx.GetPlace(), \
-              false, \
-              &non_zero_crows); \
-    phi::Copy(dev_ctx, \
-              x.non_zero_cols(), \
-              dev_ctx.GetPlace(), \
-              false, \
-              &non_zero_cols); \
-    phi::dense_kernel_func<T, Context>( \
-        dev_ctx, x.non_zero_elements(), &non_zero_elements); \
-    out->SetMember( \
-        non_zero_crows, non_zero_cols, non_zero_elements, x.dims()); \
-  } \
-  } \
-  }
-
-#define DEFINE_SPARSE_UNARY_GRAD_KERNEL(dense_kernel_func) \
-  namespace phi { \
-  namespace sparse { \
- \
-  template <typename T, typename Context> \
-  void SparseCoo##dense_kernel_func(const Context& dev_ctx, \
-                                    const SparseCooTensor& x_or_out, \
-                                    const SparseCooTensor& out_grad, \
-                                    SparseCooTensor* x_grad) { \
-    DenseTensor non_zero_indices = \
-        phi::EmptyLike<T, Context>(dev_ctx, x_or_out.non_zero_indices()); \
-    DenseTensor non_zero_elements = \
-        phi::EmptyLike<T, Context>(dev_ctx, x_or_out.non_zero_elements()); \
-    phi::Copy(dev_ctx, \
-              x_or_out.non_zero_indices(), \
-              dev_ctx.GetPlace(), \
-              false, \
-              &non_zero_indices); \
-    phi::dense_kernel_func<T, Context>(dev_ctx, \
-                                       x_or_out.non_zero_elements(), \
-                                       out_grad.non_zero_elements(), \
-                                       &non_zero_elements); \
-    x_grad->SetMember( \
-        non_zero_indices, non_zero_elements, x_or_out.dims(), true); \
-  } \
- \
-  template <typename T, typename Context> \
-  void SparseCsr##dense_kernel_func(const Context& dev_ctx, \
-                                    const SparseCsrTensor& x_or_out, \
-                                    const SparseCsrTensor& out_grad, \
-                                    SparseCsrTensor* out) { \
-    DenseTensor non_zero_crows = \
-        phi::EmptyLike<T, Context>(dev_ctx, x_or_out.non_zero_crows()); \
-    DenseTensor non_zero_cols = \
-        phi::EmptyLike<T, Context>(dev_ctx, x_or_out.non_zero_cols()); \
-    DenseTensor non_zero_elements = \
-        phi::EmptyLike<T, Context>(dev_ctx, x_or_out.non_zero_elements()); \
-    phi::Copy(dev_ctx, \
-              x_or_out.non_zero_crows(), \
-              dev_ctx.GetPlace(), \
-              false, \
-              &non_zero_crows); \
-    phi::Copy(dev_ctx, \
-              x_or_out.non_zero_cols(), \
-              dev_ctx.GetPlace(), \
-              false, \
-              &non_zero_cols); \
-    phi::dense_kernel_func<T, Context>(dev_ctx, \
-                                       x_or_out.non_zero_elements(), \
-                                       out_grad.non_zero_elements(), \
-                                       &non_zero_elements); \
-    out->SetMember( \
-        non_zero_crows, non_zero_cols, non_zero_elements, x_or_out.dims()); \
-  } \
-  } \
-  }
-
-#define REGISTER_CPU_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func) \
-  PD_REGISTER_KERNEL(sparse_coo_##kernel_name, \
-                     CPU, \
-                     ALL_LAYOUT, \
-                     phi::sparse::SparseCoo##dense_kernel_func, \
-                     float, \
-                     double) { \
-    kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); \
-  } \
-  PD_REGISTER_KERNEL(sparse_csr_##kernel_name, \
-                     CPU, \
-                     ALL_LAYOUT, \
-                     phi::sparse::SparseCsr##dense_kernel_func, \
-                     float, \
-                     double) { \
-    kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); \
-  }
-
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-#define REGISTER_GPU_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func) \
-  PD_REGISTER_KERNEL(sparse_coo_##kernel_name, \
-                     GPU, \
-                     ALL_LAYOUT, \
-                     phi::sparse::SparseCoo##dense_kernel_func, \
-                     float, \
-                     double, \
-                     phi::dtype::float16) { \
-    kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); \
-  } \
- \
-  PD_REGISTER_KERNEL(sparse_csr_##kernel_name, \
-                     GPU, \
-                     ALL_LAYOUT, \
-                     phi::sparse::SparseCsr##dense_kernel_func, \
-                     float, \
-                     double, \
-                     phi::dtype::float16) { \
-    kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); \
-  }
-#else
-// This macro definition is empty when GPU is disabled
-#define REGISTER_GPU_SPARSE_UNARY_KERNEL(sparse_kernel_name, dense_kernel_func)
-#endif
-
-#define REGISTER_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func) \
-  REGISTER_CPU_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func) \
-  REGISTER_GPU_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func)
-
-#define DEFINE_AND_REGISTER_SPARSE_UNARY_KERNEL(kernel_name, \
-                                                dense_kernel_func) \
-  DEFINE_SPARSE_UNARY_KERNEL(dense_kernel_func) \
-  REGISTER_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func)
-
-#define DEFINE_AND_REGISTER_SPARSE_UNARY_GRAD_KERNEL(kernel_name, \
-                                                     dense_kernel_func) \
-  DEFINE_SPARSE_UNARY_GRAD_KERNEL(dense_kernel_func) \
-  REGISTER_SPARSE_UNARY_KERNEL(kernel_name, dense_kernel_func)
diff --git a/paddle/phi/tests/kernels/test_sparse_activation_dev_api.cc b/paddle/phi/tests/kernels/test_sparse_activation_dev_api.cc
index f86170980ffbb..6de51fc4b77a2 100644
--- a/paddle/phi/tests/kernels/test_sparse_activation_dev_api.cc
+++ b/paddle/phi/tests/kernels/test_sparse_activation_dev_api.cc
@@ -24,8 +24,8 @@ limitations under the License. */
 #include "paddle/phi/kernels/activation_grad_kernel.h"
 #include "paddle/phi/kernels/activation_kernel.h"
 #include "paddle/phi/kernels/empty_kernel.h"
-#include "paddle/phi/kernels/sparse/activation_grad_kernel.h"
-#include "paddle/phi/kernels/sparse/activation_kernel.h"
+#include "paddle/phi/kernels/sparse/unary_grad_kernel.h"
+#include "paddle/phi/kernels/sparse/unary_kernel.h"
 #include "paddle/phi/kernels/sparse/sparse_utils_kernel.h"
 
 namespace phi {