From 43a2a608071f95768fd34778cdbb47192f31c793 Mon Sep 17 00:00:00 2001
From: co63oc
Date: Tue, 20 Feb 2024 12:01:58 +0800
Subject: [PATCH] Fix some typos (Intermedaite, deperecated, etc.) (#61720)

---
 .../fused/fused_elemwise_activation_op.h          | 12 ++++++------
 paddle/phi/kernels/funcs/compound_functors.h      | 12 ++++++------
 paddle/phi/kernels/funcs/elementwise_base.h       |  2 +-
 paddle/phi/kernels/funcs/elementwise_functor.h    |  4 ++--
 paddle/phi/kernels/funcs/fft.cc                   |  6 +++---
 paddle/phi/kernels/funcs/jit/README.en.md         | 16 ++++++++--------
 paddle/phi/kernels/funcs/jit/README.md            |  4 ++--
 paddle/phi/kernels/funcs/jit/helper.cc            |  2 +-
 paddle/phi/kernels/funcs/jit/helper.h             |  4 ++--
 .../funcs/jit/more/intrinsic/crf_decoding.cc      |  2 +-
 paddle/phi/kernels/funcs/matrix_bit_code.h        |  2 +-
 paddle/phi/kernels/funcs/matrix_solve.h           |  2 +-
 .../phi/kernels/funcs/selected_rows_functor.cc    |  2 +-
 paddle/phi/kernels/funcs/selected_rows_functor.h  |  4 ++--
 paddle/phi/kernels/funcs/seq2col.h                |  4 ++--
 paddle/phi/kernels/funcs/sparse/convolution.h     |  4 ++--
 paddle/phi/kernels/funcs/unsqueeze.h              |  2 +-
 .../phi/kernels/funcs/values_vectors_functor.h    |  2 +-
 paddle/phi/kernels/funcs/vol2col.cc               |  2 +-
 paddle/phi/kernels/funcs/vol2col.cu               |  8 ++++----
 .../cutlass/conv2d/conv2d_depthwise_bias_act.py   |  2 +-
 .../threadblock/epilogue_tensor_op_int32.h        |  2 +-
 .../fpA_intB_gemm/fpA_intB_gemm_template.h        |  2 +-
 .../transform/tile_smem_loader.h                  |  2 +-
 ...ed_bias_dropout_residual_layer_norm_kernel.cu  |  2 +-
 .../fusion/onednn/fused_elementwise_kernel.cc     |  2 +-
 .../phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc   |  2 +-
 27 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h
index ea51fd1a10a83..ad7f79307e628 100644
--- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h
+++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h
@@ -149,8 +149,8 @@ static void RunBinaryCompoundGradFunctors(
       UnaryFunctor,
       UnaryGradFunctor,
       InPlace>;
-  using BinaryCompoundDIntermedaiteOutFunctor =
-      phi::funcs::BinaryCompoundGradDIntermedaiteOutFunctor<
+  using BinaryCompoundDIntermediateOutFunctor =
+      phi::funcs::BinaryCompoundGradDIntermediateOutFunctor<
           T,
           BinaryGradFunctor,
           UnaryFunctor>;
@@ -160,7 +160,7 @@ static void RunBinaryCompoundGradFunctors(
       T,
       BinaryCompoundDxFunctor,
       BinaryCompoundDyFunctor,
-      BinaryCompoundDIntermedaiteOutFunctor,
+      BinaryCompoundDIntermediateOutFunctor,
       true /*UseIntermediateOut*/,
       false /*SameShapeOfIntermediateOutAndOut*/>(
       ctx,
@@ -176,7 +176,7 @@ static void RunBinaryCompoundGradFunctors(
       BinaryCompoundDxFunctor(binary_grad_functor, unary_functor),
       BinaryCompoundDyFunctor(
           binary_grad_functor, unary_functor, unary_grad_functor),
-      BinaryCompoundDIntermedaiteOutFunctor(binary_grad_functor,
+      BinaryCompoundDIntermediateOutFunctor(binary_grad_functor,
                                             unary_functor));
   } else {
     FusedElemwiseAndActGradComputeEx<
@@ -184,7 +184,7 @@ static void RunBinaryCompoundGradFunctors(
       T,
       BinaryCompoundDxFunctor,
       BinaryCompoundDyFunctor,
-      BinaryCompoundDIntermedaiteOutFunctor,
+      BinaryCompoundDIntermediateOutFunctor,
       false /*UseIntermediateOut*/,
       false /*SameShapeOfIntermediateOutAndOut*/>(
       ctx,
@@ -200,7 +200,7 @@ static void RunBinaryCompoundGradFunctors(
       BinaryCompoundDxFunctor(binary_grad_functor, unary_functor),
       BinaryCompoundDyFunctor(
           binary_grad_functor, unary_functor, unary_grad_functor),
-      BinaryCompoundDIntermedaiteOutFunctor(binary_grad_functor,
+      BinaryCompoundDIntermediateOutFunctor(binary_grad_functor,
                                             unary_functor));
   }
 }
diff --git a/paddle/phi/kernels/funcs/compound_functors.h b/paddle/phi/kernels/funcs/compound_functors.h
index 823dcd70a2f3c..72e7e8d872dc0 100644
--- a/paddle/phi/kernels/funcs/compound_functors.h
+++ b/paddle/phi/kernels/funcs/compound_functors.h
@@ -29,8 +29,8 @@ struct BinaryCompoundFunctor {
 
   inline HOSTDEVICE T GetOut(T x, T y) { return func1_(x, func2_(y)); }
 
-  inline HOSTDEVICE T GetOutUseIntermediateOut(T x, T intermediat_out) {
-    return func1_(x, intermediat_out);
+  inline HOSTDEVICE T GetOutUseIntermediateOut(T x, T intermediate_out) {
+    return func1_(x, intermediate_out);
   }
 
   inline HOSTDEVICE T GetIntermediateOut(T x UNUSED, T y) { return func2_(y); }
@@ -47,8 +47,8 @@ struct UnaryCompoundFunctor {
 
   inline HOSTDEVICE T GetOut(T x, T y) { return func1_(func2_(x, y)); }
 
-  inline HOSTDEVICE T GetOutUseIntermediateOut(T x UNUSED, T intermediat_out) {
-    return func1_(intermediat_out);
+  inline HOSTDEVICE T GetOutUseIntermediateOut(T x UNUSED, T intermediate_out) {
+    return func1_(intermediate_out);
   }
 
   inline HOSTDEVICE T GetIntermediateOut(T x, T y) { return func2_(x, y); }
@@ -209,8 +209,8 @@ struct UnaryCompoundGradDyFunctor {
 
 // Z = BinaryFunctor(X, UnaryFunctor(Y))
 template <typename T, typename DBinaryFun, typename UnaryFun>
-struct BinaryCompoundGradDIntermedaiteOutFunctor {
-  BinaryCompoundGradDIntermedaiteOutFunctor(const DBinaryFun &d_binary_fun,
+struct BinaryCompoundGradDIntermediateOutFunctor {
+  BinaryCompoundGradDIntermediateOutFunctor(const DBinaryFun &d_binary_fun,
                                             const UnaryFun &unary_fun)
       : d_binary_fun_(d_binary_fun), unary_fun_(unary_fun) {}
 
diff --git a/paddle/phi/kernels/funcs/elementwise_base.h b/paddle/phi/kernels/funcs/elementwise_base.h
index c92acdaf4180b..212b6dc4cddd2 100644
--- a/paddle/phi/kernels/funcs/elementwise_base.h
+++ b/paddle/phi/kernels/funcs/elementwise_base.h
@@ -49,7 +49,7 @@ class RowwiseTransformIterator;
 template <typename T, typename DeviceContext>
 class MidWiseTransformIterator;
 
-// NOTE(dzhwinter): ptrdiff_t in iterator is deperecated in c++17
+// NOTE(dzhwinter): ptrdiff_t in iterator is deprecated in c++17
 template <typename T>
 class RowwiseTransformIterator<T, CPUContext>
     : public std::iterator<std::random_access_iterator_tag,
diff --git a/paddle/phi/kernels/funcs/elementwise_functor.h b/paddle/phi/kernels/funcs/elementwise_functor.h
--- a/paddle/phi/kernels/funcs/elementwise_functor.h
+++ b/paddle/phi/kernels/funcs/elementwise_functor.h
                                     const dtype::float16 b) const {
     float b_float = static_cast<float>(b);
     float res = fmod(static_cast<float>(a), b_float);
-    // Accoding to #PR26732: in dividen % divsor
+    // According to #PR26732: in dividen % divsor
     // remainder shall have the same sign as divsor.
     if ((res != 0.0f) && ((res < 0.0f) != (b_float < 0.0f))) res += b_float;
     return static_cast<dtype::float16>(res);
@@ -584,7 +584,7 @@ struct RemainderFunctor {
     float b_float = static_cast<float>(b);
     float res = fmod(static_cast<float>(a), b_float);
-    // Accoding to #PR26732: in dividen % divsor
+    // According to #PR26732: in dividen % divsor
     // remainder shall have the same sign as divsor.
     if ((res != 0.0f) && ((res < 0.0f) != (b_float < 0.0f))) res += b_float;
     return static_cast<dtype::bfloat16>(res);
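
A quick standalone illustration of the sign rule that the `RemainderFunctor` comment above describes: `fmod` keeps the sign of the dividend, while the kernel wants a Python-style remainder that takes the sign of the divisor. The sketch below uses plain C++ with no Paddle dependencies; the function name is ours for illustration.

```cpp
#include <cmath>
#include <cstdio>

// Mirrors the adjustment in RemainderFunctor: shift a nonzero fmod()
// result whose sign disagrees with the divisor by one divisor.
float remainder_like_python(float a, float b) {
  float res = std::fmod(a, b);
  if (res != 0.0f && ((res < 0.0f) != (b < 0.0f))) res += b;
  return res;
}

int main() {
  std::printf("%g\n", remainder_like_python(-7.0f, 3.0f));  // 2, not fmod's -1
  std::printf("%g\n", remainder_like_python(7.0f, -3.0f));  // -2, not fmod's 1
  return 0;
}
```
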
diff --git a/paddle/phi/kernels/funcs/fft.cc b/paddle/phi/kernels/funcs/fft.cc
index 97502787b6938..beb0a98636039 100644
--- a/paddle/phi/kernels/funcs/fft.cc
+++ b/paddle/phi/kernels/funcs/fft.cc
@@ -234,7 +234,7 @@ struct FFTC2CFunctor {
     // pocketfft requires std::vector<size_t>
     std::vector<size_t> axes_(axes.size());
     std::copy(axes.begin(), axes.end(), axes_.begin());
-    // compuet factor
+    // compute factor
     size_t signal_numel = 1;
     for (const auto axis : axes) {
       signal_numel *= in_sizes[axis];
@@ -291,7 +291,7 @@ struct FFTR2CFunctor {
     // pocketfft requires std::vector<size_t>
     std::vector<size_t> axes_(axes.size());
     std::copy(axes.begin(), axes.end(), axes_.begin());
-    // compuet normalization factor
+    // compute normalization factor
     size_t signal_numel = 1;
     for (const auto axis : axes) {
       signal_numel *= in_sizes[axis];
@@ -348,7 +348,7 @@ struct FFTC2RFunctor {
     // pocketfft requires std::vector<size_t>
     std::vector<size_t> axes_(axes.size());
     std::copy(axes.begin(), axes.end(), axes_.begin());
-    // compuet normalization factor
+    // compute normalization factor
     size_t signal_numel = 1;
     for (const auto axis : axes) {
       signal_numel *= out_sizes[axis];
diff --git a/paddle/phi/kernels/funcs/jit/README.en.md b/paddle/phi/kernels/funcs/jit/README.en.md
index 28f9e1460f1c2..0e1958a5c1415 100644
--- a/paddle/phi/kernels/funcs/jit/README.en.md
+++ b/paddle/phi/kernels/funcs/jit/README.en.md
@@ -1,6 +1,6 @@
 # JIT Kernel
 
-JIT(Just In Time) Kernel contains actually generated code and some other implemenations with the same logic.
+JIT(Just In Time) Kernel contains actually generated code and some other implementations with the same logic.
 Each implementation has its own condition to use, defined in `CanBeUsed`.
 They are combined together to get the best performance of one single independent function.
 They could be some very simple functions like vector multiply, or some complicated functions like LSTM.
@@ -34,7 +34,7 @@ PaddlePaddle/Paddle/paddle/phi/kernels/
 └── ...
 ```
 
-All basical definitions of jit kernels are addressed in `paddle/phi/kernels/funcs/jit` including these three key folders `refer`, `gen`, `more`. There is only one unique name for each kernel while may have seraval implementations with same functionality.
+All basical definitions of jit kernels are addressed in `paddle/phi/kernels/funcs/jit` including these three key folders `refer`, `gen`, `more`. There is only one unique name for each kernel while may have several implementations with same functionality.
 
 - `refer`: Each kernel must have one reference implementation on CPU, and it should only focus on the correctness and should not depends on any third-party libraries.
 - `gen`: The code generated should be kept here. They should be designed focusing on the best performance, which depends on Xbyak.
@@ -44,7 +44,7 @@ All basical definitions of jit kernels are addressed in `paddle/phi/kernels/func
 We present these methods to get the functions:
 
 - `GetAllCandidateFuncs`. It can return all the implementations supported. All of the implementations can get the same result. You can do some runtime benchmark to choose which should actually be used.
-- `GetDefaultBestFunc`. It only return one default function pointer, which is tuning offline with some genenal configures and attributes. This should cover most situations.
+- `GetDefaultBestFunc`. It only return one default function pointer, which is tuning offline with some general configures and attributes. This should cover most situations.
 - `KernelFuncs::Cache()`. It can get the default functions and save it for next time with the same attribute.
 - `GetReferFunc`. It can only get the reference code in CPU, and all the others implementations have same logic with this reference code.
@@ -76,7 +76,7 @@ All kernels are included in `paddle/phi/kernels/funcs/jit/kernels.h`, which is a
 ## Solid Test
 
 - Unit Test
-  All functions should be compared with the corresponding reference functions, including data tyep `float` and `double`.
+  All functions should be compared with the corresponding reference functions, including data type `float` and `double`.
 - Benchmark
   All functions should be tested, and make sure the `jit::GetDefaultBestFunc` function obtain the best performance with all attributes.
@@ -89,15 +89,15 @@ All kernels are included in `paddle/phi/kernels/funcs/jit/kernels.h`, which is a
 3. Add reference function of `your_key`.
    Note:
    - this should be run on CPU and do not depend on any third-party.
-   - Add `USE_JITKERNEL_REFER(your_key)` in `refer/CmakeLists.txt` to make sure this code can be used.
-4. Add unit test in `test.cc`, and verfiy at least `float` and `double`.
+   - Add `USE_JITKERNEL_REFER(your_key)` in `refer/CMakeLists.txt` to make sure this code can be used.
+4. Add unit test in `test.cc`, and verify at least `float` and `double`.
    Test more data type for some special functions if necessary, for example `int8`.
 5. Add functions in `benchmark.cc` to test all function of same `KernelType`. Make sure `GetDefaultBestFunc` always get the best one.
 
 ## Optional
 
-Add more implementations of `your_kery` for performance enhancement.
+Add more implementations of `your_key` for performance enhancement.
 
-1. Add functions based on generated code in `gen`. It should be derived from `JitCode` and should have correpsonding creator from `JitCodeCreator` which will be registered on the `your_key`.
+1. Add functions based on generated code in `gen`. It should be derived from `JitCode` and should have corresponding creator from `JitCodeCreator` which will be registered on the `your_key`.
 2. If new attribute type is added, you should specialize `JitCodeKey` of this type.
 3. Add more functions in `more`,you can use any third party you wish, like mkl, mkldnn or intrinsic code to reach the best performance.
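
The dynamic-getting methods described in the README above map to a lookup roughly like the sketch below. This is a rough sketch only: the namespaces and tuple names (`phi::jit::KernelFuncs`, `VMulTuple`, `phi::CPUPlace`) are assumed from the surrounding text and may differ between Paddle versions.

```cpp
#include "paddle/phi/kernels/funcs/jit/kernels.h"

// Multiply two float vectors with whichever jit implementation is the
// tuned default for attribute n. Cache() memoizes the chosen function per
// attribute, so repeated calls with the same n skip the candidate search.
void vmul_sketch(const float* x, const float* y, float* z, int n) {
  auto vmul = phi::jit::KernelFuncs<phi::jit::VMulTuple<float>,
                                    phi::CPUPlace>::Cache()
                  .At(n);
  vmul(x, y, z, n);  // z[i] = x[i] * y[i]
}
```
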
diff --git a/paddle/phi/kernels/funcs/jit/README.md b/paddle/phi/kernels/funcs/jit/README.md
index 9453dac5ad38d..c9154d05b9c22 100644
--- a/paddle/phi/kernels/funcs/jit/README.md
+++ b/paddle/phi/kernels/funcs/jit/README.md
@@ -35,7 +35,7 @@ PaddlePaddle/Paddle/paddle/phi/kernels/
 基本类的定义都放在根目录下,根目录下包括gen,more和refer三个目录。每个目录下都是一种或者多种实现,每种kernel算子都需要有reference的实现,用作单元测试的基准,其他的实现都是可选的。
 - gen: 代表使用jit生成的code,需要依赖xbyak库。该实现最关心的就是性能。
 - refer: 代表reference的实现,每种kernel算子都需要有在CPU上的reference的实现,他主要关心的算法逻辑的正确性。
-- more: 下面可以放入跟多实现,可以包括mkl,mkldnn,intrinsic,openblas等,也可以是自身已有的kernel组合。
+- more: 下面可以放入更多实现,可以包括mkl,mkldnn,intrinsic,openblas等,也可以是自身已有的kernel组合。
 
 ## 动态获取
 
@@ -79,7 +79,7 @@ PaddlePaddle/Paddle/paddle/phi/kernels/
 # 如何添加新的算子
 
 1. 在`KernelType` 中添加 `your_key` 。
-2. 实现Reference 的逻辑,这个是必须是在CPU上的实现,并且不能依赖任何第三方库。实现后在`refer/CmakeLists.txt`中添加`USE_JITKERNEL_REFER(your_key)`来使用该kernel。
+2. 实现Reference 的逻辑,这个是必须是在CPU上的实现,并且不能依赖任何第三方库。实现后在`refer/CMakeLists.txt`中添加`USE_JITKERNEL_REFER(your_key)`来使用该kernel。
 3. (optional) 实现更多的算法在`more`目录下,可以依赖mkl,intrinsic或者mkldnn等第三方库。
 4. (optional) 实现基于Xbyak的生成code,在`gen`目下。 jitcode需要实现自己的`JitCodeCreator`,并注册在与refer相同的`KernelType`上。
 5. 添加新的`KernelTuple`,需要与`KernelType`一一对应,是所有类型的一个打包,包括数据类型,属性的类型,以及返回的函数类型。可以参考`SeqPoolTuple`,新加的Attr类型需要特例化`JitCodeKey`方法。
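
For step 2 above (step 3 in the English README), a reference implementation is deliberately plain, dependency-free CPU code whose only job is to define correct behavior. A minimal sketch for a hypothetical vector-add kernel follows; the function and macro names here are illustrative assumptions, not the exact ones in the tree.

```cpp
// Correctness-first scalar loop: no SIMD, no third-party libraries.
// This is what gen/ and more/ implementations are tested against.
template <typename T>
void VAddRefer(const T* x, const T* y, T* z, int n) {
  for (int i = 0; i < n; ++i) {
    z[i] = x[i] + y[i];
  }
}
// It would then be registered under its KernelType in refer/ and enabled
// via USE_JITKERNEL_REFER(kVAdd) in refer/CMakeLists.txt, e.g.:
// REGISTER_REFER_KERNEL(VAdd);  // assumed macro name
```
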
diff --git a/paddle/phi/kernels/funcs/jit/helper.cc b/paddle/phi/kernels/funcs/jit/helper.cc
index c135d6ee3177d..5ab391678bd90 100644
--- a/paddle/phi/kernels/funcs/jit/helper.cc
+++ b/paddle/phi/kernels/funcs/jit/helper.cc
@@ -111,7 +111,7 @@ void pack_weights(const float* src, float* dst, int n, int k) {
         0,
         phi::errors::InvalidArgument(
             "Each element of groups should be larger than "
-            "0. However the element: %d doesn't satify.",
+            "0. However the element: %d doesn't satisfy.",
             i));
   });
   int sum = std::accumulate(groups.begin(), groups.end(), 0);
diff --git a/paddle/phi/kernels/funcs/jit/helper.h b/paddle/phi/kernels/funcs/jit/helper.h
index c230738db9a5d..50c130bc0c486 100644
--- a/paddle/phi/kernels/funcs/jit/helper.h
+++ b/paddle/phi/kernels/funcs/jit/helper.h
@@ -140,7 +140,7 @@ std::vector GetAllCandidateKernels(
   auto ref = GetReferKernel<KernelTuple>();
   PADDLE_ENFORCE_NOT_NULL(
       ref,
-      phi::errors::InvalidArgument("Get all candicate kernel in CPU failed. "
+      phi::errors::InvalidArgument("Get all candidate kernel in CPU failed. "
                                    "The Refer Kernel can not be empty."));
   res.emplace_back(ref);
   return res;
@@ -188,7 +188,7 @@ typename KernelTuple::func_type GetDefaultBestFunc(
   PADDLE_ENFORCE_GE(funcs.size(),
                     1UL,
                     phi::errors::InvalidArgument(
-                        "The candicate jit kernel is at least one in CPU."));
+                        "The candidate jit kernel is at least one in CPU."));
   // Here could do some runtime benchmark of this attr and return the best one.
   // But yet just get the first one as the default best one,
   // which is searched in order and tuned by offline.
diff --git a/paddle/phi/kernels/funcs/jit/more/intrinsic/crf_decoding.cc b/paddle/phi/kernels/funcs/jit/more/intrinsic/crf_decoding.cc
index c53f62100f4d6..c36ca0d7360cc 100644
--- a/paddle/phi/kernels/funcs/jit/more/intrinsic/crf_decoding.cc
+++ b/paddle/phi/kernels/funcs/jit/more/intrinsic/crf_decoding.cc
@@ -88,7 +88,7 @@ void CRFDecoding(const int seq_len,
     /* Calculate the offset of transition_weights.*/
     int trans_offset = state_trans_base_idx * tag_num + j_offset;
     for (int i = 0; i < tag_num; ++i) {
-/* Initalize the content of alpha variable with related offset.*/
+/* Initialize the content of alpha variable with related offset.*/
 #ifdef __AVX512F__
       __m512 alpha_content = _mm512_set1_ps(*(alpha + seq_offset + i));
       /* Obtain the content of weights from un-aligned address.*/
diff --git a/paddle/phi/kernels/funcs/matrix_bit_code.h b/paddle/phi/kernels/funcs/matrix_bit_code.h
index 8d3335791ef69..0b53d7a21155b 100644
--- a/paddle/phi/kernels/funcs/matrix_bit_code.h
+++ b/paddle/phi/kernels/funcs/matrix_bit_code.h
@@ -40,7 +40,7 @@ namespace funcs {
  *   return the maximal code length
  *
  * SimpleCode operator()(size_t i)
- *   return the i-th code. Code class is descriebed below.
+ *   return the i-th code. Code class is described below.
 
 *
 * SimpleCode class should support 3 functions:
 *
diff --git a/paddle/phi/kernels/funcs/matrix_solve.h b/paddle/phi/kernels/funcs/matrix_solve.h
index f8225bd482385..45a4317a51eab 100644
--- a/paddle/phi/kernels/funcs/matrix_solve.h
+++ b/paddle/phi/kernels/funcs/matrix_solve.h
@@ -82,7 +82,7 @@ static std::vector getNewDimsVec(const DDim& b_dims) {
       true,
       phi::errors::PreconditionNotMet(
           "The size of tensor b must not be %d after getting new dims", 0));
-  // if b_dims_vec.size() == 1, just retun original vec
+  // if b_dims_vec.size() == 1, just return original vec
   return b_dims_vec;
 }
 
diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.cc b/paddle/phi/kernels/funcs/selected_rows_functor.cc
index 267463a6b4b13..b37b5bec78d2f 100644
--- a/paddle/phi/kernels/funcs/selected_rows_functor.cc
+++ b/paddle/phi/kernels/funcs/selected_rows_functor.cc
@@ -78,7 +78,7 @@ struct SelectedRowsAdd {
         in1_row_numel,
         out_value->numel() / out_rows.size(),
         phi::errors::InvalidArgument(
-            "The input and oupput width must be equal."
+            "The input and output width must be equal."
             "But received input width = [%d], output width = [%d]",
             in1_row_numel,
             out_value->numel() / out_rows.size()));
diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.h b/paddle/phi/kernels/funcs/selected_rows_functor.h
index 38e68ee0ccfc6..c072f7ff12e88 100644
--- a/paddle/phi/kernels/funcs/selected_rows_functor.h
+++ b/paddle/phi/kernels/funcs/selected_rows_functor.h
@@ -30,7 +30,7 @@ limitations under the License. */
 namespace phi {
 namespace funcs {
 
-// SelectedRows + SelectedRows will simplely concat value and rows.
+// SelectedRows + SelectedRows will simply concat value and rows.
 // The real computation happens in dealing with LoDTensor.
 template <typename DeviceContext, typename T>
 struct SelectedRowsAdd {
@@ -77,7 +77,7 @@ struct SelectedRowsAddToTensor {
 };
 
 namespace scatter {
-// functors for manuplating SelectedRows data
+// functors for manipulating SelectedRows data
 template <typename DeviceContext, typename T>
 struct MergeAdd {
   // unary functor, merge by adding duplicated rows in
diff --git a/paddle/phi/kernels/funcs/seq2col.h b/paddle/phi/kernels/funcs/seq2col.h
index b757f8403d158..14665ada7b4a8 100644
--- a/paddle/phi/kernels/funcs/seq2col.h
+++ b/paddle/phi/kernels/funcs/seq2col.h
@@ -45,7 +45,7 @@ struct Seq2ColFunctor {
       a. Notion
          - `i` stands for the flattened index of a bunch of frames.
          - `src_idx` and `trg_idx` are the 1D indices of seqs and frames
-           respectivly.
+           respectively.
 
       b. Sample idx
       ```cpp
@@ -58,7 +58,7 @@ struct Seq2ColFunctor {
         n = i % (n_frames_ * frame_length_) % n_frames_;
      ```
 
-      d. Replace `sample_idx`, `f` and `n` in the following eqations:
+      d. Replace `sample_idx`, `f` and `n` in the following equations:
      ```cpp
         src_idx = sample_idx * seq_length_ + n * hop_length_ + f;
         trg_idx = sample_idx * n_frames_ * frame_length_ + f * n_frames_ + n;
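
The index derivation in the `Seq2ColFunctor` comment above can be checked with a few lines of standalone C++. The constants below are arbitrary but self-consistent (`n_frames = (seq_length - frame_length) / hop_length + 1`); only the equations come from the comment.

```cpp
#include <cstdio>

// Walk one flattened frame index i back to (sample_idx, f, n), then to the
// source/target offsets, exactly as the Seq2ColFunctor comment derives them.
int main() {
  const int seq_length = 16, frame_length = 4, hop_length = 2;
  const int n_frames = (seq_length - frame_length) / hop_length + 1;  // 7
  const int i = 23;  // arbitrary flattened index of a frame element

  const int sample_idx = i / (n_frames * frame_length);
  const int f = i % (n_frames * frame_length) / n_frames;
  const int n = i % (n_frames * frame_length) % n_frames;

  const int src_idx = sample_idx * seq_length + n * hop_length + f;
  const int trg_idx = sample_idx * n_frames * frame_length + f * n_frames + n;
  // For sample 0, trg_idx reproduces i (here: f=3, n=2, src=7, trg=23).
  std::printf("sample=%d f=%d n=%d src=%d trg=%d\n",
              sample_idx, f, n, src_idx, trg_idx);
  return 0;
}
```
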
diff --git a/paddle/phi/kernels/funcs/sparse/convolution.h b/paddle/phi/kernels/funcs/sparse/convolution.h
index 7048ca1a127f5..e250973ba4543 100644
--- a/paddle/phi/kernels/funcs/sparse/convolution.h
+++ b/paddle/phi/kernels/funcs/sparse/convolution.h
@@ -43,8 +43,8 @@ inline HOSTDEVICE bool Check(const IntT& x,
                              const int kdim,
                              const int xdim) {
   const IntT lower = x - dilation * kx + pad;
-  const IntT uper = x + (kdim - kx - 1) * dilation - pad;
-  return (lower >= 0 && lower % stride == 0 && uper < xdim);
+  const IntT upper = x + (kdim - kx - 1) * dilation - pad;
+  return (lower >= 0 && lower % stride == 0 && upper < xdim);
 }
 
 // Check whether the current position(x, y, z) is legal:
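
The renamed `upper` bound in `Check` above can be exercised in isolation. The sketch below (standalone; the wrapper name is ours) enumerates, for one fixed input coordinate, which kernel offsets map onto a stride-aligned output position whose remaining window still fits inside the padded input extent.

```cpp
#include <cstdio>

// One-dimensional version of the Check() bounds test from convolution.h.
bool CheckOneDim(int x, int kx, int kdim, int xdim,
                 int stride, int dilation, int pad) {
  const int lower = x - dilation * kx + pad;   // = output_pos * stride
  const int upper = x + (kdim - kx - 1) * dilation - pad;
  return lower >= 0 && lower % stride == 0 && upper < xdim;
}

int main() {
  // Input size 8, kernel size 3, stride 2, dilation 1, pad 1:
  // only kx = 1 yields a valid, stride-aligned output for x = 4.
  for (int kx = 0; kx < 3; ++kx) {
    std::printf("x=4 kx=%d -> %s\n", kx,
                CheckOneDim(4, kx, 3, 8, 2, 1, 1) ? "valid" : "invalid");
  }
  return 0;
}
```
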
diff --git a/paddle/phi/kernels/funcs/unsqueeze.h b/paddle/phi/kernels/funcs/unsqueeze.h
index a8fc8dc849544..eebc12dd1df21 100644
--- a/paddle/phi/kernels/funcs/unsqueeze.h
+++ b/paddle/phi/kernels/funcs/unsqueeze.h
@@ -118,7 +118,7 @@ inline DDim GetUnsqueezeShape(const std::vector unsqz_dims,
   for (int axis : unsqz_dims) {
     int cur = axis < 0 ? axis + cur_output_rank + 1 : axis;
-    // Vaildity Check: the axis bound
+    // Validity Check: the axis bound
     PADDLE_ENFORCE_GE(
         cur,
         0,
diff --git a/paddle/phi/kernels/funcs/values_vectors_functor.h b/paddle/phi/kernels/funcs/values_vectors_functor.h
index 0de31efaa19b7..4a8e12d26c2c7 100644
--- a/paddle/phi/kernels/funcs/values_vectors_functor.h
+++ b/paddle/phi/kernels/funcs/values_vectors_functor.h
@@ -229,7 +229,7 @@ struct MatrixEighFunctor {
     ValueType *out_value = dev_ctx.template Alloc<ValueType>(eigen_values);
 
     DenseTensor input_trans;
-    // lapack is a column-major storge, transpose make the input to
+    // lapack is a column-major storage, transpose make the input to
     // have a continuous memory layout
     input_trans = phi::TransposeLast2Dim<T>(dev_ctx, input);
     T *input_vector = input_trans.data<T>();
diff --git a/paddle/phi/kernels/funcs/vol2col.cc b/paddle/phi/kernels/funcs/vol2col.cc
index b7c6a1fd6c1e8..94acac28619b8 100644
--- a/paddle/phi/kernels/funcs/vol2col.cc
+++ b/paddle/phi/kernels/funcs/vol2col.cc
@@ -229,7 +229,7 @@ class Col2VolFunctor {
         input_width_tmp,
         output_width,
         phi::errors::InvalidArgument(
-            "input_width(%d) and output_width(%d) are mismatching.",
+            "input_width(%d) and output_width(%d) are mismatching.",
             input_width_tmp,
             output_width));
     T* vol_data = vol->data<T>();
diff --git a/paddle/phi/kernels/funcs/vol2col.cu b/paddle/phi/kernels/funcs/vol2col.cu
index 9d6fe1c4d9f3a..ad8be8a806486 100644
--- a/paddle/phi/kernels/funcs/vol2col.cu
+++ b/paddle/phi/kernels/funcs/vol2col.cu
@@ -89,7 +89,7 @@ __global__ void vol2col(int num_kernels,
 }
 
 /*
- * im = [input_channels,intpu_depth, input_height, input_width] for
+ * im = [input_channels,input_depth, input_height, input_width] for
  * channels_first
  * im = [input_depth, input_height, input_width, input_channels] for
  * channels_last
@@ -112,7 +112,7 @@ void Vol2ColFunctor::operator()(
   PADDLE_ENFORCE_EQ(vol.dims().size(),
                     4,
                     phi::errors::InvalidArgument(
-                        "The dimension of vol should be 4, but received %d.",
+                        "The dimension of vol should be 4, but received %d.",
                         vol.dims().size()));
   PADDLE_ENFORCE_EQ(col->dims().size(),
                     7,
@@ -318,12 +318,12 @@ void Col2VolFunctor::operator()(
   PADDLE_ENFORCE_EQ(vol->dims().size(),
                     4,
                     phi::errors::InvalidArgument(
-                        "The dimension of vol should be 4, but received %d.",
+                        "The dimension of vol should be 4, but received %d.",
                         vol->dims().size()));
   PADDLE_ENFORCE_EQ(col.dims().size(),
                     7,
                     phi::errors::InvalidArgument(
-                        "The dimension of col should be 7, but received %d.",
+                        "The dimension of col should be 7, but received %d.",
                         col.dims().size()));
 
   int input_channels =
diff --git a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_depthwise_bias_act.py b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_depthwise_bias_act.py
index cfeb60dbc154d..0ea8e0a47130d 100644
--- a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_depthwise_bias_act.py
+++ b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_depthwise_bias_act.py
@@ -123,7 +123,7 @@ def intlist2str(input):
     return return_str
 
 
-# Generate simt conv2d_depthwsie code.
+# Generate simt conv2d_depthwise code.
 def generate_conv2d_depthwise():
diff --git a/paddle/phi/kernels/fusion/cutlass/cutlass_extensions/epilogue/threadblock/epilogue_tensor_op_int32.h b/paddle/phi/kernels/fusion/cutlass/cutlass_extensions/epilogue/threadblock/epilogue_tensor_op_int32.h
index 9d0cb644b236e..de85ed672ed43 100644
--- a/paddle/phi/kernels/fusion/cutlass/cutlass_extensions/epilogue/threadblock/epilogue_tensor_op_int32.h
+++ b/paddle/phi/kernels/fusion/cutlass/cutlass_extensions/epilogue/threadblock/epilogue_tensor_op_int32.h
@@ -151,7 +151,7 @@ struct DefaultIteratorsTensorOp
 class SharedLoadIteratorMixed {
  public:
diff --git a/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h b/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h
index f7c73dc99cede..cb62cd4a35d99 100644
--- a/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h
+++ b/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h
@@ -199,7 +199,7 @@ void generic_mixed_gemm_kernelLauncher(const T* A,
       // now to run bf16 mixgemm, we have set the split-k factor to 1
       VLOG(1) << "Requested split-k but workspace size insufficient. Falling "
                  "back to non-split-k implementation.";
-      VLOG(1) << "need workspace sizoe of: " << gemm.get_workspace_size(args)
+      VLOG(1) << "need workspace size of: " << gemm.get_workspace_size(args)
              << ", but got " << workspace_bytes;
       VLOG(1) << "args.batch_stride_D:" << args.batch_stride_D;
       VLOG(1) << "args.batch_count:" << args.batch_count;
Falling " "back to non-split-k implementation."; - VLOG(1) << "need workspace sizoe of: " << gemm.get_workspace_size(args) + VLOG(1) << "need workspace size of: " << gemm.get_workspace_size(args) << ", but got " << workspace_bytes; VLOG(1) << "args.batch_stride_D:" << args.batch_stride_D; VLOG(1) << "args.batch_count:" << args.batch_count; diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/transform/tile_smem_loader.h b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/transform/tile_smem_loader.h index 43d14db28de2a..57acd005aa88e 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/transform/tile_smem_loader.h +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/transform/tile_smem_loader.h @@ -61,7 +61,7 @@ class TileSmemLoader { using Fragment = typename GmemTileIterator::Fragment; - /// load a tile from global memory into shared memory + // load a tile from global memory into shared memory CUTLASS_DEVICE static void load(GmemTileIterator tile_load_iter, SmemTileIterator tile_store_iter) { diff --git a/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu index fd1f754cc9827..37450d3a4e178 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu @@ -97,7 +97,7 @@ void FusedBiasDropoutResidualLnKernel( ln_var_data); #else PADDLE_THROW(phi::errors::Unimplemented( - "FusedBiasDropoutResidualLnKernel not surpport for rocm")); + "FusedBiasDropoutResidualLnKernel not support for rocm")); #endif } } // namespace fusion diff --git a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc index bab4694451397..c46d7e77c8420 100644 --- a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc @@ -80,7 +80,7 @@ void FusedElementwiseKernel(const OneDNNContext& dev_ctx, // For Inplace src and dst should be the same memory object. // So x should share buffer with z. But UT mechanics is testing inplace - // execution for this op not checking that x can be bradcasted to match in + // execution for this op not checking that x can be broadcasted to match in // shape y tensor. // This is wrong as when x is to be broadcasted then z(out) will match the // shape of y which is bigger than x. Hence if x is smaller in shape than z diff --git a/paddle/phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc index 81e6e67093362..82840ec1b3537 100644 --- a/paddle/phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc @@ -38,7 +38,7 @@ void BNActXPUKernel(const Context& dev_ctx, true, phi::errors::InvalidArgument( "The 'data_layout' attribute must be NCHW or NHWC. " - "But recevived 'data_layout' is [%s].", + "But received 'data_layout' is [%s].", data_layout_str)); const auto& x_dims = x.dims();