From 43a2a608071f95768fd34778cdbb47192f31c793 Mon Sep 17 00:00:00 2001
From: co63oc
Date: Tue, 20 Feb 2024 12:01:58 +0800
Subject: [PATCH] Fix some typos (Intermedaite, deperecated, etc.) (#61720)

---
 .../fused/fused_elemwise_activation_op.h          | 12 ++++++------
 paddle/phi/kernels/funcs/compound_functors.h      | 12 ++++++------
 paddle/phi/kernels/funcs/elementwise_base.h       |  2 +-
 paddle/phi/kernels/funcs/elementwise_functor.h    |  4 ++--
 paddle/phi/kernels/funcs/fft.cc                   |  6 +++---
 paddle/phi/kernels/funcs/jit/README.en.md         | 16 ++++++++--------
 paddle/phi/kernels/funcs/jit/README.md            |  4 ++--
 paddle/phi/kernels/funcs/jit/helper.cc            |  2 +-
 paddle/phi/kernels/funcs/jit/helper.h             |  4 ++--
 .../funcs/jit/more/intrinsic/crf_decoding.cc      |  2 +-
 paddle/phi/kernels/funcs/matrix_bit_code.h        |  2 +-
 paddle/phi/kernels/funcs/matrix_solve.h           |  2 +-
 .../phi/kernels/funcs/selected_rows_functor.cc    |  2 +-
 paddle/phi/kernels/funcs/selected_rows_functor.h  |  4 ++--
 paddle/phi/kernels/funcs/seq2col.h                |  4 ++--
 paddle/phi/kernels/funcs/sparse/convolution.h     |  4 ++--
 paddle/phi/kernels/funcs/unsqueeze.h              |  2 +-
 .../phi/kernels/funcs/values_vectors_functor.h    |  2 +-
 paddle/phi/kernels/funcs/vol2col.cc               |  2 +-
 paddle/phi/kernels/funcs/vol2col.cu               |  8 ++++----
 .../cutlass/conv2d/conv2d_depthwise_bias_act.py   |  2 +-
 .../threadblock/epilogue_tensor_op_int32.h        |  2 +-
 .../fpA_intB_gemm/fpA_intB_gemm_template.h        |  2 +-
 .../transform/tile_smem_loader.h                  |  2 +-
 ...ed_bias_dropout_residual_layer_norm_kernel.cu  |  2 +-
 .../fusion/onednn/fused_elementwise_kernel.cc     |  2 +-
 .../phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc   |  2 +-
 27 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h
index ea51fd1a10a83..ad7f79307e628 100644
--- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h
+++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h
@@ -149,8 +149,8 @@ static void RunBinaryCompoundGradFunctors(
       UnaryFunctor,
       UnaryGradFunctor,
       InPlace>;
-  using BinaryCompoundDIntermedaiteOutFunctor =
-      phi::funcs::BinaryCompoundGradDIntermedaiteOutFunctor<
+  using BinaryCompoundDIntermediateOutFunctor =
+      phi::funcs::BinaryCompoundGradDIntermediateOutFunctor<
           T,
           BinaryGradFunctor,
           UnaryFunctor>;
@@ -160,7 +160,7 @@ static void RunBinaryCompoundGradFunctors(
       T,
       BinaryCompoundDxFunctor,
       BinaryCompoundDyFunctor,
-      BinaryCompoundDIntermedaiteOutFunctor,
+      BinaryCompoundDIntermediateOutFunctor,
       true /*UseIntermediateOut*/,
       false /*SameShapeOfIntermediateOutAndOut*/>(
       ctx,
@@ -176,7 +176,7 @@ static void RunBinaryCompoundGradFunctors(
       BinaryCompoundDxFunctor(binary_grad_functor, unary_functor),
       BinaryCompoundDyFunctor(
           binary_grad_functor, unary_functor, unary_grad_functor),
-      BinaryCompoundDIntermedaiteOutFunctor(binary_grad_functor,
+      BinaryCompoundDIntermediateOutFunctor(binary_grad_functor,
                                             unary_functor));
   } else {
     FusedElemwiseAndActGradComputeEx<
@@ -184,7 +184,7 @@ static void RunBinaryCompoundGradFunctors(
       T,
       BinaryCompoundDxFunctor,
       BinaryCompoundDyFunctor,
-      BinaryCompoundDIntermedaiteOutFunctor,
+      BinaryCompoundDIntermediateOutFunctor,
       false /*UseIntermediateOut*/,
       false /*SameShapeOfIntermediateOutAndOut*/>(
       ctx,
@@ -200,7 +200,7 @@ static void RunBinaryCompoundGradFunctors(
       BinaryCompoundDxFunctor(binary_grad_functor, unary_functor),
       BinaryCompoundDyFunctor(
           binary_grad_functor, unary_functor, unary_grad_functor),
-      BinaryCompoundDIntermedaiteOutFunctor(binary_grad_functor,
+      BinaryCompoundDIntermediateOutFunctor(binary_grad_functor,
                                             unary_functor));
   }
 }
diff --git a/paddle/phi/kernels/funcs/compound_functors.h b/paddle/phi/kernels/funcs/compound_functors.h
index 823dcd70a2f3c..72e7e8d872dc0 100644
--- a/paddle/phi/kernels/funcs/compound_functors.h
+++ b/paddle/phi/kernels/funcs/compound_functors.h
@@ -29,8 +29,8 @@ struct BinaryCompoundFunctor {
 
   inline HOSTDEVICE T GetOut(T x, T y) { return func1_(x, func2_(y)); }
 
-  inline HOSTDEVICE T GetOutUseIntermediateOut(T x, T intermediat_out) {
-    return func1_(x, intermediat_out);
+  inline HOSTDEVICE T GetOutUseIntermediateOut(T x, T intermediate_out) {
+    return func1_(x, intermediate_out);
   }
 
   inline HOSTDEVICE T GetIntermediateOut(T x UNUSED, T y) { return func2_(y); }
@@ -47,8 +47,8 @@ struct UnaryCompoundFunctor {
 
   inline HOSTDEVICE T GetOut(T x, T y) { return func1_(func2_(x, y)); }
 
-  inline HOSTDEVICE T GetOutUseIntermediateOut(T x UNUSED, T intermediat_out) {
-    return func1_(intermediat_out);
+  inline HOSTDEVICE T GetOutUseIntermediateOut(T x UNUSED, T intermediate_out) {
+    return func1_(intermediate_out);
   }
 
   inline HOSTDEVICE T GetIntermediateOut(T x, T y) { return func2_(x, y); }
@@ -209,8 +209,8 @@ struct UnaryCompoundGradDyFunctor {
 
 // Z = BinaryFunctor(X, UnaryFunctor(Y))
 template <typename T, typename DBinaryFun, typename UnaryFun>
-struct BinaryCompoundGradDIntermedaiteOutFunctor {
-  BinaryCompoundGradDIntermedaiteOutFunctor(const DBinaryFun &d_binary_fun,
+struct BinaryCompoundGradDIntermediateOutFunctor {
+  BinaryCompoundGradDIntermediateOutFunctor(const DBinaryFun &d_binary_fun,
                                             const UnaryFun &unary_fun)
       : d_binary_fun_(d_binary_fun), unary_fun_(unary_fun) {}
 
diff --git a/paddle/phi/kernels/funcs/elementwise_base.h b/paddle/phi/kernels/funcs/elementwise_base.h
index c92acdaf4180b..212b6dc4cddd2 100644
--- a/paddle/phi/kernels/funcs/elementwise_base.h
+++ b/paddle/phi/kernels/funcs/elementwise_base.h
@@ -49,7 +49,7 @@ class RowwiseTransformIterator;
 template <typename T, typename DeviceContext>
 class MidWiseTransformIterator;
 
-// NOTE(dzhwinter): ptrdiff_t in iterator is deperecated in c++17
+// NOTE(dzhwinter): ptrdiff_t in iterator is deprecated in c++17
 template <typename T>
 class RowwiseTransformIterator<T, CPUContext>
     : public std::iterator<std::random_access_iterator_tag,
diff --git a/paddle/phi/kernels/funcs/elementwise_functor.h b/paddle/phi/kernels/funcs/elementwise_functor.h
--- a/paddle/phi/kernels/funcs/elementwise_functor.h
+++ b/paddle/phi/kernels/funcs/elementwise_functor.h
                                     const dtype::float16 b) const {
     float b_float = static_cast<float>(b);
     float res = fmod(static_cast<float>(a), b_float);
-    // Accoding to #PR26732: in dividen % divsor
+    // According to #PR26732: in dividen % divsor
     // remainder shall have the same sign as divsor.
     if ((res != 0.0f) && ((res < 0.0f) != (b_float < 0.0f))) res += b_float;
     return static_cast<dtype::float16>(res);
@@ -584,7 +584,7 @@ struct RemainderFunctor {
     float b_float = static_cast<float>(b);
     float res = fmod(static_cast<float>(a), b_float);
-    // Accoding to #PR26732: in dividen % divsor
+    // According to #PR26732: in dividen % divsor
     // remainder shall have the same sign as divsor.
     if ((res != 0.0f) && ((res < 0.0f) != (b_float < 0.0f))) res += b_float;
     return static_cast<dtype::bfloat16>(res);
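
A quick standalone illustration of the sign rule that the `RemainderFunctor` comment above describes: `fmod` keeps the sign of the dividend, while the kernel wants a Python-style remainder that takes the sign of the divisor. The sketch below uses plain C++ with no Paddle dependencies; the function name is ours for illustration.

```cpp
#include <cmath>
#include <cstdio>

// Mirrors the adjustment in RemainderFunctor: shift a nonzero fmod()
// result whose sign disagrees with the divisor by one divisor.
float remainder_like_python(float a, float b) {
  float res = std::fmod(a, b);
  if (res != 0.0f && ((res < 0.0f) != (b < 0.0f))) res += b;
  return res;
}

int main() {
  std::printf("%g\n", remainder_like_python(-7.0f, 3.0f));  // 2, not fmod's -1
  std::printf("%g\n", remainder_like_python(7.0f, -3.0f));  // -2, not fmod's 1
  return 0;
}
```
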
diff --git a/paddle/phi/kernels/funcs/fft.cc b/paddle/phi/kernels/funcs/fft.cc
index 97502787b6938..beb0a98636039 100644
--- a/paddle/phi/kernels/funcs/fft.cc
+++ b/paddle/phi/kernels/funcs/fft.cc
@@ -234,7 +234,7 @@ struct FFTC2CFunctor {
     // pocketfft requires std::vector<size_t>
     std::vector<size_t> axes_(axes.size());
     std::copy(axes.begin(), axes.end(), axes_.begin());
-    // compuet factor
+    // compute factor
     size_t signal_numel = 1;
     for (const auto axis : axes) {
       signal_numel *= in_sizes[axis];
@@ -291,7 +291,7 @@ struct FFTR2CFunctor {
     // pocketfft requires std::vector<size_t>
     std::vector<size_t> axes_(axes.size());
     std::copy(axes.begin(), axes.end(), axes_.begin());
-    // compuet normalization factor
+    // compute normalization factor
     size_t signal_numel = 1;
     for (const auto axis : axes) {
       signal_numel *= in_sizes[axis];
@@ -348,7 +348,7 @@ struct FFTC2RFunctor {
     // pocketfft requires std::vector<size_t>
     std::vector<size_t> axes_(axes.size());
     std::copy(axes.begin(), axes.end(), axes_.begin());
-    // compuet normalization factor
+    // compute normalization factor
     size_t signal_numel = 1;
     for (const auto axis : axes) {
       signal_numel *= out_sizes[axis];
diff --git a/paddle/phi/kernels/funcs/jit/README.en.md b/paddle/phi/kernels/funcs/jit/README.en.md
index 28f9e1460f1c2..0e1958a5c1415 100644
--- a/paddle/phi/kernels/funcs/jit/README.en.md
+++ b/paddle/phi/kernels/funcs/jit/README.en.md
@@ -1,6 +1,6 @@
 # JIT Kernel
 
-JIT(Just In Time) Kernel contains actually generated code and some other implemenations with the same logic.
+JIT(Just In Time) Kernel contains actually generated code and some other implementations with the same logic.
 Each implementation has its own condition to use, defined in `CanBeUsed`.
 They are combined together to get the best performance of one single independent function.
 They could be some very simple functions like vector multiply, or some complicated functions like LSTM.
@@ -34,7 +34,7 @@ PaddlePaddle/Paddle/paddle/phi/kernels/
 └── ...
 ```
 
-All basical definitions of jit kernels are addressed in `paddle/phi/kernels/funcs/jit` including these three key folders `refer`, `gen`, `more`. There is only one unique name for each kernel while may have seraval implementations with same functionality.
+All basical definitions of jit kernels are addressed in `paddle/phi/kernels/funcs/jit` including these three key folders `refer`, `gen`, `more`. There is only one unique name for each kernel while may have several implementations with same functionality.
 
 - `refer`: Each kernel must have one reference implementation on CPU, and it should only focus on the correctness and should not depends on any third-party libraries.
 - `gen`: The code generated should be kept here. They should be designed focusing on the best performance, which depends on Xbyak.
@@ -44,7 +44,7 @@ All basical definitions of jit kernels are addressed in `paddle/phi/kernels/func
 We present these methods to get the functions:
 
 - `GetAllCandidateFuncs`. It can return all the implementations supported. All of the implementations can get the same result. You can do some runtime benchmark to choose which should actually be used.
-- `GetDefaultBestFunc`. It only return one default function pointer, which is tuning offline with some genenal configures and attributes. This should cover most situations.
+- `GetDefaultBestFunc`. It only return one default function pointer, which is tuning offline with some general configures and attributes. This should cover most situations.
 - `KernelFuncs::Cache()`. It can get the default functions and save it for next time with the same attribute.
 - `GetReferFunc`. It can only get the reference code in CPU, and all the others implementations have same logic with this reference code.
@@ -76,7 +76,7 @@ All kernels are included in `paddle/phi/kernels/funcs/jit/kernels.h`, which is a
 ## Solid Test
 
 - Unit Test
-  All functions should be compared with the corresponding reference functions, including data tyep `float` and `double`.
+  All functions should be compared with the corresponding reference functions, including data type `float` and `double`.
 - Benchmark
   All functions should be tested, and make sure the `jit::GetDefaultBestFunc` function obtain the best performance with all attributes.
@@ -89,15 +89,15 @@ All kernels are included in `paddle/phi/kernels/funcs/jit/kernels.h`, which is a
 3. Add reference function of `your_key`.
    Note:
    - this should be run on CPU and do not depend on any third-party.
-   - Add `USE_JITKERNEL_REFER(your_key)` in `refer/CmakeLists.txt` to make sure this code can be used.
-4. Add unit test in `test.cc`, and verfiy at least `float` and `double`.
+   - Add `USE_JITKERNEL_REFER(your_key)` in `refer/CMakeLists.txt` to make sure this code can be used.
+4. Add unit test in `test.cc`, and verify at least `float` and `double`.
    Test more data type for some special functions if necessary, for example `int8`.
 5. Add functions in `benchmark.cc` to test all function of same `KernelType`. Make sure `GetDefaultBestFunc` always get the best one.
 
 ## Optional
 
-Add more implementations of `your_kery` for performance enhancement.
+Add more implementations of `your_key` for performance enhancement.
 
-1. Add functions based on generated code in `gen`. It should be derived from `JitCode` and should have correpsonding creator from `JitCodeCreator` which will be registered on the `your_key`.
+1. Add functions based on generated code in `gen`. It should be derived from `JitCode` and should have corresponding creator from `JitCodeCreator` which will be registered on the `your_key`.
 2. If new attribute type is added, you should specialize `JitCodeKey` of this type.
 3. Add more functions in `more`,you can use any third party you wish, like mkl, mkldnn or intrinsic code to reach the best performance.
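
The dynamic-getting methods described in the README above map to a lookup roughly like the sketch below. This is a rough sketch only: the namespaces and tuple names (`phi::jit::KernelFuncs`, `VMulTuple`, `phi::CPUPlace`) are assumed from the surrounding text and may differ between Paddle versions.

```cpp
#include "paddle/phi/kernels/funcs/jit/kernels.h"

// Multiply two float vectors with whichever jit implementation is the
// tuned default for attribute n. Cache() memoizes the chosen function per
// attribute, so repeated calls with the same n skip the candidate search.
void vmul_sketch(const float* x, const float* y, float* z, int n) {
  auto vmul = phi::jit::KernelFuncs<phi::jit::VMulTuple<float>,
                                    phi::CPUPlace>::Cache()
                  .At(n);
  vmul(x, y, z, n);  // z[i] = x[i] * y[i]
}
```
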
diff --git a/paddle/phi/kernels/funcs/jit/README.md b/paddle/phi/kernels/funcs/jit/README.md
index 9453dac5ad38d..c9154d05b9c22 100644
--- a/paddle/phi/kernels/funcs/jit/README.md
+++ b/paddle/phi/kernels/funcs/jit/README.md
@@ -35,7 +35,7 @@ PaddlePaddle/Paddle/paddle/phi/kernels/
 基本类的定义都放在根目录下,根目录下包括gen,more和refer三个目录。每个目录下都是一种或者多种实现,每种kernel算子都需要有reference的实现,用作单元测试的基准,其他的实现都是可选的。
 - gen: 代表使用jit生成的code,需要依赖xbyak库。该实现最关心的就是性能。
 - refer: 代表reference的实现,每种kernel算子都需要有在CPU上的reference的实现,他主要关心的算法逻辑的正确性。
-- more: 下面可以放入跟多实现,可以包括mkl,mkldnn,intrinsic,openblas等,也可以是自身已有的kernel组合。
+- more: 下面可以放入更多实现,可以包括mkl,mkldnn,intrinsic,openblas等,也可以是自身已有的kernel组合。
 
 ## 动态获取
 
@@ -79,7 +79,7 @@ PaddlePaddle/Paddle/paddle/phi/kernels/
 # 如何添加新的算子
 
 1. 在`KernelType` 中添加 `your_key` 。
-2. 实现Reference 的逻辑,这个是必须是在CPU上的实现,并且不能依赖任何第三方库。实现后在`refer/CmakeLists.txt`中添加`USE_JITKERNEL_REFER(your_key)`来使用该kernel。
+2. 实现Reference 的逻辑,这个是必须是在CPU上的实现,并且不能依赖任何第三方库。实现后在`refer/CMakeLists.txt`中添加`USE_JITKERNEL_REFER(your_key)`来使用该kernel。
 3. (optional) 实现更多的算法在`more`目录下,可以依赖mkl,intrinsic或者mkldnn等第三方库。
 4. (optional) 实现基于Xbyak的生成code,在`gen`目下。 jitcode需要实现自己的`JitCodeCreator`,并注册在与refer相同的`KernelType`上。
 5. 添加新的`KernelTuple`,需要与`KernelType`一一对应,是所有类型的一个打包,包括数据类型,属性的类型,以及返回的函数类型。可以参考`SeqPoolTuple`,新加的Attr类型需要特例化`JitCodeKey`方法。
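
For step 2 above (step 3 in the English README), a reference implementation is deliberately plain, dependency-free CPU code whose only job is to define correct behavior. A minimal sketch for a hypothetical vector-add kernel follows; the function and macro names here are illustrative assumptions, not the exact ones in the tree.

```cpp
// Correctness-first scalar loop: no SIMD, no third-party libraries.
// This is what gen/ and more/ implementations are tested against.
template <typename T>
void VAddRefer(const T* x, const T* y, T* z, int n) {
  for (int i = 0; i < n; ++i) {
    z[i] = x[i] + y[i];
  }
}
// It would then be registered under its KernelType in refer/ and enabled
// via USE_JITKERNEL_REFER(kVAdd) in refer/CMakeLists.txt, e.g.:
// REGISTER_REFER_KERNEL(VAdd);  // assumed macro name
```
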
diff --git a/paddle/phi/kernels/funcs/jit/helper.cc b/paddle/phi/kernels/funcs/jit/helper.cc
index c135d6ee3177d..5ab391678bd90 100644
--- a/paddle/phi/kernels/funcs/jit/helper.cc
+++ b/paddle/phi/kernels/funcs/jit/helper.cc
@@ -111,7 +111,7 @@ void pack_weights(const float* src, float* dst, int n, int k) {
         0,
         phi::errors::InvalidArgument(
             "Each element of groups should be larger than "
-            "0. However the element: %d doesn't satify.",
+            "0. However the element: %d doesn't satisfy.",
             i));
   });
   int sum = std::accumulate(groups.begin(), groups.end(), 0);
diff --git a/paddle/phi/kernels/funcs/jit/helper.h b/paddle/phi/kernels/funcs/jit/helper.h
index c230738db9a5d..50c130bc0c486 100644
--- a/paddle/phi/kernels/funcs/jit/helper.h
+++ b/paddle/phi/kernels/funcs/jit/helper.h
@@ -140,7 +140,7 @@ std::vector GetAllCandidateKernels(
   auto ref = GetReferKernel<KernelTuple>();
   PADDLE_ENFORCE_NOT_NULL(
       ref,
-      phi::errors::InvalidArgument("Get all candicate kernel in CPU failed. "
+      phi::errors::InvalidArgument("Get all candidate kernel in CPU failed. "
                                    "The Refer Kernel can not be empty."));
   res.emplace_back(ref);
   return res;
@@ -188,7 +188,7 @@ typename KernelTuple::func_type GetDefaultBestFunc(
   PADDLE_ENFORCE_GE(funcs.size(),
                     1UL,
                     phi::errors::InvalidArgument(
-                        "The candicate jit kernel is at least one in CPU."));
+                        "The candidate jit kernel is at least one in CPU."));
   // Here could do some runtime benchmark of this attr and return the best one.
   // But yet just get the first one as the default best one,
   // which is searched in order and tuned by offline.
diff --git a/paddle/phi/kernels/funcs/jit/more/intrinsic/crf_decoding.cc b/paddle/phi/kernels/funcs/jit/more/intrinsic/crf_decoding.cc
index c53f62100f4d6..c36ca0d7360cc 100644
--- a/paddle/phi/kernels/funcs/jit/more/intrinsic/crf_decoding.cc
+++ b/paddle/phi/kernels/funcs/jit/more/intrinsic/crf_decoding.cc
@@ -88,7 +88,7 @@ void CRFDecoding(const int seq_len,
     /* Calculate the offset of transition_weights.*/
     int trans_offset = state_trans_base_idx * tag_num + j_offset;
     for (int i = 0; i < tag_num; ++i) {
-/* Initalize the content of alpha variable with related offset.*/
+/* Initialize the content of alpha variable with related offset.*/
 #ifdef __AVX512F__
       __m512 alpha_content = _mm512_set1_ps(*(alpha + seq_offset + i));
       /* Obtain the content of weights from un-aligned address.*/
diff --git a/paddle/phi/kernels/funcs/matrix_bit_code.h b/paddle/phi/kernels/funcs/matrix_bit_code.h
index 8d3335791ef69..0b53d7a21155b 100644
--- a/paddle/phi/kernels/funcs/matrix_bit_code.h
+++ b/paddle/phi/kernels/funcs/matrix_bit_code.h
@@ -40,7 +40,7 @@ namespace funcs {
  *   return the maximal code length
  *
  * SimpleCode operator()(size_t i)
- *   return the i-th code. Code class is descriebed below.
+ *   return the i-th code. Code class is described below.
 
 *
 * SimpleCode class should support 3 functions:
 *
diff --git a/paddle/phi/kernels/funcs/matrix_solve.h b/paddle/phi/kernels/funcs/matrix_solve.h
index f8225bd482385..45a4317a51eab 100644
--- a/paddle/phi/kernels/funcs/matrix_solve.h
+++ b/paddle/phi/kernels/funcs/matrix_solve.h
@@ -82,7 +82,7 @@ static std::vector getNewDimsVec(const DDim& b_dims) {
       true,
       phi::errors::PreconditionNotMet(
           "The size of tensor b must not be %d after getting new dims", 0));
-  // if b_dims_vec.size() == 1, just retun original vec
+  // if b_dims_vec.size() == 1, just return original vec
   return b_dims_vec;
 }
 
diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.cc b/paddle/phi/kernels/funcs/selected_rows_functor.cc
index 267463a6b4b13..b37b5bec78d2f 100644
--- a/paddle/phi/kernels/funcs/selected_rows_functor.cc
+++ b/paddle/phi/kernels/funcs/selected_rows_functor.cc
@@ -78,7 +78,7 @@ struct SelectedRowsAdd {
         in1_row_numel,
         out_value->numel() / out_rows.size(),
         phi::errors::InvalidArgument(
-            "The input and oupput width must be equal."
+            "The input and output width must be equal."
             "But received input width = [%d], output width = [%d]",
             in1_row_numel,
             out_value->numel() / out_rows.size()));
diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.h b/paddle/phi/kernels/funcs/selected_rows_functor.h
index 38e68ee0ccfc6..c072f7ff12e88 100644
--- a/paddle/phi/kernels/funcs/selected_rows_functor.h
+++ b/paddle/phi/kernels/funcs/selected_rows_functor.h
@@ -30,7 +30,7 @@ limitations under the License. */
 namespace phi {
 namespace funcs {
 
-// SelectedRows + SelectedRows will simplely concat value and rows.
+// SelectedRows + SelectedRows will simply concat value and rows.
 // The real computation happens in dealing with LoDTensor.
 template <typename DeviceContext, typename T>
 struct SelectedRowsAdd {
@@ -77,7 +77,7 @@ struct SelectedRowsAddToTensor {
 };
 
 namespace scatter {
-// functors for manuplating SelectedRows data
+// functors for manipulating SelectedRows data
 template <typename DeviceContext, typename T>
 struct MergeAdd {
   // unary functor, merge by adding duplicated rows in
diff --git a/paddle/phi/kernels/funcs/seq2col.h b/paddle/phi/kernels/funcs/seq2col.h
index b757f8403d158..14665ada7b4a8 100644
--- a/paddle/phi/kernels/funcs/seq2col.h
+++ b/paddle/phi/kernels/funcs/seq2col.h
@@ -45,7 +45,7 @@ struct Seq2ColFunctor {
       a. Notion
          - `i` stands for the flattened index of a bunch of frames.
          - `src_idx` and `trg_idx` are the 1D indices of seqs and frames
-           respectivly.
+           respectively.
 
       b. Sample idx
       ```cpp
@@ -58,7 +58,7 @@ struct Seq2ColFunctor {
         n = i % (n_frames_ * frame_length_) % n_frames_;
      ```
 
-      d. Replace `sample_idx`, `f` and `n` in the following eqations:
+      d. Replace `sample_idx`, `f` and `n` in the following equations:
      ```cpp
         src_idx = sample_idx * seq_length_ + n * hop_length_ + f;
         trg_idx = sample_idx * n_frames_ * frame_length_ + f * n_frames_ + n;
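
The index derivation in the `Seq2ColFunctor` comment above can be checked with a few lines of standalone C++. The constants below are arbitrary but self-consistent (`n_frames = (seq_length - frame_length) / hop_length + 1`); only the equations come from the comment.

```cpp
#include <cstdio>

// Walk one flattened frame index i back to (sample_idx, f, n), then to the
// source/target offsets, exactly as the Seq2ColFunctor comment derives them.
int main() {
  const int seq_length = 16, frame_length = 4, hop_length = 2;
  const int n_frames = (seq_length - frame_length) / hop_length + 1;  // 7
  const int i = 23;  // arbitrary flattened index of a frame element

  const int sample_idx = i / (n_frames * frame_length);
  const int f = i % (n_frames * frame_length) / n_frames;
  const int n = i % (n_frames * frame_length) % n_frames;

  const int src_idx = sample_idx * seq_length + n * hop_length + f;
  const int trg_idx = sample_idx * n_frames * frame_length + f * n_frames + n;
  // For sample 0, trg_idx reproduces i (here: f=3, n=2, src=7, trg=23).
  std::printf("sample=%d f=%d n=%d src=%d trg=%d\n",
              sample_idx, f, n, src_idx, trg_idx);
  return 0;
}
```
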
diff --git a/paddle/phi/kernels/funcs/sparse/convolution.h b/paddle/phi/kernels/funcs/sparse/convolution.h
index 7048ca1a127f5..e250973ba4543 100644
--- a/paddle/phi/kernels/funcs/sparse/convolution.h
+++ b/paddle/phi/kernels/funcs/sparse/convolution.h
@@ -43,8 +43,8 @@ inline HOSTDEVICE bool Check(const IntT& x,
                              const int kdim,
                              const int xdim) {
   const IntT lower = x - dilation * kx + pad;
-  const IntT uper = x + (kdim - kx - 1) * dilation - pad;
-  return (lower >= 0 && lower % stride == 0 && uper < xdim);
+  const IntT upper = x + (kdim - kx - 1) * dilation - pad;
+  return (lower >= 0 && lower % stride == 0 && upper < xdim);
 }
 
 // Check whether the current position(x, y, z) is legal:
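
The renamed `upper` bound in `Check` above can be exercised in isolation. The sketch below (standalone; the wrapper name is ours) enumerates, for one fixed input coordinate, which kernel offsets map onto a stride-aligned output position whose remaining window still fits inside the padded input extent.

```cpp
#include <cstdio>

// One-dimensional version of the Check() bounds test from convolution.h.
bool CheckOneDim(int x, int kx, int kdim, int xdim,
                 int stride, int dilation, int pad) {
  const int lower = x - dilation * kx + pad;   // = output_pos * stride
  const int upper = x + (kdim - kx - 1) * dilation - pad;
  return lower >= 0 && lower % stride == 0 && upper < xdim;
}

int main() {
  // Input size 8, kernel size 3, stride 2, dilation 1, pad 1:
  // only kx = 1 yields a valid, stride-aligned output for x = 4.
  for (int kx = 0; kx < 3; ++kx) {
    std::printf("x=4 kx=%d -> %s\n", kx,
                CheckOneDim(4, kx, 3, 8, 2, 1, 1) ? "valid" : "invalid");
  }
  return 0;
}
```
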
diff --git a/paddle/phi/kernels/funcs/unsqueeze.h b/paddle/phi/kernels/funcs/unsqueeze.h
index a8fc8dc849544..eebc12dd1df21 100644
--- a/paddle/phi/kernels/funcs/unsqueeze.h
+++ b/paddle/phi/kernels/funcs/unsqueeze.h
@@ -118,7 +118,7 @@ inline DDim GetUnsqueezeShape(const std::vector unsqz_dims,
   for (int axis : unsqz_dims) {
     int cur = axis < 0 ? axis + cur_output_rank + 1 : axis;
-    // Vaildity Check: the axis bound
+    // Validity Check: the axis bound
     PADDLE_ENFORCE_GE(
         cur,
         0,
diff --git a/paddle/phi/kernels/funcs/values_vectors_functor.h b/paddle/phi/kernels/funcs/values_vectors_functor.h
index 0de31efaa19b7..4a8e12d26c2c7 100644
--- a/paddle/phi/kernels/funcs/values_vectors_functor.h
+++ b/paddle/phi/kernels/funcs/values_vectors_functor.h
@@ -229,7 +229,7 @@ struct MatrixEighFunctor {
     ValueType *out_value = dev_ctx.template Alloc<ValueType>(eigen_values);
 
     DenseTensor input_trans;
-    // lapack is a column-major storge, transpose make the input to
+    // lapack is a column-major storage, transpose make the input to
     // have a continuous memory layout
     input_trans = phi::TransposeLast2Dim<T>(dev_ctx, input);
     T *input_vector = input_trans.data<T>();
diff --git a/paddle/phi/kernels/funcs/vol2col.cc b/paddle/phi/kernels/funcs/vol2col.cc
index b7c6a1fd6c1e8..94acac28619b8 100644
--- a/paddle/phi/kernels/funcs/vol2col.cc
+++ b/paddle/phi/kernels/funcs/vol2col.cc
@@ -229,7 +229,7 @@ class Col2VolFunctor {
         input_width_tmp,
         output_width,
         phi::errors::InvalidArgument(
-            "input_width(%d) and output_width(%d) are mismatching.",
+            "input_width(%d) and output_width(%d) are mismatching.",
             input_width_tmp,
             output_width));
     T* vol_data = vol->data<T>();
diff --git a/paddle/phi/kernels/funcs/vol2col.cu b/paddle/phi/kernels/funcs/vol2col.cu
index 9d6fe1c4d9f3a..ad8be8a806486 100644
--- a/paddle/phi/kernels/funcs/vol2col.cu
+++ b/paddle/phi/kernels/funcs/vol2col.cu
@@ -89,7 +89,7 @@ __global__ void vol2col(int num_kernels,
 }
 
 /*
- * im = [input_channels,intpu_depth, input_height, input_width] for
+ * im = [input_channels,input_depth, input_height, input_width] for
  * channels_first
  * im = [input_depth, input_height, input_width, input_channels] for
  * channels_last
@@ -112,7 +112,7 @@ void Vol2ColFunctor::operator()(
   PADDLE_ENFORCE_EQ(vol.dims().size(),
                     4,
                     phi::errors::InvalidArgument(
-                        "The dimension of vol should be 4, but received %d.",
+                        "The dimension of vol should be 4, but received %d.",
                         vol.dims().size()));
   PADDLE_ENFORCE_EQ(col->dims().size(),
                     7,
@@ -318,12 +318,12 @@ void Col2VolFunctor::operator()(
   PADDLE_ENFORCE_EQ(vol->dims().size(),
                     4,
                     phi::errors::InvalidArgument(
-                        "The dimension of vol should be 4, but received %d.",
+                        "The dimension of vol should be 4, but received %d.",
                         vol->dims().size()));
   PADDLE_ENFORCE_EQ(col.dims().size(),
                     7,
                     phi::errors::InvalidArgument(
-                        "The dimension of col should be 7, but received %d.",
+                        "The dimension of col should be 7, but received %d.",
                         col.dims().size()));
 
   int input_channels =
diff --git a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_depthwise_bias_act.py b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_depthwise_bias_act.py
index cfeb60dbc154d..0ea8e0a47130d 100644
--- a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_depthwise_bias_act.py
+++ b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_depthwise_bias_act.py
@@ -123,7 +123,7 @@ def intlist2str(input):
     return return_str
 
 
-# Generate simt conv2d_depthwsie code.
+# Generate simt conv2d_depthwise code.
 def generate_conv2d_depthwise():
diff --git a/paddle/phi/kernels/fusion/cutlass/cutlass_extensions/epilogue/threadblock/epilogue_tensor_op_int32.h b/paddle/phi/kernels/fusion/cutlass/cutlass_extensions/epilogue/threadblock/epilogue_tensor_op_int32.h
index 9d0cb644b236e..de85ed672ed43 100644
--- a/paddle/phi/kernels/fusion/cutlass/cutlass_extensions/epilogue/threadblock/epilogue_tensor_op_int32.h
+++ b/paddle/phi/kernels/fusion/cutlass/cutlass_extensions/epilogue/threadblock/epilogue_tensor_op_int32.h
@@ -151,7 +151,7 @@ struct DefaultIteratorsTensorOp
 class SharedLoadIteratorMixed {
  public:
diff --git a/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h b/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h
index f7c73dc99cede..cb62cd4a35d99 100644
--- a/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h
+++ b/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h
@@ -199,7 +199,7 @@ void generic_mixed_gemm_kernelLauncher(const T* A,
       // now to run bf16 mixgemm, we have set the split-k factor to 1
       VLOG(1) << "Requested split-k but workspace size insufficient. Falling "
                  "back to non-split-k implementation.";
-      VLOG(1) << "need workspace sizoe of: " << gemm.get_workspace_size(args)
+      VLOG(1) << "need workspace size of: " << gemm.get_workspace_size(args)
              << ", but got " << workspace_bytes;
       VLOG(1) << "args.batch_stride_D:" << args.batch_stride_D;
       VLOG(1) << "args.batch_count:" << args.batch_count;
Falling " "back to non-split-k implementation."; - VLOG(1) << "need workspace sizoe of: " << gemm.get_workspace_size(args) + VLOG(1) << "need workspace size of: " << gemm.get_workspace_size(args) << ", but got " << workspace_bytes; VLOG(1) << "args.batch_stride_D:" << args.batch_stride_D; VLOG(1) << "args.batch_count:" << args.batch_count; diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/transform/tile_smem_loader.h b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/transform/tile_smem_loader.h index 43d14db28de2a..57acd005aa88e 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/transform/tile_smem_loader.h +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/transform/tile_smem_loader.h @@ -61,7 +61,7 @@ class TileSmemLoader { using Fragment = typename GmemTileIterator::Fragment; - /// load a tile from global memory into shared memory + // load a tile from global memory into shared memory CUTLASS_DEVICE static void load(GmemTileIterator tile_load_iter, SmemTileIterator tile_store_iter) { diff --git a/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu index fd1f754cc9827..37450d3a4e178 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu @@ -97,7 +97,7 @@ void FusedBiasDropoutResidualLnKernel( ln_var_data); #else PADDLE_THROW(phi::errors::Unimplemented( - "FusedBiasDropoutResidualLnKernel not surpport for rocm")); + "FusedBiasDropoutResidualLnKernel not support for rocm")); #endif } } // namespace fusion diff --git a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc index bab4694451397..c46d7e77c8420 100644 --- a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc @@ -80,7 +80,7 @@ void FusedElementwiseKernel(const OneDNNContext& dev_ctx, // For Inplace src and dst should be the same memory object. // So x should share buffer with z. But UT mechanics is testing inplace - // execution for this op not checking that x can be bradcasted to match in + // execution for this op not checking that x can be broadcasted to match in // shape y tensor. // This is wrong as when x is to be broadcasted then z(out) will match the // shape of y which is bigger than x. Hence if x is smaller in shape than z diff --git a/paddle/phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc index 81e6e67093362..82840ec1b3537 100644 --- a/paddle/phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc @@ -38,7 +38,7 @@ void BNActXPUKernel(const Context& dev_ctx, true, phi::errors::InvalidArgument( "The 'data_layout' attribute must be NCHW or NHWC. " - "But recevived 'data_layout' is [%s].", + "But received 'data_layout' is [%s].", data_layout_str)); const auto& x_dims = x.dims();