diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 3c3fbfaae51cc..256aded8ca234 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -163,7 +163,12 @@ function(op_library TARGET)
   # pybind USE_OP
   if (${pybind_flag} EQUAL 0)
+    # NOTE(*): the activation op uses macros to register its kernels, so set the pybind USE_OP manually.
+    if(${TARGET} STREQUAL "activation")
+      file(APPEND ${pybind_file} "USE_OP(relu);\n")
+    else()
     file(APPEND ${pybind_file} "USE_OP(${TARGET});\n")
+    endif()
   endif()
 endfunction()
diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 56451f8f147ad..549629ffd6648 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -13,11 +13,48 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/activation_op.h"
+#include
 #include "paddle/fluid/operators/mkldnn_activation_op.h"

 namespace paddle {
 namespace operators {

+#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT)                  \
+  class OP_NAME##OpMaker                                                   \
+      : public ::paddle::framework::OpProtoAndCheckerMaker {               \
+   public:                                                                 \
+    OP_NAME##OpMaker(OpProto *proto, OpAttrChecker *op_checker)            \
+        : ::paddle::framework::OpProtoAndCheckerMaker(proto, op_checker) { \
+      AddInput("X", "Input of " #OP_NAME " operator");                     \
+      AddOutput("Out", "Output of " #OP_NAME " operator");                 \
+      AddAttr<bool>("use_mkldnn",                                          \
+                    "(bool, default false) Only used in mkldnn kernel")    \
+          .SetDefault(false);                                              \
+      AddComment(OP_COMMENT);                                              \
+    }                                                                      \
+  }
+
+#define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE)              \
+  class OP_NAME##GradMaker                                                   \
+      : public ::paddle::framework::SingleGradOpDescMaker {                  \
+   public:                                                                   \
+    using ::paddle::framework::SingleGradOpDescMaker::SingleGradOpDescMaker; \
+                                                                             \
+   protected:                                                                \
+    std::unique_ptr<::paddle::framework::OpDesc> Apply() const override {    \
+      auto *op = new ::paddle::framework::OpDesc();                          \
+      op->SetType(#KERNEL_TYPE "_grad");                                     \
+      op->SetInput("Out", Output("Out"));                                    \
+      op->SetInput(::paddle::framework::GradVarName("Out"),                  \
+                   OutputGrad("Out"));                                       \
+                                                                             \
+      op->SetAttrMap(Attrs());                                               \
+                                                                             \
+      op->SetOutput(::paddle::framework::GradVarName("X"), InputGrad("X"));  \
+      return std::unique_ptr<::paddle::framework::OpDesc>(op);               \
+    }                                                                        \
+  }
+
 class ActivationOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
@@ -37,346 +74,190 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
   }
 };

-class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "Input of Sigmoid operator");
-    AddOutput("Out", "Output of Sigmoid operator");
-    AddComment(R"DOC(
+constexpr char SigmoidDoc[] = R"DOC(
 Sigmoid Activation Operator

 $$out = \frac{1}{1 + e^{-x}}$$

-)DOC");
-  }
-};
+)DOC";

-class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  LogSigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "Input of LogSigmoid operator");
-    AddOutput("Out", "Output of LogSigmoid operator");
-    AddComment(R"DOC(
+constexpr char LogSigmoidDoc[] = R"DOC(
 Logsigmoid Activation Operator

 $$out = \log \frac{1}{1 + e^{-x}}$$

-)DOC");
-  }
-};
+)DOC";

-class ExpOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-
ExpOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Exp operator"); - AddOutput("Out", "Output of Exp operator"); - AddComment(R"DOC( +constexpr char ExpDoc[] = R"DOC( Exp Activation Operator. $out = e^x$ -)DOC"); - } -}; +)DOC"; -class ReluOpMaker : public framework::OpProtoAndCheckerMaker { - public: - ReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Relu operator"); - AddOutput("Out", "Output of Relu operator"); - AddAttr("use_mkldnn", - "(bool, default false) Only used in mkldnn kernel") - .SetDefault(false); - AddComment(R"DOC( +constexpr char ReluDoc[] = R"DOC( Relu Activation Operator. $out = \max(x, 0)$ -)DOC"); - } -}; - -class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { - public: - LeakyReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of LeakyRelu operator"); - AddOutput("Out", "Output of LeakyRelu operator"); - AddAttr("alpha", "The small negative slope").SetDefault(0.02f); - AddComment(R"DOC( -LeakyRelu Activation Operator. - -$out = \max(x, \alpha * x)$ - -)DOC"); - } -}; - -class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { - public: - SoftShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Softshrink operator"); - AddOutput("Out", "Output of Softshrink operator"); - AddAttr("lambda", "non-negative offset").SetDefault(0.5f); - AddComment(R"DOC( -Softshrink Activation Operator. - -$$ -out = \begin{cases} - x - \lambda, \text{if } x > \lambda \\ - x + \lambda, \text{if } x < -\lambda \\ - 0, \text{otherwise} - \end{cases} -$$ +)DOC"; -)DOC"); - } -}; - -class TanhOpMaker : public framework::OpProtoAndCheckerMaker { - public: - TanhOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Tanh operator"); - AddOutput("Out", "Output of Tanh operator"); - AddAttr("use_mkldnn", - "(bool, default false) Only used in mkldnn kernel") - .SetDefault(false); - AddComment(R"DOC( +constexpr char TanhDoc[] = R"DOC( Tanh Activation Operator. $$out = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ -)DOC"); - } -}; +)DOC"; -class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker { - public: - TanhShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of TanhShrink operator"); - AddOutput("Out", "Output of TanhShrink operator"); - AddComment(R"DOC( +constexpr char TanhShrinkDoc[] = R"DOC( TanhShrink Activation Operator. $$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ -)DOC"); - } -}; - -class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { - public: - HardShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of HardShrink operator"); - AddOutput("Out", "Output of HardShrink operator"); - AddAttr("threshold", "The value of threshold for HardShrink") - .SetDefault(0.5f); - AddComment(R"DOC( -HardShrink Activation Operator. 
+)DOC"; -$$ -out = \begin{cases} - x, \text{if } x > \lambda \\ - x, \text{if } x < -\lambda \\ - 0, \text{otherwise} - \end{cases} -$$ - -)DOC"); - } -}; - -class SqrtOpMaker : public framework::OpProtoAndCheckerMaker { - public: - SqrtOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Sqrt operator"); - AddOutput("Out", "Output of Sqrt operator"); - AddAttr("use_mkldnn", - "(bool, default false) Only used in mkldnn kernel") - .SetDefault(false); - AddComment(R"DOC( +constexpr char SqrtDoc[] = R"DOC( Sqrt Activation Operator. $out = \sqrt{x}$ -)DOC"); - } -}; +)DOC"; -class AbsOpMaker : public framework::OpProtoAndCheckerMaker { - public: - AbsOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Abs operator"); - AddOutput("Out", "Output of Abs operator"); - AddAttr("use_mkldnn", - "(bool, default false) Only used in mkldnn kernel") - .SetDefault(false); - AddComment(R"DOC( +constexpr char AbsDoc[] = R"DOC( Abs Activation Operator. $out = |x|$ -)DOC"); - } -}; +)DOC"; -class CeilOpMaker : public framework::OpProtoAndCheckerMaker { - public: - CeilOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Ceil operator"); - AddOutput("Out", "Output of Ceil operator"); - AddComment(R"DOC( +constexpr char CeilDoc[] = R"DOC( Ceil Activation Operator. $out = ceil(x)$ -)DOC"); - } -}; +)DOC"; -class FloorOpMaker : public framework::OpProtoAndCheckerMaker { - public: - FloorOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Floor operator"); - AddOutput("Out", "Output of Floor operator"); - AddComment(R"DOC( +constexpr char FloorDoc[] = R"DOC( Floor Activation Operator. $out = floor(x)$ -)DOC"); - } -}; +)DOC"; -class CosOpMaker : public framework::OpProtoAndCheckerMaker { - public: - CosOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Cosine operator"); - AddOutput("Out", "Output of Cosine operator"); - AddComment(R"DOC( +constexpr char CosDoc[] = R"DOC( Cosine Activation Operator. $out = cos(x)$ -)DOC"); - } -}; +)DOC"; -class SinOpMaker : public framework::OpProtoAndCheckerMaker { - public: - SinOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Sine operator"); - AddOutput("Out", "Output of Sine operator"); - AddComment(R"DOC( +constexpr char SinDoc[] = R"DOC( Sine Activation Operator. $out = sin(x)$ -)DOC"); - } -}; +)DOC"; -class RoundOpMaker : public framework::OpProtoAndCheckerMaker { - public: - RoundOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Round operator"); - AddOutput("Out", "Output of Round operator"); - AddComment(R"DOC( +constexpr char RoundDoc[] = R"DOC( Round Activation Operator. 
$out = [x]$ -)DOC"); - } -}; +)DOC"; -class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker { - public: - ReciprocalOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Reciprocal operator"); - AddOutput("Out", "Output of Reciprocal operator"); - AddComment(R"DOC( +constexpr char ReciprocalDoc[] = R"DOC( Reciprocal Activation Operator. $$out = \frac{1}{x}$$ -)DOC"); - } -}; +)DOC"; -class LogOpMaker : public framework::OpProtoAndCheckerMaker { - public: - LogOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Log operator"); - AddOutput("Out", "Output of Log operator"); - AddComment(R"DOC( +constexpr char LogDoc[] = R"DOC( Log Activation Operator. $out = \ln(x)$ Natural logarithm of x. -)DOC"); - } -}; +)DOC"; + +constexpr char SquareDoc[] = R"DOC( +Square Activation Operator. + +$out = x^2$ -class SquareOpMaker : public framework::OpProtoAndCheckerMaker { +)DOC"; + +constexpr char SoftplusDoc[] = R"DOC( +Softplus Activation Operator. + +$out = \ln(1 + e^{x})$ + +)DOC"; + +constexpr char SoftsignDoc[] = R"DOC( +Softsign Activation Operator. + +$$out = \frac{x}{1 + |x|}$$ + +)DOC"; + +class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { public: - SquareOpMaker(OpProto *proto, OpAttrChecker *op_checker) + LeakyReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Square operator"); - AddOutput("Out", "Output of Square operator"); + AddInput("X", "Input of LeakyRelu operator"); + AddOutput("Out", "Output of LeakyRelu operator"); + AddAttr("alpha", "The small negative slope").SetDefault(0.02f); AddComment(R"DOC( -Square Activation Operator. +LeakyRelu Activation Operator. -$out = x^2$ +$out = \max(x, \alpha * x)$ )DOC"); } }; -class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker { +class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { public: - SoftplusOpMaker(OpProto *proto, OpAttrChecker *op_checker) + SoftShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Softplus operator"); - AddOutput("Out", "Output of Softplus operator"); + AddInput("X", "Input of Softshrink operator"); + AddOutput("Out", "Output of Softshrink operator"); + AddAttr("lambda", "non-negative offset").SetDefault(0.5f); AddComment(R"DOC( -Softplus Activation Operator. +Softshrink Activation Operator. -$out = \ln(1 + e^{x})$ +$$ +out = \begin{cases} + x - \lambda, \text{if } x > \lambda \\ + x + \lambda, \text{if } x < -\lambda \\ + 0, \text{otherwise} + \end{cases} +$$ )DOC"); } }; -class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker { +class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { public: - SoftsignOpMaker(OpProto *proto, OpAttrChecker *op_checker) + HardShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Softsign operator"); - AddOutput("Out", "Output of Softsign operator"); + AddInput("X", "Input of HardShrink operator"); + AddOutput("Out", "Output of HardShrink operator"); + AddAttr("threshold", "The value of threshold for HardShrink") + .SetDefault(0.5f); AddComment(R"DOC( -Softsign Activation Operator. +HardShrink Activation Operator. 
-$$out = \frac{x}{1 + |x|}$$ +$$ +out = \begin{cases} + x, \text{if } x > \lambda \\ + x, \text{if } x < -\lambda \\ + 0, \text{otherwise} + \end{cases} +$$ )DOC"); } @@ -553,131 +434,86 @@ Swish Activation Operator. } }; +REGISTER_ACTIVATION_OP_MAKER(Sigmoid, SigmoidDoc); +REGISTER_ACTIVATION_OP_MAKER(LogSigmoid, LogSigmoidDoc); +REGISTER_ACTIVATION_OP_MAKER(Exp, ExpDoc); +REGISTER_ACTIVATION_OP_MAKER(Relu, ReluDoc); +REGISTER_ACTIVATION_OP_MAKER(Tanh, TanhDoc); +REGISTER_ACTIVATION_OP_MAKER(TanhShrink, TanhShrinkDoc); +REGISTER_ACTIVATION_OP_MAKER(Sqrt, SqrtDoc); +REGISTER_ACTIVATION_OP_MAKER(Abs, AbsDoc); +REGISTER_ACTIVATION_OP_MAKER(Ceil, CeilDoc); +REGISTER_ACTIVATION_OP_MAKER(Floor, FloorDoc); +REGISTER_ACTIVATION_OP_MAKER(Cos, CosDoc); +REGISTER_ACTIVATION_OP_MAKER(Sin, SinDoc); +REGISTER_ACTIVATION_OP_MAKER(Round, RoundDoc); +REGISTER_ACTIVATION_OP_MAKER(Reciprocal, ReciprocalDoc); +REGISTER_ACTIVATION_OP_MAKER(Log, LogDoc); +REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc); +REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc); +REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc); + +REGISTER_ACTIVATION_OP_GRAD_MAKER(Sigmoid, sigmoid); +REGISTER_ACTIVATION_OP_GRAD_MAKER(Relu, relu); +REGISTER_ACTIVATION_OP_GRAD_MAKER(Exp, exp); +REGISTER_ACTIVATION_OP_GRAD_MAKER(Tanh, tanh); +REGISTER_ACTIVATION_OP_GRAD_MAKER(Ceil, ceil); +REGISTER_ACTIVATION_OP_GRAD_MAKER(Floor, floor); +REGISTER_ACTIVATION_OP_GRAD_MAKER(Sqrt, sqrt); +REGISTER_ACTIVATION_OP_GRAD_MAKER(SoftRelu, soft_relu); +REGISTER_ACTIVATION_OP_GRAD_MAKER(Relu6, relu6); +REGISTER_ACTIVATION_OP_GRAD_MAKER(Reciprocal, reciprocal); +REGISTER_ACTIVATION_OP_GRAD_MAKER(HardSigmoid, hard_sigmoid); } // namespace operators } // namespace paddle namespace ops = paddle::operators; -REGISTER_OPERATOR(sigmoid, ops::ActivationOp, ops::SigmoidOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(sigmoid_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(logsigmoid, ops::ActivationOp, ops::LogSigmoidOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(logsigmoid_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(exp, ops::ActivationOp, ops::ExpOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(exp_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(relu, ops::ActivationWithMKLDNNOp, ops::ReluOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(relu_grad, ops::ActivationWithMKLDNNOpGrad); - -REGISTER_OPERATOR(tanh, ops::ActivationWithMKLDNNOp, ops::TanhOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(tanh_grad, ops::ActivationWithMKLDNNOpGrad); - -REGISTER_OPERATOR(tanh_shrink, ops::ActivationOp, ops::TanhShrinkOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(tanh_shrink_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(softshrink, ops::ActivationOp, ops::SoftShrinkOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(softshrink_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(sqrt, ops::ActivationWithMKLDNNOp, ops::SqrtOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(sqrt_grad, ops::ActivationWithMKLDNNOpGrad); - -REGISTER_OPERATOR(abs, ops::ActivationWithMKLDNNOp, ops::AbsOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(abs_grad, ops::ActivationWithMKLDNNOpGrad); - -REGISTER_OPERATOR(ceil, ops::ActivationOp, ops::CeilOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(ceil_grad, ops::ActivationOpGrad); - 
-REGISTER_OPERATOR(floor, ops::ActivationOp, ops::FloorOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(floor_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(cos, ops::ActivationOp, ops::CosOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(cos_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(sin, ops::ActivationOp, ops::SinOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(sin_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(round, ops::ActivationOp, ops::RoundOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(round_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(reciprocal, ops::ActivationOp, ops::ReciprocalOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(reciprocal_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(log, ops::ActivationOp, ops::LogOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(log_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(square, ops::ActivationOp, ops::SquareOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(square_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(softplus, ops::ActivationOp, ops::SoftplusOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(softplus_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(softsign, ops::ActivationOp, ops::SoftsignOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(softsign_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(brelu, ops::ActivationOp, ops::BReluOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(brelu_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(leaky_relu_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(soft_relu_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(elu, ops::ActivationOp, ops::ELUOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(elu_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(relu6, ops::ActivationOp, ops::Relu6OpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(relu6_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(pow, ops::ActivationOp, ops::PowOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(pow_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(stanh, ops::ActivationOp, ops::STanhOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(stanh_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(hard_shrink, ops::ActivationOp, ops::HardShrinkOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(hard_shrink_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(thresholded_relu, ops::ActivationOp, - ops::ThresholdedReluOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(thresholded_relu_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(hard_sigmoid_grad, ops::ActivationOpGrad); - -REGISTER_OPERATOR(swish, ops::ActivationOp, ops::SwishOpMaker, - paddle::framework::DefaultGradOpDescMaker); -REGISTER_OPERATOR(swish_grad, ops::ActivationOpGrad); +#define FOR_EACH_INPLACE_OP_FUNCTOR(__macro) \ + __macro(Sigmoid, sigmoid); \ + __macro(Relu, relu); \ + 
__macro(Exp, exp);                   \
+  __macro(Tanh, tanh);                 \
+  __macro(Ceil, ceil);                 \
+  __macro(Floor, floor);               \
+  __macro(Sqrt, sqrt);                 \
+  __macro(SoftRelu, soft_relu);        \
+  __macro(Relu6, relu6);               \
+  __macro(Reciprocal, reciprocal);     \
+  __macro(HardSigmoid, hard_sigmoid);
+
+#define FOR_EACH_OP_FUNCTOR(__macro)          \
+  __macro(LogSigmoid, logsigmoid);            \
+  __macro(SoftShrink, softshrink);            \
+  __macro(Abs, abs);                          \
+  __macro(Cos, cos);                          \
+  __macro(Sin, sin);                          \
+  __macro(Round, round);                      \
+  __macro(Log, log);                          \
+  __macro(Square, square);                    \
+  __macro(BRelu, brelu);                      \
+  __macro(Pow, pow);                          \
+  __macro(STanh, stanh);                      \
+  __macro(Softplus, softplus);                \
+  __macro(Softsign, softsign);                \
+  __macro(LeakyRelu, leaky_relu);             \
+  __macro(TanhShrink, tanh_shrink);           \
+  __macro(ELU, elu);                          \
+  __macro(HardShrink, hard_shrink);           \
+  __macro(Swish, swish);                      \
+  __macro(ThresholdedRelu, thresholded_relu);
+
+#define REGISTER_INPLACE_ACTIVATION_OP(OP_NAME, KERNEL_TYPE)        \
+  REGISTER_OPERATOR(KERNEL_TYPE, ::paddle::operators::ActivationOp, \
+                    ::paddle::operators::OP_NAME##OpMaker,          \
+                    ::paddle::operators::OP_NAME##GradMaker);       \
+  REGISTER_OPERATOR(KERNEL_TYPE##_grad, ::paddle::operators::ActivationOpGrad)
+
+#define REGISTER_ACTIVATION_OP(OP_NAME, KERNEL_TYPE)                \
+  REGISTER_OPERATOR(KERNEL_TYPE, ::paddle::operators::ActivationOp, \
+                    ::paddle::operators::OP_NAME##OpMaker,          \
+                    ::paddle::framework::DefaultGradOpDescMaker);   \
+  REGISTER_OPERATOR(KERNEL_TYPE##_grad, ::paddle::operators::ActivationOpGrad)

 #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
   REGISTER_OP_CPU_KERNEL(                                               \
@@ -692,4 +528,6 @@ REGISTER_OPERATOR(swish_grad, ops::ActivationOpGrad);
           ops::ActivationGradKernel>);
+FOR_EACH_OP_FUNCTOR(REGISTER_ACTIVATION_OP);
+FOR_EACH_INPLACE_OP_FUNCTOR(REGISTER_INPLACE_ACTIVATION_OP);
 FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CPU_KERNEL);
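The kernel types fed to `REGISTER_INPLACE_ACTIVATION_OP` above have to stay in sync with the `InplaceOpSet` that activation_op.h (below) consults at runtime; both lists are maintained by hand. A minimal sketch of that invariant in plain Python, with both sets copied from this diff (not part of the patch):

```python
# Kernel types registered with the Out-only GradMaker
# (FOR_EACH_INPLACE_OP_FUNCTOR above).
INPLACE_GRAD_MAKER = {
    "sigmoid", "relu", "exp", "tanh", "ceil", "floor",
    "sqrt", "soft_relu", "relu6", "reciprocal", "hard_sigmoid",
}

# The set consulted by IsInplace() in activation_op.h (below).
INPLACE_OP_SET = {
    "sigmoid", "exp", "relu", "tanh", "sqrt", "ceil",
    "floor", "reciprocal", "relu6", "soft_relu", "hard_sigmoid",
}

# If the two hand-maintained lists ever drift apart, an op could get a
# grad op without an "X" input while its grad functor still asks for x.
assert INPLACE_GRAD_MAKER == INPLACE_OP_SET
```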
diff --git a/paddle/fluid/operators/activation_op.cu b/paddle/fluid/operators/activation_op.cu
index 4f745553c14fc..27487b396ccf6 100644
--- a/paddle/fluid/operators/activation_op.cu
+++ b/paddle/fluid/operators/activation_op.cu
@@ -9,7 +9,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#define EIGEN_USE_GPU
 #include "paddle/fluid/operators/activation_op.h"
 #include "paddle/fluid/platform/float16.h"

diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index 43856780bf935..912415192659d 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -10,6 +10,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
+#include
+#include
+#include
 #include
 #include
@@ -25,6 +28,16 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

+/* NOTE(*): An ugly global variable, used from the Python layer side.
+   Please refer to layer_helper.py for the details.
+ */
+static std::unordered_set<std::string> InplaceOpSet = {
+    "sigmoid", "exp",        "relu",  "tanh",      "sqrt",         "ceil",
+    "floor",   "reciprocal", "relu6", "soft_relu", "hard_sigmoid",
+};
+
+static bool IsInplace(std::string op) { return InplaceOpSet.count(op); }
+
 template <typename DeviceContext, typename Functor>
 class ActivationKernel
     : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
@@ -60,7 +73,6 @@ class ActivationGradKernel
  public:
   using T = typename Functor::ELEMENT_TYPE;
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* X = context.Input<framework::Tensor>("X");
     auto* Out = context.Input<framework::Tensor>("Out");
     auto* dOut =
         context.Input<framework::Tensor>(framework::GradVarName("Out"));
@@ -68,7 +80,6 @@ class ActivationGradKernel
     dX->mutable_data<T>(context.GetPlace());

     auto dout = framework::EigenVector<T>::Flatten(*dOut);
-    auto x = framework::EigenVector<T>::Flatten(*X);
     auto out = framework::EigenVector<T>::Flatten(*Out);
     auto dx = framework::EigenVector<T>::Flatten(*dX);
     auto* place =
@@ -78,7 +89,16 @@ class ActivationGradKernel
     for (auto& attr : attrs) {
       *attr.second = context.Attr<float>(attr.first);
     }
-    functor(*place, x, out, dout, dx);
+    bool inplace = functor.Inplace();
+    if (!inplace) {
+      auto* X = context.Input<framework::Tensor>("X");
+      auto x = framework::EigenVector<T>::Flatten(*X);
+      functor(*place, x, out, dout, dx);
+    } else {
+      VLOG(10) << " Inplace activation ";
+      auto x = framework::EigenVector<T>::Flatten(*dX);
+      functor(*place, x, out, dout, dx);
+    }
   }
 };

@@ -89,6 +109,14 @@ struct BaseActivationFunctor {
   using AttrPair = std::vector<std::pair<const char*, float*>>;

   AttrPair GetAttrs() { return AttrPair(); }
+
+  /* NOTE(*): Out can reuse X's memory only if the gradient does not depend
+     on X. For example, sigmoid's gradient involves only Out, so its output
+     can reuse the input memory; abs' gradient uses X, so it cannot be
+     computed in place.
+   */
+  bool Inplace() const { return false; }
 };

 // sigmoid(x) = 1 / (1 + exp(-x))
@@ -102,6 +130,7 @@ struct SigmoidFunctor : public BaseActivationFunctor<T> {

 template <typename T>
 struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
+  bool Inplace() const { return IsInplace("sigmoid"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -156,6 +185,7 @@ struct ExpFunctor : public BaseActivationFunctor<T> {

 template <typename T>
 struct ExpGradFunctor : public BaseActivationFunctor<T> {
+  bool Inplace() const { return IsInplace("exp"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -174,10 +204,11 @@ struct ReluFunctor : public BaseActivationFunctor<T> {

 template <typename T>
 struct ReluGradFunctor : public BaseActivationFunctor<T> {
+  bool Inplace() const { return IsInplace("relu"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
-    dx.device(d) = dout * (x > static_cast<T>(0)).template cast<T>();
+    dx.device(d) = dout * (out > static_cast<T>(0)).template cast<T>();
   }
 };

@@ -192,6 +223,7 @@ struct TanhFunctor : public BaseActivationFunctor<T> {

 template <typename T>
 struct TanhGradFunctor : public BaseActivationFunctor<T> {
+  bool Inplace() const { return IsInplace("tanh"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -297,6 +329,7 @@ struct SqrtFunctor : public BaseActivationFunctor<T> {

 template <typename T>
 struct SqrtGradFunctor : public BaseActivationFunctor<T> {
+  bool Inplace() const { return IsInplace("sqrt"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
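The functors that override `Inplace()` above are exactly those whose derivative can be written in terms of Out alone, e.g. sigmoid'(x) = out·(1 − out) and relu'(x) = 1 wherever out > 0, which is why the grad kernel can substitute dX's buffer for X. A small numpy sketch, independent of the patch, that checks this for relu and sigmoid and shows why abs does not qualify:

```python
import numpy as np

x = np.random.randn(8).astype("float32")
dout = np.random.randn(8).astype("float32")

# relu: out = max(x, 0), so (out > 0) holds exactly where (x > 0) does and
# the gradient can be reconstructed from Out alone.
out = np.maximum(x, 0.0)
assert np.allclose(dout * (x > 0), dout * (out > 0))

# sigmoid: out = 1 / (1 + exp(-x)) and sigmoid'(x) = out * (1 - out),
# again with no reference to x.
out = 1.0 / (1.0 + np.exp(-x))
dx_sigmoid = dout * out * (1.0 - out)

# abs: d|x|/dx = sign(x) genuinely needs x, which is why abs is not in
# InplaceOpSet and keeps the regular (non-inplace) gradient path.
dx_abs = dout * np.sign(x)
```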
@@ -316,10 +349,11 @@ struct CeilFunctor : public BaseActivationFunctor<T> {

 template <typename T>
 struct ZeroGradFunctor : public BaseActivationFunctor<T> {
+  bool Inplace() const { return IsInplace("ceil"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
-    dx.device(d) = static_cast<T>(0) / x;
+    dx.device(d) = static_cast<T>(0) / out;
   }
 };

@@ -432,6 +466,7 @@ struct ReciprocalFunctor : public BaseActivationFunctor<T> {

 template <typename T>
 struct ReciprocalGradFunctor : public BaseActivationFunctor<T> {
+  bool Inplace() const { return IsInplace("reciprocal"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -531,12 +566,14 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
   typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
     return {{"threshold", &threshold}};
   }
+  bool Inplace() const { return IsInplace("relu6"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
-    dx.device(d) = dout *
-                   ((x > static_cast<T>(0)) * (x < static_cast<T>(threshold)))
-                       .template cast<T>();
+    dx.device(d) =
+        dout *
+        ((out > static_cast<T>(0)) * (out < static_cast<T>(threshold)))
+            .template cast<T>();
   }
 };

@@ -611,11 +648,12 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
   typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
     return {{"threshold", &threshold}};
   }
+  bool Inplace() const { return IsInplace("soft_relu"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
     auto tmp = static_cast<T>(threshold);
-    auto temp = ((x > -tmp) * (x < tmp)).template cast<T>().eval();
+    auto temp = ((out > -tmp) * (out < tmp)).template cast<T>().eval();
     dx.device(d) = dout * (static_cast<T>(1) - (-out).exp()) * temp;
   }
 };

@@ -791,7 +829,7 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
   typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
     return {{"slope", &slope}, {"offset", &offset}};
   }
-
+  bool Inplace() const { return IsInplace("hard_sigmoid"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 64d92cac7eca1..1f21e7abe76b2 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -33,6 +33,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/prune.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/selected_rows.h"
+#include "paddle/fluid/operators/activation_op.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -461,6 +462,9 @@ All parameter, weight, gradient are variables in Paddle.
             self.back().set_lod(t.lod());
           });

+  m.def("IsInplace",
+        [](std::string op) -> bool { return operators::IsInplace(op); });
+
   m.def("op_support_gpu", OpSupportGPU);
 #ifdef PADDLE_WITH_CUDA
   m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
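With the binding above exposed on the core module, the Python side can ask whether an activation may overwrite its input; layer_helper.py (next hunk) uses exactly this to decide whether a temporary output variable is needed. A usage sketch, assuming Paddle has been built from this branch:

```python
import paddle.fluid.core as core

# Activations whose gradient needs only Out can reuse the input buffer;
# the others still get a freshly created output variable.
for act in ["relu", "sigmoid", "tanh", "abs", "softsign"]:
    mode = "in-place" if core.IsInplace(act) else "separate output"
    print(act, "->", mode)
```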
diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py
index d771837fc5451..62933b512529b 100644
--- a/python/paddle/fluid/layer_helper.py
+++ b/python/paddle/fluid/layer_helper.py
@@ -19,6 +19,7 @@ import unique_name
 from paddle.fluid.initializer import Constant, Xavier
 from param_attr import ParamAttr, WeightNormParamAttr
+import core


 class LayerHelper(object):
@@ -398,13 +399,16 @@ def append_activation(self, input_var):
             return input_var
         if isinstance(act, basestring):
             act = {'type': act}
-        tmp = self.create_tmp_variable(dtype=input_var.dtype)

         if 'use_mkldnn' in self.kwargs:
             act['use_mkldnn'] = self.kwargs.get('use_mkldnn')
         act_type = act.pop('type')
         if 'use_mkldnn' in self.kwargs:
             act['use_mkldnn'] = self.kwargs.get('use_mkldnn')
+        tmp = input_var
+        # NOTE(dzhwinter): some activations support in-place computation.
+        if not core.IsInplace(act_type):
+            tmp = self.create_tmp_variable(dtype=input_var.dtype)
         self.append_op(
             type=act_type,
             inputs={"X": [input_var]},
diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
index c9069777faf9d..5ed387fb1247f 100644
--- a/python/paddle/fluid/tests/unittests/test_activation_op.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -361,10 +361,7 @@ def setUp(self):
     def test_check_output(self):
         self.check_output()

-    def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
-        self.check_grad(['X'], 'Out', max_relative_error=0.007)
+    # The same reason as TestFloor: the gradient check is skipped.

     def init_dtype(self):
         pass
@@ -396,10 +393,8 @@ def setUp(self):
     def test_check_output(self):
         self.check_output()

-    def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
-        self.check_grad(['X'], 'Out', max_relative_error=0.007)
+    # The gradient of floor, ceil and round is undefined; the op returns
+    # zero as the gradient, while numpy's numeric estimate gives nan.

     def init_dtype(self):
         pass
@@ -501,11 +496,6 @@ def setUp(self):
     def test_check_output(self):
         self.check_output()

-    def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
-        self.check_grad(['X'], 'Out', max_relative_error=0.007)
-
     def init_dtype(self):
         pass
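For reference on the deleted gradient checks: floor, ceil and round are piecewise constant, so the op reports a zero gradient, while a finite-difference estimate is either zero or a large spike depending on whether the perturbation crosses an integer, and the relative-error comparison degenerates. A small numpy illustration, not part of the patch:

```python
import numpy as np

def numeric_grad(f, x, eps=1e-3):
    # Central finite difference, the same idea the grad checker uses.
    return (f(x + eps) - f(x - eps)) / (2.0 * eps)

x = np.array([0.9999, 1.25, 2.0])

# The analytic gradient registered for floor is identically zero, but the
# numeric estimate jumps to ~1/(2*eps) whenever x +/- eps straddles an
# integer, so the two cannot be compared by relative error.
print(numeric_grad(np.floor, x))   # approximately [500.   0. 500.]
```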