diff --git a/paddle/fluid/framework/data_layout.h b/paddle/fluid/framework/data_layout.h index 9c5e2cf7ccdce..b611bb77b4e1e 100644 --- a/paddle/fluid/framework/data_layout.h +++ b/paddle/fluid/framework/data_layout.h @@ -27,6 +27,7 @@ enum class DataLayout { kNHWC = 0, kNCHW = 1, kAnyLayout = 2, + kMKLDNN = 3, // all layouts supported by MKLDNN internally }; inline DataLayout StringToDataLayout(const std::string& str) { @@ -41,6 +42,8 @@ inline DataLayout StringToDataLayout(const std::string& str) { return DataLayout::kNCHW; } else if (s == "ANYLAYOUT") { return DataLayout::kAnyLayout; + } else if (s == "MKLDNNLAYOUT") { + return DataLayout::kMKLDNN; } else { PADDLE_THROW("Unknown storage order string: %s", s); } @@ -54,8 +57,10 @@ inline std::string DataLayoutToString(const DataLayout& data_layout) { return "NCHW"; case DataLayout::kAnyLayout: return "ANY_LAYOUT"; + case DataLayout::kMKLDNN: + return "MKLDNNLAYOUT"; default: - PADDLE_THROW("unknown DataLayou %d", data_layout); + PADDLE_THROW("unknown DataLayout %d", data_layout); } } diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index 60ec60a427ba9..5b8dfc57ba020 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -16,6 +16,9 @@ #include #include "paddle/fluid/operators/math/math_function.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif namespace paddle { namespace framework { @@ -88,5 +91,85 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var, out->set_layout(expected_kernel_type.data_layout_); } +#ifdef PADDLE_WITH_MKLDNN +using mkldnn::memory; +using mkldnn::primitive; +using mkldnn::reorder; + +void* GetDataFromTensor(const Tensor& tensor, mkldnn::memory::data_type type) { + switch (type) { + case mkldnn::memory::data_type::f32: + return platform::to_void_cast(tensor.data()); + case mkldnn::memory::data_type::s8: + return platform::to_void_cast(tensor.data()); + case mkldnn::memory::data_type::u8: + return platform::to_void_cast(tensor.data()); + case mkldnn::memory::data_type::s16: + return platform::to_void_cast(tensor.data()); + case mkldnn::memory::data_type::s32: + return platform::to_void_cast(tensor.data()); + default: + PADDLE_THROW("wrong mkldnn type provided"); + } +} +#endif + +void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, + const OpKernelType& expected_kernel_type, + const Tensor& in, Tensor* out) { + auto in_layout = kernel_type_for_var.data_layout_; + auto out_layout = expected_kernel_type.data_layout_; + + PADDLE_ENFORCE( + in_layout == DataLayout::kMKLDNN && out_layout != DataLayout::kMKLDNN, + "TransDataLayoutFromMKLDNN only supports transform from MKLDNN to " + "non-MKLDNN"); + +#ifdef PADDLE_WITH_MKLDNN + PADDLE_ENFORCE(in.format() != memory::format::format_undef && + in.format() != memory::format::any, + "Input tensor should have specified memory format"); + + // Set default as NCHW in case not specified + out_layout = + out_layout == DataLayout::kAnyLayout ? DataLayout::kNCHW : out_layout; + + auto& pool = platform::DeviceContextPool::Instance(); + auto* dev_ctx = dynamic_cast( + pool.Get(expected_kernel_type.place_)); + auto& cpu_engine = dev_ctx->GetEngine(); + + std::vector in_tz = paddle::framework::vectorize2int(in.dims()); + std::vector out_tz = in_tz; + + memory::data_type in_type = ToMKLDNNDataType(in.type()); + PADDLE_ENFORCE(in_type != memory::data_type::data_undef, + "Input tensor type is not supported: ", in.type().name()); + memory::data_type out_type = in_type; + + memory::format in_format = + in_tz.size() == 2 ? memory::format::nc : in.format(); + memory::format out_format = + out_tz.size() == 2 ? memory::format::nc : ToMKLDNNFormat(out_layout); + + void* in_data = GetDataFromTensor(in, in_type); + + // output tensor has the same dims as input. Reorder don't change dims + out->Resize(in.dims()); + + auto out_data = out->mutable_data(expected_kernel_type.place_, in.type()); + + auto in_memory = memory({{{in_tz}, in_type, in_format}, cpu_engine}, in_data); + auto out_memory = + memory({{{out_tz}, out_type, out_format}, cpu_engine}, out_data); + + platform::Reorder(in_memory, out_memory); + + out->set_layout(out_layout); + // reset format since the out tensor will be feed to non-MKLDNN OPkernel + out->set_format(memory::format::format_undef); +#endif +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h index 06b638663dd33..2ba84ce57fd8a 100644 --- a/paddle/fluid/framework/data_layout_transform.h +++ b/paddle/fluid/framework/data_layout_transform.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/tensor.h" @@ -22,6 +23,50 @@ namespace paddle { namespace framework { +#ifdef PADDLE_WITH_MKLDNN +using MKLDNNFormat = mkldnn::memory::format; +using MKLDNNDataType = mkldnn::memory::data_type; + +inline MKLDNNFormat ToMKLDNNFormat(const DataLayout& layout) { + switch (layout) { + case DataLayout::kNHWC: + return MKLDNNFormat::nhwc; + case DataLayout::kNCHW: + return MKLDNNFormat::nchw; + default: + PADDLE_THROW("Fail to convert layout %s to MKLDNN format", + DataLayoutToString(layout)); + } +} + +inline DataLayout ToPaddleLayout(const MKLDNNFormat& format) { + switch (format) { + case MKLDNNFormat::nhwc: + return DataLayout::kNHWC; + case MKLDNNFormat::nchw: + return DataLayout::kNCHW; + default: + PADDLE_THROW("Fail to convert MKLDNN format to paddle layout"); + } +} + +inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) { + static const std::map dict{ + {std::type_index(typeid(float)), MKLDNNDataType::f32}, // NOLINT + {std::type_index(typeid(char)), MKLDNNDataType::s8}, // NOLINT + {std::type_index(typeid(unsigned char)), MKLDNNDataType::u8}, + {std::type_index(typeid(int16_t)), MKLDNNDataType::s16}, + {std::type_index(typeid(int32_t)), MKLDNNDataType::s32}}; + auto iter = dict.find(type); + if (iter != dict.end()) return iter->second; + return MKLDNNDataType::data_undef; +} +#endif + +void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, + const OpKernelType& expected_kernel_type, + const Tensor& in, Tensor* out); + std::vector GetAxis(const DataLayout& from, const DataLayout& to); void TransDataLayout(const OpKernelType& kernel_type_for_var, diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc index 9c277a27da5af..b8fcc92697ca1 100644 --- a/paddle/fluid/framework/data_transform.cc +++ b/paddle/fluid/framework/data_transform.cc @@ -33,11 +33,34 @@ void DataTransform(const OpKernelType& expected_kernel_type, Tensor in; in.ShareDataWith(input_tensor); Tensor out; + DataLayout lin = kernel_type_for_var.data_layout_; + DataLayout lout = expected_kernel_type.data_layout_; // do layout transform - if (NeedTransformLayout(expected_kernel_type.data_layout_, - kernel_type_for_var.data_layout_)) { - TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out); + if (NeedTransformLayout(lout, lin)) { + if (lin == DataLayout::kMKLDNN || lout == DataLayout::kMKLDNN) { + PADDLE_ENFORCE( + !(lin == DataLayout::kMKLDNN && lout == DataLayout::kMKLDNN), + "No layout transform needed between two MKLDNN OPKernels"); + + if (lin != DataLayout::kMKLDNN && lout == DataLayout::kMKLDNN) { +#ifdef PADDLE_WITH_MKLDNN + // Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel + // Just set layout/format. No real transform occur + out.ShareDataWith(input_tensor); + out.set_layout(DataLayout::kMKLDNN); + out.set_format(ToMKLDNNFormat(lin)); +#endif + } else { + // Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel + // Do transform via MKLDNN lib + TransDataLayoutFromMKLDNN(kernel_type_for_var, expected_kernel_type, in, + &out); + } + } else { + // Case3 - transfrom between Non-MKLDNN OPKernels + TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out); + } transformed = true; PassTensorData(&out, &in); } diff --git a/paddle/fluid/framework/op_kernel_type.h b/paddle/fluid/framework/op_kernel_type.h index fab20d75f5a45..f51a184e7bae2 100644 --- a/paddle/fluid/framework/op_kernel_type.h +++ b/paddle/fluid/framework/op_kernel_type.h @@ -87,7 +87,14 @@ inline std::string KernelTypeToString(const OpKernelType& kernel_key) { } inline bool NeedTransformLayout(const DataLayout& l, const DataLayout& r) { - return l != DataLayout::kAnyLayout && r != DataLayout::kAnyLayout && l != r; + bool ret = + (l != DataLayout::kAnyLayout && r != DataLayout::kAnyLayout && l != r); +#ifdef PADDLE_WITH_MKLDNN + // Layout transform needed for either non-MKLDNN to MKLDNN or vice versa + ret |= (l != DataLayout::kMKLDNN && r == DataLayout::kMKLDNN); + ret |= (l == DataLayout::kMKLDNN && r != DataLayout::kMKLDNN); +#endif + return ret; } inline bool TransFromNeeded(const OpKernelType& l, const OpKernelType& r) { diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index 748317438b44b..e57c2ff3d0006 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -83,8 +83,14 @@ struct OpKernelRegistrarFunctor { void operator()(const char* op_type, const char* library_type) const { using T = typename KERNEL_TYPE::ELEMENT_TYPE; + std::string library(library_type); + std::string data_layout = "ANYLAYOUT"; + if (library == "MKLDNN") { + data_layout = "MKLDNNLAYOUT"; + } OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType(), - DataLayout::kAnyLayout, StringToLibraryType(library_type)); + StringToDataLayout(data_layout), + StringToLibraryType(library_type)); OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE); constexpr auto size = std::tuple_size>::value; @@ -99,7 +105,8 @@ struct OpKernelRegistrarFunctor { void operator()(const char* op_type, const char* library_type) const {} }; -// User can register many kernel in one place. The data type could be different. +// User can register many kernel in one place. The data type could be +// different. template class OpKernelRegistrar : public Registrar { public: diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index f87d552149241..4d1e8d0ebaaf4 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -444,10 +444,25 @@ class RuntimeInferShapeContext : public InferShapeContext { auto* out_tensor = out_var->GetMutable(); out_tensor->set_lod(in_tensor.lod()); - // TODO(dzhwinter) : reuse ShareLoD in most operators. - // Need to call ShareLayout explicitly in sequence related ops. - // Shall we have a better method to shared info between in/out Tensor? - out_tensor->set_layout(in_tensor.layout()); +// TODO(dzhwinter) : reuse ShareLoD in most operators. +// Need to call ShareLayout explicitly in sequence related ops. +// Shall we have a better method to shared info between in/out Tensor? +#ifdef PADDLE_WITH_MKLDNN + // Fix me: ugly workaround below + // Correct solution: + // set_layout() should NOT be called here (i.e. ShareLoD). Instead, + // layout of output tensor should be set "manually" in Compute() + // of each OPKernel. The reason layout should NOT be shared between + // input and output "automatically" (now by InferShape()->ShareLoD()) + // is that layout transform may occur after InferShape(). + // Workaround: + // Skip set_layout() when input layout is kMKLDNN + // This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN + // OPKernel. In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called + // in Compute() + if (in_tensor.layout() != DataLayout::kMKLDNN) +#endif + out_tensor->set_layout(in_tensor.layout()); } void ShareLayout(const std::string& in, const std::string& out, size_t i = 0, @@ -665,7 +680,8 @@ OpKernelType OperatorWithKernel::GetExpectedKernelType( OpKernelType OperatorWithKernel::GetKernelTypeForVar( const std::string& var_name, const Tensor& tensor, const OpKernelType& expected_kernel_type) const { - return OpKernelType(expected_kernel_type.data_type_, tensor.place()); + return OpKernelType(expected_kernel_type.data_type_, tensor.place(), + tensor.layout()); } } // namespace framework diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index 29566aaa53370..ef224d68f1fc5 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -34,6 +34,28 @@ namespace framework { class LoDTensor; class Tensor { +#ifdef PADDLE_WITH_MKLDNN + + public: + inline mkldnn::memory::format format() const { return format_; } + + inline void set_format(const mkldnn::memory::format format) { + format_ = format; + } + + protected: + /** + * @brief the detail format of memory block which have layout as kMKLDNN + * + * @note MKLDNN lib support various memory format like nchw, nhwc, nChw8C, + * nChw16c, etc. For a MKLDNN memory block, layout will be set as + * DataLayout::kMKLDNN meanwhile detail memory format will be kept in + * this field. + */ + + mkldnn::memory::format format_ = mkldnn::memory::format::format_undef; +#endif + public: template friend struct EigenTensor; @@ -195,8 +217,10 @@ class Tensor { * N,C,H,W for respectively the batch size, the number of * feature maps, the height. */ - - DataLayout layout_ = DataLayout::kNHWC; + // Fix me: here just change the default layout to kNCHW + // it doesn't fix the real issue, i.e. feeder should set up tensor layout + // according to actual input data + DataLayout layout_ = DataLayout::kNCHW; /** * @brief A PlaceHolder may be shared by more than one tensor. diff --git a/paddle/fluid/framework/tensor_test.cc b/paddle/fluid/framework/tensor_test.cc index e1012de2ec36e..0a1cb6d5703da 100644 --- a/paddle/fluid/framework/tensor_test.cc +++ b/paddle/fluid/framework/tensor_test.cc @@ -209,7 +209,7 @@ TEST(Tensor, ReshapeToMatrix) { TEST(Tensor, Layout) { framework::Tensor src; - ASSERT_EQ(src.layout(), framework::DataLayout::kNHWC); + ASSERT_EQ(src.layout(), framework::DataLayout::kNCHW); src.set_layout(framework::DataLayout::kAnyLayout); ASSERT_EQ(src.layout(), framework::DataLayout::kAnyLayout); } diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index dd71c66a75a03..8478ae20a5925 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -58,14 +58,16 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx, const framework::OperatorWithKernel& oper, const std::string& name) { framework::LibraryType library{framework::LibraryType::kPlain}; + + framework::DataLayout layout = framework::DataLayout::kAnyLayout; #ifdef PADDLE_WITH_MKLDNN auto it = oper.Attrs().find("use_mkldnn"); if (library == framework::LibraryType::kPlain && it != oper.Attrs().end() && platform::CanMKLDNNBeUsed(ctx)) { library = framework::LibraryType::kMKLDNN; + layout = framework::DataLayout::kMKLDNN; } #endif - framework::DataLayout layout = framework::DataLayout::kAnyLayout; return framework::OpKernelType( framework::ToDataType(ctx.Input(name)->type()), ctx.GetPlace(), layout, library); diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 6ec8c9d18b466..d7e0af28c1bfa 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -111,14 +111,16 @@ class BatchNormOp : public framework::OperatorWithKernel { "Variance input should be of float type"); framework::LibraryType library_{framework::LibraryType::kPlain}; + // TODO(pzelazko-intel): enable MKLDNN layout when it's ready + framework::DataLayout layout = framework::DataLayout::kAnyLayout; + #ifdef PADDLE_WITH_MKLDNN if (library_ == framework::LibraryType::kPlain && platform::CanMKLDNNBeUsed(ctx)) { library_ = framework::LibraryType::kMKLDNN; + layout = framework::DataLayout::kMKLDNN; } #endif - // TODO(pzelazko-intel): enable MKLDNN layout when it's ready - framework::DataLayout layout = framework::DataLayout::kAnyLayout; return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout, library_); } @@ -367,17 +369,18 @@ class BatchNormGradOp : public framework::OperatorWithKernel { } framework::LibraryType library_{framework::LibraryType::kPlain}; + // TODO(pzelazko-intel): enable MKLDNN layout when it's ready + framework::DataLayout layout_ = framework::DataLayout::kAnyLayout; #ifdef PADDLE_WITH_MKLDNN if (library_ == framework::LibraryType::kPlain && platform::CanMKLDNNBeUsed(ctx)) { library_ = framework::LibraryType::kMKLDNN; + layout_ = framework::DataLayout::kMKLDNN; } #endif - // TODO(pzelazko-intel): enable MKLDNN layout when it's ready - framework::DataLayout layout = framework::DataLayout::kAnyLayout; return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), ctx.GetPlace(), - layout, library_); + layout_, library_); } }; diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index 697d914842579..850297a2327f3 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -75,6 +75,11 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { framework::OpKernelType ConvOp::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { framework::LibraryType library{framework::LibraryType::kPlain}; + + std::string data_format = ctx.Attr("data_format"); + // TODO(pzelazko-intel): enable MKLDNN layout when it's ready + framework::DataLayout layout = framework::StringToDataLayout(data_format); + #ifdef PADDLE_WITH_CUDA if (platform::CanCUDNNBeUsed(ctx)) { library = framework::LibraryType::kCUDNN; @@ -84,6 +89,7 @@ framework::OpKernelType ConvOp::GetExpectedKernelType( if (library == framework::LibraryType::kPlain && platform::CanMKLDNNBeUsed(ctx)) { library = framework::LibraryType::kMKLDNN; + layout = framework::DataLayout::kMKLDNN; } #endif @@ -99,9 +105,6 @@ framework::OpKernelType ConvOp::GetExpectedKernelType( "float16 can only be used when CUDNN is used"); } - std::string data_format = ctx.Attr("data_format"); - // TODO(pzelazko-intel): enable MKLDNN layout when it's ready - framework::DataLayout layout = framework::StringToDataLayout(data_format); return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout, library); } @@ -309,6 +312,10 @@ void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const { framework::OpKernelType ConvOpGrad::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { framework::LibraryType library_{framework::LibraryType::kPlain}; + // TODO(pzelazko-intel): enable MKLDNN layout when it's ready + std::string data_format = ctx.Attr("data_format"); + framework::DataLayout layout_ = framework::StringToDataLayout(data_format); + #ifdef PADDLE_WITH_CUDA if (platform::CanCUDNNBeUsed(ctx)) { library_ = framework::LibraryType::kCUDNN; @@ -318,12 +325,10 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType( if (library_ == framework::LibraryType::kPlain && platform::CanMKLDNNBeUsed(ctx)) { library_ = framework::LibraryType::kMKLDNN; + layout_ = framework::DataLayout::kMKLDNN; } #endif - std::string data_format = ctx.Attr("data_format"); - // TODO(pzelazko-intel): enable MKLDNN layout when it's ready - framework::DataLayout layout_ = framework::StringToDataLayout(data_format); return framework::OpKernelType( framework::ToDataType(ctx.Input("Input")->type()), ctx.GetPlace(), layout_, library_); diff --git a/paddle/fluid/operators/fc_op.cc b/paddle/fluid/operators/fc_op.cc index 8843a1c44b700..a9ae1396db8d7 100644 --- a/paddle/fluid/operators/fc_op.cc +++ b/paddle/fluid/operators/fc_op.cc @@ -43,7 +43,7 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const { framework::OpKernelType FCOp::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { framework::LibraryType library{framework::LibraryType::kMKLDNN}; - framework::DataLayout layout{framework::DataLayout::kAnyLayout}; + framework::DataLayout layout{framework::DataLayout::kMKLDNN}; return framework::OpKernelType( framework::ToDataType(ctx.Input("Input")->type()), ctx.GetPlace(), @@ -65,7 +65,7 @@ void FCOpGrad::InferShape(framework::InferShapeContext* ctx) const { framework::OpKernelType FCOpGrad::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { framework::LibraryType library{framework::LibraryType::kMKLDNN}; - framework::DataLayout layout{framework::DataLayout::kAnyLayout}; + framework::DataLayout layout{framework::DataLayout::kMKLDNN}; return framework::OpKernelType( framework::ToDataType(ctx.Input("Input")->type()), ctx.GetPlace(), diff --git a/paddle/fluid/operators/lrn_op.cc b/paddle/fluid/operators/lrn_op.cc index 52b9cd7fb7019..52b459a6a2e56 100644 --- a/paddle/fluid/operators/lrn_op.cc +++ b/paddle/fluid/operators/lrn_op.cc @@ -124,16 +124,17 @@ namespace { framework::OpKernelType GetExpectedLRNKernel( const framework::ExecutionContext& ctx) { framework::LibraryType library_{framework::LibraryType::kPlain}; + std::string data_format = ctx.Attr("data_format"); + // TODO(pzelazko-intel): enable MKLDNN layout when it's ready + framework::DataLayout layout_ = framework::StringToDataLayout(data_format); #ifdef PADDLE_WITH_MKLDNN if (library_ == framework::LibraryType::kPlain && platform::CanMKLDNNBeUsed(ctx)) { library_ = framework::LibraryType::kMKLDNN; + layout_ = framework::DataLayout::kMKLDNN; } #endif - std::string data_format = ctx.Attr("data_format"); - // TODO(pzelazko-intel): enable MKLDNN layout when it's ready - framework::DataLayout layout_ = framework::StringToDataLayout(data_format); return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), ctx.GetPlace(), layout_, library_); diff --git a/paddle/fluid/operators/pool_mkldnn_op.cc b/paddle/fluid/operators/pool_mkldnn_op.cc index 60e936298defe..a045f9e98dd73 100644 --- a/paddle/fluid/operators/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/pool_mkldnn_op.cc @@ -24,10 +24,13 @@ using mkldnn::pooling_backward; // Generate keys for storing/retriving primitives for this operator // TODO(jczaja): Make hashing function more optimial -static std::string gethash(memory::dims& input_dims, std::string& pooling_type, - std::vector& ksize, std::vector& strides, - std::vector& paddings, std::string suffix) { - auto dims2str = [](memory::dims& operand_dims) { +static std::string gethash(const memory::dims& input_dims, + const std::string& pooling_type, + const std::vector& ksize, + const std::vector& strides, + const std::vector& paddings, + const std::string& suffix) { + auto dims2str = [](const memory::dims& operand_dims) { std::string dstr = ""; for (size_t i = 0; i < operand_dims.size(); ++i) { dstr += std::to_string(operand_dims[i]) + "-"; diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index f4fb2b132fe8d..18aa2bd352c5d 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -83,6 +83,9 @@ void PoolOp::InferShape(framework::InferShapeContext *ctx) const { framework::OpKernelType PoolOp::GetExpectedKernelType( const framework::ExecutionContext &ctx) const { framework::LibraryType library_{framework::LibraryType::kPlain}; + std::string data_format = ctx.Attr("data_format"); + framework::DataLayout layout_ = framework::StringToDataLayout(data_format); + #ifdef PADDLE_WITH_CUDA if (platform::CanCUDNNBeUsed(ctx)) { library_ = framework::LibraryType::kCUDNN; @@ -92,11 +95,10 @@ framework::OpKernelType PoolOp::GetExpectedKernelType( if (library_ == framework::LibraryType::kPlain && platform::CanMKLDNNBeUsed(ctx)) { library_ = framework::LibraryType::kMKLDNN; + layout_ = framework::DataLayout::kMKLDNN; } #endif - std::string data_format = ctx.Attr("data_format"); - framework::DataLayout layout_ = framework::StringToDataLayout(data_format); return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), ctx.GetPlace(), layout_, library_); @@ -112,6 +114,9 @@ void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const { framework::OpKernelType PoolOpGrad::GetExpectedKernelType( const framework::ExecutionContext &ctx) const { framework::LibraryType library_{framework::LibraryType::kPlain}; + std::string data_format = ctx.Attr("data_format"); + framework::DataLayout layout_ = framework::StringToDataLayout(data_format); + #ifdef PADDLE_WITH_CUDA if (platform::CanCUDNNBeUsed(ctx)) { library_ = framework::LibraryType::kCUDNN; @@ -121,6 +126,7 @@ framework::OpKernelType PoolOpGrad::GetExpectedKernelType( if (library_ == framework::LibraryType::kPlain && platform::CanMKLDNNBeUsed(ctx)) { library_ = framework::LibraryType::kMKLDNN; + layout_ = framework::DataLayout::kMKLDNN; } #endif @@ -129,8 +135,6 @@ framework::OpKernelType PoolOpGrad::GetExpectedKernelType( PADDLE_ENFORCE_EQ(library_, framework::LibraryType::kCUDNN, "float16 can only be used when CUDNN is used"); } - std::string data_format = ctx.Attr("data_format"); - framework::DataLayout layout_ = framework::StringToDataLayout(data_format); return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_, library_); } diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc index cc256aa627bdd..c90a3be964a3a 100644 --- a/paddle/fluid/operators/softmax_op.cc +++ b/paddle/fluid/operators/softmax_op.cc @@ -49,6 +49,9 @@ class SoftmaxOp : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const override { // choose cudnn kernel if the runtime supported. framework::LibraryType library_{framework::LibraryType::kPlain}; + std::string data_format = ctx.Attr("data_format"); + framework::DataLayout layout_ = framework::StringToDataLayout(data_format); + #ifdef PADDLE_WITH_CUDA if (platform::CanCUDNNBeUsed(ctx)) { library_ = framework::LibraryType::kCUDNN; @@ -58,6 +61,7 @@ class SoftmaxOp : public framework::OperatorWithKernel { if (library_ == framework::LibraryType::kPlain && platform::CanMKLDNNBeUsed(ctx)) { library_ = framework::LibraryType::kMKLDNN; + layout_ = framework::DataLayout::kMKLDNN; } #endif @@ -68,9 +72,7 @@ class SoftmaxOp : public framework::OperatorWithKernel { "float16 can only be used on GPU place"); } - std::string data_format = ctx.Attr("data_format"); - return framework::OpKernelType(input_data_type, ctx.GetPlace(), - framework::StringToDataLayout(data_format), + return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_, library_); } }; @@ -142,6 +144,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const override { // choose cudnn kernel if the runtime supported. framework::LibraryType library_{framework::LibraryType::kPlain}; + #ifdef PADDLE_WITH_CUDA if (platform::CanCUDNNBeUsed(ctx)) { library_ = framework::LibraryType::kCUDNN; diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index f1187620d81ff..de711b7d23ef0 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace platform { @@ -86,5 +87,17 @@ inline mkldnn::memory::data_type MKLDNNGetDataType() { return mkldnn::memory::f32; } +inline void Reorder(const mkldnn::memory& src, const mkldnn::memory& dst) { + auto reorder_prim = mkldnn::reorder(src, dst); + std::vector pipeline; + pipeline.push_back(reorder_prim); + mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); +} + +inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) { + return static_cast( + memory.get_primitive_desc().desc().data.format); +} + } // namespace platform } // namespace paddle