From 4d8992c3bc64a835aa6a1e6e12678594d3f117b5 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 18 Aug 2017 09:58:41 +0800 Subject: [PATCH 01/12] check format before set header format --- paddle/parameter/Parameter.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/parameter/Parameter.h b/paddle/parameter/Parameter.h index e31cbc3dee6c5..08a426eb742b3 100644 --- a/paddle/parameter/Parameter.h +++ b/paddle/parameter/Parameter.h @@ -278,7 +278,11 @@ class Parameter { /** * @brief Set the format in header. */ - void setHeaderFormat(int32_t fmt) { headerFormat_ = fmt; } + void setHeaderFormat(int32_t fmt) { + CHECK(isHeaderFormatSupported(fmt)) << "Unsupported format version: " + << fmt; + headerFormat_ = fmt; + } /** * @brief Parameter Update Hook. From 462b9b1d20942dca35dbe532248e53cdeccea6b2 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 18 Aug 2017 10:13:06 +0800 Subject: [PATCH 02/12] update mkldnn tag v0.10 --- cmake/external/mkldnn.cmake | 2 +- cmake/external/mklml.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 25c6b4ef52d3f..9686df0021900 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -51,7 +51,7 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} DEPENDS ${MKLDNN_DEPENDS} GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" - GIT_TAG "v0.9" + GIT_TAG "v0.10" PREFIX ${MKLDNN_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index e9fd3d4bedc98..51fafb94791dd 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -28,7 +28,7 @@ INCLUDE(ExternalProject) SET(MKLML_PROJECT "extern_mklml") SET(MKLML_VER "mklml_lnx_2018.0.20170720") -SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.9/${MKLML_VER}.tgz") +SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.10/${MKLML_VER}.tgz") SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") SET(MKLML_DST_DIR "mklml") From 62e6dac402ca63b402b5dfd1d7649cba1e258d41 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 18 Aug 2017 14:30:09 +0800 Subject: [PATCH 03/12] add MKLDNNMatrix files --- paddle/gserver/layers/MKLDNNLayer.h | 1 + paddle/math/CMakeLists.txt | 15 ++++++++++ paddle/math/MKLDNNMatrix.cpp | 19 ++++++++++++ paddle/math/MKLDNNMatrix.h | 45 +++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+) create mode 100644 paddle/math/MKLDNNMatrix.cpp create mode 100644 paddle/math/MKLDNNMatrix.h diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 63e29f447eede..9533027fa6c75 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -18,6 +18,7 @@ limitations under the License. */ #include "Layer.h" #include "MKLDNNBase.h" #include "mkldnn.hpp" +#include "paddle/math/MKLDNNMatrix.h" DECLARE_bool(use_mkldnn); DECLARE_bool(use_mkldnn_wgt); diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index bf28092e82b77..ad6de18c81d60 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -14,6 +14,21 @@ # file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . 
*.cpp) + +message(STATUS "----------MATH_HEADERS:${MATH_HEADERS}") +message(STATUS "----------MATH_SOURCES:${MATH_SOURCES}") +if(NOT WITH_MKLDNN) + file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") + file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") + message(STATUS "----------DNN_HEADER:${DNN_HEADER}") + message(STATUS "----------DNN_SOURCES:${DNN_SOURCES}") + list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER}) + list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES}) + message(STATUS "Skip compiling with MKLDNNMatrix") +else() + message(STATUS "Compile with MKLDNNMatrix") +endif() + set(MATH_SOURCES "${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu" "${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu" diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp new file mode 100644 index 0000000000000..df8e72d78bedd --- /dev/null +++ b/paddle/math/MKLDNNMatrix.cpp @@ -0,0 +1,19 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNMatrix.h" + +using namespace mkldnn; // NOLINT + +namespace paddle {} // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h new file mode 100644 index 0000000000000..91ef56f2c3476 --- /dev/null +++ b/paddle/math/MKLDNNMatrix.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +//#include "Matrix.h" +#include "Vector.h" + +#include "mkldnn.hpp" +#include "paddle/parameter/Parameter.h" + +namespace paddle { + +static const std::map PARAM_FOARMAT_MAP = + {{mkldnn::memory::format::oi, PARAM_FORMAT_MKLDNN_OI}}; + +class MKLDNNMatrix; +typedef std::shared_ptr MKLDNNMatrixPtr; + +/** + * @brief MKLDNN Matrix. 
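+ * A CPU-side buffer paired with the MKL-DNN memory format its data is laid out in.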
+ * + */ +class MKLDNNMatrix : public CpuVector { +public: + explicit MKLDNNMatrix(size_t size, int fmt) : CpuVector(size), fmt_(fmt) {} + + ~MKLDNNMatrix() {} + +protected: + int fmt_; +}; + +} // namespace paddle From 4bffbd30f0dbc2a2bbff4aa8108867fceecc260a Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 21 Aug 2017 16:44:30 +0800 Subject: [PATCH 04/12] use MKLDNNMatrix in fc forward --- paddle/gserver/layers/Layer.cpp | 2 +- paddle/gserver/layers/Layer.h | 20 +++++++- paddle/gserver/layers/MKLDNNFcLayer.cpp | 63 ++++++++++++++++--------- paddle/gserver/layers/MKLDNNLayer.h | 25 +++++++--- paddle/math/CMakeLists.txt | 4 -- paddle/math/MKLDNNMatrix.cpp | 29 +++++++++++- paddle/math/MKLDNNMatrix.h | 43 +++++++++++++---- 7 files changed, 143 insertions(+), 43 deletions(-) diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index d5621412caee8..2bc20eee6c452 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -41,7 +41,7 @@ namespace paddle { Layer::Layer(const LayerConfig& config, bool useGpu) : config_(config), useGpu_(useGpu), - deviceId_(-1), + deviceId_(CPU_DEVICE), needSequenceInfo_(true) {} bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h index 0ed482889d0ce..ec4d093e0cac9 100644 --- a/paddle/gserver/layers/Layer.h +++ b/paddle/gserver/layers/Layer.h @@ -59,7 +59,12 @@ class Layer { LayerConfig config_; /// whether to use GPU bool useGpu_; - /// Device Id. CPU is -1, and GPU is 0, 1, 2 ... + /// Paddle device ID, MKLDNN is -2, CPU is -1 + enum PADDLE_DEVICE_ID { + MKLDNN_DEVICE = -2, + CPU_DEVICE = -1, + }; + /// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ... int deviceId_; /// Input layers std::vector inputLayers_; @@ -321,6 +326,19 @@ class Layer { if (deviceId == getDeviceId()) { return output_; } else { + bool CPU2MKLDNN = + getDeviceId() == CPU_DEVICE && deviceId == MKLDNN_DEVICE; + bool MKLDNN2CPU = + getDeviceId() == MKLDNN_DEVICE && deviceId == CPU_DEVICE; + if (CPU2MKLDNN) { + // TODO: do something + return output_; + } else if (MKLDNN2CPU) { + // TODO: do something + return output_; + } + + // TODO: handle mkldnn device or add mkldnn device to other for (size_t i = 0; i < outputOtherDevice_.size(); i++) { if (outputOtherDevice_[i].deviceId == deviceId) { return outputOtherDevice_[i]; diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index d201fac65e045..fac0390eee501 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -135,33 +135,51 @@ void MKLDNNFcLayer::reshape() { void MKLDNNFcLayer::resetFwd() { bool hasBias = biases_ && biases_->getW(); - real* iData = getInputValue(0)->getData(); - real* oData = getOutputValue()->getData(); - real* wData = weight_->getW()->getData(); - real* bData = hasBias ? biases_->getW()->getData() : NULL; + const MatrixPtr& in = getInputValue(0); + const MatrixPtr& wgt = weight_->getW(); + const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; + const MatrixPtr& out = output_.value; + + if (getPrev(0)->getDeviceId() == MKLDNN_DEVICE) { + inVal_ = std::dynamic_pointer_cast(in); + CHECK(inVal_) << "Input should be MKLDNNMatrix"; + // TODO: change input nchw to nc if available + // inVal_->downSpatial() + } else { + inVal_ = MKLDNNMatrix::create( + in, + hasSpatial_ ? memory::dims{bs_, ic_, ih_, iw_} : memory::dims{bs_, ic_}, + hasSpatial_ ? 
format::nchw : format::nc, + engine_); + } - // TODO(TJ): below create should be covered in MkldnnMatrix - // create memory desc - memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) - : createMD({bs_, ic_}, format::nc); - memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) - : createMD({oc_, ic_}, format::oi); - memory::desc bMD = bData != NULL ? createMD({oc_}, format::x) - : createMD({}, format::format_undef); - memory::desc oMD = createMD({bs_, oc_}, format::nc); + wgtVal_ = MKLDNNMatrix::create( + wgt, + hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_}, + hasSpatial_ ? format::oihw : format::oi, + engine_); - // create memory primitive desc and memory self - inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); - wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData)); - outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData)); + biasVal_ = + hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; + + outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); + + // change original output to mkldnn output + output_.value = std::dynamic_pointer_cast(outVal_); + // create forward handle prop_kind pk = prop_kind::forward; - fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD) - : fc_fwd::desc(pk, iMD, wMD, oMD); + fc_fwd::desc fwdDesc = + hasBias ? fc_fwd::desc(pk, + inVal_->getMD(), + wgtVal_->getMD(), + biasVal_->getMD(), + outVal_->getMD()) + : fc_fwd::desc( + pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - if (bData != NULL) { - biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData)); + if (hasBias) { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); } else { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); @@ -197,7 +215,8 @@ void MKLDNNFcLayer::resetBwd() { // update data inVal_->set_data_handle(iData); } else { - inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + LOG(FATAL) << "Should not be empty"; + // inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); } // create memory primitive desc and memory self diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 9533027fa6c75..b44095befb66a 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -21,7 +21,6 @@ limitations under the License. 
*/ #include "paddle/math/MKLDNNMatrix.h" DECLARE_bool(use_mkldnn); -DECLARE_bool(use_mkldnn_wgt); namespace paddle { @@ -54,13 +53,14 @@ class MKLDNNLayer : public Layer { std::vector pipelineBwd_; // TODO(TJ): change below memory as MKLDNNMatrixPtr type - std::shared_ptr inVal_; + // MKLDNNMatrixPtr ; + MKLDNNMatrixPtr inVal_; std::shared_ptr inGrad_; - std::shared_ptr outVal_; + MKLDNNMatrixPtr outVal_; std::shared_ptr outGrad_; - std::shared_ptr wgtVal_; + MKLDNNMatrixPtr wgtVal_; std::shared_ptr wgtGrad_; - std::shared_ptr biasVal_; + MKLDNNMatrixPtr biasVal_; std::shared_ptr biasGrad_; public: @@ -94,7 +94,7 @@ class MKLDNNLayer : public Layer { stream_.reset(new MKLDNNStream()); engine_ = CPUEngine::Instance().getEngine(); - // TODO(TJ): deivecId + setDeviceID(MKLDNN_DEVICE); return true; } @@ -128,6 +128,19 @@ class MKLDNNLayer : public Layer { // TODO(TJ): isFmtSuppoted(fmt) return mkldnn::memory::desc(dims, type, fmt); } + + void resetMKLDNNOutput(size_t height, size_t width) { + Layer::resetOutput(height, width); + // get valu and grad, use mkldnn matrix instaed + // output_.value; + } + +protected: + void setDeviceID(int id) { + deviceId_ = id; + output_.deviceId = id; + // TODO: handle mkldnn device or add mkldnn device to other + } }; } // namespace paddle diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index ad6de18c81d60..8afe6b509d24a 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -15,13 +15,9 @@ file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . *.cpp) -message(STATUS "----------MATH_HEADERS:${MATH_HEADERS}") -message(STATUS "----------MATH_SOURCES:${MATH_SOURCES}") if(NOT WITH_MKLDNN) file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") - message(STATUS "----------DNN_HEADER:${DNN_HEADER}") - message(STATUS "----------DNN_SOURCES:${DNN_SOURCES}") list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER}) list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES}) message(STATUS "Skip compiling with MKLDNNMatrix") diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index df8e72d78bedd..44fc54278c993 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -16,4 +16,31 @@ limitations under the License. */ using namespace mkldnn; // NOLINT -namespace paddle {} // namespace paddle +namespace paddle { + +MKLDNNMatrixPtr MKLDNNMatrix::create(const MatrixPtr& m, + memory::dims dims, + memory::format fmt, + engine& eg, + mkldnn::memory::data_type dtype) { + CpuMatrixPtr cpuM = std::dynamic_pointer_cast(m); + CHECK(cpuM) << "Only support create from CPU matrix yet"; + + size_t ndims = dims.size(); + CHECK(ndims > 0) << "Input dims should not be empty"; + size_t cnt = 1; + for (size_t i = 0; i < ndims; ++i) { + cnt *= dims[i]; + } + CHECK_EQ(cnt, m->getElementCnt()) << "Count size does not match"; + + size_t width = m->getWidth(); + size_t height = m->getHeight(); + real* data = m->getData(); + + memory::desc md = memory::desc(dims, dtype, fmt); + memory::primitive_desc pd = memory::primitive_desc(md, eg); + return std::make_shared(data, height, width, pd); +} + +} // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 91ef56f2c3476..73eb50d2a0b3a 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -14,9 +14,8 @@ limitations under the License. 
*/ #pragma once -//#include "Matrix.h" -#include "Vector.h" - +#include +#include "Matrix.h" #include "mkldnn.hpp" #include "paddle/parameter/Parameter.h" @@ -32,14 +31,42 @@ typedef std::shared_ptr MKLDNNMatrixPtr; * @brief MKLDNN Matrix. * */ -class MKLDNNMatrix : public CpuVector { +class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory { public: - explicit MKLDNNMatrix(size_t size, int fmt) : CpuVector(size), fmt_(fmt) {} + MKLDNNMatrix(real* data, + size_t height, + size_t width, + mkldnn::memory::primitive_desc pd) + : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {} - ~MKLDNNMatrix() {} + MKLDNNMatrix(size_t height, size_t width, mkldnn::memory::primitive_desc pd) + : CpuMatrix(height, width, false), mkldnn::memory(pd) { + set_data_handle(CpuMatrix::getData()); + } + + static MKLDNNMatrixPtr create( + const MatrixPtr& m, + mkldnn::memory::dims dims, + mkldnn::memory::format fmt, + mkldnn::engine& eg, + mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); + + /** + * Get primitive descriptor + */ + mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); } -protected: - int fmt_; + /** + * Get memory descriptor + */ + mkldnn::memory::desc getMD() { return getPD().desc(); } + + /** + * Get format + */ + int getFormat() { return getMD().data.format; } + + ~MKLDNNMatrix() {} }; } // namespace paddle From 4eecd0c2d531f66e64eebff88a99488275143207 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 22 Aug 2017 14:18:16 +0800 Subject: [PATCH 05/12] use MKLDNNMatrix in fc backward --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 77 ++++++++++++------------- paddle/gserver/layers/MKLDNNLayer.h | 59 ++++++++++++++----- paddle/math/MKLDNNMatrix.h | 33 +++++++++-- 3 files changed, 110 insertions(+), 59 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index fac0390eee501..5463104469632 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -158,10 +158,8 @@ void MKLDNNFcLayer::resetFwd() { hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_}, hasSpatial_ ? format::oihw : format::oi, engine_); - biasVal_ = hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; - outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); // change original output to mkldnn output @@ -193,46 +191,41 @@ void MKLDNNFcLayer::resetBwd() { return; } needResetBwd_ = false; - bool hasBias = biases_ && biases_->getWGrad(); - real* iData = getInputValue(0)->getData(); - real* iDiff = getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL; - real* oDiff = getOutputGrad()->getData(); - real* wDiff = weight_->getWGrad()->getData(); - real* bDiff = hasBias ? biases_->getWGrad()->getData() : NULL; /// backward weight - // create memory desc for backward memory - memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) - : createMD({bs_, ic_}, format::nc); - memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) - : createMD({oc_, ic_}, format::oi); - memory::desc oMD = createMD({bs_, oc_}, format::nc); - memory::desc bMD = bDiff != NULL ? 
createMD({oc_}, format::x) - : createMD({}, format::format_undef); - - if (inVal_) { - // update data - inVal_->set_data_handle(iData); - } else { - LOG(FATAL) << "Should not be empty"; - // inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); - } - - // create memory primitive desc and memory self - wgtGrad_.reset(new memory(memory::primitive_desc(wMD, engine_), wDiff)); - outGrad_.reset(new memory(memory::primitive_desc(oMD, engine_), oDiff)); + CHECK(inVal_) << "Should have input value"; + const MatrixPtr& wgt = weight_->getWGrad(); + const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; + const MatrixPtr& out = output_.grad; + + wgtGrad_ = MKLDNNMatrix::create( + wgt, wgtVal_->getDims(), wgtVal_->getFormat(), engine_); + biasGrad_ = + hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; - fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, iMD, wMD, oMD); + outGrad_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); + // change original output to mkldnn output + // TODO: right? + output_.grad = std::dynamic_pointer_cast(outGrad_); + + // create memory primitive desc + fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, + inVal_->getMD(), + wgtGrad_->getMD(), + outGrad_->getMD()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - fc_bwdWgt::desc bwdWgtDesc = bDiff != NULL - ? fc_bwdWgt::desc(iMD, wMD, bMD, oMD) - : fc_bwdWgt::desc(iMD, wMD, oMD); + fc_bwdWgt::desc bwdWgtDesc = + hasBias ? fc_bwdWgt::desc(inVal_->getMD(), + wgtGrad_->getMD(), + biasGrad_->getMD(), + outGrad_->getMD()) + : fc_bwdWgt::desc( + inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); fc_bwdWgt::primitive_desc bwdWgtPD = fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); - if (bDiff != NULL) { - biasGrad_.reset(new memory(memory::primitive_desc(bMD, engine_), bDiff)); + if (hasBias) { bwdWgt_.reset( new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); } else { @@ -242,13 +235,19 @@ void MKLDNNFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdWgt_); /// backward data - if (iDiff == NULL) { + const MatrixPtr& in = getInputGrad(0); + if (in == nullptr) { return; } - fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(iMD, wMD, oMD); + fc_bwdData::desc bwdDataDesc = + fc_bwdData::desc(inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); - inGrad_.reset(new memory(memory::primitive_desc(iMD, engine_), iDiff)); + + // TODO: check right, just from ingrad? 
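+ // (reuses the dims and format of the input value, so the backward data primitive writes straight into the paddle input grad buffer)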
+ inGrad_ = + MKLDNNMatrix::create(in, inVal_->getDims(), inVal_->getFormat(), engine_); + CHECK(wgtVal_) << "Should have weight memory"; bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); pipelineBwd_.push_back(*bwdData_); @@ -264,7 +263,7 @@ void MKLDNNFcLayer::forward(PassType passType) { // update input data // since it might be changed if this is after data layer real* iData = getInputValue(0)->getData(); - inVal_->set_data_handle(iData); + inVal_->updateData(iData); // just submit forward pipeline stream_->submit(pipelineFwd_); @@ -288,7 +287,7 @@ void MKLDNNFcLayer::backward(const UpdateCallback& callback) { // update diff real* oDiff = getOutputGrad()->getData(); - outGrad_->set_data_handle(oDiff); + outGrad_->updateData(oDiff); // just sumbmit backward pipeline stream_->submit(pipelineBwd_); diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index b44095befb66a..fbd62d9aaa306 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -52,16 +52,15 @@ class MKLDNNLayer : public Layer { std::vector pipelineFwd_; std::vector pipelineBwd_; - // TODO(TJ): change below memory as MKLDNNMatrixPtr type - // MKLDNNMatrixPtr ; + // MKLDNNMatrixPtr MKLDNNMatrixPtr inVal_; - std::shared_ptr inGrad_; + MKLDNNMatrixPtr inGrad_; MKLDNNMatrixPtr outVal_; - std::shared_ptr outGrad_; + MKLDNNMatrixPtr outGrad_; MKLDNNMatrixPtr wgtVal_; - std::shared_ptr wgtGrad_; + MKLDNNMatrixPtr wgtGrad_; MKLDNNMatrixPtr biasVal_; - std::shared_ptr biasGrad_; + MKLDNNMatrixPtr biasGrad_; public: explicit MKLDNNLayer(const LayerConfig& config) @@ -84,17 +83,24 @@ class MKLDNNLayer : public Layer { virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." + << "Please set WITH_MKLDNN=ON " + << "and set use_mkldnn=True"; + if (useGpu_ == true) { + LOG(WARNING) << "Do not support GPU yet, will change to useGpu = false"; + useGpu_ = false; + } + + // set device id before Layer::init + setDevice(MKLDNN_DEVICE); + // change param device to MKLDNN device + setParamsDevice(MKLDNN_DEVICE, parameterMap); if (!Layer::init(layerMap, parameterMap)) { return false; } - CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." - << "Please set WITH_MKLDNN=ON " - << "and set use_mkldnn=True"; stream_.reset(new MKLDNNStream()); engine_ = CPUEngine::Instance().getEngine(); - - setDeviceID(MKLDNN_DEVICE); return true; } @@ -136,10 +142,33 @@ class MKLDNNLayer : public Layer { } protected: - void setDeviceID(int id) { - deviceId_ = id; - output_.deviceId = id; - // TODO: handle mkldnn device or add mkldnn device to other + /** + * Set deviceId of this layer. + */ + void setDevice(int id) { deviceId_ = id; } + + /** + * Set deviceId of the params used in this layer. 
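+ * (looks up each input parameter, and the bias parameter if present, in parameterMap and calls setDevice(id) on it)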
+ */ + void setParamsDevice(int id, const ParameterMap& parameterMap) { + for (auto& inputConfig : config_.inputs()) { + if (inputConfig.has_input_parameter_name()) { + ParameterPtr parameter; + std::string name = inputConfig.input_parameter_name(); + CHECK(mapGet(name, parameterMap, ¶meter)) + << "Cannot find input parameter " << name << " for layer " + << getName(); + parameter->setDevice(id); + } + } + if (config_.has_bias_parameter_name()) { + ParameterPtr parameter; + std::string name = config_.bias_parameter_name(); + CHECK(mapGet(name, parameterMap, ¶meter)) + << "Cannot find bias parameter " << name << " for layer " + << getName(); + parameter->setDevice(id); + } } }; diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 73eb50d2a0b3a..54c0a1fdcbc47 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -44,6 +44,8 @@ class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory { set_data_handle(CpuMatrix::getData()); } + ~MKLDNNMatrix() {} + static MKLDNNMatrixPtr create( const MatrixPtr& m, mkldnn::memory::dims dims, @@ -52,21 +54,42 @@ class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory { mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); /** - * Get primitive descriptor + * Get primitive descriptor. */ mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); } /** - * Get memory descriptor + * Get memory descriptor. */ mkldnn::memory::desc getMD() { return getPD().desc(); } /** - * Get format + * Get dims. */ - int getFormat() { return getMD().data.format; } + mkldnn::memory::dims getDims() { + mkldnn::memory::dims dst; + int* src = getMD().data.dims; + int ndims = getMD().data.ndims; + dst.resize(ndims); + for (int i = 0; i < ndims; ++i) { + dst[i] = src[i]; + } + return dst; + } - ~MKLDNNMatrix() {} + /** + * Get format. + */ + mkldnn::memory::format getFormat() { + return (mkldnn::memory::format)(getMD().data.format); + } + + /** + * Update the memory data handle. + * Caution: This will not check the buffer size of the data, + * it should be coverd by user. + */ + void updateData(void* data) { set_data_handle(data); } }; } // namespace paddle From 48d87e5e912ad084ccc63dae8649f90a3f0989ba Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 23 Aug 2017 16:47:51 +0800 Subject: [PATCH 06/12] pass test, support input CPU device --- paddle/gserver/layers/Layer.h | 35 +++++--- paddle/gserver/layers/MKLDNNFcLayer.cpp | 108 +++++++++++++++--------- paddle/gserver/layers/MKLDNNLayer.h | 81 +++++++++++++++--- paddle/math/Allocator.h | 6 ++ paddle/math/MKLDNNMatrix.cpp | 71 +++++++++++++--- paddle/math/MKLDNNMatrix.h | 49 ++++++++--- 6 files changed, 258 insertions(+), 92 deletions(-) diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h index ec4d093e0cac9..edef36194aabd 100644 --- a/paddle/gserver/layers/Layer.h +++ b/paddle/gserver/layers/Layer.h @@ -82,6 +82,7 @@ class Layer { Argument output_; /// Several outputs stored on different devices, used in 'parallel_nn' case, /// and record them by deviceId_. + /// Also used in 'use_mkldnn' case. std::vector outputOtherDevice_; /// If there are several outputs, map them by each name. std::map outputMap_; @@ -177,6 +178,13 @@ class Layer { return inputLayer.getOutput(deviceId_); } + /** + * Get the argument of input layer with deviceId. + */ + const Argument& getInput(size_t inputIndex, int deviceId) const { + return inputLayers_[inputIndex]->getOutput(deviceId); + } + /** * Get the forward-input value. 
*/ @@ -191,6 +199,13 @@ class Layer { return inputLayer.getOutput(deviceId_).value; } + /** + * Get the forward-input value with deviceId. + */ + const MatrixPtr& getInputValue(int inputIndex, int deviceId) { + return inputLayers_[inputIndex]->getOutput(deviceId).value; + } + /** * Get the forward-input grad. */ @@ -205,6 +220,13 @@ class Layer { return inputLayer.getOutput(deviceId_).grad; } + /** + * Get the forward-input grad. + */ + const MatrixPtr& getInputGrad(int inputIndex, int deviceId) { + return inputLayers_[inputIndex]->getOutput(deviceId).grad; + } + /** * Get the forward-input label. */ @@ -326,19 +348,6 @@ class Layer { if (deviceId == getDeviceId()) { return output_; } else { - bool CPU2MKLDNN = - getDeviceId() == CPU_DEVICE && deviceId == MKLDNN_DEVICE; - bool MKLDNN2CPU = - getDeviceId() == MKLDNN_DEVICE && deviceId == CPU_DEVICE; - if (CPU2MKLDNN) { - // TODO: do something - return output_; - } else if (MKLDNN2CPU) { - // TODO: do something - return output_; - } - - // TODO: handle mkldnn device or add mkldnn device to other for (size_t i = 0; i < outputOtherDevice_.size(); i++) { if (outputOtherDevice_[i].deviceId == deviceId) { return outputOtherDevice_[i]; diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index 5463104469632..a3291e6a8fb75 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -97,7 +97,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { } void MKLDNNFcLayer::reshape() { - const Argument& input = getInput(0); + const Argument& input = getInput(0, getPrev(0)->getDeviceId()); int batchSize = input.getBatchSize(); if (bs_ == batchSize) { return; @@ -135,35 +135,43 @@ void MKLDNNFcLayer::reshape() { void MKLDNNFcLayer::resetFwd() { bool hasBias = biases_ && biases_->getW(); - const MatrixPtr& in = getInputValue(0); const MatrixPtr& wgt = weight_->getW(); const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; const MatrixPtr& out = output_.value; - if (getPrev(0)->getDeviceId() == MKLDNN_DEVICE) { + if (prevIsMKLDNN()) { + const MatrixPtr& in = getInputValue(0); inVal_ = std::dynamic_pointer_cast(in); CHECK(inVal_) << "Input should be MKLDNNMatrix"; - // TODO: change input nchw to nc if available - // inVal_->downSpatial() } else { + CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet"; + const MatrixPtr& in = getInputValue(0, CPU_DEVICE); inVal_ = MKLDNNMatrix::create( - in, - hasSpatial_ ? memory::dims{bs_, ic_, ih_, iw_} : memory::dims{bs_, ic_}, - hasSpatial_ ? format::nchw : format::nc, - engine_); + in, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_); } - + inVal_->downSpatial(); wgtVal_ = MKLDNNMatrix::create( - wgt, - hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_}, - hasSpatial_ ? format::oihw : format::oi, - engine_); + wgt, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_); + wgtVal_->downSpatial(); biasVal_ = hasBias ? 
MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); - // change original output to mkldnn output + // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(outVal_); + if (!nextIsMKLDNN()) { + Argument cpuOutput; + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { + cpuOutput = outputOtherDevice_[i]; + } + } + cpuOutput.setFrameHeight(output_.getFrameHeight()); + cpuOutput.setFrameWidth(output_.getFrameWidth()); + + // fc cpu output value do not need convert + cpuOutput.value = output_.value; + } // create forward handle prop_kind pk = prop_kind::forward; @@ -176,12 +184,13 @@ void MKLDNNFcLayer::resetFwd() { : fc_fwd::desc( pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - if (hasBias) { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); } else { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); } + printValueFormatFlow(); + pipelineFwd_.clear(); pipelineFwd_.push_back(*fwd_); } @@ -197,17 +206,24 @@ void MKLDNNFcLayer::resetBwd() { CHECK(inVal_) << "Should have input value"; const MatrixPtr& wgt = weight_->getWGrad(); const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; - const MatrixPtr& out = output_.grad; - wgtGrad_ = MKLDNNMatrix::create( - wgt, wgtVal_->getDims(), wgtVal_->getFormat(), engine_); - biasGrad_ = - hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; + if (nextIsMKLDNN()) { + // can not directly cast outputgrad to mkldnnmatrix, + // since each layer can not write the inputgrad to mkldnn inputgrad. + // So just create from matrix with outputvalue format. + const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad; + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + // TODO: maybe need merge topdiffs + } else { + // TODO: merge topdiffs + const MatrixPtr& out = getOutput(CPU_DEVICE).grad; + // fc do not need to convert from cpu device since output always nc + // only need create from cpu device + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + } - outGrad_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); - // change original output to mkldnn output - // TODO: right? - output_.grad = std::dynamic_pointer_cast(outGrad_); + wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPD()); + biasGrad_ = hasBias ? 
MKLDNNMatrix::create(bias, biasVal_->getPD()) : nullptr; // create memory primitive desc fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, @@ -235,21 +251,38 @@ void MKLDNNFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdWgt_); /// backward data - const MatrixPtr& in = getInputGrad(0); - if (in == nullptr) { - return; + if (prevIsMKLDNN()) { + const MatrixPtr& in = getInputGrad(0, MKLDNN_DEVICE); + if (in == nullptr) { + return; + } + if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { + // TODO: many mkldnn bots + // add sum handle + } else { + inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); + } + } else { + const MatrixPtr& in = getInputGrad(0, CPU_DEVICE); + if (in == nullptr) { + return; + } + if (getInput(0, CPU_DEVICE).getAllCount() > 1) { + // TODO: many bots + // add sum handle + } else { + inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); + } } + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); - // TODO: check right, just from ingrad? - inGrad_ = - MKLDNNMatrix::create(in, inVal_->getDims(), inVal_->getFormat(), engine_); - CHECK(wgtVal_) << "Should have weight memory"; bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); + printGradFormatFlow(); pipelineBwd_.push_back(*bwdData_); } @@ -259,11 +292,7 @@ void MKLDNNFcLayer::forward(PassType passType) { { REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); - - // update input data - // since it might be changed if this is after data layer - real* iData = getInputValue(0)->getData(); - inVal_->updateData(iData); + syncInputValue(); // just submit forward pipeline stream_->submit(pipelineFwd_); @@ -285,10 +314,7 @@ void MKLDNNFcLayer::backward(const UpdateCallback& callback) { REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); resetBwd(); - // update diff - real* oDiff = getOutputGrad()->getData(); - outGrad_->updateData(oDiff); - + syncOutputGrad(); // just sumbmit backward pipeline stream_->submit(pipelineBwd_); } diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index fbd62d9aaa306..3dd17a36ff7ee 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -125,23 +125,80 @@ class MKLDNNLayer : public Layer { << ", oh: " << oh_ << ", ow: " << ow_; } - // TODO(TJ): move to MkldnnMatrix - // create memory desc - inline mkldnn::memory::desc createMD( - mkldnn::memory::dims dims, - mkldnn::memory::format fmt, - mkldnn::memory::data_type type = mkldnn::memory::data_type::f32) { - // TODO(TJ): isFmtSuppoted(fmt) - return mkldnn::memory::desc(dims, type, fmt); + /** + * Print the mkldnn memory format flow of value + */ + virtual void printValueFormatFlow() { + if (inVal_ && outVal_) { + VLOG(MKLDNN_FMTS) << "value format flow --- " << inVal_->getFormat() + << " >>> " << outVal_->getFormat(); + } } - void resetMKLDNNOutput(size_t height, size_t width) { - Layer::resetOutput(height, width); - // get valu and grad, use mkldnn matrix instaed - // output_.value; + /** + * Print the mkldnn memory format flow of grad + */ + virtual void printGradFormatFlow() { + if (inGrad_ && outGrad_) { + VLOG(MKLDNN_FMTS) << "grad format flow --- " << inGrad_->getFormat() + << " <<< " << outGrad_->getFormat(); + } } protected: + /** + * If next layer only has MKLDNN type. + * Otherwise, only support otherdevice CPU device. 
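+ * (returns true only when no other-device output is attached, i.e. every consumer reads the MKLDNN output directly)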
+ */ + bool nextIsMKLDNN() { + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) + << "Only support other device is CPU yet"; + } + return outputOtherDevice_.size() == 0; + } + + /** + * Is previous layer MKLDNN type. + * Otherwise, only support otherdevice CPU device. + */ + bool prevIsMKLDNN(int index = 0) { + int prevDevice = getPrev(index)->getDeviceId(); + if (prevDevice == MKLDNN_DEVICE) { + return true; + } else { + // do not support GPU yet + CHECK_EQ(prevDevice, CPU_DEVICE) << "Only support CPU yet"; + return false; + } + } + + /** + * Sync input value data + */ + void syncInputValue() { + if (prevIsMKLDNN()) { + return; + } + real* iData = getInputValue(0, CPU_DEVICE)->getData(); + // update input data + // since it might be changed if this is after data layer + inVal_->updateData(iData); + } + + /** + * Sync output grad data + */ + void syncOutputGrad() { + if (nextIsMKLDNN()) { + return; + } + + // update diff + real* oDiff = getOutput(CPU_DEVICE).grad->getData(); + outGrad_->updateData(oDiff); + } + /** * Set deviceId of this layer. */ diff --git a/paddle/math/Allocator.h b/paddle/math/Allocator.h index 666a8b8368e3e..94ef561f066a1 100644 --- a/paddle/math/Allocator.h +++ b/paddle/math/Allocator.h @@ -48,7 +48,13 @@ class CpuAllocator : public Allocator { */ virtual void* alloc(size_t size) { void* ptr; +#ifdef PADDLE_USE_MKLDNN + // refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp + // memory alignment + CHECK_EQ(posix_memalign(&ptr, 4096ul, size), 0); +#else CHECK_EQ(posix_memalign(&ptr, 32ul, size), 0); +#endif CHECK(ptr) << "Fail to allocate CPU memory: size=" << size; return ptr; } diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 44fc54278c993..24d54ec0f7313 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -18,29 +18,74 @@ using namespace mkldnn; // NOLINT namespace paddle { -MKLDNNMatrixPtr MKLDNNMatrix::create(const MatrixPtr& m, - memory::dims dims, - memory::format fmt, - engine& eg, - mkldnn::memory::data_type dtype) { - CpuMatrixPtr cpuM = std::dynamic_pointer_cast(m); - CHECK(cpuM) << "Only support create from CPU matrix yet"; - - size_t ndims = dims.size(); +MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) { + memory::desc md = pd.desc(); + size_t ndims = md.data.ndims; + int* dims = md.data.dims; CHECK(ndims > 0) << "Input dims should not be empty"; - size_t cnt = 1; + size_t cnts = 1; for (size_t i = 0; i < ndims; ++i) { - cnt *= dims[i]; + cnts *= dims[i]; } - CHECK_EQ(cnt, m->getElementCnt()) << "Count size does not match"; + if (m == nullptr) { + size_t height = dims[0]; + size_t width = cnts / dims[0]; + // LOG(INFO) << height << "," << width; + m = Matrix::create(height, width, false, false); + } + + CHECK(m) << " Matrix should not be empty"; + CpuMatrixPtr cpuMatrix = std::dynamic_pointer_cast(m); + CHECK(cpuMatrix) << "Only support create from CPU matrix yet"; + + CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match"; size_t width = m->getWidth(); size_t height = m->getHeight(); real* data = m->getData(); + return std::make_shared(data, height, width, pd); +} +MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, + memory::dims dims, + memory::format fmt, + engine& eg, + mkldnn::memory::data_type dtype) { memory::desc md = memory::desc(dims, dtype, fmt); memory::primitive_desc pd = memory::primitive_desc(md, eg); - return std::make_shared(data, height, width, pd); + return 
create(m, pd); +} + +void MKLDNNMatrix::downSpatial() { + int fmt = getFormat(); + if (!(fmt == memory::format::nchw || fmt == memory::format::oihw)) { + // only support nchw and oihw yet, later can support more like nhwc, ihwo + return; + } + + memory::dims srcDims = getDims(); + const int H = 2, W = 3; + if (srcDims[H] != 1 || srcDims[W] != 1) { + // can not down spatial + return; + } + + memory::dims dstDims = memory::dims{srcDims[0], srcDims[1]}; + memory::format dstFmt; + switch (fmt) { + case memory::format::nchw: + dstFmt = memory::format::nc; + break; + case memory::format::oihw: + dstFmt = memory::format::oi; + break; + default: + LOG(FATAL) << "unsupported format"; + } + memory::desc md = memory::desc(dstDims, getDtype(), dstFmt); + memory::primitive_desc pd = memory::primitive_desc(md, getEngine()); + void* data = getData(); + memory(pd, data); } } // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 54c0a1fdcbc47..05adc867c2076 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -39,20 +39,37 @@ class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory { mkldnn::memory::primitive_desc pd) : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {} - MKLDNNMatrix(size_t height, size_t width, mkldnn::memory::primitive_desc pd) - : CpuMatrix(height, width, false), mkldnn::memory(pd) { - set_data_handle(CpuMatrix::getData()); - } - ~MKLDNNMatrix() {} + /** + * Create MKLDNNMatrix from a MatrixPtr and memory primitive_desc + */ + static MKLDNNMatrixPtr create(MatrixPtr m, mkldnn::memory::primitive_desc pd); + + /** + * Create MKLDNNMatrix from a MatrixPtr and memory details info + */ static MKLDNNMatrixPtr create( - const MatrixPtr& m, + MatrixPtr m, mkldnn::memory::dims dims, mkldnn::memory::format fmt, mkldnn::engine& eg, mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); +public: + /** + * Dimensionality reduction. + * Change format "nchw --> nc" or "oihw --> oi" if the h and w are both 1 + */ + void downSpatial(); + + /** + * Update the memory data handle. + * Caution: This will not check the buffer size of the data, + * it should be covered by user. + */ + void updateData(void* data) { set_data_handle(data); } + /** * Get primitive descriptor. */ mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); } /** * Get memory descriptor. */ mkldnn::memory::desc getMD() { return getPD().desc(); } /** - * Get dims. + * Get dimensions. */ mkldnn::memory::dims getDims() { + mkldnn::memory::desc md = getMD(); + const int* src = md.data.dims; + int ndims = md.data.ndims; mkldnn::memory::dims dst; - int* src = getMD().data.dims; - int ndims = getMD().data.ndims; dst.resize(ndims); for (int i = 0; i < ndims; ++i) { dst[i] = src[i]; } return dst; } /** * Get format. */ mkldnn::memory::format getFormat() { return (mkldnn::memory::format)(getMD().data.format); } /** - * Update the memory data handle. - * Caution: This will not check the buffer size of the data, - * it should be coverd by user. + * Get memory data type. */ - void updateData(void* data) { set_data_handle(data); } + mkldnn::memory::data_type getDtype() { + return (mkldnn::memory::data_type)(getMD().data.data_type); + } + + /** + * Get engine. 
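+ * (the mkldnn::engine this memory primitive was created with)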
+ */ + mkldnn::engine getEngine() { return getPD().get_engine(); } }; } // namespace paddle From 780c8d969e0d2d220df19a672c141ff7c44f53d2 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 23 Aug 2017 17:03:16 +0800 Subject: [PATCH 07/12] make downSpatial work, and remove hasSpatial_ --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 4 ---- paddle/gserver/layers/MKLDNNFcLayer.h | 5 +---- paddle/math/MKLDNNMatrix.cpp | 7 ++++++- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index a3291e6a8fb75..a5555c4618aa3 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -111,10 +111,6 @@ void MKLDNNFcLayer::reshape() { if (iw_ == 0) { iw_ = 1; } - hasSpatial_ = true; - if (ih_ == 1 && iw_ == 1) { - hasSpatial_ = false; - } CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); ic_ = iLayerSize_ / (ih_ * iw_); CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index 7954852a23f81..e2657a8d5e9d9 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -32,16 +32,13 @@ class MKLDNNFcLayer : public MKLDNNLayer { // if has already init the weight bool hasInitedWgt_; - // if input layer has image size info (ih>1 && iw>1) - bool hasSpatial_; - // fc weight and bias std::unique_ptr weight_; std::unique_ptr biases_; public: explicit MKLDNNFcLayer(const LayerConfig& config) - : MKLDNNLayer(config), hasInitedWgt_(false), hasSpatial_(true) {} + : MKLDNNLayer(config), hasInitedWgt_(false) {} ~MKLDNNFcLayer() {} diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 24d54ec0f7313..94df9c155084c 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -85,7 +85,12 @@ void MKLDNNMatrix::downSpatial() { memory::desc md = memory::desc(dstDims, getDtype(), dstFmt); memory::primitive_desc pd = memory::primitive_desc(md, getEngine()); void* data = getData(); - memory(pd, data); + mkldnn_primitive_t result; + mkldnn::error::wrap_c_api( + mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr), + "could not create a memory primitive"); + reset(result); + set_data_handle(data); } } // namespace paddle From 4cc57836f393ada9b65cfeef444662afc34f1109 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 25 Aug 2017 17:20:28 +0800 Subject: [PATCH 08/12] enable reorder --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 39 +++++------------ paddle/math/MKLDNNMatrix.cpp | 57 +++++++++++++++++++++++++ paddle/math/MKLDNNMatrix.h | 33 ++++++++++++-- 3 files changed, 97 insertions(+), 32 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index a5555c4618aa3..ad50c15a7dc70 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -61,39 +61,20 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() { return; } - // TODO(TJ): dst format should get from wgtVal_ - int dstFmt = PARAM_FORMAT_MKLDNN_OI; - int srcFmt = weight_->getParameterPtr()->getHeaderFormat(); - if (srcFmt == dstFmt) { - return; - } - - // The weight_ is transposed from initial paddle weight - MatrixPtr paddleWgt = Matrix::create( - weight_->getW()->getData(), iLayerSize_, oc_, false, false); - - // TODO(TJ): remove this print when do not need differ weights - std::ostringstream ostr; - paddleWgt->print(ostr); - VLOG(MKLDNN_ALL) << "Initial Weight 
from paddle: " << std::endl << ostr.str(); - - // The mkldnn weight is transposed from initial paddle matrix - MatrixPtr paddleWgtT; - paddleWgt->transpose(paddleWgtT, true); - weight_->getW()->copyFrom(*paddleWgtT); - weight_->getParameterPtr()->setHeaderFormat(dstFmt); + CHECK(wgtVal_) << "should have been initialized"; + bool hasNoSpatial_ = ih_ == 1 && iw_ == 1; + auto targetDim = wgtVal_->getDims(); + auto srcFmt = hasNoSpatial_ ? memory::format::io : memory::format::ihwo; + wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim); hasInitedWgt_ = true; } void MKLDNNFcLayer::convertWeightsToPaddle() { - MatrixPtr dnnWgt = weight_->getW(); - MatrixPtr paddleWgt; - dnnWgt->transpose(paddleWgt, true); - - // copy paddle weight and override on weight_ - MatrixPtr dnnWgtT = Matrix::create( - dnnWgt->getData(), dnnWgt->getWidth(), dnnWgt->getHeight(), false, false); - dnnWgtT->copyFrom(*paddleWgt); + CHECK(wgtVal_) << "should have been initialized"; + bool hasNoSpatial_ = ih_ == 1 && iw_ == 1; + auto targetDim = wgtVal_->getDims(); + auto dstFmt = hasNoSpatial_ ? memory::format::io : memory::format::ihwo; + wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); } void MKLDNNFcLayer::reshape() { diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 94df9c155084c..32ae3b1bcf76a 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -56,6 +56,63 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, return create(m, pd); } +void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, + memory::format srcFmt, + memory::dims targetDim) { + memory::format dstFmt = getFormat(); + if (srcFmt == dstFmt) { + return; + } + CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; + real* srcData = getData(); + real* dstData = m->getData(); + reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim); +} + +void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m, + memory::format dstFmt, + memory::dims targetDim) { + memory::format srcFmt = getFormat(); + if (srcFmt == dstFmt) { + return; + } + CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; + real* srcData = getData(); + real* dstData = m->getData(); + reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim); +} + +void MKLDNNMatrix::reorderOnce(void* srcData, + void* dstData, + memory::format srcFmt, + memory::format dstFmt, + memory::dims dm) { + CHECK(srcData); + CHECK(dstData); + MatrixPtr tmpSrc; + if (dstData == srcData) { + // inplace data + size_t sz = 1; + for (size_t i = 0; i < dm.size(); ++i) { + sz *= dm[i]; + } + tmpSrc = Matrix::create(sz, 1, false, false); + tmpSrc->copyFrom((real*)srcData, sz); + srcData = tmpSrc->getData(); + } + + auto dtype = this->getDtype(); + auto srcMD = memory::desc(dm, dtype, srcFmt); + auto dstMD = memory::desc(dm, dtype, dstFmt); + + auto eg = this->getEngine(); + auto src = memory(memory::primitive_desc(srcMD, eg), srcData); + auto dst = memory(memory::primitive_desc(dstMD, eg), dstData); + + auto r = reorder(src, dst); + stream(stream::kind::eager).submit({r}).wait(); +} + void MKLDNNMatrix::downSpatial() { int fmt = getFormat(); if (!(fmt == memory::format::nchw || fmt == memory::format::oihw)) { diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 05adc867c2076..ea3fd7d461cf8 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -21,9 +21,6 @@ limitations under the License. 
*/ namespace paddle { -static const std::map PARAM_FOARMAT_MAP = - {{mkldnn::memory::format::oi, PARAM_FORMAT_MKLDNN_OI}}; - class MKLDNNMatrix; typedef std::shared_ptr MKLDNNMatrixPtr; @@ -57,6 +54,26 @@ class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory { mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); public: + /** + * Reorder this MKLDNNMatrix from other format. + * Support inplace reorder + * Pay attention: this function would only reorder the data layout. + * will NOT change this original dim or format info + */ + void reorderDataFrom(const MKLDNNMatrixPtr& m, + memory::format srcFmt, + memory::dims targetDim); + + /** + * Reorder this MKLDNNMatrix to other format. + * Support inplace reorder + * Pay attention: this function would only reorder the data layout. + * will NOT change the dst dim or format info + */ + void reorderDataTo(const MKLDNNMatrixPtr& m, + memory::format dstFmt, + memory::dims targetDim); + /** * Dimensionality reduction. * Change format "nchw --> nc" or "oihw --> oi" if the h and w are both 1 @@ -113,6 +130,16 @@ class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory { * Get engine. */ mkldnn::engine getEngine() { return getPD().get_engine(); } + +protected: + /** + * Do once reorder supported inplace. + */ + void reorderOnce(void* srcData, + void* dstData, + memory::format srcFmt, + memory::format dstFmt, + memory::dims dm); }; } // namespace paddle From 98b7c6736445de1f287156e933b0d625f648e6da Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 28 Aug 2017 09:52:58 +0800 Subject: [PATCH 09/12] add todo --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index ad50c15a7dc70..d38e6a2099e52 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -184,15 +184,14 @@ void MKLDNNFcLayer::resetBwd() { const MatrixPtr& wgt = weight_->getWGrad(); const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; + // TODO(TJ): merge topdiffs if (nextIsMKLDNN()) { // can not directly cast outputgrad to mkldnnmatrix, // since each layer can not write the inputgrad to mkldnn inputgrad. // So just create from matrix with outputvalue format. 
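// (i.e. wrap the existing output grad buffer with the primitive desc of outVal_)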
const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad; outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); - // TODO: maybe need merge topdiffs } else { - // TODO: merge topdiffs const MatrixPtr& out = getOutput(CPU_DEVICE).grad; // fc do not need to convert from cpu device since output always nc // only need create from cpu device outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); } @@ -234,8 +233,7 @@ void MKLDNNFcLayer::resetBwd() { return; } if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { - // TODO: many mkldnn bots - // add sum handle + // TODO(TJ): use outputMaps_ ways when merge topdiff done } else { inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); } @@ -245,8 +243,7 @@ void MKLDNNFcLayer::resetBwd() { return; } if (getInput(0, CPU_DEVICE).getAllCount() > 1) { - // TODO: many bots - // add sum handle + // TODO(TJ): use outputMaps_ ways when merge topdiff done } else { inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); } From fe51f726a2da85b0cb96734bd9b156760b044cf9 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 28 Aug 2017 10:44:31 +0800 Subject: [PATCH 10/12] fix cmake --- paddle/math/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index 8afe6b509d24a..68b5296228cd7 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -16,10 +16,10 @@ file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . *.cpp) if(NOT WITH_MKLDNN) - file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") - file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") - list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER}) - list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES}) + set(DNN_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/MKLDNNMatrix.h") + set(DNN_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/MKLDNNMatrix.cpp") + list(REMOVE_ITEM MATH_HEADERS "${DNN_HEADER}") + list(REMOVE_ITEM MATH_SOURCES "${DNN_SOURCE}") message(STATUS "Skip compiling with MKLDNNMatrix") else() message(STATUS "Compile with MKLDNNMatrix") From bfbd066fdd1c4a81266864bf837d89742b3f2ad6 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 29 Aug 2017 19:55:44 +0800 Subject: [PATCH 11/12] refine --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 117 ++++++++++++------------ paddle/gserver/layers/MKLDNNFcLayer.h | 2 + paddle/gserver/layers/MKLDNNLayer.h | 48 +++++++--- paddle/math/MKLDNNMatrix.cpp | 25 ++--- paddle/math/MKLDNNMatrix.h | 29 +++--- 5 files changed, 118 insertions(+), 103 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index d38e6a2099e52..a08cca318e5ff 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -77,6 +77,24 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); } +void MKLDNNFcLayer::convertOutputToOtherDevice() { + copyOutputInfoToOtherDevice(); + // find other cpu device and reorder output to cpu device + int cnt = 0; + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { + // fc cpu output value do not need convert + // just share the pointer + outputOtherDevice_[i].value = output_.value; + ++cnt; + } + } + + if (cnt > 1) { + LOG(WARNING) << "should not have more than one CPU device"; + } +} + void MKLDNNFcLayer::reshape() { const Argument& input = getInput(0, getPrev(0)->getDeviceId()); int batchSize = input.getBatchSize(); @@ -116,7 +134,7 @@ void MKLDNNFcLayer::resetFwd() { bool hasBias = biases_ && biases_->getW(); const MatrixPtr& wgt = weight_->getW(); const MatrixPtr& bias = hasBias ? 
biases_->getW() : nullptr; const MatrixPtr& out = output_.value; - if (prevIsMKLDNN()) { + if (prevIsOnlyMKLDNN()) { const MatrixPtr& in = getInputValue(0); inVal_ = std::dynamic_pointer_cast(in); CHECK(inVal_) << "Input should be MKLDNNMatrix"; @@ -136,30 +154,21 @@ void MKLDNNFcLayer::resetFwd() { // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(outVal_); - if (!nextIsMKLDNN()) { - Argument cpuOutput; - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { - cpuOutput = outputOtherDevice_[i]; - } - } - cpuOutput.setFrameHeight(output_.getFrameHeight()); - cpuOutput.setFrameWidth(output_.getFrameWidth()); - - // fc cpu output value do not need convert - cpuOutput.value = output_.value; + if (!nextIsOnlyMKLDNN()) { + convertOutputToOtherDevice(); } // create forward handle prop_kind pk = prop_kind::forward; - fc_fwd::desc fwdDesc = - hasBias ? fc_fwd::desc(pk, - inVal_->getMD(), - wgtVal_->getMD(), - biasVal_->getMD(), - outVal_->getMD()) - : fc_fwd::desc( - pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD()); + fc_fwd::desc fwdDesc = hasBias ? fc_fwd::desc(pk, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + biasVal_->getMemoryDesc(), + outVal_->getMemoryDesc()) + : fc_fwd::desc(pk, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + outVal_->getMemoryDesc()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); if (hasBias) { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); @@ -184,36 +193,38 @@ void MKLDNNFcLayer::resetBwd() { const MatrixPtr& wgt = weight_->getWGrad(); const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; - // TODO(TJ): merge topdiffs - if (nextIsMKLDNN()) { + // TODO(TJ): merge outgrad + if (nextIsOnlyMKLDNN()) { // can not directly cast outputgrad to mkldnnmatrix, // since each layer can not write the inputgrad to mkldnn inputgrad. // So just create from matrix with outputvalue format. const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad; - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); } else { const MatrixPtr& out = getOutput(CPU_DEVICE).grad; // fc do not need to convert from cpu device since output always nc // only need create from cpu device - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); } - wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPD()); - biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPD()) : nullptr; + wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPrimitiveDesc()); + biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPrimitiveDesc()) + : nullptr; // create memory primitive desc fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, - inVal_->getMD(), - wgtGrad_->getMD(), - outGrad_->getMD()); + inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - fc_bwdWgt::desc bwdWgtDesc = - hasBias ? fc_bwdWgt::desc(inVal_->getMD(), - wgtGrad_->getMD(), - biasGrad_->getMD(), - outGrad_->getMD()) - : fc_bwdWgt::desc( - inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); + fc_bwdWgt::desc bwdWgtDesc = hasBias + ? 
diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h
index e2657a8d5e9d9..e138a6faf181c 100644
--- a/paddle/gserver/layers/MKLDNNFcLayer.h
+++ b/paddle/gserver/layers/MKLDNNFcLayer.h
@@ -72,6 +72,8 @@ class MKLDNNFcLayer : public MKLDNNLayer {
    * only would be called when needed
    */
   void resetBwd();
+
+  void convertOutputToOtherDevice() override;
 };
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h
index 3dd17a36ff7ee..8fe9630e82afb 100644
--- a/paddle/gserver/layers/MKLDNNLayer.h
+++ b/paddle/gserver/layers/MKLDNNLayer.h
@@ -86,10 +86,7 @@ class MKLDNNLayer : public Layer {
     CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn."
                             << "Please set WITH_MKLDNN=ON "
                             << "and set use_mkldnn=True";
-    if (useGpu_ == true) {
-      LOG(WARNING) << "Do not support GPU yet, will change to useGpu = false";
-      useGpu_ = false;
-    }
+    CHECK(!useGpu_) << "Do not support GPU yet";
 
     // set device id before Layer::init
     setDevice(MKLDNN_DEVICE);
@@ -116,6 +113,12 @@ class MKLDNNLayer : public Layer {
    */
   virtual void convertWeightsToPaddle() {}
 
+  /**
+   * convert MKLDNN output to other device.
+   * only support CPU device yet
+   */
+  virtual void convertOutputToOtherDevice() {}
+
   /**
    * print info about sizes
    */
@@ -147,22 +150,25 @@ class MKLDNNLayer : public Layer {
 
 protected:
   /**
-   * If next layer only has MKLDNN type.
-   * Otherwise, only support otherdevice CPU device.
+   * copy image size and sequence info to other device
    */
-  bool nextIsMKLDNN() {
+  void copyOutputInfoToOtherDevice() {
     for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
-      CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE)
-          << "Only support other device is CPU yet";
+      outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight());
+      outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth());
+      outputOtherDevice_[i].sequenceStartPositions =
+          output_.sequenceStartPositions;
+      outputOtherDevice_[i].subSequenceStartPositions =
+          output_.subSequenceStartPositions;
+      outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
     }
-    return outputOtherDevice_.size() == 0;
   }
 
   /**
-   * Is previous layer MKLDNN type.
-   * Otherwise, only support otherdevice CPU device.
+   * If the previous layer only has the MKLDNN device.
+   * Otherwise, only support the previous layer using CPU device.
    */
-  bool prevIsMKLDNN(int index = 0) {
+  bool prevIsOnlyMKLDNN(int index = 0) {
     int prevDevice = getPrev(index)->getDeviceId();
     if (prevDevice == MKLDNN_DEVICE) {
       return true;
@@ -173,11 +179,23 @@ class MKLDNNLayer : public Layer {
     }
   }
 
+  /**
+   * If the output only has the MKLDNN device.
+   * Otherwise, other devices should only use the CPU device.
+   */
+  bool nextIsOnlyMKLDNN() {
+    for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
+      CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE)
+          << "Only support other device is CPU yet";
+    }
+    return outputOtherDevice_.size() == 0;
+  }
+
   /**
    * Sync input value data
    */
   void syncInputValue() {
-    if (prevIsMKLDNN()) {
+    if (prevIsOnlyMKLDNN()) {
      return;
     }
     real* iData = getInputValue(0, CPU_DEVICE)->getData();
@@ -190,7 +208,7 @@ class MKLDNNLayer : public Layer {
    * Sync output grad data
    */
   void syncOutputGrad() {
-    if (nextIsMKLDNN()) {
+    if (nextIsOnlyMKLDNN()) {
       return;
     }
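copyOutputInfoToOtherDevice() above moves only metadata between devices: frame sizes and sequence positions are propagated while the data buffers stay where they are. A minimal self-contained sketch of that idea, with a hypothetical Info struct standing in for paddle's Argument:

#include <cstddef>
#include <memory>
#include <vector>

// Hypothetical stand-in for paddle's Argument; only the copied fields appear.
struct Info {
  std::size_t frameHeight = 0;
  std::size_t frameWidth = 0;
  std::shared_ptr<std::vector<int>> sequenceStartPositions;
};

// Mirrors what copyOutputInfoToOtherDevice does: sizes and sequence info
// are copied, sequence positions are shared, data values are untouched.
void copyInfo(const Info& src, std::vector<Info>& others) {
  for (Info& o : others) {
    o.frameHeight = src.frameHeight;
    o.frameWidth = src.frameWidth;
    o.sequenceStartPositions = src.sequenceStartPositions;  // shared, not copied
  }
}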
diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp
index 32ae3b1bcf76a..0a355e2644cce 100644
--- a/paddle/math/MKLDNNMatrix.cpp
+++ b/paddle/math/MKLDNNMatrix.cpp
@@ -31,7 +31,6 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
   if (m == nullptr) {
     size_t height = dims[0];
     size_t width = cnts / dims[0];
-    // LOG(INFO) << height << "," << width;
     m = Matrix::create(height, width, false, false);
   }
 
@@ -40,10 +39,8 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
   CHECK(cpuMatrix) << "Only support create from CPU matrix yet";
   CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match";
 
-  size_t width = m->getWidth();
-  size_t height = m->getHeight();
-  real* data = m->getData();
-  return std::make_shared<MKLDNNMatrix>(data, height, width, pd);
+  return std::make_shared<MKLDNNMatrix>(
+      m->getData(), m->getHeight(), m->getWidth(), pd);
 }
 
 MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m,
@@ -51,9 +48,7 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m,
                                      memory::format fmt,
                                      engine& eg,
                                      mkldnn::memory::data_type dtype) {
-  memory::desc md = memory::desc(dims, dtype, fmt);
-  memory::primitive_desc pd = memory::primitive_desc(md, eg);
-  return create(m, pd);
+  return create(m, memory::primitive_desc(memory::desc(dims, dtype, fmt), eg));
 }
 
 void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m,
@@ -64,9 +59,7 @@ void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m,
     return;
   }
   CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal";
-  real* srcData = getData();
-  real* dstData = m->getData();
-  reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim);
+  reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim);
 }
 
 void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m,
@@ -77,9 +70,7 @@ void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m,
     return;
   }
   CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal";
-  real* srcData = getData();
-  real* dstData = m->getData();
-  reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim);
+  reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim);
 }
 
 void MKLDNNMatrix::reorderOnce(void* srcData,
@@ -120,8 +111,9 @@ void MKLDNNMatrix::downSpatial() {
     return;
   }
 
-  memory::dims srcDims = getDims();
+  // TODO(TJ): change H(height) and W(width) if support nhwc or more
   const int H = 2, W = 3;
+  memory::dims srcDims = getDims();
   if (srcDims[H] != 1 || srcDims[W] != 1) {
     // can not down spatial
     return;
@@ -141,13 +133,12 @@ void MKLDNNMatrix::downSpatial() {
   }
   memory::desc md = memory::desc(dstDims, getDtype(), dstFmt);
   memory::primitive_desc pd = memory::primitive_desc(md, getEngine());
-  void* data = getData();
   mkldnn_primitive_t result;
   mkldnn::error::wrap_c_api(
       mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr),
       "could not create a memory primitive");
   reset(result);
-  set_data_handle(data);
+  set_data_handle(getData());
 }
 
 }  // namespace paddle
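The two create() overloads tidied above are the entry points for wrapping a paddle matrix in an mkldnn layout. A hedged usage sketch; the shape {64, 128}, the nc format, and the f32 dtype are illustrative values, and the parameter order is assumed to follow the signatures visible in this hunk:

#include "paddle/math/MKLDNNMatrix.h"

void createBothWays(mkldnn::engine& eg, paddle::MatrixPtr m) {
  using mkldnn::memory;
  // build the primitive descriptor inline from dims/format/engine/dtype,
  // as the refactored overload now does in one expression
  paddle::MKLDNNMatrixPtr a = paddle::MKLDNNMatrix::create(
      m, memory::dims{64, 128}, memory::format::nc, eg, memory::data_type::f32);
  // or wrap a matrix with an already existing primitive descriptor
  paddle::MKLDNNMatrixPtr b = paddle::MKLDNNMatrix::create(m, a->getPrimitiveDesc());
  (void)b;
}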
diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h
index ea3fd7d461cf8..e50f698b49571 100644
--- a/paddle/math/MKLDNNMatrix.h
+++ b/paddle/math/MKLDNNMatrix.h
@@ -56,9 +56,9 @@ class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
 public:
   /**
    * Reorder this MKLDNNMatrix from other format.
-   * Support inplace reorder
-   * Pay attention: this function would only reorder the data layout.
-   * will NOT change this original dim or format info
+   * Support inplace reorder.
+   * @note: this function would only reorder the data layout.
+   * will NOT change this original dim or format info
    */
   void reorderDataFrom(const MKLDNNMatrixPtr& m,
                        memory::format srcFmt,
@@ -66,9 +66,9 @@ class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
 
   /**
    * Reorder this MKLDNNMatrix to other format.
-   * Support inplace reorder
-   * Pay attention: this function would only reorder the data layout.
-   * will NOT change the dst dim or format info
+   * Support inplace reorder.
+   * @note: this function would only reorder the data layout.
+   * will NOT change the dst dim or format info
    */
   void reorderDataTo(const MKLDNNMatrixPtr& m,
                      memory::format dstFmt,
@@ -90,18 +90,20 @@ class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
   /**
    * Get primitive descriptor.
    */
-  mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); }
+  mkldnn::memory::primitive_desc getPrimitiveDesc() {
+    return this->get_primitive_desc();
+  }
 
   /**
    * Get memory descriptor.
    */
-  mkldnn::memory::desc getMD() { return getPD().desc(); }
+  mkldnn::memory::desc getMemoryDesc() { return getPrimitiveDesc().desc(); }
 
   /**
    * Get dimensions.
    */
   mkldnn::memory::dims getDims() {
-    mkldnn::memory::desc md = getMD();
+    mkldnn::memory::desc md = getMemoryDesc();
     const int* src = md.data.dims;
     int ndims = md.data.ndims;
     mkldnn::memory::dims dst;
@@ -116,24 +118,25 @@ class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
    * Get format.
    */
   mkldnn::memory::format getFormat() {
-    return (mkldnn::memory::format)(getMD().data.format);
+    return (mkldnn::memory::format)(getMemoryDesc().data.format);
   }
 
   /**
    * Get memory data type.
   */
   mkldnn::memory::data_type getDtype() {
-    return (mkldnn::memory::data_type)(getMD().data.data_type);
+    return (mkldnn::memory::data_type)(getMemoryDesc().data.data_type);
   }
 
   /**
    * Get engine.
    */
-  mkldnn::engine getEngine() { return getPD().get_engine(); }
+  mkldnn::engine getEngine() { return getPrimitiveDesc().get_engine(); }
 
 protected:
   /**
-   * Do once reorder supported inplace.
+   * Do reorder once.
+   * Can support inplace.
    */
   void reorderOnce(void* srcData,
                    void* dstData,
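After this patch the layout accessors spell out what they return. A short sketch of a call site using the renamed getters; purely illustrative:

#include "paddle/math/MKLDNNMatrix.h"

void inspectLayout(paddle::MKLDNNMatrixPtr mat) {
  mkldnn::memory::primitive_desc pd = mat->getPrimitiveDesc();  // was getPD()
  mkldnn::memory::desc md = mat->getMemoryDesc();               // was getMD()
  mkldnn::memory::dims dims = mat->getDims();    // derived from the desc
  mkldnn::memory::format fmt = mat->getFormat();  // e.g. nc, oi, nchw
  (void)pd; (void)md; (void)dims; (void)fmt;
}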
From c5183caa04557628340983d17a64097f939db132 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Wed, 30 Aug 2017 13:37:51 +0800
Subject: [PATCH 12/12] rename

---
 paddle/gserver/layers/MKLDNNFcLayer.cpp | 29 +++++++++++--------------
 paddle/gserver/layers/MKLDNNLayer.h     | 12 +++++-----
 2 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp
index a08cca318e5ff..8318c8c519a4c 100644
--- a/paddle/gserver/layers/MKLDNNFcLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp
@@ -134,7 +134,7 @@ void MKLDNNFcLayer::resetFwd() {
   const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr;
   const MatrixPtr& out = output_.value;
 
-  if (prevIsOnlyMKLDNN()) {
+  if (inputIsOnlyMKLDNN()) {
     const MatrixPtr& in = getInputValue(0);
     inVal_ = std::dynamic_pointer_cast<MKLDNNMatrix>(in);
     CHECK(inVal_) << "Input should be MKLDNNMatrix";
@@ -154,7 +154,7 @@ void MKLDNNFcLayer::resetFwd() {
   // change original output value to mkldnn output value
   output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
 
-  if (!nextIsOnlyMKLDNN()) {
+  if (!outputIsOnlyMKLDNN()) {
     convertOutputToOtherDevice();
   }
 
@@ -194,19 +194,16 @@ void MKLDNNFcLayer::resetBwd() {
   const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr;
 
   // TODO(TJ): merge outgrad
-  if (nextIsOnlyMKLDNN()) {
-    // can not directly cast outputgrad to mkldnnmatrix,
-    // since each layer can not write the inputgrad to mkldnn inputgrad.
-    // So just create from matrix with outputvalue format.
-    const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad;
-    outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc());
-  } else {
-    const MatrixPtr& out = getOutput(CPU_DEVICE).grad;
-    // fc do not need to convert from cpu device since output always nc
-    // only need create from cpu device
-    outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc());
-  }
-
+  int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
+  // for MKLDNN device:
+  //   can not directly cast outputgrad to mkldnnmatrix,
+  //   since each layer can not write the inputgrad to mkldnn inputgrad.
+  //   So just create from matrix with outputvalue format.
+  // for CPU device:
+  //   fc does not need to convert from cpu device since output is always
+  //   in nc format; only need to create from cpu device
+  const MatrixPtr& out = getOutput(device).grad;
+  outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc());
   wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPrimitiveDesc());
   biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPrimitiveDesc())
                       : nullptr;
@@ -238,7 +235,7 @@ void MKLDNNFcLayer::resetBwd() {
   pipelineBwd_.push_back(*bwdWgt_);
 
   /// backward data
-  int device = prevIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
+  device = inputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
   const MatrixPtr& in = getInputGrad(0, device);
   if (in == nullptr) {
     return;
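The comment block above explains why the output grad is wrapped with create() rather than cast: other layers allocate it as a plain Matrix, so a dynamic cast to MKLDNNMatrix would come back null. A hedged illustration of that point; wrapOutGrad is a hypothetical helper, not part of the patch:

#include <memory>
#include "paddle/math/MKLDNNMatrix.h"

paddle::MKLDNNMatrixPtr wrapOutGrad(paddle::MatrixPtr grad,
                                    paddle::MKLDNNMatrixPtr outVal) {
  auto cast = std::dynamic_pointer_cast<paddle::MKLDNNMatrix>(grad);
  // cast is expected to be null here, since grad was not allocated as an
  // MKLDNNMatrix; wrap the same buffer with the value's primitive descriptor
  return cast ? cast
              : paddle::MKLDNNMatrix::create(grad, outVal->getPrimitiveDesc());
}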
diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h
index 8fe9630e82afb..b983b833d510b 100644
--- a/paddle/gserver/layers/MKLDNNLayer.h
+++ b/paddle/gserver/layers/MKLDNNLayer.h
@@ -151,6 +151,8 @@ class MKLDNNLayer : public Layer {
 protected:
   /**
    * copy image size and sequence info to other device
+   * @note: can not directly use Layer::copyOutputToOtherDevice since here we
+   * only copy the base info and do not copy the data value
    */
   void copyOutputInfoToOtherDevice() {
     for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
@@ -165,10 +167,10 @@ class MKLDNNLayer : public Layer {
   }
 
   /**
-   * If the previous layer only has the MKLDNN device.
+   * If the input only has the MKLDNN device.
    * Otherwise, only support the previous layer using CPU device.
    */
-  bool prevIsOnlyMKLDNN(int index = 0) {
+  bool inputIsOnlyMKLDNN(int index = 0) {
     int prevDevice = getPrev(index)->getDeviceId();
     if (prevDevice == MKLDNN_DEVICE) {
       return true;
@@ -183,7 +185,7 @@ class MKLDNNLayer : public Layer {
    * If the output only has the MKLDNN device.
    * Otherwise, other devices should only use the CPU device.
    */
-  bool nextIsOnlyMKLDNN() {
+  bool outputIsOnlyMKLDNN() {
     for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
       CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE)
           << "Only support other device is CPU yet";
@@ -195,7 +197,7 @@ class MKLDNNLayer : public Layer {
    * Sync input value data
    */
   void syncInputValue() {
-    if (prevIsOnlyMKLDNN()) {
+    if (inputIsOnlyMKLDNN()) {
       return;
     }
     real* iData = getInputValue(0, CPU_DEVICE)->getData();
@@ -208,7 +210,7 @@ class MKLDNNLayer : public Layer {
    * Sync output grad data
    */
   void syncOutputGrad() {
-    if (nextIsOnlyMKLDNN()) {
+    if (outputIsOnlyMKLDNN()) {
       return;
     }