
Update forked paddle repo #4

Merged: 91 commits, merged on Oct 13, 2020
e496640
fix bmm enforce equal batch (#27694)
yaoxuefeng6 Sep 30, 2020
a01bc6b
【paddle.fleet】fleet support non_distributed training in dygraph mode …
danleifeng Sep 30, 2020
7f9b198
Romove grid_sampler and refine example code (#27649)
LielinJiang Sep 30, 2020
488152a
[API 2.0]Update 2.0 api from fluid to paddle. (#27598)
jiweibo Sep 30, 2020
ab85a89
[Dy2stat] Add Resnet Test for V2 APIs (#27459)
zhhsplendid Sep 30, 2020
4d3eefb
Modify the docs for Transformer's APIs. test=document_fix (#27729)
xiemoyuan Sep 30, 2020
9b3ef59
add categorical class (#27695)
pangyoki Sep 30, 2020
a0f1dba
Add visualdl callback function (#27565)
LielinJiang Sep 30, 2020
69a3339
Move dygraph amp api to paddle-2.0 (#27681)
zhiqiu Sep 30, 2020
54c368d
[API 2.0: doc] fix doc of nonzero (#27685)
123malin Sep 30, 2020
9cd8648
Fix api for ErrorClipByValue, code demo of clip_by_norm. test=develop…
ysh329 Sep 30, 2020
7a96d57
Optimize the error messages of the CUDA implementation of activation …
hong19860320 Sep 30, 2020
0cd4907
Add avx512 core instructions check (#27732)
wozna Sep 30, 2020
966447e
Added support for quantization of fusion_gru (#27518)
Oct 1, 2020
b9fda2f
Fix to issue #25537 (#27546)
jczaja Oct 1, 2020
f399bed
Add an option to set number of warmup iterations (#27739)
Oct 1, 2020
a90711c
Add avx512 core instructions check 2 (#27750)
wozna Oct 1, 2020
9f3fb95
[Dy2stat] Add Simnet Test for V2 APIs (#27460)
zhhsplendid Oct 2, 2020
65207b4
Polish the error message of fc, fused_fc_elementwise_layernorm and fu…
Xreki Oct 4, 2020
de60c54
fix doc and example code of huber_loss and npair_loss (#27678)
huangjun12 Oct 8, 2020
7793fd2
add huber_loss to fix ci, test=develop (#27766)
huangjun12 Oct 8, 2020
7698e19
modify doc for paddle.nn.Layer (#27624)
wanghuancoder Oct 8, 2020
7ca66f1
modify Sequential doc, test=develop (#27608)
wanghuancoder Oct 8, 2020
aaa3ee6
modify doc for unique_name.guard unique_name.generate unique_name.swi…
wanghuancoder Oct 8, 2020
c826bcb
modify doc for ParameterList and LayerList (#27609)
wanghuancoder Oct 8, 2020
bcc3472
polish api code & example (#27696)
chenwhql Oct 9, 2020
6076d0a
modify doc for bilinear_tensor_product, test=develop (#27613)
wanghuancoder Oct 9, 2020
5345a58
update readme for 1.8.5,test=document_fix (#27764)
XieYunshen Oct 9, 2020
9089841
Fix bilateral inference shape bug (#26822)
LielinJiang Oct 9, 2020
b9c7c66
add type promotion (#27756)
LielinJiang Oct 9, 2020
d84eb9b
keep network mode unchange when use summary api (#27754)
LielinJiang Oct 9, 2020
365c2c9
fix error message showing in UpdateLossScalingOp (#27596)
wzzju Oct 9, 2020
994438b
change clip grad api, test=develop (#27767)
qili93 Oct 9, 2020
ec7d11a
refine fused_elemwise_activation error message (#27734)
juncaipeng Oct 9, 2020
7ecbc46
reimplement paddle.nn.functional.sigmoid_focal_loss (#27748)
FlyingQianMM Oct 9, 2020
6da552a
Update initializer examples of Bilinear (#27709)
Ray2020BD Oct 9, 2020
baddedf
fix ut test=develop (#27760)
gongweibao Oct 9, 2020
6c1acf3
Optimize the error message for OP (#27617)
xiemoyuan Oct 9, 2020
606611d
[oneDNN] GRU BF16 kernel (#27731)
jczaja Oct 9, 2020
057e28b
API(lstm_unit, lstmp, sequence_mask, sequence_enumerate, sequence_con…
smallv0221 Oct 9, 2020
395cb56
refine logsumexp error message and docs (#27713)
zhupengyang Oct 9, 2020
0a7bab4
fix error mesage for negative_positive_pair_op and nce_op (#27779)
Oct 9, 2020
061240b
remove beam_search and beam_search_decode api in paddle.nn (#27660)
FrostML Oct 9, 2020
631c1f3
- Fix to 27398 (#27770)
jczaja Oct 9, 2020
836ee3b
Polish the english documentation and example of paddle.nn.Linear and …
Xreki Oct 10, 2020
c52b6bb
add alias for RNN APIs from paddle.nn.layer.rnn explicitly (#27784)
Oct 10, 2020
b53970e
Fix cross, cumsum op docs test=document_fix
LutaoChu Oct 10, 2020
d3b98f0
Fix dynamic parallel train mode for hapi (#27787)
LielinJiang Oct 10, 2020
c4b1faa
modified sample code of add_position_encoding to use paddle.randn, te…
Oct 10, 2020
b8d2a02
fix ut error of test_recognize_digits, test=develop (#27791)
qili93 Oct 10, 2020
00d401e
Add api of constant in paddle.nn.initializer (#27786)
Ray2020BD Oct 10, 2020
0d27591
save operator version infomation to program desc, test=develop (#27668)
Shixiaowei02 Oct 10, 2020
f6ad237
fix pool3d bug, test=develop (#27718)
LDOUBLEV Oct 10, 2020
65c0614
disable_fuse_all_reduce (#27746)
zhiqiu Oct 10, 2020
5098891
add softmax xpu kernel (#27700)
zhupengyang Oct 10, 2020
0025e0d
refine APIs: brelu, hardsigmoid, hardswish, maxout (#27658)
zhupengyang Oct 10, 2020
bf187c7
Polish the documentation and examples of paddle.static.nn.fc. (#27768)
Xreki Oct 10, 2020
1bada98
Modify the output information of the shell script,test=document_fix (…
iducn Oct 10, 2020
c425cf1
[API 2.0]Update 2.0 api from fluid to paddle (#27802)
jiweibo Oct 10, 2020
ad99e63
add double grad op for matmul (#27776)
wangxinxin08 Oct 10, 2020
a2d08aa
update for windows compile. (#27813)
jiweibo Oct 11, 2020
74d3a55
Add Swish and ThresholdedReLU for API 2.0 (#27758)
hong19860320 Oct 11, 2020
9b49f02
Polish jit.save/load design & remove paddle.SaveLoadConfig (#27623)
chenwhql Oct 11, 2020
81d3992
Fix docker build error (#27735)
tianshuo78520a Oct 12, 2020
8fabb1c
Add test attribute in channelwise_quant op, test=develop (#27742)
juncaipeng Oct 12, 2020
0a1862d
fleet combine amp dgc recompute meta optimizer (#27643)
wangxicoding Oct 12, 2020
840d54d
add XPU support for shape op and reshape op (#27804)
mapingshuo Oct 12, 2020
4676f03
fix summary (#27820)
LielinJiang Oct 12, 2020
d4359b0
add the kunlun kernel for the paddle 2.0
joey12300 Oct 12, 2020
070ac95
Add double grad in Squeeze and Unsqueeze (#27810)
TeslaZhao Oct 12, 2020
f3e2580
Fix the param of swish (#27824)
hong19860320 Oct 12, 2020
b84d4ae
Fix bug of Model.save (#27815)
LiuChiachi Oct 12, 2020
659d04d
hsigmoid -> hsigmoid_loss/HSigmoidLoss; refine docs (#27745)
zhupengyang Oct 12, 2020
7779790
error message optimization in softmax_with_cross_entropy_op (#27772)
yghstill Oct 12, 2020
36bb056
Add flattern weight of lstm (#27192)
GaoWei8 Oct 12, 2020
16999ae
use IndexList to improve performance of instance_norm op (#25132)
zhangting2020 Oct 12, 2020
e8a5aef
update CUDAPlace doc. test=document_fix (#27711)
yongqiangma Oct 12, 2020
6335e6a
add musl option (#27798)
chen-zhiyu Oct 12, 2020
55e6376
[oneDNN] adaptive pool support (#27747)
jczaja Oct 12, 2020
2bcb7c0
Mutiply allows non-tensor data input (#27690)
Joejiong Oct 12, 2020
9005c5a
Lite subgraph support arm cpu. (#27827)
jiweibo Oct 12, 2020
e96fc6a
Fix/embedding doc (#27816)
seiriosPlus Oct 12, 2020
84d8e49
refine adam/strided_slice && fix doc for rmsprop/unstack (#27740)
MRXLT Oct 12, 2020
8fa4c09
add load_op_xpu for Baidu Kunlun (#27817)
wanghuancoder Oct 12, 2020
2e1bca9
Refine the gradient calculation errors caused by renaming in while_gr…
gfwm2013 Oct 12, 2020
e388e60
Refine cond API English Doc for 2.0RC (#27708)
zhhsplendid Oct 12, 2020
6cdf2c9
mig deformable_conv to deform_conv2d (#27841)
baiyfbupt Oct 12, 2020
445634f
polish Return doc of DataLoader (#27808)
heavengate Oct 12, 2020
bbc837e
add info log for trt input dynamic shape check (#27796)
shangzhizhou Oct 13, 2020
c5f2802
【paddle.fleet】Update fleetrun & ps-heter (#27472)
MrChengmo Oct 13, 2020
af57537
remove dy2static test_lac predictor run case (#27844)
OliverLPH Oct 13, 2020
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -131,6 +131,7 @@ option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF)
option(WITH_NCCL "Compile PaddlePaddle with NCCL support" ON)
option(WITH_CRYPTO "Compile PaddlePaddle with crypto support" ON)
option(WITH_ARM "Compile PaddlePaddle with arm support" OFF)
option(WITH_MUSL "Compile with musl libc instead of gblic" OFF)

# PY_VERSION
if(NOT PY_VERSION)
2 changes: 1 addition & 1 deletion README.md
@@ -33,7 +33,7 @@ pip install paddlepaddle
# Linux GPU cuda10cudnn7
pip install paddlepaddle-gpu
# Linux GPU cuda9cudnn7
pip install paddlepaddle-gpu==1.8.4.post97
pip install paddlepaddle-gpu==1.8.5.post97

```
It is recommended to read [this doc](https://www.paddlepaddle.org.cn/documentation/docs/en/beginners_guide/install/index_en.html) on our website.
2 changes: 1 addition & 1 deletion README_cn.md
@@ -30,7 +30,7 @@ pip install paddlepaddle
# Linux GPU cuda10cudnn7
pip install paddlepaddle-gpu
# Linux GPU cuda9cudnn7
pip install paddlepaddle-gpu==1.8.4.post97
pip install paddlepaddle-gpu==1.8.5.post97

```
For more installation details, see the [installation guide](http://www.paddlepaddle.org.cn/documentation/docs/zh/1.8/beginners_guide/install/index_cn.html) on the official website
10 changes: 10 additions & 0 deletions cmake/configure.cmake
@@ -51,6 +51,16 @@ if(WIN32)
endif(NOT MSVC)
endif(WIN32)

if(WITH_MUSL)
add_definitions(-DPADDLE_WITH_MUSL)

message(STATUS, "Set compile option WITH_MKL=OFF when WITH_MUSL=ON")
SET(WITH_MKL OFF)

message(STATUS, "Set compile option WITH_GPU=OFF when WITH_MUSL=ON")
SET(WITH_GPU OFF)
endif()

if(WITH_PSLIB)
add_definitions(-DPADDLE_WITH_PSLIB)
endif()
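The configure.cmake hunk above wires up the new flag: enabling WITH_MUSL defines PADDLE_WITH_MUSL and force-disables MKL and GPU support. A hypothetical configure invocation exercising the flag (the build directory and Python version are illustrative assumptions, not taken from this PR):

```shell
# Illustrative only: configure a musl-libc build. Per the hunk above,
# configure.cmake will define PADDLE_WITH_MUSL and then force
# WITH_MKL=OFF and WITH_GPU=OFF regardless of other settings.
mkdir -p build && cd build
cmake .. -DWITH_MUSL=ON -DPY_VERSION=3.7
```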
92 changes: 68 additions & 24 deletions cmake/external/lite.cmake
@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

if(NOT LINUX OR NOT WITH_MKL)
message("Paddle-lite will not build because the required Linux and MKL do not exist.")
if(NOT LINUX)
message("Paddle-lite will not build because the required Linux do not exist.")
set(WITH_LITE OFF)
return()
endif()
@@ -42,30 +42,30 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
endif()

# No quotes, so cmake can resolve it as a command with arguments.
set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON
-DLITE_WITH_CUDA=${WITH_GPU}
-DWITH_MKLDNN=OFF
-DLITE_WITH_X86=ON
-DLITE_WITH_PROFILE=OFF
-DWITH_LITE=OFF
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF
-DWITH_PYTHON=OFF
-DWITH_TESTING=OFF
-DLITE_BUILD_EXTRA=ON
-DCUDNN_ROOT=${CUDNN_ROOT}
-DLITE_WITH_STATIC_CUDA=OFF
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_ROOT=${XPU_SDK_ROOT}
-DLITE_WITH_ARM=OFF)

ExternalProject_Add(
if(WITH_ARM)
set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
message(WARNING "BUILD_COMMAND: ${LITE_BUILD_COMMAND}")
set(LITE_OPTIONAL_ARGS -DWITH_MKL=OFF
-DLITE_WITH_CUDA=OFF
-DWITH_MKLDNN=OFF
-DLITE_WITH_X86=OFF
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON
-DLITE_WITH_PROFILE=OFF
-DARM_TARGET_OS=armlinux
-DWITH_LITE=ON
-DWITH_PYTHON=OFF
-DWITH_TESTING=OFF
-DLITE_BUILD_EXTRA=ON
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_ROOT=${XPU_SDK_ROOT}
-DLITE_WITH_ARM=ON)
ExternalProject_Add(
${LITE_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle-Lite.git"
GIT_TAG ${LITE_GIT_TAG}
PREFIX ${LITE_SOURCES_DIR}
PATCH_COMMAND mkdir -p ${LITE_SOURCES_DIR}/src/extern_lite-build/lite/gen_code && touch ${LITE_SOURCES_DIR}/src/extern_lite-build/lite/gen_code/__generated_code__.cc
UPDATE_COMMAND ""
BUILD_COMMAND ${LITE_BUILD_COMMAND}
INSTALL_COMMAND ""
@@ -81,7 +81,51 @@
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
${LITE_OPTIONAL_ARGS}
)
)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
else()
set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib)
set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON
-DLITE_WITH_CUDA=${WITH_GPU}
-DWITH_MKLDNN=OFF
-DLITE_WITH_X86=ON
-DLITE_WITH_PROFILE=OFF
-DWITH_LITE=OFF
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF
-DWITH_PYTHON=OFF
-DWITH_TESTING=OFF
-DLITE_BUILD_EXTRA=ON
-DCUDNN_ROOT=${CUDNN_ROOT}
-DLITE_WITH_STATIC_CUDA=OFF
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_ROOT=${XPU_SDK_ROOT}
-DLITE_WITH_ARM=OFF)

ExternalProject_Add(
${LITE_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle-Lite.git"
GIT_TAG ${LITE_GIT_TAG}
PREFIX ${LITE_SOURCES_DIR}
UPDATE_COMMAND ""
BUILD_COMMAND ${LITE_BUILD_COMMAND}
INSTALL_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${LITE_CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
${LITE_OPTIONAL_ARGS}
)
endif()
ExternalProject_Get_property(${LITE_PROJECT} BINARY_DIR)
ExternalProject_Get_property(${LITE_PROJECT} SOURCE_DIR)
set(LITE_BINARY_DIR ${BINARY_DIR})
@@ -103,8 +147,8 @@ function(external_lite_libs alias path)
endif()
endfunction()

external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/inference_lite_lib/cxx/lib/libpaddle_full_api_shared.so)
set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/inference_lite_lib/cxx/lib/libpaddle_full_api_shared.so)
external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)

add_definitions(-DPADDLE_WITH_LITE)
add_definitions(-DLITE_WITH_LOG)
2 changes: 1 addition & 1 deletion cmake/external/mkldnn.cmake
@@ -20,7 +20,7 @@ SET(MKLDNN_SOURCE_DIR ${THIRD_PARTY_PATH}/mkldnn/src/extern_mkldnn)
SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn)
SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)
SET(MKLDNN_REPOSITORY https://github.com/oneapi-src/oneDNN.git)
SET(MKLDNN_TAG 64a48f9565aa72f6359917b3406328075a409939)
SET(MKLDNN_TAG 361725600224f41b7347a1c6bee9b04d1e6c14d7)

# Introduce variables:
# * CMAKE_INSTALL_LIBDIR
2 changes: 1 addition & 1 deletion cmake/inference_lib.cmake
@@ -131,7 +131,7 @@ function(copy_part_of_thrid_party TARGET DST)
if (LITE_BINARY_DIR)
set(dst_dir "${DST}/third_party/install/lite")
copy(${TARGET}
SRCS ${LITE_BINARY_DIR}/inference_lite_lib/*
SRCS ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/*
DSTS ${dst_dir})
endif()
endfunction()
1 change: 1 addition & 0 deletions cmake/init.cmake
@@ -28,5 +28,6 @@ endif()

if(WIN32)
set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props)
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -Os -DNDEBUG")
endif()

4 changes: 3 additions & 1 deletion paddle/fluid/framework/CMakeLists.txt
@@ -123,7 +123,9 @@ cc_library(attribute SRCS attribute.cc DEPS framework_proto boost enforce)
cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc
device_context)

cc_library(op_version_registry SRCS op_version_registry.cc DEPS framework_proto boost)
cc_library(op_version_proto SRCS op_version_proto.cc DEPS framework_proto boost)

cc_library(op_version_registry SRCS op_version_registry.cc DEPS op_version_proto framework_proto boost)
cc_test(op_version_registry_test SRCS op_version_registry_test.cc DEPS op_version_registry)

cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute glog)
2 changes: 1 addition & 1 deletion paddle/fluid/framework/data_layout_transform.cc
@@ -203,7 +203,7 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
// As MKL-DNN description was in NCHW and paddle is expecting NHWC
platform::MatchShapeToLayout(out, in_layout, out_layout);

out->set_layout(out_layout);
out->set_layout(DataLayout::kNCHW);
// reset format since the out tensor will be feed to non-MKLDNN OPkernel
out->set_format(MKLDNNMemoryFormat::undef);
}
3 changes: 3 additions & 0 deletions paddle/fluid/framework/data_transform.cc
@@ -117,6 +117,9 @@ void SetTensorToVariable(const Variable &in_var, const Tensor &tensor,
auto *tran_lod_tensor = out_var->GetMutable<LoDTensor>();
tran_lod_tensor->set_lod(in_lod_tensor.lod());
tran_lod_tensor->set_layout(in_lod_tensor.layout());
#ifdef PADDLE_WITH_MKLDNN
tran_lod_tensor->set_format(in_lod_tensor.format());
#endif
tran_lod_tensor->ShareDataWith(tensor);
} else if (in_var.IsType<SelectedRows>()) {
auto &in_selected_rows = in_var.Get<SelectedRows>();
1 change: 1 addition & 0 deletions paddle/fluid/framework/distributed_strategy.proto
@@ -98,6 +98,7 @@ message AsyncConfig {
optional int32 send_wait_times = 7 [ default = 1 ];
optional bool runtime_split_send_recv = 8 [ default = false ];
optional bool launch_barrier = 9 [ default = true ];
optional string heter_worker_device_guard = 10 [ default = 'cpu' ];
}

message PipelineConfig { optional int32 micro_batch = 1 [ default = 1 ]; }
32 changes: 9 additions & 23 deletions paddle/fluid/framework/framework.proto
@@ -179,29 +179,15 @@ message BlockDesc {
optional int32 forward_block_idx = 5 [ default = -1 ];
}

// CompatibleInfo is used to determine if a feature is compatible and
// provides the information.
message CompatibleInfo {
enum Type {
COMPATIBLE = 0;
DEFINITELY_NOT = 1;
POSSIBLE = 2;
BUG_FIX = 3;
PRECISION_CHANGE = 4;
}
required string version = 1;
required Type type = 2;
}

// In some cases, Paddle Fluid may perform operator definition iterations,
// and the operator uses OpCompatibleMap for compatibility testing.
message OpCompatibleMap {
message OpCompatiblePair {
// In some cases, Paddle may perform operator definition iterations,
// and the operator uses OpVersionMap for compatibility testing.
message OpVersion { required int32 version = 1; }
message OpVersionMap {
message OpVersionPair {
required string op_name = 1;
required CompatibleInfo compatible_info = 2;
required OpVersion op_version = 2;
}
repeated OpCompatiblePair pair = 1;
optional string default_required_version = 2;
repeated OpVersionPair pair = 1;
}

// Please refer to
@@ -210,8 +196,8 @@ message OpCompatibleMap {
// TODO(panyx0718): A model can have multiple programs. Need a
// way to distinguish them. Maybe ID or name?
message ProgramDesc {
reserved 2; // For backward compatibility.
reserved 2, 3; // For backward compatibility.
repeated BlockDesc blocks = 1;
optional Version version = 4;
optional OpCompatibleMap op_compatible_map = 3;
optional OpVersionMap op_version_map = 5;
}
26 changes: 22 additions & 4 deletions paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -1882,9 +1882,9 @@ PDNode *patterns::MultipleQuantize::operator()() {
PDNode *patterns::QuantizePlacement::operator()(
const std::unordered_set<std::string> &quantize_enabled_op_types) {
std::unordered_set<std::string> supported_op_types =
std::unordered_set<std::string>({"concat", "conv2d", "elementwise_add",
"fc", "matmul", "pool2d", "prior_box",
"relu", "reshape2", "transpose2"});
std::unordered_set<std::string>(
{"concat", "conv2d", "elementwise_add", "fc", "matmul", "pool2d",
"prior_box", "relu", "reshape2", "transpose2", "fusion_gru"});
if (!quantize_enabled_op_types.empty()) {
supported_op_types = quantize_enabled_op_types;
}
Expand All @@ -1894,7 +1894,8 @@ PDNode *patterns::QuantizePlacement::operator()(

PDNode *patterns::Bfloat16Placement::operator()(
const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
std::unordered_set<std::string> supported_op_types{"conv2d"};
std::unordered_set<std::string> supported_op_types =
std::unordered_set<std::string>({"conv2d", "fusion_gru"});
if (!bfloat16_enabled_op_types.empty()) {
supported_op_types = bfloat16_enabled_op_types;
}
@@ -2280,6 +2281,23 @@ PDNode *patterns::MatmulTransposeReshapePattern::operator()() {
return reshape_out;
}

PDNode *patterns::FusionGru::operator()() {
auto op = pattern->NewNode(op_repr())->assert_is_op("fusion_gru");
auto x = pattern->NewNode(x_repr())->AsInput()->assert_is_op_input(
"fusion_gru", "X");
auto weight_h = pattern->NewNode(weight_h_repr())
->AsInput()
->assert_is_op_input("fusion_gru", "WeightH");
auto weight_x = pattern->NewNode(weight_x_repr())
->AsInput()
->assert_is_op_input("fusion_gru", "WeightX");
auto out = pattern->NewNode(out_repr())
->AsOutput()
->assert_is_op_output("fusion_gru", "Hidden");
op->LinksFrom({x, weight_h, weight_x}).LinksTo({out});
return out;
}

} // namespace ir
} // namespace framework
} // namespace paddle
15 changes: 15 additions & 0 deletions paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -1312,6 +1312,21 @@ struct MatmulTransposeReshapePattern : public PatternBase {
PATTERN_DECL_NODE(reshape_out_xshape);
};

// fusion_gru op
// Forward pass for fusion_gru.
// fusion_gru out is a result of the operator.
struct FusionGru : public PatternBase {
FusionGru(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "fusion_gru") {}

PDNode* operator()();
PATTERN_DECL_NODE(op);
PATTERN_DECL_NODE(x);
PATTERN_DECL_NODE(weight_h);
PATTERN_DECL_NODE(weight_x);
PATTERN_DECL_NODE(out);
};

} // namespace patterns

// Link two ir::Nodes from each other.
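The FusionGru pattern declared above matches a `fusion_gru` op fed by X, WeightH and WeightX and producing Hidden. What such a pattern match does can be sketched on a toy dict-based graph — the real detector operates on Paddle's `ir::Graph` with PDNode assertions, so this is an illustration only:

```python
# Toy analogue of the FusionGru pattern: scan a list of op-node dicts and
# collect every "fusion_gru" whose inputs include X, WeightH and WeightX
# and whose outputs include Hidden. Illustrative representation only.

def match_fusion_gru(graph):
    matches = []
    for node in graph:
        if node.get("op") != "fusion_gru":
            continue
        has_inputs = {"X", "WeightH", "WeightX"} <= set(node.get("inputs", {}))
        if has_inputs and "Hidden" in node.get("outputs", {}):
            matches.append(node)
    return matches

graph = [
    {"op": "fusion_gru",
     "inputs": {"X": "x0", "WeightH": "wh0", "WeightX": "wx0"},
     "outputs": {"Hidden": "h0"}},
    {"op": "conv2d", "inputs": {"Input": "x1"}, "outputs": {"Output": "y1"}},
]
print(len(match_fusion_gru(graph)))  # 1
```

A pass such as the fusion_gru quantizer enabled in this PR would then rewrite each matched subgraph in place.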