Add an option in BuildStrategy to enable fusion_group_pass and add unittest.

test=develop

Xreki committed Nov 21, 2019
1 parent 6d1159e commit e4f20ca
Showing 9 changed files with 146 additions and 105 deletions.
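Before the per-file diff, here is a minimal usage sketch of the option this commit adds. Everything except `build_strategy.enable_auto_fusion` (the new property) is ordinary, hypothetical fluid 1.x boilerplate; the option only takes effect when Paddle is compiled with CUDA and the program runs on a GPU place.

import paddle.fluid as fluid
import paddle.fluid.core as core

# Hypothetical toy network: a chain of elementwise ops, the kind of
# subgraph that fusion_group_pass detects and fuses.
x = fluid.layers.data(name='x', shape=[32], dtype='float32')
y = fluid.layers.relu(x) * 2.0 + 1.0
loss = fluid.layers.reduce_mean(y)

build_strategy = fluid.BuildStrategy()
build_strategy.enable_auto_fusion = True  # the option added by this commit

place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

compiled = fluid.CompiledProgram(fluid.default_main_program()).with_data_parallel(
    loss_name=loss.name, build_strategy=build_strategy)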
12 changes: 10 additions & 2 deletions paddle/fluid/framework/details/CMakeLists.txt
@@ -62,7 +62,14 @@ cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope d

cc_library(eager_deletion_op_handle SRCS eager_deletion_op_handle.cc DEPS lod_tensor selected_rows reference_count_pass_helper)

set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto sequential_execution_pass modify_op_lock_and_record_event_pass all_reduce_deps_pass reference_count_pass eager_deletion_pass buffer_shared_inplace_op_pass buffer_shared_cross_op_memory_reuse_pass)
set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto
sequential_execution_pass
modify_op_lock_and_record_event_pass
all_reduce_deps_pass
reference_count_pass
eager_deletion_pass
buffer_shared_inplace_op_pass
buffer_shared_cross_op_memory_reuse_pass)
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ${SSA_GRAPH_EXECUTOR_DEPS})

cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
@@ -98,7 +105,8 @@ endif()
cc_library(build_strategy SRCS build_strategy.cc DEPS
graph_viz_pass multi_devices_graph_pass
multi_devices_graph_print_pass multi_devices_graph_check_pass
fuse_elewise_add_act_pass multi_batch_merge_pass
fuse_elewise_add_act_pass fusion_group_pass
multi_batch_merge_pass
fuse_relu_depthwise_conv_pass
lock_free_optimize_pass
coalesce_grad_tensor_pass fuse_all_reduce_op_pass backward_optimizer_op_deps_pass
12 changes: 12 additions & 0 deletions paddle/fluid/framework/details/build_strategy.cc
@@ -167,6 +167,9 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
"fuse_relu_depthwise_conv_pass");
AppendPassWithCheck(strategy_.fuse_elewise_add_act_ops_,
"fuse_elewise_add_act_pass");
#ifdef PADDLE_WITH_CUDA
AppendPassWithCheck(strategy_.enable_auto_fusion_, "fusion_group_pass");
#endif
// For single-card training, fuse_all_reduce_ops is unnecessary.
// coalesce_grad_tensor_pass should come before MultiDevPass.
AppendPassWithCheck(strategy_.fuse_all_reduce_ops_,
@@ -369,6 +372,12 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph,
"GPU, skipped.";
continue;
}
} else if (pass->Type() == "fusion_group_pass") {
pass->Set("use_gpu", new bool(use_cuda));
if (!use_cuda) {
LOG(WARNING) << "fusion_group_pass is only supported on GPU, skipped.";
continue;
}
} else if (pass->Type() == "mkldnn_placement_pass") {
pass->Set("mkldnn_enabled_op_types",
new std::unordered_set<std::string>(mkldnn_enabled_op_types_));
@@ -419,3 +428,6 @@ USE_PASS(mkldnn_placement_pass);
#ifdef PADDLE_WITH_NGRAPH
USE_PASS(ngraph_subgraph_pass);
#endif
#ifdef PADDLE_WITH_CUDA
USE_PASS(fusion_group_pass);
#endif
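Design note on the gating above: fusion_group_pass is guarded twice. At compile time, both the AppendPassWithCheck call and USE_PASS(fusion_group_pass) sit behind #ifdef PADDLE_WITH_CUDA; at run time, Apply() also sets the pass's use_gpu attribute from use_cuda and skips the pass with a warning on CPU. Setting enable_auto_fusion = True in a CPU-only build or run is therefore a safe no-op.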
3 changes: 2 additions & 1 deletion paddle/fluid/framework/details/build_strategy.h
@@ -87,7 +87,8 @@ struct BuildStrategy {
// TODO(dev-paddle): fuse_elewise_add_act_ops may cause some models to have
// cycles.
bool fuse_elewise_add_act_ops_{false};
// Fuse_all_optimizer_ops and fuse_all_reduce_ops require that gradients
bool enable_auto_fusion_{false};
// fuse_all_optimizer_ops and fuse_all_reduce_ops require that gradients
// should not be sparse types
boost::optional<bool> fuse_all_optimizer_ops_{false};
boost::optional<bool> fuse_all_reduce_ops_{boost::none};
6 changes: 3 additions & 3 deletions paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.cc
@@ -108,9 +108,9 @@ void ElementwiseGroupDetector::Init(Graph* graph, bool backward) {
elementwise_ops_.insert(n);
}
}
LOG(INFO) << "elementise ops for graph:" << graph
<< ", backward=" << backward;
LOG(INFO) << "{\n" << DebugString(elementwise_ops_) << "}\n";
// LOG(INFO) << "elementise ops for graph:" << graph
// << ", backward=" << backward;
// LOG(INFO) << "{\n" << DebugString(elementwise_ops_) << "}\n";
}

bool ElementwiseGroupDetector::IsElementwiseOp(Node* n) {
5 changes: 4 additions & 1 deletion paddle/fluid/framework/ir/fusion_group/fusion_group_pass.cc
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/ir/fusion_group/fusion_group_pass.h"
#include <memory>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/ir/fusion_group/code_generator.h"
#include "paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.h"
@@ -27,6 +29,7 @@ namespace ir {
void FusionGroupPass::ApplyImpl(ir::Graph* graph) const {
PADDLE_ENFORCE_NOT_NULL(graph);
if (Get<bool>("use_gpu")) {
fusion_group::OperationMap::Init();
int num_elementwise_groups = DetectFusionGroup(graph, 0);
LOG(INFO) << "Detect " << num_elementwise_groups
<< " elementwise fusion groups.";
@@ -61,6 +64,7 @@ int FusionGroupPass::DetectFusionGroup(Graph* graph, int type) const {
std::string func_name = "fused_elementwise_" + std::to_string(index++);
subgraph.SetFuncName(func_name);
subgraphs.push_back(subgraph);
LOG(INFO) << "subgraph: {\n" << DebugString(subgraph.Nodes()) << "}\n";
begin_of_forward_subgraph.push_back(n);
}
}
@@ -88,7 +92,6 @@ int FusionGroupPass::DetectFusionGroup(Graph* graph, int type) const {
}

void FusionGroupPass::GenerateCode(fusion_group::SubGraph* subgraph) const {
fusion_group::OperationMap::Init();
fusion_group::CodeGenerator code_generator;
std::string code_str = code_generator.Generate(subgraph);
VLOG(3) << code_str;
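Design note: fusion_group::OperationMap::Init() moves out of GenerateCode() (see the removal in the hunk above) and up into ApplyImpl(), so the operation map is initialized once per pass application instead of once per generated subgraph; the tester change below drops its manual Init() call for the same reason.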
2 changes: 0 additions & 2 deletions paddle/fluid/framework/ir/fusion_group/fusion_group_pass_tester.cc
@@ -124,8 +124,6 @@ std::unique_ptr<Graph> BuildElementwiseTreeGraph(bool backward = false) {
}

int TestMain(std::unique_ptr<Graph> graph, std::string prefix) {
fusion_group::OperationMap::Init();

VisualizeGraph(&graph, prefix + ".dot");
auto pass = PassRegistry::Instance().Get("fusion_group_pass");
pass->Set("use_gpu", new bool(true));
17 changes: 17 additions & 0 deletions paddle/fluid/pybind/pybind.cc
@@ -1922,6 +1922,23 @@ All parameter, weight, gradient are variables in Paddle.
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_elewise_add_act_ops = True
)DOC")
.def_property(
"enable_auto_fusion",
[](const BuildStrategy &self) { return self.enable_auto_fusion_; },
[](BuildStrategy &self, bool b) {
PADDLE_ENFORCE_EQ(!self.IsFinalized(), true,
"BuildStrategy is finlaized.");
self.enable_auto_fusion_ = b;
},
R"DOC((bool, optional):
Examples:
.. code-block:: python
import paddle.fluid as fluid
build_strategy = fluid.BuildStrategy()
build_strategy.enable_auto_fusion = True
)DOC")
.def_property(
"fuse_relu_depthwise_conv",
[](const BuildStrategy &self) {
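The setter above enforces ordering: enable_auto_fusion can only be assigned while the strategy is not yet finalized (finalization happens once the strategy has been used to build a graph), otherwise the PADDLE_ENFORCE_EQ fires. A minimal sketch of the intended order; main_prog, loss, exe, and x_data are hypothetical:

build_strategy = fluid.BuildStrategy()
build_strategy.enable_auto_fusion = True  # OK: strategy not finalized yet

compiled = fluid.CompiledProgram(main_prog).with_data_parallel(
    loss_name=loss.name, build_strategy=build_strategy)
exe.run(compiled, feed={'x': x_data}, fetch_list=[loss.name])
# Once the strategy has been finalized, re-assigning enable_auto_fusion
# would trigger the enforcement above.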
@@ -0,0 +1,41 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest

import paddle.fluid as fluid
import paddle.fluid.core as core
from test_eager_deletion_padding_rnn import RNNConfig, PaddingRNNTestBase


class FusionGroupPaddingRNNTest(PaddingRNNTestBase):
def set_customed_config(self):
# Enable fusion_group_pass
self.build_strategy.enable_auto_fusion = True

# Use CUDA executor
if core.is_compiled_with_cuda():
self.exe = fluid.Executor(fluid.CUDAPlace(0))

def test_train_enable_fusion_group(self):
rnn_model = "static"
config = RNNConfig("test", rnn_model)
with fluid.scope_guard(fluid.Scope()):
self.train(config, parallel=True, use_program_cache=False)


if __name__ == '__main__':
unittest.main()
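A note on the test wiring: set_customed_config() is an override of a hook on PaddingRNNTestBase (the spelling follows the base class), presumably invoked before the program is built, so enable_auto_fusion is already set on self.build_strategy when train(config, parallel=True) compiles the static RNN with data parallelism; on CUDA builds the executor is switched to CUDAPlace(0) so fusion_group_pass actually runs.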