From 523191806680424aa79031bcdc77fcb274e8b2ac Mon Sep 17 00:00:00 2001
From: minghaoBD
Date: Mon, 23 May 2022 14:45:47 +0000
Subject: [PATCH] style check

---
 .../framework/ir/graph_pattern_detector.cc    | 45 +++++++++++--------
 ...dense_multihead_matmul_with_sparse_pass.cc | 38 ++++++++++------
 ..._dense_multihead_matmul_with_sparse_pass.h |  3 +-
 .../inference/api/paddle_pass_builder.cc      | 42 ++++++++---------
 .../convert/sparse_multihead_matmul_op.cc     | 23 +++++-----
 paddle/fluid/inference/tensorrt/op_teller.cc  |  5 ++-
 .../inference/tensorrt/plugin/spmm_plugin.cu  | 14 +++---
 7 files changed, 98 insertions(+), 72 deletions(-)

diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index 493f39897a0dc..eb15f80aeceb5 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -3450,30 +3450,39 @@ PDNode *patterns::DenseFC::operator()() {
 }
 
 PDNode *patterns::MultiheadMatmul::operator()() {
-  auto *multihead_matmul = pattern->NewNode(multihead_matmul_repr())->assert_is_op("multihead_matmul");
+  auto *multihead_matmul = pattern->NewNode(multihead_matmul_repr())
+                               ->assert_is_op("multihead_matmul");
   // Input
-  auto *multihead_matmul_input = pattern->NewNode(multihead_matmul_input_repr())
-                    ->AsInput()
-                    ->assert_is_op_input("multihead_matmul", "Input");
+  auto *multihead_matmul_input =
+      pattern->NewNode(multihead_matmul_input_repr())
+          ->AsInput()
+          ->assert_is_op_input("multihead_matmul", "Input");
   // Filter
-  auto *multihead_matmul_weights = pattern->NewNode(multihead_matmul_weights_repr())
-                    ->AsInput()
-                    ->assert_is_op_input("multihead_matmul", "W");
+  auto *multihead_matmul_weights =
+      pattern->NewNode(multihead_matmul_weights_repr())
+          ->AsInput()
+          ->assert_is_op_input("multihead_matmul", "W");
   // Bias
-  auto *multihead_matmul_bias = pattern->NewNode(multihead_matmul_bias_repr())
-                    ->AsInput()
-                    ->assert_is_op_input("multihead_matmul", "Bias");
+  auto *multihead_matmul_bias =
+      pattern->NewNode(multihead_matmul_bias_repr())
+          ->AsInput()
+          ->assert_is_op_input("multihead_matmul", "Bias");
   // BiasQK
-  auto *multihead_matmul_biasqk = pattern->NewNode(multihead_matmul_biasqk_repr())
-                    ->AsInput()
-                    ->assert_is_op_input("multihead_matmul", "BiasQK");
+  auto *multihead_matmul_biasqk =
+      pattern->NewNode(multihead_matmul_biasqk_repr())
+          ->AsInput()
+          ->assert_is_op_input("multihead_matmul", "BiasQK");
   // Output
-  auto *multihead_matmul_out = pattern->NewNode(multihead_matmul_out_repr())
-                    ->AsOutput()
-                    ->assert_is_op_output("multihead_matmul", "Out")
-                    ->assert_is_only_output_of_op("multihead_matmul");
+  auto *multihead_matmul_out =
+      pattern->NewNode(multihead_matmul_out_repr())
+          ->AsOutput()
+          ->assert_is_op_output("multihead_matmul", "Out")
+          ->assert_is_only_output_of_op("multihead_matmul");
 
-  multihead_matmul->LinksFrom({multihead_matmul_input, multihead_matmul_weights, multihead_matmul_bias, multihead_matmul_biasqk}).LinksTo({multihead_matmul_out});
+  multihead_matmul
+      ->LinksFrom({multihead_matmul_input, multihead_matmul_weights,
+                   multihead_matmul_bias, multihead_matmul_biasqk})
+      .LinksTo({multihead_matmul_out});
 
   return multihead_matmul_out;
 }
diff --git a/paddle/fluid/framework/ir/replace_dense_multihead_matmul_with_sparse_pass.cc b/paddle/fluid/framework/ir/replace_dense_multihead_matmul_with_sparse_pass.cc
index cb4cc14bfe195..9fdb5e14da153 100644
--- a/paddle/fluid/framework/ir/replace_dense_multihead_matmul_with_sparse_pass.cc
+++ b/paddle/fluid/framework/ir/replace_dense_multihead_matmul_with_sparse_pass.cc
@@ -20,7 +20,8 @@ namespace paddle {
 namespace framework {
 namespace ir {
 
-ReplaceDenseMultiheadMatmulWithSparsePass::ReplaceDenseMultiheadMatmulWithSparsePass() {
+ReplaceDenseMultiheadMatmulWithSparsePass::
+    ReplaceDenseMultiheadMatmulWithSparsePass() {
   AddOpCompat(OpCompat("multihead_matmul"))
       .AddInput("Input")
       .IsTensor()
@@ -47,8 +48,8 @@ void ReplaceDenseMultiheadMatmulWithSparsePass::ApplyImpl(Graph *graph) const {
   FusePassBase::Init(name_scope, graph);
 
   GraphPatternDetector gpd;
-  patterns::MultiheadMatmul multihead_matmul_pattern(gpd.mutable_pattern(),
-                                   "dense_multihead_matmul_replace_pass");
+  patterns::MultiheadMatmul multihead_matmul_pattern(
+      gpd.mutable_pattern(), "dense_multihead_matmul_replace_pass");
   multihead_matmul_pattern();
   int found_multihead_matmul_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
@@ -60,12 +61,19 @@ void ReplaceDenseMultiheadMatmulWithSparsePass::ApplyImpl(Graph *graph) const {
       return;
     }*/
 
-    GET_IR_NODE_FROM_SUBGRAPH(multihead_matmul_out, multihead_matmul_out, multihead_matmul_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(multihead_matmul, multihead_matmul, multihead_matmul_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(multihead_matmul_input, multihead_matmul_input, multihead_matmul_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(multihead_matmul_weights, multihead_matmul_weights, multihead_matmul_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(multihead_matmul_bias, multihead_matmul_bias, multihead_matmul_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(multihead_matmul_biasqk, multihead_matmul_biasqk, multihead_matmul_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(multihead_matmul_out, multihead_matmul_out,
+                              multihead_matmul_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(multihead_matmul, multihead_matmul,
+                              multihead_matmul_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(multihead_matmul_input, multihead_matmul_input,
+                              multihead_matmul_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(multihead_matmul_weights,
+                              multihead_matmul_weights,
+                              multihead_matmul_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(multihead_matmul_bias, multihead_matmul_bias,
+                              multihead_matmul_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(multihead_matmul_biasqk, multihead_matmul_biasqk,
+                              multihead_matmul_pattern);
 
     auto *multihead_matmul_op = multihead_matmul->Op();
     auto w_name = multihead_matmul_op->Input("W")[0];
@@ -84,19 +92,23 @@ void ReplaceDenseMultiheadMatmulWithSparsePass::ApplyImpl(Graph *graph) const {
     desc.SetAttr("alpha", multihead_matmul_op->GetAttr("alpha"));
     desc.SetAttr("head_number", multihead_matmul_op->GetAttr("head_number"));
     if (multihead_matmul_op->HasAttr("Input_scale")) {
-      desc.SetAttr("Input_scale", multihead_matmul_op->GetAttr("Input_scale"));
+      desc.SetAttr("Input_scale",
+                   multihead_matmul_op->GetAttr("Input_scale"));
     }
     if (multihead_matmul_op->HasAttr("fc_out_threshold")) {
-      desc.SetAttr("fc_out_threshold", multihead_matmul_op->GetAttr("fc_out_threshold"));
+      desc.SetAttr("fc_out_threshold",
+                   multihead_matmul_op->GetAttr("fc_out_threshold"));
     }
     if (multihead_matmul_op->HasAttr("qkv2context_plugin_int8")) {
-      desc.SetAttr("qkv2context_plugin_int8", multihead_matmul_op->GetAttr("qkv2context_plugin_int8"));
+      desc.SetAttr("qkv2context_plugin_int8",
+                   multihead_matmul_op->GetAttr("qkv2context_plugin_int8"));
     }
     if (multihead_matmul_op->HasAttr("dp_probs")) {
      desc.SetAttr("dp_probs", multihead_matmul_op->GetAttr("dp_probs"));
    }
     if (multihead_matmul_op->HasAttr("out_threshold")) {
-      desc.SetAttr("out_threshold", multihead_matmul_op->GetAttr("out_threshold"));
+      desc.SetAttr("out_threshold",
+                   multihead_matmul_op->GetAttr("out_threshold"));
     }
     desc.Flush();
     GraphSafeRemoveNodes(g, {multihead_matmul});
diff --git a/paddle/fluid/framework/ir/replace_dense_multihead_matmul_with_sparse_pass.h b/paddle/fluid/framework/ir/replace_dense_multihead_matmul_with_sparse_pass.h
index 4c43e9ba4efbb..abfbfb6b7b13d 100644
--- a/paddle/fluid/framework/ir/replace_dense_multihead_matmul_with_sparse_pass.h
+++ b/paddle/fluid/framework/ir/replace_dense_multihead_matmul_with_sparse_pass.h
@@ -37,7 +37,8 @@ class ReplaceDenseMultiheadMatmulWithSparsePass : public FusePassBase {
 
  protected:
  void ApplyImpl(ir::Graph* graph) const override;
-  const std::string name_scope_{"replace_dense_multihead_matmul_with_sparse_pass"};
+  const std::string name_scope_{
+      "replace_dense_multihead_matmul_with_sparse_pass"};
 };
 
 }  // namespace ir
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index 9d7ff1ff2048d..18a4a0f5f56ae 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -91,27 +91,27 @@ const std::vector<std::string> kTRTSubgraphPasses({
       "delete_quant_dequant_linear_op_pass",  //
       "add_support_int8_pass",                //
       // "fc_fuse_pass",                      //
-      "simplify_with_basic_ops_pass",  //
-      "embedding_eltwise_layernorm_fuse_pass", //
-      "preln_embedding_eltwise_layernorm_fuse_pass", //
-      "multihead_matmul_fuse_pass_v2", //
-      "multihead_matmul_fuse_pass_v3", //
-      "skip_layernorm_fuse_pass", //
-      "preln_skip_layernorm_fuse_pass", //
-      "conv_bn_fuse_pass", //
-      "unsqueeze2_eltwise_fuse_pass", //
-      "trt_squeeze2_matmul_fuse_pass", //
-      "trt_reshape2_matmul_fuse_pass", //
-      "trt_flatten2_matmul_fuse_pass", //
-      "trt_map_matmul_v2_to_mul_pass", //
-      "trt_map_matmul_v2_to_matmul_pass", //
-      "trt_map_matmul_to_mul_pass", //
-      "fc_fuse_pass", //
-      "conv_elementwise_add_fuse_pass", //
-      "replace_dense_with_sparse_pass", //
-      "replace_dense_multihead_matmul_with_sparse_pass", //
-      "tensorrt_subgraph_pass", //
-      "conv_bn_fuse_pass", //
+      "simplify_with_basic_ops_pass",                     //
+      "embedding_eltwise_layernorm_fuse_pass",            //
+      "preln_embedding_eltwise_layernorm_fuse_pass",      //
+      "multihead_matmul_fuse_pass_v2",                    //
+      "multihead_matmul_fuse_pass_v3",                    //
+      "skip_layernorm_fuse_pass",                         //
+      "preln_skip_layernorm_fuse_pass",                   //
+      "conv_bn_fuse_pass",                                //
+      "unsqueeze2_eltwise_fuse_pass",                     //
+      "trt_squeeze2_matmul_fuse_pass",                    //
+      "trt_reshape2_matmul_fuse_pass",                    //
+      "trt_flatten2_matmul_fuse_pass",                    //
+      "trt_map_matmul_v2_to_mul_pass",                    //
+      "trt_map_matmul_v2_to_matmul_pass",                 //
+      "trt_map_matmul_to_mul_pass",                       //
+      "fc_fuse_pass",                                     //
+      "conv_elementwise_add_fuse_pass",                   //
+      "replace_dense_with_sparse_pass",                   //
+      "replace_dense_multihead_matmul_with_sparse_pass",  //
+      "tensorrt_subgraph_pass",                           //
+      "conv_bn_fuse_pass",                                //
 #if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
                            // guaranteed at least v7
 // cudnn8.0 has memory leak problem in conv + eltwise + act, so we
diff --git a/paddle/fluid/inference/tensorrt/convert/sparse_multihead_matmul_op.cc b/paddle/fluid/inference/tensorrt/convert/sparse_multihead_matmul_op.cc
index 6933eecd5a12d..e1bf903c1f5d7 100644
--- a/paddle/fluid/inference/tensorrt/convert/sparse_multihead_matmul_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/sparse_multihead_matmul_op.cc
@@ -35,7 +35,8 @@ class SparseMultiheadMatMulOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(3) << "convert a fluid sparse_multihead_matmul op to a corresponding tensorrt "
+    VLOG(3) << "convert a fluid sparse_multihead_matmul op to a corresponding "
+               "tensorrt "
                "network structure";
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
@@ -335,9 +336,9 @@ class SparseMultiheadMatMulOpConverter : public OpConverter {
         w_data = static_cast<void*>(weight_data);
       }
 
-      TensorRTEngine::Weight weight{with_fp16 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT,
-                                    static_cast<void*>(w_data),
-                                    static_cast<size_t>(weight_t->numel())};
+      TensorRTEngine::Weight weight{
+          with_fp16 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT,
+          static_cast<void*>(w_data), static_cast<size_t>(weight_t->numel())};
       weight.dims.assign({n, m});
 
       TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
@@ -360,22 +361,23 @@ class SparseMultiheadMatMulOpConverter : public OpConverter {
       }
       reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
       reshape_before_fc_layer->setName(
-          ("shuffle_before_sparse_multihead_mamul(Output: " + output_name + ")")
+          ("shuffle_before_sparse_multihead_mamul(Output: " + output_name +
+           ")")
              .c_str());
 
       // add layer fc
       nvinfer1::ILayer* fc_layer = nullptr;
       if (op_desc.HasAttr("Input_scale")) {
-        plugin::SpmmPluginDynamic* plugin = new_spmm_plugin(
-            &weight, &bias, nvinfer1::DataType::kINT8, n);
+        plugin::SpmmPluginDynamic* plugin =
+            new_spmm_plugin(&weight, &bias, nvinfer1::DataType::kINT8, n);
         std::vector<nvinfer1::ITensor*> plugin_inputs;
         plugin_inputs.emplace_back(reshape_before_fc_layer->getOutput(0));
         fc_layer = engine_->network()->addPluginV2(
             plugin_inputs.data(), plugin_inputs.size(), *plugin);
       } else {
         plugin::SpmmPluginDynamic* plugin = new_spmm_plugin(
-            &weight, &bias,
-            with_fp16 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT,
+            &weight, &bias, with_fp16 ? nvinfer1::DataType::kHALF
+                                      : nvinfer1::DataType::kFLOAT,
             n);
         std::vector<nvinfer1::ITensor*> plugin_inputs;
         plugin_inputs.emplace_back(reshape_before_fc_layer->getOutput(0));
@@ -430,4 +432,5 @@ class SparseMultiheadMatMulOpConverter : public OpConverter {
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_TRT_OP_CONVERTER(sparse_multihead_matmul, SparseMultiheadMatMulOpConverter);
+REGISTER_TRT_OP_CONVERTER(sparse_multihead_matmul,
+                          SparseMultiheadMatMulOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index c60c18e1dbef0..5eaa2fd27ba8b 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -49,7 +49,7 @@ struct SimpleOpTypeSetTeller : public Teller {
     teller_set.insert("sparse_fc");
     int8_teller_set.insert("sparse_fc");
     teller_set.insert("sparse_multihead_matmul");
-    int8_teller_set.insert("sparse_multihead_matmul");  
+    int8_teller_set.insert("sparse_multihead_matmul");
 #endif
   }
 
@@ -1742,7 +1742,8 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
 #if IS_TRT_VERSION_GE(8000)
   if (op_type == "sparse_fc" || op_type == "sparse_multihead_matmul") {
     if (!with_dynamic_shape) {
-      VLOG(3) << "the sparse_fc and sparse_multihead_matmul does not support static shape yet";
+      VLOG(3) << "the sparse_fc and sparse_multihead_matmul does not support "
+                 "static shape yet";
       return false;
     }
   }
diff --git a/paddle/fluid/inference/tensorrt/plugin/spmm_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/spmm_plugin.cu
index 389a0807a5d3d..bb3ba9b7fe93d 100644
--- a/paddle/fluid/inference/tensorrt/plugin/spmm_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/spmm_plugin.cu
@@ -377,7 +377,7 @@ SpmmPluginDynamic::SpmmPluginDynamic(const std::string& layer_name,
                              compressed_size);
   cudaMemcpy(weight_compressed_dev_, weight_compressed_, compressed_size,
              cudaMemcpyHostToDevice);
-  
+
   has_bias_ = (bias != nullptr);
   if (has_bias_) {
     // Each plugin has a copy of bias
@@ -552,7 +552,7 @@ void SpmmPluginDynamic::configurePlugin(
                     platform::errors::InvalidArgument(
                         "precision_ should be equal to inputs[0].desc.type"));
   const auto& inDims0 = inputs[0].desc.dims;
-  if (inDims0.nbDims==5) {
+  if (inDims0.nbDims == 5) {
     PADDLE_ENFORCE_EQ(inDims0.nbDims, 5, platform::errors::InvalidArgument(
                                              "inDims0.nbDims should be 5"));
     PADDLE_ENFORCE_EQ(k_, inDims0.d[2],
@@ -565,7 +565,7 @@ void SpmmPluginDynamic::configurePlugin(
     const int BS = inputs->max.d[0];
     const int Seq = inputs->max.d[1];
     m_max_ = BS * Seq;
-  } else if (inDims0.nbDims==4) {
+  } else if (inDims0.nbDims == 4) {
     PADDLE_ENFORCE_EQ(inDims0.nbDims, 4, platform::errors::InvalidArgument(
                                              "inDims0.nbDims should be 4"));
     PADDLE_ENFORCE_EQ(k_, inDims0.d[1],
@@ -576,7 +576,7 @@ void SpmmPluginDynamic::configurePlugin(
     PADDLE_ENFORCE_EQ(inDims0.d[3], 1, platform::errors::InvalidArgument(
                                            "inDims0.d[3] should be 1"));
     const int BS_Seq = inputs->max.d[0];
-    m_max_ = BS_Seq;  
+    m_max_ = BS_Seq;
   }
   // The optimal algorighm id is for m = m_max_
   // To Do: configurePlugin takes time when m is changed
@@ -642,15 +642,15 @@ int SpmmPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
   PADDLE_ENFORCE_EQ(is_configured_, true,
                     platform::errors::InvalidArgument(
                         "The plugin is not configured before enqueue"));
-  if (inputDesc->dims.nbDims==5){
+  if (inputDesc->dims.nbDims == 5) {
     PADDLE_ENFORCE_EQ(
         k_, inputDesc->dims.d[2],
         platform::errors::InvalidArgument("k_ == inputDesc->dims.d[2]"));
-  } else if (inputDesc->dims.nbDims==4) {
+  } else if (inputDesc->dims.nbDims == 4) {
     PADDLE_ENFORCE_EQ(
         k_, inputDesc->dims.d[1],
         platform::errors::InvalidArgument("k_ == inputDesc->dims.d[1]"));
-  } 
+  }
   float alpha = 1.0f;
   float beta = 0.0f;
   if (inputDesc->type == nvinfer1::DataType::kFLOAT) {