PaddlePaddle · shangzhizhou · Jul 29, 2021 · Jul 23, 2021 · Jul 26, 2021 · Jul 26, 2021
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1256,6 +1256,7 @@ USE_TRT_CONVERTER(reshape);
 USE_TRT_CONVERTER(reduce_sum);
 USE_TRT_CONVERTER(gather_nd);
 USE_TRT_CONVERTER(reduce_mean);
+USE_TRT_CONVERTER(tile);
 #endif
 
 namespace paddle_infer {

diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -15,6 +15,7 @@ nv_library(tensorrt_converter
                 reshape_op.cc
                 reduce_op.cc
                 gather_nd_op.cc
+                tile_op.cc
            DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
 
 nv_test(test_op_converter SRCS test_op_converter.cc DEPS

diff --git a/paddle/fluid/inference/tensorrt/convert/tile_op.cc b/paddle/fluid/inference/tensorrt/convert/tile_op.cc
@@ -0,0 +1,78 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * TileOp, implemented with a TensorRT slice layer in wrap mode.
+ */
+class TileOpConverter : public OpConverter {
+ public:
+  // Converts a fluid tile op into a TensorRT Slice layer (kWRAP mode).
+  // Body is built only when IS_TRT_VERSION_GE(7000); repeat_times[0] must be 1.
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+#if IS_TRT_VERSION_GE(7000)
+    VLOG(4) << "convert a fluid tile op to tensorrt tile layer";
+    framework::OpDesc op_desc(op, nullptr);
+    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+    const nvinfer1::Dims input_shape = input->getDimensions();
+    std::vector<int> repeat_times =
+        BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("repeat_times"));
+    // If input_shape.nbDims + 1 < repeat_times.size() means we
+    // should expand 1 on batchsize. trt doesn't support this behavior.
+    const int full_rank = input_shape.nbDims + 1;
+    const int given_rank = static_cast<int>(repeat_times.size());
+    PADDLE_ENFORCE_GE(full_rank, given_rank,
+                      platform::errors::InvalidArgument(
+                          "Can't change batchsize, please check repeat_times"));
+    // Left-pad with 1s so that repeat_times[i + 1] pairs with input dim i.
+    repeat_times.insert(repeat_times.begin(), full_rank - given_rank, 1);
+    PADDLE_ENFORCE_EQ(
+        repeat_times[0], 1,
+        platform::errors::InvalidArgument(
+            "Can't expand on batchsize, please check repeat_times"));
+    // Slice from offset 0 with unit stride; kWRAP wraps out-of-range reads.
+    nvinfer1::Dims slice_start = input_shape;
+    nvinfer1::Dims slice_size = input_shape;
+    nvinfer1::Dims slice_stride = input_shape;
+    for (int i = 0; i < input_shape.nbDims; i++) {
+      slice_start.d[i] = 0;
+      slice_size.d[i] = input_shape.d[i] * repeat_times[i + 1];
+      slice_stride.d[i] = 1;
+    }
+    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, slice_start,
+                                       slice_size, slice_stride);
+    layer->setMode(nvinfer1::SliceMode::kWRAP);
+    auto output_name = op_desc.Output("Out")[0];
+    RreplenishLayerAndOutput(layer, "tile", {output_name}, test_mode);
+#endif
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(tile, TileOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -51,6 +51,9 @@ struct SimpleOpTypeSetTeller : public Teller {
 #if IS_TRT_VERSION_GE(7130)
     teller_set.insert("group_norm");
 #endif
+#if IS_TRT_VERSION_GE(7000)
+    teller_set.insert("tile");
+#endif
 #if CUDA_VERSION >= 10020
     teller_set.insert("reshape");
     teller_set.insert("reshape2");
@@ -716,19 +719,36 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
         VLOG(3) << "the " << op_type
                 << " does not have attr (keep_dim or dim or "
                    "reduce_all)";
+        std::cout << "attr " << desc.HasAttr("keep_dim") << " "
+                  << desc.HasAttr("dim") << " " << desc.HasAttr("reduce_all");
         return false;
       }
 
       // The batch size dimension cannot be reduced if it's not dynamic shape.
       if (!with_dynamic_shape) {
-        if (desc.HasAttr("reduce_all")) return false;
+        if (BOOST_GET_CONST(bool, desc.GetAttr("reduce_all"))) return false;
         std::vector<int32_t> dim =
             BOOST_GET_CONST(std::vector<int32_t>, desc.GetAttr("dim"));
         for (auto x : dim) {
           if (!x) return false;
         }
       }
     }
+#if IS_TRT_VERSION_GE(7000)
+    if (op_type == "tile") {
+      // Paddle-TRT does not support repeat_times given as input tensors.
+      auto inputs = desc.InputArgumentNames();
+      for (auto& input : inputs) {
+        if (input == "repeat_times_tensor" &&
+            desc.Input("repeat_times_tensor").size() > 0)
+          return false;
+        if (input == "RepeatTimes" && desc.Input("RepeatTimes").size() > 0)
+          return false;
+      }
+      if (with_dynamic_shape) return false;
+      if (!with_dynamic_shape && !desc.HasAttr("repeat_times")) return false;
+    }
+#endif
 
     if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
   }

diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
@@ -37,4 +37,5 @@ set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120)
 set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120)
 set_tests_properties(test_trt_pool_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 45)
 set_tests_properties(test_trt_reduce_mean_op PROPERTIES TIMEOUT 60)
+set_tests_properties(test_trt_tile_op PROPERTIES TIMEOUT 60)
 endif()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tile_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tile_op.py
@@ -0,0 +1,121 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from inference_pass_test import InferencePassTest
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.core import PassVersionChecker
+from paddle.fluid.core import AnalysisConfig
+
+
+class TRTTileTest(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[4, 3, 224, 256], dtype="float32")
+            tile_out = paddle.tile(x=data, repeat_times=[1, 1, 1, 1])
+            out = fluid.layers.batch_norm(tile_out, is_test=True)
+
+        self.feeds = {
+            "data": np.random.random([4, 3, 224, 256]).astype("float32"),
+        }
+        self.enable_trt = True
+        self.trt_parameters = TRTTileTest.TensorRTParam(
+            1 << 30, 16, 1, AnalysisConfig.Precision.Float32, False, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(use_gpu, flatten=True)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+class TRTTileExpandTest(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32")
+            tile_out = paddle.tile(x=data, repeat_times=[1, 4, 1080, 1920])
+            out = fluid.layers.batch_norm(tile_out, is_test=True)
+
+        self.feeds = {
+            "data": np.random.random([1, 1, 1, 1]).astype("float32"),
+        }
+        self.enable_trt = True
+        self.trt_parameters = TRTTileExpandTest.TensorRTParam(
+            1 << 30, 1, 1, AnalysisConfig.Precision.Float32, False, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(use_gpu, flatten=True)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+class TRTTileExpandStaticTest(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32")
+            tile_out = paddle.tile(x=data, repeat_times=[1, 4, 1080, 1920])
+            out = fluid.layers.batch_norm(tile_out, is_test=True)
+
+        self.feeds = {
+            "data": np.random.random([1, 1, 1, 1]).astype("float32"),
+        }
+        self.enable_trt = True
+        self.trt_parameters = TRTTileExpandStaticTest.TensorRTParam(
+            1 << 30, 1, 1, AnalysisConfig.Precision.Float32, True, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(use_gpu, flatten=True)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+class TRTTileExpandHalfTest(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32")
+            tile_out = paddle.tile(x=data, repeat_times=[1, 4, 1080, 1920])
+            out = fluid.layers.batch_norm(tile_out, is_test=True)
+
+        self.feeds = {
+            "data": np.random.random([1, 1, 1, 1]).astype("float32"),
+        }
+        self.enable_trt = True
+        self.trt_parameters = TRTTileExpandHalfTest.TensorRTParam(
+            1 << 30, 1, 1, AnalysisConfig.Precision.Half, False, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(use_gpu, flatten=True)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+if __name__ == "__main__":
+    unittest.main()