fix tc trt shape (#32458)
* fix tc trt shape

* fix fc dynamic shape

* add fc shape assert

* update
shangzhizhou committed Apr 25, 2021
1 parent 06276f4 commit f272e59
Showing 4 changed files with 229 additions and 81 deletions.
107 changes: 51 additions & 56 deletions paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -160,66 +160,61 @@ class FcOpConverter : public OpConverter {
if (engine_->with_dynamic_shape()) {
// not NCHW layout, but NLP layout with added 'x 1 x 1'
auto x_dim = X->getDimensions();
if (x_dim.nbDims == 3 || x_dim.nbDims == 2) {
auto output_name = op_desc.Output("Out").front();
// add shuffle before fc
nvinfer1::Dims reshape_before_fc_dim;
reshape_before_fc_dim.nbDims = x_dim.nbDims + 2;
for (int i = 0; i < x_dim.nbDims; i++) {
reshape_before_fc_dim.d[i] = 0;
}
reshape_before_fc_dim.d[x_dim.nbDims] = 1;
reshape_before_fc_dim.d[x_dim.nbDims + 1] = 1;
auto* reshape_before_fc_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
reshape_before_fc_layer->setName(
("shuffle_before_fc(Output: " + output_name + ")").c_str());
PADDLE_ENFORCE_LE(
x_dim.nbDims - x_num_col_dims, 3,
platform::errors::InvalidArgument(
"Params and input dims mismatch. Paddle-TRT FC "
"converter expects x_dim.nbDims - x_num_col_dims <= 3, but "
"x_dim.nbDims = %d, x_num_col_dims = %d.",
x_dim.nbDims, x_num_col_dims));
auto output_name = op_desc.Output("Out").front();
// add shuffle before fc
nvinfer1::Dims reshape_before_fc_dim;
// padding shape "x 1 x 1"
int padding_length = 3 - (x_dim.nbDims - x_num_col_dims);
reshape_before_fc_dim.nbDims = x_dim.nbDims + padding_length;
int cur_dim_index = reshape_before_fc_dim.nbDims - 1;
while (padding_length-- > 0) {
reshape_before_fc_dim.d[cur_dim_index--] = 1;
}
while (cur_dim_index >= 0) {
reshape_before_fc_dim.d[cur_dim_index--] = 0;
}

// add fc layer
auto* fc_layer = TRT_ENGINE_ADD_LAYER(
engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0),
n_output, weight.get(), bias.get());
fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());
auto* reshape_before_fc_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
reshape_before_fc_layer->setName(
("shuffle_before_fc(Output: " + output_name + ")").c_str());

// add shuffle after fc
nvinfer1::Dims reshape_after_fc_dim;
if (x_dim.nbDims == 3) {
if (x_num_col_dims == 2) {
reshape_after_fc_dim.nbDims = 3;
reshape_after_fc_dim.d[0] = 0;
reshape_after_fc_dim.d[1] = 0;
reshape_after_fc_dim.d[2] = 0;
} else {
reshape_after_fc_dim.nbDims = 2;
reshape_after_fc_dim.d[0] = 0;
auto dim = fc_layer->getOutput(0)->getDimensions();
reshape_after_fc_dim.d[1] = dim.d[1] * dim.d[2];
}
// x_dim.nbDims == 2
} else {
reshape_after_fc_dim.nbDims = 2;
reshape_after_fc_dim.d[0] = 0;
reshape_after_fc_dim.d[1] = 0;
}
auto* reshape_after_fc_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
// add fc layer
auto* fc_layer = TRT_ENGINE_ADD_LAYER(
engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0),
n_output, weight.get(), bias.get());
fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());

if (activation_type == "relu") {
reshape_after_fc_layer->setName(
("shuffle_after_fc(Output: " + output_name + ")").c_str());
nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(reshape_after_fc_layer->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle",
{output_name}, test_mode);
} else {
RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
{output_name}, test_mode);
}
// add shuffle after fc
nvinfer1::Dims reshape_after_fc_dim;
reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
reshape_after_fc_dim.d[i] = 0;
}

auto* reshape_after_fc_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);

if (activation_type == "relu") {
reshape_after_fc_layer->setName(
("shuffle_after_fc(Output: " + output_name + ")").c_str());
nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(reshape_after_fc_layer->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle",
{output_name}, test_mode);
} else {
regist_fc(X, n_output, weight, bias);
RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
{output_name}, test_mode);
}
return;
}
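
To make the dimension arithmetic of the new converter code easier to follow, here is a minimal Python sketch (illustrative only, not part of the commit; the helper names are made up). The input is padded with trailing 1s until exactly three axes follow the first x_num_col_dims axes before TensorRT's FullyConnected layer, and the FC output is then collapsed back to x_num_col_dims + 1 axes. A 0 in a reshape dimension means "copy the corresponding input dimension", per TensorRT's IShuffleLayer convention.

def reshape_before_fc(x_rank, x_num_col_dims):
    # Mirrors the converter: pad trailing 1s so x_rank - x_num_col_dims reaches 3;
    # 0 keeps the corresponding input dimension unchanged.
    padding_length = 3 - (x_rank - x_num_col_dims)
    assert padding_length >= 0, "converter requires x_rank - x_num_col_dims <= 3"
    return [0] * x_rank + [1] * padding_length

def reshape_after_fc(x_num_col_dims):
    # Collapse the FC output back to x_num_col_dims + 1 dimensions.
    return [0] * (x_num_col_dims + 1)

# Example: a rank-3 input such as [batch, 128, 32] with x_num_col_dims == 2
print(reshape_before_fc(3, 2))  # [0, 0, 0, 1, 1]
print(reshape_after_fc(2))      # [0, 0, 0]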
24 changes: 0 additions & 24 deletions paddle/fluid/inference/tensorrt/op_teller.cc
@@ -343,30 +343,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if (registry == nullptr) return false;
}

if (op_type == "mul") {
const int x_num_col_dims =
desc.HasAttr("x_num_col_dims")
? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
: (desc.HasAttr("in_num_col_dims")
? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
: 1);
if (x_num_col_dims != 1 && x_num_col_dims != 2) {
return false;
}
}

if (op_type == "fc") {
const int x_num_col_dims =
desc.HasAttr("x_num_col_dims")
? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
: (desc.HasAttr("in_num_col_dims")
? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
: 1);
if (x_num_col_dims != 1 && x_num_col_dims != 2) {
return false;
}
}

if (op_type == "nearest_interp") {
std::vector<std::string> attrs{"data_layout", "interp_method",
"align_corners", "scale",
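
The removed teller checks above had rejected any mul or fc op whose x_num_col_dims was not 1 or 2; the FC converter now asserts the condition it actually supports (x_dim.nbDims - x_num_col_dims <= 3 in the dynamic-shape path), so the blanket restriction is dropped. For context, x_num_col_dims (num_flatten_dims in the Python fc API) controls how the input is flattened into the 2-D matrix seen by the underlying matmul, roughly as in this NumPy sketch (illustrative only, not part of the commit):

import numpy as np

def fc_flatten(x, num_flatten_dims):
    # The first num_flatten_dims axes become the rows of the matmul;
    # the remaining axes are flattened into the feature (column) axis.
    rows = int(np.prod(x.shape[:num_flatten_dims]))
    return x.reshape(rows, -1)

x = np.zeros((32, 128, 32), dtype="float32")
print(fc_flatten(x, 1).shape)  # (32, 4096)
print(fc_flatten(x, 2).shape)  # (4096, 32)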
2 changes: 1 addition & 1 deletion python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -819,7 +819,7 @@ set_tests_properties(test_imperative_optimizer PROPERTIES TIMEOUT 120)
set_tests_properties(test_pool2d_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_transpose_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_eager_deletion_gru_net PROPERTIES TIMEOUT 120)
set_tests_properties(test_activation_op PROPERTIES TIMEOUT 180)
set_tests_properties(test_activation_op PROPERTIES TIMEOUT 270)
set_tests_properties(test_normal PROPERTIES TIMEOUT 120)
set_tests_properties(test_lstmp_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_bilinear_interp_op PROPERTIES TIMEOUT 120)
@@ -55,5 +55,182 @@ def test_check_output(self):
self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims2Test(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[32, 128], dtype="float32")
fc_out1 = fluid.layers.fc(input=data,
size=64,
num_flatten_dims=1,
act="relu")
out = fluid.layers.softmax(input=fc_out1)

self.feeds = {"data": np.random.random((32, 128)).astype("float32")}
self.enable_trt = True
self.trt_parameters = FCFusePassTRTDynamicDims2Test.TensorRTParam(
1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = FCFusePassTRTDynamicDims2Test.DynamicShapeParam(
{
'data': [1, 128]
}, {'data': [64, 128]}, {'data': [32, 128]}, False)
self.fetch_list = [out]

def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])
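
A note on the harness parameters used throughout these new tests: TensorRTParam and DynamicShapeParam are named tuples supplied by InferencePassTest. The three dictionaries passed to DynamicShapeParam are read here as the minimum, maximum, and optimal input shapes for the TensorRT dynamic-shape profile, with the trailing False controlling a plugin FP16 option; the field names below are assumed from the harness, not stated in this diff.

# Hypothetical, spelled-out reading of the DynamicShapeParam call above
# (argument names assumed, not taken from this diff).
min_input_shape = {'data': [1, 128]}     # smallest 'data' shape the profile must accept
max_input_shape = {'data': [64, 128]}    # largest 'data' shape
optim_input_shape = {'data': [32, 128]}  # shape TensorRT tunes its kernels for
disable_trt_plugin_fp16 = False
# These four values are what the test passes positionally to DynamicShapeParam.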


class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32")
fc_out1 = fluid.layers.fc(input=data,
size=64,
num_flatten_dims=1,
act="relu")
out = fluid.layers.softmax(input=fc_out1)

self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
self.enable_trt = True
self.trt_parameters = FCFusePassTRTDynamicDims3Cols1Test.TensorRTParam(
1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols1Test.DynamicShapeParam(
{
'data': [1, 128, 32]
}, {'data': [64, 128, 32]}, {'data': [32, 128, 32]}, False)
self.fetch_list = [out]

def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32")
fc_out1 = fluid.layers.fc(input=data,
size=64,
num_flatten_dims=2,
act="relu")
out = fluid.layers.softmax(input=fc_out1)

self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
self.enable_trt = True
self.trt_parameters = FCFusePassTRTDynamicDims3Cols2Test.TensorRTParam(
1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols2Test.DynamicShapeParam(
{
'data': [1, 32, 32]
}, {'data': [64, 256, 32]}, {'data': [32, 128, 32]}, False)
self.fetch_list = [out]

def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[32, 12, 4, 6], dtype="float32")
fc_out1 = fluid.layers.fc(input=data,
size=64,
num_flatten_dims=1,
act="relu")
out = fluid.layers.softmax(input=fc_out1)

self.feeds = {
"data": np.random.random((32, 12, 4, 6)).astype("float32")
}
self.enable_trt = True
self.trt_parameters = FCFusePassTRTDynamicDims4Cols1Test.TensorRTParam(
1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols1Test.DynamicShapeParam(
{
'data': [1, 12, 4, 6]
}, {'data': [64, 12, 4, 6]}, {'data': [32, 12, 4, 6]}, False)
self.fetch_list = [out]

def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[32, 128, 32, 32], dtype="float32")
fc_out1 = fluid.layers.fc(input=data,
size=64,
num_flatten_dims=2,
act="relu")
out = fluid.layers.softmax(input=fc_out1)

self.feeds = {
"data": np.random.random((32, 128, 32, 32)).astype("float32")
}
self.enable_trt = True
self.trt_parameters = FCFusePassTRTDynamicDims4Cols2Test.TensorRTParam(
1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols2Test.DynamicShapeParam(
{
'data': [1, 64, 32, 32]
}, {'data': [64, 256, 32, 32]}, {'data': [32, 128, 32, 32]}, False)
self.fetch_list = [out]

def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[32, 128, 32, 32], dtype="float32")
fc_out1 = fluid.layers.fc(input=data,
size=64,
num_flatten_dims=3,
act="relu")
out = fluid.layers.softmax(input=fc_out1)

self.feeds = {
"data": np.random.random((32, 128, 32, 32)).astype("float32")
}
self.enable_trt = True
self.trt_parameters = FCFusePassTRTDynamicDims4Cols3Test.TensorRTParam(
1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols3Test.DynamicShapeParam(
{
'data': [1, 128, 32, 32]
}, {'data': [64, 128, 32, 32]}, {'data': [32, 128, 32, 32]}, False)
self.fetch_list = [out]

def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])


if __name__ == "__main__":
unittest.main()
