fix tc trt shape #32458

Merged: 5 commits, Apr 25, 2021
Changes from all commits
107 changes: 51 additions & 56 deletions paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -160,66 +160,61 @@ class FcOpConverter : public OpConverter {
     if (engine_->with_dynamic_shape()) {
       // not NCHW layout, but NLP layout with added 'x 1 x 1'
       auto x_dim = X->getDimensions();
-      if (x_dim.nbDims == 3 || x_dim.nbDims == 2) {
-        auto output_name = op_desc.Output("Out").front();
-        // add shuffle before fc
-        nvinfer1::Dims reshape_before_fc_dim;
-        reshape_before_fc_dim.nbDims = x_dim.nbDims + 2;
-        for (int i = 0; i < x_dim.nbDims; i++) {
-          reshape_before_fc_dim.d[i] = 0;
-        }
-        reshape_before_fc_dim.d[x_dim.nbDims] = 1;
-        reshape_before_fc_dim.d[x_dim.nbDims + 1] = 1;
-        auto* reshape_before_fc_layer =
-            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
-        reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
-        reshape_before_fc_layer->setName(
-            ("shuffle_before_fc(Output: " + output_name + ")").c_str());
+      PADDLE_ENFORCE_LE(
+          x_dim.nbDims - x_num_col_dims, 3,
+          platform::errors::InvalidArgument(
+              "Params and input dims mismatch. Paddle-TRT FC "
+              "converter expects x_dim.nbDims - x_num_col_dims <= 3, but "
+              "x_dim.nbDims = %d, x_num_col_dims = %d.",
+              x_dim.nbDims, x_num_col_dims));
+      auto output_name = op_desc.Output("Out").front();
+      // add shuffle before fc
+      nvinfer1::Dims reshape_before_fc_dim;
+      // padding shape "x 1 x 1"
+      int padding_length = 3 - (x_dim.nbDims - x_num_col_dims);
+      reshape_before_fc_dim.nbDims = x_dim.nbDims + padding_length;
+      int cur_dim_index = reshape_before_fc_dim.nbDims - 1;
+      while (padding_length-- > 0) {
+        reshape_before_fc_dim.d[cur_dim_index--] = 1;
+      }
+      while (cur_dim_index >= 0) {
+        reshape_before_fc_dim.d[cur_dim_index--] = 0;
+      }
 
-        // add fc layer
-        auto* fc_layer = TRT_ENGINE_ADD_LAYER(
-            engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0),
-            n_output, weight.get(), bias.get());
-        fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());
+      auto* reshape_before_fc_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
+      reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
+      reshape_before_fc_layer->setName(
+          ("shuffle_before_fc(Output: " + output_name + ")").c_str());
 
-        // add shuffle after fc
-        nvinfer1::Dims reshape_after_fc_dim;
-        if (x_dim.nbDims == 3) {
-          if (x_num_col_dims == 2) {
-            reshape_after_fc_dim.nbDims = 3;
-            reshape_after_fc_dim.d[0] = 0;
-            reshape_after_fc_dim.d[1] = 0;
-            reshape_after_fc_dim.d[2] = 0;
-          } else {
-            reshape_after_fc_dim.nbDims = 2;
-            reshape_after_fc_dim.d[0] = 0;
-            auto dim = fc_layer->getOutput(0)->getDimensions();
-            reshape_after_fc_dim.d[1] = dim.d[1] * dim.d[2];
-          }
-          // x_dim.nbDims == 2
-        } else {
-          reshape_after_fc_dim.nbDims = 2;
-          reshape_after_fc_dim.d[0] = 0;
-          reshape_after_fc_dim.d[1] = 0;
-        }
-        auto* reshape_after_fc_layer =
-            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
-        reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
+      // add fc layer
+      auto* fc_layer = TRT_ENGINE_ADD_LAYER(
+          engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0),
+          n_output, weight.get(), bias.get());
+      fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());
 
-        if (activation_type == "relu") {
-          reshape_after_fc_layer->setName(
-              ("shuffle_after_fc(Output: " + output_name + ")").c_str());
-          nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
-              engine_, Activation, *(reshape_after_fc_layer->getOutput(0)),
-              nvinfer1::ActivationType::kRELU);
-          RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle",
-                                   {output_name}, test_mode);
-        } else {
-          RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
-                                   {output_name}, test_mode);
-        }
+      // add shuffle after fc
+      nvinfer1::Dims reshape_after_fc_dim;
+      reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
+      for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
+        reshape_after_fc_dim.d[i] = 0;
+      }
+
+      auto* reshape_after_fc_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
+      reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
+
+      if (activation_type == "relu") {
+        reshape_after_fc_layer->setName(
+            ("shuffle_after_fc(Output: " + output_name + ")").c_str());
+        nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
+            engine_, Activation, *(reshape_after_fc_layer->getOutput(0)),
+            nvinfer1::ActivationType::kRELU);
+        RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle",
+                                 {output_name}, test_mode);
       } else {
-        regist_fc(X, n_output, weight, bias);
+        RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
+                                 {output_name}, test_mode);
       }
       return;
     }
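Note for reviewers: the reshape bookkeeping above is easier to see outside the diff. Below is a minimal Python sketch of the dimension arithmetic the rewritten converter performs; it is my own illustration, not code from this PR, and `fc_reshape_dims` is a hypothetical helper name. In TensorRT's IShuffleLayer a reshape dimension of 0 means "copy that dimension from the input", and FullyConnected always reduces the last three dimensions, so the input is padded with trailing 1s until exactly three dimensions follow the first `x_num_col_dims` ones.

```python
def fc_reshape_dims(x_nb_dims, x_num_col_dims):
    # Mirrors the new fc_op.cc logic: pad trailing 1s so that exactly
    # three dimensions follow the first x_num_col_dims dimensions
    # (TensorRT FullyConnected consumes the last three dims as CHW).
    assert x_nb_dims - x_num_col_dims <= 3  # enforced by PADDLE_ENFORCE_LE
    padding_length = 3 - (x_nb_dims - x_num_col_dims)
    # 0 == "keep the input's dimension" in IShuffleLayer reshape semantics
    reshape_before_fc = [0] * x_nb_dims + [1] * padding_length
    reshape_after_fc = [0] * (x_num_col_dims + 1)
    return reshape_before_fc, reshape_after_fc

# A [batch, seq, hidden] input flattened from dim 1:
print(fc_reshape_dims(3, 1))  # ([0, 0, 0, 1], [0, 0]) -> 2-D FC output
# The same input flattened from dim 2:
print(fc_reshape_dims(3, 2))  # ([0, 0, 0, 1, 1], [0, 0, 0]) -> 3-D output
```

The shuffle after the FC then keeps the `x_num_col_dims` leading dimensions plus the output-channel dimension, which is why `reshape_after_fc_dim.nbDims` is `x_num_col_dims + 1` with every entry copied from the FC output.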
24 changes: 0 additions & 24 deletions paddle/fluid/inference/tensorrt/op_teller.cc
@@ -343,30 +343,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
       if (registry == nullptr) return false;
     }
 
-    if (op_type == "mul") {
-      const int x_num_col_dims =
-          desc.HasAttr("x_num_col_dims")
-              ? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
-              : (desc.HasAttr("in_num_col_dims")
-                     ? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
-                     : 1);
-      if (x_num_col_dims != 1 && x_num_col_dims != 2) {
-        return false;
-      }
-    }
-
-    if (op_type == "fc") {
-      const int x_num_col_dims =
-          desc.HasAttr("x_num_col_dims")
-              ? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
-              : (desc.HasAttr("in_num_col_dims")
-                     ? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
-                     : 1);
-      if (x_num_col_dims != 1 && x_num_col_dims != 2) {
-        return false;
-      }
-    }
-
     if (op_type == "nearest_interp") {
       std::vector<std::string> attrs{"data_layout", "interp_method",
                                      "align_corners", "scale",
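Note for reviewers: the two deleted teller checks used to reject `mul` and `fc` ops whose `x_num_col_dims` was neither 1 nor 2. With the converter change above, any `x_num_col_dims` satisfying `x_dim.nbDims - x_num_col_dims <= 3` is handled, and cases outside that range now fail loudly through `PADDLE_ENFORCE_LE` in the converter rather than being silently excluded from TensorRT.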
2 changes: 1 addition & 1 deletion python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -818,7 +818,7 @@ set_tests_properties(test_imperative_optimizer PROPERTIES TIMEOUT 120)
 set_tests_properties(test_pool2d_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_transpose_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_eager_deletion_gru_net PROPERTIES TIMEOUT 120)
-set_tests_properties(test_activation_op PROPERTIES TIMEOUT 180)
+set_tests_properties(test_activation_op PROPERTIES TIMEOUT 270)
 set_tests_properties(test_normal PROPERTIES TIMEOUT 120)
 set_tests_properties(test_lstmp_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_bilinear_interp_op PROPERTIES TIMEOUT 120)
@@ -55,5 +55,182 @@ def test_check_output(self):
self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims2Test(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[32, 128], dtype="float32")
fc_out1 = fluid.layers.fc(input=data,
size=64,
num_flatten_dims=1,
act="relu")
out = fluid.layers.softmax(input=fc_out1)

self.feeds = {"data": np.random.random((32, 128)).astype("float32")}
self.enable_trt = True
self.trt_parameters = FCFusePassTRTDynamicDims2Test.TensorRTParam(
1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = FCFusePassTRTDynamicDims2Test.DynamicShapeParam(
{
'data': [1, 128]
}, {'data': [64, 128]}, {'data': [32, 128]}, False)
self.fetch_list = [out]

def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32")
fc_out1 = fluid.layers.fc(input=data,
size=64,
num_flatten_dims=1,
act="relu")
out = fluid.layers.softmax(input=fc_out1)

self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
self.enable_trt = True
self.trt_parameters = FCFusePassTRTDynamicDims3Cols1Test.TensorRTParam(
1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols1Test.DynamicShapeParam(
{
'data': [1, 128, 32]
}, {'data': [64, 128, 32]}, {'data': [32, 128, 32]}, False)
self.fetch_list = [out]

def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32")
fc_out1 = fluid.layers.fc(input=data,
size=64,
num_flatten_dims=2,
act="relu")
out = fluid.layers.softmax(input=fc_out1)

self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
self.enable_trt = True
self.trt_parameters = FCFusePassTRTDynamicDims3Cols2Test.TensorRTParam(
1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols2Test.DynamicShapeParam(
{
'data': [1, 32, 32]
}, {'data': [64, 256, 32]}, {'data': [32, 128, 32]}, False)
self.fetch_list = [out]

def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[32, 12, 4, 6], dtype="float32")
fc_out1 = fluid.layers.fc(input=data,
size=64,
num_flatten_dims=1,
act="relu")
out = fluid.layers.softmax(input=fc_out1)

self.feeds = {
"data": np.random.random((32, 12, 4, 6)).astype("float32")
}
self.enable_trt = True
self.trt_parameters = FCFusePassTRTDynamicDims4Cols1Test.TensorRTParam(
1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols1Test.DynamicShapeParam(
{
'data': [1, 12, 4, 6]
}, {'data': [64, 12, 4, 6]}, {'data': [32, 12, 4, 6]}, False)
self.fetch_list = [out]

def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[32, 128, 32, 32], dtype="float32")
fc_out1 = fluid.layers.fc(input=data,
size=64,
num_flatten_dims=2,
act="relu")
out = fluid.layers.softmax(input=fc_out1)

self.feeds = {
"data": np.random.random((32, 128, 32, 32)).astype("float32")
}
self.enable_trt = True
self.trt_parameters = FCFusePassTRTDynamicDims4Cols2Test.TensorRTParam(
1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols2Test.DynamicShapeParam(
{
'data': [1, 64, 32, 32]
}, {'data': [64, 256, 32, 32]}, {'data': [32, 128, 32, 32]}, False)
self.fetch_list = [out]

def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[32, 128, 32, 32], dtype="float32")
fc_out1 = fluid.layers.fc(input=data,
size=64,
num_flatten_dims=3,
act="relu")
out = fluid.layers.softmax(input=fc_out1)

self.feeds = {
"data": np.random.random((32, 128, 32, 32)).astype("float32")
}
self.enable_trt = True
self.trt_parameters = FCFusePassTRTDynamicDims4Cols3Test.TensorRTParam(
1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols3Test.DynamicShapeParam(
{
'data': [1, 128, 32, 32]
}, {'data': [64, 128, 32, 32]}, {'data': [32, 128, 32, 32]}, False)
self.fetch_list = [out]

def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])


if __name__ == "__main__":
unittest.main()
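Note on the new tests: each `DynamicShapeParam` takes three shape dicts which, as I read the `InferencePassTest` harness, are the minimum, maximum, and optimal input shapes for the TensorRT dynamic-shape profile, with the trailing `False` disabling the TRT-plugin FP16 path. Every feed shape above sits inside its profile, and the six added classes cover 2-D, 3-D, and 4-D inputs against each `num_flatten_dims` value the converter now supports.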