-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
any engine for inference subgraph acceleration naive design #10028
Comments
Convertconstruct an TensorRT network from a sub-block's desc Converter Class and memberusing OpConverter= std::function<void(const framework::OpDesc&)>;
Class Converter {
std::unordered_map<std::string, OpConverter> op_registry_;
// tensorrt input/output tensor list, whose key is the fluid variable name, and value is the pointer position of tensorrt tensor.
std::map<std::string, nvinfer1::ITensor*>tr_tensors_;
// scope: fluid inference scope
const framework::Scope& scope_;
// network: tensorrt network
nvinfer1::INetworkDefinition* network_;
public:
Converter(const framework::BlockDesc& block,
const framework::Scope& scope,
nvinfer1::INetworkDefinition* network) {
block_ = block;
scope_= scope;
network_ = network;
this->register_op_converters();
}
// register different op converters
void register_op_converters();
// construct an TensorRT network from a sub-block's desc
void ConvertSubBlockToTensorRTNetwork();
// convert inputs of op to tensorrt inputs
void ConvertInput(const framework::OpDesc& first_op);
// convert a fluid Mul op to tensorrt fc layer
void ConvertMul(const framework::OpDesc& mul_op);
...
// convert tensorrt outputs to fluid
void ConvertOutput(const framework::OpDesc& last_op);
} function detailsvoid register_op_converters() {
op_registry_["Mul"]=ConvertMul;
op_registry_["Conv2d"]=ConvertConv2d;
...
}
void ConvertSubBlockToTensorRTNetwork() {
// convert fluid inputs of first op to tensorrt inputs.
convertInput(block_.ops[0]);
for (auto op : block_.AllOps()) {
// convert each fluid op to tensorrt layer
OpConverter op_converter = op_registry_.at(op.type());
op_converter(*this, op);
}
// convert tensorrt outputs of last op to fluid outputs.
convertOutput(block_.ops[block_.OpSize() - 1]);
}
// Register every input variable of `first_op` as a TensorRT network input
// and record the resulting ITensor* in tr_tensors_ under the fluid name.
void ConvertInput(const framework::OpDesc& first_op) {
  auto var_names = first_op.InputArgumentNames();
  for (const auto& var_name : var_names) {
    auto* fluid_tensor =
        scope_.FindVar(var_name)->GetMutable<framework::LoDTensor>();
    // do some transformation for the input fluid tensor, and get its type
    // and dims (transformation() is a placeholder in this design sketch).
    auto shape_tensor = transformation(fluid_tensor);
    nvinfer1::DataType type = shape_tensor.type();
    nvinfer1::DimsCHW dim = shape_tensor.dims();
    // add the input into the tensorrt network; addInput takes a C string
    // name, so convert the std::string explicitly.
    nvinfer1::ITensor* input_tensor =
        network_->addInput(var_name.c_str(), type, dim);
    // insert the input tensor into tensorrt's tensor list
    // (fixed: the original statement was missing its trailing semicolon).
    tr_tensors_[var_name] = input_tensor;
  }
}
// Convert a fluid Mul op into a TensorRT fully-connected layer:
// X (activation) comes from tr_tensors_, Y (weight) from the fluid scope.
void ConvertMul(const framework::OpDesc& op) {
  // `op` is a reference, so members are accessed with `.`, not `->`.
  // get the input tensor from tensorrt's tensor list.
  std::string x_var_name = op.Input("X");
  auto x_tensor = tr_tensors_[x_var_name];
  // get the weight from the fluid inference scope.
  // (fixed: "Y" is an *input* of the Mul op — the weight — not an output.)
  std::string y_var_name = op.Input("Y");
  auto* y_tensor =
      scope_.FindVar(y_var_name)->GetMutable<framework::LoDTensor>();
  // do some weight transformation (layout/format expected by TensorRT).
  auto y_shape_tensor = transformation(y_tensor);
  // add the layer into the tensorrt network; addFullyConnected takes the
  // input tensor by reference, so dereference the stored pointer.
  nvinfer1::IFullyConnectedLayer* layer = network_->addFullyConnected(
      *x_tensor, 1, y_shape_tensor, 0 /*bias*/);
  // get the output tensor, and insert it into tensorrt's tensor list.
  std::string out_var_name = op.Output("Out");
  nvinfer1::ITensor* output_tensor = layer->getOutput(0);
  tr_tensors_[out_var_name] = output_tensor;
}
// Mark every output variable of `last_op` as a TensorRT network output and
// propagate the result back into the corresponding fluid tensor in the scope.
void ConvertOutput(const framework::OpDesc& last_op) {
  auto var_names = last_op.OutputArgumentNames();
  for (const auto& var_name : var_names) {
    // get the output tensor from tensorrt's tensor list, and mark it as a
    // network output (fixed: the member is network_, not the undeclared
    // name `network`).
    auto tr_tensor = tr_tensors_[var_name];
    network_->markOutput(*tr_tensor);
    // do some transformation for the output tensorrt tensor, and modify
    // the fluid tensor in the inference scope accordingly.
    auto* fluid_tensor =
        scope_.FindVar(var_name)->GetMutable<framework::LoDTensor>();
    transformation(tr_tensor, fluid_tensor);
  }
}

how to call convert function
|
|
motivation：为什么采用子图的方式来调用TensorRT？它和直接使用TensorRT之间的性能有多少差异呢？这个issue中的主要信息如下：
|
TensorRTConverter: converts a fluid block desc to a TensorRT network — class and members
function details
usage
ITensorConverter: converts between a fluid tensor and nvinfer1::ITensor — class and members
detail functions
|
For example:
so in different op, different logic. Might be something like |
您好,此issue在近一个月内暂无更新,我们将于今天内关闭。若在关闭后您仍需跟进提问,可重新开启此问题,我们将在24小时内回复您。因关闭带来的不便我们深表歉意,请您谅解~感谢您对PaddlePaddle的支持! |
你好,我想问下int8模式下tensorrt子图会去做conv+bn融合操作吗 |
architecture
phrases
frontend
Some initial ideas: just add some special
with-block
just for inference
backend
x engine
x op
convert
construct x network from a subgraph's block desc
The text was updated successfully, but these errors were encountered: