
Add multi-threading demo #1179

Open · wants to merge 19 commits into base: develop
2 changes: 1 addition & 1 deletion deploy/cpp/README.md
@@ -27,7 +27,7 @@

### Model inference examples
- [Single-GPU model inference example](./docs/demo/model_infer.md)
- [Multi-GPU model inference example](./docs/demo/multi_gpu_model_infer.md)
- [Multi-threaded inference example](./docs/demo/multi_thread_infer.md)
- [PaddleInference with TensorRT model inference example](./docs/demo/tensorrt_infer.md)
- [Encrypted model inference example](./docs/demo/decrypt_infer.md)

10 changes: 7 additions & 3 deletions deploy/cpp/demo/CMakeLists.txt
@@ -126,9 +126,13 @@ add_executable(batch_infer batch_infer.cpp ${SRC} ${ENGINE_SRC} ${DETECTOR_SRC}
ADD_DEPENDENCIES(batch_infer ext-yaml-cpp)
target_link_libraries(batch_infer ${DEPS})

add_executable(multi_gpu_model_infer multi_gpu_model_infer.cpp ${SRC} ${ENGINE_SRC} ${DETECTOR_SRC} ${ENCRYPTION_SRC})
ADD_DEPENDENCIES(multi_gpu_model_infer ext-yaml-cpp)
target_link_libraries(multi_gpu_model_infer ${DEPS})
add_executable(multi_thread_infer multi_thread_infer.cpp ${SRC} ${ENGINE_SRC} ${DETECTOR_SRC} ${ENCRYPTION_SRC})
ADD_DEPENDENCIES(multi_thread_infer ext-yaml-cpp)
target_link_libraries(multi_thread_infer ${DEPS})

add_executable(multi_thread_infer2 multi_thread_infer2.cpp ${SRC} ${ENGINE_SRC} ${DETECTOR_SRC} ${ENCRYPTION_SRC})
ADD_DEPENDENCIES(multi_thread_infer2 ext-yaml-cpp)
target_link_libraries(multi_thread_infer2 ${DEPS})

if (WITH_PADDLE_TENSORRT)
add_executable(tensorrt_infer tensorrt_infer.cpp ${SRC} ${ENGINE_SRC} ${DETECTOR_SRC} ${ENCRYPTION_SRC})
@@ -18,16 +18,17 @@
#include <string>
#include <fstream>

#include "model_deploy/common/include/multi_gpu_model.h"
#include "model_deploy/common/include/multi_thread_model.h"

DEFINE_string(model_filename, "", "Path of det inference model");
DEFINE_string(params_filename, "", "Path of det inference params");
DEFINE_string(cfg_file, "", "Path of yaml file");
DEFINE_string(model_type, "", "model type");
DEFINE_string(image_list, "", "Path of test image file");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(gpu_id, "0", "GPU card id, example: 0,2,3");
DEFINE_int32(batch_size, 1, "Batch size of infering");
DEFINE_int32(thread_num, 1, "thread num of preprocessing");
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");

int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
@@ -45,7 +46,7 @@ int main(int argc, char** argv) {

std::cout << "start create model" << std::endl;
// create model
PaddleDeploy::MultiGPUModel model;
PaddleDeploy::MultiThreadModel model;
if (!model.Init(FLAGS_model_type, FLAGS_cfg_file, gpu_ids.size())) {
return -1;
}
@@ -54,46 +55,42 @@ int main(int argc, char** argv) {
PaddleDeploy::PaddleEngineConfig engine_config;
engine_config.model_filename = FLAGS_model_filename;
engine_config.params_filename = FLAGS_params_filename;
engine_config.use_gpu = true;
engine_config.use_gpu = FLAGS_use_gpu;
engine_config.max_batch_size = FLAGS_batch_size;
  // When GPU is enabled, gpu_ids holds GPU card indices (repeats allowed). For example, 0,0,1 creates two instances on card 0 and one instance on card 1
  // When running on CPU, gpu_ids can be any integers; the number of entries sets the number of threads. For example, 0,0,0 creates three instances
if (!model.PaddleEngineInit(engine_config, gpu_ids)) {
return -1;
}

// Mini-batch
if (FLAGS_image_list == "") {
std::cerr << "image_list should be defined" << std::endl;
return -1;
}
std::vector<std::string> image_paths;
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
return -1;
}
std::string image_path;
while (getline(inf, image_path)) {
image_paths.push_back(image_path);
}
// prepare data
std::vector<cv::Mat> imgs;
imgs.push_back(std::move(cv::imread(FLAGS_image)));

std::cout << "start model predict " << image_paths.size() << std::endl;
// infer
std::vector<PaddleDeploy::Result> results;
for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
// Read image
int im_vec_size =
std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
std::vector<cv::Mat> im_vec(im_vec_size - i);
#pragma omp parallel for num_threads(im_vec_size - i)
for (int j = i; j < im_vec_size; ++j) {
im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
std::vector<std::vector<PaddleDeploy::Result>> results(5);
std::vector<std::future<bool>> futures(5);
for(;;) {
for(int i = 0; i < 5; i++) {
futures[i] = model.AddPredictTask(imgs, &results[i]);
}
model.Predict(im_vec, &results, FLAGS_thread_num);
std::cout << i / FLAGS_batch_size << " group" << std::endl;
for (auto j = 0; j < results.size(); ++j) {
std::cout << "Result for sample " << j << std::endl;
std::cout << results[j] << std::endl;
for(int i = 0; i < 5; i++) {
futures[i].get();
std::cout << i << " result:" << results[i][0] << std::endl;
}

/*
std::vector<PaddleDeploy::Result> batch_results;
std::vector<cv::Mat> batch_imgs;
for(int i = 0; i < 5; i++) {
batch_imgs.push_back(std::move(cv::imread(FLAGS_image)));
}
  // For a large input batch, this interface automatically splits the input and spreads it evenly across the threads (note: it blocks until all results are ready)
model.Predict(batch_imgs, &batch_results);
for(int i = 0; i < 5; i++) {
std::cout << i << " batch_result:" << batch_results[i] << std::endl;
}
*/
}

return 0;
}
93 changes: 93 additions & 0 deletions deploy/cpp/demo/multi_thread_infer2.cpp
@@ -0,0 +1,93 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gflags/gflags.h>
#include <omp.h>
#include <memory>
#include <string>
#include <fstream>

#include "model_deploy/common/include/paddle_deploy.h"

DEFINE_string(model_filename, "", "Path of det inference model");
DEFINE_string(params_filename, "", "Path of det inference params");
DEFINE_string(cfg_file, "", "Path of yaml file");
DEFINE_string(model_type, "", "model type");
DEFINE_string(image, "", "Path of test image file");
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
DEFINE_int32(gpu_id, 0, "GPU card id");

void infer(PaddleDeploy::Model* model, const std::vector<cv::Mat>& imgs,
std::vector<PaddleDeploy::Result>* results, int thread_num) {
model->Predict(imgs, results, thread_num);
}

int main(int argc, char** argv) {
// Parsing command-line
google::ParseCommandLineFlags(&argc, &argv, true);

// create model
PaddleDeploy::Model* model1 = PaddleDeploy::CreateModel(FLAGS_model_type);
PaddleDeploy::Model* model2 = PaddleDeploy::CreateModel(FLAGS_model_type);
PaddleDeploy::Model* model3 = PaddleDeploy::CreateModel(FLAGS_model_type);

// model init
model1->Init(FLAGS_cfg_file);
model2->Init(FLAGS_cfg_file);
model3->Init(FLAGS_cfg_file);

// inference engine init
PaddleDeploy::PaddleEngineConfig engine_config;
engine_config.model_filename = FLAGS_model_filename;
engine_config.params_filename = FLAGS_params_filename;
engine_config.use_gpu = FLAGS_use_gpu;
engine_config.gpu_id = FLAGS_gpu_id;

model1->PaddleEngineInit(engine_config);
model2->PaddleEngineInit(engine_config);
model3->PaddleEngineInit(engine_config);

  // Multi-threaded inference must reuse threads via a thread pool or another mechanism; frequently creating and destroying threads can cause memory problems in the inference engine
ThreadPool pool(3);
pool.init();

// prepare data
std::vector<cv::Mat> imgs;
imgs.push_back(std::move(cv::imread(FLAGS_image)));

// predict
std::vector<PaddleDeploy::Result> results1;
std::vector<PaddleDeploy::Result> results2;
std::vector<PaddleDeploy::Result> results3;

auto future1 = pool.submit(infer, model1, ref(imgs), &results1, 1);
future1.get();

auto future2 = pool.submit(infer, model2, ref(imgs), &results2, 1);
future2.get();

auto future3 = pool.submit(infer, model3, ref(imgs), &results3, 1);
future3.get();

// print result
std::cout << "result1:" << results1[0] << std::endl;
std::cout << "result2:" << results2[0] << std::endl;
std::cout << "result3:" << results3[0] << std::endl;

pool.shutdown();
delete model1;
delete model2;
delete model3;
return 0;
}
2 changes: 0 additions & 2 deletions deploy/cpp/docs/compile/openvino/openvino_windows.md
@@ -81,7 +81,6 @@ git clone https://github.com/PaddlePaddle/PaddleX.git

### Step 4. Build
1. Open Visual Studio 2019 Community and click `Continue without code`

![](../../images/vs2019_step1.png)

2. Click `File` -> `Open` -> `CMake`
@@ -99,7 +98,6 @@ git clone https://github.com/PaddlePaddle/PaddleX.git
4. Click `Browse` and set the build options to point to the `gflag`, `OpenCV`, and `OpenVINO` paths (alternatively, click `Edit JSON` in the upper right, edit the JSON file directly, save it, and then click Project -> Generate Cache)

![](../../images/vs2019_step5.png)

The dependency path options are described below; note that an OpenVINO build only requires checking and filling in the following parameters:

| Parameter | Meaning |
2 changes: 1 addition & 1 deletion deploy/cpp/docs/compile/paddle/linux.md
@@ -75,7 +75,7 @@ sh script/build.sh
After building, several executable demo binaries such as `model_infer`, `multi_gpu_model_infer`, and `batch_infer` are generated under `PaddleX/deploy/cpp/build/demo`, used for single-GPU, multi-GPU, and multi-batch inference respectively. See the following documents for usage:

- [Single-GPU model inference example](../../demo/model_infer.md)
- [Multi-GPU model inference example](../../demo/multi_gpu_model_infer.md)
- [Multi-threaded inference example](../../demo/multi_thread_infer.md)

If TensorRT is enabled at build time, an additional `tensorrt_infer` demo binary is generated. See the following document for usage:
- [PaddleInference with TensorRT model inference example](../../demo/tensorrt_infer.md)
2 changes: 1 addition & 1 deletion deploy/cpp/docs/compile/paddle/windows.md
@@ -107,7 +107,7 @@ PaddlePaddle C++ inference libraries are provided for different combinations of GPU support, TensorRT support, and
After building, two executable demo binaries, `model_infer` and `multi_gpu_model_infer`, are generated under `PaddleX/deploy/cpp/build/demo`, used for single-GPU and multi-GPU inference respectively. See the following documents for usage:

- [Single-GPU model inference example](../../demo/model_infer.md)
- [Multi-GPU model inference example](../../demo/multi_gpu_model_infer.md)
- [Multi-threaded inference example](../../demo/multi_thread_infer.md)

If TensorRT is enabled at build time, an additional `tensorrt_infer` demo binary is generated. See the following document for usage:
- [PaddleInference with TensorRT model inference example](../../demo/tensorrt_infer.md)
@@ -1,12 +1,32 @@
# Multi-GPU model inference example
# Multi-threaded inference example

This document describes how to use the compiled `PaddleX/deploy/cpp/demo/multi_gpu_model_infer.cpp`. It is provided as a reference; developers can build on this demo for their own integration needs.
This document describes how to use the compiled `PaddleX/deploy/cpp/demo/multi_thread_infer.cpp` and `multi_thread_infer2.cpp`. They are provided as a reference; developers can build on these demos for their own integration needs.

The multi-GPU mechanism works as follows:

- During model initialization, an independent model is loaded on each GPU card
- During prediction, the input image vector is split evenly across the GPU cards and run in parallel on multiple threads
- After prediction, the results from all GPU cards are gathered and returned
The demos target the following scenarios:
- Multiple GPU cards
- Multiple models on a single GPU card (note: running too many models on one GPU at the same time may reduce each model's inference performance)
- Multi-threaded CPU inference

**Notes:**
- Do not create and destroy threads frequently during multi-threaded inference; doing so causes memory problems in the inference engine. Reuse threads with a thread pool (the `multi_thread_infer2.cpp` demo) or another mechanism (the `multi_thread_infer.cpp` demo).
- In the `multi_thread_infer.cpp` example, pick one of the two interfaces; they cannot be used at the same time.


`multi_thread_infer.cpp` overview:
- Creates one model instance per entry in the gpu_ids argument
- Initializes each instance (model.Init and model.PaddleEngineInit)
- Inference offers two interfaces, AddPredictTask and Predict; pick whichever fits your needs (a sketch follows this list)
- AddPredictTask: submit a task (passing the input and output); idle threads automatically pull tasks from the queue in order and run inference. Call get() on the returned future to confirm the result is ready.
- Predict: pass in a batch of images; the input is split evenly across the threads according to the thread count, and the call waits for all threads to finish before merging their results.
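
A condensed sketch of the AddPredictTask flow, assembled from the demo above. The header path, class names, and YOLOv3 file paths follow the demo and the run command in this document; the image path and the number of submitted tasks are illustrative placeholders, and error handling is trimmed.

```
#include <future>
#include <iostream>
#include <vector>

// OpenCV types (cv::Mat, cv::imread) are assumed to come in through this
// deploy header, as in the demo source.
#include "model_deploy/common/include/multi_thread_model.h"

int main() {
  // One engine instance per entry: {0, 1} -> one instance on card 0, one on card 1.
  std::vector<int> gpu_ids = {0, 1};

  PaddleDeploy::MultiThreadModel model;
  if (!model.Init("det", "yolov3_mbv1/model/infer_cfg.yml", gpu_ids.size())) return -1;

  PaddleDeploy::PaddleEngineConfig engine_config;
  engine_config.model_filename = "yolov3_mbv1/model/model.pdmodel";
  engine_config.params_filename = "yolov3_mbv1/model/model.pdiparams";
  engine_config.use_gpu = true;
  if (!model.PaddleEngineInit(engine_config, gpu_ids)) return -1;

  std::vector<cv::Mat> imgs = {cv::imread("images/000000010583.jpg")};

  // Submit tasks; idle worker threads pull them from the queue in order.
  std::vector<std::vector<PaddleDeploy::Result>> results(2);
  std::vector<std::future<bool>> futures(2);
  for (int i = 0; i < 2; ++i) {
    futures[i] = model.AddPredictTask(imgs, &results[i]);
  }
  for (int i = 0; i < 2; ++i) {
    futures[i].get();  // blocks until task i has finished
    std::cout << i << " result: " << results[i][0] << std::endl;
  }
  return 0;
}
```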


`multi_thread_infer2.cpp` overview:
- Initialize n model instances (modelx.Init and modelx.PaddleEngineInit)
- Initialize a thread pool (ThreadPool pool(n); pool.init();)
- Submit tasks (pool.submit); **note:** a single instance must not be used by two threads at the same time
- Once futurex.get() confirms the result is ready, process the corresponding resultx (a sketch follows this list)
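
A minimal sketch of the `multi_thread_infer2.cpp` pattern. The `ThreadPool` type and its `init`/`submit`/`shutdown` calls are assumed to be available through the demo's includes, and the model paths follow the run command in this document; using two instances and a local image path is illustrative only.

```
#include <iostream>
#include <vector>

#include "model_deploy/common/include/paddle_deploy.h"
// ThreadPool is assumed to come from the same headers used by multi_thread_infer2.cpp.

void infer(PaddleDeploy::Model* model, const std::vector<cv::Mat>& imgs,
           std::vector<PaddleDeploy::Result>* results, int thread_num) {
  model->Predict(imgs, results, thread_num);
}

int main() {
  // Two independent instances: one per worker thread, never shared between threads.
  PaddleDeploy::Model* model1 = PaddleDeploy::CreateModel("det");
  PaddleDeploy::Model* model2 = PaddleDeploy::CreateModel("det");
  model1->Init("yolov3_mbv1/model/infer_cfg.yml");
  model2->Init("yolov3_mbv1/model/infer_cfg.yml");

  PaddleDeploy::PaddleEngineConfig engine_config;
  engine_config.model_filename = "yolov3_mbv1/model/model.pdmodel";
  engine_config.params_filename = "yolov3_mbv1/model/model.pdiparams";
  model1->PaddleEngineInit(engine_config);
  model2->PaddleEngineInit(engine_config);

  // Reuse pool threads instead of spawning a new thread per request.
  ThreadPool pool(2);
  pool.init();

  std::vector<cv::Mat> imgs = {cv::imread("images/000000010583.jpg")};
  std::vector<PaddleDeploy::Result> results1, results2;

  auto future1 = pool.submit(infer, model1, std::ref(imgs), &results1, 1);
  auto future2 = pool.submit(infer, model2, std::ref(imgs), &results2, 1);
  future1.get();  // wait for both tasks before reading the results
  future2.get();

  std::cout << "result1: " << results1[0] << "\nresult2: " << results2[0] << std::endl;

  pool.shutdown();
  delete model1;
  delete model2;
  return 0;
}
```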


## Step 1. Build

@@ -33,26 +53,15 @@
Using the YOLOv3 model downloaded in Step 2 as an example, run the following command to load the model and run prediction:

```
build/demo/multi_gpu_model_infer --model_filename=yolov3_mbv1/model/model.pdmodel \
build/demo/multi_thread_infer --model_filename=yolov3_mbv1/model/model.pdmodel \
--params_filename=yolov3_mbv1/model/model.pdiparams \
--cfg_file=yolov3_mbv1/model/infer_cfg.yml \
--image=yolov3_mbv1/file_list.txt \
--image=yolov3_mbv1/images/000000010583.jpg \
--gpu_id=0,1 \
--batch_size=4 \
--use_gpu=1 \
--model_type=det
```

The output looks like the following (fields are class id, label, confidence, xmin, ymin, w, h):

```
Box(0 person 0.0180757 0 386.488 52.8673 38.5124)
Box(14 bird 0.0226735 7.03722 9.77164 491.656 360.871)
Box(25 umbrella 0.0198202 7.03722 9.77164 491.656 360.871)
Box(26 handbag 0.0108408 0 386.488 52.8673 38.5124)
Box(39 bottle 0.12783 183.808 187.242 8.61859 34.643)
Box(56 chair 0.136626 546.628 283.611 62.4004 138.243)
```

### Parameter description

| Parameter | Description |
@@ -61,9 +70,9 @@ Box(56 chair 0.136626 546.628 283.611 62.4004 138.243)
| params_filename | **[Required]** Path of the model weights file, e.g. `yolov3_darknet/model.pdiparams` |
| cfg_file | **[Required]** Path of the model configuration file, e.g. `yolov3_darknet/infer_cfg.yml` |
| model_type | **[Required]** Model source: det/seg/clas/paddlex, for models from PaddleDetection, PaddleSeg, PaddleClas, and PaddleX respectively |
| image_list | Path of the file listing the images to predict, e.g. `yolov3_darknet/file_list.txt` from Step 3 |
| image | Path of the image file to predict |
| gpu_id | GPU card ID(s) used when predicting on GPU, default 0; for multiple cards use a comma-separated list such as 0,1 |
| batch_size | Batch size for each prediction (the batch is split evenly across the model instances), default 1 |
| use_gpu | Whether to use GPU, 1 or 0; default 0 (GPU disabled) |
| thread_num | Number of parallel image-preprocessing threads per GPU card, default 1 |


2 changes: 1 addition & 1 deletion deploy/cpp/docs/models/paddleclas.md
@@ -82,5 +82,5 @@ Classify(809 sunscreen 0.939211)
For detailed usage of the demo programs, refer to the following documents:

- [Single-GPU model inference example](../demo/model_infer.md)
- [Multi-GPU model inference example](../demo/multi_gpu_model_infer.md)
- [Multi-threaded inference example](../demo/multi_thread_infer.md)
- [PaddleInference with TensorRT model inference example](../../demo/tensorrt_infer.md)
2 changes: 1 addition & 1 deletion deploy/cpp/docs/models/paddledetection.md
@@ -79,5 +79,5 @@ Box(39 bottle 0.356306 551.603 288.384 34.9819 112.599)
For detailed usage of the demo programs, refer to the following documents:

- [Single-GPU model inference example](../demo/model_infer.md)
- [Multi-GPU model inference example](../demo/multi_gpu_model_infer.md)
- [Multi-threaded inference example](../demo/multi_thread_infer.md)
- [PaddleInference with TensorRT model inference example](../../demo/tensorrt_infer.md)
2 changes: 1 addition & 1 deletion deploy/cpp/docs/models/paddleseg.md
@@ -64,5 +64,5 @@ ScoreMask(mean: 12.4814 std: 10.4955) LabelMask(mean: 1.98847 std: 10.3
For detailed usage of the demo programs, refer to the following documents:

- [Single-GPU model inference example](../demo/model_infer.md)
- [Multi-GPU model inference example](../demo/multi_gpu_model_infer.md)
- [Multi-threaded inference example](../demo/multi_thread_infer.md)
- [PaddleInference with TensorRT model inference example](../../demo/tensorrt_infer.md)
2 changes: 1 addition & 1 deletion deploy/cpp/docs/models/paddlex.md
@@ -53,6 +53,6 @@ Classify(809 sunscreen 0.939211)
For detailed usage of the demo programs, refer to the following documents:

- [Single-GPU model inference example](../demo/model_infer.md)
- [Multi-GPU model inference example](../demo/multi_gpu_model_infer.md)
- [Multi-threaded inference example](../demo/multi_thread_infer.md)
- [PaddleInference with TensorRT model inference example](../../demo/tensorrt_infer.md)
- [Encrypted model inference example](./docs/demo/decrypt_infer.md)