From 1fa14712ad80db72be398088fc6dfb1ab32bdfd4 Mon Sep 17 00:00:00 2001
From: DefTruth <31974251+DefTruth@users.noreply.github.com>
Date: Tue, 19 Jul 2022 18:42:12 +0800
Subject: [PATCH] add convert processor to vision (#27)

* update .gitignore

* Added checking for cmake include dir

* fixed missing trt_backend option bug when init from trt

* remove unneeded data layout and add pre-check for dtype

* changed RGB2BRG to BGR2RGB in ppcls model

* add model_zoo yolov6 c++/python demo

* fixed CMakeLists.txt typos

* update yolov6 cpp/README.md

* add yolox c++/pybind and model_zoo demo

* move some helpers to private

* fixed CMakeLists.txt typos

* add normalize with alpha and beta

* add version notes for yolov5/yolov6/yolox

* add copyright to yolov5.cc

* revert normalize

* fixed some bugs in yolox

* fixed examples/CMakeLists.txt to avoid conflicts

* add convert processor to vision

* format examples/CMakeLists summary
---
 examples/CMakeLists.txt                       | 25 ++++----
 .../vision/common/processors/convert.cc       | 62 +++++++++++++++++++
 fastdeploy/vision/common/processors/convert.h | 42 +++++++++++++
 .../vision/common/processors/transform.h      |  1 +
 fastdeploy/vision/meituan/yolov6.cc           | 28 +++++----
 fastdeploy/vision/ultralytics/yolov5.cc       | 14 +++--
 6 files changed, 143 insertions(+), 29 deletions(-)
 create mode 100644 fastdeploy/vision/common/processors/convert.cc
 create mode 100644 fastdeploy/vision/common/processors/convert.h

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 9f5a780db4..112193c86a 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,25 +1,26 @@
-function(add_fastdeploy_executable field url model)
+function(add_fastdeploy_executable FIELD CC_FILE)
   # temp target name/file var in function scope
-  set(TEMP_TARGET_FILE ${PROJECT_SOURCE_DIR}/examples/${field}/${url}_${model}.cc)
-  set(TEMP_TARGET_NAME ${field}_${url}_${model})
+  set(TEMP_TARGET_FILE ${CC_FILE})
+  # Target name is ${FIELD}_<base name of CC_FILE>; NAME_WE strips dir + extension.
+  get_filename_component(FILE_PREFIX "${CC_FILE}" NAME_WE)
+  set(TEMP_TARGET_NAME ${FIELD}_${FILE_PREFIX})
   if (EXISTS ${TEMP_TARGET_FILE} AND TARGET fastdeploy)
     add_executable(${TEMP_TARGET_NAME} ${TEMP_TARGET_FILE})
     target_link_libraries(${TEMP_TARGET_NAME} PUBLIC fastdeploy)
-    message(STATUS "Found source file: [${field}/${url}_${model}.cc], ADD!!! fastdeploy executable: [${TEMP_TARGET_NAME}] !")
-  else ()
-    message(WARNING "Can not found source file: [${field}/${url}_${model}.cc], SKIP!!! fastdeploy executable: [${TEMP_TARGET_NAME}] !")
+    message(STATUS "  Added FastDeploy Executable       : ${TEMP_TARGET_NAME}")
   endif()
   unset(TEMP_TARGET_FILE)
   unset(TEMP_TARGET_NAME)
 endfunction()
 
 # vision examples
-if (WITH_VISION_EXAMPLES)
-  add_fastdeploy_executable(vision ultralytics yolov5)
-  add_fastdeploy_executable(vision ppdet ppyoloe)
-  add_fastdeploy_executable(vision meituan yolov6)
-  add_fastdeploy_executable(vision wongkinyiu yolov7)
-  add_fastdeploy_executable(vision megvii yolox)
+if(WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples/vision)  # needs the option and the directory
+  message(STATUS "")
+  message(STATUS "*************FastDeploy Examples Summary**********")
+  file(GLOB ALL_VISION_EXAMPLE_SRCS ${PROJECT_SOURCE_DIR}/examples/vision/*.cc)  # .cc files directly in examples/vision
+  foreach(_CC_FILE ${ALL_VISION_EXAMPLE_SRCS})
+    add_fastdeploy_executable(vision ${_CC_FILE})  # one executable per globbed source
+  endforeach()
 endif()
 
 # other examples ...
diff --git a/fastdeploy/vision/common/processors/convert.cc b/fastdeploy/vision/common/processors/convert.cc
new file mode 100644
index 0000000000..a7ca6de07a
--- /dev/null
+++ b/fastdeploy/vision/common/processors/convert.cc
@@ -0,0 +1,78 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/common/processors/convert.h"
+
+namespace fastdeploy {
+
+namespace vision {
+
+// Stores the per-channel scale (`alpha`) and offset (`beta`) factors.
+// Both vectors must be non-empty and have the same length.
+Convert::Convert(const std::vector<float>& alpha,
+                 const std::vector<float>& beta) {
+  FDASSERT(alpha.size() == beta.size(),
+           "Convert: requires the size of alpha equal to the size of beta.");
+  FDASSERT(alpha.size() != 0,
+           "Convert: requires the size of alpha and beta > 0.");
+  alpha_.assign(alpha.begin(), alpha.end());
+  beta_.assign(beta.begin(), beta.end());
+}
+
+// Converts the CPU image in place to 32-bit float, computing
+// `dst = src * alpha_[c] + beta_[c]` for every channel `c`.
+bool Convert::CpuRun(Mat* mat) {
+  cv::Mat* im = mat->GetCpuMat();
+  const int channels = im->channels();
+  // alpha_/beta_ are indexed by channel below; refuse images with more
+  // channels than factors instead of reading past the end of the vectors.
+  FDASSERT(static_cast<size_t>(channels) <= alpha_.size(),
+           "Convert: requires the size of alpha and beta >= image channels.");
+  std::vector<cv::Mat> split_im;
+  cv::split(*im, split_im);
+  for (int c = 0; c < channels; c++) {
+    split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
+  }
+  cv::merge(split_im, *im);
+  return true;
+}
+
+#ifdef ENABLE_OPENCV_CUDA
+// GPU counterpart of CpuRun: same per-channel conversion on a GpuMat.
+bool Convert::GpuRun(Mat* mat) {
+  cv::cuda::GpuMat* im = mat->GetGpuMat();
+  const int channels = im->channels();
+  // Same bounds guard as CpuRun: alpha_/beta_ are indexed by channel.
+  FDASSERT(static_cast<size_t>(channels) <= alpha_.size(),
+           "Convert: requires the size of alpha and beta >= image channels.");
+  std::vector<cv::cuda::GpuMat> split_im;
+  cv::cuda::split(*im, split_im);
+  for (int c = 0; c < channels; c++) {
+    split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
+  }
+  cv::cuda::merge(split_im, *im);
+  return true;
+}
+#endif
+
+// Convenience wrapper: builds a temporary Convert from `alpha`/`beta` and
+// invokes it on `mat`, forwarding `lib` to the processor call.
+bool Convert::Run(Mat* mat, const std::vector<float>& alpha,
+                  const std::vector<float>& beta, ProcLib lib) {
+  auto c = Convert(alpha, beta);
+  return c(mat, lib);
+}
+
+}  // namespace vision
+}  // namespace fastdeploy
\ No newline at end of file
diff --git a/fastdeploy/vision/common/processors/convert.h b/fastdeploy/vision/common/processors/convert.h
new file mode 100644
index 0000000000..5d5a5276f5
--- /dev/null
+++ b/fastdeploy/vision/common/processors/convert.h
@@ -0,0 +1,52 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "fastdeploy/vision/common/processors/base.h"
+
+namespace fastdeploy {
+namespace vision {
+// Processor that converts an image to 32-bit float while applying a
+// per-channel linear transform: `result = mat * alpha[c] + beta[c]`.
+class Convert : public Processor {
+ public:
+  // `alpha` holds the per-channel scale factors and `beta` the per-channel
+  // offsets; both must be non-empty and of equal length.
+  Convert(const std::vector<float>& alpha, const std::vector<float>& beta);
+
+  // Applies the conversion in place on the CPU (cv::Mat) representation.
+  bool CpuRun(Mat* mat);
+#ifdef ENABLE_OPENCV_CUDA
+  // Applies the conversion in place on the GPU (cv::cuda::GpuMat)
+  // representation; only declared when ENABLE_OPENCV_CUDA is defined.
+  bool GpuRun(Mat* mat);
+#endif
+  std::string Name() { return "Convert"; }
+
+  // Compute `result = mat * alpha + beta` directly by channel.
+  // The default behavior is the same as OpenCV's convertTo method.
+  static bool Run(Mat* mat, const std::vector<float>& alpha,
+                  const std::vector<float>& beta,
+                  ProcLib lib = ProcLib::OPENCV_CPU);
+
+ private:
+  std::vector<float> alpha_;  // per-channel scale factors
+  std::vector<float> beta_;   // per-channel offsets
+};
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/common/processors/transform.h b/fastdeploy/vision/common/processors/transform.h
index 12eec8d72d..08073b4e42 100644
--- a/fastdeploy/vision/common/processors/transform.h
+++ b/fastdeploy/vision/common/processors/transform.h
@@ -17,6 +17,7 @@
 #include "fastdeploy/vision/common/processors/cast.h"
 #include "fastdeploy/vision/common/processors/center_crop.h"
 #include "fastdeploy/vision/common/processors/color_space_convert.h"
+#include "fastdeploy/vision/common/processors/convert.h"
 #include "fastdeploy/vision/common/processors/hwc2chw.h"
 #include "fastdeploy/vision/common/processors/normalize.h"
 #include "fastdeploy/vision/common/processors/pad.h"
diff --git a/fastdeploy/vision/meituan/yolov6.cc b/fastdeploy/vision/meituan/yolov6.cc
index 8f37bf89c6..8ac7377194 100644
--- a/fastdeploy/vision/meituan/yolov6.cc
+++ b/fastdeploy/vision/meituan/yolov6.cc
@@ -25,14 +25,14 @@ namespace meituan {
 void LetterBox(Mat* mat, std::vector<int> size, std::vector<float> color,
                bool _auto, bool scale_fill = false, bool scale_up = true,
                int stride = 32) {
-  float scale = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()), 
-                         size[0] * 1.0f / static_cast<float>(mat->Width()));       
+  float scale = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
+                         size[0] * 1.0f / static_cast<float>(mat->Width()));
   if (!scale_up) {
     scale = std::min(scale, 1.0f);
   }
 
   int resize_h = int(round(static_cast<float>(mat->Height()) * scale));
-  int resize_w = int(round(static_cast<float>(mat->Width())  * scale));
+  int resize_w = int(round(static_cast<float>(mat->Width()) * scale));
 
   int pad_w = size[0] - resize_w;
   int pad_h = size[1] - resize_h;
@@ -85,13 +85,13 @@ bool YOLOv6::Initialize() {
   is_scale_up = false;
   stride = 32;
   max_wh = 4096.0f;
-  
+
   if (!InitRuntime()) {
     FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
     return false;
   }
-  // Check if the input shape is dynamic after Runtime already initialized, 
-  // Note that, We need to force is_mini_pad 'false' to keep static 
+  // Check if the input shape is dynamic after Runtime already initialized,
+  // Note that, We need to force is_mini_pad 'false' to keep static
   // shape after padding (LetterBox) when the is_dynamic_shape is 'false'.
   is_dynamic_input_ = false;
   auto shape = InputInfoOfRuntime(0).shape;
@@ -102,7 +102,7 @@ bool YOLOv6::Initialize() {
       break;
     }
   }
-  if (!is_dynamic_input_) {  
+  if (!is_dynamic_input_) {
     is_mini_pad = false;
   }
   return true;
@@ -111,15 +111,15 @@ bool YOLOv6::Initialize() {
 bool YOLOv6::Preprocess(Mat* mat, FDTensor* output,
                         std::map<std::string, std::array<float, 2>>* im_info) {
   // process after image load
-  float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()), 
-                         size[0] * 1.0f / static_cast<float>(mat->Width()));                                          
+  float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
+                         size[0] * 1.0f / static_cast<float>(mat->Width()));
   if (ratio != 1.0) {
     int interp = cv::INTER_AREA;
     if (ratio > 1.0) {
       interp = cv::INTER_LINEAR;
     }
     int resize_h = int(round(static_cast<float>(mat->Height()) * ratio));
-    int resize_w = int(round(static_cast<float>(mat->Width())  * ratio));
+    int resize_w = int(round(static_cast<float>(mat->Width()) * ratio));
     Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
   }
   // yolov6's preprocess steps
@@ -129,8 +129,12 @@ bool YOLOv6::Preprocess(Mat* mat, FDTensor* output,
   LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up,
             stride);
   BGR2RGB::Run(mat);
-  Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
-                 std::vector<float>(mat->Channels(), 1.0));
+  // Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
+  //                std::vector<float>(mat->Channels(), 1.0));
+  // Compute `result = mat * alpha + beta` directly by channel
+  std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
+  std::vector<float> beta = {0.0f, 0.0f, 0.0f};
+  Convert::Run(mat, alpha, beta);
 
   // Record output shape of preprocessed image
   (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
diff --git a/fastdeploy/vision/ultralytics/yolov5.cc b/fastdeploy/vision/ultralytics/yolov5.cc
index 193cfe9794..0a04eeb534 100644
--- a/fastdeploy/vision/ultralytics/yolov5.cc
+++ b/fastdeploy/vision/ultralytics/yolov5.cc
@@ -87,8 +87,8 @@ bool YOLOv5::Initialize() {
     FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
     return false;
   }
-  // Check if the input shape is dynamic after Runtime already initialized, 
-  // Note that, We need to force is_mini_pad 'false' to keep static 
+  // Check if the input shape is dynamic after Runtime already initialized,
+  // Note that, We need to force is_mini_pad 'false' to keep static
   // shape after padding (LetterBox) when the is_dynamic_shape is 'false'.
   is_dynamic_input_ = false;
   auto shape = InputInfoOfRuntime(0).shape;
@@ -99,7 +99,7 @@ bool YOLOv5::Initialize() {
       break;
     }
   }
-  if (!is_dynamic_input_) {  
+  if (!is_dynamic_input_) {
     is_mini_pad = false;
   }
   return true;
@@ -126,8 +126,12 @@ bool YOLOv5::Preprocess(Mat* mat, FDTensor* output,
   LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up,
             stride);
   BGR2RGB::Run(mat);
-  Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
-                 std::vector<float>(mat->Channels(), 1.0));
+  // Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
+  //                std::vector<float>(mat->Channels(), 1.0));
+  // Compute `result = mat * alpha + beta` directly by channel
+  std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
+  std::vector<float> beta = {0.0f, 0.0f, 0.0f};
+  Convert::Run(mat, alpha, beta);
 
   // Record output shape of preprocessed image
   (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),