fix(demo): update yolox.cpp in ncnn (#402)

fix(demo): update yolox.cpp in ncnn
Megvii-BaseDetection · Aug 6, 2021 · 902b372 · 902b372
1 parent 0f8513d
commit 902b372
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 24 deletions.
diff --git a/demo/ncnn/cpp/README.md b/demo/ncnn/cpp/README.md
@@ -1,6 +1,7 @@
 # YOLOX-CPP-ncnn
 
-Cpp file compile of YOLOX object detection base on [ncnn](https://github.com/Tencent/ncnn).
+C++ implementation of YOLOX object detection based on [ncnn](https://github.com/Tencent/ncnn).  
+YOLOX is now included in ncnn itself, so you can also build it directly from the ncnn repository, which is the better option.
 
 ## Tutorial
 

diff --git a/demo/ncnn/cpp/yolox.cpp b/demo/ncnn/cpp/yolox.cpp
@@ -27,6 +27,10 @@
 #include <stdio.h>
 #include <vector>
 
+#define YOLOX_NMS_THRESH  0.45 // nms threshold
+#define YOLOX_CONF_THRESH 0.25 // threshold of bounding box prob
+#define YOLOX_TARGET_SIZE 640  // target image size after resize, might use 416 for small model
+
 // YOLOX use the same focus in yolov5
 class YoloV5Focus : public ncnn::Layer
 {
@@ -177,14 +181,19 @@ static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vecto
 
 static void generate_grids_and_stride(const int target_size, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)
 {
-    for (auto stride : strides)
+    for (int i = 0; i < (int)strides.size(); i++)
     {
+        int stride = strides[i];
         int num_grid = target_size / stride;
         for (int g1 = 0; g1 < num_grid; g1++)
         {
             for (int g0 = 0; g0 < num_grid; g0++)
             {
-                grid_strides.push_back((GridAndStride){g0, g1, stride});
+                GridAndStride gs;
+                gs.grid0 = g0;
+                gs.grid1 = g1;
+                gs.stride = stride;
+                grid_strides.push_back(gs);
             }
         }
     }
@@ -193,10 +202,7 @@ static void generate_grids_and_stride(const int target_size, std::vector<int>& s
 static void generate_yolox_proposals(std::vector<GridAndStride> grid_strides, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
 {
     const int num_grid = feat_blob.h;
-    fprintf(stderr, "output height: %d, width: %d, channels: %d, dims:%d\n", feat_blob.h, feat_blob.w, feat_blob.c, feat_blob.dims);
-
     const int num_class = feat_blob.w - 5;
-
     const int num_anchors = grid_strides.size();
 
     const float* feat_ptr = feat_blob.channel(0);
@@ -239,25 +245,22 @@ static void generate_yolox_proposals(std::vector<GridAndStride> grid_strides, co
 
     } // point anchor loop
 }
- 
+
 static int detect_yolox(const cv::Mat& bgr, std::vector<Object>& objects)
 {
     ncnn::Net yolox;
 
     yolox.opt.use_vulkan_compute = true;
     // yolox.opt.use_bf16_storage = true;
 
+    // Focus in yolov5
     yolox.register_custom_layer("YoloV5Focus", YoloV5Focus_layer_creator);
 
-    // original pretrained model from https://github.com/yolox
-    // TODO ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+    // original pretrained model from https://github.com/Megvii-BaseDetection/YOLOX
+    // ncnn model param: https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s_ncnn.tar.gz
     yolox.load_param("yolox.param");
     yolox.load_model("yolox.bin");
 
-    const int target_size = 416;
-    const float prob_threshold = 0.3f;
-    const float nms_threshold = 0.65f;
-
     int img_w = bgr.cols;
     int img_h = bgr.rows;
 
@@ -266,21 +269,21 @@ static int detect_yolox(const cv::Mat& bgr, std::vector<Object>& objects)
     float scale = 1.f;
     if (w > h)
     {
-        scale = (float)target_size / w;
-        w = target_size;
+        scale = (float)YOLOX_TARGET_SIZE / w;
+        w = YOLOX_TARGET_SIZE;
         h = h * scale;
     }
     else
     {
-        scale = (float)target_size / h;
-        h = target_size;
+        scale = (float)YOLOX_TARGET_SIZE / h;
+        h = YOLOX_TARGET_SIZE;
         w = w * scale;
     }
     ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);
 
-    // pad to target_size rectangle
-    int wpad = target_size - w;
-    int hpad = target_size - h;
+    // pad to YOLOX_TARGET_SIZE rectangle
+    int wpad = YOLOX_TARGET_SIZE - w;
+    int hpad = YOLOX_TARGET_SIZE - h;
     ncnn::Mat in_pad;
     // different from yolov5, yolox only pad on bottom and right side,
     // which means users don't need to extra padding info to decode boxes coordinate.
@@ -303,18 +306,19 @@ static int detect_yolox(const cv::Mat& bgr, std::vector<Object>& objects)
         ncnn::Mat out;
         ex.extract("output", out);
 
-        std::vector<int> strides = {8, 16, 32}; // might have stride=64
+        static const int stride_arr[] = {8, 16, 32}; // might have stride=64 in YOLOX
+        std::vector<int> strides(stride_arr, stride_arr + sizeof(stride_arr) / sizeof(stride_arr[0]));
         std::vector<GridAndStride> grid_strides;
-        generate_grids_and_stride(target_size, strides, grid_strides);
-        generate_yolox_proposals(grid_strides, out, prob_threshold, proposals);
+        generate_grids_and_stride(YOLOX_TARGET_SIZE, strides, grid_strides);
+        generate_yolox_proposals(grid_strides, out, YOLOX_CONF_THRESH, proposals);
     }
 
     // sort all proposals by score from highest to lowest
     qsort_descent_inplace(proposals);
 
     // apply nms with nms_threshold
     std::vector<int> picked;
-    nms_sorted_bboxes(proposals, picked, nms_threshold);
+    nms_sorted_bboxes(proposals, picked, YOLOX_NMS_THRESH);
 
     int count = picked.size();