diff --git a/src/layer/detectionoutput.cpp b/src/layer/detectionoutput.cpp index e0adc699d03..e5143a3e140 100644 --- a/src/layer/detectionoutput.cpp +++ b/src/layer/detectionoutput.cpp @@ -33,6 +33,10 @@ int DetectionOutput::load_param(const ParamDict& pd) nms_top_k = pd.get(2, 300); keep_top_k = pd.get(3, 100); confidence_threshold = pd.get(4, 0.5f); + variances[0] = pd.get(5, 0.1f); + variances[1] = pd.get(6, 0.1f); + variances[2] = pd.get(7, 0.2f); + variances[3] = pd.get(8, 0.2f); return 0; } @@ -161,14 +165,14 @@ int DetectionOutput::forward(const std::vector& bottom_blobs, std::vector& bottom_blobs, std::vector& to int w = bottom_blobs[0].w; int h = bottom_blobs[0].h; + if (bottom_blobs.size() == 1 && image_width == -233 && image_height == -233 && max_sizes.empty()) + { + // mxnet style _contrib_MultiBoxPrior + fprintf(stderr, "mxnet style _contrib_MultiBoxPrior\n"); + + float step_w = step_width; + float step_h = step_height; + if (step_w == -233) + step_w = 1.f / (float)w; + if (step_h == -233) + step_h = 1.f / (float)h; + + int num_sizes = min_sizes.w; + int num_ratios = aspect_ratios.w; + + int num_prior = num_sizes - 1 + num_ratios; + + Mat& top_blob = top_blobs[0]; + top_blob.create(4 * w * h * num_prior, 4u, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + #pragma omp parallel for num_threads(opt.num_threads) + for (int i = 0; i < h; i++) + { + float* box = (float*)top_blob + i * w * num_prior * 4; + + float center_x = offset * step_w; + float center_y = offset * step_h + i * step_h; + + for (int j = 0; j < w; j++) + { + // ratio = 1, various sizes + for (int k = 0; k < num_sizes; k++) + { + float size = min_sizes[k]; + float cw = size * h / w / 2; + float ch = size / 2; + + box[0] = center_x - cw; + box[1] = center_y - ch; + box[2] = center_x + cw; + box[3] = center_y + ch; + box += 4; + } + + // various ratios, size = min_size = size[0] + float size = min_sizes[0]; + for (int p = 1; p < num_ratios; p++) + { + float ratio = sqrt(aspect_ratios[p]); + float cw = size * h / w * ratio / 2; + float ch = size / ratio / 2; + + box[0] = center_x - cw; + box[1] = center_y - ch; + box[2] = center_x + cw; + box[3] = center_y + ch; + box += 4; + } + + center_x += step_w; + } + } + + if (clip) + { + float* box = top_blob; + for (int i = 0; i < top_blob.w; i++) + { + box[i] = std::min(std::max(box[i], 0.f), 1.f); + } + } + + return 0; + } + int image_w = image_width; int image_h = image_height; if (image_w == -233) @@ -69,26 +146,14 @@ int PriorBox::forward(const std::vector& bottom_blobs, std::vector& to int num_max_size = max_sizes.w; int num_aspect_ratio = aspect_ratios.w; - Mat min_sizes_copy = min_sizes.clone(); - Mat max_sizes_copy = max_sizes.clone(); - - for (int k = 0; k < num_min_size; k++) - { - if (min_sizes_copy[k] < 0.f) - min_sizes_copy[k] = -min_sizes_copy[k] * image_w; - } - for (int k = 0; k < num_max_size; k++) - { - if (max_sizes_copy[k] < 0.f) - max_sizes_copy[k] = -max_sizes_copy[k] * image_w; - } - int num_prior = num_min_size * num_aspect_ratio + num_min_size + num_max_size; if (flip) num_prior += num_min_size * num_aspect_ratio; Mat& top_blob = top_blobs[0]; top_blob.create(4 * w * h * num_prior, 2, 4u, opt.blob_allocator); + if (top_blob.empty()) + return -100; #pragma omp parallel for num_threads(opt.num_threads) for (int i = 0; i < h; i++) @@ -105,7 +170,7 @@ int PriorBox::forward(const std::vector& bottom_blobs, std::vector& to for (int k = 0; k < num_min_size; k++) { - float min_size = min_sizes_copy[k]; + float min_size = min_sizes[k]; // min size box box_w = box_h = min_size; @@ -119,7 +184,7 @@ int PriorBox::forward(const std::vector& bottom_blobs, std::vector& to if (num_max_size > 0) { - float max_size = max_sizes_copy[k]; + float max_size = max_sizes[k]; // max size box box_w = box_h = sqrt(min_size * max_size); @@ -161,8 +226,6 @@ int PriorBox::forward(const std::vector& bottom_blobs, std::vector& to center_x += step_w; } - - center_y += step_h; } if (clip) diff --git a/tools/mxnet/mxnet2ncnn.cpp b/tools/mxnet/mxnet2ncnn.cpp index d2d12ee8929..6043302fd98 100644 --- a/tools/mxnet/mxnet2ncnn.cpp +++ b/tools/mxnet/mxnet2ncnn.cpp @@ -703,6 +703,10 @@ int main(int argc, char** argv) } continue; } + else if (n.op == "_contrib_MultiBoxTarget") + { + n.output_size = 3; + } else if (n.op == "SliceChannel") { n.output_size = n.attr("num_outputs"); @@ -775,6 +779,11 @@ int main(int argc, char** argv) } } +// for (std::map::iterator it = node_reference.begin(); it != node_reference.end(); it++) +// { +// fprintf(stderr, "ref %d %d\n", it->first, it->second); +// } + // op chain fusion int reduced_node_count = 0; for (int i=0; i variances = n.attr("variances"); + if (variances.empty()) + { + fprintf(pp, " 5=0.1"); + fprintf(pp, " 6=0.1"); + fprintf(pp, " 7=0.2"); + fprintf(pp, " 8=0.2"); + } + else + { + fprintf(pp, " 5=%f", variances[0]); + fprintf(pp, " 6=%f", variances[1]); + fprintf(pp, " 7=%f", variances[2]); + fprintf(pp, " 8=%f", variances[3]); + } } else if (n.op == "_contrib_MultiBoxPrior") { + // mxnet-ssd encode size as scale factor, fill min_size std::vector sizes = n.attr("sizes"); - float min_size = sizes[0]; - float max_size = sizes[1]; - - // mxnet-ssd encode size as scale factor - fprintf(pp, " -23300=%d", 1); - fprintf(pp, ",%f", -min_size); - - fprintf(pp, " -23301=%d", 1); - fprintf(pp, ",%f", -max_size); - - // drop 1.0 ratio - std::vector ratios = n.attr("ratios"); - std::vector aspect_ratios; - for (int j=0; j aspect_ratios = n.attr("ratios"); fprintf(pp, " -23302=%d", (int)aspect_ratios.size()); for (int j=0; j<(int)aspect_ratios.size(); j++) { fprintf(pp, ",%f", aspect_ratios[j]); } - float variances[4] = {0.1f, 0.1f, 0.2f, 0.2f}; - fprintf(pp, " 3=%f", variances[0]); - fprintf(pp, " 4=%f", variances[1]); - fprintf(pp, " 5=%f", variances[2]); - fprintf(pp, " 6=%f", variances[3]); - int flip = 0; fprintf(pp, " 7=%d", flip); @@ -1327,8 +1334,8 @@ int main(int argc, char** argv) if (steps.empty() || (steps[0] == -1.f && steps[1] == -1.f)) { // auto step - fprintf(pp, " 11=-233"); - fprintf(pp, " 12=-233"); + fprintf(pp, " 11=-233.0"); + fprintf(pp, " 12=-233.0"); } else {