Large diffs are not rendered by default.

@@ -0,0 +1,119 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/*M///////////////////////////////////////////////////////////////////////////////////////
//MIT License
//
//Copyright (c) 2017 Joseph Redmon
//
//Permission is hereby granted, free of charge, to any person obtaining a copy
//of this software and associated documentation files (the "Software"), to deal
//in the Software without restriction, including without limitation the rights
//to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
//copies of the Software, and to permit persons to whom the Software is
//furnished to do so, subject to the following conditions:
//
//The above copyright notice and this permission notice shall be included in all
//copies or substantial portions of the Software.
//
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
//FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
//AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
//LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
//OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
//SOFTWARE.
//
//M*/

#ifndef __OPENCV_DNN_DARKNET_IO_HPP__
#define __OPENCV_DNN_DARKNET_IO_HPP__

#include <opencv2/dnn/dnn.hpp>

namespace darknet {

// One parsed layer of a Darknet .cfg file. Mirrors the small subset of the
// Caffe-style LayerParameter interface (name/type/bottom/top accessors) that
// the DNN Darknet importer expects.
class LayerParameter {
    std::string layer_name, layer_type;       // layer name and Darknet layer type
    std::vector<std::string> bottom_indexes;  // names of the input (bottom) blobs
    cv::dnn::experimental_dnn_v1::LayerParams layerParams;
public:
    friend class setLayersParams;  // the importer's builder fills the private fields directly

    // Returns a copy of the accumulated OpenCV layer parameters.
    cv::dnn::experimental_dnn_v1::LayerParams getLayerParams() const { return layerParams; }
    std::string name() const { return layer_name; }
    std::string type() const { return layer_type; }
    // Explicit cast avoids an implicit size_t -> int narrowing warning.
    int bottom_size() const { return static_cast<int>(bottom_indexes.size()); }
    // at() throws std::out_of_range on a bad index instead of reading out of bounds.
    std::string bottom(const int index) const { return bottom_indexes.at(index); }
    // Every Darknet layer produces exactly one output blob, named after the layer,
    // so the index is intentionally ignored (cast silences the unused-param warning).
    int top_size() const { return 1; }
    std::string top(const int index) const { (void)index; return layer_name; }
};

// Shape (channels, width, height) of one layer's output blob; the importer
// records one of these per layer to resolve shapes of later layers.
struct layerShape {
int input_channels, input_w, input_h;
};

// In-memory representation of a complete parsed Darknet network:
// the [net] section geometry, the ordered list of layers, and the raw
// key/value configuration read from the .cfg file.
class NetParameter {
public:
    int width, height, channels;            // input geometry from the [net] section
    std::vector<LayerParameter> layers;     // layers in file (topological) order
    std::vector<layerShape> out_shape_vec;  // output shape of every layer

    // Raw key/value pairs: per layer index, and for the [net] section.
    std::map<int, std::map<std::string, std::string> > layers_cfg;
    std::map<std::string, std::string> net_cfg;

    // Explicit cast avoids an implicit size_t -> int narrowing warning.
    int layer_size() const { return static_cast<int>(layers.size()); }

    // A Darknet network always has a single input blob named "data"; the index
    // is intentionally ignored (cast silences the unused-param warning).
    int input_size() const { return 1; }
    std::string input(const int index) const { (void)index; return "data"; }
    // at() throws std::out_of_range on a bad index instead of reading out of bounds.
    LayerParameter layer(const int index) const { return layers.at(index); }
};
}

namespace cv {
namespace dnn {

// Parse a Darknet .cfg text file into a NetParameter message.
// NOTE(review): "OrDie" presumably reports failure via CV_Error / abort
// rather than a return code -- confirm against the implementation.
void ReadNetParamsFromCfgFileOrDie(const char *cfgFile, darknet::NetParameter *net);
// Load the trained weights from a Darknet .weights binary file into the
// (already cfg-parsed) NetParameter message.
void ReadNetParamsFromBinaryFileOrDie(const char *darknetModel, darknet::NetParameter *net);

}
}
#endif
@@ -111,6 +111,8 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
CV_DNN_REGISTER_LAYER_CLASS(Permute, PermuteLayer);
CV_DNN_REGISTER_LAYER_CLASS(PriorBox, PriorBoxLayer);
CV_DNN_REGISTER_LAYER_CLASS(Reorg, ReorgLayer);
CV_DNN_REGISTER_LAYER_CLASS(Region, RegionLayer);
CV_DNN_REGISTER_LAYER_CLASS(DetectionOutput, DetectionOutputLayer);
CV_DNN_REGISTER_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer);
CV_DNN_REGISTER_LAYER_CLASS(Normalize, NormalizeBBoxLayer);
@@ -51,12 +51,14 @@ namespace dnn

class ConcatLayerImpl : public ConcatLayer
{
bool useRoute;
public:
// Reads concatenation options supplied by the importer.
ConcatLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
axis = params.get<int>("axis", 1); // concatenate along channels by default
padding = params.get<bool>("padding", false);
// Darknet ROUTE layers are imported as Concat with "use_route" set.
useRoute = params.get<bool>("use_route", false);
}

virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -93,7 +95,10 @@ class ConcatLayerImpl : public ConcatLayer
axisSum += curShape[cAxis];
}
outputs[0][cAxis] = axisSum;
return false;

// Darknet ROUTE-layer
if (useRoute) return true;
else return false;
}

virtual bool supportBackend(int backendId)
@@ -0,0 +1,330 @@
/*M ///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "../precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <iostream>

namespace cv
{
namespace dnn
{

// Darknet "region" layer (YOLOv2 detection head).
// Applies logistic/softmax activations to the raw network output, decodes
// anchor-based boxes, thresholds class scores and runs non-maximum
// suppression. One output row per (grid cell, anchor) pair with layout:
// [x, y, w, h, t0, class_prob_0 ... class_prob_{classes-1}]  (cell_size floats).
class RegionLayerImpl : public RegionLayer
{
public:
// Hyper-parameters read from the Darknet .cfg via LayerParams.
int coords, classes, anchors, classfix;
float thresh, nmsThreshold;
bool useSoftmaxTree, useSoftmax;

RegionLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
// blobs[0] holds the anchor biases: 2 floats (w, h) per anchor.
CV_Assert(blobs.size() == 1);

thresh = params.get<float>("thresh", 0.2);
coords = params.get<int>("coords", 4);
classes = params.get<int>("classes", 0);
anchors = params.get<int>("anchors", 5);
classfix = params.get<int>("classfix", 0);
useSoftmaxTree = params.get<bool>("softmax_tree", false);
useSoftmax = params.get<bool>("softmax", false);
nmsThreshold = params.get<float>("nms_threshold", 0.4);

CV_Assert(nmsThreshold >= 0.);
CV_Assert(coords == 4); // only plain (x, y, w, h) boxes are supported
CV_Assert(classes >= 1);
CV_Assert(anchors >= 1);
// at least one activation mode must be requested
CV_Assert(useSoftmaxTree || useSoftmax);
}

// Output: one 2-D blob per input, rows*cols*anchors rows of cell_size floats.
// NOTE(review): assumes the input layout is [N, rows, cols, anchors*cell_size],
// i.e. inputs[0][3] / anchors == classes + coords + 1 -- confirm against the
// Darknet importer's reshape.
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const
{
CV_Assert(inputs.size() > 0);
outputs = std::vector<MatShape>(inputs.size(), shape(inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
return true;
}

virtual bool supportBackend(int backendId)
{
return backendId == DNN_BACKEND_DEFAULT; // CPU implementation only
}

// Sigmoid. NOTE(review): computed in double then narrowed to float
// (conversion warning on strict builds).
float logistic_activate(float x) { return 1. / (1. + exp(-x)); }

// Softmax over n values with temperature `temp`; subtracts the maximum
// before exponentiation for numerical stability. input and output may alias.
void softmax_activate(const float* input, const int n, const float temp, float* output)
{
int i;
float sum = 0;
float largest = -FLT_MAX;
for (i = 0; i < n; ++i) {
if (input[i] > largest) largest = input[i];
}
for (i = 0; i < n; ++i) {
float e = exp((input[i] - largest) / temp);
sum += e;
output[i] = e;
}
for (i = 0; i < n; ++i) {
output[i] /= sum;
}
}

// Pipeline per input blob: 1) sigmoid on the objectness score t0,
// 2) softmax on class scores, 3) decode boxes against the anchor biases,
// 4) threshold class probabilities, 5) in-place NMS on the output buffer.
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());

CV_Assert(inputs.size() >= 1);
int const cell_size = classes + coords + 1;

const float* biasData = blobs[0].ptr<float>();

for (size_t ii = 0; ii < outputs.size(); ii++)
{
Mat &inpBlob = *inputs[ii];
Mat &outBlob = outputs[ii];

int rows = inpBlob.size[1];
int cols = inpBlob.size[2];

const float *srcData = inpBlob.ptr<float>();
float *dstData = outBlob.ptr<float>();

// logistic activation for t0, for each grid cell (X x Y x Anchor-index)
for (int i = 0; i < rows*cols*anchors; ++i) {
int index = cell_size*i;
float x = srcData[index + 4];
dstData[index + 4] = logistic_activate(x); // logistic activation
}

if (useSoftmaxTree) { // Yolo 9000
CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
}
else if (useSoftmax) { // Yolo v2
// softmax activation for Probability, for each grid cell (X x Y x Anchor-index)
for (int i = 0; i < rows*cols*anchors; ++i) {
int index = cell_size*i;
softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
}

for (int x = 0; x < cols; ++x)
for(int y = 0; y < rows; ++y)
for (int a = 0; a < anchors; ++a) {
int index = (y*cols + x)*anchors + a; // index for each grid-cell & anchor
int p_index = index * cell_size + 4;
float scale = dstData[p_index];
if (classfix == -1 && scale < .5) scale = 0; // if(t0 < 0.5) t0 = 0;
int box_index = index * cell_size;

// Box decode: (x, y) are cell-relative sigmoids, (w, h) scale the
// anchor biases; all normalized to [0, 1] of the grid extent.
dstData[box_index + 0] = (x + logistic_activate(srcData[box_index + 0])) / cols;
dstData[box_index + 1] = (y + logistic_activate(srcData[box_index + 1])) / rows;
dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / cols;
dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / rows;

int class_index = index * cell_size + 5;

if (useSoftmaxTree) {
CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
}
else {
for (int j = 0; j < classes; ++j) {
float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
}
}
}

}

// NMS mutates the class probabilities in the output buffer in place.
if (nmsThreshold > 0) {
do_nms_sort(dstData, rows*cols*anchors, nmsThreshold);
//do_nms(dstData, rows*cols*anchors, nmsThreshold);
}

}
}


// Center-format box; `probs` aliases the class scores of this detection
// inside the layer's output buffer (so writes through it suppress outputs).
struct box {
float x, y, w, h;
float *probs;
};

// Length of the 1-D overlap of two center/size intervals (negative if disjoint).
float overlap(float x1, float w1, float x2, float w2)
{
float l1 = x1 - w1 / 2;
float l2 = x2 - w2 / 2;
float left = l1 > l2 ? l1 : l2;
float r1 = x1 + w1 / 2;
float r2 = x2 + w2 / 2;
float right = r1 < r2 ? r1 : r2;
return right - left;
}

float box_intersection(box a, box b)
{
float w = overlap(a.x, a.w, b.x, b.w);
float h = overlap(a.y, a.h, b.y, b.h);
if (w < 0 || h < 0) return 0;
float area = w*h;
return area;
}

float box_union(box a, box b)
{
float i = box_intersection(a, b);
float u = a.w*a.h + b.w*b.h - i;
return u;
}

// Intersection-over-union of two boxes.
float box_iou(box a, box b)
{
return box_intersection(a, b) / box_union(a, b);
}

struct sortable_bbox {
int index;
float *probs;
};

// Orders detections by descending probability of class k.
struct nms_comparator {
int k;
nms_comparator(int _k) : k(_k) {}
bool operator ()(sortable_bbox v1, sortable_bbox v2) {
return v2.probs[k] < v1.probs[k];
}
};

// Per-class greedy NMS: for each class, sort detections by score and zero the
// score of any box overlapping a higher-scored box above nms_thresh.
// Operates directly on the output buffer through the aliased `probs` pointers.
// NOTE(review): (classes + 5) here equals (classes + coords + 1) only because
// coords is asserted to be 4 in the constructor.
void do_nms_sort(float *detections, int total, float nms_thresh)
{
std::vector<box> boxes(total);
for (int i = 0; i < total; ++i) {
box &b = boxes[i];
int box_index = i * (classes + coords + 1);
b.x = detections[box_index + 0];
b.y = detections[box_index + 1];
b.w = detections[box_index + 2];
b.h = detections[box_index + 3];
int class_index = i * (classes + 5) + 5;
b.probs = (detections + class_index);
}

std::vector<sortable_bbox> s(total);

for (int i = 0; i < total; ++i) {
s[i].index = i;
int class_index = i * (classes + 5) + 5;
s[i].probs = (detections + class_index);
}

for (int k = 0; k < classes; ++k) {
std::stable_sort(s.begin(), s.end(), nms_comparator(k));
for (int i = 0; i < total; ++i) {
if (boxes[s[i].index].probs[k] == 0) continue;
box a = boxes[s[i].index];
for (int j = i + 1; j < total; ++j) {
box b = boxes[s[j].index];
if (box_iou(a, b) > nms_thresh) {
boxes[s[j].index].probs[k] = 0;
}
}
}
}
}

// Unsorted NMS variant (Darknet's do_nms): for each overlapping pair, keep
// the higher score per class. Currently unused -- see the commented call in
// forward(); kept for parity with the reference implementation.
void do_nms(float *detections, int total, float nms_thresh)
{
std::vector<box> boxes(total);
for (int i = 0; i < total; ++i) {
box &b = boxes[i];
int box_index = i * (classes + coords + 1);
b.x = detections[box_index + 0];
b.y = detections[box_index + 1];
b.w = detections[box_index + 2];
b.h = detections[box_index + 3];
int class_index = i * (classes + 5) + 5;
b.probs = (detections + class_index);
}

for (int i = 0; i < total; ++i) {
bool any = false;
for (int k = 0; k < classes; ++k) any = any || (boxes[i].probs[k] > 0);
if (!any) {
continue;
}
for (int j = i + 1; j < total; ++j) {
if (box_iou(boxes[i], boxes[j]) > nms_thresh) {
for (int k = 0; k < classes; ++k) {
if (boxes[i].probs[k] < boxes[j].probs[k]) boxes[i].probs[k] = 0;
else boxes[j].probs[k] = 0;
}
}
}
}
}

// Rough cost estimate: ~60 operations per input element.
// NOTE(review): signed/unsigned comparison in the loop condition.
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const
{
(void)outputs; // suppress unused variable warning

int64 flops = 0;
for(int i = 0; i < inputs.size(); i++)
{
flops += 60*total(inputs[i]);
}
return flops;
}
};

// Factory method: builds the CPU implementation and hands ownership to cv::Ptr.
Ptr<RegionLayer> RegionLayer::create(const LayerParams& params)
{
    Ptr<RegionLayer> layer(new RegionLayerImpl(params));
    return layer;
}

} // namespace dnn
} // namespace cv
@@ -0,0 +1,139 @@
/*M ///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "../precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <iostream>

namespace cv
{
namespace dnn
{

class ReorgLayerImpl : public ReorgLayer
{
int reorgStride;
public:

ReorgLayerImpl(const LayerParams& params)
{
setParamsFrom(params);

reorgStride = params.get<int>("reorg_stride", 2);
CV_Assert(reorgStride > 0);
}

bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const
{
CV_Assert(inputs.size() > 0);
outputs = std::vector<MatShape>(inputs.size(), shape(
inputs[0][0],
inputs[0][1] * reorgStride * reorgStride,
inputs[0][2] / reorgStride,
inputs[0][3] / reorgStride));

CV_Assert(outputs[0][0] > 0 && outputs[0][1] > 0 && outputs[0][2] > 0 && outputs[0][3] > 0);

return true;
}

virtual bool supportBackend(int backendId)
{
return backendId == DNN_BACKEND_DEFAULT;
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());

for (size_t i = 0; i < inputs.size(); i++)
{
Mat srcBlob = *inputs[i];
MatShape inputShape = shape(srcBlob), outShape = shape(outputs[i]);
float *dstData = outputs[0].ptr<float>();
const float *srcData = srcBlob.ptr<float>();

int channels = inputShape[1], height = inputShape[2], width = inputShape[3];

int out_c = channels / (reorgStride*reorgStride);

for (int k = 0; k < channels; ++k) {
for (int j = 0; j < height; ++j) {
for (int i = 0; i < width; ++i) {
int in_index = i + width*(j + height*k);
int c2 = k % out_c;
int offset = k / out_c;
int w2 = i*reorgStride + offset % reorgStride;
int h2 = j*reorgStride + offset / reorgStride;
int out_index = w2 + width*reorgStride*(h2 + height*reorgStride*c2);
dstData[in_index] = srcData[out_index];
}
}
}
}
}

virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const
{
(void)outputs; // suppress unused variable warning

int64 flops = 0;
for(int i = 0; i < inputs.size(); i++)
{
flops += 21*total(inputs[i]);
}
return flops;
}
};

// Factory method: builds the CPU implementation and hands ownership to cv::Ptr.
Ptr<ReorgLayer> ReorgLayer::create(const LayerParams& params)
{
    Ptr<ReorgLayer> layer(new ReorgLayerImpl(params));
    return layer;
}

} // namespace dnn
} // namespace cv
@@ -0,0 +1,175 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "test_precomp.hpp"
#include "npy_blob.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <algorithm>

namespace cvtest
{

using namespace cv;
using namespace cv::dnn;

// Builds a path into the opencv_extra "dnn" test-data directory.
template<typename TString>
static std::string _tf(TString filename)
{
    std::string path = getOpenCVExtraDir();
    path += "/dnn/";
    path += filename;
    return path;
}

TEST(Test_Darknet, read_tiny_yolo_voc)
{
    // Parsing the .cfg alone (no weights) must still yield a non-empty network.
    const std::string cfgFile = _tf("tiny-yolo-voc.cfg");
    Net net = readNetFromDarknet(cfgFile);
    ASSERT_FALSE(net.empty());
}

TEST(Test_Darknet, read_yolo_voc)
{
    // Parsing the .cfg alone (no weights) must still yield a non-empty network.
    const std::string cfgFile = _tf("yolo-voc.cfg");
    Net net = readNetFromDarknet(cfgFile);
    ASSERT_FALSE(net.empty());
}

// End-to-end reproducibility test for Tiny-YOLO-VOC: runs the full network on
// dog416.png and compares the thresholded detections against reference values
// produced by the original Darknet binary (command line quoted below).
TEST(Reproducibility_TinyYoloVoc, Accuracy)
{
Net net;
{
// NOTE(review): second findDataFile argument is presumably "required" --
// confirm lookup semantics for the large .weights file.
const string cfg = findDataFile("dnn/tiny-yolo-voc.cfg", false);
const string model = findDataFile("dnn/tiny-yolo-voc.weights", false);
net = readNetFromDarknet(cfg, model);
ASSERT_FALSE(net.empty());
}

Mat sample = imread(_tf("dog416.png"));
ASSERT_TRUE(!sample.empty());

Size inputSize(416, 416);

if (sample.size() != inputSize)
resize(sample, sample, inputSize);

// Darknet expects pixel values scaled to [0, 1].
net.setInput(blobFromImage(sample, 1 / 255.F), "data");
Mat out = net.forward("detection_out");

// Keep only rows whose best class score exceeds the Darknet default threshold.
Mat detection;
const float confidenceThreshold = 0.24;

for (int i = 0; i < out.rows; i++) {
// Row layout: [x, y, w, h, t0, class scores...]; scores start at column 5.
float const*const prob_ptr = &out.at<float>(i, 5);
size_t objectClass = std::max_element(prob_ptr, prob_ptr + out.cols - 5) - prob_ptr;
float confidence = out.at<float>(i, (int)objectClass + 5);

if (confidence > confidenceThreshold)
detection.push_back(out.row(i));
}

// got by: ./darknet detector test ./cfg/voc.data ./cfg/tiny-yolo-voc.cfg ./tiny-yolo-voc.weights -thresh 0.24 ./dog416.png
// (dog416.png is dog.jpg that resized to 416x416 in the lossless PNG format)
float ref_array[] = {
0.736762F, 0.239551F, 0.315440F, 0.160779F, 0.761977F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
0.000000F, 0.000000F, 0.761967F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,

0.287486F, 0.653731F, 0.315579F, 0.534527F, 0.782737F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.780595F,
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F
};

// Two expected detections, 25 floats each (box + t0 + 20 VOC classes).
Mat ref(2, sizeof(ref_array)/(2*sizeof(float)), CV_32FC1, &ref_array);

normAssert(ref, detection);
}

// End-to-end reproducibility test for full YOLO-VOC: runs the network on
// dog416.png and compares the thresholded detections against reference values
// produced by the original Darknet binary (command line quoted below).
TEST(Reproducibility_YoloVoc, Accuracy)
{
Net net;
{
// NOTE(review): second findDataFile argument is presumably "required" --
// confirm lookup semantics for the large .weights file.
const string cfg = findDataFile("dnn/yolo-voc.cfg", false);
const string model = findDataFile("dnn/yolo-voc.weights", false);
net = readNetFromDarknet(cfg, model);
ASSERT_FALSE(net.empty());
}

Mat sample = imread(_tf("dog416.png"));
ASSERT_TRUE(!sample.empty());

Size inputSize(416, 416);

if (sample.size() != inputSize)
resize(sample, sample, inputSize);

// Darknet expects pixel values scaled to [0, 1].
net.setInput(blobFromImage(sample, 1 / 255.F), "data");
Mat out = net.forward("detection_out");

// Keep only rows whose best class score exceeds the Darknet default threshold.
Mat detection;
const float confidenceThreshold = 0.24;

for (int i = 0; i < out.rows; i++) {
// Row layout: [x, y, w, h, t0, class scores...]; scores start at column 5.
float const*const prob_ptr = &out.at<float>(i, 5);
size_t objectClass = std::max_element(prob_ptr, prob_ptr + out.cols - 5) - prob_ptr;
float confidence = out.at<float>(i, (int)objectClass + 5);

if (confidence > confidenceThreshold)
detection.push_back(out.row(i));
}

// got by: ./darknet detector test ./cfg/voc.data ./cfg/yolo-voc.cfg ./yolo-voc.weights -thresh 0.24 ./dog416.png
// (dog416.png is dog.jpg that resized to 416x416 in the lossless PNG format)
float ref_array[] = {
0.740161F, 0.214100F, 0.325575F, 0.173418F, 0.750769F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
0.000000F, 0.000000F, 0.750469F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,

0.501618F, 0.504757F, 0.461713F, 0.481310F, 0.783550F, 0.000000F, 0.780879F, 0.000000F, 0.000000F,
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,

0.279968F, 0.638651F, 0.282737F, 0.600284F, 0.901864F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.901615F,
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F
};

// Three expected detections, 25 floats each (box + t0 + 20 VOC classes).
Mat ref(3, sizeof(ref_array) / (3 * sizeof(float)), CV_32FC1, &ref_array);

normAssert(ref, detection);
}

}
@@ -420,4 +420,36 @@ TEST_F(Layer_RNN_Test, get_set_test)
EXPECT_EQ(shape(outputs[1]), shape(nT, nS, nH));
}

// Runs a single-layer Darknet model (<basename>.cfg [+ .weights]) on a stored
// input blob and compares the result with the stored reference output.
void testLayerUsingDarknetModels(String basename, bool useDarknetModel = false, bool useCommonInputBlob = true)
{
    const String cfgFile = _tf(basename + ".cfg");
    const String weightsFile = _tf(basename + ".weights");

    const String inputFile = useCommonInputBlob ? _tf("blob.npy") : _tf(basename + ".input.npy");
    const String refFile = _tf(basename + ".npy");

    cv::setNumThreads(cv::getNumberOfCPUs());

    Net net = readNetFromDarknet(cfgFile, useDarknetModel ? weightsFile : String());
    ASSERT_FALSE(net.empty());

    Mat inputBlob = blobFromNPY(inputFile);
    Mat refBlob = blobFromNPY(refFile);

    net.setInput(inputBlob, "data");
    Mat out = net.forward();

    normAssert(refBlob, out);
}

// Regression test: a stand-alone Region layer (cfg only, no weights) on a
// dedicated input blob must match the stored reference output.
TEST(Layer_Test_Region, Accuracy)
{
testLayerUsingDarknetModels("region", false, false);
}

// Regression test: a stand-alone Reorg layer (cfg only, no weights) on a
// dedicated input blob must match the stored reference output.
TEST(Layer_Test_Reorg, Accuracy)
{
testLayerUsingDarknetModels("reorg", false, false);
}

}
@@ -0,0 +1,115 @@
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;

#include <fstream>
#include <iostream>
#include <algorithm>
#include <cstdlib>
using namespace std;

// Fixed input resolution expected by the YOLO .cfg files used in this sample.
const size_t network_width = 416;
const size_t network_height = 416;

// Short usage banner printed for --help.
const char* about = "This sample uses You only look once (YOLO)-Detector "
"(https://arxiv.org/abs/1612.08242)"
"to detect objects on image\n"; // TODO: link

// Command-line options in cv::CommandLineParser format:
// "{ name | default value | help text }".
const char* params
= "{ help | false | print usage }"
"{ cfg | | model configuration }"
"{ model | | model weights }"
"{ image | | image for detection }"
"{ min_confidence | 0.24 | min confidence }";

int main(int argc, char** argv)
{
cv::CommandLineParser parser(argc, argv, params);

if (parser.get<bool>("help"))
{
std::cout << about << std::endl;
parser.printMessage();
return 0;
}

String modelConfiguration = parser.get<string>("cfg");
String modelBinary = parser.get<string>("model");

//! [Initialize network]
dnn::Net net = readNetFromDarknet(modelConfiguration, modelBinary);
//! [Initialize network]

if (net.empty())
{
cerr << "Can't load network by using the following files: " << endl;
cerr << "cfg-file: " << modelConfiguration << endl;
cerr << "weights-file: " << modelBinary << endl;
cerr << "Models can be downloaded here:" << endl;
cerr << "https://pjreddie.com/darknet/yolo/" << endl;
exit(-1);
}

cv::Mat frame = cv::imread(parser.get<string>("image"));

//! [Resizing without keeping aspect ratio]
cv::Mat resized;
cv::resize(frame, resized, cv::Size(network_width, network_height));
//! [Resizing without keeping aspect ratio]

//! [Prepare blob]
Mat inputBlob = blobFromImage(resized, 1 / 255.F); //Convert Mat to batch of images
//! [Prepare blob]

//! [Set input blob]
net.setInput(inputBlob, "data"); //set the network input
//! [Set input blob]

//! [Make forward pass]
cv::Mat detectionMat = net.forward("detection_out"); //compute output
//! [Make forward pass]


float confidenceThreshold = parser.get<float>("min_confidence");
for (int i = 0; i < detectionMat.rows; i++)
{
float const*const prob_ptr = &detectionMat.at<float>(i, 5);
size_t objectClass = std::max_element(prob_ptr, prob_ptr + detectionMat.cols-5) - prob_ptr;

float confidence = detectionMat.at<float>(i, (int)objectClass + 5);

if (confidence > confidenceThreshold)
{
float x = detectionMat.at<float>(i, 0);
float y = detectionMat.at<float>(i, 1);
float width = detectionMat.at<float>(i, 2);
float height = detectionMat.at<float>(i, 3);
float xLeftBottom = (x - width / 2) * frame.cols;
float yLeftBottom = (y - height / 2) * frame.rows;
float xRightTop = (x + width / 2) * frame.cols;
float yRightTop = (y + height / 2) * frame.rows;

std::cout << "Class: " << objectClass << std::endl;
std::cout << "Confidence: " << confidence << std::endl;

std::cout << " " << xLeftBottom
<< " " << yLeftBottom
<< " " << xRightTop
<< " " << yRightTop << std::endl;

Rect object((int)xLeftBottom, (int)yLeftBottom,
(int)(xRightTop - xLeftBottom),
(int)(yRightTop - yLeftBottom));

rectangle(frame, object, Scalar(0, 255, 0));
}
}

imshow("detections", frame);
waitKey();

return 0;
} // main