@@ -0,0 +1,493 @@
name: "yolo"
layer {
name: "input"
type: "Input"
top: "data"
input_param { shape: { dim: 1 dim: 3 dim: 416 dim: 416 } }
}
layer {
name: "conv0"
type: "Convolution"
bottom: "data"
top: "conv0"
convolution_param {
num_output: 32
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv0"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
convolution_param {
num_output: 64
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv2"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4"
type: "Convolution"
bottom: "pool3"
top: "conv4"
convolution_param {
num_output: 128
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
convolution_param {
num_output: 64
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv6"
type: "Convolution"
bottom: "conv5"
top: "conv6"
convolution_param {
num_output: 128
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "pool7"
type: "Pooling"
bottom: "conv6"
top: "pool7"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv8"
type: "Convolution"
bottom: "pool7"
top: "conv8"
convolution_param {
num_output: 256
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv9"
type: "Convolution"
bottom: "conv8"
top: "conv9"
convolution_param {
num_output: 128
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv10"
type: "Convolution"
bottom: "conv9"
top: "conv10"
convolution_param {
num_output: 256
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "pool11"
type: "Pooling"
bottom: "conv10"
top: "pool11"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv12"
type: "Convolution"
bottom: "pool11"
top: "conv12"
convolution_param {
num_output: 512
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv13"
type: "Convolution"
bottom: "conv12"
top: "conv13"
convolution_param {
num_output: 256
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv14"
type: "Convolution"
bottom: "conv13"
top: "conv14"
convolution_param {
num_output: 512
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv15"
type: "Convolution"
bottom: "conv14"
top: "conv15"
convolution_param {
num_output: 256
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv16"
type: "Convolution"
bottom: "conv15"
top: "conv16"
convolution_param {
num_output: 512
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "pool17"
type: "Pooling"
bottom: "conv16"
top: "pool17"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv18"
type: "Convolution"
bottom: "pool17"
top: "conv18"
convolution_param {
num_output: 1024
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv19"
type: "Convolution"
bottom: "conv18"
top: "conv19"
convolution_param {
num_output: 512
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv20"
type: "Convolution"
bottom: "conv19"
top: "conv20"
convolution_param {
num_output: 1024
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv21"
type: "Convolution"
bottom: "conv20"
top: "conv21"
convolution_param {
num_output: 512
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv22"
type: "Convolution"
bottom: "conv21"
top: "conv22"
convolution_param {
num_output: 1024
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv23"
type: "Convolution"
bottom: "conv22"
top: "conv23"
convolution_param {
num_output: 1024
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv24"
type: "Convolution"
bottom: "conv23"
top: "conv24"
convolution_param {
num_output: 1024
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "concat25"
type: "Concat"
bottom: "conv16"
top: "concat25"
}
layer {
name: "conv26"
type: "Convolution"
bottom: "concat25"
top: "conv26"
convolution_param {
num_output: 64
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "reorg27"
type: "Reorg"
bottom: "conv26"
top: "reorg27"
reorg_param {
stride: 2
}
}
layer {
name: "concat28"
type: "Concat"
bottom: "reorg27"
bottom: "conv24"
top: "concat28"
}
layer {
name: "conv29"
type: "Convolution"
bottom: "concat28"
top: "conv29"
convolution_param {
num_output: 1024
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv30"
type: "Convolution"
bottom: "conv29"
top: "conv30"
convolution_param {
num_output: 125
pad: 0
kernel_size: 1
stride: 1
fuse_type: UNFUSED
}
}
layer {
name: "detection_out"
type: "YoloDetectionOutput"
bottom: "conv30"
bottom: "data"
top: "detection_out"
yolo_detection_output_param {
num_classes: 20
coords: 4
confidence_threshold: 0.2
biases: 1.3221
biases: 1.73145
biases: 3.19275
biases: 4.00944
biases: 5.05587
biases: 8.09892
biases: 9.47112
biases: 4.84053
biases: 11.2364
biases: 10.0071
}
}
@@ -0,0 +1,533 @@
name: "yolo"
layer {
name: "data"
type: "VideoData"
top: "data"
transform_param {
scale: 0.0039215686274509803921568627451
resize_param {
prob: 1.0
resize_mode: WARP
height: 416
width: 416
interp_mode: LINEAR
}
}

data_param {
batch_size: 1
}
video_data_param {
video_type: VIDEO
video_file: "examples/videos/ILSVRC2015_train_00755001.mp4"
skip_frames: 0
}
}
layer {
name: "conv0"
type: "Convolution"
bottom: "data"
top: "conv0"
convolution_param {
num_output: 32
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv0"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
convolution_param {
num_output: 64
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv2"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4"
type: "Convolution"
bottom: "pool3"
top: "conv4"
convolution_param {
num_output: 128
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
convolution_param {
num_output: 64
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv6"
type: "Convolution"
bottom: "conv5"
top: "conv6"
convolution_param {
num_output: 128
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "pool7"
type: "Pooling"
bottom: "conv6"
top: "pool7"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv8"
type: "Convolution"
bottom: "pool7"
top: "conv8"
convolution_param {
num_output: 256
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv9"
type: "Convolution"
bottom: "conv8"
top: "conv9"
convolution_param {
num_output: 128
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv10"
type: "Convolution"
bottom: "conv9"
top: "conv10"
convolution_param {
num_output: 256
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "pool11"
type: "Pooling"
bottom: "conv10"
top: "pool11"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv12"
type: "Convolution"
bottom: "pool11"
top: "conv12"
convolution_param {
num_output: 512
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv13"
type: "Convolution"
bottom: "conv12"
top: "conv13"
convolution_param {
num_output: 256
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv14"
type: "Convolution"
bottom: "conv13"
top: "conv14"
convolution_param {
num_output: 512
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv15"
type: "Convolution"
bottom: "conv14"
top: "conv15"
convolution_param {
num_output: 256
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv16"
type: "Convolution"
bottom: "conv15"
top: "conv16"
convolution_param {
num_output: 512
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "pool17"
type: "Pooling"
bottom: "conv16"
top: "pool17"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv18"
type: "Convolution"
bottom: "pool17"
top: "conv18"
convolution_param {
num_output: 1024
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv19"
type: "Convolution"
bottom: "conv18"
top: "conv19"
convolution_param {
num_output: 512
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv20"
type: "Convolution"
bottom: "conv19"
top: "conv20"
convolution_param {
num_output: 1024
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv21"
type: "Convolution"
bottom: "conv20"
top: "conv21"
convolution_param {
num_output: 512
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv22"
type: "Convolution"
bottom: "conv21"
top: "conv22"
convolution_param {
num_output: 1024
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv23"
type: "Convolution"
bottom: "conv22"
top: "conv23"
convolution_param {
num_output: 1024
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv24"
type: "Convolution"
bottom: "conv23"
top: "conv24"
convolution_param {
num_output: 1024
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "concat25"
type: "Concat"
bottom: "conv16"
top: "concat25"
}
layer {
name: "conv26"
type: "Convolution"
bottom: "concat25"
top: "conv26"
convolution_param {
num_output: 64
bias_term: true
pad: 0
kernel_size: 1
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "reorg27"
type: "Reorg"
bottom: "conv26"
top: "reorg27"
reorg_param {
stride: 2
}
}
layer {
name: "concat28"
type: "Concat"
bottom: "reorg27"
bottom: "conv24"
top: "concat28"
}
layer {
name: "conv29"
type: "Convolution"
bottom: "concat28"
top: "conv29"
convolution_param {
num_output: 1024
bias_term: true
pad: 1
kernel_size: 3
stride: 1
fuse_type: FUSED_CONV_RELU
relu_param {
negative_slope: 0.1
}
}
}
layer {
name: "conv30"
type: "Convolution"
bottom: "conv29"
top: "conv30"
convolution_param {
num_output: 125
pad: 0
kernel_size: 1
stride: 1
fuse_type: UNFUSED
}
}
layer {
name: "detection_out"
type: "YoloDetectionOutput"
bottom: "conv30"
bottom: "data"
top: "detection_out"
transform_param {
scale: 0.0039215686274509803921568627451
resize_param {
prob: 1
resize_mode: WARP
height: 576
width: 1024
interp_mode: LINEAR
}
}
yolo_detection_output_param {
num_classes: 20
coords: 4
confidence_threshold: 0.1
visualize_threshold: 0.3
nms_threshold: 0.3
biases: 1.3221
biases: 1.73145
biases: 3.19275
biases: 4.00944
biases: 5.05587
biases: 8.09892
biases: 9.47112
biases: 4.84053
biases: 11.2364
biases: 10.0071
label_map_file: "data/yolo/labelmap_voc.prototxt"
visualize: true
}
}
layer {
name: "slience"
type: "Silence"
bottom: "detection_out"
include {
phase: TEST
}
}
@@ -866,9 +866,9 @@ bool ConvolutionLayerSpatial<Dtype>::verify_result(
config);
// Currently we can't do verification when conv is fused because the results
// won't match the results of forward_gpu_gemm. Need more work to fix it.
if (IsFused())
// FP16 verification may fail due to the natrue accuracy lost between FP16 and FP32.
if (IsFused() || std::is_same<Dtype, half_float::half>::value)
return true;
return true;
const Dtype *verify_data = verify_blob.cpu_data();
const Dtype *data = top[index]->cpu_data();
Dtype err_factor = 1;
@@ -25,8 +25,8 @@ void YoloDetectionOutputLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bot
num_classes_ = yolo_detection_output_param.num_classes();
num_box_ = yolo_detection_output_param.num_box();
coords_ = yolo_detection_output_param.coords();
confidence_threshold_ = 0.3; //yolo_detection_output_param.confidence_threshold();
nms_threshold_ = 0.05; //yolo_detection_output_param.nms_threshold();
confidence_threshold_ = yolo_detection_output_param.confidence_threshold();
nms_threshold_ = yolo_detection_output_param.nms_threshold();

for (int_tp c = 0; c < yolo_detection_output_param.biases_size(); ++c) {
biases_.push_back(yolo_detection_output_param.biases(c));
@@ -49,7 +49,6 @@ void YoloDetectionOutputLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bot
<< "Failed to convert label to name.";
CHECK(MapLabelToDisplayName(label_map, true, &label_to_display_name_))
<< "Failed to convert label to display name.";

}
}
visualize_ = yolo_detection_output_param.visualize();
@@ -134,7 +133,6 @@ void YoloDetectionOutputLayer<Dtype>::Forward_cpu(
Dtype* top_data;

if (num_kept == 0) {
LOG(INFO) << "Couldn't find any detections";
top_shape[2] = swap.num();
top[0]->Reshape(top_shape);
top_data = top[0]->mutable_cpu_data();
@@ -162,7 +160,7 @@ void YoloDetectionOutputLayer<Dtype>::Forward_cpu(
this->data_transformer_->TransformInv(bottom[1], &cv_imgs);
vector<cv::Scalar> colors = GetColors(label_to_display_name_.size());
VisualizeBBox(cv_imgs, top[0], visualize_threshold_, colors,
label_to_display_name_, "", true);
label_to_display_name_, save_file_, true);
#endif
}
}