New data layer #5709

Open
wants to merge 8 commits into
from
@@ -0,0 +1 @@
+23 56 278 411 0
@@ -0,0 +1 @@
+22 45 153 221 21 32 155 421 301 22
@@ -0,0 +1,54 @@
+#ifndef CAFFE_BBOX_DATA_LAYER_HPP
+#define CAFFE_BBOX_DATA_LAYER_HPP
+
+#include <fstream>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/data_transformer.hpp"
+#include "caffe/internal_thread.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/layers/base_data_layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+namespace caffe {
+
+/**
+ * @brief One annotated object: the corners of its bounding box and its class
+ *        index.
+ *
+ * Used by BboxDataLayer, which provides data and bounding box details to the
+ * Net from image files and txt files.
+ */
+struct single_object {
+  int xmin;
+  int xmax;
+  int ymin;
+  int ymax;
+  int class_idx;
+};
+
+/**
+ * @brief Prefetching data layer that serves images together with the bounding
+ *        boxes listed in per-image annotation txt files.
+ *
+ * Takes no bottom blobs and produces exactly two top blobs: the image data
+ * and a flat label blob describing the objects of every image in the batch.
+ */
+template <typename Dtype>
+class BboxDataLayer : public BasePrefetchingDataLayer<Dtype> {
+ public:
+  explicit BboxDataLayer(const LayerParameter& param)
+      : BasePrefetchingDataLayer<Dtype>(param) {}
+  virtual ~BboxDataLayer();
+
+  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  // Type name of this layer.
+  virtual const char* type() const { return "BboxData"; }
+  // The layer is a pure source: no inputs, two outputs.
+  virtual int ExactNumBottomBlobs() const { return 0; }
+  virtual int ExactNumTopBlobs() const { return 2; }
+
+ protected:
+  // Randomly reorders lines_ using prefetch_rng_.
+  virtual void ShuffleImages();
+  // Fills one batch with transformed images and their packed labels.
+  virtual void load_batch(Batch<Dtype>* batch);
+  // Parses the annotation file `filename` into *bbox_ (cleared first).
+  void infer_bbox_shape(const string& filename,
+      std::vector<single_object>* bbox_);
+
+  // Random generator used for shuffling; only created when shuffle is set.
+  shared_ptr<Caffe::RNG> prefetch_rng_;
+  // (image path, annotation path) pairs read from the source list.
+  vector<std::pair<std::string, std::string> > lines_;
+  // Index into lines_ of the next sample to load.
+  int lines_id_;
+};
+
+} // namespace caffe
+
+#endif // CAFFE_BBOX_DATA_LAYER_HPP
@@ -0,0 +1,227 @@
+#ifdef USE_OPENCV
+#include <opencv2/core/core.hpp>
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "caffe/data_transformer.hpp"
+#include "caffe/layers/base_data_layer.hpp"
+#include "caffe/layers/bbox_data_layer.hpp"
+#include "caffe/util/benchmark.hpp"
+#include "caffe/util/io.hpp"
+#include "caffe/util/math_functions.hpp"
+#include "caffe/util/rng.hpp"
+
+namespace caffe {
+
+/**
+ * @brief Stops the layer's internal thread before the layer is destroyed.
+ */
+template <typename Dtype>
+// The destructor is named without a template argument list: the form
+// `~BboxDataLayer<Dtype>()` is not a valid declarator as of C++20.
+BboxDataLayer<Dtype>::~BboxDataLayer() {
+  this->StopInternalThread();
+}
+
+/**
+ * @brief Reads the image/annotation list and shapes the top blobs and the
+ *        prefetch buffers from the first sample that will be served.
+ *
+ * Each non-empty line of bbox_data_param().source() is split at its last
+ * space into an image path and an annotation path (both are later prefixed
+ * with root_folder). The list is optionally shuffled, and a random number of
+ * leading entries is optionally skipped (rand_skip).
+ *
+ * @param bottom unused
+ * @param top top[0] is reshaped to batch_size x (shape inferred from the
+ *        first image). top[1] is reshaped to (5 * objects + 1) x 1 x 1 x 1
+ *        using the first annotation file only; load_batch() reshapes the
+ *        label blob of every batch it fills.
+ */
+template <typename Dtype>
+void BboxDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  const BboxDataParameter& bbox_data_param =
+      this->layer_param_.bbox_data_param();
+  const int new_height = bbox_data_param.new_height();
+  const int new_width = bbox_data_param.new_width();
+  const bool is_color = bbox_data_param.is_color();
+  const string& root_folder = bbox_data_param.root_folder();
+
+  // Read the file with image file names and label file names.
+  const string& source = bbox_data_param.source();
+  LOG(INFO) << "Opening file " << source;
+  std::ifstream infile(source.c_str());
+  // Report an unreadable list explicitly instead of as "File is empty".
+  CHECK(infile.is_open()) << "Could not open " << source;
+  string line;
+  while (std::getline(infile, line)) {
+    if (line.empty()) {
+      continue;  // a blank line must not become an entry with empty paths
+    }
+    const size_t pos = line.find_last_of(' ');
+    // Without a separator, npos + 1 wraps to 0 and both paths would silently
+    // become the whole line, so reject such lines up front.
+    CHECK(pos != string::npos && pos > 0 && pos + 1 < line.size())
+        << "Malformed line in " << source << ": '" << line
+        << "' (expected '<image-filename> <annotations-filename>')";
+    lines_.push_back(
+        std::make_pair(line.substr(0, pos), line.substr(pos + 1)));
+  }
+
+  CHECK(!lines_.empty()) << "File is empty";
+
+  if (bbox_data_param.shuffle()) {
+    // randomly shuffle data
+    LOG(INFO) << "Shuffling data";
+    const unsigned int prefetch_rng_seed = caffe_rng_rand();
+    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
+    ShuffleImages();
+  } else {
+    if (this->phase_ == TRAIN && Caffe::solver_rank() > 0 &&
+        bbox_data_param.rand_skip() == 0) {
+      LOG(WARNING) << "Shuffling or skipping recommended for multi-GPU";
+    }
+  }
+  LOG(INFO) << "A total of " << lines_.size() << " images.";
+
+  lines_id_ = 0;
+  // Check if we would need to randomly skip a few data points
+  if (bbox_data_param.rand_skip()) {
+    unsigned int skip = caffe_rng_rand() % bbox_data_param.rand_skip();
+    LOG(INFO) << "Skipping first " << skip << " data points.";
+    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
+    lines_id_ = skip;
+  }
+  // Read an image, and use it to initialize the top blob.
+  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
+                                    new_height, new_width, is_color);
+  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
+  // Get the expected blob shape from a cv_image
+  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
+  this->transformed_data_.Reshape(top_shape);
+  const int batch_size = bbox_data_param.batch_size();
+  CHECK_GT(batch_size, 0) << "Positive batch size required";
+  top_shape[0] = batch_size;
+  for (size_t i = 0; i < this->prefetch_.size(); ++i) {
+    this->prefetch_[i]->data_.Reshape(top_shape);
+  }
+  top[0]->Reshape(top_shape);
+
+  LOG(INFO) << "output data size: " << top[0]->num() << ","
+      << top[0]->channels() << "," << top[0]->height() << ","
+      << top[0]->width();
+
+  // Label blob: one count slot plus five values per object of the first
+  // annotation file, laid out as N x 1 x 1 x 1.
+  vector<single_object> first_objects;
+  infer_bbox_shape(root_folder + lines_[lines_id_].second, &first_objects);
+  vector<int> bbox_shape(4, 1);
+  bbox_shape[0] = first_objects.size() * 5 + 1;
+  top[1]->Reshape(bbox_shape);
+  for (size_t i = 0; i < this->prefetch_.size(); ++i) {
+    this->prefetch_[i]->label_.Reshape(bbox_shape);
+  }
+}
+
+/**
+ * @brief Parses an annotation file into a list of objects.
+ *
+ * The file is read as a whitespace-separated stream of integers, taken in
+ * groups of five: xmin ymin xmax ymax class_idx. Reading stops at the first
+ * token that does not parse as an int. A file that cannot be opened yields
+ * an empty list.
+ *
+ * @param filename path of the annotation file
+ * @param bbox_ output; cleared, then filled with one entry per object
+ */
+template <typename Dtype>
+void BboxDataLayer<Dtype>::infer_bbox_shape(const string& filename,
+      std::vector<single_object>* bbox_) {
+  const size_t kFieldsPerObject = 5;
+  std::ifstream infile(filename.c_str());
+  std::istream_iterator<int> bbox_begin(infile), bbox_end;
+  const std::vector<int> values(bbox_begin, bbox_end);
+  // A trailing partial record would otherwise be read past the end of
+  // `values`, so reject it with a message naming the offending file.
+  CHECK(values.size() % kFieldsPerObject == 0)
+      << "Annotation file " << filename << " holds " << values.size()
+      << " integers; expected a multiple of 5"
+      << " (<xmin> <ymin> <xmax> <ymax> <class_idx> per object)";
+
+  bbox_->clear();
+  bbox_->reserve(values.size() / kFieldsPerObject);
+  for (size_t i = 0; i < values.size(); i += kFieldsPerObject) {
+    single_object obj;
+    obj.xmin = values[i];
+    obj.ymin = values[i + 1];
+    obj.xmax = values[i + 2];
+    obj.ymax = values[i + 3];
+    obj.class_idx = values[i + 4];
+    bbox_->push_back(obj);
+  }
+}
+
+/**
+ * @brief Randomly reorders lines_ with the generator held by prefetch_rng_.
+ *
+ * prefetch_rng_ must already be set; DataLayerSetUp creates it only when the
+ * shuffle option is enabled.
+ */
+template <typename Dtype>
+void BboxDataLayer<Dtype>::ShuffleImages() {
+  caffe::rng_t* const generator =
+      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
+  shuffle(lines_.begin(), lines_.end(), generator);
+}
+
+/**
+ * @brief Fills one prefetch batch with transformed images and their labels.
+ *
+ * Images are read in list order starting at lines_id_; when the end of the
+ * list is reached, reading wraps to the start (reshuffling if enabled).
+ *
+ * The label blob is reshaped for every batch to N x 1 x 1 x 1 and packed
+ * image by image as:
+ *   <object count> then, per object, <xmin> <ymin> <xmax> <ymax> <class_idx>
+ * so N = batch_size + 5 * (total number of objects in the batch).
+ *
+ * NOTE(review): box coordinates are copied from the annotation files as-is;
+ * nothing here adjusts them for the new_height/new_width resize or for the
+ * data transformer applied to the image.
+ *
+ * @param batch batch whose data_ and label_ blobs are reshaped and filled
+ */
+template <typename Dtype>
+void BboxDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
+  CPUTimer batch_timer;
+  batch_timer.Start();
+  double read_time = 0;
+  double trans_time = 0;
+  CPUTimer timer;
+  CHECK(batch->data_.count());
+  CHECK(this->transformed_data_.count());
+  // Bind by reference: copying the parameter message and the root folder
+  // string for every batch is unnecessary.
+  const BboxDataParameter& bbox_data_param =
+      this->layer_param_.bbox_data_param();
+  const int batch_size = bbox_data_param.batch_size();
+  const int new_height = bbox_data_param.new_height();
+  const int new_width = bbox_data_param.new_width();
+  const bool is_color = bbox_data_param.is_color();
+  const string& root_folder = bbox_data_param.root_folder();
+
+  // Reshape according to the first image of each batch
+  // on single input batches allows for inputs of varying dimension.
+  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
+      new_height, new_width, is_color);
+  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
+  // Use data_transformer to infer the expected blob shape from a cv_img
+  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
+  this->transformed_data_.Reshape(top_shape);
+  // Reshape batch according to the batch_size.
+  top_shape[0] = batch_size;
+  batch->data_.Reshape(top_shape);
+
+  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
+  // Objects of every image in this batch, in batch order.
+  std::vector<std::vector<single_object> > batch_bboxs;
+  batch_bboxs.reserve(batch_size);
+
+  const int lines_size = lines_.size();
+  for (int item_id = 0; item_id < batch_size; ++item_id) {
+    // get a blob
+    timer.Start();
+    CHECK_GT(lines_size, lines_id_);
+    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
+        new_height, new_width, is_color);
+    CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
+    read_time += timer.MicroSeconds();
+    timer.Start();
+    // Apply transformations (mirror, crop...) to the image
+    int offset = batch->data_.offset(item_id);
+    this->transformed_data_.set_cpu_data(prefetch_data + offset);
+    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
+    trans_time += timer.MicroSeconds();
+
+    // get the label data, parsed straight into the batch list (no copy)
+    batch_bboxs.push_back(std::vector<single_object>());
+    infer_bbox_shape(root_folder + lines_[lines_id_].second,
+        &batch_bboxs.back());
+
+    // go to the next iter
+    lines_id_++;
+    if (lines_id_ >= lines_size) {
+      // we have reached the end, Restart from the start
+      DLOG(INFO) << "Restarting data prefetching from start.";
+      lines_id_ = 0;
+      if (bbox_data_param.shuffle()) {
+        ShuffleImages();
+      }
+    }
+  }
+
+  // Reshape the label blob to accommodate all the labels: one count slot per
+  // image plus five values per object.
+  size_t total_values = batch_bboxs.size();
+  for (size_t i = 0; i < batch_bboxs.size(); ++i) {
+    total_values += batch_bboxs[i].size() * 5;
+  }
+  vector<int> label_shape(4, 1);
+  label_shape[0] = static_cast<int>(total_values);
+  batch->label_.Reshape(label_shape);
+
+  Dtype* prefetch_label = batch->label_.mutable_cpu_data();
+  size_t idx = 0;
+  for (size_t i = 0; i < batch_bboxs.size(); ++i) {
+    const std::vector<single_object>& objects = batch_bboxs[i];
+    prefetch_label[idx++] = objects.size();
+    for (size_t j = 0; j < objects.size(); ++j) {
+      prefetch_label[idx++] = objects[j].xmin;
+      prefetch_label[idx++] = objects[j].ymin;
+      prefetch_label[idx++] = objects[j].xmax;
+      prefetch_label[idx++] = objects[j].ymax;
+      prefetch_label[idx++] = objects[j].class_idx;
+    }
+  }
+
+  batch_timer.Stop();
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
+  DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
+  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
+}
+
+INSTANTIATE_CLASS(BboxDataLayer);  // macro defined outside this file; presumably instantiates the template for the supported Dtypes
+REGISTER_LAYER_CLASS(BboxData);  // macro defined outside this file; "BboxData" matches the string returned by type()
+
+} // namespace caffe
+#endif // USE_OPENCV
@@ -308,7 +308,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
-// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param)
+// LayerParameter next available layer-specific ID: 148 (last added: bbox_data_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
@@ -363,6 +363,7 @@ message LayerParameter {
optional AccuracyParameter accuracy_param = 102;
optional ArgMaxParameter argmax_param = 103;
optional BatchNormParameter batch_norm_param = 139;
+ optional BboxDataParameter bbox_data_param = 147;
optional BiasParameter bias_param = 141;
optional ConcatParameter concat_param = 104;
optional ContrastiveLossParameter contrastive_loss_param = 105;
@@ -812,6 +813,42 @@ message ImageDataParameter {
optional string root_folder = 12 [default = ""];
}
+message BboxDataParameter {
+ // Specify the data source: a text file listing one sample per line.
+ // Each line contains: <image-filename> <annotations-filename>.txt
+ // `<annotations-filename>` should contain a single line with the following data (space separated)
+ // <xmin> <ymin> <xmax> <ymax> <class_idx>
+ // If an image contains more than one object, the groups follow each other:
+ // <xmin_1> <ymin_1> <xmax_1> <ymax_1> <class_idx_1> <xmin_2> <ymin_2> <xmax_2> <ymax_2> <class_idx_2> ...
+ optional string source = 1;
+ // Specify the batch size.
+ optional uint32 batch_size = 4 [default = 1];
+ // The rand_skip variable is for the data layer to skip a few data points
+ // so that asynchronous sgd clients do not all start at the same point. The
+ // skip point is a random value in [0, rand_skip). Note that rand_skip should
+ // not be larger than the number of lines in the source file.
+ optional uint32 rand_skip = 7 [default = 0];
+ // Whether or not BboxDataLayer should shuffle the list of files at every epoch.
+ optional bool shuffle = 8 [default = false];
+ // Images are resized to new_height x new_width if these are not zero.
+ optional uint32 new_height = 9 [default = 0];
+ optional uint32 new_width = 10 [default = 0];
+ // Specify if the images are color or gray
+ optional bool is_color = 11 [default = true];
+ // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
+ // simple scaling and subtracting the data mean, if provided. Note that the
+ // mean subtraction is always carried out before scaling.
+ optional float scale = 2 [default = 1];
+ optional string mean_file = 3;
+ // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
+ // crop an image.
+ optional uint32 crop_size = 5 [default = 0];
+ // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
+ // data.
+ optional bool mirror = 6 [default = false];
+ // Folder prepended to every image and annotation path read from `source`.
+ optional string root_folder = 12 [default = ""];
+}
+
message InfogainLossParameter {
// Specify the infogain matrix source.
optional string source = 1;
Oops, something went wrong.