diff --git a/include/caffe/util/insert_splits.hpp b/include/caffe/util/insert_splits.hpp
new file mode 100644
index 00000000000..d0df85650c9
--- /dev/null
+++ b/include/caffe/util/insert_splits.hpp
@@ -0,0 +1,29 @@
+// Copyright 2014 Jeff Donahue
+
+#ifndef _CAFFE_UTIL_INSERT_SPLITS_HPP_
+#define _CAFFE_UTIL_INSERT_SPLITS_HPP_
+
+#include "caffe/proto/caffe.pb.h"
+
+using std::pair;
+using std::string;
+
+namespace caffe {
+
+// Copy NetParameters with SplitLayers added to replace any shared bottom
+// blobs with unique bottom blobs provided by the SplitLayer.
+void insert_splits(const NetParameter& param, NetParameter* param_split);
+
+void configure_split_layer(const string& layer_name, const string& blob_name,
+    const int blob_idx, const int split_count,
+    LayerConnection* split_layer_connection);
+
+string get_split_layer_name(const string& layer_name, const string& blob_name,
+    const int blob_idx);
+
+string get_split_blob_name(const string& layer_name, const string& blob_name,
+    const int blob_idx, const int split_idx);
+
+}  // namespace caffe
+
+#endif  // CAFFE_UTIL_INSERT_SPLITS_HPP_
diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index 47909a21d78..1861535dc52 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -125,6 +125,27 @@ class DropoutLayer : public NeuronLayer<Dtype> {
 };
 
 
+template <typename Dtype>
+class SplitLayer : public Layer<Dtype> {
+ public:
+  explicit SplitLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  int count_;
+};
+
+
 template <typename Dtype>
 class FlattenLayer : public Layer<Dtype> {
  public:
diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp
index cb65e8f77e3..ff69c91e498 100644
--- a/src/caffe/layer_factory.cpp
+++ b/src/caffe/layer_factory.cpp
@@ -55,6 +55,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
     return new SoftmaxLayer<Dtype>(param);
   } else if (type == "softmax_loss") {
     return new SoftmaxWithLossLayer<Dtype>(param);
+  } else if (type == "split") {
+    return new SplitLayer<Dtype>(param);
   } else if (type == "multinomial_logistic_loss") {
     return new MultinomialLogisticLossLayer<Dtype>(param);
   } else {
diff --git a/src/caffe/layers/split_layer.cpp b/src/caffe/layers/split_layer.cpp
new file mode 100644
index 00000000000..5accdd08e32
--- /dev/null
+++ b/src/caffe/layers/split_layer.cpp
@@ -0,0 +1,101 @@
+// Copyright 2014 Jeff Donahue
+
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void SplitLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  CHECK_EQ(bottom.size(), 1) << "Split Layer takes a single blob as input.";
+  CHECK_GE(top->size(), 1) << "Split Layer takes at least one blob as output.";
+  count_ = bottom[0]->count();
+  for (int i = 0; i < top->size(); ++i) {
+    // Allow the 0th top blob to be 'in-place', but no others.
+    if (i == 0 && (*top)[i] == bottom[0]) {
+      continue;
+    } else {
+      CHECK_NE((*top)[i], bottom[0]) << "Only 0th top blob may be in place.";
+    }
+    (*top)[i]->Reshape(bottom[0]->num(), bottom[0]->channels(),
+        bottom[0]->height(), bottom[0]->width());
+    CHECK_EQ(count_, (*top)[i]->count());
+  }
+};
+
+template <typename Dtype>
+void SplitLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  for (int i = 0; i < top->size(); ++i) {
+    if (i == 0 && (*top)[i] == bottom[0]) {
+      continue;
+    }
+    Dtype* top_data = (*top)[i]->mutable_cpu_data();
+    caffe_copy(count_, bottom_data, top_data);
+  }
+}
+
+template <typename Dtype>
+void SplitLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  for (int i = 0; i < top->size(); ++i) {
+    if (i == 0 && (*top)[i] == bottom[0]) {
+      continue;
+    }
+    Dtype* top_data = (*top)[i]->mutable_gpu_data();
+    caffe_gpu_copy(count_, bottom_data, top_data);
+  }
+}
+
+template <typename Dtype>
+Dtype SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  if (propagate_down) {
+    const Dtype* top_diff = top[0]->cpu_diff();
+    Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+    // Initialize by copying first top blob diff to our diff, unless we're
+    // doing in-place computation for the first blob, in which case the diff is
+    // already initialized.
+    if (top[0] != (*bottom)[0]) {
+      caffe_copy(count_, top_diff, bottom_diff);
+    }
+    // Add remaining top blob diffs.
+    for (int i = 1; i < top.size(); ++i) {
+      top_diff = top[i]->cpu_diff();
+      caffe_axpy(count_, Dtype(1.), top_diff, bottom_diff);
+    }
+  }
+  return Dtype(0.);
+}
+
+
+template <typename Dtype>
+Dtype SplitLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  if (propagate_down) {
+    const Dtype* top_diff = top[0]->gpu_diff();
+    Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+    // Initialize by copying first top blob diff to our diff, unless we're
+    // doing in-place computation for the first blob, in which case the diff is
+    // already initialized.
+    if (top[0] != (*bottom)[0]) {
+      caffe_gpu_copy(count_, top_diff, bottom_diff);
+    }
+    // Add remaining top blob diffs.
+    for (int i = 1; i < top.size(); ++i) {
+      top_diff = top[i]->gpu_diff();
+      caffe_gpu_axpy(count_, Dtype(1.), top_diff, bottom_diff);
+    }
+  }
+  return Dtype(0.);
+}
+
+INSTANTIATE_CLASS(SplitLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index f265cd36c55..e976dfd5fd0 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -9,6 +9,7 @@
 #include "caffe/layer.hpp"
 #include "caffe/net.hpp"
 #include "caffe/util/io.hpp"
+#include "caffe/util/insert_splits.hpp"
 
 using std::pair;
 using std::map;
@@ -29,7 +30,10 @@ Net<Dtype>::Net(const string& param_file) {
 }
 
 template <typename Dtype>
-void Net<Dtype>::Init(const NetParameter& param) {
+void Net<Dtype>::Init(const NetParameter& in_param) {
+  // Create a copy of in_param with splits added where necessary.
+  NetParameter param;
+  insert_splits(in_param, &param);
   // Basically, build all the layers and set up its connections.
 name_ = param.name();
 map<string, int> blob_name_to_idx;
diff --git a/src/caffe/test/test_split_layer.cpp b/src/caffe/test/test_split_layer.cpp
new file mode 100644
index 00000000000..3311c9ac76c
--- /dev/null
+++ b/src/caffe/test/test_split_layer.cpp
@@ -0,0 +1,1128 @@
+// Copyright 2014 Jeff Donahue
+
+#include <cstring>
+#include <cuda_runtime.h>
+#include <google/protobuf/text_format.h>
+
+#include "gtest/gtest.h"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+#include "caffe/util/insert_splits.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+namespace caffe {
+
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
+
+template <typename Dtype>
+class SplitLayerTest : public ::testing::Test {
+ protected:
+  SplitLayerTest()
+      : blob_bottom_(new Blob<Dtype>(2, 3, 6, 5)),
+        blob_top_a_(new Blob<Dtype>()),
+        blob_top_b_(new Blob<Dtype>()) {
+    // fill the values
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_top_vec_.push_back(blob_top_a_);
+    blob_top_vec_.push_back(blob_top_b_);
+  };
+  virtual ~SplitLayerTest() {
+    delete blob_bottom_;
+    delete blob_top_a_;
+    delete blob_top_b_;
+  }
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_top_a_;
+  Blob<Dtype>* const blob_top_b_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+typedef ::testing::Types<float, double> Dtypes;
+TYPED_TEST_CASE(SplitLayerTest, Dtypes);
+
+TYPED_TEST(SplitLayerTest, TestSetup) {
+  LayerParameter layer_param;
+  SplitLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  EXPECT_EQ(this->blob_top_a_->num(), 2);
+  EXPECT_EQ(this->blob_top_a_->channels(), 3);
+  EXPECT_EQ(this->blob_top_a_->height(), 6);
+  EXPECT_EQ(this->blob_top_a_->width(), 5);
+  EXPECT_EQ(this->blob_top_b_->num(), 2);
+  EXPECT_EQ(this->blob_top_b_->channels(), 3);
+  EXPECT_EQ(this->blob_top_b_->height(), 6);
+  EXPECT_EQ(this->blob_top_b_->width(), 5);
+}
+
+TYPED_TEST(SplitLayerTest, TestCPU) {
+  LayerParameter layer_param;
+  SplitLayer<TypeParam> layer(layer_param);
+  Caffe::set_mode(Caffe::CPU);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+    TypeParam bottom_value = this->blob_bottom_->cpu_data()[i];
+    EXPECT_EQ(bottom_value, this->blob_top_a_->cpu_data()[i]);
+    EXPECT_EQ(bottom_value, this->blob_top_b_->cpu_data()[i]);
+  }
+}
+
+TYPED_TEST(SplitLayerTest, TestGPU) {
+  LayerParameter layer_param;
+  SplitLayer<TypeParam> layer(layer_param);
+  Caffe::set_mode(Caffe::GPU);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+    TypeParam bottom_value = this->blob_bottom_->cpu_data()[i];
+    EXPECT_EQ(bottom_value, this->blob_top_a_->cpu_data()[i]);
+    EXPECT_EQ(bottom_value, this->blob_top_b_->cpu_data()[i]);
+  }
+}
+
+TYPED_TEST(SplitLayerTest, TestCPUInPlace) {
+  LayerParameter layer_param;
+  SplitLayer<TypeParam> layer(layer_param);
+  Caffe::set_mode(Caffe::CPU);
+  this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+    TypeParam bottom_value = this->blob_bottom_->cpu_data()[i];
+    EXPECT_EQ(bottom_value, this->blob_top_b_->cpu_data()[i]);
+  }
+}
+
+TYPED_TEST(SplitLayerTest, TestGPUInPlace) {
+  LayerParameter layer_param;
+  SplitLayer<TypeParam> layer(layer_param);
+  Caffe::set_mode(Caffe::GPU);
+  this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+    TypeParam bottom_value = this->blob_bottom_->cpu_data()[i];
+    EXPECT_EQ(bottom_value, this->blob_top_b_->cpu_data()[i]);
+  }
+}
+
+TYPED_TEST(SplitLayerTest, TestCPUGradient) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::CPU);
+  SplitLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-2);
+  checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(SplitLayerTest, TestGPUGradient) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::GPU);
+  SplitLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-2);
+  checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(SplitLayerTest, TestCPUGradientInPlace) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::CPU);
+  SplitLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-2);
+  this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
+  checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(SplitLayerTest, TestGPUGradientInPlace) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::GPU);
+  SplitLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-2);
+  this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
+  checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+
+template <typename Dtype>
+class SplitLayerInsertionTest : public ::testing::Test {
+ protected:
+  SplitLayerInsertionTest() { };
+  void RunInsertionTest(
+      const string& input_param_string, const string& output_param_string) {
+    // Test that insert_splits called on the proto specified by
+    // input_param_string results in the proto specified by
+    // output_param_string.
+    NetParameter input_param;
+    CHECK(google::protobuf::TextFormat::ParseFromString(
+        input_param_string, &input_param));
+    NetParameter expected_output_param;
+    CHECK(google::protobuf::TextFormat::ParseFromString(
+        output_param_string, &expected_output_param));
+    NetParameter actual_output_param;
+    insert_splits(input_param, &actual_output_param);
+    EXPECT_EQ(expected_output_param.DebugString(),
+        actual_output_param.DebugString());
+    // Also test idempotence.
+ NetParameter double_split_insert_param; + insert_splits(actual_output_param, &double_split_insert_param); + EXPECT_EQ(actual_output_param.DebugString(), + double_split_insert_param.DebugString()); + } +}; + +typedef ::testing::Types InsertionDtypes; +TYPED_TEST_CASE(SplitLayerInsertionTest, InsertionDtypes); + +TYPED_TEST(SplitLayerInsertionTest, TestNoInsertion1) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " layer { " + " name: 'data' " + " type: 'data' " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " layer { " + " name: 'innerprod' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod' " + "} " + "layers: { " + " layer { " + " name: 'loss' " + " type: 'softmax_with_loss' " + " } " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunInsertionTest(input_proto, input_proto); +} + +TYPED_TEST(SplitLayerInsertionTest, TestNoInsertion2) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " layer { " + " name: 'data' " + " type: 'data' " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " layer { " + " name: 'data_split' " + " type: 'split' " + " } " + " bottom: 'data' " + " top: 'data_split_0' " + " top: 'data_split_1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod1' " + " type: 'inner_product' " + " } " + " bottom: 'data_split_0' " + " top: 'innerprod1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod2' " + " type: 'inner_product' " + " } " + " bottom: 'data_split_1' " + " top: 'innerprod2' " + "} " + "layers: { " + " layer { " + " name: 'loss' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod1' " + " bottom: 'innerprod2' " + "} "; + this->RunInsertionTest(input_proto, input_proto); +} + +TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { + const string& input_proto = + "name: 'CaffeNet' " + "layers { " + " layer { " + " name: 'data' " + " type: 'data' " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batchsize: 256 " + " cropsize: 227 " + " mirror: true " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " layer { " + " name: 'conv1' " + " type: 'conv' " + " num_output: 96 " + " kernelsize: 11 " + " stride: 4 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'data' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'relu1' " + " type: 'relu' " + " } " + " bottom: 'conv1' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'pool1' " + " type: 'pool' " + " pool: MAX " + " kernelsize: 3 " + " stride: 2 " + " } " + " bottom: 'conv1' " + " top: 'pool1' " + "} " + "layers { " + " layer { " + " name: 'norm1' " + " type: 'lrn' " + " local_size: 5 " + " alpha: 0.0001 " + " beta: 0.75 " + " } " + " bottom: 'pool1' " + " top: 'norm1' " + "} " + "layers { " + " layer { " + " name: 'pad2' " + " type: 'padding' " + " pad: 2 " + " } " + " bottom: 'norm1' " + " top: 'pad2' " + "} " + "layers { " + " layer { " + " name: 'conv2' " + " type: 'conv' " + " num_output: 256 " + " group: 2 " + " kernelsize: 5 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. 
" + " weight_decay: 0. " + " } " + " bottom: 'pad2' " + " top: 'conv2' " + "} " + "layers { " + " layer { " + " name: 'relu2' " + " type: 'relu' " + " } " + " bottom: 'conv2' " + " top: 'conv2' " + "} " + "layers { " + " layer { " + " name: 'pool2' " + " type: 'pool' " + " pool: MAX " + " kernelsize: 3 " + " stride: 2 " + " } " + " bottom: 'conv2' " + " top: 'pool2' " + "} " + "layers { " + " layer { " + " name: 'norm2' " + " type: 'lrn' " + " local_size: 5 " + " alpha: 0.0001 " + " beta: 0.75 " + " } " + " bottom: 'pool2' " + " top: 'norm2' " + "} " + "layers { " + " layer { " + " name: 'pad3' " + " type: 'padding' " + " pad: 1 " + " } " + " bottom: 'norm2' " + " top: 'pad3' " + "} " + "layers { " + " layer { " + " name: 'conv3' " + " type: 'conv' " + " num_output: 384 " + " kernelsize: 3 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pad3' " + " top: 'conv3' " + "} " + "layers { " + " layer { " + " name: 'relu3' " + " type: 'relu' " + " } " + " bottom: 'conv3' " + " top: 'conv3' " + "} " + "layers { " + " layer { " + " name: 'pad4' " + " type: 'padding' " + " pad: 1 " + " } " + " bottom: 'conv3' " + " top: 'pad4' " + "} " + "layers { " + " layer { " + " name: 'conv4' " + " type: 'conv' " + " num_output: 384 " + " group: 2 " + " kernelsize: 3 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pad4' " + " top: 'conv4' " + "} " + "layers { " + " layer { " + " name: 'relu4' " + " type: 'relu' " + " } " + " bottom: 'conv4' " + " top: 'conv4' " + "} " + "layers { " + " layer { " + " name: 'pad5' " + " type: 'padding' " + " pad: 1 " + " } " + " bottom: 'conv4' " + " top: 'pad5' " + "} " + "layers { " + " layer { " + " name: 'conv5' " + " type: 'conv' " + " num_output: 256 " + " group: 2 " + " kernelsize: 3 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pad5' " + " top: 'conv5' " + "} " + "layers { " + " layer { " + " name: 'relu5' " + " type: 'relu' " + " } " + " bottom: 'conv5' " + " top: 'conv5' " + "} " + "layers { " + " layer { " + " name: 'pool5' " + " type: 'pool' " + " kernelsize: 3 " + " pool: MAX " + " stride: 2 " + " } " + " bottom: 'conv5' " + " top: 'pool5' " + "} " + "layers { " + " layer { " + " name: 'fc6' " + " type: 'innerproduct' " + " num_output: 4096 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.005 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pool5' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'relu6' " + " type: 'relu' " + " } " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'drop6' " + " type: 'dropout' " + " dropout_ratio: 0.5 " + " } " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'fc7' " + " type: 'innerproduct' " + " num_output: 4096 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.005 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. 
" + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'fc6' " + " top: 'fc7' " + "} " + "layers { " + " layer { " + " name: 'relu7' " + " type: 'relu' " + " } " + " bottom: 'fc7' " + " top: 'fc7' " + "} " + "layers { " + " layer { " + " name: 'drop7' " + " type: 'dropout' " + " dropout_ratio: 0.5 " + " } " + " bottom: 'fc7' " + " top: 'fc7' " + "} " + "layers { " + " layer { " + " name: 'fc8' " + " type: 'innerproduct' " + " num_output: 1000 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0 " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'fc7' " + " top: 'fc8' " + "} " + "layers { " + " layer { " + " name: 'loss' " + " type: 'softmax_loss' " + " } " + " bottom: 'fc8' " + " bottom: 'label' " + "} "; + this->RunInsertionTest(input_proto, input_proto); +} + +TYPED_TEST(SplitLayerInsertionTest, TestInsertionWithInPlace) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " layer { " + " name: 'data' " + " type: 'data' " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " layer { " + " name: 'innerprod' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod' " + "} " + "layers: { " + " layer { " + " name: 'relu' " + " type: 'relu' " + " } " + " bottom: 'innerprod' " + " top: 'innerprod' " + "} " + "layers: { " + " layer { " + " name: 'loss' " + " type: 'softmax_with_loss' " + " } " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunInsertionTest(input_proto, input_proto); +} + +TYPED_TEST(SplitLayerInsertionTest, TestInsertion) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " layer { " + " name: 'data' " + " type: 'data' " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " layer { " + " name: 'innerprod1' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod2' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod2' " + "} " + "layers: { " + " layer { " + " name: 'innerprod3' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod3' " + "} " + "layers: { " + " layer { " + " name: 'loss1' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod1' " + " bottom: 'innerprod2' " + "} " + "layers: { " + " layer { " + " name: 'loss2' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod2' " + " bottom: 'innerprod3' " + "} "; + const string& expected_output_proto = + "name: 'TestNetwork' " + "layers: { " + " layer { " + " name: 'data' " + " type: 'data' " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " layer { " + " name: 'data_data_0_split' " + " type: 'split' " + " } " + " bottom: 'data' " + " top: 'data' " + " top: 'data_data_0_split_1' " + " top: 'data_data_0_split_2' " + "} " + "layers: { " + " layer { " + " name: 'innerprod1' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod2' " + " type: 'inner_product' " + " } " + " bottom: 'data_data_0_split_1' " + " top: 'innerprod2' " + "} " + "layers: { " + " layer { " + " name: 'innerprod2_innerprod2_0_split' " + " type: 'split' " + " } " + " bottom: 'innerprod2' " + " top: 'innerprod2' " + " top: 'innerprod2_innerprod2_0_split_1' " + "} " + "layers: { " + " layer { " + " name: 
'innerprod3' " + " type: 'inner_product' " + " } " + " bottom: 'data_data_0_split_2' " + " top: 'innerprod3' " + "} " + "layers: { " + " layer { " + " name: 'loss1' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod1' " + " bottom: 'innerprod2' " + "} " + "layers: { " + " layer { " + " name: 'loss2' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod2_innerprod2_0_split_1' " + " bottom: 'innerprod3' " + "} "; + this->RunInsertionTest(input_proto, expected_output_proto); +} + +TYPED_TEST(SplitLayerInsertionTest, TestInsertionTwoTop) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " layer { " + " name: 'data' " + " type: 'data' " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " layer { " + " name: 'innerprod1' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod2' " + " type: 'inner_product' " + " } " + " bottom: 'label' " + " top: 'innerprod2' " + "} " + "layers: { " + " layer { " + " name: 'innerprod3' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod3' " + "} " + "layers: { " + " layer { " + " name: 'innerprod4' " + " type: 'inner_product' " + " } " + " bottom: 'label' " + " top: 'innerprod4' " + "} " + "layers: { " + " layer { " + " name: 'loss1' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod1' " + " bottom: 'innerprod3' " + "} " + "layers: { " + " layer { " + " name: 'loss2' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod2' " + " bottom: 'innerprod4' " + "} "; + const string& expected_output_proto = + "name: 'TestNetwork' " + "layers: { " + " layer { " + " name: 'data' " + " type: 'data' " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " layer { " + " name: 'data_data_0_split' " + " type: 'split' " + " } " + " bottom: 'data' " + " top: 'data' " + " top: 'data_data_0_split_1' " + "} " + "layers: { " + " layer { " + " name: 'label_data_1_split' " + " type: 'split' " + " } " + " bottom: 'label' " + " top: 'label' " + " top: 'label_data_1_split_1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod1' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod2' " + " type: 'inner_product' " + " } " + " bottom: 'label' " + " top: 'innerprod2' " + "} " + "layers: { " + " layer { " + " name: 'innerprod3' " + " type: 'inner_product' " + " } " + " bottom: 'data_data_0_split_1' " + " top: 'innerprod3' " + "} " + "layers: { " + " layer { " + " name: 'innerprod4' " + " type: 'inner_product' " + " } " + " bottom: 'label_data_1_split_1' " + " top: 'innerprod4' " + "} " + "layers: { " + " layer { " + " name: 'loss1' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod1' " + " bottom: 'innerprod3' " + "} " + "layers: { " + " layer { " + " name: 'loss2' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod2' " + " bottom: 'innerprod4' " + "} "; + this->RunInsertionTest(input_proto, expected_output_proto); +} + +TYPED_TEST(SplitLayerInsertionTest, TestInputInsertion) { + const string& input_proto = + "name: 'TestNetwork' " + "input: 'data' " + "input_dim: 10 " + "input_dim: 3 " + "input_dim: 227 " + "input_dim: 227 " + "layers: { " + " layer { " + " name: 'innerprod1' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod2' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " 
top: 'innerprod2' " + "} " + "layers: { " + " layer { " + " name: 'loss' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod1' " + " bottom: 'innerprod2' " + "} "; + const string& expected_output_proto = + "name: 'TestNetwork' " + "input: 'data' " + "input_dim: 10 " + "input_dim: 3 " + "input_dim: 227 " + "input_dim: 227 " + "layers: { " + " layer { " + " name: 'data_input_0_split' " + " type: 'split' " + " } " + " bottom: 'data' " + " top: 'data' " + " top: 'data_input_0_split_1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod1' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod2' " + " type: 'inner_product' " + " } " + " bottom: 'data_input_0_split_1' " + " top: 'innerprod2' " + "} " + "layers: { " + " layer { " + " name: 'loss' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod1' " + " bottom: 'innerprod2' " + "} "; + this->RunInsertionTest(input_proto, expected_output_proto); +} + +TYPED_TEST(SplitLayerInsertionTest, TestWithInPlace) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " layer { " + " name: 'data' " + " type: 'data' " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " layer { " + " name: 'innerprod1' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod1' " + "} " + "layers: { " + " layer { " + " name: 'relu1' " + " type: 'relu' " + " } " + " bottom: 'innerprod1' " + " top: 'innerprod1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod2' " + " type: 'inner_product' " + " } " + " bottom: 'innerprod1' " + " top: 'innerprod2' " + "} " + "layers: { " + " layer { " + " name: 'loss1' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod1' " + " bottom: 'label' " + "} " + "layers: { " + " layer { " + " name: 'loss2' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod2' " + " bottom: 'data' " + "} "; + const string& expected_output_proto = + "name: 'TestNetwork' " + "layers: { " + " layer { " + " name: 'data' " + " type: 'data' " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " layer { " + " name: 'data_data_0_split' " + " type: 'split' " + " } " + " bottom: 'data' " + " top: 'data' " + " top: 'data_data_0_split_1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod1' " + " type: 'inner_product' " + " } " + " bottom: 'data' " + " top: 'innerprod1' " + "} " + "layers: { " + " layer { " + " name: 'relu1' " + " type: 'relu' " + " } " + " bottom: 'innerprod1' " + " top: 'innerprod1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod1_relu1_0_split' " + " type: 'split' " + " } " + " bottom: 'innerprod1' " + " top: 'innerprod1' " + " top: 'innerprod1_relu1_0_split_1' " + "} " + "layers: { " + " layer { " + " name: 'innerprod2' " + " type: 'inner_product' " + " } " + " bottom: 'innerprod1' " + " top: 'innerprod2' " + "} " + "layers: { " + " layer { " + " name: 'loss1' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod1_relu1_0_split_1' " + " bottom: 'label' " + "} " + "layers: { " + " layer { " + " name: 'loss2' " + " type: 'euclidean_loss' " + " } " + " bottom: 'innerprod2' " + " bottom: 'data_data_0_split_1' " + "} "; + this->RunInsertionTest(input_proto, expected_output_proto); +} + +} diff --git a/src/caffe/util/insert_splits.cpp b/src/caffe/util/insert_splits.cpp new file mode 100644 index 00000000000..6db6458c4af --- /dev/null +++ b/src/caffe/util/insert_splits.cpp @@ -0,0 +1,129 @@ +// Copyright 2014 Jeff Donahue + 
+#include <map>
+#include <sstream>
+#include <string>
+
+#include "caffe/common.hpp"
+#include "caffe/util/insert_splits.hpp"
+
+using std::map;
+using std::ostringstream;
+using std::pair;
+using std::make_pair;
+
+namespace caffe {
+
+void insert_splits(const NetParameter& param, NetParameter* param_split) {
+  // Initialize by copying from the input NetParameter.
+  param_split->CopyFrom(param);
+  param_split->clear_layers();
+  map<string, pair<int, int> > blob_name_to_last_top_idx;
+  map<pair<int, int>, pair<int, int> > bottom_idx_to_source_top_idx;
+  map<pair<int, int>, int> top_idx_to_bottom_count;
+  map<pair<int, int>, int> top_idx_to_bottom_split_idx;
+  map<int, string> layer_idx_to_layer_name;
+  layer_idx_to_layer_name[-1] = "input";
+  // Determine the number of times each blob is used as an input (bottom) blob.
+  for (int i = 0; i < param.input_size(); ++i) {
+    const string& blob_name = param.input(i);
+    blob_name_to_last_top_idx[blob_name] = make_pair(-1, i);
+  }
+  for (int i = 0; i < param.layers_size(); ++i) {
+    const LayerConnection& layer_connection = param.layers(i);
+    layer_idx_to_layer_name[i] = layer_connection.layer().name();
+    for (int j = 0; j < layer_connection.bottom_size(); ++j) {
+      const string& blob_name = layer_connection.bottom(j);
+      if (blob_name_to_last_top_idx.find(blob_name) ==
+          blob_name_to_last_top_idx.end()) {
+        LOG(FATAL) << "Unknown blob input " << blob_name << " to layer " << j;
+      }
+      const pair<int, int>& bottom_idx = make_pair(i, j);
+      const pair<int, int>& top_idx = blob_name_to_last_top_idx[blob_name];
+      bottom_idx_to_source_top_idx[bottom_idx] = top_idx;
+      ++top_idx_to_bottom_count[top_idx];
+    }
+    for (int j = 0; j < layer_connection.top_size(); ++j) {
+      const string& blob_name = layer_connection.top(j);
+      blob_name_to_last_top_idx[blob_name] = make_pair(i, j);
+    }
+  }
+  // Create split layer for any input blobs used by other layers as bottom
+  // blobs more than once.
+  for (int i = 0; i < param.input_size(); ++i) {
+    const int split_count = top_idx_to_bottom_count[make_pair(-1, i)];
+    if (split_count > 1) {
+      const string& layer_name = layer_idx_to_layer_name[-1];
+      const string& blob_name = param.input(i);
+      LayerConnection* split_layer_connection = param_split->add_layers();
+      configure_split_layer(layer_name, blob_name, i, split_count,
+          split_layer_connection);
+    }
+  }
+  for (int i = 0; i < param.layers_size(); ++i) {
+    LayerConnection* layer_connection = param_split->add_layers();
+    layer_connection->CopyFrom(param.layers(i));
+    // Replace any shared bottom blobs with split layer outputs.
+    for (int j = 0; j < layer_connection->bottom_size(); ++j) {
+      const pair<int, int>& top_idx =
+          bottom_idx_to_source_top_idx[make_pair(i, j)];
+      const int split_count = top_idx_to_bottom_count[top_idx];
+      if (split_count > 1) {
+        const string& layer_name = layer_idx_to_layer_name[top_idx.first];
+        const string& blob_name = layer_connection->bottom(j);
+        layer_connection->set_bottom(j, get_split_blob_name(layer_name,
+            blob_name, top_idx.second, top_idx_to_bottom_split_idx[top_idx]++));
+      }
+    }
+    // Create split layer for any top blobs used by other layers as bottom
+    // blobs more than once.
+    for (int j = 0; j < layer_connection->top_size(); ++j) {
+      const int split_count = top_idx_to_bottom_count[make_pair(i, j)];
+      if (split_count > 1) {
+        const string& layer_name = layer_idx_to_layer_name[i];
+        const string& blob_name = layer_connection->top(j);
+        LayerConnection* split_layer_connection = param_split->add_layers();
+        configure_split_layer(layer_name, blob_name, j, split_count,
+            split_layer_connection);
+      }
+    }
+  }
+}
+
+void configure_split_layer(const string& layer_name, const string& blob_name,
+    const int blob_idx, const int split_count,
+    LayerConnection* split_layer_connection) {
+  split_layer_connection->Clear();
+  split_layer_connection->add_bottom(blob_name);
+  LayerParameter* split_layer_param = split_layer_connection->mutable_layer();
+  split_layer_param->set_name(
+      get_split_layer_name(layer_name, blob_name, blob_idx));
+  split_layer_param->set_type("split");
+  for (int k = 0; k < split_count; ++k) {
+    split_layer_connection->add_top(
+        get_split_blob_name(layer_name, blob_name, blob_idx, k));
+  }
+}
+
+string get_split_layer_name(const string& layer_name, const string& blob_name,
+    const int blob_idx) {
+  ostringstream split_layer_name;
+  split_layer_name << blob_name << "_" << layer_name << "_" << blob_idx
+      << "_split";
+  return split_layer_name.str();
+}
+
+string get_split_blob_name(const string& layer_name, const string& blob_name,
+    const int blob_idx, const int split_idx) {
+  // 0th split top blob is given the same name as the bottom blob so that
+  // computation is done 'in-place', saving a bit of time and memory.
+  if (split_idx == 0) {
+    return blob_name;
+  }
+  ostringstream split_blob_name;
+  split_blob_name << blob_name << "_" << layer_name << "_" << blob_idx
+      << "_split_" << split_idx;
+  return split_blob_name.str();
+}
+
+}  // namespace caffe
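For reference, a minimal usage sketch of insert_splits(), mirroring RunInsertionTest in the test file above; it is illustrative only (not part of the commit) and assumes this tree's caffe headers plus protobuf's text-format API. Two layers consume the net input blob 'data', so, per get_split_layer_name / get_split_blob_name, the rewritten net gains a 'data_input_0_split' layer whose tops are 'data' and 'data_input_0_split_1', with innerprod2's bottom rewired to the latter (the same expectation as TestInputInsertion).

// Usage sketch (assumes this tree's caffe headers; not part of the diff).
#include <iostream>
#include <string>

#include <google/protobuf/text_format.h>

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/insert_splits.hpp"

int main() {
  // Two layers consume the net input blob 'data', so a split is required.
  const std::string input_proto =
      "name: 'TestNetwork' "
      "input: 'data' "
      "input_dim: 10 input_dim: 3 input_dim: 227 input_dim: 227 "
      "layers: { layer { name: 'innerprod1' type: 'inner_product' } "
      "  bottom: 'data' top: 'innerprod1' } "
      "layers: { layer { name: 'innerprod2' type: 'inner_product' } "
      "  bottom: 'data' top: 'innerprod2' } ";
  caffe::NetParameter input_param;
  if (!google::protobuf::TextFormat::ParseFromString(input_proto, &input_param)) {
    std::cerr << "Failed to parse input prototxt." << std::endl;
    return 1;
  }
  caffe::NetParameter output_param;
  caffe::insert_splits(input_param, &output_param);
  // Expect a 'data_input_0_split' layer with tops 'data' and
  // 'data_input_0_split_1'; innerprod2's bottom is rewired to the latter.
  std::cout << output_param.DebugString() << std::endl;
  return 0;
}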