Skip to content

Commit

Permalink
Merge pull request #129 from jeffdonahue/dags-by-split
Browse files Browse the repository at this point in the history
Generalize architectures to arbitrary DAGs by split layers
  • Loading branch information
shelhamer committed Feb 23, 2014
2 parents 2fce080 + 26630fe commit 5792f44
Show file tree
Hide file tree
Showing 7 changed files with 1,415 additions and 1 deletion.
29 changes: 29 additions & 0 deletions include/caffe/util/insert_splits.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2014 Jeff Donahue

#ifndef _CAFFE_UTIL_INSERT_SPLITS_HPP_
#define _CAFFE_UTIL_INSERT_SPLITS_HPP_

#include "caffe/proto/caffe.pb.h"

using std::pair;
using std::string;

namespace caffe {

// Copy NetParameters with SplitLayers added to replace any shared bottom
// blobs with unique bottom blobs provided by the SplitLayer.
//
// param:       the network description to read; taken by const reference.
// param_split: output; receives the copy of param with the splits added.
void insert_splits(const NetParameter& param, NetParameter* param_split);

// NOTE(review): the definition is not visible from this header. Judging by
// the signature only, this presumably fills in *split_layer_connection for a
// split layer that fans out blob `blob_name` (index `blob_idx`) of layer
// `layer_name` into `split_count` copies -- confirm against the definition.
void configure_split_layer(const string& layer_name, const string& blob_name,
const int blob_idx, const int split_count,
LayerConnection* split_layer_connection);

// NOTE(review): the definition is not visible from this header. Presumably
// returns the name given to the split layer inserted for blob `blob_name`
// (index `blob_idx`) of layer `layer_name` -- confirm against the definition.
string get_split_layer_name(const string& layer_name, const string& blob_name,
const int blob_idx);

// NOTE(review): the definition is not visible from this header. Presumably
// returns the name of the `split_idx`-th output blob of the split layer
// inserted for blob `blob_name` (index `blob_idx`) of layer `layer_name` --
// confirm against the definition.
string get_split_blob_name(const string& layer_name, const string& blob_name,
const int blob_idx, const int split_idx);

} // namespace caffe

#endif // _CAFFE_UTIL_INSERT_SPLITS_HPP_
21 changes: 21 additions & 0 deletions include/caffe/vision_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,27 @@ class DropoutLayer : public NeuronLayer<Dtype> {
};


// SplitLayer takes a single bottom blob and one or more top blobs.
// Forward copies the bottom blob's data into every top blob; Backward sums
// the diffs of all top blobs into the bottom blob's diff. The 0th top blob
// may be the bottom blob itself (in-place); no other top blob may alias it.
template <typename Dtype>
class SplitLayer : public Layer<Dtype> {
 public:
  // count_ is initialized to 0 so that it holds a defined value until SetUp()
  // assigns the real element count.
  explicit SplitLayer(const LayerParameter& param)
      : Layer<Dtype>(param), count_(0) {}
  // Checks the number of bottom/top blobs, records the bottom blob's element
  // count and reshapes every non-in-place top blob to the bottom's dimensions.
  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top);

 protected:
  // Copy the bottom data into each top blob (skipping an in-place 0th top).
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top);
  // When propagate_down is true, accumulate all top diffs into the bottom
  // diff. Both implementations always return Dtype(0.).
  virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
  virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
  // Number of elements in the bottom blob, as recorded by SetUp().
  int count_;
};


template <typename Dtype>
class FlattenLayer : public Layer<Dtype> {
public:
Expand Down
2 changes: 2 additions & 0 deletions src/caffe/layer_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
return new SoftmaxLayer<Dtype>(param);
} else if (type == "softmax_loss") {
return new SoftmaxWithLossLayer<Dtype>(param);
} else if (type == "split") {
return new SplitLayer<Dtype>(param);
} else if (type == "multinomial_logistic_loss") {
return new MultinomialLogisticLossLayer<Dtype>(param);
} else {
Expand Down
101 changes: 101 additions & 0 deletions src/caffe/layers/split_layer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright 2014 Jeff Donahue

#include <vector>

#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// Validates the blob configuration and shapes the outputs.
//
// bottom: must contain exactly one blob (the blob to duplicate).
// top:    must contain at least one blob. (*top)[0] may be the very same
//         object as bottom[0] (in-place); any other top blob that aliases
//         the bottom blob fails the CHECK below.
//
// Records bottom[0]->count() in count_ and reshapes every non-in-place top
// blob to the bottom blob's num/channels/height/width.
template <typename Dtype>
void SplitLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  CHECK_EQ(bottom.size(), 1) << "Split Layer takes a single blob as input.";
  CHECK_GE(top->size(), 1) << "Split Layer takes at least one blob as output.";
  count_ = bottom[0]->count();
  // Unsigned index: avoids a signed/unsigned comparison against size().
  for (size_t i = 0; i < top->size(); ++i) {
    // Allow the 0th top blob to be 'in-place', but no others.
    if (i == 0 && (*top)[i] == bottom[0]) {
      continue;
    }
    CHECK_NE((*top)[i], bottom[0]) << "Only 0th top blob may be in place.";
    (*top)[i]->Reshape(bottom[0]->num(), bottom[0]->channels(),
                       bottom[0]->height(), bottom[0]->width());
    // After reshaping, the top blob must hold as many elements as the bottom.
    CHECK_EQ(count_, (*top)[i]->count());
  }
}

// CPU forward pass: duplicates the bottom blob's data into each top blob.
// A 0th top blob that is the bottom blob itself (in-place) already holds the
// data and is therefore not copied into.
template <typename Dtype>
void SplitLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  Blob<Dtype>* const source_blob = bottom[0];
  const Dtype* const source = source_blob->cpu_data();
  for (int idx = 0; idx < top->size(); ++idx) {
    Blob<Dtype>* const dest_blob = (*top)[idx];
    const bool in_place = (idx == 0) && (dest_blob == source_blob);
    if (!in_place) {
      caffe_copy(count_, source, dest_blob->mutable_cpu_data());
    }
  }
}

// GPU forward pass: duplicates the bottom blob's data into each top blob.
// A 0th top blob that is the bottom blob itself (in-place) already holds the
// data and is therefore not copied into.
template <typename Dtype>
void SplitLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  Blob<Dtype>* const source_blob = bottom[0];
  const Dtype* const source = source_blob->gpu_data();
  for (int idx = 0; idx < top->size(); ++idx) {
    Blob<Dtype>* const dest_blob = (*top)[idx];
    const bool in_place = (idx == 0) && (dest_blob == source_blob);
    if (!in_place) {
      caffe_gpu_copy(count_, source, dest_blob->mutable_gpu_data());
    }
  }
}

// CPU backward pass: when propagate_down is set, the bottom diff becomes the
// sum of all top diffs. Always returns Dtype(0.).
template <typename Dtype>
Dtype SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  if (!propagate_down) {
    return Dtype(0.);
  }
  Blob<Dtype>* const source_blob = (*bottom)[0];
  const Dtype* const first_diff = top[0]->cpu_diff();
  Dtype* const accumulated = source_blob->mutable_cpu_diff();
  // Seed the accumulator with the 0th top diff. When the 0th top blob is the
  // bottom blob itself (in-place), the diff is already initialized and no
  // copy is needed.
  const bool in_place = (top[0] == source_blob);
  if (!in_place) {
    caffe_copy(count_, first_diff, accumulated);
  }
  // Fold every remaining top diff into the accumulator.
  for (int idx = 1; idx < top.size(); ++idx) {
    const Dtype* const next_diff = top[idx]->cpu_diff();
    caffe_axpy(count_, Dtype(1.), next_diff, accumulated);
  }
  return Dtype(0.);
}


// GPU backward pass: when propagate_down is set, the bottom diff becomes the
// sum of all top diffs. Always returns Dtype(0.).
template <typename Dtype>
Dtype SplitLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  if (!propagate_down) {
    return Dtype(0.);
  }
  Blob<Dtype>* const source_blob = (*bottom)[0];
  const Dtype* const first_diff = top[0]->gpu_diff();
  Dtype* const accumulated = source_blob->mutable_gpu_diff();
  // Seed the accumulator with the 0th top diff. When the 0th top blob is the
  // bottom blob itself (in-place), the diff is already initialized and no
  // copy is needed.
  const bool in_place = (top[0] == source_blob);
  if (!in_place) {
    caffe_gpu_copy(count_, first_diff, accumulated);
  }
  // Fold every remaining top diff into the accumulator.
  for (int idx = 1; idx < top.size(); ++idx) {
    const Dtype* const next_diff = top[idx]->gpu_diff();
    caffe_gpu_axpy(count_, Dtype(1.), next_diff, accumulated);
  }
  return Dtype(0.);
}

// NOTE(review): INSTANTIATE_CLASS is defined outside this file; presumably it
// emits the explicit template instantiations of SplitLayer so the member
// definitions above get compiled in this translation unit -- confirm.
INSTANTIATE_CLASS(SplitLayer);

} // namespace caffe
6 changes: 5 additions & 1 deletion src/caffe/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "caffe/layer.hpp"
#include "caffe/net.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/insert_splits.hpp"

using std::pair;
using std::map;
Expand All @@ -29,7 +30,10 @@ Net<Dtype>::Net(const string& param_file) {
}

template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& param) {
void Net<Dtype>::Init(const NetParameter& in_param) {
// Create a copy of in_param with splits added where necessary.
NetParameter param;
insert_splits(in_param, &param);
// Basically, build all the layers and set up its connections.
name_ = param.name();
map<string, int> blob_name_to_idx;
Expand Down

0 comments on commit 5792f44

Please sign in to comment.