Scale and Bias Layers #3591

Merged
2 commits merged on Jan 27, 2016
+1,717 −1
include/caffe/layers/bias_layer.hpp
@@ -0,0 +1,54 @@
+#ifndef CAFFE_BIAS_LAYER_HPP_
+#define CAFFE_BIAS_LAYER_HPP_
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+namespace caffe {
+
+/**
+ * @brief Computes a sum of two input Blobs, with the shape of the
+ * latter Blob "broadcast" to match the shape of the former.
+ * Equivalent to tiling the latter Blob, then computing the elementwise
+ * sum.
+ *
+ * The second input may be omitted, in which case it's learned as a parameter
+ * of the layer.
+ */
+template <typename Dtype>
+class BiasLayer : public Layer<Dtype> {
+ public:
+ explicit BiasLayer(const LayerParameter& param)
+ : Layer<Dtype>(param) {}
+ virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+
+ virtual inline const char* type() const { return "Bias"; }
+ virtual inline int MinBottomBlobs() const { return 1; }
+ virtual inline int MaxBottomBlobs() const { return 2; }
+ virtual inline int ExactNumTopBlobs() const { return 1; }
+
+ virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+ virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+ private:
+ Blob<Dtype> bias_multiplier_;
+ int outer_dim_, bias_dim_, inner_dim_, dim_;
+};
+
+
+
+} // namespace caffe
+
+#endif // CAFFE_BIAS_LAYER_HPP_
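
For reference, a minimal standalone sketch of the broadcast sum BiasLayer computes (illustrative code, not part of this PR; all names are hypothetical). bottom[0] is viewed as an (outer_dim × bias_dim × inner_dim) volume around the bias axes, matching the private members above, and the bias is indexed only by the broadcast coordinate:

#include <vector>

// out = in + broadcast(bias), with in flattened to
// (outer_dim, bias_dim, inner_dim) around the bias axes.
void bias_forward_reference(const std::vector<float>& in,
                            const std::vector<float>& bias,
                            int outer_dim, int bias_dim, int inner_dim,
                            std::vector<float>* out) {
  for (int n = 0; n < outer_dim; ++n)
    for (int d = 0; d < bias_dim; ++d)
      for (int i = 0; i < inner_dim; ++i) {
        const int idx = (n * bias_dim + d) * inner_dim + i;
        (*out)[idx] = in[idx] + bias[d];  // bias depends only on d
      }
}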
include/caffe/layers/scale_layer.hpp
@@ -0,0 +1,83 @@
+#ifndef CAFFE_SCALE_LAYER_HPP_
+#define CAFFE_SCALE_LAYER_HPP_
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+#include "caffe/layers/bias_layer.hpp"
+
+namespace caffe {
+
+/**
+ * @brief Computes a product of two input Blobs, with the shape of the
+ * latter Blob "broadcast" to match the shape of the former.
+ * Equivalent to tiling the latter Blob, then computing the elementwise
+ * product.
+ *
+ * The second input may be omitted, in which case it's learned as a parameter
+ * of the layer.
+ */
+template <typename Dtype>
+class ScaleLayer: public Layer<Dtype> {
+ public:
+ explicit ScaleLayer(const LayerParameter& param)
+ : Layer<Dtype>(param) {}
+ virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+
+ virtual inline const char* type() const { return "Scale"; }
+ // The scale factor is given either as a second bottom Blob or learned as a layer parameter.
+ virtual inline int MinBottomBlobs() const { return 1; }
+ virtual inline int MaxBottomBlobs() const { return 2; }
+ virtual inline int ExactNumTopBlobs() const { return 1; }
+
+ protected:
+ /**
+ * In the below shape specifications, @f$ i @f$ denotes the value of the
+ * `axis` field given by `this->layer_param_.scale_param().axis()`, after
+ * canonicalization (i.e., conversion from negative to positive index,
+ * if applicable).
+ *
+ * @param bottom input Blob vector (length 2)
+ * -# @f$ (d_0 \times ... \times
+ * d_i \times ... \times d_j \times ... \times d_n) @f$
+ * the first factor @f$ x @f$
+ * -# @f$ (d_i \times ... \times d_j) @f$
+ * the second factor @f$ y @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (d_0 \times ... \times
+ * d_i \times ... \times d_j \times ... \times d_n) @f$
+ * the product @f$ z = x y @f$ computed after "broadcasting" y.
+ * Equivalent to tiling @f$ y @f$ to have the same shape as @f$ x @f$,
+ * then computing the elementwise product.
+ */
+ virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+ virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+ shared_ptr<Layer<Dtype> > bias_layer_;
+ vector<Blob<Dtype>*> bias_bottom_vec_;
+ vector<bool> bias_propagate_down_;
+ int bias_param_id_;
+
+ Blob<Dtype> sum_multiplier_;
+ Blob<Dtype> sum_result_;
+ Blob<Dtype> temp_;
+ int axis_;
+ int outer_dim_, scale_dim_, inner_dim_;
+};
+
+
+} // namespace caffe
+
+#endif // CAFFE_SCALE_LAYER_HPP_
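
ScaleLayer computes the analogous broadcast product. Judging from the bias_layer_ member, the layer can also fuse an optional additive bias, giving the affine transform y = scale * x + bias commonly paired with BatchNorm. A sketch of that fused elementwise computation (illustrative code, not part of this PR; names are hypothetical):

#include <vector>

// out = in * broadcast(scale) + broadcast(bias), with both factors
// broadcast over the same axes.
void scale_bias_reference(const std::vector<float>& in,
                          const std::vector<float>& scale,
                          const std::vector<float>& bias,
                          int outer_dim, int scale_dim, int inner_dim,
                          std::vector<float>* out) {
  for (int n = 0; n < outer_dim; ++n)
    for (int d = 0; d < scale_dim; ++d)
      for (int i = 0; i < inner_dim; ++i) {
        const int idx = (n * scale_dim + d) * inner_dim + i;
        (*out)[idx] = in[idx] * scale[d] + bias[d];
      }
}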
src/caffe/layers/bias_layer.cpp
@@ -0,0 +1,121 @@
+#include <vector>
+
+#include "caffe/filler.hpp"
+#include "caffe/layers/bias_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void BiasLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ if (bottom.size() == 1 && this->blobs_.size() > 0) {
+ LOG(INFO) << "Skipping parameter initialization";
+ } else if (bottom.size() == 1) {
+ // bias is a learned parameter; initialize it
+ const BiasParameter& param = this->layer_param_.bias_param();
+ const int axis = bottom[0]->CanonicalAxisIndex(param.axis());
+ const int num_axes = param.num_axes();
+ CHECK_GE(num_axes, -1) << "num_axes must be non-negative, "
+ << "or -1 to extend to the end of bottom[0]";
+ if (num_axes >= 0) {
+ CHECK_GE(bottom[0]->num_axes(), axis + num_axes)
+ << "bias blob's shape extends past bottom[0]'s shape when applied "
+ << "starting with bottom[0] axis = " << axis;
+ }
+ this->blobs_.resize(1);
+ const vector<int>::const_iterator& shape_start =
+ bottom[0]->shape().begin() + axis;
+ const vector<int>::const_iterator& shape_end =
+ (num_axes == -1) ? bottom[0]->shape().end() : (shape_start + num_axes);
+ vector<int> bias_shape(shape_start, shape_end);
+ this->blobs_[0].reset(new Blob<Dtype>(bias_shape));
+ shared_ptr<Filler<Dtype> > filler(GetFiller<Dtype>(param.filler()));
+ filler->Fill(this->blobs_[0].get());
+ }
+ this->param_propagate_down_.resize(this->blobs_.size(), true);
+}
+
+template <typename Dtype>
+void BiasLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ const BiasParameter& param = this->layer_param_.bias_param();
+ Blob<Dtype>* bias = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
+ // Always set axis == 0 in special case where bias is a scalar
+ // (num_axes == 0). Mathematically equivalent for any choice of axis, so the
+ // actual setting can be safely ignored; and computation is most efficient
+ // with axis == 0 and (therefore) outer_dim_ == 1.
+ const int axis = (bias->num_axes() == 0) ?
+ 0 : bottom[0]->CanonicalAxisIndex(param.axis());
+ CHECK_GE(bottom[0]->num_axes(), axis + bias->num_axes())
+ << "bias blob's shape extends past bottom[0]'s shape when applied "
+ << "starting with bottom[0] axis = " << axis;
+ for (int i = 0; i < bias->num_axes(); ++i) {
+ CHECK_EQ(bottom[0]->shape(axis + i), bias->shape(i))
+ << "dimension mismatch between bottom[0]->shape(" << axis + i
+ << ") and bias->shape(" << i << ")";
+ }
+ outer_dim_ = bottom[0]->count(0, axis);
+ bias_dim_ = bias->count();
+ inner_dim_ = bottom[0]->count(axis + bias->num_axes());
+ dim_ = bias_dim_ * inner_dim_;
+ if (bottom[0] != top[0]) {
+ top[0]->ReshapeLike(*bottom[0]);
+ }
+ bias_multiplier_.Reshape(vector<int>(1, inner_dim_));
+ // Refill the all-ones vector only if a reshape may have grown the buffer.
+ if (bias_multiplier_.cpu_data()[inner_dim_ - 1] != Dtype(1)) {
+ caffe_set(inner_dim_, Dtype(1), bias_multiplier_.mutable_cpu_data());
+ }
+}
+
+template <typename Dtype>
+void BiasLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ const Dtype* bias_data =
+ ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data();
+ Dtype* top_data = top[0]->mutable_cpu_data();
+ if (bottom[0] != top[0]) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ caffe_copy(bottom[0]->count(), bottom_data, top_data);
+ }
+ for (int n = 0; n < outer_dim_; ++n) {
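+ // Each gemm is a rank-1 update: top[n] += bias * 1^T, with
+ // M = bias_dim_, N = inner_dim_, K = 1, so bias[d] is added to
+ // every inner_dim_ element of slice d.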
+ caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, bias_dim_,
+ inner_dim_, 1, Dtype(1), bias_data,
+ bias_multiplier_.cpu_data(), Dtype(1), top_data);
+ top_data += dim_;
+ }
+}
+
+template <typename Dtype>
+void BiasLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+ if (propagate_down[0] && bottom[0] != top[0]) {
+ const Dtype* top_diff = top[0]->cpu_diff();
+ Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
+ caffe_copy(bottom[0]->count(), top_diff, bottom_diff);
+ }
+ // If computing in-place, the data diff is already in bottom[0]; nothing to copy.
+ const bool bias_param = (bottom.size() == 1);
+ if ((!bias_param && propagate_down[1]) ||
+ (bias_param && this->param_propagate_down_[0])) {
+ const Dtype* top_diff = top[0]->cpu_diff();
+ Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1])
+ ->mutable_cpu_diff();
+ bool accum = bias_param;
+ for (int n = 0; n < outer_dim_; ++n) {
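+ // gemv with the all-ones multiplier computes row sums: bias_diff[d]
+ // accumulates top_diff over all inner_dim_ positions. beta = accum, so
+ // later outer slices (and learned-param diffs) accumulate rather than
+ // overwrite.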
+ caffe_cpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1),
+ top_diff, bias_multiplier_.cpu_data(), Dtype(accum), bias_diff);
+ top_diff += dim_;
+ accum = true;
+ }
+ }
+}
+
+#ifdef CPU_ONLY
+STUB_GPU(BiasLayer);
+#endif
+
+INSTANTIATE_CLASS(BiasLayer);
+REGISTER_LAYER_CLASS(Bias);
+
+} // namespace caffe
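
A naive reference for the bias gradient that the caffe_cpu_gemv loop above computes, useful for sanity-checking the multiplier trick (illustrative code, not part of this PR): each bias element's gradient is the sum of top_diff over every position that shares it.

#include <algorithm>
#include <vector>

// bias_diff[d] = sum over n, i of top_diff[(n * bias_dim + d) * inner_dim + i]
void bias_backward_reference(const std::vector<float>& top_diff,
                             int outer_dim, int bias_dim, int inner_dim,
                             std::vector<float>* bias_diff) {
  std::fill(bias_diff->begin(), bias_diff->end(), 0.0f);
  for (int n = 0; n < outer_dim; ++n)
    for (int d = 0; d < bias_dim; ++d)
      for (int i = 0; i < inner_dim; ++i)
        (*bias_diff)[d] += top_diff[(n * bias_dim + d) * inner_dim + i];
}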
src/caffe/layers/bias_layer.cu
@@ -0,0 +1,59 @@
+#include <vector>
+
+#include "caffe/filler.hpp"
+#include "caffe/layers/bias_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+__global__ void BiasForward(const int n, const Dtype* in,
+ const Dtype* bias, const int bias_dim, const int inner_dim,
+ Dtype* out) {
+ CUDA_KERNEL_LOOP(index, n) {
+ const int bias_index = (index / inner_dim) % bias_dim;
+ out[index] = in[index] + bias[bias_index];
+ }
+}
+
+template <typename Dtype>
+void BiasLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ const int count = top[0]->count();
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ const Dtype* bias_data =
+ ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->gpu_data();
+ Dtype* top_data = top[0]->mutable_gpu_data();
+ BiasForward<Dtype> // NOLINT_NEXT_LINE(whitespace/operators)
+ <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+ count, bottom_data, bias_data, bias_dim_, inner_dim_, top_data);
+}
+
+template <typename Dtype>
+void BiasLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+ if (propagate_down[0] && bottom[0] != top[0]) {
+ const Dtype* top_diff = top[0]->gpu_diff();
+ Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+ caffe_copy(bottom[0]->count(), top_diff, bottom_diff);
+ }
+ // If computing in-place, the data diff is already in bottom[0]; nothing to copy.
+ const bool bias_param = (bottom.size() == 1);
+ if ((!bias_param && propagate_down[1]) ||
+ (bias_param && this->param_propagate_down_[0])) {
+ const Dtype* top_diff = top[0]->gpu_diff();
+ Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1])
+ ->mutable_gpu_diff();
+ bool accum = bias_param;
+ for (int n = 0; n < outer_dim_; ++n) {
+ caffe_gpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1),
+ top_diff, bias_multiplier_.gpu_data(), Dtype(accum), bias_diff);
+ top_diff += dim_;
+ accum = true;
+ }
+ }
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(BiasLayer);
+
+} // namespace caffe
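
The BiasForward kernel recovers the bias element for each flat offset with (index / inner_dim) % bias_dim: the division strips the inner coordinates and the modulo wraps away the outer dimension. A small host-side check of that arithmetic (illustrative code, not part of this PR):

#include <cassert>

// Recover the bias coordinate from a flat offset into an
// (outer, bias, inner) volume, exactly as the kernel does.
int bias_index(int flat, int bias_dim, int inner_dim) {
  return (flat / inner_dim) % bias_dim;
}

int main() {
  const int outer = 2, bias = 3, inner = 4;
  for (int n = 0; n < outer; ++n)
    for (int d = 0; d < bias; ++d)
      for (int i = 0; i < inner; ++i)
        assert(bias_index((n * bias + d) * inner + i, bias, inner) == d);
  return 0;
}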