Add PReLU Layer #1940

Merged
merged 1 commit into from Mar 12, 2015
Jump to file or symbol
Failed to load files and symbols.
+563 −1
Split
@@ -654,6 +654,90 @@ class ThresholdLayer : public NeuronLayer<Dtype> {
Dtype threshold_;
};
/**
 * @brief Parameterized Rectified Linear Unit non-linearity @f$
 *        y_i = \max(0, x_i) + a_i \min(0, x_i)
 *        @f$. The differences from ReLULayer are 1) negative slopes are
 *        learnable through backprop and 2) negative slopes can vary across
 *        channels. The number of axes of input blob should be greater than or
 *        equal to 2. The 1st axis (0-based) is seen as channels.
 */
template <typename Dtype>
class PReLULayer : public NeuronLayer<Dtype> {
 public:
  /**
   * @param param provides PReLUParameter prelu_param,
   *     with PReLULayer options:
   *   - filler (\b optional, FillerParameter,
   *     default {'type': constant 'value':0.25}).
   *     Filler used to initialize the learnable negative slopes @f$ a_i @f$.
   *   - channel_shared (\b optional, default false).
   *     Whether negative slopes are shared across channels; if true a single
   *     slope parameter is used for the whole blob.
   */
  explicit PReLULayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "PReLU"; }

 protected:
  /**
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times ...) @f$
   *      the inputs @f$ x @f$
   * @param top output Blob vector (length 1)
   *   -# @f$ (N \times C \times ...) @f$
   *      the computed outputs for each channel @f$i@f$ @f$
   *        y_i = \max(0, x_i) + a_i \min(0, x_i)
   *      @f$.
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /**
   * @brief Computes the error gradient w.r.t. the PReLU inputs.
   *
   * @param top output Blob vector (length 1), providing the error gradient with
   *      respect to the outputs
   *   -# @f$ (N \times C \times ...) @f$
   *      containing error gradients @f$ \frac{\partial E}{\partial y} @f$
   *      with respect to computed outputs @f$ y @f$
   * @param propagate_down see Layer::Backward.
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times ...) @f$
   *      the inputs @f$ x @f$; For each channel @f$i@f$, backward fills their
   *      diff with gradients @f$
   *        \frac{\partial E}{\partial x_i} = \left\{
   *        \begin{array}{lr}
   *            a_i \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i \le 0 \\
   *            \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i > 0
   *        \end{array} \right.
   *      @f$.
   *      If param_propagate_down_[0] is true, it fills the diff with gradients
   *      @f$
   *        \frac{\partial E}{\partial a_i} = \left\{
   *        \begin{array}{lr}
   *            \sum_{x_i} x_i \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i \le 0 \\
   *            0 & \mathrm{if} \; x_i > 0
   *        \end{array} \right.
   *      @f$.
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  bool channel_shared_;  // true when a single slope is shared by all channels
  Blob<Dtype> multiplier_;  // dot multiplier for backward computation of params
  Blob<Dtype> bottom_memory_;  // memory for in-place computation
};
+
} // namespace caffe
#endif // CAFFE_NEURON_LAYERS_HPP_
@@ -0,0 +1,140 @@
+#include <algorithm>
+#include <vector>
+
+#include "caffe/filler.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void PReLULayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ CHECK_GE(bottom[0]->num_axes(), 2)
+ << "Number of axes of bottom blob must be >=2.";
+ PReLUParameter prelu_param = this->layer_param().prelu_param();
+ int channels = bottom[0]->channels();
+ channel_shared_ = prelu_param.channel_shared();
+ if (this->blobs_.size() > 0) {
+ LOG(INFO) << "Skipping parameter initialization";
+ } else {
+ this->blobs_.resize(1);
+ if (channel_shared_) {
+ this->blobs_[0].reset(new Blob<Dtype>(vector<int>(0)));
+ } else {
+ this->blobs_[0].reset(new Blob<Dtype>(vector<int>(1, channels)));
+ }
+ shared_ptr<Filler<Dtype> > filler;
+ if (prelu_param.has_filler()) {
+ filler.reset(GetFiller<Dtype>(prelu_param.filler()));
+ } else {
+ FillerParameter filler_param;
+ filler_param.set_type("constant");
+ filler_param.set_value(0.25);
+ filler.reset(GetFiller<Dtype>(filler_param));
+ }
+ filler->Fill(this->blobs_[0].get());
+ }
+ if (channel_shared_) {
+ CHECK_EQ(this->blobs_[0]->count(), 1)
+ << "Negative slope size is inconsistent with prototxt config";
+ } else {
+ CHECK_EQ(this->blobs_[0]->count(), channels)
+ << "Negative slope size is inconsistent with prototxt config";
+ }
+
+ // Propagate gradients to the parameters (as directed by backward pass).
+ this->param_propagate_down_.resize(this->blobs_.size(), true);
+ multiplier_.Reshape(vector<int>(1, bottom[0]->count() / bottom[0]->num()));
+ caffe_set(multiplier_.count(), Dtype(1), multiplier_.mutable_cpu_data());
+}
+
+template <typename Dtype>
+void PReLULayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ CHECK_GE(bottom[0]->num_axes(), 2)
+ << "Number of axes of bottom blob must be >=2.";
+ top[0]->ReshapeLike(*bottom[0]);
+ if (bottom[0] == top[0]) {
+ // For in-place computation
+ bottom_memory_.ReshapeLike(*bottom[0]);
+ }
+}
+
+template <typename Dtype>
+void PReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ Dtype* top_data = top[0]->mutable_cpu_data();
+ const int count = bottom[0]->count();
+ const int dim = bottom[0]->count(2);
+ const int channels = bottom[0]->channels();
+ const Dtype* slope_data = this->blobs_[0]->cpu_data();
+
+ // For in-place computation
+ if (bottom[0] == top[0]) {
+ caffe_copy(count, bottom_data, bottom_memory_.mutable_cpu_data());
+ }
+
+ // if channel_shared, channel index in the following computation becomes
+ // always zero.
+ const int div_factor = channel_shared_ ? channels : 1;
+ for (int i = 0; i < count; ++i) {
+ int c = (i / dim) % channels / div_factor;
+ top_data[i] = std::max(bottom_data[i], Dtype(0))
+ + slope_data[c] * std::min(bottom_data[i], Dtype(0));
+ }
+}
+
+template <typename Dtype>
+void PReLULayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down,
+ const vector<Blob<Dtype>*>& bottom) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ const Dtype* slope_data = this->blobs_[0]->cpu_data();
+ const Dtype* top_diff = top[0]->cpu_diff();
+ const int count = bottom[0]->count();
+ const int dim = bottom[0]->count(2);
+ const int channels = bottom[0]->channels();
+
+ // For in-place computation
+ if (top[0] == bottom[0]) {
+ bottom_data = bottom_memory_.cpu_data();
+ }
+
+ // if channel_shared, channel index in the following computation becomes
+ // always zero.
+ const int div_factor = channel_shared_ ? channels : 1;
+
+ // Propagte to param
+ // Since to write bottom diff will affect top diff if top and bottom blobs
+ // are identical (in-place computaion), we first compute param backward to
+ // keep top_diff unchanged.
+ if (this->param_propagate_down_[0]) {
+ Dtype* slope_diff = this->blobs_[0]->mutable_cpu_diff();
+ caffe_set(this->blobs_[0]->count(), Dtype(0), slope_diff);
+ for (int i = 0; i < count; ++i) {
+ int c = (i / dim) % channels / div_factor;
+ slope_diff[c] += top_diff[i] * bottom_data[i] * (bottom_data[i] <= 0);
+ }
+ }
+ // Propagate to bottom
+ if (propagate_down[0]) {
+ Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
+ for (int i = 0; i < count; ++i) {
+ int c = (i / dim) % channels / div_factor;
+ bottom_diff[i] = top_diff[i] * ((bottom_data[i] > 0)
+ + slope_data[c] * (bottom_data[i] <= 0));
+ }
+ }
+}
+
+
+#ifdef CPU_ONLY
+STUB_GPU(PReLULayer);
+#endif
+
+INSTANTIATE_CLASS(PReLULayer);
+REGISTER_LAYER_CLASS(PReLU);
+
+} // namespace caffe
@@ -0,0 +1,130 @@
+#include <algorithm>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+// CUDA kernele for forward
+template <typename Dtype>
+__global__ void PReLUForward(const int n, const int channels, const int dim,
+ const Dtype* in, Dtype* out, const Dtype* slope_data,
+ const int div_factor) {
+ CUDA_KERNEL_LOOP(index, n) {
+ int c = (index / dim) % channels / div_factor;
+ out[index] = in[index] > 0 ? in[index] : in[index] * slope_data[c];
+ }
+}
+
+// CUDA kernel for bottom backward
+template <typename Dtype>
+__global__ void PReLUBackward(const int n, const int channels, const int dim,
+ const Dtype* in_diff, const Dtype* in_data, Dtype* out_diff,
+ const Dtype* slope_data, const int div_factor) {
+ CUDA_KERNEL_LOOP(index, n) {
+ int c = (index / dim) % channels / div_factor;
+ out_diff[index] = in_diff[index] * ((in_data[index] > 0)
+ + (in_data[index] <= 0) * slope_data[c]);
+ }
+}
+
+// CUDA kernel for element-wise parameter backward
+template <typename Dtype>
+__global__ void PReLUParamBackward(const int n, const Dtype* in_diff,
+ const Dtype* in_data, Dtype* out_diff) {
+ CUDA_KERNEL_LOOP(index, n) {
+ out_diff[index] = in_diff[index] * in_data[index] * (in_data[index] <= 0);
+ }
+}
+
+template <typename Dtype>
+void PReLULayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ Dtype* top_data = top[0]->mutable_gpu_data();
+ const int count = bottom[0]->count();
+ const int dim = bottom[0]->count(2);
+ const int channels = bottom[0]->channels();
+ const Dtype* slope_data = this->blobs_[0]->gpu_data();
+ const int div_factor = channel_shared_ ? channels : 1;
+
+ // For in-place computation
+ if (top[0] == bottom[0]) {
+ caffe_copy(count, bottom_data, bottom_memory_.mutable_gpu_data());
+ }
+
+ // NOLINT_NEXT_LINE(whitespace/operators)
+ PReLUForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+ count, channels, dim, bottom_data, top_data, slope_data, div_factor);
+ CUDA_POST_KERNEL_CHECK;
+}
+
+template <typename Dtype>
+void PReLULayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down,
+ const vector<Blob<Dtype>*>& bottom) {
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ const Dtype* top_diff = top[0]->gpu_diff();
+ const int count = bottom[0]->count();
+ const int dim = bottom[0]->count(2);
+ const int channels = bottom[0]->channels();
+
+ // For in-place computation
+ if (top[0] == bottom[0]) {
+ bottom_data = bottom_memory_.gpu_data();
+ }
+
+ // Propagte to param
+ // Since to write bottom diff will affect top diff if top and bottom blobs
+ // are identical (in-place computaion), we first compute param backward to
+ // keep top_diff unchanged.
+ if (this->param_propagate_down_[0]) {
+ Dtype* slope_diff = this->blobs_[0]->mutable_gpu_diff();
+ // slope_diff is set as 0, then accumulated over batches
+ caffe_gpu_set<Dtype>(this->blobs_[0]->count(), Dtype(0), slope_diff);
+ int cdim = channels * dim;
+ Dtype dsum = 0.;
+ for (int n = 0; n < bottom[0]->num(); ++n) {
+ Dtype* temp_buff = multiplier_.mutable_gpu_diff();
+ // compute element-wise diff
+ // NOLINT_NEXT_LINE(whitespace/operators)
+ PReLUParamBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
+ CAFFE_CUDA_NUM_THREADS>>>(
+ cdim, top_diff + top[0]->offset(n),
+ bottom_data + bottom[0]->offset(n), multiplier_.mutable_gpu_diff());
+ CUDA_POST_KERNEL_CHECK;
+ if (channel_shared_) {
+ Dtype d;
+ caffe_gpu_dot<Dtype>(channels * dim, multiplier_.gpu_diff(),
+ multiplier_.gpu_data(), &d);
+ dsum += d;
+ } else {
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, channels, dim, 1.,
+ multiplier_.gpu_diff(), multiplier_.gpu_data(), 1.,
+ slope_diff);
+ }
+ }
+ if (channel_shared_) {
+ caffe_gpu_set(this->blobs_[0]->count(), Dtype(dsum), slope_diff);
+ }
+ }
+ // Propagate to bottom
+ if (propagate_down[0]) {
+ Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+ const Dtype* slope_data = this->blobs_[0]->gpu_data();
+ int div_factor = channel_shared_ ? channels : 1;
+ // NOLINT_NEXT_LINE(whitespace/operators)
+ PReLUBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
+ CAFFE_CUDA_NUM_THREADS>>>(
+ count, channels, dim, top_diff, bottom_data, bottom_diff, slope_data,
+ div_factor);
+ CUDA_POST_KERNEL_CHECK;
+ }
+}
+
+
+INSTANTIATE_LAYER_GPU_FUNCS(PReLULayer);
+
+
+} // namespace caffe
@@ -259,7 +259,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
-// LayerParameter next available layer-specific ID: 131 (last added: python_param)
+// LayerParameter next available layer-specific ID: 132 (last added: prelu_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
@@ -323,6 +323,7 @@ message LayerParameter {
optional MVNParameter mvn_param = 120;
optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122;
+ optional PReLUParameter prelu_param = 131;
optional PythonParameter python_param = 130;
optional ReLUParameter relu_param = 123;
optional SigmoidParameter sigmoid_param = 124;
@@ -946,3 +947,14 @@ message V0LayerParameter {
optional HDF5OutputParameter hdf5_output_param = 1001;
}
+
// Message that stores parameters used by PReLULayer
message PReLUParameter {
  // Parametric ReLU described in K. He et al., Delving Deep into Rectifiers:
  // Surpassing Human-Level Performance on ImageNet Classification, 2015.

  // Filler for the initial value of a_i. Default is a_i = 0.25 for all i.
  optional FillerParameter filler = 1;
  // Whether or not slope parameters are shared across channels.
  optional bool channel_shared = 2 [default = false];
}
Oops, something went wrong.