Exponential Linear Units — PR #3388 (merged)
Adds an ELU layer to Caffe in three files: elu_layer.hpp (declaration),
elu_layer.cpp (CPU forward/backward), and elu_layer.cu (GPU kernels).
| @@ -0,0 +1,86 @@ | ||
| +#ifndef CAFFE_ELU_LAYER_HPP_ | ||
| +#define CAFFE_ELU_LAYER_HPP_ | ||
| + | ||
| +#include <vector> | ||
| + | ||
| +#include "caffe/blob.hpp" | ||
| +#include "caffe/layer.hpp" | ||
| +#include "caffe/proto/caffe.pb.h" | ||
| + | ||
| +#include "caffe/layers/neuron_layer.hpp" | ||
| + | ||
| +namespace caffe { | ||
| + | ||
/**
 * @brief Exponential Linear Unit (ELU) non-linearity @f$
 *        y = \left\{
 *        \begin{array}{lr}
 *            x & \mathrm{if} \; x > 0 \\
 *            \alpha (\exp(x)-1) & \mathrm{if} \; x \le 0
 *        \end{array} \right.
 *        @f$.
 *
 * Unlike ReLU, the output saturates at @f$ -\alpha @f$ for strongly
 * negative inputs, which keeps the mean activation closer to zero.
 */
template <typename Dtype>
class ELULayer : public NeuronLayer<Dtype> {
 public:
  /**
   * @param param provides ELUParameter elu_param,
   *     with ELULayer options:
   *   - alpha (\b optional, default 1).
   *     the value @f$ \alpha @f$ by which controls saturation for negative inputs.
   */
  explicit ELULayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}

  virtual inline const char* type() const { return "ELU"; }

 protected:
  /**
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the inputs @f$ x @f$
   * @param top output Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the computed outputs @f$
   *        y = \left\{
   *        \begin{array}{lr}
   *            x & \mathrm{if} \; x > 0 \\
   *            \alpha (\exp(x)-1) & \mathrm{if} \; x \le 0
   *        \end{array} \right.
   *      @f$.
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /**
   * @brief Computes the error gradient w.r.t. the ELU inputs.
   *
   * @param top output Blob vector (length 1), providing the error gradient with
   *      respect to the outputs
   *   -# @f$ (N \times C \times H \times W) @f$
   *      containing error gradients @f$ \frac{\partial E}{\partial y} @f$
   *      with respect to computed outputs @f$ y @f$
   * @param propagate_down see Layer::Backward.
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the inputs @f$ x @f$; Backward fills their diff with
   *      gradients @f$
   *        \frac{\partial E}{\partial x} = \left\{
   *        \begin{array}{lr}
   *            1 & \mathrm{if} \; x > 0 \\
   *            y + \alpha & \mathrm{if} \; x \le 0
   *        \end{array} \right.
   *      @f$ if propagate_down[0]
   *      (note @f$ \alpha e^{x} = y + \alpha @f$ for @f$ x \le 0 @f$).
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
};
| + | ||
| + | ||
| +} // namespace caffe | ||
| + | ||
| +#endif // CAFFE_ELU_LAYER_HPP_ |
| @@ -0,0 +1,47 @@ | ||
| +#include <algorithm> | ||
| +#include <vector> | ||
| + | ||
| +#include "caffe/layers/elu_layer.hpp" | ||
| + | ||
| +namespace caffe { | ||
| + | ||
| +template <typename Dtype> | ||
| +void ELULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
| + const vector<Blob<Dtype>*>& top) { | ||
| + const Dtype* bottom_data = bottom[0]->cpu_data(); | ||
| + Dtype* top_data = top[0]->mutable_cpu_data(); | ||
| + const int count = bottom[0]->count(); | ||
| + Dtype alpha = this->layer_param_.elu_param().alpha(); | ||
| + for (int i = 0; i < count; ++i) { | ||
| + top_data[i] = std::max(bottom_data[i], Dtype(0)) | ||
| + + alpha * (exp(std::min(bottom_data[i], Dtype(0))) - Dtype(1)); | ||
| + } | ||
| +} | ||
| + | ||
| +template <typename Dtype> | ||
| +void ELULayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
| + const vector<bool>& propagate_down, | ||
| + const vector<Blob<Dtype>*>& bottom) { | ||
| + if (propagate_down[0]) { | ||
| + const Dtype* bottom_data = bottom[0]->cpu_data(); | ||
| + const Dtype* top_data = top[0]->cpu_data(); | ||
| + const Dtype* top_diff = top[0]->cpu_diff(); | ||
| + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); | ||
| + const int count = bottom[0]->count(); | ||
| + Dtype alpha = this->layer_param_.elu_param().alpha(); | ||
| + for (int i = 0; i < count; ++i) { | ||
| + bottom_diff[i] = top_diff[i] * ((bottom_data[i] > 0) | ||
| + + (alpha + top_data[i]) * (bottom_data[i] <= 0)); | ||
| + } | ||
| + } | ||
| +} | ||
| + | ||
| + | ||
#ifdef CPU_ONLY
// In CPU-only builds, stub out Forward_gpu/Backward_gpu.
STUB_GPU(ELULayer);
#endif

// Instantiate the layer for float and double.
INSTANTIATE_CLASS(ELULayer);
// Register under the type string "ELU" so nets can create it by name.
REGISTER_LAYER_CLASS(ELU);
| + | ||
| +} // namespace caffe |
| @@ -0,0 +1,62 @@ | ||
| +#include <algorithm> | ||
| +#include <vector> | ||
| + | ||
| +#include "caffe/layers/elu_layer.hpp" | ||
| + | ||
| +namespace caffe { | ||
| + | ||
| +template <typename Dtype> | ||
| +__global__ void ELUForward(const int n, const Dtype* in, Dtype* out, | ||
| + Dtype alpha) { | ||
| + CUDA_KERNEL_LOOP(index, n) { | ||
| + out[index] = in[index] > 0 ? in[index] : | ||
| + alpha * (exp(in[index]) - 1); | ||
| + } | ||
| +} | ||
| + | ||
| +template <typename Dtype> | ||
| +void ELULayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, | ||
| + const vector<Blob<Dtype>*>& top) { | ||
| + const Dtype* bottom_data = bottom[0]->gpu_data(); | ||
| + Dtype* top_data = top[0]->mutable_gpu_data(); | ||
| + const int count = bottom[0]->count(); | ||
| + Dtype alpha = this->layer_param_.elu_param().alpha(); | ||
| + // NOLINT_NEXT_LINE(whitespace/operators) | ||
| + ELUForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>( | ||
| + count, bottom_data, top_data, alpha); | ||
| + CUDA_POST_KERNEL_CHECK; | ||
| +} | ||
| + | ||
| +template <typename Dtype> | ||
| +__global__ void ELUBackward(const int n, const Dtype* in_diff, | ||
| + const Dtype* out_data, const Dtype* in_data, | ||
| + Dtype* out_diff, Dtype alpha) { | ||
| + CUDA_KERNEL_LOOP(index, n) { | ||
| + out_diff[index] = in_data[index] > 0 ? in_diff[index] : | ||
| + in_diff[index] * (out_data[index] + alpha); | ||
| + } | ||
| +} | ||
| + | ||
| +template <typename Dtype> | ||
| +void ELULayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, | ||
| + const vector<bool>& propagate_down, | ||
| + const vector<Blob<Dtype>*>& bottom) { | ||
| + if (propagate_down[0]) { | ||
| + const Dtype* bottom_data = bottom[0]->gpu_data(); | ||
| + const Dtype* top_diff = top[0]->gpu_diff(); | ||
| + const Dtype* top_data = top[0]->gpu_data(); | ||
| + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); | ||
| + const int count = bottom[0]->count(); | ||
| + Dtype alpha = this->layer_param_.elu_param().alpha(); | ||
| + // NOLINT_NEXT_LINE(whitespace/operators) | ||
| + ELUBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>( | ||
| + count, top_diff, top_data, bottom_data, bottom_diff, alpha); | ||
| + CUDA_POST_KERNEL_CHECK; | ||
| + } | ||
| +} | ||
| + | ||
| + | ||
// Instantiate Forward_gpu/Backward_gpu for float and double.
INSTANTIATE_LAYER_GPU_FUNCS(ELULayer);
| + | ||
| + | ||
| +} // namespace caffe |