forked from BVLC/caffe
-
Notifications
You must be signed in to change notification settings - Fork 73
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
400 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,34 @@ | ||
Please use the [caffe-users list](https://groups.google.com/forum/#!forum/caffe-users) for usage, installation, or modeling questions, or other requests for help. | ||
_Do not post such requests to Issues._ Doing so interferes with the development of Caffe. | ||
## Important - read before submitting | ||
|
||
Please read the [guidelines for contributing](https://github.com/BVLC/caffe/blob/master/CONTRIBUTING.md) before submitting this issue. | ||
*Please read the [guidelines for contributing](https://github.com/BVLC/caffe/blob/master/CONTRIBUTING.md) before submitting this issue!* | ||
|
||
*Please do not post installation, build, usage, or modeling questions, or other requests for help to Issues.* | ||
Use the [caffe-users list](https://groups.google.com/forum/#!forum/caffe-users) instead. | ||
This helps developers maintain a clear, uncluttered, and efficient view of the state of Caffe. | ||
|
||
### Issue summary | ||
|
||
|
||
### Steps to reproduce | ||
|
||
If you are having difficulty building Caffe or training a model, please ask on the [caffe-users list](https://groups.google.com/forum/#!forum/caffe-users) instead. If you are reporting a build error that seems to be due to a bug in Caffe, please attach your build configuration (either Makefile.config or CMakeCache.txt) and the output of the make (or cmake) command. | ||
|
||
### Your system configuration | ||
Operating system: | ||
Compiler: | ||
CUDA version (if applicable): | ||
CUDNN version (if applicable): | ||
BLAS: | ||
Python or MATLAB version (for pycaffe and matcaffe respectively): | ||
### Tried solutions | ||
|
||
|
||
### System configuration | ||
|
||
* Operating system: | ||
* Compiler: | ||
* CUDA version (if applicable): | ||
* CUDNN version (if applicable): | ||
* BLAS: | ||
* Python version (if using pycaffe): | ||
* MATLAB version (if using matcaffe): | ||
|
||
### Issue checklist | ||
|
||
- [ ] read the guidelines and removed the first paragraph | ||
- [ ] written a short summary and detailed steps to reproduce | ||
- [ ] explained how solutions to related problems failed (tick if found none) | ||
- [ ] filled in the system configuration | ||
- [ ] attached relevant logs/config files (tick if not applicable) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
#ifndef CAFFE_SWISH_LAYER_HPP_
#define CAFFE_SWISH_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/neuron_layer.hpp"
#include "caffe/layers/sigmoid_layer.hpp"

namespace caffe {

/**
 * @brief Swish non-linearity @f$ y = x \sigma (\beta x) @f$.
 *        A novel activation function that tends to work better than ReLU [1].
 *
 *        Implemented by delegating the @f$ \sigma (\beta x) @f$ part to an
 *        internal SigmoidLayer that operates on dedicated helper blobs.
 *
 * [1] Prajit Ramachandran, Barret Zoph, Quoc V. Le. "Searching for
 *     Activation Functions". arXiv preprint arXiv:1710.05941v2 (2017).
 */
template <typename Dtype>
class SwishLayer : public NeuronLayer<Dtype> {
 public:
  /**
   * @param param provides SwishParameter swish_param,
   *     with SwishLayer options:
   *   - beta (\b optional, default 1).
   *     the value @f$ \beta @f$ in the @f$ y = x \sigma (\beta x) @f$.
   *
   * Note: the same LayerParameter is also passed to the internal
   * SigmoidLayer constructed here.
   */
  explicit SwishLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param),
        sigmoid_layer_(new SigmoidLayer<Dtype>(param)),
        sigmoid_input_(new Blob<Dtype>()),
        sigmoid_output_(new Blob<Dtype>()) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Swish"; }

 protected:
  /**
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the inputs @f$ x @f$
   * @param top output Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the computed outputs @f$
   *        y = x \sigma (\beta x)
   *      @f$.
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /**
   * @brief Computes the error gradient w.r.t. the sigmoid inputs.
   *
   * @param top output Blob vector (length 1), providing the error gradient with
   *      respect to the outputs
   *   -# @f$ (N \times C \times H \times W) @f$
   *      containing error gradients @f$ \frac{\partial E}{\partial y} @f$
   *      with respect to computed outputs @f$ y @f$
   * @param propagate_down see Layer::Backward.
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the inputs @f$ x @f$; Backward fills their diff with
   *      gradients @f$
   *        \frac{\partial E}{\partial x}
   *            = \frac{\partial E}{\partial y}(\beta y +
   *              \sigma (\beta x)(1 - \beta y))
   *      @f$ if propagate_down[0]
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /// The internal SigmoidLayer
  shared_ptr<SigmoidLayer<Dtype> > sigmoid_layer_;
  /// sigmoid_input_ stores the input of the SigmoidLayer (beta * x).
  shared_ptr<Blob<Dtype> > sigmoid_input_;
  /// sigmoid_output_ stores the output of the SigmoidLayer.
  shared_ptr<Blob<Dtype> > sigmoid_output_;
  /// bottom vector holder to call the underlying SigmoidLayer::Forward
  vector<Blob<Dtype>*> sigmoid_bottom_vec_;
  /// top vector holder to call the underlying SigmoidLayer::Forward
  vector<Blob<Dtype>*> sigmoid_top_vec_;
};

}  // namespace caffe

#endif  // CAFFE_SWISH_LAYER_HPP_
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#include <cmath> | ||
#include <vector> | ||
|
||
#include "caffe/layers/swish_layer.hpp" | ||
#include "caffe/util/math_functions.hpp" | ||
|
||
namespace caffe { | ||
|
||
template <typename Dtype> | ||
void SwishLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
NeuronLayer<Dtype>::LayerSetUp(bottom, top); | ||
sigmoid_bottom_vec_.clear(); | ||
sigmoid_bottom_vec_.push_back(sigmoid_input_.get()); | ||
sigmoid_top_vec_.clear(); | ||
sigmoid_top_vec_.push_back(sigmoid_output_.get()); | ||
sigmoid_layer_->SetUp(sigmoid_bottom_vec_, sigmoid_top_vec_); | ||
} | ||
|
||
template <typename Dtype>
void SwishLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Shapes top like bottom (NeuronLayer behavior), then keeps the sigmoid's
  // helper input blob and the sigmoid layer itself in sync with that shape.
  NeuronLayer<Dtype>::Reshape(bottom, top);
  sigmoid_input_->ReshapeLike(*bottom[0]);
  sigmoid_layer_->Reshape(sigmoid_bottom_vec_, sigmoid_top_vec_);
}
|
||
template <typename Dtype> | ||
void SwishLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
const Dtype* bottom_data = bottom[0]->cpu_data(); | ||
Dtype* sigmoid_input_data = sigmoid_input_->mutable_cpu_data(); | ||
Dtype* top_data = top[0]->mutable_cpu_data(); | ||
const int count = bottom[0]->count(); | ||
Dtype beta = this->layer_param_.swish_param().beta(); | ||
caffe_copy(count, bottom_data, sigmoid_input_data); | ||
caffe_scal(count, beta, sigmoid_input_data); | ||
sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_); | ||
caffe_mul(count, bottom_data, sigmoid_output_->cpu_data(), top_data); | ||
} | ||
|
||
template <typename Dtype> | ||
void SwishLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, | ||
const vector<Blob<Dtype>*>& bottom) { | ||
if (propagate_down[0]) { | ||
const Dtype* top_data = top[0]->cpu_data(); | ||
const Dtype* top_diff = top[0]->cpu_diff(); | ||
const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data(); | ||
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); | ||
const int count = bottom[0]->count(); | ||
Dtype beta = this->layer_param_.swish_param().beta(); | ||
for (int i = 0; i < count; ++i) { | ||
const Dtype swish_x = top_data[i]; | ||
bottom_diff[i] = top_diff[i] * (beta * swish_x + sigmoid_output_data[i] | ||
* (1. - beta * swish_x)); | ||
} | ||
} | ||
} | ||
|
||
#ifdef CPU_ONLY
STUB_GPU(SwishLayer);  // CPU-only build: stub out Forward_gpu/Backward_gpu.
#endif

// Instantiate the class template for the supported Dtypes and register the
// layer under the type name "Swish" with Caffe's layer factory.
INSTANTIATE_CLASS(SwishLayer);
REGISTER_LAYER_CLASS(Swish);
|
||
} // namespace caffe |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#include <cmath> | ||
#include <vector> | ||
|
||
#include "caffe/layers/swish_layer.hpp" | ||
#include "caffe/util/math_functions.hpp" | ||
|
||
namespace caffe { | ||
|
||
template <typename Dtype>
void SwishLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // GPU analogue of Forward_cpu: y = x * sigma(beta * x).
  const int num = bottom[0]->count();
  const Dtype beta = this->layer_param_.swish_param().beta();
  const Dtype* x = bottom[0]->gpu_data();
  Dtype* scaled_x = sigmoid_input_->mutable_gpu_data();
  // Fill the sigmoid input with beta * x.
  caffe_copy(num, x, scaled_x);
  caffe_gpu_scal(num, beta, scaled_x);
  // sigmoid_output_ now holds sigma(beta * x).
  sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_);
  // Elementwise product x * sigma(beta * x) written to the top blob.
  caffe_gpu_mul(num, x, sigmoid_output_->gpu_data(),
      top[0]->mutable_gpu_data());
}
|
||
template <typename Dtype>
__global__ void SwishBackward(const int n, const Dtype* in_diff,
    const Dtype* out_data, const Dtype* sigmoid_output_data, Dtype* out_diff,
    const Dtype beta) {
  // One thread per element: dE/dx = dE/dy * (beta*y + s*(1 - beta*y)),
  // where y is the cached swish output and s = sigma(beta x).
  CUDA_KERNEL_LOOP(i, n) {
    const Dtype y = out_data[i];
    const Dtype s = sigmoid_output_data[i];
    out_diff[i] = in_diff[i] * (beta * y + s * (1 - beta * y));
  }
}
|
||
template <typename Dtype>
void SwishLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  const int num = bottom[0]->count();
  const Dtype beta = this->layer_param_.swish_param().beta();
  // Launch one thread per element; the kernel reads the cached forward
  // output (top data) and sigma(beta x), and writes the bottom diff.
  // NOLINT_NEXT_LINE(whitespace/operators)
  SwishBackward<Dtype><<<CAFFE_GET_BLOCKS(num), CAFFE_CUDA_NUM_THREADS>>>(
      num, top[0]->gpu_diff(), top[0]->gpu_data(),
      sigmoid_output_->gpu_data(), bottom[0]->mutable_gpu_diff(), beta);
  CUDA_POST_KERNEL_CHECK;
}
|
||
// Instantiate the GPU Forward/Backward functions for the supported Dtypes.
INSTANTIATE_LAYER_GPU_FUNCS(SwishLayer);
|
||
} // namespace caffe |
Oops, something went wrong.