LogLayer #2090

Merged
merged 1 commit into from Jun 3, 2015
Jump to file or symbol
Failed to load files and symbols.
+378 −1
Split
@@ -268,6 +268,72 @@ class ExpLayer : public NeuronLayer<Dtype> {
};
/**
+ * @brief Computes @f$ y = log_{\gamma}(\alpha x + \beta) @f$,
+ *        as specified by the scale @f$ \alpha @f$, shift @f$ \beta @f$,
+ *        and base @f$ \gamma @f$.
+ */
+template <typename Dtype>
+class LogLayer : public NeuronLayer<Dtype> {
+ public:
+  /**
+   * @param param provides LogParameter log_param,
+   *     with LogLayer options:
+   *   - scale (\b optional, default 1) the scale @f$ \alpha @f$
+   *   - shift (\b optional, default 0) the shift @f$ \beta @f$
+   *   - base (\b optional, default -1 for a value of @f$ e \approx 2.718 @f$)
+   *     the base @f$ \gamma @f$
+   */
+  explicit LogLayer(const LayerParameter& param)
+      : NeuronLayer<Dtype>(param) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  virtual inline const char* type() const { return "Log"; }
+
+ protected:
+  /**
+   * @param bottom input Blob vector (length 1)
+   *   -# @f$ (N \times C \times H \times W) @f$
+   *      the inputs @f$ x @f$
+   * @param top output Blob vector (length 1)
+   *   -# @f$ (N \times C \times H \times W) @f$
+   *      the computed outputs @f$
+   *        y = log_{\gamma}(\alpha x + \beta)
+   *      @f$
+   */
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  /**
+   * @brief Computes the error gradient w.r.t. the log inputs.
+   *
+   * @param top output Blob vector (length 1), providing the error gradient with
+   *      respect to the outputs
+   *   -# @f$ (N \times C \times H \times W) @f$
+   *      containing error gradients @f$ \frac{\partial E}{\partial y} @f$
+   *      with respect to computed outputs @f$ y @f$
+   * @param propagate_down see Layer::Backward.
+   * @param bottom input Blob vector (length 1)
+   *   -# @f$ (N \times C \times H \times W) @f$
+   *      the inputs @f$ x @f$; Backward fills their diff with
+   *      gradients @f$
+   *        \frac{\partial E}{\partial x} =
+   *            \frac{\partial E}{\partial y}
+   *            \frac{\alpha}{(\alpha x + \beta) \log_e(\gamma)}
+   *      @f$ if propagate_down[0]
+   */
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+  // 1 / ln(gamma), used to convert the natural log to log base gamma;
+  // set to exactly 1 in LayerSetUp when base == -1 (natural base e).
+  Dtype base_scale_;
+  // The scale alpha and shift beta applied to the input before the log.
+  Dtype input_scale_, input_shift_;
+  // alpha / ln(gamma): the precomputed numerator of the backward gradient.
+  Dtype backward_num_scale_;
+};
+
+/**
* @brief Computes @f$ y = (\alpha x + \beta) ^ \gamma @f$,
* as specified by the scale @f$ \alpha @f$, shift @f$ \beta @f$,
* and power @f$ \gamma @f$.
@@ -89,6 +89,9 @@ template <typename Dtype>
void caffe_exp(const int n, const Dtype* a, Dtype* y);
+// Elementwise natural logarithm: y[i] = ln(a[i]) for i in [0, n).
template <typename Dtype>
+void caffe_log(const int n, const Dtype* a, Dtype* y);
+
+template <typename Dtype>
void caffe_abs(const int n, const Dtype* a, Dtype* y);
template <typename Dtype>
@@ -204,6 +207,9 @@ template <typename Dtype>
void caffe_gpu_exp(const int n, const Dtype* a, Dtype* y);
+// GPU elementwise natural logarithm: y[i] = ln(a[i]) for i in [0, n).
template <typename Dtype>
+void caffe_gpu_log(const int n, const Dtype* a, Dtype* y);
+
+template <typename Dtype>
void caffe_gpu_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
// caffe_gpu_rng_uniform with two arguments generates integers in the range
@@ -33,6 +33,7 @@ extern "C" {
DEFINE_VSL_UNARY_FUNC(Sqr, y[i] = a[i] * a[i]);
DEFINE_VSL_UNARY_FUNC(Exp, y[i] = exp(a[i]));
+// Elementwise natural log; named "Ln" (not "Log") to match MKL's vsLn/vdLn.
+DEFINE_VSL_UNARY_FUNC(Ln, y[i] = log(a[i]));
@longjon

longjon May 14, 2015

Contributor

I kinda prefer the name log here, sticking with the name of the elementwise call, but either one is okay...

@jeffdonahue

jeffdonahue May 14, 2015

Contributor

not my choice -- the MKL function is vsLn/vdLn.

@longjon

longjon May 15, 2015

Contributor

Oh. :(

DEFINE_VSL_UNARY_FUNC(Abs, y[i] = fabs(a[i]));
// A simple way to define the vsl unary functions with singular parameter b.
@@ -0,0 +1,136 @@
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/neuron_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+// Reads LogParameter (base, scale, shift), validates the base, and
+// precomputes the constants used by Forward/Backward:
+//   base_scale_         = 1 / ln(base)   (exactly 1 when base is e)
+//   backward_num_scale_ = scale / ln(base)
+template <typename Dtype>
+void LogLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  NeuronLayer<Dtype>::LayerSetUp(bottom, top);
+  const Dtype base = this->layer_param_.log_param().base();
+  if (base != Dtype(-1)) {
+    CHECK_GT(base, 0) << "base must be strictly positive.";
+  }
+  // If base == -1, interpret the base as e and set log_base = 1 exactly.
+  // Otherwise, calculate its log explicitly.  std::log / std::isnan /
+  // std::isinf are qualified so the float/double overloads declared in
+  // <cmath> are selected portably (the unqualified C names are not
+  // guaranteed to be in scope, and ::log would promote float to double).
+  const Dtype log_base = (base == Dtype(-1)) ? Dtype(1) : std::log(base);
+  CHECK(!std::isnan(log_base))
+      << "NaN result: log(base) = log(" << base << ") = " << log_base;
+  CHECK(!std::isinf(log_base))
+      << "Inf result: log(base) = log(" << base << ") = " << log_base;
+  base_scale_ = Dtype(1) / log_base;
+  CHECK(!std::isnan(base_scale_))
+      << "NaN result: 1/log(base) = 1/log(" << base << ") = " << base_scale_;
+  CHECK(!std::isinf(base_scale_))
+      << "Inf result: 1/log(base) = 1/log(" << base << ") = " << base_scale_;
+  input_scale_ = this->layer_param_.log_param().scale();
+  input_shift_ = this->layer_param_.log_param().shift();
+  backward_num_scale_ = input_scale_ / log_base;
+}
+
+// Forward: top = (1/ln(gamma)) * ln(alpha * bottom + beta)
+//              = log_gamma(alpha * bottom + beta).
+// NOTE(review): inputs where alpha*x + beta <= 0 yield NaN/-inf from the
+// log; the layer performs no domain check — presumably callers ensure
+// positive inputs (confirm against usage).
+template <typename Dtype>
+void LogLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  const int count = bottom[0]->count();
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  Dtype* top_data = top[0]->mutable_cpu_data();
+  if (input_scale_ == Dtype(1) && input_shift_ == Dtype(0)) {
+    // Identity affine transform: take ln(x) directly.
+    caffe_log(count, bottom_data, top_data);
+  } else {
+    // Build alpha * x + beta in top, then take its natural log in place.
+    caffe_copy(count, bottom_data, top_data);
+    if (input_scale_ != Dtype(1)) {
+      caffe_scal(count, input_scale_, top_data);
+    }
+    if (input_shift_ != Dtype(0)) {
+      caffe_add_scalar(count, input_shift_, top_data);
+    }
+    caffe_log(count, top_data, top_data);
+  }
+  // Convert ln to log base gamma; base_scale_ is 1 when the base is e.
+  if (base_scale_ != Dtype(1)) {
+    caffe_scal(count, base_scale_, top_data);
+  }
+}
+
+// Backward: bottom_diff = top_diff * alpha / ((alpha * x + beta) * ln(gamma)),
+// the analytic gradient of y = log_gamma(alpha * x + beta) w.r.t. x.
+// The constant numerator alpha / ln(gamma) is precomputed in LayerSetUp
+// as backward_num_scale_.
+template <typename Dtype>
+void LogLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  if (!propagate_down[0]) { return; }
+  const int count = bottom[0]->count();
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  const Dtype* top_diff = top[0]->cpu_diff();
+  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
+  // Build (alpha * x + beta) in bottom_diff...
+  caffe_copy(count, bottom_data, bottom_diff);
+  if (input_scale_ != Dtype(1)) {
+    caffe_scal(count, input_scale_, bottom_diff);
+  }
+  if (input_shift_ != Dtype(0)) {
+    caffe_add_scalar(count, input_shift_, bottom_diff);
+  }
+  // ...invert it elementwise to get 1 / (alpha * x + beta)...
+  caffe_powx(count, bottom_diff, Dtype(-1), bottom_diff);
+  // ...apply the precomputed numerator scale...
+  if (backward_num_scale_ != Dtype(1)) {
+    caffe_scal(count, backward_num_scale_, bottom_diff);
+  }
+  // ...and chain-rule with top_diff, writing the product back in place.
+  caffe_mul(count, top_diff, bottom_diff, bottom_diff);
+}
+
+#ifdef CPU_ONLY
+STUB_GPU(LogLayer);
+#else
+
+// GPU mirror of Forward_cpu: top = log_gamma(alpha * bottom + beta),
+// computed with the caffe_gpu_* kernels on device memory.
+template <typename Dtype>
+void LogLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  const int count = bottom[0]->count();
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  Dtype* top_data = top[0]->mutable_gpu_data();
+  if (input_scale_ == Dtype(1) && input_shift_ == Dtype(0)) {
+    // Identity affine transform: take ln(x) directly.
+    caffe_gpu_log(count, bottom_data, top_data);
+  } else {
+    // Build alpha * x + beta in top, then take its natural log in place.
+    caffe_copy(count, bottom_data, top_data);
+    if (input_scale_ != Dtype(1)) {
+      caffe_gpu_scal(count, input_scale_, top_data);
+    }
+    if (input_shift_ != Dtype(0)) {
+      caffe_gpu_add_scalar(count, input_shift_, top_data);
+    }
+    caffe_gpu_log(count, top_data, top_data);
+  }
+  // Convert ln to log base gamma; base_scale_ is 1 when the base is e.
+  if (base_scale_ != Dtype(1)) {
+    caffe_gpu_scal(count, base_scale_, top_data);
+  }
+}
+
+// GPU mirror of Backward_cpu:
+// bottom_diff = top_diff * alpha / ((alpha * x + beta) * ln(gamma)).
+template <typename Dtype>
+void LogLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  if (!propagate_down[0]) { return; }
+  const int count = bottom[0]->count();
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  const Dtype* top_diff = top[0]->gpu_diff();
+  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+  // Build (alpha * x + beta) in bottom_diff...
+  caffe_copy(count, bottom_data, bottom_diff);
+  if (input_scale_ != Dtype(1)) {
+    caffe_gpu_scal(count, input_scale_, bottom_diff);
+  }
+  if (input_shift_ != Dtype(0)) {
+    caffe_gpu_add_scalar(count, input_shift_, bottom_diff);
+  }
+  // ...invert it elementwise to get 1 / (alpha * x + beta)...
+  caffe_gpu_powx(count, bottom_diff, Dtype(-1), bottom_diff);
+  // ...apply the precomputed numerator scale alpha / ln(gamma)...
+  if (backward_num_scale_ != Dtype(1)) {
+    caffe_gpu_scal(count, backward_num_scale_, bottom_diff);
+  }
+  // ...and chain-rule with top_diff, writing the product back in place.
+  caffe_gpu_mul(count, top_diff, bottom_diff, bottom_diff);
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(LogLayer);
+
+#endif
+
+INSTANTIATE_CLASS(LogLayer);
+REGISTER_LAYER_CLASS(Log);
+
+} // namespace caffe
@@ -269,7 +269,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
-// LayerParameter next available layer-specific ID: 134 (last added: reshape_param)
+// LayerParameter next available layer-specific ID: 135 (last added: log_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
@@ -332,6 +332,7 @@ message LayerParameter {
optional ImageDataParameter image_data_param = 115;
optional InfogainLossParameter infogain_loss_param = 116;
optional InnerProductParameter inner_product_param = 117;
+ optional LogParameter log_param = 134;
optional LRNParameter lrn_param = 118;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
@@ -607,6 +608,17 @@ message InnerProductParameter {
optional int32 axis = 5 [default = 1];
}
+// Message that stores parameters used by LogLayer
+message LogParameter {
+  // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.
+  // Or if base is set to the default (-1), base is set to e,
+  // so y = ln(shift + scale * x) = log_e(shift + scale * x)
+  // The log base; must be strictly positive, or -1 for the natural base e
+  // (LogLayer::LayerSetUp CHECKs base > 0 whenever base != -1).
+  optional float base = 1 [default = -1.0];
+  // Multiplicative scale applied to the input before taking the log.
+  optional float scale = 2 [default = 1.0];
+  // Additive shift applied to the scaled input before taking the log.
+  optional float shift = 3 [default = 0.0];
+}
+
+// Message that stores parameters used by LRNLayer
message LRNParameter {
optional uint32 local_size = 1 [default = 5];
optional float alpha = 2 [default = 1.];
Oops, something went wrong.