Weighted euclidean loss #5775

Open
wants to merge 7 commits into
from
@@ -0,0 +1,111 @@
+#ifndef CAFFE_EUCLIDEAN_LOSS_LAYER_HPP_
+#define CAFFE_EUCLIDEAN_LOSS_LAYER_HPP_
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+#include "caffe/layers/loss_layer.hpp"
+
+namespace caffe {
+
+/**
+ * @brief Computes weighted Euclidean (L2) loss @f$
+ * E = \frac{1}{2N} \sum\limits_{n=1}^N w_n \left| \left| \hat{y}_n - y_n
+ * \right| \right|_2^2 @f$ for real-valued regression tasks.
+ *
+ * @param bottom input Blob vector (length 3)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the predictions @f$ \hat{y} \in [-\infty, +\infty]@f$
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the targets @f$ y \in [-\infty, +\infty]@f$
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the weights @f$ w \in [0, +\infty] @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (1 \times 1 \times 1 \times 1) @f$
 *          the computed Euclidean loss: @f$ E =
 *              \frac{1}{2N} \sum\limits_{n=1}^N w_n \left| \left| \hat{y}_n - y_n
 *              \right| \right|_2^2 @f$
+ *
+ * This can be used for least-squares regression tasks. An InnerProductLayer
+ * input to a EuclideanLossLayer exactly formulates a linear least squares
+ * regression problem. With non-zero weight decay the problem becomes one of
+ * ridge regression -- see src/caffe/test/test_sgd_solver.cpp for a concrete
+ * example wherein we check that the gradients computed for a Net with exactly
+ * this structure match hand-computed gradient formulas for ridge regression.
+ *
+ * (Note: Caffe, and SGD in general, is certainly \b not the best way to solve
+ * linear least squares problems! We use it only as an instructive example.)
+ */
+template <typename Dtype>
+class WeightedEuclideanLossLayer : public LossLayer<Dtype> {
+ public:
+ explicit WeightedEuclideanLossLayer(const LayerParameter& param)
+ : LossLayer<Dtype>(param), diff_() {}
+ virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+
+ virtual inline const char* type() const { return "WeightedEuclideanLoss"; }
+ /**
+ * Unlike most loss layers, in the EuclideanLossLayer we can backpropagate
+ * to both inputs -- override to return true and always allow force_backward.
+ */
+ virtual inline bool AllowForceBackward(const int bottom_index) const {
+ return true;
+ }
+
+ virtual inline int ExactNumBottomBlobs() const { return 3; }
+
+ protected:
+ /// @copydoc EuclideanLossLayer
+ virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+
+ /**
+ * @brief Computes the Euclidean error gradient w.r.t. the inputs.
+ *
+ * Unlike other children of LossLayer, EuclideanLossLayer \b can compute
+ * gradients with respect to the label inputs bottom[1] (but still only will
+ * if propagate_down[1] is set, due to being produced by learnable parameters
+ * or if force_backward is set). In fact, this layer is "commutative" -- the
+ * result is the same regardless of the order of the two bottoms.
+ *
+ * @param top output Blob vector (length 1), providing the error gradient with
+ * respect to the outputs
+ * -# @f$ (1 \times 1 \times 1 \times 1) @f$
+ * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
+ * as @f$ \lambda @f$ is the coefficient of this layer's output
+ * @f$\ell_i@f$ in the overall Net loss
+ * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
+ * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
+ * (*Assuming that this top Blob is not used as a bottom (input) by any
+ * other layer of the Net.)
+ * @param propagate_down see Layer::Backward.
+ * @param bottom input Blob vector (length 2)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the predictions @f$\hat{y}@f$; Backward fills their diff with
+ * gradients @f$
+ * \frac{\partial E}{\partial \hat{y}} =
+ * \frac{1}{n} \sum\limits_{n=1}^N (\hat{y}_n - y_n)
+ * @f$ if propagate_down[0]
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the targets @f$y@f$; Backward fills their diff with gradients
+ * @f$ \frac{\partial E}{\partial y} =
+ * \frac{1}{n} \sum\limits_{n=1}^N (y_n - \hat{y}_n)
+ * @f$ if propagate_down[1]
+ */
+ virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+ virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+ Blob<Dtype> diff_;
+};
+
+} // namespace caffe
+
#endif  // CAFFE_WEIGHTED_EUCLIDEAN_LOSS_LAYER_HPP_
@@ -0,0 +1,69 @@
+#include <vector>
+
+#include "caffe/layers/weighted_euclidean_loss_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void WeightedEuclideanLossLayer<Dtype>::Reshape(
+ const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
+ LossLayer<Dtype>::Reshape(bottom, top);
+ CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
+ << "Inputs must have the same dimension.";
+ diff_.ReshapeLike(*bottom[0]);
+}
+
+template <typename Dtype>
+void WeightedEuclideanLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ int count = bottom[0]->count();
+ caffe_sub(
+ count,
+ bottom[0]->cpu_data(),
+ bottom[1]->cpu_data(),
+ diff_.mutable_cpu_data());
+
+ Dtype wdot(0.0);
+ for (int i = 0; i < count; ++i)
+ {
+ wdot += bottom[2]->cpu_data()[i] * diff_.cpu_data()[i] * diff_.cpu_data()[i];
+ }
+
+ Dtype loss = wdot / bottom[0]->num() / Dtype(2);
+ top[0]->mutable_cpu_data()[0] = loss;
+}
+
+template <typename Dtype>
+void WeightedEuclideanLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+ if (propagate_down[2]) {
+ LOG(FATAL) << this->type()
+ << " Weighted Euclidean loss layer cannot backpropagate to certainty inputs.";
+ }
+ for (int i = 0; i < 2; ++i) {
+ if (propagate_down[i]) {
+ const Dtype sign = (i == 0) ? 1 : -1;
+ const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
+ caffe_cpu_axpby(
+ bottom[i]->count(), // count
+ alpha, // alpha
+ diff_.cpu_data(), // a
+ Dtype(0), // beta
+ bottom[i]->mutable_cpu_diff()); // b
+ for (int j = 0; j < bottom[i]->count(); ++j)
+ {
+ bottom[i]->mutable_cpu_diff()[j] *= bottom[2]->cpu_data()[j];
+ }
+ }
+ }
+}
+
#ifdef CPU_ONLY
// CPU-only builds get stub Forward_gpu/Backward_gpu implementations so the
// declarations in the header still link.
STUB_GPU(WeightedEuclideanLossLayer);
#endif

// Instantiate the float and double variants and register the layer under the
// "WeightedEuclideanLoss" type string.
INSTANTIATE_CLASS(WeightedEuclideanLossLayer);
REGISTER_LAYER_CLASS(WeightedEuclideanLoss);
+
+} // namespace caffe
@@ -1235,6 +1235,7 @@ message V1LayerParameter {
DROPOUT = 6;
DUMMY_DATA = 32;
EUCLIDEAN_LOSS = 7;
+ WEIGHTED_EUCLIDEAN_LOSS = 40;
ELTWISE = 25;
EXP = 38;
FLATTEN = 8;
@@ -0,0 +1,92 @@
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/layers/weighted_euclidean_loss_layer.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+namespace caffe {
+
+template <typename TypeParam>
+class WeightedEuclideanLossLayerTest : public CPUDeviceTest<TypeParam> {
+ typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+ WeightedEuclideanLossLayerTest()
+ : blob_bottom_data_(new Blob<Dtype>(10, 5, 1, 1)),
+ blob_bottom_label_(new Blob<Dtype>(10, 5, 1, 1)),
+ blob_bottom_certainty_(new Blob<Dtype>(10, 5, 1, 1)),
+ blob_top_loss_(new Blob<Dtype>()) {
+
+ // fill the values
+ FillerParameter filler_param;
+ GaussianFiller<Dtype> filler(filler_param);
+ filler.Fill(this->blob_bottom_data_);
+ blob_bottom_vec_.push_back(blob_bottom_data_);
+ filler.Fill(this->blob_bottom_label_);
+ blob_bottom_vec_.push_back(blob_bottom_label_);
+ filler.Fill(this->blob_bottom_certainty_);
+ blob_bottom_vec_.push_back(blob_bottom_certainty_);
+
+ blob_top_vec_.push_back(blob_top_loss_);
+ }
+ virtual ~WeightedEuclideanLossLayerTest() {
+ delete blob_bottom_data_;
+ delete blob_bottom_label_;
+ delete blob_bottom_certainty_;
+ delete blob_top_loss_;
+ }
+
+ void TestForward() {
+ // Get the loss without a specified objective weight -- should be
+ // equivalent to explicitly specifying a weight of 1.
+ LayerParameter layer_param;
+ WeightedEuclideanLossLayer<Dtype> layer_weight_1(layer_param);
+ layer_weight_1.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype loss_weight_1 =
+ layer_weight_1.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+
+ // Get the loss again with a different objective weight; check that it is
+ // scaled appropriately.
+ const Dtype kLossWeight = 3.7;
+ layer_param.add_loss_weight(kLossWeight);
+ WeightedEuclideanLossLayer<Dtype> layer_weight_2(layer_param);
+ layer_weight_2.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype loss_weight_2 =
+ layer_weight_2.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype kErrorMargin = 1e-5;
+ EXPECT_NEAR(loss_weight_1 * kLossWeight, loss_weight_2, kErrorMargin);
+ // Make sure the loss is non-trivial.
+ const Dtype kNonTrivialAbsThresh = 1e-1;
+ EXPECT_GE(fabs(loss_weight_1), kNonTrivialAbsThresh);
+ }
+
+ Blob<Dtype>* const blob_bottom_data_;
+ Blob<Dtype>* const blob_bottom_label_;
+ Blob<Dtype>* const blob_bottom_certainty_;
+ Blob<Dtype>* const blob_top_loss_;
+ vector<Blob<Dtype>*> blob_bottom_vec_;
+ vector<Blob<Dtype>*> blob_top_vec_;
+};
+
// NOTE(review): the fixture derives from CPUDeviceTest, so even the GPUDevice
// type params from TestDtypesAndDevices execute in CPU mode -- confirm this is
// intended until a GPU implementation lands.
TYPED_TEST_CASE(WeightedEuclideanLossLayerTest, TestDtypesAndDevices);

// Forward correctness: loss_weight scaling and non-trivial loss value.
TYPED_TEST(WeightedEuclideanLossLayerTest, TestForward) {
  this->TestForward();
}
+
+TYPED_TEST(WeightedEuclideanLossLayerTest, TestGradient) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ WeightedEuclideanLossLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-4, 2e-2, 1701, 1, 0.01);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_, 0);
+}
+
+} // namespace caffe
@@ -563,6 +563,8 @@ V1LayerParameter_LayerType UpgradeV0LayerType(const string& type) {
return V1LayerParameter_LayerType_DROPOUT;
} else if (type == "euclidean_loss") {
return V1LayerParameter_LayerType_EUCLIDEAN_LOSS;
+ } else if (type == "weighted_euclidean_loss") {
+ return V1LayerParameter_LayerType_WEIGHTED_EUCLIDEAN_LOSS;
} else if (type == "flatten") {
return V1LayerParameter_LayerType_FLATTEN;
} else if (type == "hdf5_data") {
@@ -899,6 +901,8 @@ const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type) {
return "DummyData";
case V1LayerParameter_LayerType_EUCLIDEAN_LOSS:
return "EuclideanLoss";
+ case V1LayerParameter_LayerType_WEIGHTED_EUCLIDEAN_LOSS:
+ return "WeightedEuclideanLoss";
case V1LayerParameter_LayerType_ELTWISE:
return "Eltwise";
case V1LayerParameter_LayerType_EXP: