Weighted Euclidean loss #5775
Open
AlexanderSavochkin wants to merge 7 commits into BVLC:master from AlexanderSavochkin:weighted_euclidean_loss
Commits (7)
f4c25a4  Weighted Euclidean distance (AlexanderSavochkin)
e532a00  Fix in weighted euclidean loss (AlexanderSavochkin)
ecbe635  Weighted euclidean loss layer (AlexanderSavochkin)
59b5858  Test for weighted euclidean loss (AlexanderSavochkin)
2e827cf  Test for weighted euclidean loss (AlexanderSavochkin)
9d6feec  Merge branch 'master' of https://github.com/AlexanderSavochkin/caffe (AlexanderSavochkin)
3f5cd1b  Merge branch 'weighted_euclidean_loss' of https://github.com/Alexande… (AlexanderSavochkin)
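The PR adds a WeightedEuclideanLoss layer that takes three bottoms: predictions, targets, and per-element weights. For context, a hypothetical prototxt sketch of how the layer would be wired (the blob names are illustrative and not part of this PR; the "WeightedEuclideanLoss" type string matches the layer registration below):

layer {
  name: "loss"
  type: "WeightedEuclideanLoss"
  bottom: "pred"
  bottom: "target"
  bottom: "weight"
  top: "loss"
  loss_weight: 1
}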
include/caffe/layers/weighted_euclidean_loss_layer.hpp
| @@ -0,0 +1,111 @@ | ||
| +#ifndef CAFFE_WEIGHTED_EUCLIDEAN_LOSS_LAYER_HPP_ | ||
| +#define CAFFE_WEIGHTED_EUCLIDEAN_LOSS_LAYER_HPP_ | ||
| + | ||
| +#include <vector> | ||
| + | ||
| +#include "caffe/blob.hpp" | ||
| +#include "caffe/layer.hpp" | ||
| +#include "caffe/proto/caffe.pb.h" | ||
| + | ||
| +#include "caffe/layers/loss_layer.hpp" | ||
| + | ||
| +namespace caffe { | ||
| + | ||
| +/** | ||
| + * @brief Computes weighted Euclidean (L2) loss @f$ | ||
| + * E = \frac{1}{2N} \sum\limits_{n=1}^N w_n \left| \left| \hat{y}_n - y_n | ||
| + * \right| \right|_2^2 @f$ for real-valued regression tasks. | ||
| + * | ||
| + * @param bottom input Blob vector (length 3) | ||
| + * -# @f$ (N \times C \times H \times W) @f$ | ||
| + * the predictions @f$ \hat{y} \in [-\infty, +\infty]@f$ | ||
| + * -# @f$ (N \times C \times H \times W) @f$ | ||
| + * the targets @f$ y \in [-\infty, +\infty]@f$ | ||
| + * -# @f$ (N \times C \times H \times W) @f$ | ||
| + * the weights @f$ w \in [0, +\infty] @f$ | ||
| + * @param top output Blob vector (length 1) | ||
| + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ | ||
| + * the computed weighted Euclidean loss: @f$ E = | ||
| + * \frac{1}{2N} \sum\limits_{n=1}^N w_n \left| \left| \hat{y}_n - y_n | ||
| + * \right| \right|_2^2 @f$ | ||
| + * | ||
| + * This can be used for weighted least-squares regression tasks. An | ||
| + * InnerProductLayer input to a WeightedEuclideanLossLayer formulates a | ||
| + * weighted linear least squares regression problem. With non-zero weight | ||
| + * decay the problem becomes one of ridge regression -- see | ||
| + * src/caffe/test/test_sgd_solver.cpp for a concrete example (using the | ||
| + * unweighted EuclideanLossLayer) wherein we check that the gradients | ||
| + * computed for a Net with exactly this structure match hand-computed | ||
| + * gradient formulas for ridge regression. | ||
| + * | ||
| + * (Note: Caffe, and SGD in general, is certainly \b not the best way to solve | ||
| + * linear least squares problems! We use it only as an instructive example.) | ||
| + */ | ||
| +template <typename Dtype> | ||
| +class WeightedEuclideanLossLayer : public LossLayer<Dtype> { | ||
| + public: | ||
| + explicit WeightedEuclideanLossLayer(const LayerParameter& param) | ||
| + : LossLayer<Dtype>(param), diff_() {} | ||
| + virtual void Reshape(const vector<Blob<Dtype>*>& bottom, | ||
| + const vector<Blob<Dtype>*>& top); | ||
| + | ||
| + virtual inline const char* type() const { return "WeightedEuclideanLoss"; } | ||
| + /** | ||
| + * Unlike most loss layers, in the WeightedEuclideanLossLayer we can | ||
| + * backpropagate to both the prediction and target inputs -- override to | ||
| + * allow force_backward for those two, but never for the weight input, | ||
| + * which receives no gradient. | ||
| + */ | ||
| + virtual inline bool AllowForceBackward(const int bottom_index) const { | ||
| + return bottom_index != 2; | ||
| + } | ||
| + | ||
| + virtual inline int ExactNumBottomBlobs() const { return 3; } | ||
| + | ||
| + protected: | ||
| + /// @copydoc WeightedEuclideanLossLayer | ||
| + virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
| + const vector<Blob<Dtype>*>& top); | ||
| + virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, | ||
| + const vector<Blob<Dtype>*>& top); | ||
| + | ||
| + /** | ||
| + * @brief Computes the weighted Euclidean error gradient w.r.t. the inputs. | ||
| + * | ||
| + * Unlike other children of LossLayer, WeightedEuclideanLossLayer \b can | ||
| + * compute gradients with respect to the label inputs bottom[1] (but still | ||
| + * only will if propagate_down[1] is set, due to being produced by learnable | ||
| + * parameters or if force_backward is set). In fact, this layer is | ||
| + * "commutative" in its first two bottoms -- the result is the same | ||
| + * regardless of their order. | ||
| + * | ||
| + * @param top output Blob vector (length 1), providing the error gradient with | ||
| + * respect to the outputs | ||
| + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ | ||
| + * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, | ||
| + * as @f$ \lambda @f$ is the coefficient of this layer's output | ||
| + * @f$\ell_i@f$ in the overall Net loss | ||
| + * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence | ||
| + * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. | ||
| + * (*Assuming that this top Blob is not used as a bottom (input) by any | ||
| + * other layer of the Net.) | ||
| + * @param propagate_down see Layer::Backward. | ||
| + * @param bottom input Blob vector (length 3) | ||
| + * -# @f$ (N \times C \times H \times W) @f$ | ||
| + * the predictions @f$\hat{y}@f$; Backward fills their diff with | ||
| + * gradients @f$ | ||
| + * \frac{\partial E}{\partial \hat{y}_n} = | ||
| + * \frac{w_n}{N} (\hat{y}_n - y_n) | ||
| + * @f$ if propagate_down[0] | ||
| + * -# @f$ (N \times C \times H \times W) @f$ | ||
| + * the targets @f$y@f$; Backward fills their diff with gradients | ||
| + * @f$ \frac{\partial E}{\partial y_n} = | ||
| + * \frac{w_n}{N} (y_n - \hat{y}_n) | ||
| + * @f$ if propagate_down[1] | ||
| + * -# @f$ (N \times C \times H \times W) @f$ | ||
| + * the weights @f$w@f$, which receive no gradient; propagate_down[2] | ||
| + * must be false | ||
| + */ | ||
| + virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
| + const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
| + virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, | ||
| + const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
| + | ||
| + Blob<Dtype> diff_; | ||
| +}; | ||
| + | ||
| +} // namespace caffe | ||
| + | ||
| +#endif // CAFFE_WEIGHTED_EUCLIDEAN_LOSS_LAYER_HPP_ |
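To make the header's loss formula concrete, here is a minimal standalone C++ sketch of the forward computation, independent of Caffe's Blob machinery (function and variable names are illustrative, not part of the PR):

#include <cstddef>
#include <vector>

// Reference computation: E = 1/(2N) * sum_i w[i] * (pred[i] - target[i])^2,
// where N is the number of samples (the leading "num" dimension) and the
// sum runs over every element of the blobs.
double WeightedEuclideanLoss(const std::vector<double>& pred,
                             const std::vector<double>& target,
                             const std::vector<double>& weight,
                             std::size_t num) {
  double wdot = 0.0;
  for (std::size_t i = 0; i < pred.size(); ++i) {
    const double d = pred[i] - target[i];
    wdot += weight[i] * d * d;
  }
  return wdot / static_cast<double>(num) / 2.0;
}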
src/caffe/layers/weighted_euclidean_loss_layer.cpp
| @@ -0,0 +1,69 @@ | ||
| +#include <vector> | ||
| + | ||
| +#include "caffe/layers/weighted_euclidean_loss_layer.hpp" | ||
| +#include "caffe/util/math_functions.hpp" | ||
| + | ||
| +namespace caffe { | ||
| + | ||
| +template <typename Dtype> | ||
| +void WeightedEuclideanLossLayer<Dtype>::Reshape( | ||
| + const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { | ||
| + LossLayer<Dtype>::Reshape(bottom, top); | ||
| + CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1)) | ||
| + << "Inputs must have the same dimension."; | ||
| + CHECK_EQ(bottom[0]->count(1), bottom[2]->count(1)) | ||
| + << "Weights must have the same dimension as the inputs."; | ||
| + diff_.ReshapeLike(*bottom[0]); | ||
| +} | ||
| + | ||
| +template <typename Dtype> | ||
| +void WeightedEuclideanLossLayer<Dtype>::Forward_cpu( | ||
| + const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { | ||
| + int count = bottom[0]->count(); | ||
| + caffe_sub( | ||
| + count, | ||
| + bottom[0]->cpu_data(), | ||
| + bottom[1]->cpu_data(), | ||
| + diff_.mutable_cpu_data()); | ||
| + | ||
| + // Accumulate the weighted squared error: sum_i w[i] * diff[i]^2. | ||
| + const Dtype* diff_data = diff_.cpu_data(); | ||
| + const Dtype* weight_data = bottom[2]->cpu_data(); | ||
| + Dtype wdot(0.0); | ||
| + for (int i = 0; i < count; ++i) { | ||
| + wdot += weight_data[i] * diff_data[i] * diff_data[i]; | ||
| + } | ||
| + | ||
| + Dtype loss = wdot / bottom[0]->num() / Dtype(2); | ||
| + top[0]->mutable_cpu_data()[0] = loss; | ||
| +} | ||
| + | ||
| +template <typename Dtype> | ||
| +void WeightedEuclideanLossLayer<Dtype>::Backward_cpu( | ||
| + const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, | ||
| + const vector<Blob<Dtype>*>& bottom) { | ||
| + if (propagate_down[2]) { | ||
| + LOG(FATAL) << this->type() | ||
| + << " Layer cannot backpropagate to weight (certainty) inputs."; | ||
| + } | ||
| + for (int i = 0; i < 2; ++i) { | ||
| + if (propagate_down[i]) { | ||
| + const Dtype sign = (i == 0) ? 1 : -1; | ||
| + const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num(); | ||
| + caffe_cpu_axpby( | ||
| + bottom[i]->count(), // count | ||
| + alpha, // alpha | ||
| + diff_.cpu_data(), // a | ||
| + Dtype(0), // beta | ||
| + bottom[i]->mutable_cpu_diff()); // b | ||
| + // Scale each element's gradient by its weight. | ||
| + Dtype* bottom_diff = bottom[i]->mutable_cpu_diff(); | ||
| + const Dtype* weight_data = bottom[2]->cpu_data(); | ||
| + for (int j = 0; j < bottom[i]->count(); ++j) { | ||
| + bottom_diff[j] *= weight_data[j]; | ||
| + } | ||
| + } | ||
| + } | ||
| +} | ||
| + | ||
| +#ifdef CPU_ONLY | ||
| +STUB_GPU(WeightedEuclideanLossLayer); | ||
| +#endif | ||
| + | ||
| +INSTANTIATE_CLASS(WeightedEuclideanLossLayer); | ||
| +REGISTER_LAYER_CLASS(WeightedEuclideanLoss); | ||
| + | ||
| +} // namespace caffe |
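As a companion to Backward_cpu above, a hedged standalone sketch of the gradients it produces (again with illustrative names; loss_weight plays the role of top[0]->cpu_diff()[0]):

#include <cstddef>
#include <vector>

// Reference gradients matching Backward_cpu:
//   dE/dpred[i]   = +loss_weight * weight[i] * (pred[i] - target[i]) / num
//   dE/dtarget[i] = -loss_weight * weight[i] * (pred[i] - target[i]) / num
// The weight input itself receives no gradient.
void WeightedEuclideanLossGrad(const std::vector<double>& pred,
                               const std::vector<double>& target,
                               const std::vector<double>& weight,
                               std::size_t num, double loss_weight,
                               std::vector<double>* pred_diff,
                               std::vector<double>* target_diff) {
  for (std::size_t i = 0; i < pred.size(); ++i) {
    const double g = loss_weight * weight[i] * (pred[i] - target[i]) /
                     static_cast<double>(num);
    (*pred_diff)[i] = g;
    (*target_diff)[i] = -g;
  }
}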
src/caffe/test/test_weighted_euclidean_loss_layer.cpp
| @@ -0,0 +1,92 @@ | ||
| +#include <vector> | ||
| + | ||
| +#include "gtest/gtest.h" | ||
| + | ||
| +#include "caffe/blob.hpp" | ||
| +#include "caffe/common.hpp" | ||
| +#include "caffe/filler.hpp" | ||
| +#include "caffe/layers/weighted_euclidean_loss_layer.hpp" | ||
| + | ||
| +#include "caffe/test/test_caffe_main.hpp" | ||
| +#include "caffe/test/test_gradient_check_util.hpp" | ||
| + | ||
| +namespace caffe { | ||
| + | ||
| +template <typename TypeParam> | ||
| +class WeightedEuclideanLossLayerTest : public MultiDeviceTest<TypeParam> { | ||
| + typedef typename TypeParam::Dtype Dtype; | ||
| + | ||
| + protected: | ||
| + WeightedEuclideanLossLayerTest() | ||
| + : blob_bottom_data_(new Blob<Dtype>(10, 5, 1, 1)), | ||
| + blob_bottom_label_(new Blob<Dtype>(10, 5, 1, 1)), | ||
| + blob_bottom_certainty_(new Blob<Dtype>(10, 5, 1, 1)), | ||
| + blob_top_loss_(new Blob<Dtype>()) { | ||
| + | ||
| + // fill the values; the weights (certainties) must be non-negative, so | ||
| + // fill them uniformly on [0, 1] rather than with a Gaussian | ||
| + FillerParameter filler_param; | ||
| + GaussianFiller<Dtype> filler(filler_param); | ||
| + filler.Fill(this->blob_bottom_data_); | ||
| + blob_bottom_vec_.push_back(blob_bottom_data_); | ||
| + filler.Fill(this->blob_bottom_label_); | ||
| + blob_bottom_vec_.push_back(blob_bottom_label_); | ||
| + FillerParameter certainty_filler_param; | ||
| + certainty_filler_param.set_min(0); | ||
| + certainty_filler_param.set_max(1); | ||
| + UniformFiller<Dtype> certainty_filler(certainty_filler_param); | ||
| + certainty_filler.Fill(this->blob_bottom_certainty_); | ||
| + blob_bottom_vec_.push_back(blob_bottom_certainty_); | ||
| + | ||
| + blob_top_vec_.push_back(blob_top_loss_); | ||
| + } | ||
| + virtual ~WeightedEuclideanLossLayerTest() { | ||
| + delete blob_bottom_data_; | ||
| + delete blob_bottom_label_; | ||
| + delete blob_bottom_certainty_; | ||
| + delete blob_top_loss_; | ||
| + } | ||
| + | ||
| + void TestForward() { | ||
| + // Get the loss without a specified objective weight -- should be | ||
| + // equivalent to explicitly specifying a weight of 1. | ||
| + LayerParameter layer_param; | ||
| + WeightedEuclideanLossLayer<Dtype> layer_weight_1(layer_param); | ||
| + layer_weight_1.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); | ||
| + const Dtype loss_weight_1 = | ||
| + layer_weight_1.Forward(this->blob_bottom_vec_, this->blob_top_vec_); | ||
| + | ||
| + // Get the loss again with a different objective weight; check that it is | ||
| + // scaled appropriately. | ||
| + const Dtype kLossWeight = 3.7; | ||
| + layer_param.add_loss_weight(kLossWeight); | ||
| + WeightedEuclideanLossLayer<Dtype> layer_weight_2(layer_param); | ||
| + layer_weight_2.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); | ||
| + const Dtype loss_weight_2 = | ||
| + layer_weight_2.Forward(this->blob_bottom_vec_, this->blob_top_vec_); | ||
| + const Dtype kErrorMargin = 1e-5; | ||
| + EXPECT_NEAR(loss_weight_1 * kLossWeight, loss_weight_2, kErrorMargin); | ||
| + // Make sure the loss is non-trivial. | ||
| + const Dtype kNonTrivialAbsThresh = 1e-1; | ||
| + EXPECT_GE(fabs(loss_weight_1), kNonTrivialAbsThresh); | ||
| + } | ||
| + | ||
| + Blob<Dtype>* const blob_bottom_data_; | ||
| + Blob<Dtype>* const blob_bottom_label_; | ||
| + Blob<Dtype>* const blob_bottom_certainty_; | ||
| + Blob<Dtype>* const blob_top_loss_; | ||
| + vector<Blob<Dtype>*> blob_bottom_vec_; | ||
| + vector<Blob<Dtype>*> blob_top_vec_; | ||
| +}; | ||
| + | ||
| +TYPED_TEST_CASE(WeightedEuclideanLossLayerTest, TestDtypesAndDevices); | ||
| + | ||
| +TYPED_TEST(WeightedEuclideanLossLayerTest, TestForward) { | ||
| + this->TestForward(); | ||
| +} | ||
| + | ||
| +TYPED_TEST(WeightedEuclideanLossLayerTest, TestGradient) { | ||
| + typedef typename TypeParam::Dtype Dtype; | ||
| + LayerParameter layer_param; | ||
| + WeightedEuclideanLossLayer<Dtype> layer(layer_param); | ||
| + GradientChecker<Dtype> checker(1e-4, 2e-2, 1701); | ||
| + // Check gradients w.r.t. the predictions and the targets; the weight | ||
| + // input (bottom[2]) receives no gradient, so it is skipped. | ||
| + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, | ||
| + this->blob_top_vec_, 0); | ||
| + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, | ||
| + this->blob_top_vec_, 1); | ||
| +} | ||
| + | ||
| +} // namespace caffe |
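The GradientChecker above validates the analytic diffs against central finite differences; below is a minimal illustrative sketch of that idea, reusing the WeightedEuclideanLoss helper from the earlier forward-pass sketch (this is not Caffe's actual checker, just the underlying technique):

#include <cstddef>
#include <vector>

// Declared in the earlier forward-pass sketch.
double WeightedEuclideanLoss(const std::vector<double>& pred,
                             const std::vector<double>& target,
                             const std::vector<double>& weight,
                             std::size_t num);

// Central finite-difference estimate of dE/dpred[i]; this is the quantity
// the analytic Backward pass is compared against by a gradient checker.
double NumericGradient(std::vector<double>* pred,
                       const std::vector<double>& target,
                       const std::vector<double>& weight,
                       std::size_t num, std::size_t i, double stepsize) {
  const double orig = (*pred)[i];
  (*pred)[i] = orig + stepsize;
  const double loss_plus = WeightedEuclideanLoss(*pred, target, weight, num);
  (*pred)[i] = orig - stepsize;
  const double loss_minus = WeightedEuclideanLoss(*pred, target, weight, num);
  (*pred)[i] = orig;  // restore the perturbed element
  return (loss_plus - loss_minus) / (2.0 * stepsize);
}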