why use max_elem #5682

Open
wants to merge 12 commits into
from
View
@@ -1,37 +1,21 @@
-# Caffe
+# Deep Metric Learning via Lifted Structured Feature Embedding
+This repository is an extension of [Caffe](https://github.com/bvlc/caffe) for the paper "Deep Metric Learning via Lifted Structured Feature Embedding" (CVPR16). Also, this is a Git submodule of the main repository at [Deep-Metric-Learning-CVPR16](https://github.com/rksltnl/Deep-Metric-Learning-CVPR16).
-[![Build Status](https://travis-ci.org/BVLC/caffe.svg?branch=master)](https://travis-ci.org/BVLC/caffe)
-[![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE)
+## Citing this work
+If you find this work useful in your research, please consider citing:
-Caffe is a deep learning framework made with expression, speed, and modularity in mind.
-It is developed by the Berkeley Vision and Learning Center ([BVLC](http://bvlc.eecs.berkeley.edu)) and community contributors.
-
-Check out the [project site](http://caffe.berkeleyvision.org) for all the details like
-
-- [DIY Deep Learning for Vision with Caffe](https://docs.google.com/presentation/d/1UeKXVgRvvxg9OUdh_UiC5G71UMscNPlvArsWER41PsU/edit#slide=id.p)
-- [Tutorial Documentation](http://caffe.berkeleyvision.org/tutorial/)
-- [BVLC reference models](http://caffe.berkeleyvision.org/model_zoo.html) and the [community model zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo)
-- [Installation instructions](http://caffe.berkeleyvision.org/installation.html)
-
-and step-by-step examples.
-
-[![Join the chat at https://gitter.im/BVLC/caffe](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/BVLC/caffe?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
-
-Please join the [caffe-users group](https://groups.google.com/forum/#!forum/caffe-users) or [gitter chat](https://gitter.im/BVLC/caffe) to ask questions and talk about methods and models.
-Framework development discussions and thorough bug reports are collected on [Issues](https://github.com/BVLC/caffe/issues).
-
-Happy brewing!
-
-## License and Citation
+ @inproceedings{songCVPR16,
+ Author = {Hyun Oh Song and Yu Xiang and Stefanie Jegelka and Silvio Savarese},
+ Title = {Deep Metric Learning via Lifted Structured Feature Embedding},
+ Booktitle = {Computer Vision and Pattern Recognition (CVPR)},
+ Year = {2016}
+ }
-Caffe is released under the [BSD 2-Clause license](https://github.com/BVLC/caffe/blob/master/LICENSE).
-The BVLC reference models are released for unrestricted use.
+## Installation
+1. Install prerequisites for `Caffe` (see: [Caffe installation instructions](http://caffe.berkeleyvision.org/installation.html))
-Please cite Caffe in your publications if it helps your research:
+2. Compile this caffe extension.
+Run `make all`, `make test`
- @article{jia2014caffe,
- Author = {Jia, Yangqing and Shelhamer, Evan and Donahue, Jeff and Karayev, Sergey and Long, Jonathan and Girshick, Ross and Guadarrama, Sergio and Darrell, Trevor},
- Journal = {arXiv preprint arXiv:1408.5093},
- Title = {Caffe: Convolutional Architecture for Fast Feature Embedding},
- Year = {2014}
- }
+## License
+MIT License
@@ -0,0 +1,47 @@
+#ifndef CAFFE_LIFTED_STRUCT_SIMILARITY_LOSS_LAYER_HPP_
+#define CAFFE_LIFTED_STRUCT_SIMILARITY_LOSS_LAYER_HPP_
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+#include "caffe/layers/loss_layer.hpp"
+
+namespace caffe {
+
+/**
+ * @brief Lifted structured similarity softmax loss for deep metric learning
+ *        (Song et al., "Deep Metric Learning via Lifted Structured Feature
+ *        Embedding", CVPR 2016).
+ *
+ * Bottom blobs:
+ *   bottom[0] - N x C x 1 x 1 feature embeddings.
+ *   bottom[1] - N x 1 x 1 x 1 labels; samples with equal labels form
+ *               positive pairs, unequal labels form negatives.
+ * Top blob:
+ *   top[0]    - scalar loss.
+ *
+ * CPU implementation only (the GPU stubs are commented out in the .cpp).
+ */
+template <typename Dtype>
+class LiftedStructSimilaritySoftmaxLossLayer : public LossLayer<Dtype> {
+ public:
+ explicit LiftedStructSimilaritySoftmaxLossLayer(const LayerParameter& param)
+ : LossLayer<Dtype>(param) {}
+ virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+
+ // Exactly two bottoms: features and labels.
+ virtual inline int ExactNumBottomBlobs() const { return 2; }
+ virtual inline const char* type() const { return "LiftedStructSimilaritySoftmaxLoss"; }
+ // No gradient can be forced back to the label input (bottom index 1).
+ virtual inline bool AllowForceBackward(const int bottom_index) const {
+ return bottom_index != 1;
+ }
+
+ protected:
+ virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+
+ virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+ Blob<Dtype> dist_sq_; // cached for backward pass; per-sample squared norms ||x_i||^2
+ Blob<Dtype> dot_; // N x N matrix; holds pairwise squared distances after Forward_cpu
+ Blob<Dtype> ones_; // N x 1 vector of ones used to broadcast norms into dot_
+ Blob<Dtype> blob_pos_diff_; // scratch: x_i - x_j for the current positive pair
+ Blob<Dtype> blob_neg_diff_; // scratch: x_i - x_k (or x_j - x_k) for a negative
+ Blob<Dtype> loss_aug_inference_; // (margin - D) terms over all negatives of a pair
+ Blob<Dtype> summer_vec_; // vector of ones used to sum loss_aug_inference_
+ Dtype num_constraints; // number of positive pairs; normalizes loss and gradient
+};
+
+} // namespace caffe
+
+#endif // CAFFE_LIFTED_STRUCT_SIMILARITY_LOSS_LAYER_HPP_
@@ -0,0 +1,219 @@
+#include <algorithm>
+#include <vector>
+
+#include "caffe/layers/lifted_struct_similarity_softmax_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+// Validates input shapes and pre-allocates the scratch buffers that
+// Forward_cpu/Backward_cpu reuse on every iteration.
+// Expects bottom[0] = N x C x 1 x 1 features, bottom[1] = N x 1 x 1 x 1 labels.
+template <typename Dtype>
+void LiftedStructSimilaritySoftmaxLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
+ LossLayer<Dtype>::LayerSetUp(bottom, top);
+ CHECK_EQ(bottom[0]->height(), 1);
+ CHECK_EQ(bottom[0]->width(), 1);
+ CHECK_EQ(bottom[1]->channels(), 1);
+ CHECK_EQ(bottom[1]->height(), 1);
+ CHECK_EQ(bottom[1]->width(), 1);
+ // Member buffers are declared in
+ // include/caffe/layers/lifted_struct_similarity_softmax_layer.hpp;
+ // loss_aug_inference_ and summer_vec_ are (re)shaped per positive pair
+ // inside Forward_cpu, since their size depends on the negative count.
+ dist_sq_.Reshape(bottom[0]->num(), 1, 1, 1);
+ dot_.Reshape(bottom[0]->num(), bottom[0]->num(), 1, 1);
+ ones_.Reshape(bottom[0]->num(), 1, 1, 1); // n by 1 vector of ones.
+ for (int i=0; i < bottom[0]->num(); ++i){
+ ones_.mutable_cpu_data()[i] = Dtype(1);
+ }
+ blob_pos_diff_.Reshape(bottom[0]->channels(), 1, 1, 1);
+ blob_neg_diff_.Reshape(bottom[0]->channels(), 1, 1, 1);
+}
+
+// Computes the lifted structured loss over all positive pairs in the batch
+// and, in the same pass, accumulates dJ/dx into bottom[0]->cpu_diff()
+// (Backward_cpu then only rescales that accumulated gradient).
+template <typename Dtype>
+void LiftedStructSimilaritySoftmaxLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
+
+ // Per-sample squared norms ||x_i||^2.
+ const int channels = bottom[0]->channels();
+ for (int i = 0; i < bottom[0]->num(); i++){
+ dist_sq_.mutable_cpu_data()[i] = caffe_cpu_dot(channels, bottom[0]->cpu_data() + (i*channels), bottom[0]->cpu_data() + (i*channels));
+ }
+
+ int M_ = bottom[0]->num();
+ int N_ = bottom[0]->num();
+ int K_ = bottom[0]->channels();
+
+ const Dtype* bottom_data1 = bottom[0]->cpu_data();
+ const Dtype* bottom_data2 = bottom[0]->cpu_data();
+
+ // dot_ <- -2 * X * X^T; the next two loops add ||x_i||^2 and ||x_j||^2
+ // so that dot_[i*N_+j] = ||x_i - x_j||^2 (pairwise squared distances).
+ Dtype dot_scaler(-2.0);
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, N_, K_, dot_scaler, bottom_data1, bottom_data2, (Dtype)0., dot_.mutable_cpu_data());
+
+ // add ||x_i||^2 to all elements in row i
+ for (int i=0; i<N_; i++){
+ caffe_axpy(N_, dist_sq_.cpu_data()[i], ones_.cpu_data(), dot_.mutable_cpu_data() + i*N_);
+ }
+
+ // add the norm vector to row i (i.e. add ||x_j||^2 column-wise)
+ for (int i=0; i<N_; i++){
+ caffe_axpy(N_, Dtype(1.0), dist_sq_.cpu_data(), dot_.mutable_cpu_data() + i*N_);
+ }
+
+ // construct pairwise label matrix: label_mat[i][j] == true iff same class
+ vector<vector<bool> > label_mat(N_, vector<bool>(N_, false));
+ for (int i=0; i<N_; i++){
+ for (int j=0; j<N_; j++){
+ label_mat[i][j] = (bottom[1]->cpu_data()[i] == bottom[1]->cpu_data()[j]);
+ }
+ }
+
+ Dtype margin = this->layer_param_.lifted_struct_sim_softmax_loss_param().margin();
+ Dtype loss(0.0);
+ num_constraints = Dtype(0.0);
+ const Dtype* bin = bottom[0]->cpu_data();
+ Dtype* bout = bottom[0]->mutable_cpu_diff();
+
+ // zero initialize bottom[0]->mutable_cpu_diff();
+ for (int i=0; i<N_; i++){
+ caffe_set(K_, Dtype(0.0), bout + i*K_);
+ }
+
+ // loop upper triangular matrix and look for positive anchors
+ for (int i=0; i<N_; i++){
+ for (int j=i+1; j<N_; j++){
+
+ // found a positive pair @ anchor (i, j)
+ if (label_mat[i][j]){
+ Dtype dist_pos = sqrt(dot_.cpu_data()[i*N_ + j]);
+
+ caffe_sub(K_, bin + i*K_, bin + j*K_, blob_pos_diff_.mutable_cpu_data());
+
+ // 1.count the number of negatives for this positive
+ int num_negatives = 0;
+ for (int k=0; k<N_; k++){
+ if (!label_mat[i][k]){
+ num_negatives += 1;
+ }
+ }
+
+ for (int k=0; k<N_; k++){
+ if (!label_mat[j][k]){
+ num_negatives += 1;
+ }
+ }
+
+ loss_aug_inference_.Reshape(num_negatives, 1, 1, 1);
+
+ // vector of ones used to sum along channels
+ summer_vec_.Reshape(num_negatives, 1, 1, 1);
+ for (int ss = 0; ss < num_negatives; ++ss){
+ summer_vec_.mutable_cpu_data()[ss] = Dtype(1);
+ }
+
+ // 2. compute loss augmented inference: collect margin - D for every
+ // negative of either endpoint of the positive pair.
+ int neg_idx = 0;
+ // mine negative (anchor i, neg k)
+ for (int k=0; k<N_; k++){
+ if (!label_mat[i][k]){
+ loss_aug_inference_.mutable_cpu_data()[neg_idx] = margin - sqrt(dot_.cpu_data()[i*N_ + k]);
+ neg_idx++;
+ }
+ }
+
+ // mine negative (anchor j, neg k)
+ for (int k=0; k<N_; k++){
+ if (!label_mat[j][k]){
+ loss_aug_inference_.mutable_cpu_data()[neg_idx] = margin - sqrt(dot_.cpu_data()[j*N_ + k]);
+ neg_idx++;
+ }
+ }
+
+ // compute softmax of loss aug inference vector;
+ // max_elem implements the standard numerically stable log-sum-exp:
+ // log(sum_k exp(v_k)) == max(v) + log(sum_k exp(v_k - max(v))).
+ // Subtracting the max before exp() keeps every exponent <= 0, so
+ // exp() cannot overflow to inf even when (margin - D) is large;
+ // max_elem is added back when forming soft_maximum below.
+ Dtype max_elem = *std::max_element(loss_aug_inference_.cpu_data(), loss_aug_inference_.cpu_data() + num_negatives);
+
+ caffe_add_scalar(loss_aug_inference_.count(), Dtype(-1.0)*max_elem, loss_aug_inference_.mutable_cpu_data());
+ caffe_exp(loss_aug_inference_.count(), loss_aug_inference_.mutable_cpu_data(), loss_aug_inference_.mutable_cpu_data());
+ Dtype soft_maximum = log(caffe_cpu_dot(num_negatives, summer_vec_.cpu_data(), loss_aug_inference_.mutable_cpu_data())) + max_elem;
+
+ // hinge the soft_maximum - S_ij (positive pair similarity)
+ Dtype this_loss = std::max(soft_maximum + dist_pos, Dtype(0.0));
+
+ // squared hinge
+ loss += this_loss * this_loss;
+ num_constraints += Dtype(1.0);
+
+ // 3. compute gradients.
+ // loss_aug_inference_ now holds exp(margin - D - max_elem), so each
+ // entry / sum_exp is that negative's softmax weight.
+ Dtype sum_exp = caffe_cpu_dot(num_negatives, summer_vec_.cpu_data(), loss_aug_inference_.mutable_cpu_data());
+
+ // update from positive distance dJ_dD_{ij}; update x_i, x_j
+ Dtype scaler(0.0);
+
+ // NOTE(review): if x_i == x_j exactly, dist_pos is 0 and this divides
+ // by zero (gradient becomes inf/NaN) -- confirm duplicate embeddings
+ // cannot occur, or guard with a small epsilon.
+ scaler = Dtype(2.0)*this_loss / dist_pos;
+ // update x_i
+ caffe_axpy(K_, scaler * Dtype(1.0), blob_pos_diff_.cpu_data(), bout + i*K_);
+ // update x_j
+ caffe_axpy(K_, scaler * Dtype(-1.0), blob_pos_diff_.cpu_data(), bout + j*K_);
+
+ // update from negative distance dJ_dD_{ik}; update x_i, x_k
+ neg_idx = 0;
+ Dtype dJ_dDik(0.0);
+ for (int k=0; k<N_; k++){
+ if (!label_mat[i][k]){
+ caffe_sub(K_, bin + i*K_, bin + k*K_, blob_neg_diff_.mutable_cpu_data());
+
+ dJ_dDik = Dtype(2.0)*this_loss * Dtype(-1.0)* loss_aug_inference_.cpu_data()[neg_idx] / sum_exp;
+ neg_idx++;
+
+ scaler = dJ_dDik / sqrt(dot_.cpu_data()[i*N_ + k]);
+
+ // update x_i
+ caffe_axpy(K_, scaler * Dtype(1.0), blob_neg_diff_.cpu_data(), bout + i*K_);
+ // update x_k
+ caffe_axpy(K_, scaler * Dtype(-1.0), blob_neg_diff_.cpu_data(), bout + k*K_);
+ }
+ }
+
+ // update from negative distance dJ_dD_{jk}; update x_j, x_k
+ Dtype dJ_dDjk(0.0);
+ for (int k=0; k<N_; k++){
+ if (!label_mat[j][k]){
+ caffe_sub(K_, bin + j*K_, bin + k*K_, blob_neg_diff_.mutable_cpu_data());
+
+ dJ_dDjk = Dtype(2.0)*this_loss * Dtype(-1.0)*loss_aug_inference_.cpu_data()[neg_idx] / sum_exp;
+ neg_idx++;
+
+ scaler = dJ_dDjk / sqrt(dot_.cpu_data()[j*N_ + k]);
+
+ // update x_j
+ caffe_axpy(K_, scaler * Dtype(1.0), blob_neg_diff_.cpu_data(), bout + j*K_);
+ // update x_k
+ caffe_axpy(K_, scaler * Dtype(-1.0), blob_neg_diff_.cpu_data(), bout + k*K_);
+ }
+ }
+ } // close this postive pair
+ }
+ }
+ // NOTE(review): if the batch contains no positive pair, num_constraints
+ // stays 0 and this division yields NaN -- confirm the sampler always
+ // provides at least one positive pair per batch.
+ loss = loss / num_constraints / Dtype(2.0);
+ top[0]->mutable_cpu_data()[0] = loss;
+}
+
+// Backward pass: the unnormalized gradients were already accumulated into
+// bottom[0]->cpu_diff() during Forward_cpu, so here they are only rescaled
+// by the chain-rule factor top_diff / num_constraints / 2 -- matching the
+// loss normalization (loss / num_constraints / 2) applied in Forward_cpu.
+// Note: propagate_down is ignored; bottom[1] (labels) receives no gradient.
+template <typename Dtype>
+void LiftedStructSimilaritySoftmaxLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+
+ const Dtype alpha = top[0]->cpu_diff()[0] / num_constraints / Dtype(2.0);
+
+ int num = bottom[0]->num();
+ int channels = bottom[0]->channels();
+ for (int i = 0; i < num; i++){
+ Dtype* bout = bottom[0]->mutable_cpu_diff();
+ caffe_scal(channels, alpha, bout + (i*channels));
+ }
+}
+
+//#ifdef CPU_ONLY
+//STUB_GPU(LiftedStructSimilaritySoftmaxLossLayer);
+//#endif
+
+INSTANTIATE_CLASS(LiftedStructSimilaritySoftmaxLossLayer);
+REGISTER_LAYER_CLASS(LiftedStructSimilaritySoftmaxLoss);
+
+} // namespace caffe
+
+
+
@@ -306,7 +306,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
-// LayerParameter next available layer-specific ID: 145 (last added: crop_param)
+// LayerParameter next available layer-specific ID: 146 (last added: lifted_struct_sim_softmax_loss_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
@@ -376,6 +376,7 @@ message LayerParameter {
optional InfogainLossParameter infogain_loss_param = 116;
optional InnerProductParameter inner_product_param = 117;
optional InputParameter input_param = 143;
+ optional LiftedStructSimilaritySoftmaxLossParameter lifted_struct_sim_softmax_loss_param = 145;
optional LogParameter log_param = 134;
optional LRNParameter lrn_param = 118;
optional MemoryDataParameter memory_data_param = 119;
@@ -760,6 +761,10 @@ message HingeLossParameter {
optional Norm norm = 1 [default = L1];
}
+message LiftedStructSimilaritySoftmaxLossParameter {
+ // Margin applied to negative-pair distances: the loss-augmented inference
+ // uses (margin - D_neg) terms, so larger margins push negatives further apart.
+ optional float margin = 1 [default = 1.0];
+}
+
message ImageDataParameter {
// Specify the data source.
optional string source = 1;
Oops, something went wrong.