softmax_loss_layer.cu
// Copyright 2013 Yangqing Jia
#include <algorithm>
#include <cfloat>
#include <cstring>  // for memcpy
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"

using std::max;

namespace caffe {

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  CHECK_EQ(bottom.size(), 2) << "SoftmaxLoss Layer takes two blobs as input.";
  CHECK_EQ(top->size(), 0) << "SoftmaxLoss Layer takes no blob as output.";
  // Set up the internal softmax layer, which writes its output to prob_.
  softmax_bottom_vec_.clear();
  softmax_bottom_vec_.push_back(bottom[0]);
  softmax_top_vec_.clear();  // cleared like the bottom vec so repeated SetUp calls stay safe
  softmax_top_vec_.push_back(&prob_);
  softmax_layer_->SetUp(softmax_bottom_vec_, &softmax_top_vec_);
}
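
// Usage sketch: the two bottoms are the unnormalized predictions and the
// integer class labels. In an early-Caffe net definition this might be wired
// up roughly as follows (illustrative prototxt, not copied from a shipped
// model):
//
//   layers {
//     layer { name: "loss" type: "softmax_loss" }
//     bottom: "fc8"
//     bottom: "label"
//   }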

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  // The forward pass computes the softmax prob values.
  softmax_bottom_vec_[0] = bottom[0];
  softmax_layer_->Forward(softmax_bottom_vec_, &softmax_top_vec_);
}

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  // The forward pass computes the softmax prob values; the internal softmax
  // layer dispatches to its GPU path based on Caffe::mode().
  softmax_bottom_vec_[0] = bottom[0];
  softmax_layer_->Forward(softmax_bottom_vec_, &softmax_top_vec_);
}
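
// Backward pass math: with softmax probabilities p_{i,j} and ground-truth
// labels y_i over a batch of N samples, the loss accumulated below is
//
//   L = -(1/N) * sum_i log(p_{i, y_i}),
//
// and the gradient with respect to the softmax input z_{i,j} takes the
// standard closed form
//
//   dL/dz_{i,j} = (p_{i,j} - 1{j == y_i}) / N.
//
// Backward_cpu implements exactly this: copy prob_ into the diff, subtract 1
// at each label index, and scale by 1/num.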

template <typename Dtype>
Dtype SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  // First, compute the diff: start from the softmax probabilities, then
  // subtract 1 at each sample's ground-truth class.
  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
  const Dtype* prob_data = prob_.cpu_data();
  memcpy(bottom_diff, prob_data, sizeof(Dtype) * prob_.count());
  const Dtype* label = (*bottom)[1]->cpu_data();
  int num = prob_.num();
  int dim = prob_.count() / num;
  Dtype loss = 0;
  for (int i = 0; i < num; ++i) {
    bottom_diff[i * dim + static_cast<int>(label[i])] -= 1;
    // Clamp at FLT_MIN so that log() never sees zero.
    loss += -log(max(prob_data[i * dim + static_cast<int>(label[i])], FLT_MIN));
  }
  // Scale down gradient (and below, the loss) by the batch size.
  caffe_scal(prob_.count(), Dtype(1) / num, bottom_diff);
  return loss / num;
}
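
// A minimal sketch of what the GPU backward flagged in the TODO below could
// look like, assuming prob_ has already been copied into bottom_diff on the
// device: one thread per sample subtracts 1 at its ground-truth class,
// mirroring the CPU loop above. The kernel name is hypothetical, the loss
// reduction is omitted, and nothing calls this yet.
template <typename Dtype>
__global__ void SoftmaxLossBackwardGPU(const int num, const int dim,
    const Dtype* label, Dtype* bottom_diff) {
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < num) {
    bottom_diff[i * dim + static_cast<int>(label[i])] -= Dtype(1);
  }
}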

template <typename Dtype>
Dtype SoftmaxWithLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  // TODO(Yangqing): implement the GPU version of softmax.
  return Backward_cpu(top, propagate_down, bottom);
}

INSTANTIATE_CLASS(SoftmaxWithLossLayer);
} // namespace caffe