-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
SmoothL1 loss and Python API. #1870
Changes from 3 commits
1fd0075
737b4e6
22f2519
4d23a94
6dd90f4
7f3e576
6c654c0
2838491
63e4e16
8b997df
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,8 +91,8 @@ class MultiClassCrossEntropy : public CostLayer { | |
* | ||
* [1] Jacob Devlin, Rabih Zbib, Zhongqiang Huang, Thomas Lamar, | ||
* Richard Schwartz, and John Makhoul. Fast and robust neural | ||
* network joint models for statistical machine translation. | ||
* In Proceedings of the ACL 2014 Conference. | ||
* network joint models for statistical machine translation. * In | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 格式好像出错了 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. revert了~ |
||
* Proceedings of the ACL 2014 Conference. | ||
*/ | ||
class MultiClassCrossEntropyWithSelfNorm : public CostLayer { | ||
public: | ||
|
@@ -164,9 +164,11 @@ class SumOfSquaresCostLayer : public CostLayer { | |
* tasks. | ||
* \f[ | ||
* L = | ||
* (output - label)^2 * 0.5 / -1 < (output - label) < 1 / | ||
* (output - label) - 0.5 / otherwise / | ||
* 0.5 * x^2 if / -1 < |x| < 1 / | ||
* |x| - 0.5 / otherwise / | ||
* \f] | ||
* | ||
* x = output - label | ||
*/ | ||
class SmoothL1CostLayer : public CostLayer { | ||
public: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1685,7 +1685,7 @@ TEST(Layer, smooth_l1) { | |
config.layerConfig.add_inputs(); | ||
|
||
for (auto useGpu : {false, true}) { | ||
testLayerGrad(config, "smooth_l1", 100, false, useGpu, false, 2.0); | ||
testLayerGrad(config, "smooth_l1", 100, false, useGpu, false); | ||
} | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you extend the loss test to have more than 1 dim output ?
Change the size of input and label to be 10 in order to make sure it works with more than 1 dim There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. Thanks! |
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3616,17 +3616,18 @@ void CpuMatrix::smoothL1(Matrix& output, Matrix& label) { | |
CHECK_EQ(output.getHeight(), numSamples); | ||
CHECK_EQ(label.getWidth(), dim); | ||
CHECK_EQ(getWidth(), (size_t)1); | ||
real* out = output.getData(); | ||
|
||
real* cost = getData(); | ||
real* out = output.getData(); | ||
real* lbl = label.getData(); | ||
|
||
for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) { | ||
for (size_t i = 0; i < numSamples; ++i, out += dim, lbl += dim) { | ||
for (size_t j = 0; j < dim; ++j) { | ||
cost[j] = std::fabs(out[j] - lbl[j]); | ||
if (cost[j] < 1.0) | ||
cost[j] = 0.5 * cost[j] * cost[j]; | ||
real absVal = std::fabs(out[j] - lbl[j]); | ||
if (absVal < 1.0) | ||
cost[i] += 0.5 * absVal * absVal; | ||
else | ||
cost[j] = cost[j] - 0.5; | ||
cost[i] += absVal - 0.5; | ||
} | ||
} | ||
} | ||
|
@@ -3640,17 +3641,20 @@ void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) { | |
CHECK_EQ(label.getHeight(), numSamples); | ||
CHECK_EQ(output.getHeight(), numSamples); | ||
CHECK_EQ(label.getWidth(), dim); | ||
CHECK_EQ(getWidth(), (size_t)1); | ||
CHECK_EQ(getWidth(), dim); | ||
|
||
real* out = output.getData(); | ||
real* cost = getData(); | ||
real* lbl = label.getData(); | ||
real* grad = getData(); | ||
|
||
// f'(x) = x if |x| < 1 | ||
// = sign(x) otherwise | ||
for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) { | ||
for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim, lbl += dim) { | ||
for (size_t j = 0; j < dim; ++j) { | ||
cost[j] = out[j] - lbl[j]; | ||
if (std::fabs(cost[j]) >= 1) cost[j] = (0 < cost[j]) - (cost[j] < 0); | ||
real val = out[j] - lbl[j]; | ||
if (std::fabs(val) < 1) { | ||
grad[j] += val; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这里是不是应该是grad[i] += val; 上面循环中去掉grad += dim grad的维度应该与numSamples的数量一致吧。之前在循环里加了dim,因为check了dim必须是1,如果dim不是1就错了。 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. grad的大小为:numSamples * dim,代表第一个input对应的gradient. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 可以看https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/gserver/layers/CostLayer.cpp 这个文件 60行->68行-> 227行 -> 244行。 grad和out的维度一致。 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 嗯。。是我想错了。 |
||
} else { | ||
grad[j] += (real(0) < val) - (val < real(0)); | ||
} | ||
} | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
type: "nn" | ||
layers { | ||
name: "input" | ||
type: "data" | ||
size: 300 | ||
active_type: "" | ||
} | ||
layers { | ||
name: "label" | ||
type: "data" | ||
size: 300 | ||
active_type: "" | ||
} | ||
layers { | ||
name: "__smooth_l1_cost_0__" | ||
type: "smooth_l1" | ||
size: 1 | ||
active_type: "" | ||
inputs { | ||
input_layer_name: "input" | ||
} | ||
inputs { | ||
input_layer_name: "label" | ||
} | ||
coeff: 1.0 | ||
} | ||
input_layer_names: "input" | ||
input_layer_names: "label" | ||
output_layer_names: "__smooth_l1_cost_0__" | ||
sub_models { | ||
name: "root" | ||
layer_names: "input" | ||
layer_names: "label" | ||
layer_names: "__smooth_l1_cost_0__" | ||
input_layer_names: "input" | ||
input_layer_names: "label" | ||
output_layer_names: "__smooth_l1_cost_0__" | ||
is_recurrent_layer_group: false | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from paddle.trainer_config_helpers import * | ||
|
||
data = data_layer(name='input', size=300) | ||
lbl = data_layer(name='label', size=300) | ||
smooth_l1 = smooth_l1_cost(input=data, label=lbl) | ||
|
||
outputs(smooth_l1) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
请加到doc/api/v2/config/layer.rst里面,v1里面的不用改了。
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.