From 1fd00758acc6c536ef3979a2d9aaf670da7a2fbe Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Mon, 24 Apr 2017 15:31:30 +0800
Subject: [PATCH 1/7] SmoothL1 loss

---
 doc/api/v1/trainer_config_helpers/layers.rst   |  6 ++
 paddle/gserver/layers/CostLayer.cpp            |  2 +-
 paddle/gserver/layers/CostLayer.h              | 10 ++--
 paddle/gserver/tests/test_LayerGrad.cpp        |  2 +-
 paddle/math/Matrix.cpp                         | 30 +++++-----
 python/paddle/trainer/config_parser.py         |  1 +
 .../paddle/trainer_config_helpers/layers.py    | 57 ++++++++++++++++++-
 .../tests/configs/file_list.sh                 |  2 +-
 .../configs/protostr/test_smooth_l1.protostr   | 40 +++++++++++++
 .../tests/configs/test_smooth_l1.py            |  7 +++
 10 files changed, 135 insertions(+), 22 deletions(-)
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py

diff --git a/doc/api/v1/trainer_config_helpers/layers.rst b/doc/api/v1/trainer_config_helpers/layers.rst
index 24389c2d8574d..9dec89063ae94 100644
--- a/doc/api/v1/trainer_config_helpers/layers.rst
+++ b/doc/api/v1/trainer_config_helpers/layers.rst
@@ -498,6 +498,12 @@ hsigmoid
     :members: hsigmoid
     :noindex:
 
+smooth_l1
+---------
+..  automodule:: paddle.trainer_config_helpers.layers
+    :members: smooth_l1
+    :noindex:
+
 Check Layer
 ============
 
diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp
index 4ae5b828707eb..69d5830dd2a1a 100644
--- a/paddle/gserver/layers/CostLayer.cpp
+++ b/paddle/gserver/layers/CostLayer.cpp
@@ -217,7 +217,7 @@ void SmoothL1CostLayer::forwardImp(Matrix& output,
     targetCpu->copyFrom(target);
     outputCpu->copyFrom(output);
     labelCpu->copyFrom(*label.value);
-    targetCpu->smoothL1(*outputCpu, *(labelCpu));
+    targetCpu->smoothL1(*outputCpu, *labelCpu);
     target.copyFrom(*targetCpu);
   } else {
     target.smoothL1(output, *label.value);
diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h
index 569a6840f0d44..856d1012adc6c 100644
--- a/paddle/gserver/layers/CostLayer.h
+++ b/paddle/gserver/layers/CostLayer.h
@@ -91,8 +91,8 @@ class MultiClassCrossEntropy : public CostLayer {
  *
  * [1] Jacob Devlin, Rabih Zbib, Zhongqiang Huang, Thomas Lamar,
  *     Richard Schwartz, and John Makhoul. Fast and robust neural
- *     network joint models for statistical machine translation.
- *     In Proceedings of the ACL 2014 Conference.
+ *     network joint models for statistical machine translation. * In
+ *     Proceedings of the ACL 2014 Conference.
  */
 class MultiClassCrossEntropyWithSelfNorm : public CostLayer {
 public:
@@ -164,9 +164,11 @@ class SumOfSquaresCostLayer : public CostLayer {
  * tasks.
  * \f[
  * L =
- * (output - label)^2 * 0.5  / -1 < (output - label) < 1 /
- * (output - label) - 0.5    / otherwise /
+ * 0.5 * x^2    if / |x| < 1 /
+ * |x| - 0.5    / otherwise /
  * \f]
+ *
+ * x = output - label
  */
 class SmoothL1CostLayer : public CostLayer {
 public:
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 0d7bd8c3b8522..1bc5256b67c9e 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1685,7 +1685,7 @@ TEST(Layer, smooth_l1) {
   config.layerConfig.add_inputs();
 
   for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "smooth_l1", 100, false, useGpu, false, 2.0);
+    testLayerGrad(config, "smooth_l1", 100, false, useGpu, false);
   }
 }
 
diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 55a7344495f8e..6ac61be0bf1b7 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -3616,17 +3616,18 @@ void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
   CHECK_EQ(output.getHeight(), numSamples);
   CHECK_EQ(label.getWidth(), dim);
   CHECK_EQ(getWidth(), (size_t)1);
-  real* out = output.getData();
+
   real* cost = getData();
+  real* out = output.getData();
   real* lbl = label.getData();
 
-  for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) {
+  for (size_t i = 0; i < numSamples; ++i, out += dim, lbl += dim) {
     for (size_t j = 0; j < dim; ++j) {
-      cost[j] = std::fabs(out[j] - lbl[j]);
-      if (cost[j] < 1.0)
-        cost[j] = 0.5 * cost[j] * cost[j];
+      real absVal = std::fabs(out[j] - lbl[j]);
+      if (absVal < 1.0)
+        cost[i] += 0.5 * absVal * absVal;
       else
-        cost[j] = cost[j] - 0.5;
+        cost[i] += absVal - 0.5;
     }
   }
 }
@@ -3640,17 +3641,20 @@ void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) {
   CHECK_EQ(label.getHeight(), numSamples);
   CHECK_EQ(output.getHeight(), numSamples);
   CHECK_EQ(label.getWidth(), dim);
-  CHECK_EQ(getWidth(), (size_t)1);
+  CHECK_EQ(getWidth(), dim);
+
   real* out = output.getData();
-  real* cost = getData();
   real* lbl = label.getData();
+  real* grad = getData();
 
-  // f'(x) = x         if |x| < 1
-  //       = sign(x)   otherwise
-  for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) {
+  for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim, lbl += dim) {
     for (size_t j = 0; j < dim; ++j) {
-      cost[j] = out[j] - lbl[j];
-      if (std::fabs(cost[j]) >= 1) cost[j] = (0 < cost[j]) - (cost[j] < 0);
+      real val = out[j] - lbl[j];
+      if (std::fabs(val) < 1) {
+        grad[j] += val;
+      } else {
+        grad[j] += (real(0) < val) - (val < real(0));
+      }
     }
   }
 }
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index dc89419c40f8d..32e31fe2c446f 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2119,6 +2119,7 @@ def init(cls, name, inputs, device=None, coeff=1.):
 define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
 define_cost('HuberTwoClass', 'huber')
 define_cost('SumCost', 'sum_cost')
+define_cost('SmoothL1Cost', 'smooth_l1')
 
 
 @config_layer('hsigmoid')
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index f906126d87941..a0432b3966b31 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -116,6 +116,7 @@
     'spp_layer',
     'pad_layer',
     'eos_layer',
+    'smooth_l1_cost',
     'layer_support',
 ]
 
@@ -201,6 +202,7 @@ class LayerType(object):
     SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy"
     MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy"
     SUM_COST = "sum_cost"
+    SMOOTH_L1 = "smooth_l1"
 
     @staticmethod
     def is_layer_type(type_name):
@@ -5249,8 +5251,6 @@ def multi_binary_label_cross_entropy(input,
     :type input: LayerOutput
     :param label: The input label.
     :type input: LayerOutput
-    :param type: The type of cost.
-    :type type: basestring
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring
     :param coeff: The coefficient affects the gradient in the backward.
@@ -5279,3 +5279,56 @@ def multi_binary_label_cross_entropy(input,
         LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY,
         parents=[input, label],
         size=1)
+
+
+@wrap_name_default()
+@layer_support()
+def smooth_l1_cost(input, label, name=None, layer_attr=None):
+    """
+    This is an L1 loss but smoother. It requires that the
+    sizes of input and label are equal.
+
+    More details can be found by referring to `Fast R-CNN
+    <https://arxiv.org/abs/1504.08083>`_
+
+    .. math::
+
+        L = \sum_{i} smooth_{L1}(input_i - label_i)
+
+    in which
+
+    .. math::
+
+        mooth_{L1}(x) =
+        \begin{cases}
+        0.5x^2& \text{if} |x| < 1 \\
+        |x|-0.5& \text{otherwise}
+        \end{cases}
+
+    .. code-block:: python
+
+       cost = smooth_l1_cost(input=input_layer,
+                             label=label_layer)
+
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param label: The input label.
+    :type label: LayerOutput
+    :param name: The name of this layer. It is optional.
+    :type name: None|basestring
+    :param layer_attr: Extra Layer Attribute.
+    :type layer_attr: ExtraLayerAttribute
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    assert isinstance(input, LayerOutput)
+    assert isinstance(label, LayerOutput)
+    assert input.size == label.size
+
+    Layer(
+        name=name,
+        type=LayerType.SMOOTH_L1,
+        inputs=[input.name, label.name],
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name, LayerType.SMOOTH_L1, parents=[input, label], size=1)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index c9178e3c6a46a..ecb2e53364e32 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -5,6 +5,6 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
 img_layers img_trans_layers util_layers simple_rnn_layers unused_layers
 test_cost_layers test_rnn_group shared_fc shared_lstm shared_gru
 test_cost_layers_with_weight test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
-test_seq_concat_reshape)
+test_seq_concat_reshape test_smooth_l1)
 
 export whole_configs=(test_split_datasource)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr
new file mode 100644
index 0000000000000..4aa041ea2e173
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr
@@ -0,0 +1,40 @@
+type: "nn"
+layers {
+  name: "input"
+  type: "data"
+  size: 300
+  active_type: ""
+}
+layers {
+  name: "label"
+  type: "data"
+  size: 300
+  active_type: ""
+}
+layers {
+  name: "__smooth_l1_cost_0__"
+  type: "smooth_l1"
+  size: 1
+  active_type: ""
+  inputs {
+    input_layer_name: "input"
+  }
+  inputs {
+    input_layer_name: "label"
+  }
+  coeff: 1.0
+}
+input_layer_names: "input"
+input_layer_names: "label"
+output_layer_names: "__smooth_l1_cost_0__"
+sub_models {
+  name: "root"
+  layer_names: "input"
+  layer_names: "label"
+  layer_names: "__smooth_l1_cost_0__"
+  input_layer_names: "input"
+  input_layer_names: "label"
+  output_layer_names: "__smooth_l1_cost_0__"
+  is_recurrent_layer_group: false
+}
+
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py
new file mode 100644
index 0000000000000..66629662dd916
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py
@@ -0,0 +1,7 @@
+from paddle.trainer_config_helpers import *
+
+data = data_layer(name='input', size=300)
+lbl = data_layer(name='label', size=300)
+smooth_l1 = smooth_l1_cost(input=data, label=lbl)
+
+outputs(smooth_l1)

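For reference, the semantics that the patched CpuMatrix::smoothL1 and CpuMatrix::smoothL1Bp implement: the forward pass accumulates one scalar cost per sample, and the backward pass writes an elementwise gradient of the same width as the output. A minimal NumPy sketch of those semantics (the helper names are illustrative, not Paddle APIs):

    import numpy as np

    def smooth_l1_forward(output, label):
        # Per-sample cost, shape (num_samples, 1), mirroring CpuMatrix::smoothL1.
        x = np.abs(output - label)
        return np.where(x < 1.0, 0.5 * x * x, x - 0.5).sum(axis=1, keepdims=True)

    def smooth_l1_backward(output, label):
        # Gradient w.r.t. output, same shape as output, mirroring CpuMatrix::smoothL1Bp.
        x = output - label
        return np.where(np.abs(x) < 1.0, x, np.sign(x))
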
From 22f2519ebac6a30ba46115d14d21f5814bbb2602 Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Mon, 24 Apr 2017 15:34:25 +0800
Subject: [PATCH 2/7] refine documents

---
 python/paddle/trainer_config_helpers/layers.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index a0432b3966b31..56fca13d37242 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -5286,10 +5286,7 @@ def multi_binary_label_cross_entropy(input,
 def smooth_l1_cost(input, label, name=None, layer_attr=None):
     """
     This is an L1 loss but smoother. It requires that the
-    sizes of input and label are equal.
-
-    More details can be found by referring to `Fast R-CNN
-    <https://arxiv.org/abs/1504.08083>`_
+    sizes of input and label are equal. The formula is as follows:
 
     .. math::
 
@@ -5296,15 +5293,18 @@ def smooth_l1_cost(input, label, name=None, layer_attr=None):
         L = \sum_{i} smooth_{L1}(input_i - label_i)
 
     in which
 
     .. math::
 
         mooth_{L1}(x) =
         \begin{cases}
         0.5x^2& \text{if} |x| < 1 \\
         |x|-0.5& \text{otherwise}
         \end{cases}
 
+    More details can be found by referring to `Fast R-CNN
+    <https://arxiv.org/abs/1504.08083>`_
+
     .. code-block:: python
 
        cost = smooth_l1_cost(input=input_layer,

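A quick check on the refined formula: the quadratic and linear branches both give 0.5 at |x| = 1, so the loss is continuous where it switches. A few hand-computed values:

    # Worked values for smooth_L1(x); the two branches meet at |x| = 1.
    for x in (0.5, 1.0, 2.0):
        loss = 0.5 * x * x if abs(x) < 1 else abs(x) - 0.5
        print(x, loss)  # -> 0.125, 0.5, 1.5
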
"multi_binary_label_cross_entropy" SUM_COST = "sum_cost" + SMOOTH_L1 = "smooth_l1" @staticmethod def is_layer_type(type_name): @@ -5249,8 +5251,6 @@ def multi_binary_label_cross_entropy(input, :type input: LayerOutput :param label: The input label. :type input: LayerOutput - :param type: The type of cost. - :type type: basestring :param name: The name of this layers. It is not necessary. :type name: None|basestring :param coeff: The coefficient affects the gradient in the backward. @@ -5279,3 +5279,56 @@ def multi_binary_label_cross_entropy(input, LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY, parents=[input, label], size=1) + + +@wrap_name_default() +@layer_support() +def smooth_l1_cost(input, label, name=None, layer_attr=None): + """ + This is a L1 loss but more smooth. It requires that the + size of input and label are equal. + + More details can be found by referring to `Fast R-CNN + `_ + + .. math:: + + L = \sum_{i} smooth_{L1}(input_i - label_i) + + in which + + .. math:: + + mooth_{L1}(x) = + \begin{cases} + 0.5x^2& \text{if} |x| < 1 \\ + |x|-0.5& \text{otherwise} + \end{cases} + + .. code-block:: python + + cost = smooth_l1_cost(input=input_layer, + label=label_layer) + + :param input: The input layer. + :type input: LayerOutput + :param label: The input label. + :type input: LayerOutput + :param name: The name of this layers. It is not necessary. + :type name: None|basestring + :param layer_attr: Extra Layer Attribute. + :type layer_attr: ExtraLayerAttribute + :return: LayerOutput object. + :rtype: LayerOutput + """ + assert isinstance(input, LayerOutput) + assert isinstance(label, LayerOutput) + assert input.size == label.size + + Layer( + name=name, + type=LayerType.SMOOTH_L1, + inputs=[input.name, label.name], + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name, LayerType.SMOOTH_L1, parents=[input, label], size=1) diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index c9178e3c6a46a..ecb2e53364e32 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -5,6 +5,6 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops -test_seq_concat_reshape) +test_seq_concat_reshape test_smooth_l1) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr new file mode 100644 index 0000000000000..4aa041ea2e173 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr @@ -0,0 +1,40 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 300 + active_type: "" +} +layers { + name: "label" + type: "data" + size: 300 + active_type: "" +} +layers { + name: "__smooth_l1_cost_0__" + type: "smooth_l1" + size: 1 + active_type: "" + inputs { + input_layer_name: "input" + } + inputs { + input_layer_name: "label" + } + coeff: 1.0 +} +input_layer_names: "input" +input_layer_names: "label" +output_layer_names: "__smooth_l1_cost_0__" +sub_models { + name: "root" + layer_names: "input" + layer_names: "label" + layer_names: 
"__smooth_l1_cost_0__" + input_layer_names: "input" + input_layer_names: "label" + output_layer_names: "__smooth_l1_cost_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py new file mode 100644 index 0000000000000..66629662dd916 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py @@ -0,0 +1,7 @@ +from paddle.trainer_config_helpers import * + +data = data_layer(name='input', size=300) +lbl = data_layer(name='label', size=300) +smooth_l1 = smooth_l1_cost(input=data, label=lbl) + +outputs(smooth_l1) From 22f2519ebac6a30ba46115d14d21f5814bbb2602 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 24 Apr 2017 15:34:25 +0800 Subject: [PATCH 2/7] refine documents --- python/paddle/trainer_config_helpers/layers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index a0432b3966b31..56fca13d37242 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -5286,10 +5286,7 @@ def multi_binary_label_cross_entropy(input, def smooth_l1_cost(input, label, name=None, layer_attr=None): """ This is a L1 loss but more smooth. It requires that the - size of input and label are equal. - - More details can be found by referring to `Fast R-CNN - `_ + size of input and label are equal. The formula is as follows, .. math:: @@ -5305,6 +5302,9 @@ def smooth_l1_cost(input, label, name=None, layer_attr=None): |x|-0.5& \text{otherwise} \end{cases} + More details can be found by referring to `Fast R-CNN + `_ + .. code-block:: python cost = smooth_l1_cost(input=input_layer, From 4d23a942c7da4770b369c346440a6eb9f2b1c456 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 24 Apr 2017 16:58:55 +0800 Subject: [PATCH 3/7] fix code format --- paddle/gserver/layers/CostLayer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index 856d1012adc6c..14c0b33ec1a62 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -91,8 +91,8 @@ class MultiClassCrossEntropy : public CostLayer { * * [1] Jacob Devlin, Rabih Zbib, Zhongqiang Huang, Thomas Lamar, * Richard Schwartz, and John Makhoul. Fast and robust neural - * network joint models for statistical machine translation. * In - * Proceedings of the ACL 2014 Conference. + * network joint models for statistical machine translation. + * In Proceedings of the ACL 2014 Conference. 
From 2838491235c1eb85262f41ae45f2dee5c5fe2d72 Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Tue, 25 Apr 2017 11:04:32 +0800
Subject: [PATCH 5/7] add smooth_l1 interface to v2 doc.

---
 doc/api/v2/config/layer.rst                    | 5 +++++
 python/paddle/trainer_config_helpers/layers.py | 8 ++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst
index 2a02baf17ba0d..4e3589ebc4703 100644
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -419,6 +419,11 @@ hsigmoid
 .. autoclass:: paddle.v2.layer.hsigmoid
     :noindex:
 
+smooth_l1
+---------
+.. automodule:: paddle.v2.layer.smooth_l1
+    :noindex:
+
 Check Layer
 ============
 
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 56fca13d37242..1796e48f09ae5 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -116,7 +116,7 @@
     'spp_layer',
     'pad_layer',
     'eos_layer',
-    'smooth_l1_cost',
+    'smooth_l1',
     'layer_support',
 ]
 
@@ -5283,7 +5283,7 @@ def multi_binary_label_cross_entropy(input,
 
 @wrap_name_default()
 @layer_support()
-def smooth_l1_cost(input, label, name=None, layer_attr=None):
+def smooth_l1(input, label, name=None, layer_attr=None):
     """
     This is an L1 loss but smoother. It requires that the
     sizes of input and label are equal. The formula is as follows:
@@ -5307,8 +5307,8 @@ def smooth_l1(input, label, name=None, layer_attr=None):
 
     .. code-block:: python
 
-       cost = smooth_l1_cost(input=input_layer,
-                             label=label_layer)
+       cost = smooth_l1(input=input_layer,
+                        label=label_layer)
 
     :param input: The input layer.
     :type input: LayerOutput

From 63e4e16ced2ad8ccf150fd7f3930d7afdb2ce2c4 Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Tue, 25 Apr 2017 13:50:25 +0800
Subject: [PATCH 6/7] add smooth_l1 interface to v2 doc.

---
 doc/api/v1/trainer_config_helpers/layers.rst   |  6 +++---
 doc/api/v2/config/layer.rst                    |  2 +-
 python/paddle/trainer_config_helpers/layers.py | 14 +++++---------
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/doc/api/v1/trainer_config_helpers/layers.rst b/doc/api/v1/trainer_config_helpers/layers.rst
index 9dec89063ae94..75c1b35246486 100644
--- a/doc/api/v1/trainer_config_helpers/layers.rst
+++ b/doc/api/v1/trainer_config_helpers/layers.rst
@@ -498,10 +498,10 @@ hsigmoid
     :members: hsigmoid
     :noindex:
 
-smooth_l1
----------
+smooth_l1_cost
+--------------
 ..  automodule:: paddle.trainer_config_helpers.layers
-    :members: smooth_l1
+    :members: smooth_l1_cost
     :noindex:
 
 Check Layer
diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst
index 4e3589ebc4703..0ade7990193d7 100644
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -421,7 +421,7 @@ hsigmoid
 
 smooth_l1
 ---------
-.. automodule:: paddle.v2.layer.smooth_l1
+.. autoclass:: paddle.v2.layer.smooth_l1_cost
     :noindex:
 
 Check Layer
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 1796e48f09ae5..b9e3d26404227 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -116,7 +116,7 @@
     'spp_layer',
     'pad_layer',
     'eos_layer',
-    'smooth_l1',
+    'smooth_l1_cost',
     'layer_support',
 ]
 
@@ -5283,7 +5283,7 @@ def multi_binary_label_cross_entropy(input,
 
 @wrap_name_default()
 @layer_support()
-def smooth_l1(input, label, name=None, layer_attr=None):
+def smooth_l1_cost(input, label, name=None, layer_attr=None):
     """
     This is an L1 loss but smoother. It requires that the
     sizes of input and label are equal. The formula is as follows:
@@ -5296,19 +5296,15 @@
 
     .. math::
 
-        mooth_{L1}(x) =
-        \begin{cases}
-        0.5x^2& \text{if} |x| < 1 \\
-        |x|-0.5& \text{otherwise}
-        \end{cases}
+        smooth_{L1}(x) = \\begin{cases} 0.5x^2& \\text{if} \\ |x| < 1 \\\\ |x|-0.5& \\text{otherwise} \end{cases}
 
     More details can be found by referring to `Fast R-CNN
     <https://arxiv.org/abs/1504.08083>`_
 
     .. code-block:: python
 
-       cost = smooth_l1(input=input_layer,
-                        label=label_layer)
+       cost = smooth_l1_cost(input=input_layer,
+                             label=label_layer)
 
     :param input: The input layer.
     :type input: LayerOutput

From 8b997df35e009b6c8d9d14c82b042c01afbc87ba Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Tue, 25 Apr 2017 13:57:03 +0800
Subject: [PATCH 7/7] rename

---
 doc/api/v2/config/layer.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst
index 0ade7990193d7..154cfe24432f3 100644
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -419,8 +419,8 @@ hsigmoid
 .. autoclass:: paddle.v2.layer.hsigmoid
     :noindex:
 
-smooth_l1
----------
+smooth_l1_cost
+--------------
 .. autoclass:: paddle.v2.layer.smooth_l1_cost
     :noindex:
 

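With the full series applied, the layer is exposed as smooth_l1_cost in both the v1 helpers and the v2 docs. A v2 usage sketch, assuming the usual paddle.v2 data-layer setup (the data-layer calls below follow the standard v2 API and are not part of this series):

    import paddle.v2 as paddle

    # Illustrative only: dense 300-d prediction and target, as in test_smooth_l1.py.
    pred = paddle.layer.data(name='input', type=paddle.data_type.dense_vector(300))
    gt = paddle.layer.data(name='label', type=paddle.data_type.dense_vector(300))
    cost = paddle.layer.smooth_l1_cost(input=pred, label=gt)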