diff --git a/doc/api/v1/trainer_config_helpers/layers.rst b/doc/api/v1/trainer_config_helpers/layers.rst
index 24389c2d8574d..75c1b35246486 100644
--- a/doc/api/v1/trainer_config_helpers/layers.rst
+++ b/doc/api/v1/trainer_config_helpers/layers.rst
@@ -498,6 +498,12 @@ hsigmoid
     :members: hsigmoid
     :noindex:
 
+smooth_l1_cost
+--------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: smooth_l1_cost
+    :noindex:
+
 Check Layer
 ============
 
diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst
index 2a02baf17ba0d..154cfe24432f3 100644
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -419,6 +419,11 @@ hsigmoid
 .. autoclass:: paddle.v2.layer.hsigmoid
     :noindex:
 
+smooth_l1_cost
+--------------
+.. autoclass:: paddle.v2.layer.smooth_l1_cost
+    :noindex:
+
 Check Layer
 ============
 
diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp
index 4ae5b828707eb..69d5830dd2a1a 100644
--- a/paddle/gserver/layers/CostLayer.cpp
+++ b/paddle/gserver/layers/CostLayer.cpp
@@ -217,7 +217,7 @@ void SmoothL1CostLayer::forwardImp(Matrix& output,
     targetCpu->copyFrom(target);
     outputCpu->copyFrom(output);
     labelCpu->copyFrom(*label.value);
-    targetCpu->smoothL1(*outputCpu, *(labelCpu));
+    targetCpu->smoothL1(*outputCpu, *labelCpu);
     target.copyFrom(*targetCpu);
   } else {
     target.smoothL1(output, *label.value);
diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h
index 569a6840f0d44..14c0b33ec1a62 100644
--- a/paddle/gserver/layers/CostLayer.h
+++ b/paddle/gserver/layers/CostLayer.h
@@ -164,9 +164,11 @@ class SumOfSquaresCostLayer : public CostLayer {
  * tasks.
  * \f[
  * L =
- * (output - label)^2 * 0.5 / -1 < (output - label) < 1 /
- * (output - label) - 0.5 / otherwise /
+ * 0.5 * x^2    if / |x| < 1 /
+ * |x| - 0.5    / otherwise /
  * \f]
+ *
+ * x = output - label
  */
 class SmoothL1CostLayer : public CostLayer {
  public:
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 0d7bd8c3b8522..e1e8e7fae7ca4 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1679,13 +1679,13 @@ TEST(Layer, smooth_l1) {
   TestConfig config;
   config.layerConfig.set_type("smooth_l1");
 
-  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
-  config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 1, 0});
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 200, 0});
+  config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 200, 0});
   config.layerConfig.add_inputs();
   config.layerConfig.add_inputs();
 
   for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "smooth_l1", 100, false, useGpu, false, 2.0);
+    testLayerGrad(config, "smooth_l1", 100, false, useGpu, false);
   }
 }
diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 55a7344495f8e..6ac61be0bf1b7 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -3616,17 +3616,18 @@ void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
   CHECK_EQ(output.getHeight(), numSamples);
   CHECK_EQ(label.getWidth(), dim);
   CHECK_EQ(getWidth(), (size_t)1);
-  real* out = output.getData();
+
   real* cost = getData();
+  real* out = output.getData();
   real* lbl = label.getData();
 
-  for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) {
+  for (size_t i = 0; i < numSamples; ++i, out += dim, lbl += dim) {
     for (size_t j = 0; j < dim; ++j) {
-      cost[j] = std::fabs(out[j] - lbl[j]);
-      if (cost[j] < 1.0)
-        cost[j] = 0.5 * cost[j] * cost[j];
+      real absVal = std::fabs(out[j] - lbl[j]);
+      if (absVal < 1.0)
+        cost[i] += 0.5 * absVal * absVal;
       else
-        cost[j] = cost[j] - 0.5;
+        cost[i] += absVal - 0.5;
     }
   }
 }
@@ -3640,17 +3641,20 @@ void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) {
   CHECK_EQ(label.getHeight(), numSamples);
   CHECK_EQ(output.getHeight(), numSamples);
   CHECK_EQ(label.getWidth(), dim);
-  CHECK_EQ(getWidth(), (size_t)1);
+  CHECK_EQ(getWidth(), dim);
+
   real* out = output.getData();
-  real* cost = getData();
   real* lbl = label.getData();
+  real* grad = getData();
 
-  // f'(x) = x if |x| < 1
-  //       = sign(x) otherwise
-  for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) {
+  for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim, lbl += dim) {
     for (size_t j = 0; j < dim; ++j) {
-      cost[j] = out[j] - lbl[j];
-      if (std::fabs(cost[j]) >= 1) cost[j] = (0 < cost[j]) - (cost[j] < 0);
+      real val = out[j] - lbl[j];
+      if (std::fabs(val) < 1) {
+        grad[j] += val;
+      } else {
+        grad[j] += (real(0) < val) - (val < real(0));
+      }
     }
   }
 }
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index dc89419c40f8d..32e31fe2c446f 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2119,6 +2119,7 @@ def init(cls, name, inputs, device=None, coeff=1.):
 define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
 define_cost('HuberTwoClass', 'huber')
 define_cost('SumCost', 'sum_cost')
+define_cost('SmoothL1Cost', 'smooth_l1')
 
 
 @config_layer('hsigmoid')
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index f906126d87941..b9e3d26404227 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -116,6 +116,7 @@
     'spp_layer',
     'pad_layer',
     'eos_layer',
+    'smooth_l1_cost',
     'layer_support',
 ]
 
@@ -201,6 +202,7 @@ class LayerType(object):
     SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy"
     MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy"
     SUM_COST = "sum_cost"
+    SMOOTH_L1 = "smooth_l1"
 
     @staticmethod
     def is_layer_type(type_name):
@@ -5249,8 +5251,6 @@ def multi_binary_label_cross_entropy(input,
     :type input: LayerOutput
     :param label: The input label.
     :type input: LayerOutput
-    :param type: The type of cost.
-    :type type: basestring
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring
     :param coeff: The coefficient affects the gradient in the backward.
@@ -5279,3 +5279,52 @@ def multi_binary_label_cross_entropy(input,
         LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY,
         parents=[input, label],
         size=1)
+
+
+@wrap_name_default()
+@layer_support()
+def smooth_l1_cost(input, label, name=None, layer_attr=None):
+    """
+    This is an L1 loss, but smoother near zero. It requires that the
+    sizes of input and label are equal. The formula is as follows,
+
+    .. math::
+
+        L = \sum_{i} smooth_{L1}(input_i - label_i)
+
+    in which
+
+    .. math::
+
+        smooth_{L1}(x) = \\begin{cases} 0.5x^2& \\text{if}  \\ |x| < 1 \\\\ |x|-0.5& \\text{otherwise} \end{cases}
+
+    More details can be found by referring to `Fast R-CNN
+    `_
+
+    .. code-block:: python
+
+       cost = smooth_l1_cost(input=input_layer,
+                             label=label_layer)
+
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param label: The input label.
+    :type label: LayerOutput
+    :param name: The name of this layer. It is not necessary.
+    :type name: None|basestring
+    :param layer_attr: Extra Layer Attribute.
+    :type layer_attr: ExtraLayerAttribute
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    assert isinstance(input, LayerOutput)
+    assert isinstance(label, LayerOutput)
+    assert input.size == label.size
+
+    Layer(
+        name=name,
+        type=LayerType.SMOOTH_L1,
+        inputs=[input.name, label.name],
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name, LayerType.SMOOTH_L1, parents=[input, label], size=1)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index 164d365c15b8a..c5dc8e1aab08d 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -5,6 +5,6 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
 img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
 test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
 test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
-test_seq_concat_reshape test_pad)
+test_seq_concat_reshape test_pad test_smooth_l1)
 
 export whole_configs=(test_split_datasource)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr
new file mode 100644
index 0000000000000..4aa041ea2e173
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr
@@ -0,0 +1,40 @@
+type: "nn"
+layers {
+  name: "input"
+  type: "data"
+  size: 300
+  active_type: ""
+}
+layers {
+  name: "label"
+  type: "data"
+  size: 300
+  active_type: ""
+}
+layers {
+  name: "__smooth_l1_cost_0__"
+  type: "smooth_l1"
+  size: 1
+  active_type: ""
+  inputs {
+    input_layer_name: "input"
+  }
+  inputs {
+    input_layer_name: "label"
+  }
+  coeff: 1.0
+}
+input_layer_names: "input"
+input_layer_names: "label"
+output_layer_names: "__smooth_l1_cost_0__"
+sub_models {
+  name: "root"
+  layer_names: "input"
+  layer_names: "label"
+  layer_names: "__smooth_l1_cost_0__"
+  input_layer_names: "input"
+  input_layer_names: "label"
+  output_layer_names: "__smooth_l1_cost_0__"
+  is_recurrent_layer_group: false
+}
+
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py
new file mode 100644
index 0000000000000..66629662dd916
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py
@@ -0,0 +1,7 @@
+from paddle.trainer_config_helpers import *
+
+data = data_layer(name='input', size=300)
+lbl = data_layer(name='label', size=300)
+smooth_l1 = smooth_l1_cost(input=data, label=lbl)
+
+outputs(smooth_l1)
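
Note on the math (not part of the patch): the reworked CpuMatrix::smoothL1 accumulates a per-sample cost, cost[i] = sum_j smooth_L1(out[j] - lbl[j]), and smoothL1Bp writes the elementwise gradient, x if |x| < 1 and sign(x) otherwise. Below is a minimal sketch of the same computation for checking the new code by hand; NumPy and the function names here are illustrative assumptions, not Paddle APIs.

    # Illustrative reference only -- assumes NumPy; not part of the patch.
    import numpy as np

    def smooth_l1_forward(output, label):
        # Per-sample cost: sum_j smooth_L1(output_ij - label_ij),
        # mirroring the cost[i] += ... accumulation in CpuMatrix::smoothL1.
        diff = output - label
        a = np.abs(diff)
        return np.where(a < 1.0, 0.5 * a * a, a - 0.5).sum(axis=1)

    def smooth_l1_backward(output, label):
        # Gradient w.r.t. output: x if |x| < 1, else sign(x),
        # mirroring grad[j] += ... in CpuMatrix::smoothL1Bp.
        diff = output - label
        return np.where(np.abs(diff) < 1.0, diff, np.sign(diff))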