PaddlePaddle · pkuyym · May 22, 2017 · May 16, 2017 · May 16, 2017 · May 17, 2017
diff --git a/paddle/gserver/evaluators/ChunkEvaluator.cpp b/paddle/gserver/evaluators/ChunkEvaluator.cpp
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <vector>
 
 #include "paddle/math/Vector.h"
+#include "paddle/utils/StringUtil.h"
 
 #include "Evaluator.h"
 
@@ -74,6 +75,7 @@ class ChunkEvaluator : public Evaluator {
   std::vector<Segment> labelSegments_;
   std::vector<Segment> outputSegments_;
   std::set<int> excludedChunkTypes_;
+  mutable std::unordered_map<std::string, real> values_;
 
 public:
   virtual void init(const EvaluatorConfig& config) {
@@ -121,11 +123,9 @@ class ChunkEvaluator : public Evaluator {
   }
 
   virtual void printStats(std::ostream& os) const {
-    double precision = (double)numCorrect_ / numOutputSegments_;
-    double recall = (double)numCorrect_ / numLabelSegments_;
-    double f1 =
-        !numCorrect_ ? 0 : 2 * precision * recall / (precision + recall);
-    os << config_.name() << "=" << f1 << " true_chunks=" << numLabelSegments_
+    storeLocalValues();  // refresh values_ so "F1-score" below is current
+    os << config_.name() << "=" << values_["F1-score"]
+       << " true_chunks=" << numLabelSegments_
        << " result_chunks=" << numOutputSegments_
        << " correct_chunks=" << numCorrect_;
   }
@@ -243,6 +243,46 @@ class ChunkEvaluator : public Evaluator {
     if (tag == tagSingle_) return true;
     return false;
   }
+
+  // append "<evaluator name>.<metric>" for every metric held in values_
+  void getNames(std::vector<std::string>* names) {
+    storeLocalValues();
+    names->reserve(names->size() + values_.size());
+    for (const auto& metric : values_) {
+      names->push_back(config_.name() + "." + metric.first);
+    }
+  }
+
+  // get value by field name; only the piece after the last '.' is looked up
+  real getValue(const std::string& name, Error* err) const {
+    storeLocalValues();
+    std::vector<std::string> buffers;
+    paddle::str::split(name, '.', &buffers);
+    // back() on an empty vector is UB, so treat "no pieces" as not found
+    auto it = buffers.empty() ? values_.end() : values_.find(buffers.back());
+    if (it == values_.end()) {  // not found
+      *err = Error("No such key %s", name.c_str());
+      return 0.0f;
+    }
+    return it->second;
+  }
+
+  // evaluator type string; same name as passed to REGISTER_EVALUATOR
+  std::string getTypeImpl() const { return std::string("chunk"); }
+
+private:
+  // recompute precision, recall and F1-score from the counters into values_
+  void storeLocalValues() const {
+    CHECK_GE(numOutputSegments_, 0);
+    CHECK_GE(numLabelSegments_, 0);
+    const double correct = static_cast<double>(numCorrect_);
+    // a zero denominator (or zero correct chunks, for F1) maps to 0
+    const double p = numOutputSegments_ ? correct / numOutputSegments_ : 0;
+    const double r = numLabelSegments_ ? correct / numLabelSegments_ : 0;
+    values_["precision"] = p;
+    values_["recall"] = r;
+    values_["F1-score"] = numCorrect_ ? 2 * p * r / (p + r) : 0;
+  }
 };
 
 REGISTER_EVALUATOR(chunk, ChunkEvaluator);

diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py
@@ -347,39 +347,77 @@ def chunk_evaluator(
         excluded_chunk_types=None, ):
     """
     Chunk evaluator is used to evaluate segment labelling accuracy for a
-    sequence. It calculates the chunk detection F1 score.
+    sequence. It calculates precision, recall and F1 scores for the chunk detection.
 
-    A chunk is correctly detected if its beginning, end and type are correct.
-    Other chunk type is ignored.
+    To use the chunk evaluator, several concepts need to be clarified first.
+    Chunk type is the type of the whole chunk; a chunk consists of one or several words. (For example in NER: ORG for organization name, PER for person name, etc.)
+    Tag indicates the position of a word in a chunk. (B for begin, I for inside, E for end, S for single)
+    A label is named by combining the tag and the chunk type. (e.g. B-ORG for the beginning of an organization name)
 
-    For each label in the label sequence, we have:
+    The construction of label dict should obey the following rules:
+    (1) Use one of the listed labelling schemes. These schemes differ in how they indicate chunk boundaries.
 
     .. code-block:: python
+
+       Scheme    Description
+       plain     Use the same label for the whole chunk.
+       IOB       Two labels for chunk type X, B-X for chunk beginning and I-X for chunk inside.
+       IOE       Two labels for chunk type X, E-X for chunk ending and I-X for chunk inside.
+       IOBES     Four labels for chunk type X, B-X for chunk beginning, I-X for chunk inside, E-X for chunk end and S-X for single word chunk.
+
+    To make it clear, let's illustrate with an NER example.
+    Assume there are three named entity types, ORG, PER and LOC, which are called 'chunk types' here.
+    If the 'IOB' scheme is used, the label set is extended to B-ORG, I-ORG, B-PER, I-PER, B-LOC, I-LOC and O,
+    in which B-ORG marks the beginning of an ORG chunk and I-ORG marks the inside of an ORG chunk.
+    The prefixes added to the chunk types are called 'tag types' here; in this scheme there are two of them, B and I.
+    Of course, the training data should be labeled accordingly.
 
-       tagType = label % numTagType
-       chunkType = label / numTagType
-       otherChunkType = numChunkTypes
+    (2) Labels are mapped to ids according to the equations and the assignment rules listed below.
 
-    The total number of different labels is numTagType*numChunkTypes+1.
-    We support 4 labelling scheme.
-    The tag type for each of the scheme is shown as follows:
+    The following equations extract the tag type and the chunk type from a label.
 
     .. code-block:: python
+
+       tagType = label % numTagType
+       chunkType = label / numTagType
+       otherChunkType = numChunkTypes
+
+    The following table shows the mapping rule between tagType and tag type in each scheme.
 
-       Scheme Begin Inside End   Single
-       plain  0     -      -     -
-       IOB    0     1      -     -
-       IOE    -     0      1     -
-       IOBES  0     1      2     3
+    .. code-block:: python
+
+       Scheme Begin Inside End   Single
+       plain  0     -      -     -
+       IOB    0     1      -     -
+       IOE    -     0      1     -
+       IOBES  0     1      2     3
 
-    'plain' means the whole chunk must contain exactly the same chunk label.
+    Continuing the NER example, the label dict should look like this to satisfy the above equations:
+
+    .. code-block:: python
+
+       B-ORG  0
+       I-ORG  1
+       B-PER  2
+       I-PER  3
+       B-LOC  4
+       I-LOC  5
+       O      6
+
+    In this example, chunkType has three values: 0 for ORG, 1 for PER and 2 for LOC. Because the scheme is
+    "IOB", tagType has two values: 0 for B and 1 for I.
+    Here we use I-LOC to explain the above mapping rules in detail.
+    For I-LOC, the label id is 5, so we get tagType=1 and chunkType=2, which means I-LOC is part of the NER chunk LOC
+    and the tag is I.
 
     The simple usage is:
 
     .. code-block:: python
 
        eval = chunk_evaluator(input, label, chunk_scheme, num_chunk_types)
 
+
+
     :param input: The input layers.
     :type input: LayerOutput
     :param label: An input layer containing the ground truth label.