Feature/evaluator #5331
## Evaluator Design

### The Problem

During training or serving, we provide evaluation functions to measure model performance, e.g., accuracy and precision. In an operator-based framework design, data flows through the network pipeline batch by batch, so inside an operator we can only compute the metrics of a single mini-batch. We need a mechanism that aggregates these metrics over every N passes/batches the user wants.
### Evaluator Design

Currently, every operation is expressed in the graph, so we divide the evaluation process into three steps:

1. Initialize the necessary metric state and add it to the block.

2. Compute the statistics of the metric state for every mini-batch. A single operator is only responsible for computing the statistics of one mini-batch; for example, the accuracy operator only processes one mini-batch of data per run.
3. Merge the mini-batch statistics to form the evaluation result over multiple mini-batches. For distributed or multi-GPU training, aggregate the values from the different devices.
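The three steps above can be sketched as a plain-Python accumulator (a hypothetical illustration, independent of the operator framework; the class and method names are not part of the proposal):

```python
class AccuracyAccumulator:
    """Illustrates the init / per-batch-update / merge life cycle."""

    def __init__(self):
        # Step 1: initialize the metric state.
        self.correct = 0
        self.total = 0

    def update(self, num_correct, batch_size):
        # Step 2: fold in the statistics of one mini-batch.
        self.correct += num_correct
        self.total += batch_size

    def merge(self, other):
        # Step 3: aggregate state from another device/worker.
        self.correct += other.correct
        self.total += other.total

    def result(self):
        return self.correct / self.total if self.total else 0.0


# Two workers each process their own mini-batches, then merge.
w0, w1 = AccuracyAccumulator(), AccuracyAccumulator()
w0.update(3, 4)
w1.update(1, 2)
w0.merge(w1)
print(w0.result())  # 4 correct out of 6 samples
```

In the real design, `update` corresponds to the per-mini-batch operators added to the block, and `merge` to the cross-device aggregation.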
### Implementation

This design is shown through the Python API. There is an abstract Python interface with one subclass for each evaluation method.
```python
class Evaluator(object):
    """
    Evaluator base class.
    """

    def __init__(self):
        """
        Create the metric states and append them to the block.
        """
        pass

    def _clear_state(self):
        """
        Clear the metric states at the beginning of each pass.
        """
        pass

    def _append_evaluator_op(self):
        """
        Add the mini-batch calculation operators to the block.
        Add an increment operator to accumulate the metric state.
        """
        pass

    def _merge(self):
        """
        Merge the mini-batch statistics to form the evaluation result
        over multiple mini-batches.
        """
        pass

    def evaluate(self):
        """
        The only exported interface; the user calls it to obtain the result.
        """
        pass
```
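As a usage illustration, a concrete metric would subclass this interface. The sketch below is hypothetical (not part of the proposal): it fills the hooks with plain-Python state instead of graph operators, just to show how the pieces fit together.

```python
class Evaluator(object):
    """Minimal stand-in for the abstract base class above."""

    def evaluate(self):
        # The single exported interface: merge, then return the result.
        self._merge()
        return self._result


class AccuracyEvaluator(Evaluator):
    """Hypothetical subclass tracking accuracy with plain-Python state."""

    def __init__(self):
        # Metric state that the real design would store in the block.
        self._correct = 0
        self._total = 0
        self._result = 0.0

    def _clear_state(self):
        # Reset at the beginning of each pass.
        self._correct = 0
        self._total = 0

    def _append_evaluator_op(self, num_correct, batch_size):
        # Stand-in for the per-mini-batch operator plus the increment op.
        self._correct += num_correct
        self._total += batch_size

    def _merge(self):
        self._result = self._correct / self._total if self._total else 0.0


evaluator = AccuracyEvaluator()
for num_correct, batch_size in [(3, 4), (1, 2)]:
    evaluator._append_evaluator_op(num_correct, batch_size)
print(evaluator.evaluate())  # 4 correct out of 6 samples
```

In the actual framework the private hooks would append operators to the block rather than mutate Python attributes; only `evaluate` is exposed to users.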
The pull request also modifies the evaluator unit-test file, inserting an `exit(0)` near the top, apparently to disable the test temporarily:

```python
import unittest
import op_test
import numpy as np
exit(0)


class TestEvaluator(unittest.TestCase):
```