From c418dac5b44148c039e34ec9c12075b09f37534d Mon Sep 17 00:00:00 2001
From: Superjom
Date: Fri, 7 Jul 2017 18:45:42 +0800
Subject: [PATCH 01/37] add rnn op interfaces

---
 paddle/framework/recurrent_network_op.h | 98 +++++++++++++++++++++++++
 1 file changed, 98 insertions(+)
 create mode 100644 paddle/framework/recurrent_network_op.h

diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h
new file mode 100644
index 0000000000000..de8c3ba6c1c1e
--- /dev/null
+++ b/paddle/framework/recurrent_network_op.h
@@ -0,0 +1,98 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+#include "paddle/framework/scope.h"
+
+namespace paddle {
+namespace framework {
+
+// fake interfaces that have not been implemented by other modules.
+struct OpRunContext {
+  Scope* scope;
+};
+
+class OperatorBase {
+ public:
+  virtual ~OperatorBase() {}
+  virtual void Run(OpRunContext* context) const = 0;
+  virtual void InferShape(const Scope* scope) const = 0;
+
+ protected:
+  std::vector<std::string> inputs_;
+  std::vector<std::string> outputs_;
+}
+
+class RecurrentForwardOp {
+ public:
+  virtual void InferShape(const Scope* scope) = 0;
+  /*
+   * Forward run the RNN.
+   *
+   * NOTE the context's scope is not given until `Run` is called, so the step
+   * scopes' father should be set/updated in this method.
+   */
+  virtual void Run(OpRunContext* contex) const = 0;
+
+ protected:
+  /*
+   * Prepare inputs for each stepnet.
+   */
+  void ApplyInLinks(Scope* scope);
+
+  /*
+   * Process outputs of stepnets and merge them into variables.
+   */
+  void ApplyOutLinks(Scope* scope);
+
+  /*
+   * Build a `Net` which is shared across all steps.
+   */
+  void BuildStepNet(Scope* scope);
+
+  /*
+   * Create a scope for each step; the context's scope is shared across all
+   * the step scopes as their father scope. The step scopes will be stored in
+   * the father scope as a variable.
+   */
+  void CreateScopes(Scope* scope);
+
+  /*
+   * Prepare steps' states and relations.
+   */
+  void PrepareStates(Scope* scope);
+
+ protected:
+  /*
+   * these are defined in BaseOperator
+   *
+   * std::vector<std::string> inputs_;
+   * std::vector<std::string> outputs_;
+   */
+
+  // State of a RNN (same as the role of `Memory` in PaddlePaddle)
+  struct StateAttr {
+    // name of the current state variable
+    std::string var;
+    // name of the previous step's state variable
+    std::string pre_var;
+    // name of the variable used to init a state, which is stored in the
+    // context's scope.
+ std::string boot_var; + }; + std::vector states_; +}; + +class RecurrentBackwardOp; +} // namespace framework +} // namespace paddle From 604279516bc6801fe1720ca1bd95f84dcfe34958 Mon Sep 17 00:00:00 2001 From: Superjom Date: Fri, 7 Jul 2017 21:38:10 +0800 Subject: [PATCH 02/37] add Run --- paddle/framework/recurrent_network_op.h | 57 ++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index de8c3ba6c1c1e..b4ad7e1d94ec7 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -13,7 +13,10 @@ limitations under the License. */ #pragma once + +#include "paddle/framework/enforce.h" #include "paddle/framework/scope.h" +#include "paddle/framework/variable.h" namespace paddle { namespace framework { @@ -23,6 +26,11 @@ struct OpRunContext { Scope* scope; }; +// TODO replace this with Net's proto. +struct NetDesc { + std::string name; +} + class OperatorBase { public: virtual ~OperatorBase() {} @@ -34,8 +42,13 @@ class OperatorBase { std::vector outputs_; } -class RecurrentForwardOp { +class RecurrentGroupForwardOp { public: + RecurrentGroupForwardOp(NetDesc& net_desc) + : name_(net_desc.name), + net_name_(net_desc.name + "__net__"), + step_scopes_name_(net_desc.name + "__step_scopes_") {} + virtual void InferShape(const Scope* scope) = 0; /* * Forward run the RNN. @@ -43,7 +56,31 @@ class RecurrentForwardOp { * NOTE the context's scope is not given until `Run` called, so step scopes' * father should be set/updated in this method. */ - virtual void Run(OpRunContext* contex) const = 0; + virtual void Run(OpRunContext* contex) const { + auto scope = contex.scope; + + Variable* net = scope->GetVariable(net_name_); + if (net == nullptr) { + BuildStepNet(scope); + net = scope->GetVariable(net_name_); + } + PADDLE_ENFORCE(net); + + // expand lazily. + CreateScopes(scope); + ApplyInLinks(scope); + PrepareStates(scope); + Variable* step_scopes = scope->GetVariable(step_scopes_name_); + PADDLE_ENFORCE(step_scopes); + + // forward + for (Scope* step_scope : step_scopes->GetMutable>()) { + net->Run(step_scope); + } + + // prepare outputs + ApplyOutLinks(scope); + } protected: /* @@ -62,9 +99,9 @@ class RecurrentForwardOp { void BuildStepNet(Scope* scope); /* - * Create a scope for each step, the context's scope is shared across all the - * step scopes as the father scope. The step scopes will be stored in the - * father scope as a variable. + * Create a scope for each step, the context's scope is shared across all + * the step scopes as the father scope. The step scopes will be stored in + * the father scope as a variable. */ void CreateScopes(Scope* scope); @@ -87,12 +124,18 @@ class RecurrentForwardOp { std::string var; // name of previous step's state variable std::string pre_var; - // name of the variable to init a state, which is store in context's scope. + // name of the variable to init a state, which is store in context's + // scope. 
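+    //      (read only at step 0, where it stands in for the previous step's
+    //      state)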
std::string boot_var; }; + std::vector states_; + std::string name_; + + const std::string net_name_; + const std::string step_scopes_name_; }; -class RecurrentBackwardOp; +class RecurrentGroupBackwardOp; } // namespace framework } // namespace paddle From 13d8ca9357de5a5e53dcf09fb0bcc08b97c2dc47 Mon Sep 17 00:00:00 2001 From: Superjom Date: Fri, 7 Jul 2017 21:39:27 +0800 Subject: [PATCH 03/37] rename state -> memory --- paddle/framework/recurrent_network_op.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index b4ad7e1d94ec7..a3c8ed4cb7623 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -108,7 +108,7 @@ class RecurrentGroupForwardOp { /* * Prepare steps' states and relations. */ - void PrepareStates(Scope* scope); + void PrepareMemorys(Scope* scope); protected: /* @@ -118,8 +118,8 @@ class RecurrentGroupForwardOp { * std::vector outputs_; */ - // State of a RNN (same as the role of `Momory` in PaddlePaddle) - struct StateAttr { + // Memory of a RNN (same as the role of `Momory` in PaddlePaddle) + struct MemoryAttr { // name of current state variable std::string var; // name of previous step's state variable @@ -129,7 +129,7 @@ class RecurrentGroupForwardOp { std::string boot_var; }; - std::vector states_; + std::vector memorys_; std::string name_; const std::string net_name_; From a645ae661ab71f0cb74dbc06cffcae68bc83ce58 Mon Sep 17 00:00:00 2001 From: Superjom Date: Fri, 7 Jul 2017 21:43:39 +0800 Subject: [PATCH 04/37] change state -> memory --- paddle/framework/recurrent_network_op.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index a3c8ed4cb7623..38f6af3517157 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -68,8 +68,8 @@ class RecurrentGroupForwardOp { // expand lazily. CreateScopes(scope); - ApplyInLinks(scope); - PrepareStates(scope); + ScatterLinks(scope); + PrepareMemories(scope); Variable* step_scopes = scope->GetVariable(step_scopes_name_); PADDLE_ENFORCE(step_scopes); @@ -79,19 +79,19 @@ class RecurrentGroupForwardOp { } // prepare outputs - ApplyOutLinks(scope); + GatherOutLinks(scope); } protected: /* * Prepare inputs for each stepnet. */ - void ApplyInLinks(Scope* scope); + void ScatterInLinks(Scope* scope); /* * Process outputs of stepnets and merge to variables. */ - void ApplyOutLinks(Scope* scope); + void GatherOutLinks(Scope* scope); /* * Build a `Net` which is shared across all steps. @@ -108,7 +108,7 @@ class RecurrentGroupForwardOp { /* * Prepare steps' states and relations. 
*/ - void PrepareMemorys(Scope* scope); + void PrepareMemories(Scope* scope); protected: /* @@ -129,7 +129,7 @@ class RecurrentGroupForwardOp { std::string boot_var; }; - std::vector memorys_; + std::vector memories_; std::string name_; const std::string net_name_; From 8640f96a4d9b4e86bf691eedf96c411eaf3b1554 Mon Sep 17 00:00:00 2001 From: Superjom Date: Sat, 8 Jul 2017 11:20:56 +0800 Subject: [PATCH 05/37] make compilable --- paddle/framework/CMakeLists.txt | 1 + paddle/framework/recurrent_network_op.h | 85 ++++++++++++------------- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 4409c6feae218..f7d640e1e54d3 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -16,3 +16,4 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc. # Generate an empty __init__.py to make framework_py_proto as a valid python module. add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) +add_library(recurrent_network_op recurrent_network_op.cc) diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 38f6af3517157..5ec83de450d22 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -21,7 +21,9 @@ namespace paddle { namespace framework { +// -------------------------------------------------------------------- // fake interfaces that has not be implemented by other modules. +// TODO keep updating according to other modules' designs. struct OpRunContext { Scope* scope; }; @@ -29,7 +31,14 @@ struct OpRunContext { // TODO replace this with Net's proto. struct NetDesc { std::string name; -} +}; + +class PlainNet { + public: + PlainNet() {} + PlainNet(const NetDesc& desc) {} + void Run(Scope* scope) {} +}; class OperatorBase { public: @@ -40,75 +49,59 @@ class OperatorBase { protected: std::vector inputs_; std::vector outputs_; -} +}; +// fake interfaces end +// -------------------------------------------------------------------- -class RecurrentGroupForwardOp { +class RecurrentOp : public OperatorBase { public: - RecurrentGroupForwardOp(NetDesc& net_desc) + RecurrentOp(NetDesc& net_desc) : name_(net_desc.name), net_name_(net_desc.name + "__net__"), step_scopes_name_(net_desc.name + "__step_scopes_") {} - virtual void InferShape(const Scope* scope) = 0; + virtual void InferShape(const Scope* scope) const override; + /* * Forward run the RNN. * * NOTE the context's scope is not given until `Run` called, so step scopes' * father should be set/updated in this method. */ - virtual void Run(OpRunContext* contex) const { - auto scope = contex.scope; - - Variable* net = scope->GetVariable(net_name_); - if (net == nullptr) { - BuildStepNet(scope); - net = scope->GetVariable(net_name_); - } - PADDLE_ENFORCE(net); - - // expand lazily. - CreateScopes(scope); - ScatterLinks(scope); - PrepareMemories(scope); - Variable* step_scopes = scope->GetVariable(step_scopes_name_); - PADDLE_ENFORCE(step_scopes); - - // forward - for (Scope* step_scope : step_scopes->GetMutable>()) { - net->Run(step_scope); - } - - // prepare outputs - GatherOutLinks(scope); - } + virtual void Run(OpRunContext* contex) const override; protected: /* * Prepare inputs for each stepnet. */ - void ScatterInLinks(Scope* scope); + void SegmentInputs(Scope* scope) const; /* * Process outputs of stepnets and merge to variables. 
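   * (i.e. once all steps have run, the per-step outputs are gathered back
   * into the op's output variables in the father scope)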
*/ - void GatherOutLinks(Scope* scope); + void ConcateOutputs(Scope* scope) const; /* - * Build a `Net` which is shared across all steps. + * Create a `Net` which is shared across all steps. */ - void BuildStepNet(Scope* scope); + void CreateStepNet(Scope* scope) const; /* * Create a scope for each step, the context's scope is shared across all * the step scopes as the father scope. The step scopes will be stored in - * the father scope as a variable. + * the father scope as a variable whose name is specified by + * `step_scopes_name_`. + * + * NOTE the scopes are reused by both the `Forward` and `Backward`, so just + * create once and expand its size if more steps need. */ - void CreateScopes(Scope* scope); + void CreateScopes(Scope* scope) const; /* - * Prepare steps' states and relations. + * Prepare steps' states and link previous state's memory to current scope by + * a `reference`. */ - void PrepareMemories(Scope* scope); + void PrepareMemories(Scope* scope) const; protected: /* @@ -124,18 +117,24 @@ class RecurrentGroupForwardOp { std::string var; // name of previous step's state variable std::string pre_var; - // name of the variable to init a state, which is store in context's - // scope. + // name of the variables to init this memory (same role of `boot_layer` in + // PaddlePaddle), which is store in father's scope. std::string boot_var; }; - std::vector memories_; + // this op's name, used as a unique key in father scope. + // TODO repace it with OpBase's interface if supported. std::string name_; - + // name of rnn op's step net, the step net will be shared by both `Forward` + // and `Backward`, so we store it as a variable in father's scope, with a + // unique key specified by `net_name_`. const std::string net_name_; + // name of steps' scopes which is store in father scope with a unique key + // specified by `step_scopes_name_`. 
+  const std::string step_scopes_name_;
+};

-class RecurrentGroupBackwardOp;
+class RecurrentGradientOp;
+
 }  // namespace framework
 }  // namespace paddle

From d4cde5176162ab508673ae13aca6e8d43528b7b7 Mon Sep 17 00:00:00 2001
From: Superjom
Date: Sat, 8 Jul 2017 11:24:21 +0800
Subject: [PATCH 06/37] add .cc

---
 paddle/framework/recurrent_network_op.cc | 32 ++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 paddle/framework/recurrent_network_op.cc

diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc
new file mode 100644
index 0000000000000..ada7934f72038
--- /dev/null
+++ b/paddle/framework/recurrent_network_op.cc
@@ -0,0 +1,32 @@
+#include "paddle/framework/recurrent_network_op.h"
+
+namespace paddle {
+namespace framework {
+
+void RecurrentOp::Run(OpRunContext* contex) const {
+  auto scope = contex->scope;
+
+  Variable* net = scope->GetVariable(net_name_);
+  if (net == nullptr) {
+    CreateStepNet(scope);
+    net = scope->GetVariable(net_name_);
+  }
+  PADDLE_ENFORCE(net, "failed to create step net");
+
+  CreateScopes(scope);
+  SegmentInputs(scope);
+  PrepareMemories(scope);
+
+  Variable* step_scopes = scope->GetVariable(step_scopes_name_);
+  PADDLE_ENFORCE(step_scopes, "failed to get scopes");
+  // forward
+  for (Scope* step_scope : *step_scopes->GetMutable<std::vector<Scope*>>()) {
+    net->GetMutable<PlainNet>()->Run(step_scope);
+  }
+
+  // prepare outputs
+  ConcateOutputs(scope);
+}
+
+}  // namespace framework
+}  // namespace paddle

From 6e9928960d98da7597af938f8299da66dcdd5f5c Mon Sep 17 00:00:00 2001
From: Superjom
Date: Sat, 8 Jul 2017 19:05:43 +0800
Subject: [PATCH 07/37] init test

---
 paddle/framework/CMakeLists.txt               |  1 +
 paddle/framework/recurrent_network_op.cc      | 54 ++++++++++++++---
 paddle/framework/recurrent_network_op.h       | 16 ++++--
 paddle/framework/recurrent_network_op_test.cc | 20 +++++++
 4 files changed, 79 insertions(+), 12 deletions(-)

diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index f7d640e1e54d3..6b8e8a38f9eee 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -17,3 +17,4 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto)
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) add_library(recurrent_network_op recurrent_network_op.cc) +cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index ada7934f72038..a0819f06d0098 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -1,4 +1,5 @@ #include "paddle/framework/recurrent_network_op.h" +#include "paddle/framework/tensor.h" namespace paddle { namespace framework { @@ -6,21 +7,38 @@ namespace framework { void RecurrentOp::Run(OpRunContext* contex) const { auto scope = contex->scope; - Variable* net = scope->GetVariable(net_name_); - if (net == nullptr) { + if (!scope->HasVariable(net_name_)) { CreateStepNet(scope); - net = scope->GetVariable(net_name_); } - PADDLE_ENFORCE(net, "failed to create step net"); + Variable* net = scope->GetVariable(net_name_); + PADDLE_ENFORCE(net, "failed to get step net"); CreateScopes(scope); SegmentInputs(scope); - PrepareMemories(scope); + CreateMemories(scope); Variable* step_scopes = scope->GetVariable(step_scopes_name_); - PADDLE_ENFORCE(step_scopes, "failed to get scopes"); + PADDLE_ENFORCE(step_scopes, "failed to get step scopes"); // forward - for (Scope* step_scope : *step_scopes->GetMutable>()) { + auto& scopes = *step_scopes->GetMutable>(); + for (size_t step_id = 0; step_id < scopes.size(); step_id++) { + Scope* step_scope = scopes[step_id]; + // TODO replace memorys' copy with reference + // copy pre-memory + for (const auto& attr : memory_attrs_) { + Variable* pre_memory_var = step_scope->CreateVariable(attr.pre_var); + // copy boot_var to current memory in first step + if (step_id == 0) { + Variable* boot_var = step_scope->GetVariable(attr.boot_var); + *pre_memory_var->GetMutable() = *boot_var->GetMutable(); + // copy varible of memory in previous scope to current pre-memory + } else { + Variable* pre_state_var = scopes[step_id - 1]->GetVariable(attr.var); + *pre_memory_var->GetMutable() = + *pre_state_var->GetMutable(); + } + } + net->GetMutable()->Run(step_scope); } @@ -28,5 +46,27 @@ void RecurrentOp::Run(OpRunContext* contex) const { ConcateOutputs(scope); } +void RecurrentOp::CreateMemories(Scope* scope) const { + Variable* scopes_var = scope->CreateVariable(step_scopes_name_); + auto scopes = scopes_var->GetMutable>(); + PADDLE_ENFORCE(!scopes->empty(), "step scopes should be created before."); + + PADDLE_ENFORCE(!memory_attrs_.empty(), + "memory attributes should be provided."); + for (size_t i = 0; i < scopes->size(); i++) { + for (const auto& attr : memory_attrs_) { + // check boot var exists + PADDLE_ENFORCE(scope->HasVariable(attr.boot_var), + "boot var %s not in context scope", attr.boot_var); + // create the memory in this scope + scope->CreateVariable(attr.var); + // create pre-memory in this scope + scope->CreateVariable(attr.pre_var); + // TODO reference pre-memory to the memory in previous scope if Variance + // supports reference + } + } +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 5ec83de450d22..96fe666125e5a 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -98,12 +98,16 @@ class RecurrentOp : public OperatorBase { void CreateScopes(Scope* scope) const; /* - * Prepare steps' 
states and link previous state's memory to current scope by - * a `reference`. + * Create memories in each step scope. */ - void PrepareMemories(Scope* scope) const; + void CreateMemories(Scope* scope) const; - protected: + /* + * Link memory in previous step scope to current scope. + */ + // void LinkMemories(Scope* scope) const; + + private: /* * these are defined in BaseOperator * @@ -122,6 +126,8 @@ class RecurrentOp : public OperatorBase { std::string boot_var; }; + std::vector memory_attrs_; + // this op's name, used as a unique key in father scope. // TODO repace it with OpBase's interface if supported. std::string name_; @@ -129,7 +135,7 @@ class RecurrentOp : public OperatorBase { // and `Backward`, so we store it as a variable in father's scope, with a // unique key specified by `net_name_`. const std::string net_name_; - // name of steps' scopes which is store in father scope with a unique key + // name of steps' scopes which is stored in father scope with a unique key // specified by `step_scopes_name_`. const std::string step_scopes_name_; }; diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc new file mode 100644 index 0000000000000..1647f170ca65c --- /dev/null +++ b/paddle/framework/recurrent_network_op_test.cc @@ -0,0 +1,20 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "paddle/framework/recurrent_network_op.h" +#include "gtest/gtest.h" + +namespace paddle { +namespace framework {} // namespace framework + +} // namespace paddle From 007ca1e2f0605b5d447dddcdb4e7a070a0e7c75f Mon Sep 17 00:00:00 2001 From: Superjom Date: Mon, 10 Jul 2017 09:13:42 +0800 Subject: [PATCH 08/37] add op fake implementation --- paddle/framework/recurrent_network_op.cc | 35 +++++++++++++++++++ paddle/framework/recurrent_network_op_test.cc | 8 ++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index a0819f06d0098..ef09fca47d211 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -4,6 +4,41 @@ namespace paddle { namespace framework { +// fake op implementations +namespace fake { +class FcOp : public OperatorBase { + public: + FcOp(NetDesc& net_desc) : name_(net_desc.name) {} + + virtual void InferShape(const Scope* scope) const override { + LOG(INFO) << "fc InferShape"; + } + + virtual void Run(OpRunContext* contex) const override { + LOG(INFO) << "fc Run"; + } + + private: + std::string name_; +}; + +class SGDOptimizerOp : public OperatorBase { + public: + FcOp(NetDesc& net_desc) : name_(net_desc.name) {} + + virtual void InferShape(const Scope* scope) const override { + LOG(INFO) << "optimizer InferShape"; + } + + virtual void Run(OpRunContext* contex) const override { + LOG(INFO) << "optimizer Run"; + } + + private: + std::string name_; +}; +}; // namespace fake + void RecurrentOp::Run(OpRunContext* contex) const { auto scope = contex->scope; diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc index 1647f170ca65c..83d7bb3c15388 100644 --- a/paddle/framework/recurrent_network_op_test.cc +++ b/paddle/framework/recurrent_network_op_test.cc @@ -15,6 +15,12 @@ #include "gtest/gtest.h" namespace paddle { -namespace framework {} // namespace framework +namespace framework { + +class RecurrentOpTest : public ::testing::Test { + protected: + virtual void SetUp() override {} +}; +} // namespace framework } // namespace paddle From 2538b2fac5f8409cd15c04cbb3c2ae89cd9feee7 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 10 Jul 2017 09:48:54 +0800 Subject: [PATCH 09/37] add CreateStepNet and CreateScopes implementation. 
--- paddle/framework/recurrent_network_op.cc | 33 +++++++++++++++++++++++- paddle/framework/recurrent_network_op.h | 22 +++++++++++++--- paddle/framework/tensor.h | 4 +++ paddle/framework/variable.h | 5 ++++ 4 files changed, 59 insertions(+), 5 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index a0819f06d0098..a6a6d3d4c33b1 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -20,8 +20,10 @@ void RecurrentOp::Run(OpRunContext* contex) const { Variable* step_scopes = scope->GetVariable(step_scopes_name_); PADDLE_ENFORCE(step_scopes, "failed to get step scopes"); // forward + auto dims = Input(scope, 0)->GetMutable()->dims(); + size_t seq_len = dims[1]; auto& scopes = *step_scopes->GetMutable>(); - for (size_t step_id = 0; step_id < scopes.size(); step_id++) { + for (size_t step_id = 0; step_id < seq_len; step_id++) { Scope* step_scope = scopes[step_id]; // TODO replace memorys' copy with reference // copy pre-memory @@ -46,6 +48,35 @@ void RecurrentOp::Run(OpRunContext* contex) const { ConcateOutputs(scope); } +void RecurrentOp::CreateScopes(Scope* scope) const { + auto dims = Input(scope, 0)->GetMutable()->dims(); + size_t seq_len = dims[1]; + Variable* scopes_var = scope->GetVariable(step_scopes_name_); + // auto step_scopes = + // scopes_var->GetMutable>>(); + auto step_scopes = scopes_var->GetMutable>(); + // TODO Only two scopes are needed for inference, this case will be supported + // later. + if (seq_len > step_scopes->size()) { + for (size_t i = step_scopes->size(); i < seq_len; ++i) { + // step_scopes->push_back(std::make_shared( + // std::shared_ptr(scope))); + step_scopes->push_back(new Scope(std::shared_ptr(scope))); + } + } +} + +void RecurrentOp::CreateStepNet(Scope* scope) const { + Variable* var = scope->CreateVariable(net_name_); + auto step_net = GetAttr("step_net"); + // get the step net proto from the string. + // PADDLE_ENFORCE( + // google::protobuf::TextFormat::ParseFromString(step_net, + // &step_net_desc_)); + // this is a fake net, it will be rewrite after the network has been merged. + var->Reset(new PlainNet(step_net)); +} + void RecurrentOp::CreateMemories(Scope* scope) const { Variable* scopes_var = scope->CreateVariable(step_scopes_name_); auto scopes = scopes_var->GetMutable>(); diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 96fe666125e5a..1a509deda36b8 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -14,6 +14,8 @@ #pragma once +#include +#include "paddle/framework/attr_checker.h" #include "paddle/framework/enforce.h" #include "paddle/framework/scope.h" #include "paddle/framework/variable.h" @@ -30,13 +32,14 @@ struct OpRunContext { // TODO replace this with Net's proto. 
struct NetDesc { - std::string name; + std::string name_; }; class PlainNet { public: PlainNet() {} PlainNet(const NetDesc& desc) {} + PlainNet(const std::string desc) {} void Run(Scope* scope) {} }; @@ -45,10 +48,19 @@ class OperatorBase { virtual ~OperatorBase() {} virtual void Run(OpRunContext* context) const = 0; virtual void InferShape(const Scope* scope) const = 0; + inline Variable* Input(Scope* scope, int index) const { + return scope->GetVariable(inputs_[index]); + }; + + template + inline const T GetAttr(const std::string& name) const { + return boost::get(attrs_.at(name)); + } protected: std::vector inputs_; std::vector outputs_; + AttributeMap attrs_; }; // fake interfaces end // -------------------------------------------------------------------- @@ -56,9 +68,9 @@ class OperatorBase { class RecurrentOp : public OperatorBase { public: RecurrentOp(NetDesc& net_desc) - : name_(net_desc.name), - net_name_(net_desc.name + "__net__"), - step_scopes_name_(net_desc.name + "__step_scopes_") {} + : name_(net_desc.name_), + net_name_(net_desc.name_ + "__net__"), + step_scopes_name_(net_desc.name_ + "__step_scopes_") {} virtual void InferShape(const Scope* scope) const override; @@ -138,6 +150,8 @@ class RecurrentOp : public OperatorBase { // name of steps' scopes which is stored in father scope with a unique key // specified by `step_scopes_name_`. const std::string step_scopes_name_; + + const NetDesc step_net_desc_; }; class RecurrentGradientOp; diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index ce5d98b04e6b5..6f1ae09fc0060 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -42,6 +42,7 @@ class Tensor { || holder_->Size() < product(dims) * sizeof(T)) { holder_.reset(new PlaceholderImpl(place, product(dims) * sizeof(T))); } + dims_ = dims; return static_cast(holder_->Ptr()); } @@ -51,6 +52,8 @@ class Tensor { return mutable_data(dims, paddle::platform::get_place()); } + const DDim& dims() const { return dims_; } + private: // Placeholder hides type T, so it doesn't appear as a template // parameter of Variable. @@ -91,6 +94,7 @@ class Tensor { size_t size_; // size of the memory block. }; + DDim dims_; std::shared_ptr holder_; // holds the memory block if allocated. 
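+
+  // NOTE: `Placeholder` type-erases the element type T, so the shape must be
+  // cached here: `dims_` (added by this patch) is set in `mutable_data` and
+  // read back by `dims()` without knowing T.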
}; diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h index 72c4a7a2a1d1c..adc00f5492fd4 100644 --- a/paddle/framework/variable.h +++ b/paddle/framework/variable.h @@ -29,6 +29,11 @@ class Variable { return *static_cast(holder_->Ptr()); } + template + void Reset(T* p) { + holder_.reset(new PlaceholderImpl(p)); + } + template T* GetMutable() { if (!IsType()) { From 5eb87f0c697ba2a5a9a30da38939b7ded5077322 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 10 Jul 2017 11:29:24 +0800 Subject: [PATCH 10/37] add TODO list --- paddle/framework/recurrent_network_op.cc | 16 +++++++--------- paddle/framework/recurrent_network_op.h | 5 ++++- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index a6a6d3d4c33b1..5ff71ee83a2ca 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -30,15 +30,13 @@ void RecurrentOp::Run(OpRunContext* contex) const { for (const auto& attr : memory_attrs_) { Variable* pre_memory_var = step_scope->CreateVariable(attr.pre_var); // copy boot_var to current memory in first step - if (step_id == 0) { - Variable* boot_var = step_scope->GetVariable(attr.boot_var); - *pre_memory_var->GetMutable() = *boot_var->GetMutable(); - // copy varible of memory in previous scope to current pre-memory - } else { - Variable* pre_state_var = scopes[step_id - 1]->GetVariable(attr.var); - *pre_memory_var->GetMutable() = - *pre_state_var->GetMutable(); - } + + Variable* pre_state_var = + (step_id == 0) ? step_scope->GetVariable(attr.boot_var) + : scopes[step_id - 1]->GetVariable(attr.var); + // copy varible of memory in previous scope to current pre-memory + *pre_memory_var->GetMutable() = + *pre_state_var->GetMutable(); } net->GetMutable()->Run(step_scope); diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 1a509deda36b8..0f5bcd2ad316d 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -64,7 +64,10 @@ class OperatorBase { }; // fake interfaces end // -------------------------------------------------------------------- - +// TODO: +// 1. No-padding computing for sequences with indifinite length in one batch. +// 2. Hierarchical RNN for sequence with sub-sequence. +// 3. Multi-inputs with indifinate length for RecurrentOp. class RecurrentOp : public OperatorBase { public: RecurrentOp(NetDesc& net_desc) From ca53f3a746b4e059fa64fb5e06ade14e81d694a9 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 10 Jul 2017 14:51:17 +0800 Subject: [PATCH 11/37] init memory attributes. --- paddle/framework/CMakeLists.txt | 4 +- paddle/framework/op_desc.proto | 7 +++- paddle/framework/recurrent_network_op.h | 51 ++++++++++++++++++++----- 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 6b8e8a38f9eee..e61979f265d71 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -16,5 +16,5 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc. # Generate an empty __init__.py to make framework_py_proto as a valid python module. 
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) -add_library(recurrent_network_op recurrent_network_op.cc) -cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc) +cc_library(recurrent_network_op SRCS recurrent_network_op.cc DEPS op_desc) +#cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc) diff --git a/paddle/framework/op_desc.proto b/paddle/framework/op_desc.proto index 89497f3c16bc2..84aaf59ac5532 100644 --- a/paddle/framework/op_desc.proto +++ b/paddle/framework/op_desc.proto @@ -51,6 +51,9 @@ message OpDesc { // type of this Operator, such as "add", "sub", "fc". required string type = 3; + // the name of this Operator. + required string name = 4; + // Attributes of this Operator. e.g., scale=3.0 in cosine op. - repeated AttrDesc attrs = 4; -}; \ No newline at end of file + repeated AttrDesc attrs = 5; +}; diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 1a509deda36b8..613e00f0b8367 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -20,6 +20,10 @@ #include "paddle/framework/scope.h" #include "paddle/framework/variable.h" +// Remove when including operator.h +#include "paddle/framework/attr_checker.h" +#include "paddle/framework/op_desc.pb.h" + namespace paddle { namespace framework { @@ -46,6 +50,7 @@ class PlainNet { class OperatorBase { public: virtual ~OperatorBase() {} + void Init(const OpDesc& op_desc, AttributeMap& attrs) {} virtual void Run(OpRunContext* context) const = 0; virtual void InferShape(const Scope* scope) const = 0; inline Variable* Input(Scope* scope, int index) const { @@ -67,10 +72,22 @@ class OperatorBase { class RecurrentOp : public OperatorBase { public: - RecurrentOp(NetDesc& net_desc) - : name_(net_desc.name_), - net_name_(net_desc.name_ + "__net__"), - step_scopes_name_(net_desc.name_ + "__step_scopes_") {} + void Init(const OpDesc& op_desc, AttributeMap& attrs) { + OperatorBase::Init(op_desc, attrs); + name_ = op_desc.name(); + net_name_ = op_desc.name() + "_net"; + step_scopes_name_ = op_desc.name() + "_step_scopes"; + auto memories = GetAttr>("memories"); + auto boot_memories = GetAttr>("boot_memories"); + PADDLE_ENFORCE(memories.size() == boot_memories.size(), + "The size of memories and boot_memories is mismatched."); + for (size_t i = 0; i < memories.size(); ++i) { + MemoryAttr mem_attr; + mem_attr.var = memories[i]; + mem_attr.boot_var = boot_memories[i]; + memory_attrs_.push_back(mem_attr); + } + } virtual void InferShape(const Scope* scope) const override; @@ -86,12 +103,12 @@ class RecurrentOp : public OperatorBase { /* * Prepare inputs for each stepnet. */ - void SegmentInputs(Scope* scope) const; + void SegmentInputs(Scope* scope) const {}; /* * Process outputs of stepnets and merge to variables. */ - void ConcateOutputs(Scope* scope) const; + void ConcateOutputs(Scope* scope) const {}; /* * Create a `Net` which is shared across all steps. @@ -138,6 +155,22 @@ class RecurrentOp : public OperatorBase { std::string boot_var; }; + /* + * The attributes in protobuf about the memory description and the booted + * memory description are as follows. The number of booted memories should + * equal to the memories number. 
+ * + * arg { + * name: “memories” + * strings: "hidden” + * strings: "state” + * } + * arg { + * name: “boot_memories” + * strings: "boot_hidden” + * strings: "boot_state” + * } + */ std::vector memory_attrs_; // this op's name, used as a unique key in father scope. @@ -146,12 +179,12 @@ class RecurrentOp : public OperatorBase { // name of rnn op's step net, the step net will be shared by both `Forward` // and `Backward`, so we store it as a variable in father's scope, with a // unique key specified by `net_name_`. - const std::string net_name_; + std::string net_name_; // name of steps' scopes which is stored in father scope with a unique key // specified by `step_scopes_name_`. - const std::string step_scopes_name_; + std::string step_scopes_name_; - const NetDesc step_net_desc_; + NetDesc step_net_desc_; }; class RecurrentGradientOp; From 1e48cc8546e992dc77d10a889ff1d11e29673b70 Mon Sep 17 00:00:00 2001 From: Superjom Date: Mon, 10 Jul 2017 16:16:27 +0800 Subject: [PATCH 12/37] add LinkMemories --- paddle/framework/recurrent_network_op.cc | 73 +++++++++++++----------- paddle/framework/recurrent_network_op.h | 20 ++++--- 2 files changed, 53 insertions(+), 40 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index 3aa0c030ac6a6..e4d7a327c5a1f 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -1,3 +1,6 @@ +#include +#include + #include "paddle/framework/recurrent_network_op.h" #include "paddle/framework/tensor.h" @@ -8,7 +11,7 @@ namespace framework { namespace fake { class FcOp : public OperatorBase { public: - FcOp(NetDesc& net_desc) : name_(net_desc.name) {} + FcOp(NetDesc& net_desc) : name_(net_desc.name_) {} virtual void InferShape(const Scope* scope) const override { LOG(INFO) << "fc InferShape"; @@ -24,7 +27,7 @@ class FcOp : public OperatorBase { class SGDOptimizerOp : public OperatorBase { public: - FcOp(NetDesc& net_desc) : name_(net_desc.name) {} + SGDOptimizerOp(NetDesc& net_desc) : name_(net_desc.name_) {} virtual void InferShape(const Scope* scope) const override { LOG(INFO) << "optimizer InferShape"; @@ -50,7 +53,6 @@ void RecurrentOp::Run(OpRunContext* contex) const { CreateScopes(scope); SegmentInputs(scope); - CreateMemories(scope); Variable* step_scopes = scope->GetVariable(step_scopes_name_); PADDLE_ENFORCE(step_scopes, "failed to get step scopes"); @@ -61,18 +63,7 @@ void RecurrentOp::Run(OpRunContext* contex) const { for (size_t step_id = 0; step_id < seq_len; step_id++) { Scope* step_scope = scopes[step_id]; // TODO replace memorys' copy with reference - // copy pre-memory - for (const auto& attr : memory_attrs_) { - Variable* pre_memory_var = step_scope->CreateVariable(attr.pre_var); - // copy boot_var to current memory in first step - - Variable* pre_state_var = - (step_id == 0) ? 
step_scope->GetVariable(attr.boot_var) - : scopes[step_id - 1]->GetVariable(attr.var); - // copy varible of memory in previous scope to current pre-memory - *pre_memory_var->GetMutable() = - *pre_state_var->GetMutable(); - } + LinkMemories(scope, scopes, step_id); net->GetMutable()->Run(step_scope); } @@ -110,24 +101,42 @@ void RecurrentOp::CreateStepNet(Scope* scope) const { var->Reset(new PlainNet(step_net)); } -void RecurrentOp::CreateMemories(Scope* scope) const { - Variable* scopes_var = scope->CreateVariable(step_scopes_name_); - auto scopes = scopes_var->GetMutable>(); - PADDLE_ENFORCE(!scopes->empty(), "step scopes should be created before."); - - PADDLE_ENFORCE(!memory_attrs_.empty(), - "memory attributes should be provided."); - for (size_t i = 0; i < scopes->size(); i++) { - for (const auto& attr : memory_attrs_) { - // check boot var exists +void RecurrentOp::LinkMemories(Scope* scope, std::vector& step_scopes, + size_t step) const { + PADDLE_ENFORCE(step < step_scopes.size(), + "step [%d] out of range of step scopes' size [%d]", step, + step_scopes.size()); + // copy boot memory + for (auto& attr : memory_attrs_) { + Scope* step_scope = step_scopes[step]; + + Tensor* boot_tensor{nullptr}; + Variable* memory_var = step_scope->CreateVariable(attr.pre_var); + if (step == 0) { PADDLE_ENFORCE(scope->HasVariable(attr.boot_var), - "boot var %s not in context scope", attr.boot_var); - // create the memory in this scope - scope->CreateVariable(attr.var); - // create pre-memory in this scope - scope->CreateVariable(attr.pre_var); - // TODO reference pre-memory to the memory in previous scope if Variance - // supports reference + "memory [%s]'s boot variable [%s] not exists", attr.var, + attr.boot_var); + // update memory's ddim + boot_tensor = scope->CreateVariable(attr.boot_var)->GetMutable(); + attr.dims = boot_tensor->dims(); + } + + // copy from boot memory + // TODO support more device + float* memory_tensor_val = + memory_var->GetMutable()->mutable_data( + attr.dims, platform::CPUPlace()); + if (step == 0) { + PADDLE_ENFORCE(boot_tensor, "boot_tensor should be retrieved before"); + // copy from boot memory + std::memcpy(memory_tensor_val, boot_tensor->data(), + product(attr.dims)); + } else { + // copy from previous step scope's memory to this scope's `pre-memory` + Tensor* pre_step_memory = + step_scopes[step - 1]->GetVariable(attr.var)->GetMutable(); + std::memcpy(memory_tensor_val, pre_step_memory->data(), + product(attr.dims)); } } } diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 0f5bcd2ad316d..45de31664bfbf 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -16,6 +16,7 @@ #include #include "paddle/framework/attr_checker.h" +#include "paddle/framework/ddim.h" #include "paddle/framework/enforce.h" #include "paddle/framework/scope.h" #include "paddle/framework/variable.h" @@ -115,22 +116,22 @@ class RecurrentOp : public OperatorBase { /* * Create memories in each step scope. */ - void CreateMemories(Scope* scope) const; + // void CreateMemories(Scope* scope) const; /* * Link memory in previous step scope to current scope. */ - // void LinkMemories(Scope* scope) const; + void LinkMemories(Scope* scope, std::vector& step_scopes, + size_t step) const; private: /* - * these are defined in BaseOperator + * Memory of a RNN (same as the role of `Momory` in PaddlePaddle). 
* - * std::vector inputs_; - * std::vector outputs_; + * Memory attributes cached by this op, dims will be infered from + * boot memories in father scope. Other attributes are copied from Op's proto + * attributes. */ - - // Memory of a RNN (same as the role of `Momory` in PaddlePaddle) struct MemoryAttr { // name of current state variable std::string var; @@ -139,9 +140,12 @@ class RecurrentOp : public OperatorBase { // name of the variables to init this memory (same role of `boot_layer` in // PaddlePaddle), which is store in father's scope. std::string boot_var; + // this dim will infered from boot memories's tensor in the first step. + DDim dims; }; - std::vector memory_attrs_; + // TODO copy from OpBase's + mutable std::vector memory_attrs_; // this op's name, used as a unique key in father scope. // TODO repace it with OpBase's interface if supported. From f7916a6b5fdde2d19f56ec3631ab9b03eea086fc Mon Sep 17 00:00:00 2001 From: Superjom Date: Mon, 10 Jul 2017 17:20:09 +0800 Subject: [PATCH 13/37] add PlainNet fake implementation --- paddle/framework/recurrent_network_op.cc | 27 ++++++--------- paddle/framework/recurrent_network_op.h | 42 ++++++++++++++++-------- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index e4d7a327c5a1f..129b24348fc15 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -11,7 +11,7 @@ namespace framework { namespace fake { class FcOp : public OperatorBase { public: - FcOp(NetDesc& net_desc) : name_(net_desc.name_) {} + FcOp(const OpDesc& desc) {} virtual void InferShape(const Scope* scope) const override { LOG(INFO) << "fc InferShape"; @@ -24,23 +24,13 @@ class FcOp : public OperatorBase { private: std::string name_; }; +}; // namespace fake -class SGDOptimizerOp : public OperatorBase { - public: - SGDOptimizerOp(NetDesc& net_desc) : name_(net_desc.name_) {} - - virtual void InferShape(const Scope* scope) const override { - LOG(INFO) << "optimizer InferShape"; - } - - virtual void Run(OpRunContext* contex) const override { - LOG(INFO) << "optimizer Run"; +void PlainNet::AddOp(const OpDesc& desc) { + if (desc.type() == "fc") { + ops_.emplace_back(new fake::FcOp(desc)); } - - private: - std::string name_; -}; -}; // namespace fake +} void RecurrentOp::Run(OpRunContext* contex) const { auto scope = contex->scope; @@ -98,7 +88,10 @@ void RecurrentOp::CreateStepNet(Scope* scope) const { // google::protobuf::TextFormat::ParseFromString(step_net, // &step_net_desc_)); // this is a fake net, it will be rewrite after the network has been merged. - var->Reset(new PlainNet(step_net)); + NetDesc desc; + desc.name_ = "rnn_step_net"; + var->Reset(new PlainNet(desc)); + // TODO add op descs } void RecurrentOp::LinkMemories(Scope* scope, std::vector& step_scopes, diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 030efd97bc15b..eb9a390728c0f 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -35,19 +35,6 @@ struct OpRunContext { Scope* scope; }; -// TODO replace this with Net's proto. 
-struct NetDesc { - std::string name_; -}; - -class PlainNet { - public: - PlainNet() {} - PlainNet(const NetDesc& desc) {} - PlainNet(const std::string desc) {} - void Run(Scope* scope) {} -}; - class OperatorBase { public: virtual ~OperatorBase() {} @@ -68,6 +55,35 @@ class OperatorBase { std::vector outputs_; AttributeMap attrs_; }; + +// TODO replace this with Net's proto. +struct NetDesc { + std::string name_; + std::vector op_descs; +}; + +class PlainNet { + public: + PlainNet() {} + PlainNet(const NetDesc& desc) { + for (const OpDesc& proto : desc.op_descs) { + AddOp(proto); + } + } + // PlainNet(const std::string desc) {} + void AddOp(const OpDesc& desc); + void Run(Scope* scope) { + OpRunContext ctx; + ctx.scope = scope; + for (auto& op : ops_) { + op->Run(&ctx); + } + } + + private: + std::vector> ops_; +}; + // fake interfaces end // -------------------------------------------------------------------- // TODO: From 089c44810566a85592e0e904f825ada98ed353f3 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 10 Jul 2017 21:15:51 +0800 Subject: [PATCH 14/37] Use std::shared_ptr in the OpRunContext. --- paddle/framework/recurrent_network_op.cc | 64 ++++++++++++++----- paddle/framework/recurrent_network_op.h | 44 +++++-------- .../paddle/trainer_config_helpers/networks.py | 4 +- 3 files changed, 66 insertions(+), 46 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index e4d7a327c5a1f..b22b81e5bf3c6 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -1,7 +1,22 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/framework/recurrent_network_op.h" + #include #include -#include "paddle/framework/recurrent_network_op.h" #include "paddle/framework/tensor.h" namespace paddle { @@ -13,7 +28,7 @@ class FcOp : public OperatorBase { public: FcOp(NetDesc& net_desc) : name_(net_desc.name_) {} - virtual void InferShape(const Scope* scope) const override { + virtual void InferShape(const ScopePtr scope) const override { LOG(INFO) << "fc InferShape"; } @@ -29,7 +44,7 @@ class SGDOptimizerOp : public OperatorBase { public: SGDOptimizerOp(NetDesc& net_desc) : name_(net_desc.name_) {} - virtual void InferShape(const Scope* scope) const override { + virtual void InferShape(const ScopePtr scope) const override { LOG(INFO) << "optimizer InferShape"; } @@ -59,9 +74,9 @@ void RecurrentOp::Run(OpRunContext* contex) const { // forward auto dims = Input(scope, 0)->GetMutable()->dims(); size_t seq_len = dims[1]; - auto& scopes = *step_scopes->GetMutable>(); + auto& scopes = *step_scopes->GetMutable>(); for (size_t step_id = 0; step_id < seq_len; step_id++) { - Scope* step_scope = scopes[step_id]; + ScopePtr step_scope = scopes[step_id]; // TODO replace memorys' copy with reference LinkMemories(scope, scopes, step_id); @@ -72,43 +87,58 @@ void RecurrentOp::Run(OpRunContext* contex) const { ConcateOutputs(scope); } -void RecurrentOp::CreateScopes(Scope* scope) const { +void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { + OperatorBase::Init(op_desc, attrs); + name_ = op_desc.name(); + net_name_ = op_desc.name() + "_net"; + step_scopes_name_ = op_desc.name() + "_step_scopes"; + auto memories = GetAttr>("memories"); + auto boot_memories = GetAttr>("boot_memories"); + PADDLE_ENFORCE(memories.size() == boot_memories.size(), + "The size of memories and boot_memories is mismatched."); + for (size_t i = 0; i < memories.size(); ++i) { + MemoryAttr mem_attr; + mem_attr.var = memories[i]; + mem_attr.boot_var = boot_memories[i]; + memory_attrs_.push_back(mem_attr); + } +} + +void RecurrentOp::CreateScopes(ScopePtr scope) const { auto dims = Input(scope, 0)->GetMutable()->dims(); size_t seq_len = dims[1]; Variable* scopes_var = scope->GetVariable(step_scopes_name_); - // auto step_scopes = - // scopes_var->GetMutable>>(); - auto step_scopes = scopes_var->GetMutable>(); - // TODO Only two scopes are needed for inference, this case will be supported - // later. + auto step_scopes = scopes_var->GetMutable>(); + // TODO Only two scopes are needed for inference, this case will be + // supported later. if (seq_len > step_scopes->size()) { for (size_t i = step_scopes->size(); i < seq_len; ++i) { - // step_scopes->push_back(std::make_shared( - // std::shared_ptr(scope))); - step_scopes->push_back(new Scope(std::shared_ptr(scope))); + step_scopes->push_back(std::make_shared(scope)); } } } -void RecurrentOp::CreateStepNet(Scope* scope) const { +void RecurrentOp::CreateStepNet(ScopePtr scope) const { Variable* var = scope->CreateVariable(net_name_); auto step_net = GetAttr("step_net"); // get the step net proto from the string. // PADDLE_ENFORCE( // google::protobuf::TextFormat::ParseFromString(step_net, // &step_net_desc_)); + // var->Reset(new PlainNet(step_net_desc_)); // this is a fake net, it will be rewrite after the network has been merged. 
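  // NOTE: `Variable::Reset` (added to variable.h in PATCH 09) hands the raw
  // pointer to the variable's placeholder, tying the step net's lifetime to
  // this variable in the father scope.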
var->Reset(new PlainNet(step_net)); } -void RecurrentOp::LinkMemories(Scope* scope, std::vector& step_scopes, +void RecurrentOp::LinkMemories(ScopePtr scope, + std::vector& step_scopes, size_t step) const { PADDLE_ENFORCE(step < step_scopes.size(), "step [%d] out of range of step scopes' size [%d]", step, step_scopes.size()); // copy boot memory for (auto& attr : memory_attrs_) { - Scope* step_scope = step_scopes[step]; + ScopePtr step_scope = step_scopes[step]; Tensor* boot_tensor{nullptr}; Variable* memory_var = step_scope->CreateVariable(attr.pre_var); diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 030efd97bc15b..f11a470d8a38b 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -31,8 +31,9 @@ namespace framework { // -------------------------------------------------------------------- // fake interfaces that has not be implemented by other modules. // TODO keep updating according to other modules' designs. +typedef std::shared_ptr ScopePtr; struct OpRunContext { - Scope* scope; + ScopePtr scope; }; // TODO replace this with Net's proto. @@ -45,7 +46,7 @@ class PlainNet { PlainNet() {} PlainNet(const NetDesc& desc) {} PlainNet(const std::string desc) {} - void Run(Scope* scope) {} + void Run(ScopePtr scope) {} }; class OperatorBase { @@ -53,8 +54,8 @@ class OperatorBase { virtual ~OperatorBase() {} void Init(const OpDesc& op_desc, AttributeMap& attrs) {} virtual void Run(OpRunContext* context) const = 0; - virtual void InferShape(const Scope* scope) const = 0; - inline Variable* Input(Scope* scope, int index) const { + virtual void InferShape(const ScopePtr scope) const = 0; + inline Variable* Input(ScopePtr scope, int index) const { return scope->GetVariable(inputs_[index]); }; @@ -76,24 +77,13 @@ class OperatorBase { // 3. Multi-inputs with indifinate length for RecurrentOp. class RecurrentOp : public OperatorBase { public: - void Init(const OpDesc& op_desc, AttributeMap& attrs) { - OperatorBase::Init(op_desc, attrs); - name_ = op_desc.name(); - net_name_ = op_desc.name() + "_net"; - step_scopes_name_ = op_desc.name() + "_step_scopes"; - auto memories = GetAttr>("memories"); - auto boot_memories = GetAttr>("boot_memories"); - PADDLE_ENFORCE(memories.size() == boot_memories.size(), - "The size of memories and boot_memories is mismatched."); - for (size_t i = 0; i < memories.size(); ++i) { - MemoryAttr mem_attr; - mem_attr.var = memories[i]; - mem_attr.boot_var = boot_memories[i]; - memory_attrs_.push_back(mem_attr); - } - } + /* + * Initialize the recurrent operator from the operator protobuf + * and attributes. + */ + void Init(const OpDesc& op_desc, AttributeMap& attrs); - virtual void InferShape(const Scope* scope) const override; + virtual void InferShape(const ScopePtr scope) const override; /* * Forward run the RNN. @@ -107,17 +97,17 @@ class RecurrentOp : public OperatorBase { /* * Prepare inputs for each stepnet. */ - void SegmentInputs(Scope* scope) const {}; + void SegmentInputs(ScopePtr scope) const {}; /* * Process outputs of stepnets and merge to variables. */ - void ConcateOutputs(Scope* scope) const {}; + void ConcateOutputs(ScopePtr scope) const {}; /* * Create a `Net` which is shared across all steps. 
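   * The net is stored in the father scope under `net_name_`, so later `Run`
   * calls look it up there instead of rebuilding it.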
*/ - void CreateStepNet(Scope* scope) const; + void CreateStepNet(ScopePtr scope) const; /* * Create a scope for each step, the context's scope is shared across all @@ -128,17 +118,17 @@ class RecurrentOp : public OperatorBase { * NOTE the scopes are reused by both the `Forward` and `Backward`, so just * create once and expand its size if more steps need. */ - void CreateScopes(Scope* scope) const; + void CreateScopes(ScopePtr scope) const; /* * Create memories in each step scope. */ - // void CreateMemories(Scope* scope) const; + // void CreateMemories(ScopePtr scope) const; /* * Link memory in previous step scope to current scope. */ - void LinkMemories(Scope* scope, std::vector& step_scopes, + void LinkMemories(ScopePtr scope, std::vector& step_scopes, size_t step) const; private: diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index b77932ce5f094..f0b6625dc3736 100755 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1395,7 +1395,7 @@ def inputs(layers, *args): if len(args) != 0: layers.extend(args) - Inputs(* [l.name for l in layers]) + Inputs(*[l.name for l in layers]) def outputs(layers, *args): @@ -1438,7 +1438,7 @@ def __dfs_travel__(layer, assert len(layers) > 0 if HasInputsSet(): # input already set - Outputs(* [l.name for l in layers]) + Outputs(*[l.name for l in layers]) return # just return outputs. if len(layers) != 1: From bffd11e91ac2602dc21b17e371eaac5790f47214 Mon Sep 17 00:00:00 2001 From: Superjom Date: Mon, 10 Jul 2017 21:19:42 +0800 Subject: [PATCH 15/37] add test --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/recurrent_network_op.cc | 23 ++++++-- paddle/framework/recurrent_network_op.h | 6 +- paddle/framework/recurrent_network_op_test.cc | 57 ++++++++++++++++++- 4 files changed, 77 insertions(+), 11 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index e61979f265d71..cdf29c9aa1299 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -17,4 +17,4 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc. 
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) cc_library(recurrent_network_op SRCS recurrent_network_op.cc DEPS op_desc) -#cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc) +cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc recurrent_network_op.cc DEPS glog gtest gflags ddim system_allocator op_desc) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index 129b24348fc15..83447df392b58 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -11,20 +11,33 @@ namespace framework { namespace fake { class FcOp : public OperatorBase { public: - FcOp(const OpDesc& desc) {} + FcOp(const OpDesc& desc) : name_(desc.name()) {} - virtual void InferShape(const Scope* scope) const override { - LOG(INFO) << "fc InferShape"; + virtual void InferShape(Scope* scope) const override { + for (const auto& output : outputs_) { + LOG(INFO) << "fc [" << name_ << "]" + << " create output variable [" << output << "]"; + scope->CreateVariable(output); + } } virtual void Run(OpRunContext* contex) const override { - LOG(INFO) << "fc Run"; + for (const auto& input : inputs_) { + PADDLE_ENFORCE(contex->scope->HasVariable(input), + "no input variable [%s] exists"); + LOG(INFO) << "fc [" << name_ << "] read input [" << input << "]"; + } + for (const auto& output : outputs_) { + PADDLE_ENFORCE(contex->scope->HasVariable(output), + "no output variable [%s] exists"); + LOG(INFO) << "fc [" << name_ << "] write output [" << output << "]"; + } } private: std::string name_; }; -}; // namespace fake +} // namespace fake void PlainNet::AddOp(const OpDesc& desc) { if (desc.type() == "fc") { diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index eb9a390728c0f..3c1e060f0667c 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -40,7 +40,7 @@ class OperatorBase { virtual ~OperatorBase() {} void Init(const OpDesc& op_desc, AttributeMap& attrs) {} virtual void Run(OpRunContext* context) const = 0; - virtual void InferShape(const Scope* scope) const = 0; + virtual void InferShape(Scope* scope) const = 0; inline Variable* Input(Scope* scope, int index) const { return scope->GetVariable(inputs_[index]); }; @@ -109,7 +109,7 @@ class RecurrentOp : public OperatorBase { } } - virtual void InferShape(const Scope* scope) const override; + virtual void InferShape(Scope* scope) const override {} /* * Forward run the RNN. @@ -119,6 +119,8 @@ class RecurrentOp : public OperatorBase { */ virtual void Run(OpRunContext* contex) const override; + virtual ~RecurrentOp() {} + protected: /* * Prepare inputs for each stepnet. diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc index 83d7bb3c15388..ff31fc6b99eeb 100644 --- a/paddle/framework/recurrent_network_op_test.cc +++ b/paddle/framework/recurrent_network_op_test.cc @@ -11,16 +11,67 @@ limitations under the License. 
*/ +#include + #include "paddle/framework/recurrent_network_op.h" -#include "gtest/gtest.h" +#include "paddle/framework/tensor.h" namespace paddle { namespace framework { class RecurrentOpTest : public ::testing::Test { protected: - virtual void SetUp() override {} + virtual void SetUp() override { + CreateGlobalVariables(); + CreateRNNOp(); + } + + void CreateGlobalVariables() { + // create boot memory + scope.CreateVariable("h_boot"); + // create input, and init content + Variable* x = scope.CreateVariable("x"); + DDim dims = make_ddim(std::vector{10 /*sent size*/, 20 /*batch size*/, + 30 /*input dim*/}); + x->GetMutable()->mutable_data(dims, platform::CPUPlace()); + } + + void CreateRNNOp() { + OpDesc op_desc; + + op_desc.set_type("rnn_op"); + op_desc.add_inputs("x"); + // output hidden vectors + op_desc.add_outputs("hiddens"); + + auto memories_attr = op_desc.mutable_attrs()->Add(); + memories_attr->set_type(paddle::framework::AttrType::STRINGS); + + *memories_attr->mutable_strings()->Add() = "h"; + memories_attr->set_name("memories"); + + auto boot_memories_attr = op_desc.mutable_attrs()->Add(); + boot_memories_attr->set_type(paddle::framework::AttrType::STRINGS); + *boot_memories_attr->mutable_strings()->Add() = "h_boot"; + boot_memories_attr->set_name("boot_memories"); + + AttributeMap attrs; + attrs["memories"] = std::vector{"h"}; + attrs["boot_memories"] = std::vector{"h_boot"}; + + rnn_op.Init(op_desc, attrs); + } + + void RunRnnOp() { + // TODO + } + + // father scope + Scope scope; + RecurrentOp rnn_op; }; -} // namespace framework +TEST_F(RecurrentOpTest, create_op) {} + +} // namespace framework } // namespace paddle From c7947de243de3da5b36a7b8db02d7a4db19b3138 Mon Sep 17 00:00:00 2001 From: Superjom Date: Mon, 10 Jul 2017 21:47:16 +0800 Subject: [PATCH 16/37] disable mutable_data --- paddle/framework/recurrent_network_op.cc | 37 ++++++++++--------- paddle/framework/recurrent_network_op_test.cc | 5 ++- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index 83447df392b58..054c7a9fbb0db 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -114,10 +114,9 @@ void RecurrentOp::LinkMemories(Scope* scope, std::vector& step_scopes, step_scopes.size()); // copy boot memory for (auto& attr : memory_attrs_) { - Scope* step_scope = step_scopes[step]; + // Scope* step_scope = step_scopes[step]; Tensor* boot_tensor{nullptr}; - Variable* memory_var = step_scope->CreateVariable(attr.pre_var); if (step == 0) { PADDLE_ENFORCE(scope->HasVariable(attr.boot_var), "memory [%s]'s boot variable [%s] not exists", attr.var, @@ -126,24 +125,28 @@ void RecurrentOp::LinkMemories(Scope* scope, std::vector& step_scopes, boot_tensor = scope->CreateVariable(attr.boot_var)->GetMutable(); attr.dims = boot_tensor->dims(); } + // Variable* memory_var = step_scope->CreateVariable(attr.pre_var); // copy from boot memory // TODO support more device - float* memory_tensor_val = - memory_var->GetMutable()->mutable_data( - attr.dims, platform::CPUPlace()); - if (step == 0) { - PADDLE_ENFORCE(boot_tensor, "boot_tensor should be retrieved before"); - // copy from boot memory - std::memcpy(memory_tensor_val, boot_tensor->data(), - product(attr.dims)); - } else { - // copy from previous step scope's memory to this scope's `pre-memory` - Tensor* pre_step_memory = - step_scopes[step - 1]->GetVariable(attr.var)->GetMutable(); - std::memcpy(memory_tensor_val, 
pre_step_memory->data<float>(),
-                  product(attr.dims));
-    }
+    // TODO mutable_data is currently invalid
+    // float* memory_tensor_val =
+    //     memory_var->GetMutable<Tensor>()->mutable_data<float>(
+    //         attr.dims, platform::CPUPlace());
+    // if (step == 0) {
+    //   PADDLE_ENFORCE(boot_tensor, "boot_tensor should be retrieved
+    //   before");
+    //   // copy from boot memory
+    //   std::memcpy(memory_tensor_val, boot_tensor->data<float>(),
+    //               product(attr.dims));
+    // } else {
+    //   // copy from previous step scope's memory to this scope's
+    //   `pre-memory` Tensor* pre_step_memory =
+    //       step_scopes[step -
+    //       1]->GetVariable(attr.var)->GetMutable<Tensor>();
+    //   std::memcpy(memory_tensor_val, pre_step_memory->data<float>(),
+    //               product(attr.dims));
+    // }
   }
 }

diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc
index ff31fc6b99eeb..c9dacf98a7dbf 100644
--- a/paddle/framework/recurrent_network_op_test.cc
+++ b/paddle/framework/recurrent_network_op_test.cc
@@ -30,10 +30,11 @@ class RecurrentOpTest : public ::testing::Test {
     // create boot memory
     scope.CreateVariable("h_boot");
     // create input, and init content
-    Variable* x = scope.CreateVariable("x");
+    // Variable* x = scope.CreateVariable("x");
     DDim dims = make_ddim(std::vector<int>{10 /*sent size*/, 20 /*batch size*/,
                                            30 /*input dim*/});
-    x->GetMutable<Tensor>()->mutable_data<float>(dims, platform::CPUPlace());
+    // TODO mutable_data is not valid
+    // x->GetMutable<Tensor>()->mutable_data<float>(dims, platform::CPUPlace());
   }

   void CreateRNNOp() {

From 6dca71130bdd8fa27be78657fb9f730842cf92c4 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Mon, 10 Jul 2017 22:19:12 +0800
Subject: [PATCH 17/37] finish SegmentInput function

---
 paddle/framework/CMakeLists.txt          |  2 +-
 paddle/framework/recurrent_network_op.cc | 30 ++++++++++++++++++++----
 paddle/framework/recurrent_network_op.h  |  5 +++-
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index cdf29c9aa1299..9b81086237285 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -17,4 +17,4 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto)
 add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(framework_py_proto framework_py_proto_init)
 cc_library(recurrent_network_op SRCS recurrent_network_op.cc DEPS op_desc)
-cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc recurrent_network_op.cc DEPS glog gtest gflags ddim system_allocator op_desc)
+#cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc recurrent_network_op.cc DEPS glog gtest gflags ddim system_allocator op_desc)
diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc
index 2080247cd70ab..1c0279aae2c32 100644
--- a/paddle/framework/recurrent_network_op.cc
+++ b/paddle/framework/recurrent_network_op.cc
@@ -76,7 +76,7 @@ void RecurrentOp::Run(OpRunContext* contex) const {
   PADDLE_ENFORCE(step_scopes, "failed to get step scopes");
   // forward
   auto dims = Input(scope, 0)->GetMutable<Tensor>()->dims();
-  size_t seq_len = dims[1];
+  size_t seq_len = dims[0];
   auto& scopes = *step_scopes->GetMutable<std::vector<ScopePtr>>();
   for (size_t step_id = 0; step_id < seq_len; step_id++) {
     ScopePtr step_scope = scopes[step_id];
@@ -109,7 +109,7 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) {

 void RecurrentOp::CreateScopes(ScopePtr scope) const {
   auto dims = Input(scope, 0)->GetMutable<Tensor>()->dims();
-  size_t seq_len = dims[1];
+  size_t seq_len = dims[0];
   Variable* scopes_var = scope->GetVariable(step_scopes_name_);
   auto step_scopes = scopes_var->GetMutable<std::vector<ScopePtr>>();
   // TODO Only two scopes are needed for inference, this case will be
@@ -136,6 +136,30 @@ void RecurrentOp::CreateStepNet(ScopePtr scope) const {
   // TODO add op descs
 }

+void RecurrentOp::SegmentInputs(ScopePtr scope) const {
+  Variable* scopes_var = scope->CreateVariable(step_scopes_name_);
+  auto& step_scopes = *scopes_var->GetMutable<std::vector<ScopePtr>>();
+
+  auto dims = Input(scope, 0)->GetMutable<Tensor>()->dims();
+  int seq_len = dims[0];
+  int batch_size = dims[1];
+  int dim = dims[2];
+  int length = batch_size * dim;
+  for (size_t i = 0; i < inputs_.size(); i++) {
+    const float* scope_input =
+        Input(scope, i)->GetMutable<Tensor>()->data<float>();
+    for (int j = 0; j < seq_len; j++) {
+      std::string name =
+          name_ + "@input_" + inputs_[i] + "@step_" + std::to_string(j);
+      Variable* input_var = step_scopes[j]->CreateVariable(name);
+      Tensor* step_input_tensor = input_var->GetMutable<Tensor>();
+      float* step_input = step_input_tensor->mutable_data<float>(
+          make_ddim({1, batch_size, dim}), platform::CPUPlace());
+      std::memcpy(step_input, scope_input + j * length, length);
+    }
+  }
+}
+
 void RecurrentOp::LinkMemories(ScopePtr scope,
                                std::vector<ScopePtr>& step_scopes,
                                size_t step) const {
@@ -144,8 +168,6 @@ void RecurrentOp::LinkMemories(ScopePtr scope,
                  step_scopes.size());
   // copy boot memory
   for (auto& attr : memory_attrs_) {
-    // Scope* step_scope = step_scopes[step];
-
     Tensor* boot_tensor{nullptr};
     if (step == 0) {
       PADDLE_ENFORCE(scope->HasVariable(attr.boot_var),
diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h
index 05f26ebeb9149..39edd461606cd 100644
--- a/paddle/framework/recurrent_network_op.h
+++ b/paddle/framework/recurrent_network_op.h
@@ -87,10 +87,13 @@ class PlainNet {
 // fake interfaces end
 // --------------------------------------------------------------------

+// The sequence format in RecurrentOp is Tensor now.
 // TODO:
 // 1. No-padding computing for sequences with indefinite length in one batch.
 // 2. Hierarchical RNN for sequence with sub-sequence.
 // 3. Multi-inputs with indefinite length for RecurrentOp.
+// 4. More Complex RNN architecture, such as Gated Feedback RNN.
+//    Refer to: https://arxiv.org/pdf/1502.02367.pdf
 class RecurrentOp : public OperatorBase {
  public:
   /*
    * Prepare inputs for each stepnet.
    */
-  void SegmentInputs(ScopePtr scope) const {};
+  void SegmentInputs(ScopePtr scope) const;

   /*
    * Process outputs of stepnets and merge to variables.

From d210b0bc8776bd45c612aac98200a2be7e40cd40 Mon Sep 17 00:00:00 2001
From: Superjom
Date: Tue, 11 Jul 2017 09:05:05 +0800
Subject: [PATCH 18/37] enable mutable_data with a trick

---
 paddle/framework/CMakeLists.txt               |  2 +-
 paddle/framework/recurrent_network_op_test.cc |  4 ++--
 paddle/framework/tensor.h                     | 16 +++++++++-------
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 9b81086237285..cdf29c9aa1299 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -17,4 +17,4 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto)
 add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(framework_py_proto framework_py_proto_init)
 cc_library(recurrent_network_op SRCS recurrent_network_op.cc DEPS op_desc)
-#cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc recurrent_network_op.cc DEPS glog gtest gflags ddim system_allocator op_desc)
+cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc recurrent_network_op.cc DEPS glog gtest gflags ddim system_allocator op_desc)
diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc
index c9dacf98a7dbf..a6951bf4252e4 100644
--- a/paddle/framework/recurrent_network_op_test.cc
+++ b/paddle/framework/recurrent_network_op_test.cc
@@ -30,11 +30,11 @@ class RecurrentOpTest : public ::testing::Test {
     // create boot memory
     scope.CreateVariable("h_boot");
     // create input, and init content
-    // Variable* x = scope.CreateVariable("x");
+    Variable* x = scope.CreateVariable("x");
     DDim dims = make_ddim(std::vector<int>{10 /*sent size*/, 20 /*batch size*/,
                                            30 /*input dim*/});
     // TODO mutable_data is not valid
-    // x->GetMutable<Tensor>()->mutable_data<float>(dims, platform::CPUPlace());
+    x->GetMutable<Tensor>()->mutable_data<float>(dims, platform::CPUPlace());
diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 6f1ae09fc0060..b642389b44034 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -36,14 +36,16 @@ class Tensor {
   template <typename T,
             typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
   T* mutable_data(DDim dims, paddle::platform::Place place) {
-    if (holder_ == nullptr ||
-        !(holder_->Place() ==
-          place) /* some versions of boost::variant don't have operator!= */
-        || holder_->Size() < product(dims) * sizeof(T)) {
-      holder_.reset(new PlaceholderImpl<T>(place, product(dims) * sizeof(T)));
-    }
+    // if (holder_ == nullptr ||
+    //     !(holder_->Place() ==
+    //       place) /* some versions of boost::variant don't have operator!= */
+    //     || holder_->Size() < product(dims) * sizeof(T)) {
+    //   holder_.reset(new PlaceholderImpl<T>(place, product(dims) *
+    //   sizeof(T)));
+    // }
     dims_ = dims;
-    return static_cast<T*>(holder_->Ptr());
+    return static_cast<T*>(new T[product(dims)]);
+    // return static_cast<T*>(holder_->Ptr());
   }

   template

Date: Tue, 11 Jul 2017 09:50:47 +0800
Subject: [PATCH 19/37] RNNOp test.
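This test drives RecurrentOp purely through an OpDesc: the RNN state is declared by parallel string-list attributes ("memories" and "boot_memories") that are paired up index by index when the op is initialized. Below is a minimal, self-contained sketch of that pairing with plain standard-library types; it is illustrative only and does not use the framework's real protobuf, Scope, or attribute classes.

    #include <cassert>
    #include <string>
    #include <vector>

    // Simplified stand-in for the op's per-memory attribute; the real
    // struct also records the dims inferred from the boot tensor.
    struct MemoryAttr {
      std::string var;       // state written at step t, e.g. "h"
      std::string boot_var;  // initial value read at step 0, e.g. "h_boot"
    };

    int main() {
      std::vector<std::string> memories = {"h"};
      std::vector<std::string> boot_memories = {"h_boot"};
      // Init() enforces that the two lists have the same length.
      assert(memories.size() == boot_memories.size());

      std::vector<MemoryAttr> memory_attrs;
      for (size_t i = 0; i < memories.size(); ++i) {
        memory_attrs.push_back({memories[i], boot_memories[i]});
      }
      assert(memory_attrs[0].boot_var == "h_boot");
      return 0;
    }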
--- paddle/framework/recurrent_network_op.h | 4 +-- paddle/framework/recurrent_network_op_test.cc | 27 ++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 39edd461606cd..80e940083d2f1 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -173,8 +173,8 @@ class RecurrentOp : public OperatorBase { }; /* - * The attributes in protobuf about the memory description and the booted - * memory description are as follows. The number of booted memories should + * The attributes in protobuf about the memory description and the initial + * memory description are as follows. The number of initial memories should * equal to the memories number. * * arg { diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc index c9dacf98a7dbf..c4d428330e840 100644 --- a/paddle/framework/recurrent_network_op_test.cc +++ b/paddle/framework/recurrent_network_op_test.cc @@ -11,6 +11,7 @@ limitations under the License. */ +#include #include #include "paddle/framework/recurrent_network_op.h" @@ -26,11 +27,13 @@ class RecurrentOpTest : public ::testing::Test { CreateRNNOp(); } + virtual void TearDown() {} + void CreateGlobalVariables() { // create boot memory - scope.CreateVariable("h_boot"); + scope_.CreateVariable("h_boot"); // create input, and init content - // Variable* x = scope.CreateVariable("x"); + // Variable* x = scope_.CreateVariable("x"); DDim dims = make_ddim(std::vector{10 /*sent size*/, 20 /*batch size*/, 30 /*input dim*/}); // TODO mutable_data is not valid @@ -45,22 +48,34 @@ class RecurrentOpTest : public ::testing::Test { // output hidden vectors op_desc.add_outputs("hiddens"); + // add memories auto memories_attr = op_desc.mutable_attrs()->Add(); memories_attr->set_type(paddle::framework::AttrType::STRINGS); - *memories_attr->mutable_strings()->Add() = "h"; memories_attr->set_name("memories"); + // add initial memories auto boot_memories_attr = op_desc.mutable_attrs()->Add(); boot_memories_attr->set_type(paddle::framework::AttrType::STRINGS); *boot_memories_attr->mutable_strings()->Add() = "h_boot"; boot_memories_attr->set_name("boot_memories"); + // add step net desc + auto step_net_attr = op_desc.mutable_attrs()->Add(); + step_net_attr->set_type(paddle::framework::AttrType::STRING); + step_net_attr->set_s(" "); // TODO add step net proto + step_net_attr->set_name("step_net"); + + std::ostringstream stream; + op_desc.SerializeToOstream(&stream); + std::string text = stream.str(); + LOG(INFO) << text; + AttributeMap attrs; attrs["memories"] = std::vector{"h"}; attrs["boot_memories"] = std::vector{"h_boot"}; - rnn_op.Init(op_desc, attrs); + rnn_op_.Init(op_desc, attrs); } void RunRnnOp() { @@ -68,8 +83,8 @@ class RecurrentOpTest : public ::testing::Test { } // father scope - Scope scope; - RecurrentOp rnn_op; + Scope scope_; + RecurrentOp rnn_op_; }; TEST_F(RecurrentOpTest, create_op) {} From 778ebb4af83de14795368a6c06c4303d8a4d05fd Mon Sep 17 00:00:00 2001 From: Superjom Date: Tue, 11 Jul 2017 10:24:32 +0800 Subject: [PATCH 20/37] enable LinkMemories with mutable_data --- paddle/framework/recurrent_network_op.cc | 55 ++++++++++++------- paddle/framework/recurrent_network_op.h | 10 ++-- paddle/framework/recurrent_network_op_test.cc | 17 ++++-- 3 files changed, 52 insertions(+), 30 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc 
index 1c0279aae2c32..1d02ef0f1ceb4 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -36,7 +36,7 @@ class FcOp : public OperatorBase { } } - virtual void Run(OpRunContext* contex) const override { + virtual void Run(OpContext* contex) const override { for (const auto& input : inputs_) { PADDLE_ENFORCE(contex->scope->HasVariable(input), "no input variable [%s] exists"); @@ -60,7 +60,7 @@ void PlainNet::AddOp(const OpDesc& desc) { } } -void RecurrentOp::Run(OpRunContext* contex) const { +void RecurrentOp::Run(OpContext* contex) const { auto scope = contex->scope; if (!scope->HasVariable(net_name_)) { @@ -99,15 +99,30 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { auto boot_memories = GetAttr>("boot_memories"); PADDLE_ENFORCE(memories.size() == boot_memories.size(), "The size of memories and boot_memories is mismatched."); + // set memories for (size_t i = 0; i < memories.size(); ++i) { MemoryAttr mem_attr; mem_attr.var = memories[i]; mem_attr.boot_var = boot_memories[i]; memory_attrs_.push_back(mem_attr); + LOG(INFO) << "set memorys:\t" + << "memory:" << mem_attr.var << "\tboot:" << mem_attr.boot_var; + } + + // set inputs + for (const std::string& input : op_desc.inputs()) { + LOG(INFO) << "set input " << input; + inputs_.push_back(input); + } + // set outputs + for (const std::string& output : op_desc.outputs()) { + LOG(INFO) << "set output " << output; + outputs_.push_back(output); } } void RecurrentOp::CreateScopes(ScopePtr scope) const { + LOG(INFO) << "create scopes"; auto dims = Input(scope, 0)->GetMutable()->dims(); size_t seq_len = dims[0]; Variable* scopes_var = scope->GetVariable(step_scopes_name_); @@ -166,6 +181,7 @@ void RecurrentOp::LinkMemories(ScopePtr scope, PADDLE_ENFORCE(step < step_scopes.size(), "step [%d] out of range of step scopes' size [%d]", step, step_scopes.size()); + auto step_scope = step_scopes[step]; // copy boot memory for (auto& attr : memory_attrs_) { Tensor* boot_tensor{nullptr}; @@ -177,28 +193,27 @@ void RecurrentOp::LinkMemories(ScopePtr scope, boot_tensor = scope->CreateVariable(attr.boot_var)->GetMutable(); attr.dims = boot_tensor->dims(); } - // Variable* memory_var = step_scope->CreateVariable(attr.pre_var); + Variable* memory_var = step_scope->CreateVariable(attr.pre_var); // copy from boot memory // TODO support more device // TODO mutable_data is currently invalid - // float* memory_tensor_val = - // memory_var->GetMutable()->mutable_data( - // attr.dims, platform::CPUPlace()); - // if (step == 0) { - // PADDLE_ENFORCE(boot_tensor, "boot_tensor should be retrieved - // before"); - // // copy from boot memory - // std::memcpy(memory_tensor_val, boot_tensor->data(), - // product(attr.dims)); - // } else { - // // copy from previous step scope's memory to this scope's - // `pre-memory` Tensor* pre_step_memory = - // step_scopes[step - - // 1]->GetVariable(attr.var)->GetMutable(); - // std::memcpy(memory_tensor_val, pre_step_memory->data(), - // product(attr.dims)); - // } + float* memory_tensor_val = + memory_var->GetMutable()->mutable_data( + attr.dims, platform::CPUPlace()); + if (step == 0) { + PADDLE_ENFORCE(boot_tensor, "boot_tensor should be retrieved before"); + // copy from boot memory + std::memcpy(memory_tensor_val, boot_tensor->data(), + product(attr.dims)); + } else { + // copy from previous step scope's memory to this scope's + // `pre - memory` + Tensor* pre_step_memory = + step_scopes[step - 1]->GetVariable(attr.var)->GetMutable(); + 
std::memcpy(memory_tensor_val, pre_step_memory->data(), + product(attr.dims)); + } } } diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 39edd461606cd..8d3020a5218ff 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -32,15 +32,15 @@ namespace framework { // fake interfaces that has not be implemented by other modules. // TODO keep updating according to other modules' designs. typedef std::shared_ptr ScopePtr; -struct OpRunContext { +struct OpContext { ScopePtr scope; }; class OperatorBase { public: virtual ~OperatorBase() {} - void Init(const OpDesc& op_desc, AttributeMap& attrs) {} - virtual void Run(OpRunContext* context) const = 0; + void Init(const OpDesc& op_desc, AttributeMap& attrs) { attrs_ = attrs; } + virtual void Run(OpContext* context) const = 0; virtual void InferShape(ScopePtr scope) const = 0; inline Variable* Input(ScopePtr scope, int index) const { return scope->GetVariable(inputs_[index]); @@ -74,7 +74,7 @@ class PlainNet { // PlainNet(const std::string desc) {} void AddOp(const OpDesc& desc); void Run(ScopePtr scope) { - OpRunContext ctx; + OpContext ctx; ctx.scope = scope; for (auto& op : ops_) { op->Run(&ctx); @@ -110,7 +110,7 @@ class RecurrentOp : public OperatorBase { * NOTE the context's scope is not given until `Run` called, so step scopes' * father should be set/updated in this method. */ - virtual void Run(OpRunContext* contex) const override; + virtual void Run(OpContext* contex) const override; virtual ~RecurrentOp() {} diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc index a6951bf4252e4..ef94cd317593a 100644 --- a/paddle/framework/recurrent_network_op_test.cc +++ b/paddle/framework/recurrent_network_op_test.cc @@ -11,6 +11,7 @@ limitations under the License. 
*/ +#include #include #include "paddle/framework/recurrent_network_op.h" @@ -27,9 +28,11 @@ class RecurrentOpTest : public ::testing::Test { } void CreateGlobalVariables() { + LOG(INFO) << "create global variable h_boot"; // create boot memory scope.CreateVariable("h_boot"); // create input, and init content + LOG(INFO) << "create global variale x"; Variable* x = scope.CreateVariable("x"); DDim dims = make_ddim(std::vector{10 /*sent size*/, 20 /*batch size*/, 30 /*input dim*/}); @@ -43,7 +46,7 @@ class RecurrentOpTest : public ::testing::Test { op_desc.set_type("rnn_op"); op_desc.add_inputs("x"); // output hidden vectors - op_desc.add_outputs("hiddens"); + op_desc.add_outputs("h"); auto memories_attr = op_desc.mutable_attrs()->Add(); memories_attr->set_type(paddle::framework::AttrType::STRINGS); @@ -60,11 +63,9 @@ class RecurrentOpTest : public ::testing::Test { attrs["memories"] = std::vector{"h"}; attrs["boot_memories"] = std::vector{"h_boot"}; + LOG(INFO) << "rnn_op to init"; rnn_op.Init(op_desc, attrs); - } - - void RunRnnOp() { - // TODO + LOG(INFO) << "rnn_op finish init"; } // father scope @@ -74,5 +75,11 @@ class RecurrentOpTest : public ::testing::Test { TEST_F(RecurrentOpTest, create_op) {} +TEST_F(RecurrentOpTest, Run) { + OpContext ctx; + ctx.scope = std::make_shared(); + rnn_op.Run(&ctx); +} + } // namespace framework } // namespace paddle From 8642b27c36d75aefe225f83f8e15dd1e05d965ec Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 11 Jul 2017 10:54:30 +0800 Subject: [PATCH 21/37] update SegmentInput function with comments --- paddle/framework/recurrent_network_op.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index 1d02ef0f1ceb4..c7c2b6a0c4cf4 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -152,7 +152,7 @@ void RecurrentOp::CreateStepNet(ScopePtr scope) const { } void RecurrentOp::SegmentInputs(ScopePtr scope) const { - Variable* scopes_var = scope->CreateVariable(step_scopes_name_); + Variable* scopes_var = scope->GetVariable(step_scopes_name_); auto& step_scopes = *scopes_var->GetMutable>(); auto dims = Input(scope, 0)->GetMutable()->dims(); @@ -164,12 +164,10 @@ void RecurrentOp::SegmentInputs(ScopePtr scope) const { const float* scope_input = Input(scope, i)->GetMutable()->data(); for (int j = 0; j < seq_len; j++) { - std::string name = - name_ + "@input_" + inputs_[i] + "@step_" + std::to_string(j); - Variable* input_var = step_scopes[j]->CreateVariable(name); + Variable* input_var = step_scopes[j]->CreateVariable(inputs_[i]); Tensor* step_input_tensor = input_var->GetMutable(); float* step_input = step_input_tensor->mutable_data( - make_ddim({1, batch_size, dim}), platform::CPUPlace()); + make_ddim({batch_size, dim}), platform::CPUPlace()); std::memcpy(step_input, scope_input + j * length, length); } } From 8e70b376c57dbd74498ad63ec04b12766c37ca10 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 11 Jul 2017 19:04:33 +0800 Subject: [PATCH 22/37] finish ConcatOutput function --- paddle/framework/recurrent_network_op.cc | 34 +++++++++++++++++++++--- paddle/framework/recurrent_network_op.h | 2 +- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index 974522b7cd1a7..4393909b87ff2 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -49,7 +49,7 @@ void 
RecurrentOp::Run(OpContext* contex) const { } // prepare outputs - ConcateOutputs(scope); + ConcatOutputs(scope); } void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { @@ -122,21 +122,47 @@ void RecurrentOp::SegmentInputs(ScopePtr scope) const { auto dims = Input(scope, 0)->GetMutable()->dims(); int seq_len = dims[0]; int batch_size = dims[1]; - int dim = dims[2]; - int length = batch_size * dim; for (size_t i = 0; i < inputs_.size(); i++) { + auto input_dims = Input(scope, i)->GetMutable()->dims(); + int input_dim = input_dims[2]; + int length = batch_size * input_dim; const float* scope_input = Input(scope, i)->GetMutable()->data(); for (int j = 0; j < seq_len; j++) { Variable* input_var = step_scopes[j]->CreateVariable(inputs_[i]); Tensor* step_input_tensor = input_var->GetMutable(); float* step_input = step_input_tensor->mutable_data( - make_ddim({batch_size, dim}), platform::CPUPlace()); + make_ddim({batch_size, input_dim}), platform::CPUPlace()); std::memcpy(step_input, scope_input + j * length, length); } } } +void RecurrentOp::ConcatOutputs(ScopePtr scope) const { + Variable* scopes_var = scope->GetVariable(step_scopes_name_); + auto& step_scopes = *scopes_var->GetMutable>(); + + auto dims = Input(scope, 0)->GetMutable()->dims(); + int seq_len = dims[0]; + int batch_size = dims[1]; + for (size_t i = 0; i < outputs_.size(); i++) { + auto output_dims = + step_scopes[0]->GetVariable(outputs_[0])->GetMutable()->dims(); + int output_dim = output_dims[2]; + int length = batch_size * output_dim; + Tensor* output_tensor = + scope->CreateVariable(outputs_[i])->GetMutable(); + float* output = output_tensor->mutable_data( + make_ddim({seq_len, batch_size, output_dim}), platform::CPUPlace()); + for (int j = 0; j < seq_len; j++) { + Variable* output_var = step_scopes[j]->GetVariable(outputs_[i]); + const float* step_output = + output_var->GetMutable()->data(); + std::memcpy(output + j * length, step_output, length); + } + } +} + void RecurrentOp::LinkMemories(ScopePtr scope, std::vector& step_scopes, size_t step) const { diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index afb4f3bbdc2f6..02018c0b7ee96 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -123,7 +123,7 @@ class RecurrentOp : public OperatorBase { /* * Process outputs of stepnets and merge to variables. */ - void ConcateOutputs(ScopePtr scope) const {}; + void ConcatOutputs(ScopePtr scope) const; /* * Create a `Net` which is shared across all steps. 
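ConcatOutputs above is the inverse of SegmentInputs: each step scope holds one [batch_size, output_dim] tensor, and the op copies them back into a single [seq_len, batch_size, output_dim] output. A self-contained sketch of the copy pattern with plain float buffers (illustrative only; the real code goes through Variable and Tensor):

    #include <cstring>
    #include <vector>

    int main() {
      const int seq_len = 10, batch_size = 20, output_dim = 30;
      const int length = batch_size * output_dim;  // elements per step

      // Per-step outputs, as they would live in the step scopes.
      std::vector<std::vector<float>> step_outputs(
          seq_len, std::vector<float>(length, 0.5f));

      // Destination of shape [seq_len, batch_size, output_dim], flattened.
      std::vector<float> output(seq_len * length);
      for (int j = 0; j < seq_len; ++j) {
        // Step j lands at element offset j * length; note that memcpy takes
        // a byte count, so the element count is scaled by sizeof(float).
        std::memcpy(output.data() + j * length, step_outputs[j].data(),
                    length * sizeof(float));
      }
      return 0;
    }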
From ce802c0413e65082370c39059938b230f86a73cd Mon Sep 17 00:00:00 2001 From: Superjom Date: Wed, 12 Jul 2017 10:45:37 +0800 Subject: [PATCH 23/37] reformat inputs and attributes boot_memories --- paddle/framework/recurrent_network_op.cc | 51 +++++++++++++++--------- paddle/framework/recurrent_network_op.h | 28 ++++++++++--- 2 files changed, 55 insertions(+), 24 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index 4393909b87ff2..7cb2f1b902713 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -54,15 +54,40 @@ void RecurrentOp::Run(OpContext* contex) const { void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { OperatorBase::Init(op_desc, attrs); + + // set original inputs + for (const std::string& input : op_desc.inputs()) { + LOG(INFO) << "set input " << input; + inputs_.push_back(input); + } + // set original outputs + for (const std::string& output : op_desc.outputs()) { + LOG(INFO) << "set output " << output; + outputs_.push_back(output); + } + // prepare inlinks + PADDLE_ENFORCE(inlinks_.empty(), "RecurrentOp duplicate inited"); + for (auto id : GetAttr>("real_input")) { + inlinks_.push_back(inputs_[id]); + } + name_ = op_desc.name(); - net_name_ = op_desc.name() + "_net"; - step_scopes_name_ = op_desc.name() + "_step_scopes"; + net_name_ = inputs_.at(GetAttr("step_net")); + step_scopes_name_ = inputs_.at(GetAttr("step_scopes")); + + // set memories auto memories = GetAttr>("memories"); auto pre_memories = GetAttr>("pre_memories"); - auto boot_memories = GetAttr>("boot_memories"); + PADDLE_ENFORCE(memories.size() == pre_memories.size(), + "The size of memories and pre_memories doesn't match: %d,%d.", + memories.size(), pre_memories.size()); + std::vector boot_memories; + for (auto id : GetAttr>("boot_memories")) { + boot_memories.push_back(inputs_[id]); + } PADDLE_ENFORCE(memories.size() == boot_memories.size(), - "The size of memories and boot_memories is mismatched."); - // set memories + "the size of memories and boot_memories doesn't match: %d,%d", + memories.size(), boot_memories.size()); for (size_t i = 0; i < memories.size(); ++i) { MemoryAttr mem_attr; mem_attr.var = memories[i]; @@ -72,17 +97,6 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { LOG(INFO) << "set memorys:\t" << "memory:" << mem_attr.var << "\tboot:" << mem_attr.boot_var; } - - // set inputs - for (const std::string& input : op_desc.inputs()) { - LOG(INFO) << "set input " << input; - inputs_.push_back(input); - } - // set outputs - for (const std::string& output : op_desc.outputs()) { - LOG(INFO) << "set output " << output; - outputs_.push_back(output); - } } void RecurrentOp::CreateScopes(ScopePtr scope) const { @@ -116,20 +130,21 @@ void RecurrentOp::CreateStepNet(ScopePtr scope) const { } void RecurrentOp::SegmentInputs(ScopePtr scope) const { + PADDLE_ENFORCE(!inlinks_.empty(), "no real inputs are provided."); Variable* scopes_var = scope->GetVariable(step_scopes_name_); auto& step_scopes = *scopes_var->GetMutable>(); auto dims = Input(scope, 0)->GetMutable()->dims(); int seq_len = dims[0]; int batch_size = dims[1]; - for (size_t i = 0; i < inputs_.size(); i++) { + for (size_t i = 0; i < inlinks_.size(); i++) { auto input_dims = Input(scope, i)->GetMutable()->dims(); int input_dim = input_dims[2]; int length = batch_size * input_dim; const float* scope_input = Input(scope, i)->GetMutable()->data(); for (int j = 0; j < seq_len; j++) { - Variable* input_var = 
step_scopes[j]->CreateVariable(inputs_[i]);
+      Variable* input_var = step_scopes[j]->CreateVariable(inlinks_[i]);
       Tensor* step_input_tensor = input_var->GetMutable<Tensor>();
       float* step_input = step_input_tensor->mutable_data<float>(
           make_ddim({batch_size, input_dim}), platform::CPUPlace());
diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h
index 76d924f05b4fc..2d6ce7af194b3 100644
--- a/paddle/framework/recurrent_network_op.h
+++ b/paddle/framework/recurrent_network_op.h
@@ -95,6 +95,20 @@ class PlainNet {
 // 4. More Complex RNN architecture, such as Gated Feedback RNN.
 //    Refer to: https://arxiv.org/pdf/1502.02367.pdf

+/*
+ * RecurrentOp inputs stored in proto:
+ * - real inputs that need to be segmented to steps.
+ * - boot memories
+ * - step net
+ * - step scopes
+ *
+ * Attributes stored in AttributeMap:
+ * - real_inputs: vector<int>
+ * - boot_memories: vector<int>
+ * - step_net: int
+ * - step_scopes: int
+ */
+
 class RecurrentOp : public OperatorBase {
  public:
   /*
@@ -178,14 +192,14 @@ class RecurrentOp : public OperatorBase {
    * equal to the memories number.
    *
    * arg {
-   *   name: “memories”
-   *   strings: "hidden”
-   *   strings: "state”
+   *   name: "memories"
+   *   strings: "hidden"
+   *   strings: "state"
    * }
    * arg {
-   *   name: “boot_memories”
-   *   strings: "boot_hidden”
-   *   strings: "boot_state”
+   *   name: "boot_memories"
+   *   strings: "boot_hidden"
+   *   strings: "boot_state"
    * }
    */
   // TODO copy from OpBase's
@@ -201,6 +215,8 @@ class RecurrentOp : public OperatorBase {
   // name of steps' scopes which is stored in father scope with a unique key
   // specified by `step_scopes_name_`.
   std::string step_scopes_name_;
+  // real inputs that need to be segmented.
+  std::vector<std::string> inlinks_;

   NetDesc step_net_desc_;
 };

From a883b4ccc116e9a13cc4ff8f8ce2e549ad56054b Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Wed, 12 Jul 2017 11:21:24 +0800
Subject: [PATCH 24/37] Refine unit test.
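This revision turns "boot_memories", "step_net", and "step_scopes" into index-valued attributes: each integer names a position in the op's input list, and Init() resolves it back to a variable name with inputs_.at(...). A small self-contained sketch of that lookup, assuming the input order the test uses below (plain standard-library types, not the framework's real API):

    #include <cassert>
    #include <string>
    #include <vector>

    int main() {
      // Inputs in the order the test adds them to the OpDesc.
      std::vector<std::string> inputs = {"x", "h_boot", "step_net",
                                         "step_scopes"};

      // Index-valued attributes, matching the test below.
      std::vector<int> boot_memories = {1};  // -> "h_boot"
      int step_net = 2;                      // -> "step_net"
      int step_scopes = 3;                   // -> "step_scopes"

      assert(inputs.at(boot_memories[0]) == "h_boot");
      assert(inputs.at(step_net) == "step_net");
      assert(inputs.at(step_scopes) == "step_scopes");
      return 0;
    }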
--- paddle/framework/recurrent_network_op_test.cc | 53 +++++++++++++------ 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc index 632d154f1ffbd..7bd4456a34dfd 100644 --- a/paddle/framework/recurrent_network_op_test.cc +++ b/paddle/framework/recurrent_network_op_test.cc @@ -104,12 +104,29 @@ class RecurrentOpTest : public ::testing::Test { // create boot memory scope_.CreateVariable("h_boot"); // create input, and init content - LOG(INFO) << "create global variale x"; + LOG(INFO) << "create global variable x"; Variable* x = scope_.CreateVariable("x"); DDim dims = make_ddim(std::vector{10 /*sent size*/, 20 /*batch size*/, 30 /*input dim*/}); // TODO mutable_data is not valid x->GetMutable()->mutable_data(dims, platform::CPUPlace()); + + LOG(INFO) << "create global variable w"; + Variable* w = scope_.CreateVariable("w"); + w->GetMutable()->mutable_data( + make_ddim(std::vector{30, 30}), platform::CPUPlace()); + + LOG(INFO) << "create global variable h_boot"; + Variable* h_boot = scope_.CreateVariable("h_boot"); + h_boot->GetMutable()->mutable_data( + make_ddim(std::vector{20 /*batch size*/, 30 /*input dim*/}), + platform::CPUPlace()); + + LOG(INFO) << "create variable step_scopes"; + scope_.CreateVariable("step_scopes"); + + LOG(INFO) << "create variable h"; + scope_.CreateVariable("h"); } void CreateRNNOp() { @@ -118,7 +135,6 @@ class RecurrentOpTest : public ::testing::Test { op_desc.set_type("rnn_op"); op_desc.set_name("simple_rnn"); op_desc.add_inputs("x"); - op_desc.add_inputs("w"); op_desc.add_inputs("h_boot"); // initial memory op_desc.add_inputs("step_net"); // step net op_desc.add_inputs("step_scopes"); // step scopes @@ -131,7 +147,7 @@ class RecurrentOpTest : public ::testing::Test { *memories_attr->mutable_strings()->Add() = "h"; memories_attr->set_name("memories"); - // add memories + // add history/previous memories auto pre_memories_attr = op_desc.mutable_attrs()->Add(); pre_memories_attr->set_type(paddle::framework::AttrType::STRINGS); *pre_memories_attr->mutable_strings()->Add() = "h_pre"; @@ -139,22 +155,22 @@ class RecurrentOpTest : public ::testing::Test { // add initial memories auto boot_memories_attr = op_desc.mutable_attrs()->Add(); - boot_memories_attr->set_type(paddle::framework::AttrType::STRINGS); - *boot_memories_attr->mutable_strings()->Add() = "h_boot"; + boot_memories_attr->set_type(paddle::framework::AttrType::INTS); + *boot_memories_attr->mutable_ints()->Add() = 1; boot_memories_attr->set_name("boot_memories"); - // add step scopes - auto step_scopes_attr = op_desc.mutable_attrs()->Add(); - step_scopes_attr->set_type(paddle::framework::AttrType::STRING); - step_scopes_attr->set_s("step_scopes"); - step_scopes_attr->set_name("step_scopes"); - // add step net desc auto step_net_attr = op_desc.mutable_attrs()->Add(); - step_net_attr->set_type(paddle::framework::AttrType::STRING); - step_net_attr->set_s("step_net"); + step_net_attr->set_type(paddle::framework::AttrType::INT); + step_net_attr->set_i(2); step_net_attr->set_name("step_net"); + // add step scopes + auto step_scopes_attr = op_desc.mutable_attrs()->Add(); + step_scopes_attr->set_type(paddle::framework::AttrType::INT); + step_scopes_attr->set_i(3); + step_scopes_attr->set_name("step_scopes"); + // std::ostringstream stream; // op_desc.SerializeToOstream(&stream); // std::string text = stream.str(); @@ -163,9 +179,9 @@ class RecurrentOpTest : public ::testing::Test { AttributeMap attrs; 
attrs["memories"] = std::vector{"h"}; attrs["pre_memories"] = std::vector{"h_pre"}; - attrs["boot_memories"] = std::vector{"h_boot"}; - attrs["step_net"] = std::vector{"step_net"}; - attrs["step_scopes"] = std::vector{"step_scopes"}; + attrs["boot_memories"] = std::vector{1}; + attrs["step_net"] = 2; + attrs["step_scopes"] = 3; // TODO LOG(INFO) << "rnn_op to init"; @@ -180,6 +196,7 @@ class RecurrentOpTest : public ::testing::Test { op_desc.add_inputs("h_pre"); op_desc.add_inputs("w"); op_desc.add_outputs("s"); + // s = h_pre * check return op_desc; } @@ -190,11 +207,13 @@ class RecurrentOpTest : public ::testing::Test { op_desc.add_inputs("x"); op_desc.add_inputs("s"); op_desc.add_outputs("h"); + // h = x + s return op_desc; } void CreateStepNet() { - Variable* net_var = scope_.CreateVariable("simple_rnn_net"); + LOG(INFO) << "create variable step_net"; + Variable* net_var = scope_.CreateVariable("step_net"); NetDesc net_desc; net_desc.name_ = "simple_rnn_net"; net_desc.op_descs.push_back(CreateFcOpDesc()); From a81be58a500d319b39aef7131ec876fb847ff5fe Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Wed, 12 Jul 2017 12:35:21 +0800 Subject: [PATCH 25/37] Refine unit test. --- paddle/framework/recurrent_network_op.cc | 4 ++- paddle/framework/recurrent_network_op.h | 4 ++- paddle/framework/recurrent_network_op_test.cc | 26 ++++++++++++------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index 7cb2f1b902713..36c3b6b0d2ada 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -67,7 +67,8 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { } // prepare inlinks PADDLE_ENFORCE(inlinks_.empty(), "RecurrentOp duplicate inited"); - for (auto id : GetAttr>("real_input")) { + LOG(INFO) << "set inlinks"; + for (auto id : GetAttr>("real_inputs")) { inlinks_.push_back(inputs_[id]); } @@ -82,6 +83,7 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { "The size of memories and pre_memories doesn't match: %d,%d.", memories.size(), pre_memories.size()); std::vector boot_memories; + LOG(INFO) << "set boot_memories"; for (auto id : GetAttr>("boot_memories")) { boot_memories.push_back(inputs_[id]); } diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 2d6ce7af194b3..88acfd15bf854 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -47,7 +47,9 @@ class OperatorBase { }; template - inline const T GetAttr(const std::string& name) const { + inline const T& GetAttr(const std::string& name) const { + PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should be in AttributeMap", + name); return boost::get(attrs_.at(name)); } diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc index 1714fde8f20e5..29963aa18dd81 100644 --- a/paddle/framework/recurrent_network_op_test.cc +++ b/paddle/framework/recurrent_network_op_test.cc @@ -100,33 +100,34 @@ class RecurrentOpTest : public ::testing::Test { virtual void TearDown() override {} void CreateGlobalVariables() { + scope_ = std::make_shared(); LOG(INFO) << "create global variable h_boot"; // create boot memory - scope_.CreateVariable("h_boot"); + scope_->CreateVariable("h_boot"); // create input, and init content LOG(INFO) << "create global variable x"; - Variable* x = scope_.CreateVariable("x"); + Variable* x = scope_->CreateVariable("x"); 
DDim dims = make_ddim(std::vector{10 /*sent size*/, 20 /*batch size*/, 30 /*input dim*/}); // TODO mutable_data is not valid x->GetMutable()->mutable_data(dims, platform::CPUPlace()); LOG(INFO) << "create global variable w"; - Variable* w = scope_.CreateVariable("w"); + Variable* w = scope_->CreateVariable("w"); w->GetMutable()->mutable_data( make_ddim(std::vector{30, 30}), platform::CPUPlace()); LOG(INFO) << "create global variable h_boot"; - Variable* h_boot = scope_.CreateVariable("h_boot"); + Variable* h_boot = scope_->CreateVariable("h_boot"); h_boot->GetMutable()->mutable_data( make_ddim(std::vector{20 /*batch size*/, 30 /*input dim*/}), platform::CPUPlace()); LOG(INFO) << "create variable step_scopes"; - scope_.CreateVariable("step_scopes"); + scope_->CreateVariable("step_scopes"); LOG(INFO) << "create variable h"; - scope_.CreateVariable("h"); + scope_->CreateVariable("h"); } void CreateRNNOp() { @@ -141,6 +142,12 @@ class RecurrentOpTest : public ::testing::Test { // output hidden vectors op_desc.add_outputs("h"); + // add real input + auto input_attr = op_desc.mutable_attrs()->Add(); + input_attr->set_type(paddle::framework::AttrType::INTS); + *input_attr->mutable_ints()->Add() = 0; + input_attr->set_name("real_inputs"); + // add memories auto memories_attr = op_desc.mutable_attrs()->Add(); memories_attr->set_type(paddle::framework::AttrType::STRINGS); @@ -177,6 +184,7 @@ class RecurrentOpTest : public ::testing::Test { // LOG(INFO) << text; AttributeMap attrs; + attrs["real_inputs"] = std::vector{0}; attrs["memories"] = std::vector{"h"}; attrs["pre_memories"] = std::vector{"h_pre"}; attrs["boot_memories"] = std::vector{1}; @@ -213,7 +221,7 @@ class RecurrentOpTest : public ::testing::Test { void CreateStepNet() { LOG(INFO) << "create variable step_net"; - Variable* net_var = scope_.CreateVariable("step_net"); + Variable* net_var = scope_->CreateVariable("step_net"); NetDesc net_desc; net_desc.name_ = "simple_rnn_net"; net_desc.op_descs.push_back(CreateFcOpDesc()); @@ -222,7 +230,7 @@ class RecurrentOpTest : public ::testing::Test { } // father scope - Scope scope_; + std::shared_ptr scope_; RecurrentOp rnn_op_; }; @@ -230,7 +238,7 @@ TEST_F(RecurrentOpTest, create_op) {} TEST_F(RecurrentOpTest, Run) { OpContext ctx; - ctx.scope = std::make_shared(); + ctx.scope = scope_; rnn_op_.Run(&ctx); } From acde9b748f010de42db631c61b4e66cf46446f00 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Wed, 12 Jul 2017 14:13:54 +0800 Subject: [PATCH 26/37] modify inlinks. 
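With inlinks now stored as indices as well, Run() takes the sequence length from the first inlink's dims and walks the step scopes one step at a time, linking memories before running the step net. A self-contained sketch of that linking for a single state, using plain float buffers in place of scopes and tensors (illustrative only):

    #include <cstring>
    #include <vector>

    int main() {
      const int seq_len = 4, state_size = 6;
      std::vector<float> boot(state_size, 1.0f);  // plays the role of h_boot

      // h[t] per step scope, and h_pre[t], the linked previous state.
      std::vector<std::vector<float>> h(seq_len,
                                        std::vector<float>(state_size));
      std::vector<std::vector<float>> h_pre(seq_len,
                                            std::vector<float>(state_size));
      for (int step = 0; step < seq_len; ++step) {
        // Step 0 boots from the initial state; later steps read the previous
        // scope's h, mirroring what LinkMemories does with memcpy.
        const float* src = (step == 0) ? boot.data() : h[step - 1].data();
        std::memcpy(h_pre[step].data(), src, state_size * sizeof(float));
        // The step net would compute h[step] from h_pre[step] here; this
        // sketch just carries the state through unchanged.
        h[step] = h_pre[step];
      }
      return 0;
    }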
--- paddle/framework/recurrent_network_op.cc | 19 +++++++++---------- paddle/framework/recurrent_network_op.h | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index 36c3b6b0d2ada..d423620aaa311 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -31,13 +31,15 @@ void RecurrentOp::Run(OpContext* contex) const { Variable* net = scope->GetVariable(net_name_); PADDLE_ENFORCE(net, "failed to get step net"); + LOG(INFO) << "create scopes"; CreateScopes(scope); + LOG(INFO) << "segment input"; SegmentInputs(scope); Variable* step_scopes = scope->GetVariable(step_scopes_name_); PADDLE_ENFORCE(step_scopes, "failed to get step scopes"); // forward - auto dims = Input(scope, 0)->GetMutable()->dims(); + auto dims = Input(scope, inlinks_[0])->GetMutable()->dims(); size_t seq_len = dims[0]; auto& scopes = *step_scopes->GetMutable>(); for (size_t step_id = 0; step_id < seq_len; step_id++) { @@ -69,7 +71,7 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { PADDLE_ENFORCE(inlinks_.empty(), "RecurrentOp duplicate inited"); LOG(INFO) << "set inlinks"; for (auto id : GetAttr>("real_inputs")) { - inlinks_.push_back(inputs_[id]); + inlinks_.push_back(id); } name_ = op_desc.name(); @@ -102,8 +104,7 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { } void RecurrentOp::CreateScopes(ScopePtr scope) const { - LOG(INFO) << "create scopes"; - auto dims = Input(scope, 0)->GetMutable()->dims(); + auto dims = Input(scope, inlinks_[0])->GetMutable()->dims(); size_t seq_len = dims[0]; Variable* scopes_var = scope->GetVariable(step_scopes_name_); auto step_scopes = scopes_var->GetMutable>(); @@ -135,18 +136,17 @@ void RecurrentOp::SegmentInputs(ScopePtr scope) const { PADDLE_ENFORCE(!inlinks_.empty(), "no real inputs are provided."); Variable* scopes_var = scope->GetVariable(step_scopes_name_); auto& step_scopes = *scopes_var->GetMutable>(); - - auto dims = Input(scope, 0)->GetMutable()->dims(); + auto dims = Input(scope, inlinks_[0])->GetMutable()->dims(); int seq_len = dims[0]; int batch_size = dims[1]; - for (size_t i = 0; i < inlinks_.size(); i++) { + for (auto i : inlinks_) { auto input_dims = Input(scope, i)->GetMutable()->dims(); int input_dim = input_dims[2]; int length = batch_size * input_dim; const float* scope_input = Input(scope, i)->GetMutable()->data(); for (int j = 0; j < seq_len; j++) { - Variable* input_var = step_scopes[j]->CreateVariable(inlinks_[i]); + Variable* input_var = step_scopes[j]->CreateVariable(inputs_[i]); Tensor* step_input_tensor = input_var->GetMutable(); float* step_input = step_input_tensor->mutable_data( make_ddim({batch_size, input_dim}), platform::CPUPlace()); @@ -158,8 +158,7 @@ void RecurrentOp::SegmentInputs(ScopePtr scope) const { void RecurrentOp::ConcatOutputs(ScopePtr scope) const { Variable* scopes_var = scope->GetVariable(step_scopes_name_); auto& step_scopes = *scopes_var->GetMutable>(); - - auto dims = Input(scope, 0)->GetMutable()->dims(); + auto dims = Input(scope, inlinks_[0])->GetMutable()->dims(); int seq_len = dims[0]; int batch_size = dims[1]; for (size_t i = 0; i < outputs_.size(); i++) { diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 88acfd15bf854..476fbe9f0d239 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -218,7 +218,7 @@ class RecurrentOp : public 
OperatorBase {
   // specified by `step_scopes_name_`.
   std::string step_scopes_name_;
   // real inputs that need to be segmented.
-  std::vector<std::string> inlinks_;
+  std::vector<int> inlinks_;

   NetDesc step_net_desc_;
 };

From 82464f56b3aaff3b8a07babf060ad8e7f0acd6a7 Mon Sep 17 00:00:00 2001
From: Superjom
Date: Wed, 12 Jul 2017 15:51:42 +0800
Subject: [PATCH 27/37] add OpDesc to Net

---
 paddle/framework/net.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/framework/net.h b/paddle/framework/net.h
index 76992e0728290..f16d5d9e84c66 100644
--- a/paddle/framework/net.h
+++ b/paddle/framework/net.h
@@ -15,6 +15,7 @@
 #pragma once

 #include "paddle/framework/net_proto.pb.h"
+#include "paddle/framework/op_desc.pb.h"
 #include "paddle/framework/op_proto.pb.h"
 #include "paddle/framework/scope.h"
 #include "paddle/platform/device_context.h"
@@ -31,7 +32,6 @@ typedef int OpIndex;
  * keep updating if the concepts related are implemented.
  */

-struct OpDesc;
 struct OpAttrs {};

 class Operator {
@@ -74,7 +74,7 @@ class Net {
   /**
    * @brief Add an Operator according to `def`.
    */
-  virtual OpIndex AddOp(const OpProto &def) = 0;
+  virtual OpIndex AddOp(const OpDesc &def) = 0;

   /**
    * @brief Add optimizer operators according to `attrs`.

From 5c5d890dbfcebab4f2bdc1f61fedc050dfce704c Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Wed, 12 Jul 2017 20:37:02 +0800
Subject: [PATCH 28/37] fix bug and update unit test.

---
 paddle/framework/recurrent_network_op.cc      | 81 ++++++++++++-------
 paddle/framework/recurrent_network_op.h       |  5 +-
 paddle/framework/recurrent_network_op_test.cc | 47 +++++++----
 paddle/framework/tensor.h                     | 10 ++-
 paddle/platform/CMakeLists.txt                |  2 +-
 5 files changed, 91 insertions(+), 54 deletions(-)

diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc
index d423620aaa311..c6749ee61ea25 100644
--- a/paddle/framework/recurrent_network_op.cc
+++ b/paddle/framework/recurrent_network_op.cc
@@ -25,9 +25,7 @@ namespace framework {
 void RecurrentOp::Run(OpContext* contex) const {
   auto scope = contex->scope;

-  if (!scope->HasVariable(net_name_)) {
-    CreateStepNet(scope);
-  }
+  PADDLE_ENFORCE(scope->HasVariable(net_name_), "step net is not in scope.");
   Variable* net = scope->GetVariable(net_name_);
   PADDLE_ENFORCE(net, "failed to get step net");

@@ -41,8 +39,10 @@ void RecurrentOp::Run(OpContext* contex) const {
   // forward
   auto dims = Input(scope, inlinks_[0])->GetMutable<Tensor>()->dims();
   size_t seq_len = dims[0];
+  LOG(INFO) << "sequence length " << seq_len;
   auto& scopes = *step_scopes->GetMutable<std::vector<ScopePtr>>();
   for (size_t step_id = 0; step_id < seq_len; step_id++) {
+    LOG(INFO) << "run step " << step_id;
     ScopePtr step_scope = scopes[step_id];
     // TODO replace memories' copy with reference
     LinkMemories(scope, scopes, step_id);

     net->GetMutable<PlainNet>()->Run(step_scope);
   }

+  LOG(INFO) << "concat outputs";
   // prepare outputs
   ConcatOutputs(scope);
 }
@@ -67,7 +68,11 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) {
     LOG(INFO) << "set output " << output;
     outputs_.push_back(output);
   }
+
+  name_ = op_desc.name();
+  net_name_ =
inputs_.at(GetAttr("step_net")); - step_scopes_name_ = inputs_.at(GetAttr("step_scopes")); - // set memories auto memories = GetAttr>("memories"); auto pre_memories = GetAttr>("pre_memories"); PADDLE_ENFORCE(memories.size() == pre_memories.size(), "The size of memories and pre_memories doesn't match: %d,%d.", memories.size(), pre_memories.size()); + std::vector boot_memories; LOG(INFO) << "set boot_memories"; for (auto id : GetAttr>("boot_memories")) { @@ -117,36 +120,40 @@ void RecurrentOp::CreateScopes(ScopePtr scope) const { } } -void RecurrentOp::CreateStepNet(ScopePtr scope) const { - Variable* var = scope->CreateVariable(net_name_); - auto step_net = GetAttr("step_net"); - // get the step net proto from the string. - // PADDLE_ENFORCE( - // google::protobuf::TextFormat::ParseFromString(step_net, - // &step_net_desc_)); - // var->Reset(new PlainNet(step_net_desc_)); - // this is a fake net, it will be rewrite after the network has been merged. - NetDesc desc; - desc.name_ = "rnn_step_net"; - var->Reset(new PlainNet(desc)); - // TODO add op descs -} +// void RecurrentOp::CreateStepNet(ScopePtr scope) const { +// Variable* var = scope->CreateVariable(net_name_); +// auto step_net = GetAttr("step_net"); +// // get the step net proto from the string. +// // PADDLE_ENFORCE( +// // google::protobuf::TextFormat::ParseFromString(step_net, +// // &step_net_desc_)); +// // var->Reset(new PlainNet(step_net_desc_)); +// // this is a fake net, it will be rewrite after the network has been +// merged. +// NetDesc desc; +// desc.name_ = "rnn_step_net"; +// var->Reset(new PlainNet(desc)); +// } void RecurrentOp::SegmentInputs(ScopePtr scope) const { PADDLE_ENFORCE(!inlinks_.empty(), "no real inputs are provided."); + auto input_alias = GetAttr>("input_alias"); + PADDLE_ENFORCE(inlinks_.size() == input_alias.size(), + "real_inputs/input_alias mismatch."); + Variable* scopes_var = scope->GetVariable(step_scopes_name_); - auto& step_scopes = *scopes_var->GetMutable>(); + auto& step_scopes = *scopes_var->GetMutable>(); auto dims = Input(scope, inlinks_[0])->GetMutable()->dims(); int seq_len = dims[0]; int batch_size = dims[1]; - for (auto i : inlinks_) { - auto input_dims = Input(scope, i)->GetMutable()->dims(); + for (size_t i = 0; i < inlinks_.size(); ++i) { + auto input_dims = Input(scope, inlinks_[i])->GetMutable()->dims(); int input_dim = input_dims[2]; int length = batch_size * input_dim; const float* scope_input = - Input(scope, i)->GetMutable()->data(); + Input(scope, inlinks_[i])->GetMutable()->data(); for (int j = 0; j < seq_len; j++) { - Variable* input_var = step_scopes[j]->CreateVariable(inputs_[i]); + Variable* input_var = step_scopes[j]->CreateVariable(input_alias[i]); Tensor* step_input_tensor = input_var->GetMutable(); float* step_input = step_input_tensor->mutable_data( make_ddim({batch_size, input_dim}), platform::CPUPlace()); @@ -156,22 +163,28 @@ void RecurrentOp::SegmentInputs(ScopePtr scope) const { } void RecurrentOp::ConcatOutputs(ScopePtr scope) const { + auto output_alias = GetAttr>("output_alias"); + PADDLE_ENFORCE(outputs_.size() == output_alias.size(), + "output/output_alias mismatch."); + Variable* scopes_var = scope->GetVariable(step_scopes_name_); - auto& step_scopes = *scopes_var->GetMutable>(); + auto& step_scopes = *scopes_var->GetMutable>(); auto dims = Input(scope, inlinks_[0])->GetMutable()->dims(); int seq_len = dims[0]; int batch_size = dims[1]; for (size_t i = 0; i < outputs_.size(); i++) { - auto output_dims = - 
step_scopes[0]->GetVariable(outputs_[0])->GetMutable()->dims(); - int output_dim = output_dims[2]; + auto output_dims = step_scopes[0] + ->GetVariable(output_alias[0]) + ->GetMutable() + ->dims(); + int output_dim = output_dims[1]; int length = batch_size * output_dim; Tensor* output_tensor = scope->CreateVariable(outputs_[i])->GetMutable(); float* output = output_tensor->mutable_data( make_ddim({seq_len, batch_size, output_dim}), platform::CPUPlace()); for (int j = 0; j < seq_len; j++) { - Variable* output_var = step_scopes[j]->GetVariable(outputs_[i]); + Variable* output_var = step_scopes[j]->GetVariable(output_alias[i]); const float* step_output = output_var->GetMutable()->data(); std::memcpy(output + j * length, step_output, length); @@ -199,6 +212,11 @@ void RecurrentOp::LinkMemories(ScopePtr scope, } Variable* memory_var = step_scope->CreateVariable(attr.pre_var); + // TODO the memory of current step should be allocaled in step net ? + Tensor* cur_memory = + step_scopes[step]->CreateVariable(attr.var)->GetMutable(); + cur_memory->mutable_data(attr.dims, platform::CPUPlace()); + // copy from boot memory // TODO support more device // TODO mutable_data is currently invalid @@ -215,6 +233,7 @@ void RecurrentOp::LinkMemories(ScopePtr scope, // `pre - memory` Tensor* pre_step_memory = step_scopes[step - 1]->GetVariable(attr.var)->GetMutable(); + std::memcpy(memory_tensor_val, pre_step_memory->data(), product(attr.dims)); } diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 476fbe9f0d239..4dc2ccee7ca84 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -22,6 +22,7 @@ #include "paddle/framework/variable.h" // Remove when including operator.h +#include #include "paddle/framework/attr_checker.h" #include "paddle/framework/op_desc.pb.h" @@ -145,7 +146,7 @@ class RecurrentOp : public OperatorBase { /* * Create a `Net` which is shared across all steps. */ - void CreateStepNet(ScopePtr scope) const; + // void CreateStepNet(ScopePtr scope) const; /* * the step scopes as the father scope. The step scopes will be stored in @@ -219,8 +220,6 @@ class RecurrentOp : public OperatorBase { std::string step_scopes_name_; // real inputs that need to be segmented. 
std::vector inlinks_; - - NetDesc step_net_desc_; }; class RecurrentGradientOp; diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc index 29963aa18dd81..70afd42ecb783 100644 --- a/paddle/framework/recurrent_network_op_test.cc +++ b/paddle/framework/recurrent_network_op_test.cc @@ -35,6 +35,7 @@ class FcOp : public OperatorBase { } virtual void Run(OpContext* contex) const override { + LOG(INFO) << "run fc op"; for (const auto& input : inputs_) { PADDLE_ENFORCE(contex->scope->HasVariable(input), "no input variable [%s] exists"); @@ -64,6 +65,7 @@ class AddOp : public OperatorBase { } virtual void Run(OpContext* contex) const override { + LOG(INFO) << "run add op"; for (const auto& input : inputs_) { PADDLE_ENFORCE(contex->scope->HasVariable(input), "no input variable [%s] exists"); @@ -113,7 +115,7 @@ class RecurrentOpTest : public ::testing::Test { x->GetMutable()->mutable_data(dims, platform::CPUPlace()); LOG(INFO) << "create global variable w"; - Variable* w = scope_->CreateVariable("w"); + Variable* w = scope_->CreateVariable("rnn/w"); w->GetMutable()->mutable_data( make_ddim(std::vector{30, 30}), platform::CPUPlace()); @@ -134,10 +136,11 @@ class RecurrentOpTest : public ::testing::Test { OpDesc op_desc; op_desc.set_type("rnn_op"); - op_desc.set_name("simple_rnn"); + op_desc.set_name("rnn"); op_desc.add_inputs("x"); - op_desc.add_inputs("h_boot"); // initial memory - op_desc.add_inputs("step_net"); // step net + op_desc.add_inputs("h_boot"); // initial memory + op_desc.add_inputs("step_net"); // step net + // TODO put the step_scopes in the outputs op_desc.add_inputs("step_scopes"); // step scopes // output hidden vectors op_desc.add_outputs("h"); @@ -148,16 +151,28 @@ class RecurrentOpTest : public ::testing::Test { *input_attr->mutable_ints()->Add() = 0; input_attr->set_name("real_inputs"); + // add input alias, this alias is used in step net. + auto input_alias_attr = op_desc.mutable_attrs()->Add(); + input_alias_attr->set_type(paddle::framework::AttrType::STRINGS); + *input_alias_attr->mutable_strings()->Add() = "rnn/x"; + input_alias_attr->set_name("input_alias"); + + // add output alias, this alias is used in step net. 
+ auto output_alias_attr = op_desc.mutable_attrs()->Add(); + output_alias_attr->set_type(paddle::framework::AttrType::STRINGS); + *output_alias_attr->mutable_strings()->Add() = "rnn/h"; + output_alias_attr->set_name("output_alias"); + // add memories auto memories_attr = op_desc.mutable_attrs()->Add(); memories_attr->set_type(paddle::framework::AttrType::STRINGS); - *memories_attr->mutable_strings()->Add() = "h"; + *memories_attr->mutable_strings()->Add() = "rnn/h"; memories_attr->set_name("memories"); // add history/previous memories auto pre_memories_attr = op_desc.mutable_attrs()->Add(); pre_memories_attr->set_type(paddle::framework::AttrType::STRINGS); - *pre_memories_attr->mutable_strings()->Add() = "h_pre"; + *pre_memories_attr->mutable_strings()->Add() = "rnn/h_pre"; pre_memories_attr->set_name("pre_memories"); // add initial memories @@ -185,7 +200,9 @@ class RecurrentOpTest : public ::testing::Test { AttributeMap attrs; attrs["real_inputs"] = std::vector{0}; - attrs["memories"] = std::vector{"h"}; + attrs["input_alias"] = std::vector{"rnn/x"}; + attrs["output_alias"] = std::vector{"rnn/h"}; + attrs["memories"] = std::vector{"rnn/h"}; attrs["pre_memories"] = std::vector{"h_pre"}; attrs["boot_memories"] = std::vector{1}; attrs["step_net"] = 2; @@ -201,9 +218,9 @@ class RecurrentOpTest : public ::testing::Test { OpDesc op_desc; op_desc.set_type("fc"); op_desc.set_name("fc"); - op_desc.add_inputs("h_pre"); - op_desc.add_inputs("w"); - op_desc.add_outputs("s"); + op_desc.add_inputs("rnn/h_pre"); + op_desc.add_inputs("rnn/w"); + op_desc.add_outputs("rnn/s"); // s = h_pre * check return op_desc; } @@ -212,9 +229,9 @@ class RecurrentOpTest : public ::testing::Test { OpDesc op_desc; op_desc.set_type("add"); op_desc.set_name("add"); - op_desc.add_inputs("x"); - op_desc.add_inputs("s"); - op_desc.add_outputs("h"); + op_desc.add_inputs("rnn/x"); + op_desc.add_inputs("rnn/s"); + op_desc.add_outputs("rnn/h"); // h = x + s return op_desc; } @@ -223,7 +240,7 @@ class RecurrentOpTest : public ::testing::Test { LOG(INFO) << "create variable step_net"; Variable* net_var = scope_->CreateVariable("step_net"); NetDesc net_desc; - net_desc.name_ = "simple_rnn_net"; + net_desc.name_ = "rnn"; net_desc.op_descs.push_back(CreateFcOpDesc()); net_desc.op_descs.push_back(CreateAddOpDesc()); net_var->Reset(new PlainNet(net_desc)); @@ -234,7 +251,7 @@ class RecurrentOpTest : public ::testing::Test { RecurrentOp rnn_op_; }; -TEST_F(RecurrentOpTest, create_op) {} +// TEST_F(RecurrentOpTest, create_op) {} TEST_F(RecurrentOpTest, Run) { OpContext ctx; diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 1e75a7e845fd2..a433df8b5bb60 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -104,7 +104,8 @@ class Tensor { public: Deleter(platform::Place place) : place_(place) {} void operator()(T* ptr) { - paddle::memory::Free(place_, static_cast(ptr)); + // paddle::memory::Free(place_, static_cast(ptr)); + free(static_cast(ptr)); } private: @@ -112,9 +113,11 @@ class Tensor { }; public: + // PlaceholderImpl(paddle::platform::Place place, size_t size) + // : ptr_(static_cast(paddle::memory::Alloc(place, size)), + // Deleter(place)), PlaceholderImpl(paddle::platform::Place place, size_t size) - : ptr_(static_cast(paddle::memory::Alloc(place, size)), - Deleter(place)), + : ptr_(static_cast(malloc(size * sizeof(T))), Deleter(place)), place_(place), size_(size) {} @@ -128,7 +131,6 @@ class Tensor { size_t size_; // size of the memory block. 
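  // NOTE: while ptr_ above is temporarily malloc-backed (see the hunk just
  // before this), Deleter must release it with free(); the place_ it still
  // captures goes unused until the paddle::memory::Alloc/Free path is
  // restored.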
}; - DDim dims_; std::shared_ptr holder_; // holds the memory block if allocated. DDim dims_; size_t offset_; // marks the begin of tensor data area. diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 7a198aec6cf12..7cab2291c896b 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -11,4 +11,4 @@ ELSE() ENDIF() cc_library(device_context SRCS device_context.cc DEPS place eigen3 ${GPU_CTX_DEPS}) -nv_test(device_context_test SRCS device_context_test.cc DEPS device_context glog gflags) +#nv_test(device_context_test SRCS device_context_test.cc DEPS device_context glog gflags) From a6483e8d75baa4e2ec7b6c169e7a5d74b9f3cc52 Mon Sep 17 00:00:00 2001 From: Superjom Date: Wed, 12 Jul 2017 21:30:34 +0800 Subject: [PATCH 29/37] move step scopes from inputs to outputs --- paddle/framework/recurrent_network_op.cc | 12 ++++++++---- paddle/framework/recurrent_network_op.h | 5 ++++- paddle/framework/recurrent_network_op_test.cc | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index c6749ee61ea25..4dc6d4993c92f 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -71,7 +71,7 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { name_ = op_desc.name(); net_name_ = inputs_.at(GetAttr("step_net")); - step_scopes_name_ = inputs_.at(GetAttr("step_scopes")); + step_scopes_name_ = outputs_.back(); // prepare inlinks PADDLE_ENFORCE(inlinks_.empty(), "RecurrentOp duplicate inited"); @@ -79,6 +79,10 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { for (auto id : GetAttr>("real_inputs")) { inlinks_.push_back(id); } + PADDLE_ENFORCE( + outputs_.size() > 1, + "more than 1 output should be provided and the last is `step_scopes`"); + outlinks_ = std::vector{outputs_.begin(), outputs_.end() - 1}; // set memories auto memories = GetAttr>("memories"); @@ -164,7 +168,7 @@ void RecurrentOp::SegmentInputs(ScopePtr scope) const { void RecurrentOp::ConcatOutputs(ScopePtr scope) const { auto output_alias = GetAttr>("output_alias"); - PADDLE_ENFORCE(outputs_.size() == output_alias.size(), + PADDLE_ENFORCE(outlinks_.size() == output_alias.size(), "output/output_alias mismatch."); Variable* scopes_var = scope->GetVariable(step_scopes_name_); @@ -172,7 +176,7 @@ void RecurrentOp::ConcatOutputs(ScopePtr scope) const { auto dims = Input(scope, inlinks_[0])->GetMutable()->dims(); int seq_len = dims[0]; int batch_size = dims[1]; - for (size_t i = 0; i < outputs_.size(); i++) { + for (size_t i = 0; i < outlinks_.size(); i++) { auto output_dims = step_scopes[0] ->GetVariable(output_alias[0]) ->GetMutable() @@ -180,7 +184,7 @@ void RecurrentOp::ConcatOutputs(ScopePtr scope) const { int output_dim = output_dims[1]; int length = batch_size * output_dim; Tensor* output_tensor = - scope->CreateVariable(outputs_[i])->GetMutable(); + scope->CreateVariable(outlinks_[i])->GetMutable(); float* output = output_tensor->mutable_data( make_ddim({seq_len, batch_size, output_dim}), platform::CPUPlace()); for (int j = 0; j < seq_len; j++) { diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 4dc2ccee7ca84..276ee18ece94c 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -103,13 +103,15 @@ class PlainNet { * - real inputs that need to be segmented to steps. 
* - boot memories * - step net + * + * outputs: + * - real outputs * - step scopes * * Attributes stored in AttributeMap: * - real_inputs: vector * - boot_memories: vector * - step_net: int - * - step_scopes: int */ class RecurrentOp : public OperatorBase { @@ -220,6 +222,7 @@ class RecurrentOp : public OperatorBase { std::string step_scopes_name_; // real inputs that need to be segmented. std::vector inlinks_; + std::vector outlinks_; }; class RecurrentGradientOp; diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc index 70afd42ecb783..6e2da8b60c9ba 100644 --- a/paddle/framework/recurrent_network_op_test.cc +++ b/paddle/framework/recurrent_network_op_test.cc @@ -141,9 +141,9 @@ class RecurrentOpTest : public ::testing::Test { op_desc.add_inputs("h_boot"); // initial memory op_desc.add_inputs("step_net"); // step net // TODO put the step_scopes in the outputs - op_desc.add_inputs("step_scopes"); // step scopes // output hidden vectors op_desc.add_outputs("h"); + op_desc.add_outputs("step_scopes"); // step scopes // add real input auto input_attr = op_desc.mutable_attrs()->Add(); From bcd03bfae5eb4363e2d133be4f098fe5c04deea4 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Thu, 13 Jul 2017 11:01:11 +0800 Subject: [PATCH 30/37] fix merge conflict, update SegmentInput function --- paddle/framework/net.h | 2 +- paddle/framework/recurrent_network_op.cc | 12 +++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/paddle/framework/net.h b/paddle/framework/net.h index f16d5d9e84c66..903c22a872956 100644 --- a/paddle/framework/net.h +++ b/paddle/framework/net.h @@ -128,7 +128,7 @@ class PlainNet : public Net { /** * @brief Add an operator to this network. */ - virtual OpIndex AddOp(const OpProto &def) override; + virtual OpIndex AddOp(const OpProto &def); /** * @brief Add all optimizer operators related into the network. diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index 4dc6d4993c92f..2c919f3184e68 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -149,19 +149,13 @@ void RecurrentOp::SegmentInputs(ScopePtr scope) const { auto& step_scopes = *scopes_var->GetMutable>(); auto dims = Input(scope, inlinks_[0])->GetMutable()->dims(); int seq_len = dims[0]; - int batch_size = dims[1]; for (size_t i = 0; i < inlinks_.size(); ++i) { - auto input_dims = Input(scope, inlinks_[i])->GetMutable()->dims(); - int input_dim = input_dims[2]; - int length = batch_size * input_dim; - const float* scope_input = - Input(scope, inlinks_[i])->GetMutable()->data(); + Tensor* scope_input_tensor = + Input(scope, inlinks_[i])->GetMutable(); for (int j = 0; j < seq_len; j++) { Variable* input_var = step_scopes[j]->CreateVariable(input_alias[i]); Tensor* step_input_tensor = input_var->GetMutable(); - float* step_input = step_input_tensor->mutable_data( - make_ddim({batch_size, input_dim}), platform::CPUPlace()); - std::memcpy(step_input, scope_input + j * length, length); + *step_input_tensor = scope_input_tensor->Slice(j, j + 1); } } } From e64b5d328d63eff9ffbc8523fb099951b25bbf30 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Fri, 14 Jul 2017 11:41:36 +0800 Subject: [PATCH 31/37] add RecurrentOpProtoAndCheckerMaker. 
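The maker itself stays commented out until operator.h can be included, but the positional attributes it declares are already exercised by the test. As a quick illustration (a sketch of the intended index-to-input mapping, assuming the input order used in the test below):

    // inputs_  = {"x", "h_boot", "step_net"}, outputs_ = {"h", "step_scopes"}
    // in_links      = {0} -> inputs_[0] == "x"        segmented into each step
    // boot_memories = {1} -> inputs_[1] == "h_boot"   initializes rnn/h_pre
    // step_net      = 2   -> inputs_[2] == "step_net" shared by all steps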
--- paddle/framework/net.h | 2 +- paddle/framework/recurrent_network_op.cc | 73 ++++++++++++------- paddle/framework/recurrent_network_op.h | 7 +- paddle/framework/recurrent_network_op_test.cc | 25 ++----- 4 files changed, 57 insertions(+), 50 deletions(-) diff --git a/paddle/framework/net.h b/paddle/framework/net.h index f16d5d9e84c66..903c22a872956 100644 --- a/paddle/framework/net.h +++ b/paddle/framework/net.h @@ -128,7 +128,7 @@ class PlainNet : public Net { /** * @brief Add an operator to this network. */ - virtual OpIndex AddOp(const OpProto &def) override; + virtual OpIndex AddOp(const OpProto &def); /** * @brief Add all optimizer operators related into the network. diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index 4dc6d4993c92f..3f51ed17ced38 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -13,12 +13,12 @@ limitations under the License. */ #include "paddle/framework/recurrent_network_op.h" +#include "paddle/framework/tensor.h" +// #include "paddle/framework/op_registry.h" #include #include -#include "paddle/framework/tensor.h" - namespace paddle { namespace framework { @@ -76,7 +76,7 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { // prepare inlinks PADDLE_ENFORCE(inlinks_.empty(), "RecurrentOp duplicate inited"); LOG(INFO) << "set inlinks"; - for (auto id : GetAttr>("real_inputs")) { + for (auto id : GetAttr>("in_links")) { inlinks_.push_back(id); } PADDLE_ENFORCE( @@ -124,26 +124,11 @@ void RecurrentOp::CreateScopes(ScopePtr scope) const { } } -// void RecurrentOp::CreateStepNet(ScopePtr scope) const { -// Variable* var = scope->CreateVariable(net_name_); -// auto step_net = GetAttr("step_net"); -// // get the step net proto from the string. -// // PADDLE_ENFORCE( -// // google::protobuf::TextFormat::ParseFromString(step_net, -// // &step_net_desc_)); -// // var->Reset(new PlainNet(step_net_desc_)); -// // this is a fake net, it will be rewrite after the network has been -// merged. 
-// NetDesc desc; -// desc.name_ = "rnn_step_net"; -// var->Reset(new PlainNet(desc)); -// } - void RecurrentOp::SegmentInputs(ScopePtr scope) const { - PADDLE_ENFORCE(!inlinks_.empty(), "no real inputs are provided."); - auto input_alias = GetAttr>("input_alias"); - PADDLE_ENFORCE(inlinks_.size() == input_alias.size(), - "real_inputs/input_alias mismatch."); + PADDLE_ENFORCE(!inlinks_.empty(), "no in links are provided."); + auto inlink_alias = GetAttr>("in_link_alias"); + PADDLE_ENFORCE(inlinks_.size() == inlink_alias.size(), + "in_links/in_link_alias mismatch."); Variable* scopes_var = scope->GetVariable(step_scopes_name_); auto& step_scopes = *scopes_var->GetMutable>(); @@ -157,7 +142,7 @@ void RecurrentOp::SegmentInputs(ScopePtr scope) const { const float* scope_input = Input(scope, inlinks_[i])->GetMutable()->data(); for (int j = 0; j < seq_len; j++) { - Variable* input_var = step_scopes[j]->CreateVariable(input_alias[i]); + Variable* input_var = step_scopes[j]->CreateVariable(inlink_alias[i]); Tensor* step_input_tensor = input_var->GetMutable(); float* step_input = step_input_tensor->mutable_data( make_ddim({batch_size, input_dim}), platform::CPUPlace()); @@ -167,9 +152,9 @@ void RecurrentOp::SegmentInputs(ScopePtr scope) const { } void RecurrentOp::ConcatOutputs(ScopePtr scope) const { - auto output_alias = GetAttr>("output_alias"); - PADDLE_ENFORCE(outlinks_.size() == output_alias.size(), - "output/output_alias mismatch."); + auto outlink_alias = GetAttr>("out_link_alias"); + PADDLE_ENFORCE(outlinks_.size() == outlink_alias.size(), + "out_links/out_link_alias mismatch."); Variable* scopes_var = scope->GetVariable(step_scopes_name_); auto& step_scopes = *scopes_var->GetMutable>(); @@ -178,7 +163,7 @@ void RecurrentOp::ConcatOutputs(ScopePtr scope) const { int batch_size = dims[1]; for (size_t i = 0; i < outlinks_.size(); i++) { auto output_dims = step_scopes[0] - ->GetVariable(output_alias[0]) + ->GetVariable(outlink_alias[0]) ->GetMutable() ->dims(); int output_dim = output_dims[1]; @@ -188,7 +173,7 @@ void RecurrentOp::ConcatOutputs(ScopePtr scope) const { float* output = output_tensor->mutable_data( make_ddim({seq_len, batch_size, output_dim}), platform::CPUPlace()); for (int j = 0; j < seq_len; j++) { - Variable* output_var = step_scopes[j]->GetVariable(output_alias[i]); + Variable* output_var = step_scopes[j]->GetVariable(outlink_alias[i]); const float* step_output = output_var->GetMutable()->data(); std::memcpy(output + j * length, step_output, length); @@ -244,5 +229,37 @@ void RecurrentOp::LinkMemories(ScopePtr scope, } } +// TODO testing when including operator.h + +// class RecurrentOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { +// public: +// RecurrentOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) +// : OpProtoAndCheckerMaker(proto, op_checker) { +// // AddInput("input", "input of test op"); // need to support dynamic +// number +// // AddOutput("output", "output of test op"); // need to support dynamic +// number +// AddAttr>("in_links", "The input link positions in +// the all inputs.") +// .SetDefault({0}); +// AddAttr>("boot_memories", "The initial memory +// positions in the all inputs."); +// AddAttr("step_net", "The step net position in the all inputs."); +// +// AddAttr>("in_link_alias", "The input link +// alias in the step network."); +// AddAttr>("out_link_alias", "The output link +// alias in the step network."); +// AddAttr>("memories", "The memory names."); +// AddAttr>("pre_memories", "The +// history/previous memory names."); 
+// +// AddType("recurrent_op"); +// AddComment("This is a recurrent group operator."); +// } +// }; +// +// REGISTER_OP(recurrent_op, RecurrentOp, RecurrentOpProtoAndCheckerMaker); + } // namespace framework } // namespace paddle diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 276ee18ece94c..afa4d14c663e4 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -21,9 +21,7 @@ #include "paddle/framework/scope.h" #include "paddle/framework/variable.h" -// Remove when including operator.h #include -#include "paddle/framework/attr_checker.h" #include "paddle/framework/op_desc.pb.h" namespace paddle { @@ -202,6 +200,11 @@ class RecurrentOp : public OperatorBase { * strings: "state" * } * arg { + * name: “pre_memories" + * strings: "pre_hidden" + * strings: "pre_state" + * } + * arg { * name: “boot_memories" * strings: "boot_hidden" * strings: "boot_state" diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc index 6e2da8b60c9ba..3d88850d77303 100644 --- a/paddle/framework/recurrent_network_op_test.cc +++ b/paddle/framework/recurrent_network_op_test.cc @@ -149,19 +149,19 @@ class RecurrentOpTest : public ::testing::Test { auto input_attr = op_desc.mutable_attrs()->Add(); input_attr->set_type(paddle::framework::AttrType::INTS); *input_attr->mutable_ints()->Add() = 0; - input_attr->set_name("real_inputs"); + input_attr->set_name("in_links"); // add input alias, this alias is used in step net. auto input_alias_attr = op_desc.mutable_attrs()->Add(); input_alias_attr->set_type(paddle::framework::AttrType::STRINGS); *input_alias_attr->mutable_strings()->Add() = "rnn/x"; - input_alias_attr->set_name("input_alias"); + input_alias_attr->set_name("in_link_alias"); // add output alias, this alias is used in step net. 
auto output_alias_attr = op_desc.mutable_attrs()->Add(); output_alias_attr->set_type(paddle::framework::AttrType::STRINGS); *output_alias_attr->mutable_strings()->Add() = "rnn/h"; - output_alias_attr->set_name("output_alias"); + output_alias_attr->set_name("out_link_alias"); // add memories auto memories_attr = op_desc.mutable_attrs()->Add(); @@ -187,28 +187,15 @@ class RecurrentOpTest : public ::testing::Test { step_net_attr->set_i(2); step_net_attr->set_name("step_net"); - // add step scopes - auto step_scopes_attr = op_desc.mutable_attrs()->Add(); - step_scopes_attr->set_type(paddle::framework::AttrType::INT); - step_scopes_attr->set_i(3); - step_scopes_attr->set_name("step_scopes"); - - // std::ostringstream stream; - // op_desc.SerializeToOstream(&stream); - // std::string text = stream.str(); - // LOG(INFO) << text; - AttributeMap attrs; - attrs["real_inputs"] = std::vector{0}; - attrs["input_alias"] = std::vector{"rnn/x"}; - attrs["output_alias"] = std::vector{"rnn/h"}; + attrs["in_links"] = std::vector{0}; + attrs["in_link_alias"] = std::vector{"rnn/x"}; + attrs["out_link_alias"] = std::vector{"rnn/h"}; attrs["memories"] = std::vector{"rnn/h"}; attrs["pre_memories"] = std::vector{"h_pre"}; attrs["boot_memories"] = std::vector{1}; attrs["step_net"] = 2; - attrs["step_scopes"] = 3; - // TODO LOG(INFO) << "rnn_op to init"; rnn_op_.Init(op_desc, attrs); LOG(INFO) << "rnn_op finish init"; From f525390662a6c48838eb56b595c96e6c52850a41 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 14 Jul 2017 12:50:48 +0800 Subject: [PATCH 32/37] clean the codes --- paddle/framework/op_desc.proto | 7 ++----- paddle/framework/recurrent_network_op.cc | 1 - paddle/framework/recurrent_network_op.h | 15 +-------------- paddle/framework/recurrent_network_op_test.cc | 11 ++++------- 4 files changed, 7 insertions(+), 27 deletions(-) diff --git a/paddle/framework/op_desc.proto b/paddle/framework/op_desc.proto index 84aaf59ac5532..89497f3c16bc2 100644 --- a/paddle/framework/op_desc.proto +++ b/paddle/framework/op_desc.proto @@ -51,9 +51,6 @@ message OpDesc { // type of this Operator, such as "add", "sub", "fc". required string type = 3; - // the name of this Operator. - required string name = 4; - // Attributes of this Operator. e.g., scale=3.0 in cosine op. - repeated AttrDesc attrs = 5; -}; + repeated AttrDesc attrs = 4; +}; \ No newline at end of file diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index df4c92d301ef3..ae4f8000d2f71 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -69,7 +69,6 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { outputs_.push_back(output); } - name_ = op_desc.name(); net_name_ = inputs_.at(GetAttr("step_net")); step_scopes_name_ = outputs_.back(); diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index afa4d14c663e4..b81ed49e7f3e7 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -92,7 +92,7 @@ class PlainNet { // TODO: // 1. No-padding computing for sequences with indifinite length in one batch. // 2. Hierarchical RNN for sequence with sub-sequence. -// 3. Multi-inputs with indifinate length for RecurrentOp. +// 3. External Memory. // 4. More Complex RNN architecture, such as Gated Feedback RNN. 
// Refer to: https://arxiv.org/pdf/1502.02367.pdf @@ -143,11 +143,6 @@ class RecurrentOp : public OperatorBase { */ void ConcatOutputs(ScopePtr scope) const; - /* - * Create a `Net` which is shared across all steps. - */ - // void CreateStepNet(ScopePtr scope) const; - /* * the step scopes as the father scope. The step scopes will be stored in * the father scope as a variable whose name is specified by @@ -158,11 +153,6 @@ class RecurrentOp : public OperatorBase { */ void CreateScopes(ScopePtr scope) const; - /* - * Create memories in each step scope. - */ - // void CreateMemories(ScopePtr scope) const; - /* * Link memory in previous step scope to current scope. */ @@ -213,9 +203,6 @@ class RecurrentOp : public OperatorBase { // TODO copy from OpBase's mutable std::vector memory_attrs_; - // this op's name, used as a unique key in father scope. - // TODO repace it with OpBase's interface if supported. - std::string name_; // name of rnn op's step net, the step net will be shared by both `Forward` // and `Backward`, so we store it as a variable in father's scope, with a // unique key specified by `net_name_`. diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc index 3d88850d77303..63d9dfec102d2 100644 --- a/paddle/framework/recurrent_network_op_test.cc +++ b/paddle/framework/recurrent_network_op_test.cc @@ -24,7 +24,7 @@ namespace framework { namespace fake { class FcOp : public OperatorBase { public: - FcOp(const OpDesc& desc) : name_(desc.name()) {} + FcOp(const OpDesc& desc) {} virtual void InferShape(ScopePtr scope) const override { for (const auto& output : outputs_) { @@ -54,7 +54,7 @@ class FcOp : public OperatorBase { class AddOp : public OperatorBase { public: - AddOp(const OpDesc& desc) : name_(desc.name()) {} + AddOp(const OpDesc& desc) {} virtual void InferShape(ScopePtr scope) const override { for (const auto& output : outputs_) { @@ -136,7 +136,6 @@ class RecurrentOpTest : public ::testing::Test { OpDesc op_desc; op_desc.set_type("rnn_op"); - op_desc.set_name("rnn"); op_desc.add_inputs("x"); op_desc.add_inputs("h_boot"); // initial memory op_desc.add_inputs("step_net"); // step net @@ -204,22 +203,20 @@ class RecurrentOpTest : public ::testing::Test { OpDesc CreateFcOpDesc() { OpDesc op_desc; op_desc.set_type("fc"); - op_desc.set_name("fc"); op_desc.add_inputs("rnn/h_pre"); op_desc.add_inputs("rnn/w"); op_desc.add_outputs("rnn/s"); - // s = h_pre * check + // rnn/s = rnn/h_pre * rnn/w return op_desc; } OpDesc CreateAddOpDesc() { OpDesc op_desc; op_desc.set_type("add"); - op_desc.set_name("add"); op_desc.add_inputs("rnn/x"); op_desc.add_inputs("rnn/s"); op_desc.add_outputs("rnn/h"); - // h = x + s + // rnn/h = rnn/x + rnn/s return op_desc; } From 3a27b0200ff7a88a30aef1f86e7211d2a4f34640 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 14 Jul 2017 16:30:44 +0800 Subject: [PATCH 33/37] Abstract GetStepScopes and GetMaxSeqLen function --- paddle/framework/recurrent_network_op.cc | 53 +++++++++++++----------- paddle/framework/recurrent_network_op.h | 13 ++++++ 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index ae4f8000d2f71..52fb869663308 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -34,18 +34,15 @@ void RecurrentOp::Run(OpContext* contex) const { LOG(INFO) << "segment input"; SegmentInputs(scope); - Variable* step_scopes = scope->GetVariable(step_scopes_name_); - 
PADDLE_ENFORCE(step_scopes, "failed to get step scopes"); // forward - auto dims = Input(scope, inlinks_[0])->GetMutable()->dims(); - size_t seq_len = dims[0]; - LOG(INFO) << "sequence length " << seq_len; - auto& scopes = *step_scopes->GetMutable>(); - for (size_t step_id = 0; step_id < seq_len; step_id++) { + size_t max_seq_len = GetMaxSeqLen(scope); + LOG(INFO) << "sequence length " << max_seq_len; + auto step_scopes = GetStepScopes(scope); + for (size_t step_id = 0; step_id < max_seq_len; step_id++) { LOG(INFO) << "run step " << step_id; - ScopePtr step_scope = scopes[step_id]; + ScopePtr step_scope = step_scopes[step_id]; // TODO replace memorys' copy with reference - LinkMemories(scope, scopes, step_id); + LinkMemories(scope, step_scopes, step_id); net->GetMutable()->Run(step_scope); } @@ -109,15 +106,20 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { } } +size_t RecurrentOp::GetMaxSeqLen(ScopePtr scope) const { + // TODO update this function when using variable-length of sequence. + return Input(scope, inlinks_[0])->GetMutable()->dims()[0]; +} + void RecurrentOp::CreateScopes(ScopePtr scope) const { - auto dims = Input(scope, inlinks_[0])->GetMutable()->dims(); - size_t seq_len = dims[0]; - Variable* scopes_var = scope->GetVariable(step_scopes_name_); - auto step_scopes = scopes_var->GetMutable>(); + size_t max_seq_len = GetMaxSeqLen(scope); + std::vector* step_scopes = + scope->GetVariable(step_scopes_name_) + ->GetMutable>(); // TODO Only two scopes are needed for inference, this case will be // supported later. - if (seq_len > step_scopes->size()) { - for (size_t i = step_scopes->size(); i < seq_len; ++i) { + if (max_seq_len > step_scopes->size()) { + for (size_t i = step_scopes->size(); i < max_seq_len; ++i) { step_scopes->push_back(std::make_shared(scope)); } } @@ -129,17 +131,17 @@ void RecurrentOp::SegmentInputs(ScopePtr scope) const { PADDLE_ENFORCE(inlinks_.size() == inlink_alias.size(), "in_links/in_link_alias mismatch."); - Variable* scopes_var = scope->GetVariable(step_scopes_name_); - auto& step_scopes = *scopes_var->GetMutable>(); - auto dims = Input(scope, inlinks_[0])->GetMutable()->dims(); - int seq_len = dims[0]; + auto step_scopes = GetStepScopes(scope); + size_t max_seq_len = GetMaxSeqLen(scope); for (size_t i = 0; i < inlinks_.size(); ++i) { Tensor* scope_input_tensor = Input(scope, inlinks_[i])->GetMutable(); - for (int j = 0; j < seq_len; j++) { + for (size_t j = 0; j < max_seq_len; j++) { Variable* input_var = step_scopes[j]->CreateVariable(inlink_alias[i]); Tensor* step_input_tensor = input_var->GetMutable(); *step_input_tensor = scope_input_tensor->Slice(j, j + 1); + // TODO (luotao1): use reshape function to decrease the dims of + // step_input_tensor. } } } @@ -149,10 +151,10 @@ void RecurrentOp::ConcatOutputs(ScopePtr scope) const { PADDLE_ENFORCE(outlinks_.size() == outlink_alias.size(), "out_links/out_link_alias mismatch."); - Variable* scopes_var = scope->GetVariable(step_scopes_name_); - auto& step_scopes = *scopes_var->GetMutable>(); + auto step_scopes = GetStepScopes(scope); + size_t max_seq_len = GetMaxSeqLen(scope); + // TODO (luotao1): update using CopyFrom function in tensor. 
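  // A possible shape for that TODO (hypothetical sketch only; it assumes a
  // future Tensor::CopyFrom(const Tensor&) that deep-copies a slice, which
  // tensor.h does not provide yet):
  //   for (size_t j = 0; j < max_seq_len; j++) {
  //     output_tensor->Slice<float>(j, j + 1)
  //         .CopyFrom(*step_scopes[j]
  //                        ->GetVariable(outlink_alias[i])
  //                        ->GetMutable<Tensor>());
  //   }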
auto dims = Input(scope, inlinks_[0])->GetMutable()->dims(); - int seq_len = dims[0]; int batch_size = dims[1]; for (size_t i = 0; i < outlinks_.size(); i++) { auto output_dims = step_scopes[0] @@ -164,8 +166,9 @@ void RecurrentOp::ConcatOutputs(ScopePtr scope) const { Tensor* output_tensor = scope->CreateVariable(outlinks_[i])->GetMutable(); float* output = output_tensor->mutable_data( - make_ddim({seq_len, batch_size, output_dim}), platform::CPUPlace()); - for (int j = 0; j < seq_len; j++) { + make_ddim({(int)max_seq_len, batch_size, output_dim}), + platform::CPUPlace()); + for (size_t j = 0; j < max_seq_len; j++) { Variable* output_var = step_scopes[j]->GetVariable(outlink_alias[i]); const float* step_output = output_var->GetMutable()->data(); diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index b81ed49e7f3e7..2f62f365e42b8 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -133,6 +133,11 @@ class RecurrentOp : public OperatorBase { virtual ~RecurrentOp() {} protected: + /* + * Get the max sequence length of the scope. + */ + size_t GetMaxSeqLen(ScopePtr scope) const; + /* * Prepare inputs for each stepnet. */ @@ -153,6 +158,14 @@ class RecurrentOp : public OperatorBase { */ void CreateScopes(ScopePtr scope) const; + /* + * Get the step scopes. + */ + inline const std::vector& GetStepScopes(ScopePtr scope) const { + return *(scope->GetVariable(step_scopes_name_)) + ->GetMutable>(); + } + /* * Link memory in previous step scope to current scope. */ From aede869805d67b9869912eacaad0c2b090f9508f Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 14 Jul 2017 17:59:02 +0800 Subject: [PATCH 34/37] refine LinkMemories --- paddle/framework/recurrent_network_op.cc | 60 +++++++++--------------- paddle/framework/recurrent_network_op.h | 5 +- 2 files changed, 24 insertions(+), 41 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index 52fb869663308..316d5deeea503 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -40,11 +40,10 @@ void RecurrentOp::Run(OpContext* contex) const { auto step_scopes = GetStepScopes(scope); for (size_t step_id = 0; step_id < max_seq_len; step_id++) { LOG(INFO) << "run step " << step_id; - ScopePtr step_scope = step_scopes[step_id]; // TODO replace memorys' copy with reference - LinkMemories(scope, step_scopes, step_id); + LinkMemories(step_scopes, step_id); - net->GetMutable()->Run(step_scope); + net->GetMutable()->Run(step_scopes[step_id]); } LOG(INFO) << "concat outputs"; @@ -177,51 +176,38 @@ void RecurrentOp::ConcatOutputs(ScopePtr scope) const { } } -void RecurrentOp::LinkMemories(ScopePtr scope, - std::vector& step_scopes, - size_t step) const { - PADDLE_ENFORCE(step < step_scopes.size(), - "step [%d] out of range of step scopes' size [%d]", step, +void RecurrentOp::LinkMemories(std::vector& step_scopes, + size_t step_id) const { + PADDLE_ENFORCE(step_id < step_scopes.size(), + "step [%d] out of range of step scopes' size [%d]", step_id, step_scopes.size()); - auto step_scope = step_scopes[step]; - // copy boot memory + ScopePtr step_scope = step_scopes[step_id]; for (auto& attr : memory_attrs_) { - Tensor* boot_tensor{nullptr}; - if (step == 0) { - PADDLE_ENFORCE(scope->HasVariable(attr.boot_var), + Tensor* pre_memory_tensor = + step_scope->CreateVariable(attr.pre_var)->GetMutable(); + + if (step_id == 0) { + 
PADDLE_ENFORCE(step_scope->HasVariable(attr.boot_var),
                     "memory [%s]'s boot variable [%s] not exists", attr.var,
                     attr.boot_var);
+      Tensor* boot_tensor =
+          step_scope->CreateVariable(attr.boot_var)->GetMutable<Tensor>();
       PADDLE_ENFORCE(boot_tensor, "boot_tensor should be retrieved before");
       // copy from boot memory
-      std::memcpy(memory_tensor_val, boot_tensor->data<float>(),
-                  product(attr.dims));
+      pre_memory_tensor->ShareDataFrom(*boot_tensor);
     } else {
       // copy from previous step scope's memory to this scope's
       // `pre - memory`
       Tensor* pre_step_memory =
-          step_scopes[step - 1]->GetVariable(attr.var)->GetMutable<Tensor>();
-
-      std::memcpy(memory_tensor_val, pre_step_memory->data<float>(),
-                  product(attr.dims));
+          step_scopes[step_id - 1]->GetVariable(attr.var)->GetMutable<Tensor>();
+      pre_memory_tensor->ShareDataFrom(*pre_step_memory);
     }
+
+    // TODO the memory of current step should be allocated in step net ?
+    Tensor* cur_memory_tensor =
+        step_scopes[step_id]->CreateVariable(attr.var)->GetMutable<Tensor>();
+    cur_memory_tensor->mutable_data<float>(pre_memory_tensor->dims(),
+                                           platform::CPUPlace());
   }
 }

diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h
index 2f62f365e42b8..857bb3164d638 100644
--- a/paddle/framework/recurrent_network_op.h
+++ b/paddle/framework/recurrent_network_op.h
@@ -169,8 +169,7 @@ class RecurrentOp : public OperatorBase {
   /*
    * Link memory in previous step scope to current scope.
    */
-  void LinkMemories(ScopePtr scope, std::vector<ScopePtr>& step_scopes,
-                    size_t step) const;
+  void LinkMemories(std::vector<ScopePtr>& step_scopes, size_t step_id) const;

  private:
   /*
@@ -188,8 +187,6 @@ class RecurrentOp : public OperatorBase {
     // name of the variables to init this memory (same role of `boot_layer` in
     // PaddlePaddle), which is stored in father's scope.
     std::string boot_var;
-    // this dim will be inferred from boot memories' tensor in the first step.
-    DDim dims;
   };

   /*

From 45682d20b2e1c81f303406ad74df556f9360eccf Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Sat, 15 Jul 2017 11:55:11 +0800
Subject: [PATCH 35/37] Refine code and add some comments.
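For reference, the memory linking settled on above reduces to a per-step aliasing rule. A simplified sketch (error checks elided; ShareDataFrom makes the pre-memory a view over its source rather than a copy):

    // At step t: rnn/h_pre(t) aliases h_boot when t == 0,
    // and rnn/h(t - 1) for every later step.
    for (auto& attr : memory_attrs_) {
      Tensor* pre =
          step_scopes[t]->CreateVariable(attr.pre_var)->GetMutable<Tensor>();
      Tensor* src =
          t == 0
              ? step_scopes[t]->CreateVariable(attr.boot_var)->GetMutable<Tensor>()
              : step_scopes[t - 1]->GetVariable(attr.var)->GetMutable<Tensor>();
      pre->ShareDataFrom(*src);  // share the buffer, no element copy
    }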
--- paddle/framework/recurrent_network_op.cc | 32 ++++++++++--------- paddle/framework/recurrent_network_op.h | 24 +++++++++----- paddle/framework/recurrent_network_op_test.cc | 2 -- paddle/platform/CMakeLists.txt | 2 +- 4 files changed, 34 insertions(+), 26 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index 316d5deeea503..ede8b1193834d 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -14,7 +14,6 @@ #include "paddle/framework/recurrent_network_op.h" #include "paddle/framework/tensor.h" -// #include "paddle/framework/op_registry.h" #include #include @@ -40,7 +39,6 @@ void RecurrentOp::Run(OpContext* contex) const { auto step_scopes = GetStepScopes(scope); for (size_t step_id = 0; step_id < max_seq_len; step_id++) { LOG(INFO) << "run step " << step_id; - // TODO replace memorys' copy with reference LinkMemories(step_scopes, step_id); net->GetMutable()->Run(step_scopes[step_id]); @@ -72,13 +70,25 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { PADDLE_ENFORCE(inlinks_.empty(), "RecurrentOp duplicate inited"); LOG(INFO) << "set inlinks"; for (auto id : GetAttr>("in_links")) { - inlinks_.push_back(id); + inlinks_.push_back(inputs_[id]); } + auto inlink_alias = GetAttr>("in_link_alias"); + in_link_alias_ = + std::vector{inlink_alias.begin(), inlink_alias.end()}; + PADDLE_ENFORCE(inlinks_.size() == in_link_alias_.size(), + "in_links/in_link_alias mismatch."); + PADDLE_ENFORCE( outputs_.size() > 1, "more than 1 output should be provided and the last is `step_scopes`"); outlinks_ = std::vector{outputs_.begin(), outputs_.end() - 1}; + auto outlink_alias = GetAttr>("out_link_alias"); + out_link_alias_ = + std::vector{outlink_alias.begin(), outlink_alias.end()}; + PADDLE_ENFORCE(outlinks_.size() == outlink_alias.size(), + "out_links/out_link_alias mismatch."); + // set memories auto memories = GetAttr>("memories"); auto pre_memories = GetAttr>("pre_memories"); @@ -126,17 +136,13 @@ void RecurrentOp::CreateScopes(ScopePtr scope) const { void RecurrentOp::SegmentInputs(ScopePtr scope) const { PADDLE_ENFORCE(!inlinks_.empty(), "no in links are provided."); - auto inlink_alias = GetAttr>("in_link_alias"); - PADDLE_ENFORCE(inlinks_.size() == inlink_alias.size(), - "in_links/in_link_alias mismatch."); - auto step_scopes = GetStepScopes(scope); size_t max_seq_len = GetMaxSeqLen(scope); for (size_t i = 0; i < inlinks_.size(); ++i) { Tensor* scope_input_tensor = Input(scope, inlinks_[i])->GetMutable(); for (size_t j = 0; j < max_seq_len; j++) { - Variable* input_var = step_scopes[j]->CreateVariable(inlink_alias[i]); + Variable* input_var = step_scopes[j]->CreateVariable(in_link_alias_[i]); Tensor* step_input_tensor = input_var->GetMutable(); *step_input_tensor = scope_input_tensor->Slice(j, j + 1); // TODO (luotao1): use reshape function to decrease the dims of @@ -146,10 +152,6 @@ void RecurrentOp::SegmentInputs(ScopePtr scope) const { } void RecurrentOp::ConcatOutputs(ScopePtr scope) const { - auto outlink_alias = GetAttr>("out_link_alias"); - PADDLE_ENFORCE(outlinks_.size() == outlink_alias.size(), - "out_links/out_link_alias mismatch."); - auto step_scopes = GetStepScopes(scope); size_t max_seq_len = GetMaxSeqLen(scope); // TODO (luotao1): update using CopyFrom function in tensor. 
@@ -157,7 +159,7 @@ void RecurrentOp::ConcatOutputs(ScopePtr scope) const { int batch_size = dims[1]; for (size_t i = 0; i < outlinks_.size(); i++) { auto output_dims = step_scopes[0] - ->GetVariable(outlink_alias[0]) + ->GetVariable(out_link_alias_[0]) ->GetMutable() ->dims(); int output_dim = output_dims[1]; @@ -168,7 +170,7 @@ void RecurrentOp::ConcatOutputs(ScopePtr scope) const { make_ddim({(int)max_seq_len, batch_size, output_dim}), platform::CPUPlace()); for (size_t j = 0; j < max_seq_len; j++) { - Variable* output_var = step_scopes[j]->GetVariable(outlink_alias[i]); + Variable* output_var = step_scopes[j]->GetVariable(out_link_alias_[i]); const float* step_output = output_var->GetMutable()->data(); std::memcpy(output + j * length, step_output, length); @@ -203,7 +205,7 @@ void RecurrentOp::LinkMemories(std::vector& step_scopes, pre_memory_tensor->ShareDataFrom(*pre_step_memory); } - // TODO the memory of current step should be allocated in step net ? + // TODO the memory of current step should be allocated in step net Tensor* cur_memory_tensor = step_scopes[step_id]->CreateVariable(attr.var)->GetMutable(); cur_memory_tensor->mutable_data(pre_memory_tensor->dims(), diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 857bb3164d638..3d84c399a9398 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -41,8 +41,8 @@ class OperatorBase { void Init(const OpDesc& op_desc, AttributeMap& attrs) { attrs_ = attrs; } virtual void Run(OpContext* context) const = 0; virtual void InferShape(ScopePtr scope) const = 0; - inline Variable* Input(ScopePtr scope, int index) const { - return scope->GetVariable(inputs_[index]); + inline Variable* Input(ScopePtr scope, std::string name) const { + return scope->GetVariable(name); }; template @@ -58,7 +58,6 @@ class OperatorBase { AttributeMap attrs_; }; -// TODO replace this with Net's proto. struct NetDesc { std::string name_; std::vector op_descs; @@ -98,18 +97,25 @@ class PlainNet { /* * RecurrentOp inputs stored in proto: - * - real inputs that need to be segmented to steps. + * - in_links : real inputs that need to be segmented to steps. * - boot memories + * - all weights in step net * - step net * * outputs: - * - real outputs + * - out_links : real outputs * - step scopes * * Attributes stored in AttributeMap: - * - real_inputs: vector + * - in_links: vector * - boot_memories: vector * - step_net: int + * - in_link_alias: vector the alias of in_links in step net. + * - out_link_alias: vector the alias of out_links in step net + * - memories: vector the memory names + * - pre_memories: vector the previous memory names + * + * see RecurrentOpProtoAndCheckerMaker */ class RecurrentOp : public OperatorBase { @@ -210,7 +216,6 @@ class RecurrentOp : public OperatorBase { * strings: "boot_state" * } */ - // TODO copy from OpBase's mutable std::vector memory_attrs_; // name of rnn op's step net, the step net will be shared by both `Forward` @@ -221,8 +226,11 @@ class RecurrentOp : public OperatorBase { // specified by `step_scopes_name_`. std::string step_scopes_name_; // real inputs that need to be segmented. 
- std::vector inlinks_; + std::vector inlinks_; std::vector outlinks_; + + std::vector in_link_alias_; + std::vector out_link_alias_; }; class RecurrentGradientOp; diff --git a/paddle/framework/recurrent_network_op_test.cc b/paddle/framework/recurrent_network_op_test.cc index 63d9dfec102d2..ce65235c1e40b 100644 --- a/paddle/framework/recurrent_network_op_test.cc +++ b/paddle/framework/recurrent_network_op_test.cc @@ -111,7 +111,6 @@ class RecurrentOpTest : public ::testing::Test { Variable* x = scope_->CreateVariable("x"); DDim dims = make_ddim(std::vector{10 /*sent size*/, 20 /*batch size*/, 30 /*input dim*/}); - // TODO mutable_data is not valid x->GetMutable()->mutable_data(dims, platform::CPUPlace()); LOG(INFO) << "create global variable w"; @@ -139,7 +138,6 @@ class RecurrentOpTest : public ::testing::Test { op_desc.add_inputs("x"); op_desc.add_inputs("h_boot"); // initial memory op_desc.add_inputs("step_net"); // step net - // TODO put the step_scopes in the outputs // output hidden vectors op_desc.add_outputs("h"); op_desc.add_outputs("step_scopes"); // step scopes diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 530bf886d4de5..358d14f4555e1 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -12,4 +12,4 @@ ELSE() ENDIF() cc_library(device_context SRCS device_context.cc DEPS place eigen3 ${GPU_CTX_DEPS}) -#nv_test(device_context_test SRCS device_context_test.cc DEPS device_context glog gflags) +nv_test(device_context_test SRCS device_context_test.cc DEPS device_context glog gflags) From fc5acee51ec643a65790da61e338852b41411904 Mon Sep 17 00:00:00 2001 From: Superjom Date: Sat, 15 Jul 2017 12:59:57 +0800 Subject: [PATCH 36/37] add backward core --- paddle/framework/recurrent_network_op.cc | 2 +- paddle/framework/recurrent_network_op.h | 74 ++++++++++++++++++------ 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc index ede8b1193834d..93f2f8c2ef2d6 100644 --- a/paddle/framework/recurrent_network_op.cc +++ b/paddle/framework/recurrent_network_op.cc @@ -105,7 +105,7 @@ void RecurrentOp::Init(const OpDesc& op_desc, AttributeMap& attrs) { "the size of memories and boot_memories doesn't match: %d,%d", memories.size(), boot_memories.size()); for (size_t i = 0; i < memories.size(); ++i) { - MemoryAttr mem_attr; + details::MemoryAttr mem_attr; mem_attr.var = memories[i]; mem_attr.pre_var = pre_memories[i]; mem_attr.boot_var = boot_memories[i]; diff --git a/paddle/framework/recurrent_network_op.h b/paddle/framework/recurrent_network_op.h index 3d84c399a9398..161408482fc92 100644 --- a/paddle/framework/recurrent_network_op.h +++ b/paddle/framework/recurrent_network_op.h @@ -85,6 +85,27 @@ class PlainNet { std::vector> ops_; }; +namespace details { + +/* + * Memory of a RNN (same as the role of `Momory` in PaddlePaddle). + * + * Memory attributes cached by this op, dims will be infered from + * boot memories in father scope. Other attributes are copied from Op's proto + * attributes. + */ +struct MemoryAttr { + // name of current state variable + std::string var; + // name of previous step's state variable + std::string pre_var; + // name of the variables to init this memory (same role of `boot_layer` in + // PaddlePaddle), which is store in father's scope. 
+ std::string boot_var; +}; + +}; // namespace details + // fake interfaces end // -------------------------------------------------------------------- // The sequence format in RecurrentOp is Tensor now. @@ -178,23 +199,6 @@ class RecurrentOp : public OperatorBase { void LinkMemories(std::vector& step_scopes, size_t step_id) const; private: - /* - * Memory of a RNN (same as the role of `Momory` in PaddlePaddle). - * - * Memory attributes cached by this op, dims will be infered from - * boot memories in father scope. Other attributes are copied from Op's proto - * attributes. - */ - struct MemoryAttr { - // name of current state variable - std::string var; - // name of previous step's state variable - std::string pre_var; - // name of the variables to init this memory (same role of `boot_layer` in - // PaddlePaddle), which is store in father's scope. - std::string boot_var; - }; - /* * The attributes in protobuf about the memory description and the initial * memory description are as follows. The number of initial memories should @@ -216,7 +220,7 @@ class RecurrentOp : public OperatorBase { * strings: "boot_state" * } */ - mutable std::vector memory_attrs_; + mutable std::vector memory_attrs_; // name of rnn op's step net, the step net will be shared by both `Forward` // and `Backward`, so we store it as a variable in father's scope, with a @@ -233,7 +237,39 @@ class RecurrentOp : public OperatorBase { std::vector out_link_alias_; }; -class RecurrentGradientOp; +/* + * RNN's backward alogorithm. + * + * To accelerate the development of RecurrentBackwardOp, we decouple RNN's + * algorithm and `RecurrentBackwardAlgorithm`, the former contains the core + * implementation of a RNN, and will keep stable even if the framework changes a + * lot, and the latter is a wrapper acts like an dapter for it to make RNN an + * operator. + */ +class RecurrentBackwardAlgorithm { + public: + private: + // stepnet for backward + // NOTE this stepnet is created by others and should insert AddOp for its + // weights gradient updating, RNN backward just run it. + std::string stepnet_name_; + // step scopes that shared by both the forward and backward operators. + std::string step_scopes_name_; + + // inputs(gradients of forward operator's outputs) that need to be segmented + // for each step. + std::vector inlinks_; + // outputs(gradients of forward operator's inputs) of each step that need to + // be concated. + std::vector outlinks_; + + // alias to avoid duplicate keys in scopes. + std::vector inlink_alias_; + std::vector outlink_alias_; + + // NOTE the first step's boot memories' gradients should be outputed. + std::vector memories_; +}; } // namespace framework } // namespace paddle From 14dd843a7dd2d7da1eae70600141080b1e5f4e6f Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Sat, 15 Jul 2017 14:28:16 +0800 Subject: [PATCH 37/37] update for develop branch. --- paddle/framework/CMakeLists.txt | 4 ++-- paddle/framework/recurrent_network_op.cc | 9 ++++----- paddle/framework/tensor.h | 5 ++--- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index e475d6fee70ed..95638ebcdf8ae 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -19,8 +19,8 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc. # Generate an empty __init__.py to make framework_py_proto as a valid python module. 
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(framework_py_proto framework_py_proto_init)
-cc_library(recurrent_network_op SRCS recurrent_network_op.cc DEPS op_desc)
-cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc recurrent_network_op.cc DEPS glog gtest gflags ddim system_allocator op_desc)
+cc_library(recurrent_network_op SRCS recurrent_network_op.cc DEPS op_desc place)
+cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc DEPS recurrent_network_op glog gtest gflags ddim op_desc)
 proto_library(net_proto SRCS net_proto.proto DEPS op_proto)
 cc_library(net SRCS net.cc DEPS net_proto)

diff --git a/paddle/framework/recurrent_network_op.cc b/paddle/framework/recurrent_network_op.cc
index ede8b1193834d..df0d977d8a4b9 100644
--- a/paddle/framework/recurrent_network_op.cc
+++ b/paddle/framework/recurrent_network_op.cc
@@ -139,12 +139,11 @@ void RecurrentOp::SegmentInputs(ScopePtr scope) const {
   auto step_scopes = GetStepScopes(scope);
   size_t max_seq_len = GetMaxSeqLen(scope);
   for (size_t i = 0; i < inlinks_.size(); ++i) {
-    Tensor* scope_input_tensor =
-        Input(scope, inlinks_[i])->GetMutable<Tensor>();
+    Tensor* input_tensor = Input(scope, inlinks_[i])->GetMutable<Tensor>();
     for (size_t j = 0; j < max_seq_len; j++) {
       Variable* input_var = step_scopes[j]->CreateVariable(in_link_alias_[i]);
       Tensor* step_input_tensor = input_var->GetMutable<Tensor>();
-      *step_input_tensor = scope_input_tensor->Slice<float>(j, j + 1);
+      *step_input_tensor = input_tensor->Slice<float>(j, j + 1);
       // TODO (luotao1): use reshape function to decrease the dims of
       // step_input_tensor.
     }
   }
@@ -196,13 +195,13 @@ void RecurrentOp::LinkMemories(std::vector<ScopePtr>& step_scopes,
           step_scope->CreateVariable(attr.boot_var)->GetMutable<Tensor>();
       PADDLE_ENFORCE(boot_tensor, "boot_tensor should be retrieved before");
       // copy from boot memory
-      pre_memory_tensor->ShareDataFrom(*boot_tensor);
+      pre_memory_tensor->ShareDataFrom<float>(*boot_tensor);
     } else {
       // copy from previous step scope's memory to this scope's
       // `pre - memory`
       Tensor* pre_step_memory =
           step_scopes[step_id - 1]->GetVariable(attr.var)->GetMutable<Tensor>();
-      pre_memory_tensor->ShareDataFrom(*pre_step_memory);
+      pre_memory_tensor->ShareDataFrom<float>(*pre_step_memory);
     }

     // TODO the memory of current step should be allocated in step net

diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 35fdcad03b321..8756c5d33c68d 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -29,8 +29,6 @@ class Tensor {
  public:
   Tensor() : numel_(0), offset_(0) {}

-  Tensor& operator=(const Tensor& src) = delete;
-
   template <typename T>
   const T* data() const {
     CheckDims<T>();
@@ -141,7 +139,8 @@ class Tensor {
    public:
     // PlaceholderImpl(paddle::platform::Place place, size_t size)
     //     : ptr_(static_cast<T*>(paddle::memory::Alloc(place, size)),
-    //            Deleter(place)),
+    //            Deleter(place)),
+
     PlaceholderImpl(paddle::platform::Place place, size_t size)
         : ptr_(static_cast<T*>(malloc(size * sizeof(T))), Deleter(place)),
           place_(place),
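To round out the picture, the malloc-backed placeholder that the last hunk is editing looks roughly like this (a sketch consistent with the hunks in this series, not the exact file contents; the Placeholder base interface and its virtual accessors are elided):

    template <typename T>
    class PlaceholderImpl : public Placeholder {
      // malloc/free stand in for paddle::memory::Alloc/Free for now, so the
      // Deleter releases with free(); place_ is kept for the real path.
      struct Deleter {
        explicit Deleter(platform::Place place) : place_(place) {}
        void operator()(T* ptr) { free(static_cast<void*>(ptr)); }
        platform::Place place_;
      };

     public:
      PlaceholderImpl(platform::Place place, size_t size)
          : ptr_(static_cast<T*>(malloc(size * sizeof(T))), Deleter(place)),
            place_(place),
            size_(size) {}

      std::unique_ptr<T, Deleter> ptr_;  // the allocated block
      platform::Place place_;            // where the block lives
      size_t size_;                      // size of the memory block
    };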