From e82a5ae62a53fdbb24a33e7f56abe3934a91aeb4 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Tue, 10 Oct 2017 15:24:15 -0700
Subject: [PATCH 01/19] init parameter base class

---
 python/paddle/v2/framework/optimizer.py | 42 +++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 python/paddle/v2/framework/optimizer.py

diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py
new file mode 100644
index 0000000000000..1ea4885b7ab91
--- /dev/null
+++ b/python/paddle/v2/framework/optimizer.py
@@ -0,0 +1,42 @@
+class Optimizer(object):
+    """Optimizer Base class.
+
+    """
+
+    def __init__(self):
+        pass
+
+    def create_backward_pass(self, loss, parameter_list=None):
+        """
+        Add gradient Operators into Block to Compute gradients of `loss`
+        for parameters in parameter_list
+
+        Args:
+          loss: an variable generated by cost function.
+          parameter_list: parameters that need to compute gradient and update to minimize the lost
+
+        Returns:
+          (parameters, gradients) pair list.
+        """
+        return None
+
+    def create_optimization_pass(self, vars_grads):
+        """Add Operators to Apply gradients to variables.
+
+        Args:
+          vars_grads: a list of (variable, gradient) pair to update.
+
+        Returns:
+          optmization_op_list: a list of optimization operator that will optimize parameter with gradient.
+        """
+        return None
+
+    def minimize(self, loss, parameter_list):
+        """Add operations to minimize `loss` by updating `parameter_list `.
+
+        This method simply combines calls `create_backward_pass()` and
+        `create_optimization_pass()`.
+        """
+        vars_grads = self.create_backward_pass(loss, parameter_list)
+        update_ops = self.create_optimization_pass(vars_grads)
+        return update_ops

From ab37fba60c090b6cafc790fe85756c55ec9a5f16 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Tue, 10 Oct 2017 15:39:44 -0700
Subject: [PATCH 02/19] optimize the Comments of optimizer

---
 python/paddle/v2/framework/optimizer.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py
index 1ea4885b7ab91..0639902c33986 100644
--- a/python/paddle/v2/framework/optimizer.py
+++ b/python/paddle/v2/framework/optimizer.py
@@ -8,35 +8,35 @@ def __init__(self):
         pass

     def create_backward_pass(self, loss, parameter_list=None):
         """
-        Add gradient Operators into Block to Compute gradients of `loss`
+        create and add gradient Operators in BlockDesc to Compute gradients of `loss`
         for parameters in parameter_list

         Args:
           loss: an variable generated by cost function.
-          parameter_list: parameters that need to compute gradient and update to minimize the lost
+          parameter_list: parameters that need to compute gradient and update to optimize the lost.

         Returns:
-          (parameters, gradients) pair list.
+          list of (parameters, gradients) pair.
         """
         return None

-    def create_optimization_pass(self, vars_grads):
-        """Add Operators to Apply gradients to variables.
+    def create_optimization_pass(self, parameters_and_grads):
+        """Add optimization operators to update gradients to variables.

         Args:
-          vars_grads: a list of (variable, gradient) pair to update.
+          parameters_and_grads: a list of (variable, gradient) pair to update.

         Returns:
-          optmization_op_list: a list of optimization operator that will optimize parameter with gradient.
+          optmization_op_list: a list of optimization operator that will update parameter using gradient.
""" return None def minimize(self, loss, parameter_list): - """Add operations to minimize `loss` by updating `parameter_list `. + """Add operations to minimize `loss` by updating `parameter_list`. - This method simply combines calls `create_backward_pass()` and - `create_optimization_pass()`. + This method combines interface `create_backward_pass()` and + `create_optimization_pass()` into one. """ - vars_grads = self.create_backward_pass(loss, parameter_list) - update_ops = self.create_optimization_pass(vars_grads) + params_grads = self.create_backward_pass(loss, parameter_list) + update_ops = self.create_optimization_pass(params_grads) return update_ops From 838f904fc6121b003d72282c6f654c4aeca5ea83 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 11 Oct 2017 15:15:36 -0700 Subject: [PATCH 03/19] basic implimentation of optimizer --- paddle/pybind/protobuf.cc | 5 +++ python/paddle/v2/framework/graph.py | 5 ++- python/paddle/v2/framework/optimizer.py | 42 ++++++++++++++++++++++--- 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 0e4bbe8415fd8..3a75019280817 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -140,6 +140,11 @@ void BindBlockDesc(py::module &m) { return self.NewVar(name); }, py::return_value_policy::reference) + .def("has_var", + [](BlockDescBind &self, py::bytes byte_name) { + std::string name = byte_name; + return self.HasVar(name); + }) .def("var", [](BlockDescBind &self, py::bytes byte_name) { std::string name = byte_name; diff --git a/python/paddle/v2/framework/graph.py b/python/paddle/v2/framework/graph.py index 0f0a2847e58a1..d92649cf7c494 100644 --- a/python/paddle/v2/framework/graph.py +++ b/python/paddle/v2/framework/graph.py @@ -152,7 +152,9 @@ def create_var(self, *args, **kwargs): def create_parameter(self, *args, **kwargs): global_block = self.program.global_block() - return Parameter(global_block, *args, **kwargs) + param = Parameter(global_block, *args, **kwargs) + self.program.parameters.append(param.name) + return param def append_op(self, *args, **kwargs): op_desc = self.desc.append_op() @@ -182,6 +184,7 @@ def __init__(self): self.desc = core.ProgramDesc.instance() self.blocks = [Block(self, 0)] self.current_block_idx = 0 + self.parameters = [] # parameter name list stored in the global scope def global_block(self): return self.blocks[0] diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index 0639902c33986..80014c134ef03 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -1,11 +1,25 @@ +import paddle.v2.framework.graph as graph + + +def grad_var_name(name): + return name + "@GRAD" + + class Optimizer(object): """Optimizer Base class. + Define the common interface of an optimizer. + User should not use this class directly, but need to use one of it's implementation. """ def __init__(self): pass + def _append_optimize_op(self, block, param_and_grad): + """ append optimize operator to block and return all the added optimize_op + """ + raise NotImplementedError() + def create_backward_pass(self, loss, parameter_list=None): """ create and add gradient Operators in BlockDesc to Compute gradients of `loss` @@ -18,18 +32,36 @@ def create_backward_pass(self, loss, parameter_list=None): Returns: list of (parameters, gradients) pair. 
""" - return None - def create_optimization_pass(self, parameters_and_grads): + assert isinstance(loss, graph.Variable) + loss.block.program.append_backward(set()) + parameters = loss.block.program.parameters + params_and_grads = [] + for param in parameters: + grad = grad_var_name(param) + if loss.block.has_var(grad): + params_and_grads.append((param, grad)) + else: + params_and_grads.append((param, None)) + return params_and_grads + + def create_optimization_pass(self, parameters_and_grads, loss): """Add optimization operators to update gradients to variables. Args: + loss: the target that this optimization is for. parameters_and_grads: a list of (variable, gradient) pair to update. Returns: optmization_op_list: a list of optimization operator that will update parameter using gradient. """ - return None + optimize_ops = [] + for param_and_grad in parameters_and_grads: + if param_and_grad[2] is not None: + optimize_op = self._append_optimize_op(loss.block, + param_and_grad) + optimize_ops.append(optimize_op) + return optimize_ops def minimize(self, loss, parameter_list): """Add operations to minimize `loss` by updating `parameter_list`. @@ -38,5 +70,5 @@ def minimize(self, loss, parameter_list): `create_optimization_pass()` into one. """ params_grads = self.create_backward_pass(loss, parameter_list) - update_ops = self.create_optimization_pass(params_grads) - return update_ops + optimize_ops = self.create_optimization_pass(params_grads, loss) + return optimize_ops From 5c6a4587d5b0e92ef7e5cccdbe05ad7a7271ed6c Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 12 Oct 2017 11:14:27 -0700 Subject: [PATCH 04/19] add test_optimizer --- python/paddle/v2/framework/optimizer.py | 43 ++++++++++++++++++- .../v2/framework/tests/test_optimizer.py | 18 ++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 python/paddle/v2/framework/tests/test_optimizer.py diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index 80014c134ef03..9e6d5561f94d6 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -34,8 +34,12 @@ def create_backward_pass(self, loss, parameter_list=None): """ assert isinstance(loss, graph.Variable) + # TODO(qiao) append_backward should support target. loss.block.program.append_backward(set()) - parameters = loss.block.program.parameters + if parameter_list is not None: + parameters = parameter_list + else: + parameters = loss.block.program.parameters params_and_grads = [] for param in parameters: grad = grad_var_name(param) @@ -72,3 +76,40 @@ def minimize(self, loss, parameter_list): params_grads = self.create_backward_pass(loss, parameter_list) optimize_ops = self.create_optimization_pass(params_grads, loss) return optimize_ops + + +class SGDOptimizer(Optimizer): + """ Simple SGD optimizer without any state. 
+ """ + + def __init__(self, learning_rate): + assert learning_rate is not None + super(Optimizer, self).__init__() + self.type = "sgd" + self._learning_rate = learning_rate + + def _append_optimize_op(self, block, param_and_grad): + assert isinstance(block, graph.Block) + lr_shape = [1] + # create a var for learning_rate + lr = block.create_var(dtype="float32", shape=lr_shape, lod_level=0) + + # create an op to init the learning_rate + init_op = block.append_op( + type="fill_constant", + outputs={"Out": lr.name}, + attrs={"shape": lr_shape, + "value": self._learning_rate}) + + # create the optimize op + sgd_op = block.append_op( + type=self.type, + inputs={ + "Param", param_and_grad[0], "Grad", param_and_grad[1], + "LearningRate", lr.name() + }, + outputs={"Out", param_and_grad[0]}, + attrs={"shape": [1], + "value": self._learning_rate}) + + return sgd_op diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/framework/tests/test_optimizer.py new file mode 100644 index 0000000000000..e21f028c0bf34 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_optimizer.py @@ -0,0 +1,18 @@ +import unittest + +import paddle.v2.framework.graph as graph +import paddle.v2.framework.optimizer as optimizer + + +class TestOptimizer(unittest.TestCase): + def test_sgd_optimizer(self): + program = graph.g_program + block = program.global_block() + init_op = block.append_op( + type="mul", inputs={}, outputs={"Out": "out1"}) + block.create_var() + optimizer = optimizer.SGDOptimizer() + + +if __name__ == '__main__': + unittest.main() From e9a75d2f748969097f6d273c0cca074f758ba42d Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 12 Oct 2017 13:23:01 -0700 Subject: [PATCH 05/19] add no_grad_set to interface --- python/paddle/v2/framework/graph.py | 4 ++++ python/paddle/v2/framework/optimizer.py | 12 +++++++---- .../v2/framework/tests/test_optimizer.py | 20 +++++++++++++++---- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/python/paddle/v2/framework/graph.py b/python/paddle/v2/framework/graph.py index 7b93ae8136153..ea303db475000 100644 --- a/python/paddle/v2/framework/graph.py +++ b/python/paddle/v2/framework/graph.py @@ -303,6 +303,10 @@ def create_block(self): self.blocks.append(Block(self, self.current_block_idx)) return self.current_block() + def append_backward(self, target, no_grad_set): + # TODO(qiao) Add Target to append_backward + self.desc.append_backward(no_grad_set) + def rollback(self): self.current_block_idx = self.current_block().parent_idx diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index 9e6d5561f94d6..9c1577db0793d 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -1,5 +1,7 @@ import paddle.v2.framework.graph as graph +__all__ = ['SGDOptimizer'] + def grad_var_name(name): return name + "@GRAD" @@ -20,13 +22,14 @@ def _append_optimize_op(self, block, param_and_grad): """ raise NotImplementedError() - def create_backward_pass(self, loss, parameter_list=None): + def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None): """ create and add gradient Operators in BlockDesc to Compute gradients of `loss` for parameters in parameter_list Args: loss: an variable generated by cost function. + no_grad_set: variable that should not create gradient parameter_list: parameters that need to compute gradient and update to optimize the lost. 
Returns: @@ -35,7 +38,7 @@ def create_backward_pass(self, loss, parameter_list=None): assert isinstance(loss, graph.Variable) # TODO(qiao) append_backward should support target. - loss.block.program.append_backward(set()) + loss.block.program.append_backward(loss, no_grad_set or set()) if parameter_list is not None: parameters = parameter_list else: @@ -67,13 +70,14 @@ def create_optimization_pass(self, parameters_and_grads, loss): optimize_ops.append(optimize_op) return optimize_ops - def minimize(self, loss, parameter_list): + def minimize(self, loss, parameter_list=None, no_grad_set=None): """Add operations to minimize `loss` by updating `parameter_list`. This method combines interface `create_backward_pass()` and `create_optimization_pass()` into one. """ - params_grads = self.create_backward_pass(loss, parameter_list) + params_grads = self.create_backward_pass(loss, parameter_list, + no_grad_set or set()) optimize_ops = self.create_optimization_pass(params_grads, loss) return optimize_ops diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/framework/tests/test_optimizer.py index e21f028c0bf34..560c9135af85f 100644 --- a/python/paddle/v2/framework/tests/test_optimizer.py +++ b/python/paddle/v2/framework/tests/test_optimizer.py @@ -8,10 +8,22 @@ class TestOptimizer(unittest.TestCase): def test_sgd_optimizer(self): program = graph.g_program block = program.global_block() - init_op = block.append_op( - type="mul", inputs={}, outputs={"Out": "out1"}) - block.create_var() - optimizer = optimizer.SGDOptimizer() + mul_x = block.create_var( + dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") + mul_y = block.create_var( + dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") + mul_out = block.create_var( + dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") + mul_op = block.append_op( + type="mul", + inputs={"X": mul_x, + "Y": mul_y}, + outputs={"Out": [mul_out]}, + attrs={"x_num_col_dims": 1}) + loss = block.create_var("loss") + sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01) + opts = sgd_optimizer.minimize(loss) + print(opts) if __name__ == '__main__': From 96d3a753d8adb77a7325f81ce66445a238a6f674 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sat, 14 Oct 2017 14:30:46 -0700 Subject: [PATCH 06/19] update optimizer.py --- python/paddle/v2/framework/optimizer.py | 10 +++++----- python/paddle/v2/framework/tests/test_optimizer.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index 9c1577db0793d..b4f3952b67b15 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -1,4 +1,4 @@ -import paddle.v2.framework.graph as graph +import paddle.v2.framework.framework as framework __all__ = ['SGDOptimizer'] @@ -36,13 +36,13 @@ def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None): list of (parameters, gradients) pair. """ - assert isinstance(loss, graph.Variable) + assert isinstance(loss, framework.Variable) # TODO(qiao) append_backward should support target. 
- loss.block.program.append_backward(loss, no_grad_set or set()) + loss.block.framework.append_backward(loss, no_grad_set or set()) if parameter_list is not None: parameters = parameter_list else: - parameters = loss.block.program.parameters + parameters = loss.block.framework.parameters params_and_grads = [] for param in parameters: grad = grad_var_name(param) @@ -93,7 +93,7 @@ def __init__(self, learning_rate): self._learning_rate = learning_rate def _append_optimize_op(self, block, param_and_grad): - assert isinstance(block, graph.Block) + assert isinstance(block, framework.Block) lr_shape = [1] # create a var for learning_rate lr = block.create_var(dtype="float32", shape=lr_shape, lod_level=0) diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/framework/tests/test_optimizer.py index 560c9135af85f..d94710727e143 100644 --- a/python/paddle/v2/framework/tests/test_optimizer.py +++ b/python/paddle/v2/framework/tests/test_optimizer.py @@ -1,12 +1,12 @@ import unittest -import paddle.v2.framework.graph as graph +import paddle.v2.framework.framework as framework import paddle.v2.framework.optimizer as optimizer class TestOptimizer(unittest.TestCase): def test_sgd_optimizer(self): - program = graph.g_program + program = framework.g_program block = program.global_block() mul_x = block.create_var( dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") From 11829ec93279d35defc78c8ccba6f77a10104a62 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sat, 14 Oct 2017 17:30:48 -0700 Subject: [PATCH 07/19] python code can run --- python/paddle/v2/framework/framework.py | 4 ++-- python/paddle/v2/framework/optimizer.py | 5 ++--- python/paddle/v2/framework/tests/test_optimizer.py | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py index 3cf54015298e1..29b6a9a58955d 100644 --- a/python/paddle/v2/framework/framework.py +++ b/python/paddle/v2/framework/framework.py @@ -365,8 +365,8 @@ def create_block(self): return self.current_block() def append_backward(self, target, no_grad_set): - # TODO(qiao) Add Target to append_backward - self.desc.append_backward(no_grad_set) + assert isinstance(target, Variable) + self.desc.append_backward(target.desc, no_grad_set) def rollback(self): self.current_block_idx = self.current_block().parent_idx diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index b4f3952b67b15..b188be5603efd 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -37,12 +37,11 @@ def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None): """ assert isinstance(loss, framework.Variable) - # TODO(qiao) append_backward should support target. 
- loss.block.framework.append_backward(loss, no_grad_set or set()) + loss.block.program.append_backward(loss, no_grad_set or set()) if parameter_list is not None: parameters = parameter_list else: - parameters = loss.block.framework.parameters + parameters = loss.block.program.parameters params_and_grads = [] for param in parameters: grad = grad_var_name(param) diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/framework/tests/test_optimizer.py index d94710727e143..a281daca32420 100644 --- a/python/paddle/v2/framework/tests/test_optimizer.py +++ b/python/paddle/v2/framework/tests/test_optimizer.py @@ -20,9 +20,8 @@ def test_sgd_optimizer(self): "Y": mul_y}, outputs={"Out": [mul_out]}, attrs={"x_num_col_dims": 1}) - loss = block.create_var("loss") sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01) - opts = sgd_optimizer.minimize(loss) + opts = sgd_optimizer.minimize(mul_out) print(opts) From cf343ac98dad27f98647d4a1bb9303666b3a2e08 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sat, 14 Oct 2017 18:38:51 -0700 Subject: [PATCH 08/19] fix some problem --- python/paddle/v2/framework/optimizer.py | 5 ++--- python/paddle/v2/framework/tests/test_optimizer.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index b188be5603efd..a3308b2f150ef 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -35,7 +35,6 @@ def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None): Returns: list of (parameters, gradients) pair. """ - assert isinstance(loss, framework.Variable) loss.block.program.append_backward(loss, no_grad_set or set()) if parameter_list is not None: @@ -45,7 +44,7 @@ def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None): params_and_grads = [] for param in parameters: grad = grad_var_name(param) - if loss.block.has_var(grad): + if loss.block.desc.has_var(grad): params_and_grads.append((param, grad)) else: params_and_grads.append((param, None)) @@ -63,7 +62,7 @@ def create_optimization_pass(self, parameters_and_grads, loss): """ optimize_ops = [] for param_and_grad in parameters_and_grads: - if param_and_grad[2] is not None: + if param_and_grad[1] is not None: optimize_op = self._append_optimize_op(loss.block, param_and_grad) optimize_ops.append(optimize_op) diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/framework/tests/test_optimizer.py index a281daca32420..0c9cf191f5612 100644 --- a/python/paddle/v2/framework/tests/test_optimizer.py +++ b/python/paddle/v2/framework/tests/test_optimizer.py @@ -8,7 +8,7 @@ class TestOptimizer(unittest.TestCase): def test_sgd_optimizer(self): program = framework.g_program block = program.global_block() - mul_x = block.create_var( + mul_x = block.create_parameter( dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") mul_y = block.create_var( dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") @@ -18,7 +18,7 @@ def test_sgd_optimizer(self): type="mul", inputs={"X": mul_x, "Y": mul_y}, - outputs={"Out": [mul_out]}, + outputs={"Out": mul_out}, attrs={"x_num_col_dims": 1}) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01) opts = sgd_optimizer.minimize(mul_out) From 787b8ad3735e102e5fc0a029998262017146083b Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 15 Oct 2017 11:02:30 -0700 Subject: [PATCH 09/19] add sync_with_cpp to Python Program and Block --- 
python/paddle/v2/framework/framework.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py index 01cd9982dc1c8..0df60bd8b8e78 100644 --- a/python/paddle/v2/framework/framework.py +++ b/python/paddle/v2/framework/framework.py @@ -308,6 +308,11 @@ def idx(self): def create_var(self, *args, **kwargs): return Variable(self, *args, **kwargs) + def has_var(self, name): + """only means the var with name is in the Python Block. + """ + return name in self.vars + def create_parameter(self, *args, **kwargs): global_block = self.program.global_block() return Parameter(global_block, *args, **kwargs) @@ -324,6 +329,11 @@ def prepend_op(self, *args, **kwargs): self.ops.appendleft(op) return op + def sync_with_cpp(self): + for var in self.desc.all_vars(): + if not self.has_var(var.name): + self.create_var(name=var.name, desc=var, type=var.type) + class Program(object): @classmethod @@ -354,6 +364,11 @@ def global_block(self): def current_block(self): return self.blocks[self.current_block_idx] + def append_backward(self, target, no_grad_set): + assert isinstance(target, Variable) + self.desc.append_backward(target.desc, no_grad_set) + self.sync_with_cpp() + def create_block(self): new_block_idx = len(self.blocks) self.desc.append_block(self.current_block().desc) @@ -364,6 +379,12 @@ def create_block(self): def rollback(self): self.current_block_idx = self.current_block().parent_idx + def sync_with_cpp(self): + for block_idx in range(len(self.blocks), self.desc.num_blocks()): + self.blocks.append(Block(self, block_idx)) + for block in self.blocks: + block.sync_with_cpp() + class Parameter(Variable): def __init__(self, block, shape, dtype, **kwargs): From ae34692a6ef9f7c76eb1ff1d330fda2da30597ef Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 15 Oct 2017 12:54:52 -0700 Subject: [PATCH 10/19] sync vars and ops in block from cpp --- python/paddle/v2/framework/framework.py | 31 +++++++++++++++++-- .../paddle/v2/framework/tests/test_program.py | 21 ++++++++++++- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py index 9a5fe973f2849..8320c3584ebe4 100644 --- a/python/paddle/v2/framework/framework.py +++ b/python/paddle/v2/framework/framework.py @@ -331,8 +331,35 @@ def prepend_op(self, *args, **kwargs): def sync_with_cpp(self): for var in self.desc.all_vars(): - if not self.has_var(var.name): - self.create_var(name=var.name, desc=var, type=var.type) + if not self.has_var(var.name()): + self.create_var(name=var.name(), desc=var, type=var.type()) + ops_in_cpp = self.desc.all_ops() + first_op_in_python = self.ops[0].desc + last_op_in_python = self.ops[len(self.ops) - 1].desc + start_index = None + end_index = None + for index in range(len(ops_in_cpp)): + if first_op_in_python == ops_in_cpp[index]: + start_index = index + if last_op_in_python == ops_in_cpp[index]: + end_index = index + assert start_index is not None + assert end_index is not None + assert start_index < end_index + + # sync ops append to the head of cpp_ops + for index in range((start_index - 1 - 1), -1, -1): + op_desc = ops_in_cpp[index] + op = Operator(self, op_desc) + self.ops.appendleft(op) + + # sync ops append to the end of cpp_ops + for index in range((end_index + 1), len(ops_in_cpp)): + op_desc = ops_in_cpp[index] + op = Operator(self, op_desc) + self.ops.append(op) + + assert len(self.ops) == len(ops_in_cpp) class Program(object): diff --git 
a/python/paddle/v2/framework/tests/test_program.py b/python/paddle/v2/framework/tests/test_program.py index 7c521cd634ca5..d06f86c09fe4e 100644 --- a/python/paddle/v2/framework/tests/test_program.py +++ b/python/paddle/v2/framework/tests/test_program.py @@ -1,6 +1,7 @@ import unittest import paddle.v2.framework.core as core +from paddle.v2.framework.framework import Program from paddle.v2.framework.framework import g_program @@ -33,7 +34,7 @@ def test_program(self): self.assertEqual(1, b.idx) self.assertEqual(0, b.parent_idx) - def test_append_backward(self): + def test_desc_append_backward(self): prog = core.ProgramDesc.__create_program_desc__() self.assertIsNotNone(prog) block = prog.block(0) @@ -71,6 +72,24 @@ def grad_name(name): actual_ops.append(op.type()) self.assertEqual(actual_ops, expect_ops) + def test_append_backward(self): + prog = Program.instance() + block = prog.global_block() + + mul_x = block.create_parameter( + dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") + mul_y = block.create_var( + dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") + mul_out = block.create_var( + dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") + mul_op = block.append_op( + type="mul", + inputs={"X": [mul_x], + "Y": mul_y}, + outputs={"Out": [mul_out]}, + attrs={"x_num_col_dims": 1}) + param_to_grad = prog.append_backward(mul_out, set()) + if __name__ == '__main__': unittest.main() From 1ab717abf92308bec5657375bae7d6a416680b90 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 15 Oct 2017 13:05:13 -0700 Subject: [PATCH 11/19] optimize code and add some comment --- python/paddle/v2/framework/framework.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py index 8320c3584ebe4..f10f8d1ddab20 100644 --- a/python/paddle/v2/framework/framework.py +++ b/python/paddle/v2/framework/framework.py @@ -330,9 +330,12 @@ def prepend_op(self, *args, **kwargs): return op def sync_with_cpp(self): + # sync variables from cpp for var in self.desc.all_vars(): if not self.has_var(var.name()): self.create_var(name=var.name(), desc=var, type=var.type()) + + # sync operators from cpp ops_in_cpp = self.desc.all_ops() first_op_in_python = self.ops[0].desc last_op_in_python = self.ops[len(self.ops) - 1].desc @@ -345,7 +348,7 @@ def sync_with_cpp(self): end_index = index assert start_index is not None assert end_index is not None - assert start_index < end_index + assert start_index <= end_index # sync ops append to the head of cpp_ops for index in range((start_index - 1 - 1), -1, -1): @@ -393,8 +396,9 @@ def current_block(self): def append_backward(self, target, no_grad_set): assert isinstance(target, Variable) - self.desc.append_backward(target.desc, no_grad_set) + param_to_grad_info = self.desc.append_backward(target.desc, no_grad_set) self.sync_with_cpp() + return param_to_grad_info def create_block(self): new_block_idx = len(self.blocks) From 0f1f96df223feb9a510abf930df5c9f3b3b980c6 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 15 Oct 2017 13:23:37 -0700 Subject: [PATCH 12/19] add more check for sync --- python/paddle/v2/framework/framework.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py index f10f8d1ddab20..d649e69d58961 100644 --- a/python/paddle/v2/framework/framework.py +++ b/python/paddle/v2/framework/framework.py @@ -309,8 +309,6 @@ def create_var(self, *args, 
**kwargs): return Variable(self, *args, **kwargs) def has_var(self, name): - """only means the var with name is in the Python Block. - """ return name in self.vars def create_parameter(self, *args, **kwargs): @@ -363,6 +361,8 @@ def sync_with_cpp(self): self.ops.append(op) assert len(self.ops) == len(ops_in_cpp) + for index in range(len(self.ops)): + assert self.ops[index].desc == ops_in_cpp[index] class Program(object): From e0ec8755ffb03b4fc420defbb5914558e21b5878 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 15 Oct 2017 16:27:32 -0700 Subject: [PATCH 13/19] update optimizer with return value of Backward --- python/paddle/v2/framework/framework.py | 15 ++++++++---- python/paddle/v2/framework/optimizer.py | 32 +++++++++++++++---------- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py index b4011820edb06..19e3d7fa30252 100644 --- a/python/paddle/v2/framework/framework.py +++ b/python/paddle/v2/framework/framework.py @@ -305,6 +305,11 @@ def parent_idx(self): def idx(self): return self.desc.id + def var(self, name): + if name not in self.vars: + raise Exception("var %s not in this block" % name) + return self.vars[name] + def create_var(self, *args, **kwargs): return Variable(self, *args, **kwargs) @@ -394,10 +399,16 @@ def __str__(self): def global_block(self): return self.blocks[0] + def block(self, index): + return self.blocks[index] + def current_block(self): return self.blocks[self.current_block_idx] def append_backward(self, target, no_grad_set): + """ + return map(param_name -> (grad_name, block_index, op_index)) + """ assert isinstance(target, Variable) param_to_grad_info = self.desc.append_backward(target.desc, no_grad_set) self.sync_with_cpp() @@ -410,10 +421,6 @@ def create_block(self): self.blocks.append(Block(self, self.current_block_idx)) return self.current_block() - def append_backward(self, target, no_grad_set): - assert isinstance(target, Variable) - self.desc.append_backward(target.desc, no_grad_set) - def rollback(self): self.current_block_idx = self.current_block().parent_idx diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index a3308b2f150ef..d932ea12f8adf 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -3,10 +3,6 @@ __all__ = ['SGDOptimizer'] -def grad_var_name(name): - return name + "@GRAD" - - class Optimizer(object): """Optimizer Base class. @@ -36,18 +32,27 @@ def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None): list of (parameters, gradients) pair. 
""" assert isinstance(loss, framework.Variable) - loss.block.program.append_backward(loss, no_grad_set or set()) + param_grad_map = loss.block.program.append_backward(loss, no_grad_set or + set()) if parameter_list is not None: parameters = parameter_list else: parameters = loss.block.program.parameters params_and_grads = [] for param in parameters: - grad = grad_var_name(param) - if loss.block.desc.has_var(grad): - params_and_grads.append((param, grad)) + if param not in param_grad_map: + raise Exception("param %s is not in map" % param) + grad_info = param_grad_map[param] + grad_block = loss.block.program.block(grad_info[1]) + if not grad_block.has_var(grad_info[0]): + raise Exception("grad block[%d] did not have grad var %s" % + grad_info[1], grad_info[0]) + param_var = loss.block.var(param) + grad_var = grad_block.var(grad_info[0]) + if loss.block.has_var(grad_info[0]): + params_and_grads.append((param_var, grad_var)) else: - params_and_grads.append((param, None)) + params_and_grads.append((param_var, None)) return params_and_grads def create_optimization_pass(self, parameters_and_grads, loss): @@ -99,7 +104,7 @@ def _append_optimize_op(self, block, param_and_grad): # create an op to init the learning_rate init_op = block.append_op( type="fill_constant", - outputs={"Out": lr.name}, + outputs={"Out": lr}, attrs={"shape": lr_shape, "value": self._learning_rate}) @@ -107,10 +112,11 @@ def _append_optimize_op(self, block, param_and_grad): sgd_op = block.append_op( type=self.type, inputs={ - "Param", param_and_grad[0], "Grad", param_and_grad[1], - "LearningRate", lr.name() + "Param": param_and_grad[0], + "Grad": param_and_grad[1], + "LearningRate": lr }, - outputs={"Out", param_and_grad[0]}, + outputs={"ParamOut": param_and_grad[0]}, attrs={"shape": [1], "value": self._learning_rate}) From 37ee2ff8670f3273ac6d507e96d7b4662ff3c54a Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 16 Oct 2017 12:39:07 -0700 Subject: [PATCH 14/19] rm unused code --- paddle/framework/backward.cc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index ca9163c037381..e3d7dacd7f0ad 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -281,15 +281,6 @@ static void CreateGradVarInBlock( auto ops = block_desc->AllOps(); for (size_t op_index = grad_op_start_index; op_index < ops.size(); ++op_index) { - // <<<<<<< HEAD - // for (const auto& output : ops[op_index]->Outputs()) { - // for (const auto& real_output : output.second) { - // if (!block_desc->HasVar(real_output)) { - // block_desc->Var(real_output); - // } - // } - // } - // ======= ForEachVarName(ops[op_index]->Outputs(), [&](const std::string& grad_var_name) { if (block_desc->HasVar(grad_var_name)) { @@ -307,7 +298,6 @@ static void CreateGradVarInBlock( grad_record.op_idx_ = static_cast(op_index); return false; /* not break */ }); - // >>>>>>> origin/develop } } From 6c765b07667e420e519aa14e41bcac1f0c9b0b95 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 16 Oct 2017 13:05:50 -0700 Subject: [PATCH 15/19] infer shape when create gradient vairiable --- paddle/framework/backward.cc | 14 ++++++++++++-- paddle/operators/mul_op.cc | 8 ++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index e3d7dacd7f0ad..f57e9c34988f6 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -281,12 +281,16 @@ static void CreateGradVarInBlock( auto ops = 
block_desc->AllOps(); for (size_t op_index = grad_op_start_index; op_index < ops.size(); ++op_index) { + bool need_infer_shape = false; ForEachVarName(ops[op_index]->Outputs(), [&](const std::string& grad_var_name) { if (block_desc->HasVar(grad_var_name)) { return false; } - block_desc->Var(grad_var_name); + need_infer_shape = true; + auto var = block_desc->Var(grad_var_name); + // FIXME(qiao) infer the datatype + var->SetDataType(framework::DataType::FP32); auto it = param_name_map.find(grad_var_name); if (it == param_name_map.end()) { return false; @@ -298,6 +302,9 @@ static void CreateGradVarInBlock( grad_record.op_idx_ = static_cast(op_index); return false; /* not break */ }); + if (need_infer_shape) { + ops[op_index]->InferShape(*block_desc); + } } } @@ -449,7 +456,10 @@ ParamGradInfoMap AppendBackward( for (auto& ptr : backward_op_descs) { all_ops.push_back(std::move(ptr)); } - root_block->Var(fill_one_op_out); + auto var = root_block->Var(fill_one_op_out); + // FIXME(qiao) infer the data type + var->SetDataType(framework::DataType::FP32); + var->SetShape(target.Shape()); // create grad_var for all blocks in this program CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv); diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index ec0683d8875a9..9db05a8ca0735 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -106,10 +106,10 @@ class MulOpGrad : public framework::OperatorWithKernel { auto y_dims = ctx->GetInputDim("Y"); auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); - auto x_mat_dims = - framework::flatten_to_2d(x_dims, Attr("x_num_col_dims")); - auto y_mat_dims = - framework::flatten_to_2d(y_dims, Attr("y_num_col_dims")); + auto x_mat_dims = framework::flatten_to_2d( + x_dims, ctx->Attrs().Get("x_num_col_dims")); + auto y_mat_dims = framework::flatten_to_2d( + y_dims, ctx->Attrs().Get("y_num_col_dims")); PADDLE_ENFORCE_EQ( x_mat_dims[0], out_dims[0], From c141b9715398beea2fc9baba6c0be029b748014c Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 16 Oct 2017 13:14:35 -0700 Subject: [PATCH 16/19] update test_optimizer --- paddle/framework/backward.cc | 8 +++++++- python/paddle/v2/framework/tests/test_optimizer.py | 4 +++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index f57e9c34988f6..6c51ac02b69f2 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -435,10 +435,16 @@ ParamGradInfoMap AppendBackward( auto& all_ops = root_block->ops_; // insert fill one op for target + // TODO(qiao) add some check to the target. 
std::string fill_one_op_out = GradVarName(target.Name()); + std::vector target_shape_desc = target.Shape(); + std::vector target_shape; + std::transform(target_shape_desc.begin(), target_shape_desc.end(), + std::back_inserter(target_shape), + [](int64_t dim) { return static_cast(dim); }); std::unique_ptr fill_one_op( new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}}, - {{"shape", std::vector{1}}, + {{"shape", target_shape}, {"value", static_cast(1.0)}, {"dataType", framework::DataType::FP32}})); all_ops.push_back(std::move(fill_one_op)); diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/framework/tests/test_optimizer.py index 0c9cf191f5612..3d6fa70737bf3 100644 --- a/python/paddle/v2/framework/tests/test_optimizer.py +++ b/python/paddle/v2/framework/tests/test_optimizer.py @@ -22,7 +22,9 @@ def test_sgd_optimizer(self): attrs={"x_num_col_dims": 1}) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01) opts = sgd_optimizer.minimize(mul_out) - print(opts) + self.assertEqual(len(opts), 1) + sgd_op = opts[0] + self.assertEqual(sgd_op.type, "sgd") if __name__ == '__main__': From c0f8af2a36292fe9b50f854d97bf55130cdf8533 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 16 Oct 2017 14:30:51 -0700 Subject: [PATCH 17/19] update test_program.py --- paddle/framework/backward.cc | 12 +++- .../paddle/v2/framework/tests/test_program.py | 70 ++++++++----------- 2 files changed, 39 insertions(+), 43 deletions(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 6c51ac02b69f2..5f8a4815ed894 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -456,16 +456,22 @@ ParamGradInfoMap AppendBackward( auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx, &no_grad_var_names, &grad_to_var); - std::unordered_map retv; - - // Create Variable for (auto& ptr : backward_op_descs) { all_ops.push_back(std::move(ptr)); } + // Create Variable + + // Create target gradient variable + std::unordered_map retv; + auto var = root_block->Var(fill_one_op_out); // FIXME(qiao) infer the data type var->SetDataType(framework::DataType::FP32); var->SetShape(target.Shape()); + auto& target_grad = retv[target.Name()]; + target_grad.name_ = fill_one_op_out; + target_grad.block_idx_ = root_block_idx; + target_grad.op_idx_ = static_cast(forward_op_num); // create grad_var for all blocks in this program CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv); diff --git a/python/paddle/v2/framework/tests/test_program.py b/python/paddle/v2/framework/tests/test_program.py index d06f86c09fe4e..c98dc3492b950 100644 --- a/python/paddle/v2/framework/tests/test_program.py +++ b/python/paddle/v2/framework/tests/test_program.py @@ -34,49 +34,11 @@ def test_program(self): self.assertEqual(1, b.idx) self.assertEqual(0, b.parent_idx) - def test_desc_append_backward(self): - prog = core.ProgramDesc.__create_program_desc__() - self.assertIsNotNone(prog) - block = prog.block(0) - self.assertIsNotNone(block) - - mul_op_desc = block.append_op() - mul_op_desc.set_type("mul") - mul_op_desc.set_input("X", ["x1"]) - mul_op_desc.set_input("Y", ["y1"]) - mul_op_desc.set_output("Out", ["out1"]) - - sum_op_desc = block.append_op() - sum_op_desc.set_type("elementwise_add") - sum_op_desc.set_input("X", ["out1"]) - sum_op_desc.set_input("Y", ["b1"]) - sum_op_desc.set_output("Out", ["out2"]) - - target = block.var("out2") - - expect_ops = [ - "mul", "elementwise_add", "fill_constant", "elementwise_add_grad", - "mul_grad" - ] - - 
def grad_name(name): - return name + "@GRAD" - - actual_ops = [] - param_to_grad = prog.append_backward(target, set()) - for var_name in ("x1", "y1", "out1", "b1"): - self.assertEqual(param_to_grad[var_name][0], grad_name(var_name)) - self.assertEqual(param_to_grad[var_name][1], 0) - - for op in block.all_ops(): - actual_ops.append(op.type()) - self.assertEqual(actual_ops, expect_ops) - def test_append_backward(self): prog = Program.instance() block = prog.global_block() - mul_x = block.create_parameter( + mul_x = block.create_var( dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") mul_y = block.create_var( dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") @@ -88,7 +50,35 @@ def test_append_backward(self): "Y": mul_y}, outputs={"Out": [mul_out]}, attrs={"x_num_col_dims": 1}) - param_to_grad = prog.append_backward(mul_out, set()) + + add_y = block.create_var( + dtype="float32", shape=[5, 8], lod_level=0, name="add.y") + add_out = block.create_var( + dtype="float32", shape=[5, 8], lod_level=0, name="add.out") + add_op = block.append_op( + type="elementwise_add", + inputs={"X": mul_out, + "Y": add_y}, + outputs={"Out": add_out}, + attrs={"x_num_col_dims": 1}) + + param_to_grad = prog.append_backward(add_out, set()) + + def grad_name(name): + return name + "@GRAD" + + for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out"): + self.assertEqual(param_to_grad[var_name][0], grad_name(var_name)) + self.assertEqual(param_to_grad[var_name][1], 0) + + expect_ops = [ + "mul", "elementwise_add", "fill_constant", "elementwise_add_grad", + "mul_grad" + ] + actual_ops = [] + for op in block.ops: + actual_ops.append(op.type) + self.assertEqual(actual_ops, expect_ops) if __name__ == '__main__': From ea2a483134a68ead31b2494fc6daff4e5aa637b4 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 16 Oct 2017 17:46:24 -0700 Subject: [PATCH 18/19] update backward test --- paddle/framework/backward_test.cc | 81 ++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 17 deletions(-) diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 5302afcafb5c0..0c35a157bcfeb 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -26,6 +26,20 @@ namespace framework { using DeviceContext = platform::DeviceContext; +class NoneOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *ctx) const override {} +}; + +template +class NoneKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override {} +}; + class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { public: RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker) @@ -215,19 +229,51 @@ class MinusOpMaker : public OpProtoAndCheckerMaker { namespace f = paddle::framework; namespace ops = paddle::operators; using EnforceNotMet = paddle::platform::EnforceNotMet; -REGISTER_OPERATOR(rowwise_add, f::NOP, f::RowWiseAddOpMaker, +// rowwise_add +REGISTER_OPERATOR(rowwise_add, f::NoneOp, f::RowWiseAddOpMaker, f::RowWiseAddGradMaker); -REGISTER_OPERATOR(rowwise_add_grad, f::NOP); -REGISTER_OP(mul, f::NOP, f::MulOpMaker, mul_grad, f::NOP); -REGISTER_OP(sigmoid, f::NOP, f::SigmoidOpMaker, sigmoid_grad, f::NOP); -REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NOP, f::NoGradOpMaker); -REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NOP, f::FillZeroOpMaker); -REGISTER_OP(sum, f::NOP, f::SumOpMaker, sum_grad, 
f::NOP); +REGISTER_OP_CPU_KERNEL(rowwise_add, + f::NoneKernel); +REGISTER_OPERATOR(rowwise_add_grad, f::NoneOp); +REGISTER_OP_CPU_KERNEL(rowwise_add_grad, + f::NoneKernel); +// mul +REGISTER_OP(mul, f::NoneOp, f::MulOpMaker, mul_grad, f::NoneOp); +REGISTER_OP_CPU_KERNEL(mul, f::NoneKernel); +REGISTER_OP_CPU_KERNEL(mul_grad, + f::NoneKernel); +// sigmoid +REGISTER_OP(sigmoid, f::NoneOp, f::SigmoidOpMaker, sigmoid_grad, f::NoneOp); +REGISTER_OP_CPU_KERNEL(sigmoid, + f::NoneKernel); +REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NoneOp, f::NoGradOpMaker); +// fill_zeros_like +REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NoneOp, f::FillZeroOpMaker); +REGISTER_OP_CPU_KERNEL(fill_zeros_like, + f::NoneKernel); +// sum +REGISTER_OP(sum, f::NoneOp, f::SumOpMaker, sum_grad, f::NoneOp); +REGISTER_OP_CPU_KERNEL(sum, f::NoneKernel); +REGISTER_OP_CPU_KERNEL(sum_grad, + f::NoneKernel); +// fc REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker); -REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad, - f::NOP); -REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad, f::NOP); -REGISTER_OPERATOR(minus, f::NOP, f::MinusOpMaker, f::MinusGradOpDescMaker); +// many_output_op +REGISTER_OP(many_output_op, f::NoneOp, f::ManyOutputOpMaker, + many_output_op_grad, f::NoneOp); +// mult_in_out +REGISTER_OP(mult_in_out, f::NoneOp, f::MultInOutOpMaker, mult_in_out_grad, + f::NoneOp); +REGISTER_OP_CPU_KERNEL(mult_in_out, + f::NoneKernel); +REGISTER_OP_CPU_KERNEL(mult_in_out_grad, + f::NoneKernel); +// minus +REGISTER_OPERATOR(minus, f::NoneOp, f::MinusOpMaker, f::MinusGradOpDescMaker); +REGISTER_OP_CPU_KERNEL(minus, f::NoneKernel); +// scale +REGISTER_OPERATOR(scale, f::NoneOp); +REGISTER_OP_CPU_KERNEL(scale, f::NoneKernel); TEST(Backward, simple_op_not_need_grad) { auto fwd = f::OpRegistry::CreateOp( @@ -463,6 +509,7 @@ TEST(Backward, simple_single_op) { f::ProgramDesc *program_desc = GetNewProgramDesc(); f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc); f::BlockDescBind *block = program.Block(0); + f::OpDescBind *op = block->AppendOp(); op->SetType("rowwise_add"); op->SetInput("X", {"x"}); @@ -487,7 +534,7 @@ TEST(Backward, simple_single_op) { EXPECT_EQ(grad_op->Output(f::GradVarName("b")), std::vector({f::GradVarName("b")})); - EXPECT_EQ(var_to_grad.size(), 2UL); + EXPECT_EQ(var_to_grad.size(), 3UL); EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2)); EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2)); @@ -588,7 +635,7 @@ TEST(Backward, simple_mult_op) { EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), std::vector({f::GradVarName("b3")})); - EXPECT_EQ(var_to_grad.size(), 6UL); + EXPECT_EQ(var_to_grad.size(), 7UL); EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6)); EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6)); EXPECT_EQ(var_to_grad.at("out1"), @@ -666,7 +713,7 @@ TEST(Backward, intermedia_var_no_grad) { std::vector({f::GradVarName("out1")})); EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector()); - EXPECT_EQ(var_to_grad.size(), 3UL); + EXPECT_EQ(var_to_grad.size(), 4UL); EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6)); EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6)); EXPECT_EQ(var_to_grad.at("out1"), @@ -744,7 +791,7 @@ TEST(Backward, var_no_grad) { EXPECT_EQ(grad_op1->Output(f::GradVarName("H")), std::vector({f::GradVarName("h1")})); - EXPECT_EQ(var_to_grad.size(), 3UL); + 
EXPECT_EQ(var_to_grad.size(), 4UL); EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3)); EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5)); EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5)); @@ -830,7 +877,7 @@ TEST(Backward, shared_var) { EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), std::vector({f::GradVarName("b1")})); - EXPECT_EQ(var_to_grad.size(), 5UL); + EXPECT_EQ(var_to_grad.size(), 6UL); EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4)); EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5)); EXPECT_EQ(var_to_grad.at("out1"), @@ -863,7 +910,7 @@ TEST(Backward, half_backward) { auto ops = block->AllOps(); ASSERT_EQ(3UL, ops.size()); - EXPECT_EQ(var_to_grad.size(), 1UL); + EXPECT_EQ(var_to_grad.size(), 2UL); EXPECT_EQ(var_to_grad.at("a"), f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1)); } From b241744aa63d5fe84530c45e368062704518625f Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 16 Oct 2017 21:00:13 -0700 Subject: [PATCH 19/19] follow comment --- python/paddle/v2/framework/framework.py | 7 ++++--- python/paddle/v2/framework/optimizer.py | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py index 7186dd5c637f5..3fb6efe42a2c6 100644 --- a/python/paddle/v2/framework/framework.py +++ b/python/paddle/v2/framework/framework.py @@ -308,9 +308,12 @@ def idx(self): def var(self, name): if name not in self.vars: - raise Exception("var %s not in this block" % name) + raise ValueError("var %s not in this block" % name) return self.vars[name] + def all_parameters(self): + return {v for k, v in self.vars.iteritems() if isinstance(v, Parameter)} + def create_var(self, *args, **kwargs): return Variable(self, *args, **kwargs) @@ -320,7 +323,6 @@ def has_var(self, name): def create_parameter(self, *args, **kwargs): global_block = self.program.global_block() param = Parameter(global_block, *args, **kwargs) - self.program.parameters.append(param.name) return param def append_op(self, *args, **kwargs): @@ -388,7 +390,6 @@ def __init__(self, desc=None): self.desc = desc self.blocks = [Block(self, 0)] self.current_block_idx = 0 - self.parameters = [] # parameter name list stored in the global scope def __str__(self): protostr = self.desc.serialize_to_string() diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index d932ea12f8adf..e356a7aadb8d6 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -37,7 +37,8 @@ def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None): if parameter_list is not None: parameters = parameter_list else: - parameters = loss.block.program.parameters + params = loss.block.program.global_block().all_parameters() + parameters = [param.name for param in params] params_and_grads = [] for param in parameters: if param not in param_grad_map:
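
Taken together, the series leaves the v2 framework with a small but complete optimizer API: Program.append_backward() appends the gradient operators and returns the parameter-to-gradient map, and SGDOptimizer.minimize() turns that map into one sgd op (plus a fill_constant op that materializes the learning rate as a variable) per trainable parameter. The sketch below is a minimal usage example, mirroring test_optimizer.py as it stands at the end of the series; the names mul.x / mul.y / mul.out are just the illustrative ones used in that test, and the API shown is the experimental python/paddle/v2/framework interface as of these patches, not a stable one.

import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer

program = framework.g_program
block = program.global_block()

# One parameter (mul.x) and one plain input (mul.y); their product is the
# variable we will treat as the optimization target.
mul_x = block.create_parameter(
    dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
mul_y = block.create_var(
    dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
    dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
block.append_op(
    type="mul",
    inputs={"X": mul_x,
            "Y": mul_y},
    outputs={"Out": mul_out},
    attrs={"x_num_col_dims": 1})

# minimize() = create_backward_pass() + create_optimization_pass():
# it appends the backward ops for mul_out, then adds one sgd op (fed by a
# fill_constant learning-rate variable) for each (parameter, gradient) pair.
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
opts = sgd_optimizer.minimize(mul_out)
assert len(opts) == 1
assert opts[0].type == "sgd"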