From 685e23258567b45f68e12f0bced805fec3f1bf47 Mon Sep 17 00:00:00 2001
From: Kexin Zhao
Date: Tue, 14 Nov 2017 11:49:50 -0800
Subject: [PATCH 1/3] add decayed adagrad python code

---
 python/paddle/v2/fluid/optimizer.py           |  53 +++++++++++++++++-
 .../paddle/v2/fluid/tests/test_optimizer.py   |  56 ++++++++++++++++++-
 2 files changed, 105 insertions(+), 4 deletions(-)

diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py
index 4252a6f08509f..b703f76d0d790 100644
--- a/python/paddle/v2/fluid/optimizer.py
+++ b/python/paddle/v2/fluid/optimizer.py
@@ -9,7 +9,7 @@

 __all__ = [
     'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
-    'AdamaxOptimizer'
+    'AdamaxOptimizer', 'DecayedAdagradOptimizer'
 ]

@@ -85,7 +85,7 @@ def _add_accumulator(self, name, param, dtype=None, fill_value=0.0):
         """
         if (name in self._accumulators and
                 param.name in self._accumulators[name]):
-            raise Exception("Accumulator {} already exists for parmeter {}".
+            raise Exception("Accumulator {} already exists for parameter {}".
                             format(name, param.name))

         assert isinstance(self.helper, LayerHelper)
@@ -307,7 +307,7 @@ def _append_optimize_op(self, block, param_and_grad):
         moment_acc = self._get_accumulator(self._moment_acc_str,
                                            param_and_grad[0])

-        # create the adagrad optimizer op
+        # Create the adagrad optimizer op
         adagrad_op = block.append_op(
             type=self.type,
             inputs={
@@ -323,6 +323,53 @@ def _append_optimize_op(self, block, param_and_grad):
         return adagrad_op


+class DecayedAdagradOptimizer(Optimizer):
+    """Simple Decayed Adagrad optimizer with moment state
+    """
+    _moment_acc_str = "moment"
+
+    def __init__(self,
+                 learning_rate,
+                 decay=0.95,
+                 epsilon=1.0e-6,
+                 global_step=None):
+        assert learning_rate is not None
+        assert decay is not None
+        assert epsilon is not None
+        super(DecayedAdagradOptimizer, self).__init__(global_step)
+        self.type = "decayed_adagrad"
+        self._learning_rate = learning_rate
+        self._decay = decay
+        self._epsilon = epsilon
+
+    def _create_accumulators(self, block, parameters):
+        assert isinstance(block, framework.Block)
+
+        for p in parameters:
+            self._add_accumulator(self._moment_acc_str, p)
+
+    def _append_optimize_op(self, block, param_and_grad):
+        assert isinstance(block, framework.Block)
+
+        moment_acc = self._get_accumulator(self._moment_acc_str,
+                                           param_and_grad[0])
+
+        # Create the decayed adagrad optimizer op
+        decayed_adagrad_op = block.append_op(
+            type=self.type,
+            inputs={
+                "Param": param_and_grad[0],
+                "Grad": param_and_grad[1],
+                "Moment": moment_acc,
+                "LearningRate": self._create_param_lr(param_and_grad)
+            },
+            outputs={"ParamOut": param_and_grad[0],
+                     "MomentOut": moment_acc},
+            attrs={"epsilon": self._epsilon})
+
+        return decayed_adagrad_op
+
+
 class AdamOptimizer(Optimizer):
     """Implements the Adam Optimizer
     """
diff --git a/python/paddle/v2/fluid/tests/test_optimizer.py b/python/paddle/v2/fluid/tests/test_optimizer.py
index 0ebf7cdf208c4..bdfc839a7e7e5 100644
--- a/python/paddle/v2/fluid/tests/test_optimizer.py
+++ b/python/paddle/v2/fluid/tests/test_optimizer.py
@@ -198,7 +198,7 @@ def test_adagrad_optimizer(self):
         adagrad_op = opts[0]
         self.assertEqual(adagrad_op.type, "adagrad")

-        # check accumulators
+        # Check accumulators
         accumulators = adagrad_optimizer.get_accumulators()
         self.assertEqual(len(accumulators), 1)
         self.assertTrue(adagrad_optimizer.get_moment_str() in accumulators)
@@ -215,6 +215,60 @@ def test_adagrad_optimizer(self):
         self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
+
+
+class TestDecayedAdagradOptimizer(unittest.TestCase):
+    class MockDecayedAdagrad(optimizer.DecayedAdagradOptimzier):
+        def get_accumulators(self):
+            return self._accumulators
+
+        def get_moment_str(self):
+            return self._moment_acc_str
+
+    def test_decayed_adagrad_optimizer(self):
+        init_program = framework.Program()
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        learning_rate = 0.01
+        decayed_adagrad_optimizer = self.MockDecayedAdagrad(
+            learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
+        params_grads = append_backward_ops(mul_out)
+        self.assertEqual(len(params_grads), 1)
+        self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
+        opts = decayed_adagrad_optimizer.create_optimization_pass(
+            params_grads, mul_out, init_program)
+        self.assertEqual(len(opts), 1)
+        decayed_adagrad_op = opts[0]
+        self.assertEqual(decayed_adagrad_op.type, "decayed_adagrad")
+
+        # Check accumulators
+        accumulators = decayed_adagrad_optimizer.get_accumulators()
+        self.assertEqual(len(accumulators), 1)
+        self.assertTrue(
+            decayed_adagrad_optimizer.get_moment_str() in accumulators)
+        moment_acc = accumulators[decayed_adagrad_optimizer.get_moment_str()]
+        self.assertEqual(len(moment_acc), 1)
+        self.assertTrue(mul_x.name in moment_acc)
+
+        # Check init_program
+        init_ops = init_program.global_block().ops
+        self.assertEqual(len(init_ops), 2)
+        self.assertEqual(init_ops[0].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
+        self.assertEqual(init_ops[1].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)


 class TestAdamOptimizer(unittest.TestCase):
     class MockAdam(optimizer.AdamOptimizer):
         def get_accumulators(self):

From fd22e73eba52ebdf762b5ec63a2c547d769abeb2 Mon Sep 17 00:00:00 2001
From: Kexin Zhao
Date: Tue, 14 Nov 2017 13:45:38 -0800
Subject: [PATCH 2/3] fix typo and order

---
 python/paddle/v2/fluid/optimizer.py           |  94 +++++++--------
 .../paddle/v2/fluid/tests/test_optimizer.py   | 108 +++++++++---------
 2 files changed, 101 insertions(+), 101 deletions(-)

diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py
index b703f76d0d790..490dcca095abe 100644
--- a/python/paddle/v2/fluid/optimizer.py
+++ b/python/paddle/v2/fluid/optimizer.py
@@ -323,53 +323,6 @@ def _append_optimize_op(self, block, param_and_grad):
         return adagrad_op


-class DecayedAdagradOptimizer(Optimizer):
-    """Simple Decayed Adagrad optimizer with moment state
-    """
-    _moment_acc_str = "moment"
-
-    def __init__(self,
-                 learning_rate,
-                 decay=0.95,
-                 epsilon=1.0e-6,
-                 global_step=None):
-        assert learning_rate is not None
-        assert decay is not None
-        assert epsilon is not None
-        super(DecayedAdagradOptimizer, self).__init__(global_step)
-        self.type = "decayed_adagrad"
-        self._learning_rate = learning_rate
-        self._decay = decay
-        self._epsilon = epsilon
-
-    def _create_accumulators(self, block, parameters):
-        assert isinstance(block, framework.Block)
-
-        for p in parameters:
-            self._add_accumulator(self._moment_acc_str, p)
-
-    def _append_optimize_op(self, block, param_and_grad):
-        assert isinstance(block, framework.Block)
-
-        moment_acc = self._get_accumulator(self._moment_acc_str,
-                                           param_and_grad[0])
-
-        # Create the decayed adagrad optimizer op
-        decayed_adagrad_op = block.append_op(
-            type=self.type,
-            inputs={
-                "Param": param_and_grad[0],
-                "Grad": param_and_grad[1],
-                "Moment": moment_acc,
-                "LearningRate": self._create_param_lr(param_and_grad)
-            },
-            outputs={"ParamOut": param_and_grad[0],
-                     "MomentOut": moment_acc},
-            attrs={"epsilon": self._epsilon})
-
-        return decayed_adagrad_op
-
-
 class AdamOptimizer(Optimizer):
     """Implements the Adam Optimizer
     """
@@ -557,3 +510,50 @@ def _finish_update(self, block):
             attrs={"scale": self._beta1})

         return [scale_beta1]
+
+
+class DecayedAdagradOptimizer(Optimizer):
+    """Simple Decayed Adagrad optimizer with moment state
+    """
+    _moment_acc_str = "moment"
+
+    def __init__(self,
+                 learning_rate,
+                 decay=0.95,
+                 epsilon=1.0e-6,
+                 global_step=None):
+        assert learning_rate is not None
+        assert decay is not None
+        assert epsilon is not None
+        self.type = "decayed_adagrad"
+        self._learning_rate = learning_rate
+        self._decay = decay
+        self._epsilon = epsilon
+
+    def _create_accumulators(self, block, parameters):
+        assert isinstance(block, framework.Block)
+
+        for p in parameters:
+            self._add_accumulator(self._moment_acc_str, p)
+
+    def _append_optimize_op(self, block, param_and_grad):
+        assert isinstance(block, framework.Block)
+
+        moment_acc = self._get_accumulator(self._moment_acc_str,
+                                           param_and_grad[0])
+
+        # Create the decayed adagrad optimizer op
+        decayed_adagrad_op = block.append_op(
+            type=self.type,
+            inputs={
+                "Param": param_and_grad[0],
+                "Grad": param_and_grad[1],
+                "Moment": moment_acc,
+                "LearningRate": self._create_param_lr(param_and_grad)
+            },
+            outputs={"ParamOut": param_and_grad[0],
+                     "MomentOut": moment_acc},
+            attrs={"epsilon": self._epsilon})
+
+        return decayed_adagrad_op
diff --git a/python/paddle/v2/fluid/tests/test_optimizer.py b/python/paddle/v2/fluid/tests/test_optimizer.py
index bdfc839a7e7e5..7b4237e7fdf59 100644
--- a/python/paddle/v2/fluid/tests/test_optimizer.py
+++ b/python/paddle/v2/fluid/tests/test_optimizer.py
@@ -215,60 +215,6 @@ def test_adagrad_optimizer(self):
         self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
-
-
-class TestDecayedAdagradOptimizer(unittest.TestCase):
-    class MockDecayedAdagrad(optimizer.DecayedAdagradOptimzier):
-        def get_accumulators(self):
-            return self._accumulators
-
-        def get_moment_str(self):
-            return self._moment_acc_str
-
-    def test_decayed_adagrad_optimizer(self):
-        init_program = framework.Program()
-        program = framework.Program()
-        block = program.global_block()
-        mul_x = block.create_parameter(
-            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
-        mul_y = block.create_var(
-            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
-        mul_out = block.create_var(
-            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
-        block.append_op(
-            type="mul",
-            inputs={"X": mul_x,
-                    "Y": mul_y},
-            outputs={"Out": mul_out},
-            attrs={"x_num_col_dims": 1})
-        learning_rate = 0.01
-        decayed_adagrad_optimizer = self.MockDecayedAdagrad(
-            learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
-        params_grads = append_backward_ops(mul_out)
-        self.assertEqual(len(params_grads), 1)
-        self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
-        opts = decayed_adagrad_optimizer.create_optimization_pass(
-            params_grads, mul_out, init_program)
-        self.assertEqual(len(opts), 1)
-        decayed_adagrad_op = opts[0]
-        self.assertEqual(decayed_adagrad_op.type, "decayed_adagrad")
-
-        # Check accumulators
-        accumulators = decayed_adagrad_optimizer.get_accumulators()
-        self.assertEqual(len(accumulators), 1)
-        self.assertTrue(
-            decayed_adagrad_optimizer.get_moment_str() in accumulators)
-        moment_acc = accumulators[decayed_adagrad_optimizer.get_moment_str()]
-        self.assertEqual(len(moment_acc), 1)
-        self.assertTrue(mul_x.name in moment_acc)
-
-        # Check init_program
-        init_ops = init_program.global_block().ops
-        self.assertEqual(len(init_ops), 2)
-        self.assertEqual(init_ops[0].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
-        self.assertEqual(init_ops[1].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)


 class TestAdamOptimizer(unittest.TestCase):
     class MockAdam(optimizer.AdamOptimizer):
         def get_accumulators(self):
@@ -385,5 +331,59 @@ def test_adamax_optimizer(self):
         self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)


+class TestDecayedAdagradOptimizer(unittest.TestCase):
+    class MockDecayedAdagrad(optimizer.DecayedAdagradOptimizer):
+        def get_accumulators(self):
+            return self._accumulators
+
+        def get_moment_str(self):
+            return self._moment_acc_str
+
+    def test_decayed_adagrad_optimizer(self):
+        init_program = framework.Program()
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        learning_rate = 0.01
+        decayed_adagrad_optimizer = self.MockDecayedAdagrad(
+            learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
+        params_grads = append_backward_ops(mul_out)
+        self.assertEqual(len(params_grads), 1)
+        self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
+        opts = decayed_adagrad_optimizer.create_optimization_pass(
+            params_grads, mul_out, init_program)
+        self.assertEqual(len(opts), 1)
+        decayed_adagrad_op = opts[0]
+        self.assertEqual(decayed_adagrad_op.type, "decayed_adagrad")
+
+        # Check accumulators
+        accumulators = decayed_adagrad_optimizer.get_accumulators()
+        self.assertEqual(len(accumulators), 1)
+        self.assertTrue(
+            decayed_adagrad_optimizer.get_moment_str() in accumulators)
+        moment_acc = accumulators[decayed_adagrad_optimizer.get_moment_str()]
+        self.assertEqual(len(moment_acc), 1)
+        self.assertTrue(mul_x.name in moment_acc)
+
+        # Check init_program
+        init_ops = init_program.global_block().ops
+        self.assertEqual(len(init_ops), 2)
+        self.assertEqual(init_ops[0].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
+        self.assertEqual(init_ops[1].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
+
+
 if __name__ == '__main__':
     unittest.main()

From 168173b88e23c800afe6c03a26cb3c81646054d0 Mon Sep 17 00:00:00 2001
From: Kexin Zhao
Date: Tue, 14 Nov 2017 15:14:44 -0800
Subject: [PATCH 3/3] small fix

---
 python/paddle/v2/fluid/optimizer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py
index 490dcca095abe..d2841df6af7a0 100644
--- a/python/paddle/v2/fluid/optimizer.py
+++ b/python/paddle/v2/fluid/optimizer.py
@@ -525,6 +525,7 @@ def __init__(self,
         assert learning_rate is not None
         assert decay is not None
         assert epsilon is not None
+        super(DecayedAdagradOptimizer, self).__init__(global_step)
         self.type = "decayed_adagrad"
         self._learning_rate = learning_rate
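
Note on the update this series wires up: the Python class added here only registers the "moment" accumulator and connects Param, Grad, Moment, and LearningRate to the C++ "decayed_adagrad" operator; the arithmetic itself lives in that kernel. As a reference, the sketch below shows the usual decayed-Adagrad rule that such a kernel is expected to apply. It is an illustration under that assumption, not part of the patch; the helper name decayed_adagrad_step and the NumPy usage are invented here purely for clarity.

# Illustrative sketch only -- not the Paddle op. It mirrors the commonly
# documented decayed-Adagrad update, which the "decayed_adagrad" kernel is
# assumed to implement.
import numpy as np


def decayed_adagrad_step(param, grad, moment, lr, decay=0.95, epsilon=1.0e-6):
    """Apply one decayed-Adagrad step to NumPy arrays.

    `moment` plays the role of the "moment" accumulator that
    _create_accumulators() registers for every parameter.
    """
    # Exponentially decayed sum of squared gradients.
    moment_out = decay * moment + (1.0 - decay) * grad * grad
    # Scale the step by the accumulated (decayed) gradient magnitude.
    param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
    return param_out, moment_out


# Toy usage of the sketch:
p = np.ones((3, ), dtype="float32")
g = np.full((3, ), 0.1, dtype="float32")
m = np.zeros_like(p)
p, m = decayed_adagrad_step(p, g, m, lr=0.01)

Within a fluid program the new class would be used like the other optimizers in this file: construct it with a learning rate and let it build the optimization pass, as the unit test above does through create_optimization_pass.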