
Adding interface for decayed adagrad optimizer #5644

Merged (3 commits, Nov 15, 2017)
54 changes: 51 additions & 3 deletions python/paddle/v2/fluid/optimizer.py
@@ -9,7 +9,7 @@

 __all__ = [
     'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
-    'AdamaxOptimizer'
+    'AdamaxOptimizer', 'DecayedAdagradOptimizer'
 ]


@@ -85,7 +85,7 @@ def _add_accumulator(self, name, param, dtype=None, fill_value=0.0):
         """
         if (name in self._accumulators and
                 param.name in self._accumulators[name]):
-            raise Exception("Accumulator {} already exists for parmeter {}".
+            raise Exception("Accumulator {} already exists for parameter {}".
                             format(name, param.name))

         assert isinstance(self.helper, LayerHelper)
@@ -307,7 +307,7 @@ def _append_optimize_op(self, block, param_and_grad):
         moment_acc = self._get_accumulator(self._moment_acc_str,
                                            param_and_grad[0])

-        # create the adagrad optimizer op
+        # Create the adagrad optimizer op
         adagrad_op = block.append_op(
             type=self.type,
             inputs={
@@ -510,3 +510,51 @@ def _finish_update(self, block):
             attrs={"scale": self._beta1})

         return [scale_beta1]
+
+
+class DecayedAdagradOptimizer(Optimizer):
+    """Simple Decayed Adagrad optimizer with moment state
+    """
+    _moment_acc_str = "moment"
+
+    def __init__(self,
+                 learning_rate,
+                 decay=0.95,
+                 epsilon=1.0e-6,
+                 global_step=None):
+        assert learning_rate is not None
+        assert decay is not None
+        assert epsilon is not None
+
+        super(DecayedAdagradOptimizer, self).__init__(global_step)
+        self.type = "decayed_adagrad"
+        self._learning_rate = learning_rate
+        self._decay = decay
+        self._epsilon = epsilon
+
+    def _create_accumulators(self, block, parameters):
+        assert isinstance(block, framework.Block)
+
+        for p in parameters:
+            self._add_accumulator(self._moment_acc_str, p)
+
+    def _append_optimize_op(self, block, param_and_grad):
+        assert isinstance(block, framework.Block)
+
+        moment_acc = self._get_accumulator(self._moment_acc_str,
+                                           param_and_grad[0])
+
+        # Create the decayed adagrad optimizer op
+        decayed_adagrad_op = block.append_op(
+            type=self.type,
+            inputs={
+                "Param": param_and_grad[0],
+                "Grad": param_and_grad[1],
+                "Moment": moment_acc,
+                "LearningRate": self._create_param_lr(param_and_grad)
+            },
+            outputs={"ParamOut": param_and_grad[0],
+                     "MomentOut": moment_acc},
+            attrs={"epsilon": self._epsilon})
+
+        return decayed_adagrad_op
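For readers skimming the diff: the Python class above only wires up the `decayed_adagrad` operator; the arithmetic itself lives in the C++ op. A minimal NumPy sketch of the update rule this op is generally described as applying (the helper name and toy values below are illustrative, not part of this PR):

```python
import numpy as np

def decayed_adagrad_step(param, grad, moment, lr, decay=0.95, epsilon=1.0e-6):
    """One decayed-Adagrad update on plain NumPy arrays (illustration only)."""
    # Keep an exponentially decayed sum of squared gradients instead of
    # Adagrad's ever-growing sum; `decay` controls how fast history is forgotten.
    moment_out = decay * moment + (1.0 - decay) * grad * grad
    param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
    return param_out, moment_out

# Toy usage with a single parameter vector.
p = np.zeros(3, dtype=np.float32)
g = np.array([0.1, -0.2, 0.3], dtype=np.float32)
m = np.zeros_like(p)
p, m = decayed_adagrad_step(p, g, m, lr=0.01)
```

Because old squared gradients are forgotten at rate `decay`, the effective step size does not shrink toward zero over long runs the way plain Adagrad's can.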
56 changes: 55 additions & 1 deletion python/paddle/v2/fluid/tests/test_optimizer.py
@@ -198,7 +198,7 @@ def test_adagrad_optimizer(self):
         adagrad_op = opts[0]
         self.assertEqual(adagrad_op.type, "adagrad")

-        # check accumulators
+        # Check accumulators
         accumulators = adagrad_optimizer.get_accumulators()
         self.assertEqual(len(accumulators), 1)
         self.assertTrue(adagrad_optimizer.get_moment_str() in accumulators)
@@ -331,5 +331,59 @@ def test_adamax_optimizer(self):
         self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)


+class TestDecayedAdagradOptimizer(unittest.TestCase):
+    class MockDecayedAdagrad(optimizer.DecayedAdagradOptimizer):
+        def get_accumulators(self):
+            return self._accumulators
+
+        def get_moment_str(self):
+            return self._moment_acc_str
+
+    def test_decayed_adagrad_optimizer(self):
+        init_program = framework.Program()
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        learning_rate = 0.01
+        decayed_adagrad_optimizer = self.MockDecayedAdagrad(
+            learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
+        params_grads = append_backward_ops(mul_out)
+        self.assertEqual(len(params_grads), 1)
+        self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
+        opts = decayed_adagrad_optimizer.create_optimization_pass(
+            params_grads, mul_out, init_program)
+        self.assertEqual(len(opts), 1)
+        decayed_adagrad_op = opts[0]
+        self.assertEqual(decayed_adagrad_op.type, "decayed_adagrad")
+
+        # Check accumulators
+        accumulators = decayed_adagrad_optimizer.get_accumulators()
+        self.assertEqual(len(accumulators), 1)
+        self.assertTrue(
+            decayed_adagrad_optimizer.get_moment_str() in accumulators)
+        moment_acc = accumulators[decayed_adagrad_optimizer.get_moment_str()]
+        self.assertEqual(len(moment_acc), 1)
+        self.assertTrue(mul_x.name in moment_acc)
+
+        # Check init_program
+        init_ops = init_program.global_block().ops
+        self.assertEqual(len(init_ops), 2)
+        self.assertEqual(init_ops[0].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
+        self.assertEqual(init_ops[1].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
+
+
 if __name__ == '__main__':
     unittest.main()
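End to end, the new optimizer is driven the same way as the other fluid optimizers in this PR's tests. Below is a sketch modeled directly on the test above, with import paths assumed to match the test module of this Paddle version (they may differ in later releases):

```python
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.backward import append_backward_ops

init_program = framework.Program()
program = framework.Program()
block = program.global_block()

# A tiny one-op network standing in for a real model; `mul_out` plays the
# role of the loss variable here.
mul_x = block.create_parameter(
    dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
mul_y = block.create_var(
    dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
    dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
block.append_op(
    type="mul",
    inputs={"X": mul_x,
            "Y": mul_y},
    outputs={"Out": mul_out},
    attrs={"x_num_col_dims": 1})

# Backward pass first, then the optimization pass that appends the
# decayed_adagrad op and adds the initializers to init_program.
decayed_adagrad = optimizer.DecayedAdagradOptimizer(
    learning_rate=0.01, decay=0.95, epsilon=1.0e-6)
params_grads = append_backward_ops(mul_out)
opts = decayed_adagrad.create_optimization_pass(params_grads, mul_out,
                                                init_program)
```

This mirrors the two-stage flow the test asserts on: one `decayed_adagrad` op per parameter in the main program, plus two `fill_constant` ops in `init_program`, one for the learning rate and one for the zero-initialized moment accumulator.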