Adding interface for the adagrad optimizer #4977

Merged: 2 commits, Oct 21, 2017
Changes from 1 commit
59 changes: 58 additions & 1 deletion python/paddle/v2/framework/optimizer.py
@@ -1,7 +1,7 @@
import paddle.v2.framework.framework as framework
from collections import defaultdict

__all__ = ['SGDOptimizer', 'MomentumOptimizer']
__all__ = ['SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer']


class Optimizer(object):
@@ -272,3 +272,60 @@ def _append_optimize_op(self, block, param_and_grad):
attrs={"mu": self._momentum})

return momentum_op


class AdagradOptimizer(Optimizer):
"""Simple Adagrad optimizer with moment state
"""
_moment_acc_str = "moment"

def __init__(self, learning_rate, epsilon):
Contributor:
You can give a default value to epsilon.

Contributor:
Also, if the original paper mentions a default learning rate for the Adagrad optimizer, you can add that too.

Contributor (Author):
Done. The original paper does not specify a default learning rate, so only epsilon was given a default value.
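A minimal sketch of what the suggested change presumably looks like at the call site, assuming the follow-up commit (not shown in this one-commit view) defaults epsilon to the 1.0e-6 used in the test below:

import paddle.v2.framework.optimizer as optimizer

# Hypothetical usage once epsilon has a default value; with the commit shown
# here, epsilon is still a required argument.
opt_default = optimizer.AdagradOptimizer(learning_rate=0.01)

# Passing epsilon explicitly keeps working either way.
opt_explicit = optimizer.AdagradOptimizer(learning_rate=0.01, epsilon=1.0e-6)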

assert learning_rate is not None
assert epsilon is not None
super(AdagradOptimizer, self).__init__()
self.type = "adagrad"
self._learning_rate = learning_rate
self._epsilon = epsilon

def _initialize_tensors(self, block):
assert isinstance(block, framework.Block)
lr_shape = [1]
# create a variable for learning_rate
self._lr = block.create_var(
dtype="float32", shape=lr_shape, lod_level=0)

# create an op to init the learning_rate
# FIXME: Fix when Initialization design has been implemented
# https://github.com/PaddlePaddle/Paddle/pull/4852
block.append_op(
type="fill_constant",
outputs={"Out": self._lr},
attrs={"shape": lr_shape,
"value": self._learning_rate})

def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)

for p in parameters:
self._add_accumulator(block, self._moment_acc_str, p, 'float32')

def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block)

moment_acc = self._get_accumulator(self._moment_acc_str,
param_and_grad[0])

# create the adagrad optimizer op
adagrad_op = block.append_op(
type=self.type,
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"Moment": moment_acc,
"LearningRate": self._lr
},
outputs={"ParamOut": param_and_grad[0],
"MomentOut": moment_acc},
attrs={"epsilon": self._epsilon})

return adagrad_op
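For reference, a minimal NumPy sketch of the per-parameter update the adagrad op wired up above is expected to perform, following the standard Adagrad rule (the op kernel itself is not part of this diff, so treat this as an illustration only):

import numpy as np

def adagrad_update(param, grad, moment, learning_rate, epsilon=1.0e-6):
    # Accumulate squared gradients, then scale the step by the running sum.
    moment_out = moment + grad * grad
    param_out = param - learning_rate * grad / (np.sqrt(moment_out) + epsilon)
    return param_out, moment_out

# One step on a toy parameter tensor.
param = np.zeros((5, 10), dtype=np.float32)
grad = np.full((5, 10), 0.5, dtype=np.float32)
moment = np.zeros_like(param)
param, moment = adagrad_update(param, grad, moment, learning_rate=0.01)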
41 changes: 41 additions & 0 deletions python/paddle/v2/framework/tests/test_optimizer.py
@@ -69,5 +69,46 @@ def test_momentum_optimizer(self):
self.assertTrue(mul_x.name in velocity_acc)


class TestAdagradOptimizer(unittest.TestCase):
class MockAdagrad(optimizer.AdagradOptimizer):
def get_accumulators(self):
return self._accumulators

def get_moment_str(self):
return self._moment_acc_str

def test_adagrad_optimizer(self):
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
mul_y = block.create_var(
dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
block.append_op(
type="mul",
inputs={"X": mul_x,
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
adagrad_optimizer = self.MockAdagrad(learning_rate=0.01, epsilon=1.0e-6)
params_grads = adagrad_optimizer.create_backward_pass(mul_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out)
self.assertEqual(len(opts), 1)
adagrad_op = opts[0]
self.assertEqual(adagrad_op.type, "adagrad")

# check accumulators
accumulators = adagrad_optimizer.get_accumulators()
self.assertEqual(len(accumulators), 1)
self.assertTrue(adagrad_optimizer.get_moment_str() in accumulators)
moment_acc = accumulators[adagrad_optimizer.get_moment_str()]
self.assertEqual(len(moment_acc), 1)
self.assertTrue(mul_x.name in moment_acc)


if __name__ == '__main__':
unittest.main()