Implementing the python wrapper for Adamax optimizer (#5061)
abhinavarora committed Oct 26, 2017
1 parent 39a6f43 commit f8c6dad
Showing 2 changed files with 157 additions and 2 deletions.
110 changes: 108 additions & 2 deletions python/paddle/v2/framework/optimizer.py
@@ -4,7 +4,8 @@
 from paddle.v2.framework.backward import append_backward_ops
 
 __all__ = [
-    'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer'
+    'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
+    'AdamaxOptimizer'
 ]


@@ -399,7 +400,7 @@ def _append_optimize_op(self, block, param_and_grad):
                                         param_and_grad[0])
         moment2 = self._get_accumulator(self._moment2_acc_str,
                                         param_and_grad[0])
-        # create the momentum optimize op
+        # create the adam optimize op
         adam_op = block.append_op(
             type=self.type,
             inputs={
@@ -442,3 +443,108 @@ def _finish_update(self, block):
             attrs={"scale": self._beta2})
 
         return [scale_beta1, scale_beta2]
+
+
+class AdamaxOptimizer(Optimizer):
+    """Implements the Adamax Optimizer
+    """
+    _moment_acc_str = "moment"
+    _inf_norm_acc_str = "inf_norm"
+
+    def __init__(self,
+                 learning_rate=0.001,
+                 beta1=0.9,
+                 beta2=0.999,
+                 epsilon=1e-8):
+        assert learning_rate is not None
+        assert beta1 is not None
+        assert beta2 is not None
+        assert epsilon is not None
+        super(AdamaxOptimizer, self).__init__()
+        self.type = "adamax"
+        self._learning_rate = learning_rate
+        self._beta1 = beta1
+        self._beta2 = beta2
+        self._epsilon = epsilon
+
+    def _initialize_tensors(self, block):
+        assert isinstance(block, framework.Block)
+        lr_shape = [1]
+        # create a variable for learning_rate
+        self._lr = block.create_var(
+            dtype="float32", shape=lr_shape, lod_level=0)
+
+        # create an op to init the learning_rate
+        # FIXME: Fix when Initialization design has been implemented
+        # https://github.com/PaddlePaddle/Paddle/pull/4852
+        block.append_op(
+            type="fill_constant",
+            outputs={"Out": self._lr},
+            attrs={"shape": lr_shape,
+                   "value": self._learning_rate})
+
+    def _create_accumulators(self, block, parameters):
+        assert isinstance(block, framework.Block)
+
+        global_block = block.program.global_block()
+        # Create beta1 power accumulator tensor
+        beta_shape = [1]
+        self._beta1_pow_acc = global_block.create_var(
+            dtype="float32", shape=beta_shape, lod_level=0)
+
+        # Initialize beta1 power accumulator
+        # FIXME: Fix when Initialization design has been implemented
+        # https://github.com/PaddlePaddle/Paddle/pull/4852
+        global_block.append_op(
+            type="fill_constant",
+            outputs={"Out": self._beta1_pow_acc},
+            attrs={"shape": beta_shape,
+                   "value": self._beta1})
+
+        # Create accumulator tensors for first moment and infinity norm
+        for p in parameters:
+            self._add_accumulator(block, self._moment_acc_str, p, 'float32')
+            self._add_accumulator(block, self._inf_norm_acc_str, p, 'float32')
+
+    def _append_optimize_op(self, block, param_and_grad):
+        assert isinstance(block, framework.Block)
+
+        moment = self._get_accumulator(self._moment_acc_str, param_and_grad[0])
+        inf_norm = self._get_accumulator(self._inf_norm_acc_str,
+                                         param_and_grad[0])
+        # create the adamax optimize op
+        adamax_op = block.append_op(
+            type=self.type,
+            inputs={
+                "Param": param_and_grad[0],
+                "Grad": param_and_grad[1],
+                "LearningRate": self._lr,
+                "Moment": moment,
+                "InfNorm": inf_norm,
+                "Beta1Pow": self._beta1_pow_acc
+            },
+            outputs={
+                "ParamOut": param_and_grad[0],
+                "MomentOut": moment,
+                "InfNormOut": inf_norm
+            },
+            attrs={
+                "beta1": self._beta1,
+                "beta2": self._beta2,
+                "epsilon": self._epsilon
+            })
+
+        return adamax_op
+
+    def _finish_update(self, block):
+        """Update Beta1 Power accumulator
+        """
+        assert isinstance(block, framework.Block)
+        global_block = block.program.global_block()
+        scale_beta1 = global_block.append_op(
+            type="scale",
+            inputs={"X": self._beta1_pow_acc},
+            outputs={"Out": self._beta1_pow_acc},
+            attrs={"scale": self._beta1})
+
+        return [scale_beta1]
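The wrapper above only wires variables into the `adamax` operator; the arithmetic itself runs inside the C++ op. For context, the per-parameter update it is expected to apply is the Adamax rule from the Adam paper (Kingma and Ba, 2015), sketched below in NumPy. The sketch is not part of the commit: the function name, the NumPy types, and the exact placement of epsilon are illustrative assumptions. Note that `Beta1Pow` holds beta1**t, since it is initialized to beta1 and rescaled by beta1 in `_finish_update` after every optimization pass.

import numpy as np


def adamax_step(param, grad, moment, inf_norm, beta1_pow,
                learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    # Hypothetical NumPy sketch of one Adamax update; it mirrors the
    # Param/Grad/Moment/InfNorm/Beta1Pow wiring above, not the real op code.
    moment_out = beta1 * moment + (1 - beta1) * grad            # first moment
    inf_norm_out = np.maximum(beta2 * inf_norm, np.abs(grad))   # weighted inf-norm
    lr = learning_rate / (1 - beta1_pow)                        # bias correction, beta1_pow == beta1**t
    param_out = param - lr * moment_out / (inf_norm_out + epsilon)  # epsilon placement is an assumption
    return param_out, moment_out, inf_norm_out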
49 changes: 49 additions & 0 deletions python/paddle/v2/framework/tests/test_optimizer.py
@@ -196,5 +196,54 @@ def test_adam_optimizer(self):
         self.assertTrue(mul_x.name in moment2_acc)
 
+
+class TestAdamaxOptimizer(unittest.TestCase):
+    class MockAdamax(optimizer.AdamaxOptimizer):
+        def get_accumulators(self):
+            return self._accumulators
+
+        def get_moment_str(self):
+            return self._moment_acc_str
+
+        def get_inf_norm_str(self):
+            return self._inf_norm_acc_str
+
+    def test_adamax_optimizer(self):
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        adamax_optimizer = self.MockAdamax(
+            learning_rate=0.01, beta1=0.9, beta2=0.999)
+        params_grads = append_backward_ops(mul_out)
+        self.assertEqual(len(params_grads), 1)
+        self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
+        opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out)
+        self.assertEqual(len(opts), 2)
+        adam_op = opts[0]
+        self.assertEqual(adam_op.type, "adamax")
+
+        # Check accumulators
+        accumulators = adamax_optimizer.get_accumulators()
+        self.assertEqual(len(accumulators), 2)
+        self.assertTrue(adamax_optimizer.get_moment_str() in accumulators)
+        self.assertTrue(adamax_optimizer.get_inf_norm_str() in accumulators)
+        moment_acc = accumulators[adamax_optimizer.get_moment_str()]
+        inf_norm_acc = accumulators[adamax_optimizer.get_inf_norm_str()]
+        self.assertEqual(len(moment_acc), 1)
+        self.assertEqual(len(inf_norm_acc), 1)
+        self.assertTrue(mul_x.name in moment_acc)
+        self.assertTrue(mul_x.name in inf_norm_acc)
+
+
 if __name__ == '__main__':
     unittest.main()
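For completeness, the wiring the test exercises can be used outside of unittest as well. The following is a minimal usage sketch assembled from the test above, not part of the commit; the `framework` and `optimizer` import paths are assumptions based on the package layout shown in the diff.

# Minimal usage sketch based on TestAdamaxOptimizer; not part of this commit.
# Import paths below are assumed from the python/paddle/v2/framework layout.
import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.backward import append_backward_ops

program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
    dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
mul_y = block.create_var(
    dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
    dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
block.append_op(
    type="mul",
    inputs={"X": mul_x,
            "Y": mul_y},
    outputs={"Out": mul_out},
    attrs={"x_num_col_dims": 1})

adamax = optimizer.AdamaxOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mul_out)                # gradient ops plus (param, grad) pairs
opts = adamax.create_optimization_pass(params_grads, mul_out)
# `opts` holds the per-parameter adamax op plus the scale op from
# _finish_update that advances the beta1 power accumulator.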
