Commit

revert 12103
chenjiawen committed Jul 13, 2018
1 parent aad57e7 commit 7af363d
Showing 2 changed files with 64 additions and 72 deletions.
132 changes: 62 additions & 70 deletions python/paddle/fluid/optimizer.py
@@ -123,7 +123,7 @@ def _create_accumulators(self, block, parameters):
         """
         pass

-    def _finish_update(self, block, parameters_and_grads):
+    def _finish_update(self, block):
         """Finish any custom updates needed
            before completing an optimization step
@@ -132,7 +132,7 @@ def _finish_update(self, block, parameters_and_grads):
             parameters: list of parameter variables for the optimizer
         Returns:
-            None
+            list of finish ops or None
         """
         pass

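A minimal sketch of the reverted hook contract, using a hypothetical Optimizer subclass (none of the names below are added by this commit): _finish_update now receives only the block and may return the list of ops it appended, or None.

    # Hypothetical illustration only -- not part of this commit.
    class MyOptimizer(Optimizer):
        def _finish_update(self, block):
            main_block = block.program.global_block()
            # self._step_pow is an assumed persistable [1]-shaped variable,
            # created in _create_accumulators the same way as beta1_pow_acc.
            step_op = main_block.append_op(
                type="scale",
                inputs={"X": self._step_pow},
                outputs={"Out": self._step_pow},
                attrs={"scale": 0.5})
            return [step_op]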
@@ -237,7 +237,7 @@ def _create_optimization_pass(self,

         # Get custom finish ops for subclasses
         # FIXME: Need to fix this once we figure out how to handle dependencies
-        self._finish_update(loss.block, parameters_and_grads)
+        self._finish_update(loss.block)

         end = len(global_block.ops)
         return global_block.slice_ops(start, end)
@@ -487,8 +487,6 @@ class AdamOptimizer(Optimizer):
     """
     _moment1_acc_str = "moment1"
     _moment2_acc_str = "moment2"
-    _beta1_pow_acc_str = "beta1_pow_acc"
-    _beta2_pow_acc_str = "beta2_pow_acc"

     def __init__(self,
                  learning_rate=0.001,
@@ -510,22 +508,32 @@ def __init__(self,
     def _create_accumulators(self, block, parameters):
         assert isinstance(block, framework.Block)

+        main_block = block.program.global_block()
+        # Create beta1 and beta2 power tensors
+        beta_shape = [1]
+        self._beta1_pow_acc = self.helper.create_global_variable(
+            name=unique_name.generate('beta1_pow_acc'),
+            dtype='float32' if self._dtype == None else self._dtype,
+            shape=beta_shape,
+            lod_level=0,
+            persistable=True)
+        self.helper.set_variable_initializer(
+            self._beta1_pow_acc, initializer=Constant(self._beta1))
+
+        self._beta2_pow_acc = self.helper.create_global_variable(
+            name=unique_name.generate('beta2_pow_acc'),
+            dtype='float32' if self._dtype == None else self._dtype,
+            shape=beta_shape,
+            lod_level=0,
+            persistable=True)
+
+        self.helper.set_variable_initializer(
+            self._beta2_pow_acc, initializer=Constant(self._beta2))
+
         # Create accumulator tensors for first and second moments
         for p in parameters:
             self._add_accumulator(self._moment1_acc_str, p)
             self._add_accumulator(self._moment2_acc_str, p)
-            self._add_accumulator(
-                name=self._beta1_pow_acc_str,
-                param=p,
-                dtype='float32',
-                fill_value=self._beta1,
-                shape=[1])
-            self._add_accumulator(
-                name=self._beta2_pow_acc_str,
-                param=p,
-                dtype='float32',
-                fill_value=self._beta2,
-                shape=[1])

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
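Why a single [1]-shaped global variable per beta suffices: beta1_pow_acc and beta2_pow_acc only track beta1**t and beta2**t for the global step count t, which is the same for every parameter, so they need not be per-parameter accumulators. A rough NumPy sketch of the bias-corrected update the adam op performs, showing where these powers enter (an illustration of the standard Adam rule, not the operator's kernel code):

    import numpy as np

    def adam_step(param, grad, m1, m2, beta1_pow, beta2_pow,
                  lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
        # moment estimates (the "moment1"/"moment2" accumulators)
        m1 = beta1 * m1 + (1 - beta1) * grad
        m2 = beta2 * m2 + (1 - beta2) * grad * grad
        # bias correction uses beta1**t and beta2**t, i.e. the power accumulators
        lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow)
        param = param - lr_t * m1 / (np.sqrt(m2) + eps)
        return param, m1, m2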
@@ -534,11 +542,6 @@ def _append_optimize_op(self, block, param_and_grad):
                                         param_and_grad[0])
         moment2 = self._get_accumulator(self._moment2_acc_str,
                                         param_and_grad[0])
-        beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
-                                              param_and_grad[0])
-        beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
-                                              param_and_grad[0])
-
         # create the adam optimize op
         adam_op = block.append_op(
             type=self.type,
@@ -548,8 +551,8 @@ def _append_optimize_op(self, block, param_and_grad):
                 "LearningRate": self._create_param_lr(param_and_grad),
                 "Moment1": moment1,
                 "Moment2": moment2,
-                "Beta1Pow": beta1_pow_acc,
-                "Beta2Pow": beta2_pow_acc
+                "Beta1Pow": self._beta1_pow_acc,
+                "Beta2Pow": self._beta2_pow_acc
             },
             outputs={
                 "ParamOut": param_and_grad[0],
@@ -564,30 +567,24 @@ def _append_optimize_op(self, block, param_and_grad):

         return adam_op

-    def _finish_update(self, block, param_and_grads):
+    def _finish_update(self, block):
         """Update Beta1 and Beta2 Power accumulators
         """
         assert isinstance(block, framework.Block)
         main_block = block.program.global_block()
-        for param, grad in param_and_grads:
-            if grad is None:
-                continue
-            with param.block.program.optimized_guard([param, grad]):
-                beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
-                                                      param)
-                beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
-                                                      param)
-                main_block.append_op(
-                    type="scale",
-                    inputs={"X": beta1_pow_acc},
-                    outputs={"Out": beta1_pow_acc},
-                    attrs={"scale": self._beta1})
-
-                main_block.append_op(
-                    type="scale",
-                    inputs={"X": beta2_pow_acc},
-                    outputs={"Out": beta2_pow_acc},
-                    attrs={"scale": self._beta2})
+        scale_beta1 = main_block.append_op(
+            type="scale",
+            inputs={"X": self._beta1_pow_acc},
+            outputs={"Out": self._beta1_pow_acc},
+            attrs={"scale": self._beta1})
+
+        scale_beta2 = main_block.append_op(
+            type="scale",
+            inputs={"X": self._beta2_pow_acc},
+            outputs={"Out": self._beta2_pow_acc},
+            attrs={"scale": self._beta2})
+
+        return [scale_beta1, scale_beta2]


 class AdamaxOptimizer(Optimizer):
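The two scale ops returned above keep the power accumulators equal to beta1**t and beta2**t as training proceeds: they start at beta1 and beta2 (the Constant initializers in _create_accumulators) and are multiplied by the same factor once per optimization pass. A tiny plain-Python check of that bookkeeping (illustration only, not framework code):

    def simulate_beta_pow(beta1=0.9, beta2=0.999, num_steps=5):
        beta1_pow, beta2_pow = beta1, beta2   # values set by the Constant initializers
        for t in range(1, num_steps + 1):
            # at step t the adam op reads beta1**t and beta2**t
            assert abs(beta1_pow - beta1 ** t) < 1e-12
            assert abs(beta2_pow - beta2 ** t) < 1e-12
            beta1_pow *= beta1                # scale op with attrs={"scale": beta1}
            beta2_pow *= beta2                # scale op with attrs={"scale": beta2}
        return beta1_pow, beta2_pow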
@@ -630,7 +627,6 @@ class AdamaxOptimizer(Optimizer):
     """
     _moment_acc_str = "moment"
     _inf_norm_acc_str = "inf_norm"
-    _beta1_pow_acc_str = "beta1_pow_acc"

     def __init__(self,
                  learning_rate=0.001,
@@ -650,25 +646,28 @@ def __init__(self,
         self._epsilon = epsilon

     def _create_accumulators(self, block, parameters):
+        # Create beta1 power accumulator tensor
+        beta_shape = [1]
+        self._beta1_pow_acc = self.helper.create_global_variable(
+            name=unique_name.generate('beta1_pow_acc'),
+            dtype='float32' if self._dtype == None else self._dtype,
+            shape=beta_shape,
+            lod_level=0,
+            persistable=True)
+        self.helper.set_variable_initializer(
+            self._beta1_pow_acc, initializer=Constant(self._beta1))
+
         # Create accumulator tensors for first moment and infinity norm
         for p in parameters:
             self._add_accumulator(self._moment_acc_str, p)
             self._add_accumulator(self._inf_norm_acc_str, p)
-            self._add_accumulator(
-                name=self._beta1_pow_acc_str,
-                param=p,
-                dtype='float32',
-                fill_value=self._beta1,
-                shape=[1])

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)

         moment = self._get_accumulator(self._moment_acc_str, param_and_grad[0])
         inf_norm = self._get_accumulator(self._inf_norm_acc_str,
                                          param_and_grad[0])
-        beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
-                                              param_and_grad[0])
         # create the adamax optimize op
         adamax_op = block.append_op(
             type=self.type,
@@ -678,7 +677,7 @@ def _append_optimize_op(self, block, param_and_grad):
                 "LearningRate": self._create_param_lr(param_and_grad),
                 "Moment": moment,
                 "InfNorm": inf_norm,
-                "Beta1Pow": beta1_pow_acc
+                "Beta1Pow": self._beta1_pow_acc
             },
             outputs={
                 "ParamOut": param_and_grad[0],
@@ -693,22 +692,18 @@ def _append_optimize_op(self, block, param_and_grad):

         return adamax_op

-    def _finish_update(self, block, parameters_and_grads):
+    def _finish_update(self, block):
         """Update Beta1 Power accumulator
         """
         assert isinstance(block, framework.Block)
         main_block = block.program.global_block()
-        for param, grad in parameters_and_grads:
-            if grad is None:
-                continue
-            with param.block.program.optimized_guard([param, grad]):
-                beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
-                                                      param)
-                main_block.append_op(
-                    type="scale",
-                    inputs={"X": beta1_pow_acc},
-                    outputs={"Out": beta1_pow_acc},
-                    attrs={"scale": self._beta1})
+        scale_beta1 = main_block.append_op(
+            type="scale",
+            inputs={"X": self._beta1_pow_acc},
+            outputs={"Out": self._beta1_pow_acc},
+            attrs={"scale": self._beta1})
+
+        return [scale_beta1]


 class DecayedAdagradOptimizer(Optimizer):
@@ -1162,10 +1157,7 @@ def __init__(self,
             self.params_grads.append((param, grad))

         for param, grad in self.params_grads:
-            if grad is None:
-                continue
-            with param.block.program.optimized_guard([param, grad]):
-                self._append_average_accumulate_op(param)
+            self._append_average_accumulate_op(param)

         self.apply_program = Program()
         block = self.apply_program.global_block()
4 changes: 2 additions & 2 deletions python/paddle/fluid/tests/unittests/test_optimizer.py
@@ -287,7 +287,7 @@ def test_adam_optimizer(self):

         # Check accumulators
         accumulators = adam_optimizer.get_accumulators()
-        self.assertEqual(len(accumulators), 4)
+        self.assertEqual(len(accumulators), 2)
         self.assertTrue(adam_optimizer.get_moment1_str() in accumulators)
         self.assertTrue(adam_optimizer.get_moment2_str() in accumulators)
         moment1_acc = accumulators[adam_optimizer.get_moment1_str()]
@@ -354,7 +354,7 @@ def test_adamax_optimizer(self):

         # Check accumulators
         accumulators = adamax_optimizer.get_accumulators()
-        self.assertEqual(len(accumulators), 3)
+        self.assertEqual(len(accumulators), 2)
         self.assertTrue(adamax_optimizer.get_moment_str() in accumulators)
         self.assertTrue(adamax_optimizer.get_inf_norm_str() in accumulators)
         moment_acc = accumulators[adamax_optimizer.get_moment_str()]
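The expected counts drop (4 to 2 for Adam, 3 to 2 for Adamax) because the beta power values are no longer registered through _add_accumulator; only the moment accumulators remain, while the powers live on the optimizer as single global variables. A hedged illustration, assuming get_accumulators() returns the name-keyed mapping that the membership assertions above already rely on:

    # Illustration only -- not a test added by this commit.
    accumulators = adam_optimizer.get_accumulators()
    # only the per-parameter moment accumulators remain registered
    assert sorted(accumulators.keys()) == ["moment1", "moment2"]
    # the beta powers are now ordinary attributes of the optimizer instead
    beta1_pow = adam_optimizer._beta1_pow_acc
    beta2_pow = adam_optimizer._beta2_pow_acc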
