Impl optimizer #4734

Merged: 31 commits, Oct 17, 2017

Changes from all commits (31 commits):
e82a5ae  init parameter base class (jacquesqiao, Oct 10, 2017)
ab37fba  optimize the Comments of optimizer (jacquesqiao, Oct 10, 2017)
1fe2371  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao, Oct 10, 2017)
1a37590  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao, Oct 11, 2017)
838f904  basic implimentation of optimizer (jacquesqiao, Oct 11, 2017)
5c6a458  add test_optimizer (jacquesqiao, Oct 12, 2017)
8a6f2c6  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao, Oct 12, 2017)
e9a75d2  add no_grad_set to interface (jacquesqiao, Oct 12, 2017)
e36e770  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao, Oct 14, 2017)
96d3a75  update optimizer.py (jacquesqiao, Oct 14, 2017)
1bb5fd1  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao, Oct 14, 2017)
11829ec  python code can run (jacquesqiao, Oct 15, 2017)
cf343ac  fix some problem (jacquesqiao, Oct 15, 2017)
49f206c  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao, Oct 15, 2017)
787b8ad  add sync_with_cpp to Python Program and Block (jacquesqiao, Oct 15, 2017)
ad0d9aa  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao, Oct 15, 2017)
ae34692  sync vars and ops in block from cpp (jacquesqiao, Oct 15, 2017)
1ab717a  optimize code and add some comment (jacquesqiao, Oct 15, 2017)
c584d7e  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao, Oct 15, 2017)
0f1f96d  add more check for sync (jacquesqiao, Oct 15, 2017)
a465e40  Merge branch 'python-cpp-sync' of https://github.com/jacquesqiao/Padd… (jacquesqiao, Oct 15, 2017)
e0ec875  update optimizer with return value of Backward (jacquesqiao, Oct 15, 2017)
37ee2ff  rm unused code (jacquesqiao, Oct 16, 2017)
6c765b0  infer shape when create gradient vairiable (jacquesqiao, Oct 16, 2017)
c141b97  update test_optimizer (jacquesqiao, Oct 16, 2017)
6324b26  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao, Oct 16, 2017)
c0f8af2  update test_program.py (jacquesqiao, Oct 16, 2017)
ea2a483  update backward test (jacquesqiao, Oct 17, 2017)
30368a0  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao, Oct 17, 2017)
29bc256  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (jacquesqiao, Oct 17, 2017)
b241744  follow comment (jacquesqiao, Oct 17, 2017)
34 changes: 28 additions & 6 deletions paddle/framework/backward.cc
@@ -281,12 +281,16 @@ static void CreateGradVarInBlock(
auto ops = block_desc->AllOps();
for (size_t op_index = grad_op_start_index; op_index < ops.size();
++op_index) {
bool need_infer_shape = false;
ForEachVarName(ops[op_index]->Outputs(),
[&](const std::string& grad_var_name) {
if (block_desc->HasVar(grad_var_name)) {
return false;
}
block_desc->Var(grad_var_name);
need_infer_shape = true;
auto var = block_desc->Var(grad_var_name);
// FIXME(qiao) infer the datatype
var->SetDataType(framework::DataType::FP32);
auto it = param_name_map.find(grad_var_name);
if (it == param_name_map.end()) {
return false;
Expand All @@ -298,6 +302,9 @@ static void CreateGradVarInBlock(
grad_record.op_idx_ = static_cast<int>(op_index);
return false; /* not break */
});
if (need_infer_shape) {
(Inline review comment from a collaborator: "Cool.")

ops[op_index]->InferShape(*block_desc);
}
}
}

@@ -428,10 +435,16 @@ ParamGradInfoMap AppendBackward(
auto& all_ops = root_block->ops_;

// insert fill one op for target
// TODO(qiao) add some check to the target.
std::string fill_one_op_out = GradVarName(target.Name());
std::vector<int64_t> target_shape_desc = target.Shape();
std::vector<int> target_shape;
std::transform(target_shape_desc.begin(), target_shape_desc.end(),
std::back_inserter(target_shape),
[](int64_t dim) { return static_cast<int>(dim); });
std::unique_ptr<OpDescBind> fill_one_op(
new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}},
{{"shape", std::vector<int>{1}},
{{"shape", target_shape},
{"value", static_cast<float>(1.0)},
{"data_type", framework::DataType::FP32}}));
all_ops.push_back(std::move(fill_one_op));
Expand All @@ -443,13 +456,22 @@ ParamGradInfoMap AppendBackward(
auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx,
&no_grad_var_names, &grad_to_var);

std::unordered_map<std::string, GradVarInfo> retv;

// Create Variable
for (auto& ptr : backward_op_descs) {
all_ops.push_back(std::move(ptr));
}
root_block->Var(fill_one_op_out);
// Create Variable

// Create target gradient variable
std::unordered_map<std::string, GradVarInfo> retv;

auto var = root_block->Var(fill_one_op_out);
// FIXME(qiao) infer the data type
var->SetDataType(framework::DataType::FP32);
var->SetShape(target.Shape());
auto& target_grad = retv[target.Name()];
target_grad.name_ = fill_one_op_out;
target_grad.block_idx_ = root_block_idx;
target_grad.op_idx_ = static_cast<int>(forward_op_num);

// create grad_var for all blocks in this program
CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv);
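
An aside, not part of the diff: the change above seeds the target's gradient with a fill_constant op shaped like the target itself instead of a fixed shape of {1}, and also records the target's own gradient info in the returned map. A minimal numpy sketch of the new seeding behavior, assuming a hypothetical target of shape [2, 3]:

    # Hedged sketch, not PR code: numpy analogue of the fill_constant seed.
    import numpy as np

    target_shape = [2, 3]  # assumed target shape, for illustration only
    target_grad = np.ones(target_shape, dtype=np.float32)  # previously shape [1]
    assert target_grad.shape == (2, 3)
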
81 changes: 64 additions & 17 deletions paddle/framework/backward_test.cc
@@ -26,6 +26,20 @@ namespace framework {

using DeviceContext = platform::DeviceContext;

class NoneOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

protected:
void InferShape(framework::InferShapeContext *ctx) const override {}
};

template <typename Place, typename T>
class NoneKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {}
};

class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
public:
RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
@@ -215,19 +229,51 @@ class MinusOpMaker : public OpProtoAndCheckerMaker {
namespace f = paddle::framework;
namespace ops = paddle::operators;
using EnforceNotMet = paddle::platform::EnforceNotMet;
REGISTER_OPERATOR(rowwise_add, f::NOP, f::RowWiseAddOpMaker,
// rowwise_add
REGISTER_OPERATOR(rowwise_add, f::NoneOp, f::RowWiseAddOpMaker,
f::RowWiseAddGradMaker);
REGISTER_OPERATOR(rowwise_add_grad, f::NOP);
REGISTER_OP(mul, f::NOP, f::MulOpMaker, mul_grad, f::NOP);
REGISTER_OP(sigmoid, f::NOP, f::SigmoidOpMaker, sigmoid_grad, f::NOP);
REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NOP, f::NoGradOpMaker);
REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NOP, f::FillZeroOpMaker);
REGISTER_OP(sum, f::NOP, f::SumOpMaker, sum_grad, f::NOP);
REGISTER_OP_CPU_KERNEL(rowwise_add,
f::NoneKernel<paddle::platform::CPUPlace, float>);
REGISTER_OPERATOR(rowwise_add_grad, f::NoneOp);
REGISTER_OP_CPU_KERNEL(rowwise_add_grad,
f::NoneKernel<paddle::platform::CPUPlace, float>);
// mul
REGISTER_OP(mul, f::NoneOp, f::MulOpMaker, mul_grad, f::NoneOp);
REGISTER_OP_CPU_KERNEL(mul, f::NoneKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(mul_grad,
f::NoneKernel<paddle::platform::CPUPlace, float>);
// sigmoid
REGISTER_OP(sigmoid, f::NoneOp, f::SigmoidOpMaker, sigmoid_grad, f::NoneOp);
REGISTER_OP_CPU_KERNEL(sigmoid,
f::NoneKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NoneOp, f::NoGradOpMaker);
// fill_zeros_like
REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NoneOp, f::FillZeroOpMaker);
REGISTER_OP_CPU_KERNEL(fill_zeros_like,
f::NoneKernel<paddle::platform::CPUPlace, float>);
// sum
REGISTER_OP(sum, f::NoneOp, f::SumOpMaker, sum_grad, f::NoneOp);
REGISTER_OP_CPU_KERNEL(sum, f::NoneKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(sum_grad,
f::NoneKernel<paddle::platform::CPUPlace, float>);
// fc
REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker);
REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad,
f::NOP);
REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad, f::NOP);
REGISTER_OPERATOR(minus, f::NOP, f::MinusOpMaker, f::MinusGradOpDescMaker);
// many_output_op
REGISTER_OP(many_output_op, f::NoneOp, f::ManyOutputOpMaker,
many_output_op_grad, f::NoneOp);
// mult_in_out
REGISTER_OP(mult_in_out, f::NoneOp, f::MultInOutOpMaker, mult_in_out_grad,
f::NoneOp);
REGISTER_OP_CPU_KERNEL(mult_in_out,
f::NoneKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(mult_in_out_grad,
f::NoneKernel<paddle::platform::CPUPlace, float>);
// minus
REGISTER_OPERATOR(minus, f::NoneOp, f::MinusOpMaker, f::MinusGradOpDescMaker);
REGISTER_OP_CPU_KERNEL(minus, f::NoneKernel<paddle::platform::CPUPlace, float>);
// scale
REGISTER_OPERATOR(scale, f::NoneOp);
REGISTER_OP_CPU_KERNEL(scale, f::NoneKernel<paddle::platform::CPUPlace, float>);

TEST(Backward, simple_op_not_need_grad) {
auto fwd = f::OpRegistry::CreateOp(
@@ -463,6 +509,7 @@ TEST(Backward, simple_single_op) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::BlockDescBind *block = program.Block(0);

f::OpDescBind *op = block->AppendOp();
op->SetType("rowwise_add");
op->SetInput("X", {"x"});
@@ -487,7 +534,7 @@ TEST(Backward, simple_single_op) {
EXPECT_EQ(grad_op->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b")}));

EXPECT_EQ(var_to_grad.size(), 2UL);
EXPECT_EQ(var_to_grad.size(), 3UL);
EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2));
EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2));

@@ -588,7 +635,7 @@ TEST(Backward, simple_mult_op) {
EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b3")}));

EXPECT_EQ(var_to_grad.size(), 6UL);
EXPECT_EQ(var_to_grad.size(), 7UL);
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
EXPECT_EQ(var_to_grad.at("out1"),
@@ -666,7 +713,7 @@ TEST(Backward, intermedia_var_no_grad) {
std::vector<std::string>({f::GradVarName("out1")}));
EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector<std::string>());

EXPECT_EQ(var_to_grad.size(), 3UL);
EXPECT_EQ(var_to_grad.size(), 4UL);
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
EXPECT_EQ(var_to_grad.at("out1"),
@@ -744,7 +791,7 @@ TEST(Backward, var_no_grad) {
EXPECT_EQ(grad_op1->Output(f::GradVarName("H")),
std::vector<std::string>({f::GradVarName("h1")}));

EXPECT_EQ(var_to_grad.size(), 3UL);
EXPECT_EQ(var_to_grad.size(), 4UL);
EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3));
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5));
EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5));
@@ -830,7 +877,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")}));

EXPECT_EQ(var_to_grad.size(), 5UL);
EXPECT_EQ(var_to_grad.size(), 6UL);
EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
EXPECT_EQ(var_to_grad.at("out1"),
@@ -863,7 +910,7 @@ TEST(Backward, half_backward) {
auto ops = block->AllOps();
ASSERT_EQ(3UL, ops.size());

EXPECT_EQ(var_to_grad.size(), 1UL);
EXPECT_EQ(var_to_grad.size(), 2UL);
EXPECT_EQ(var_to_grad.at("a"),
f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1));
}
8 changes: 4 additions & 4 deletions paddle/operators/mul_op.cc
@@ -106,10 +106,10 @@ class MulOpGrad : public framework::OperatorWithKernel {
auto y_dims = ctx->GetInputDim("Y");
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));

auto x_mat_dims =
framework::flatten_to_2d(x_dims, Attr<int>("x_num_col_dims"));
auto y_mat_dims =
framework::flatten_to_2d(y_dims, Attr<int>("y_num_col_dims"));
auto x_mat_dims = framework::flatten_to_2d(
x_dims, ctx->Attrs().Get<int>("x_num_col_dims"));
auto y_mat_dims = framework::flatten_to_2d(
y_dims, ctx->Attrs().Get<int>("y_num_col_dims"));

PADDLE_ENFORCE_EQ(
x_mat_dims[0], out_dims[0],
5 changes: 5 additions & 0 deletions paddle/pybind/protobuf.cc
@@ -163,6 +163,11 @@ void BindBlockDesc(py::module &m) {
return self.Var(name);
},
py::return_value_policy::reference)
.def("has_var",
[](BlockDescBind &self, py::bytes byte_name) {
std::string name = byte_name;
return self.HasVar(name);
})
.def("find_var",
[](BlockDescBind &self, py::bytes byte_name) {
std::string name = byte_name;
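
A hedged usage sketch of the new binding from the Python side; the block_desc handle and the variable name below are assumptions for illustration, while has_var and find_var are the bindings shown in this diff:

    # Hedged sketch, not PR code.
    name = "fc_0.w@GRAD"              # hypothetical variable name
    if block_desc.has_var(name):      # binding added in this change
        var_desc = block_desc.find_var(name)
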
17 changes: 16 additions & 1 deletion python/paddle/v2/framework/framework.py
@@ -306,6 +306,14 @@ def parent_idx(self):
def idx(self):
return self.desc.id

def var(self, name):
if name not in self.vars:
raise ValueError("var %s not in this block" % name)
return self.vars[name]

def all_parameters(self):
return {v for k, v in self.vars.iteritems() if isinstance(v, Parameter)}

def create_var(self, *args, **kwargs):
return Variable(self, *args, **kwargs)

@@ -314,7 +322,8 @@ def has_var(self, name):

def create_parameter(self, *args, **kwargs):
global_block = self.program.global_block()
return Parameter(global_block, *args, **kwargs)
param = Parameter(global_block, *args, **kwargs)
return param

def append_op(self, *args, **kwargs):
op_desc = self.desc.append_op()
@@ -392,10 +401,16 @@ def __str__(self):
def global_block(self):
return self.blocks[0]

def block(self, index):
return self.blocks[index]

def current_block(self):
return self.blocks[self.current_block_idx]

def append_backward(self, target, no_grad_set):
"""
return map(param_name -> (grad_name, block_index, op_index))
"""
assert isinstance(target, Variable)
param_to_grad_info = self.desc.append_backward(target.desc, no_grad_set)
self.sync_with_cpp()
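
A minimal usage sketch of the new Python API, under assumptions (not a definitive example of the final interface): program is a Program whose global block already produces a loss Variable named loss, and the returned map follows the docstring above.

    # Hedged sketch, not PR code.
    param_to_grad_info = program.append_backward(loss, no_grad_set=set())
    for param_name, (grad_name, block_idx, op_idx) in param_to_grad_info.items():
        block = program.block(block_idx)   # block() is added in this diff
        grad_var = block.var(grad_name)    # var() is added in this diff
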