diff --git a/src/example/mnist/test_mnist.cc b/src/example/mnist/test_mnist.cc
index 03401a1..dac5215 100644
--- a/src/example/mnist/test_mnist.cc
+++ b/src/example/mnist/test_mnist.cc
@@ -56,10 +56,11 @@ TEST(Mnist, TestCPU) {
     auto predict = softmax(linear3(linear2(linear1(data))));
     auto loss = mean(cross_entropy(predict, label));
 
-    if (i % print_step == 0) {
-      avg_loss +=
-          loss->Value().Get<paddle::framework::LoDTensor>().data<float>()[0];
-      LOG(INFO) << avg_loss;
+
+    avg_loss +=
+        loss->Value().Get<paddle::framework::LoDTensor>().data<float>()[0];
+    if ((i + 1) % print_step == 0) {
+      LOG(INFO) << avg_loss / print_step;
       avg_loss = 0;
     }
 
diff --git a/src/function.h b/src/function.h
index 2e50257..6699aeb 100644
--- a/src/function.h
+++ b/src/function.h
@@ -70,31 +70,6 @@ void init_params(VariableHandle v,
   }
 }
 
-VariableHandle mean(VariableHandle x) {
-  VariableHandle out(new Variable("mean"));
-  get_global_tape().AddOp("mean", {{"X", {x}}}, {{"Out", {out}}}, {});
-  return out;
-}
-
-VariableHandle relu(VariableHandle x) {
-  VariableHandle out(new Variable("relu"));
-  get_global_tape().AddOp("relu", {{"X", {x}}}, {{"Out", {out}}}, {});
-  return out;
-}
-
-VariableHandle softmax(VariableHandle x) {
-  VariableHandle out(new Variable("softmax"));
-  get_global_tape().AddOp("softmax", {{"X", {x}}}, {{"Out", {out}}}, {});
-  return out;
-}
-
-VariableHandle cross_entropy(VariableHandle x, VariableHandle label) {
-  VariableHandle out(new Variable("cross_entropy"));
-  get_global_tape().AddOp(
-      "cross_entropy", {{"X", {x}}, {"Label", {label}}}, {{"Y", {out}}}, {});
-  return out;
-}
-
 class Linear {
  public:
   Linear(int in_dim, int out_dim, const std::string &act)
@@ -118,17 +93,19 @@ class Linear {
     init_params(b_, "fill_constant", attrs);
   }
 
-  VariableHandle operator()(VariableHandle input) {
+  VariableHandle operator()(VariableHandle input,
+                            const framework::AttributeMap &mul_op_attrs = {},
+                            const framework::AttributeMap &add_op_attrs = {}) {
     VariableHandle pre_bias(new Variable("linear"));
     get_global_tape().AddOp("mul",
                             {{"X", {input}}, {"Y", {w_}}},
                             {{"Out", {pre_bias}}},
-                            {{"x_num_col_dims", 1}, {"y_num_col_dims", 1}});
+                            mul_op_attrs);
     VariableHandle pre_act(new Variable("linear"));
     get_global_tape().AddOp("elementwise_add",
                             {{"X", {pre_bias}}, {"Y", {b_}}},
                             {{"Out", {pre_act}}},
-                            {{"axis", 1}});
+                            add_op_attrs);
     VariableHandle post_act(new Variable("linear"));
     get_global_tape().AddOp(
         act_, {{"X", {pre_act}}}, {{"Out", {post_act}}}, {});
@@ -145,7 +122,7 @@ class Linear {
 
 class Convolution2D {
  public:
-  Convolution2D(int c_in, int c_out, int f, std::string act)
+  Convolution2D(int c_in, int c_out, int f, const std::string &act)
       : w_(new Variable("ConvolutionWeight")),
         b_(new Variable("ConvolutionBias")),
         act_(act) {
@@ -167,23 +144,20 @@ class Convolution2D {
     init_params(b_, "fill_constant", attrs);
   }
 
-  VariableHandle operator()(VariableHandle input) {
+  VariableHandle operator()(
+      VariableHandle input,
+      const framework::AttributeMap &conv_op_attrs = {},
+      const framework::AttributeMap &add_op_attrs = {{"axis", 1}}) {
     VariableHandle pre_bias(new Variable("conv"));
     get_global_tape().AddOp("conv2d",
                             {{"Input", {input}}, {"Filter", {w_}}},
                             {{"Output", {pre_bias}}},
-                            {{"strides", std::vector<int>{1, 1}},
-                             {"paddings", std::vector<int>{0, 0}},
-                             {"dilations", std::vector<int>{1, 1}},
-                             {"groups", 1},
-                             {"use_cudnn", false},
-                             {"use_mkldnn", false},
-                             {"data_format", std::string("AnyLayout")}});
+                            conv_op_attrs);
     VariableHandle pre_act(new Variable("conv"));
     get_global_tape().AddOp("elementwise_add",
                             {{"X", {pre_bias}}, {"Y", {b_}}},
                             {{"Out", {pre_act}}},
-                            {{"axis", 1}});
+                            add_op_attrs);
     VariableHandle post_act(new Variable("conv"));
     get_global_tape().AddOp(
         act_, {{"X", {pre_act}}}, {{"Out", {post_act}}}, {});
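[Reviewer note, not part of the patch] With the hard-coded attributes removed from Linear::operator() and Convolution2D::operator(), a call site that wants the old behavior now passes them explicitly. A minimal sketch, reusing the attribute values the operators used to bake in; `input` is an illustrative VariableHandle:

    // Sketch only: reproduces the defaults Linear used to hard-code.
    Linear linear(784, 10, "softmax");
    auto out = linear(input,
                      {{"x_num_col_dims", 1}, {"y_num_col_dims", 1}},  // mul_op_attrs
                      {{"axis", 1}});                                  // add_op_attrs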
@@ -226,6 +200,104 @@ class SGD {
   VariableHandle learning_rate_;
 };
 
+class BatchNorm {
+ public:
+  BatchNorm(int channel_in, const std::string &act)
+      : scale_(new Variable("BatchNormScale")),
+        bias_(new Variable("BatchNormBias")),
+        mean_(new Variable("BatchNormMean")),
+        variance_(new Variable("BatchNormVariance")),
+        act_(act) {
+    // Use fill one to initialize scale and variance
+    framework::AttributeMap attrs;
+    attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
+    attrs["shape"] = std::vector<int>{channel_in};
+    attrs["value"] = 1.0f;
+    init_params(scale_, "fill_constant", attrs);
+    init_params(variance_, "fill_constant", attrs);
+
+    // Use fill zero to initialize bias and mean
+    attrs["value"] = 0.0f;
+    init_params(bias_, "fill_constant", attrs);
+    init_params(mean_, "fill_constant", attrs);
+  }
+
+  VariableHandle operator()(VariableHandle x,
+                            const framework::AttributeMap &attrs = {}) {
+    VariableHandle pre_act(new Variable("batch_norm"));
+    VariableHandle tmp_mean(new Variable("tmp_mean"));
+    VariableHandle tmp_var(new Variable("tmp_var"));
+    get_global_tape().AddOp("batch_norm",
+                            {{"X", {x}},
+                             {"Scale", {scale_}},
+                             {"Bias", {bias_}},
+                             {"Mean", {mean_}},
+                             {"Variance", {variance_}}},
+                            {{"Y", {pre_act}},
+                             {"MeanOut", {mean_}},
+                             {"VarianceOut", {variance_}},
+                             {"SavedMean", {tmp_mean}},
+                             {"SavedVariance", {tmp_var}}},
+                            attrs);
+
+    VariableHandle post_act(new Variable("batch_norm"));
+    get_global_tape().AddOp(
+        act_, {{"X", {pre_act}}}, {{"Out", {post_act}}}, {});
+    return post_act;
+  }
+
+  // Only scale and bias need to be updated by SGD
+  std::vector<VariableHandle> Params() { return {scale_, bias_}; }
+
+ private:
+  VariableHandle scale_;
+  VariableHandle bias_;
+  VariableHandle mean_;
+  VariableHandle variance_;
+  std::string act_;
+};
+
+VariableHandle pool2d(VariableHandle x,
+                      const framework::AttributeMap &attrs = {}) {
+  VariableHandle out(new Variable("pool2d"));
+  get_global_tape().AddOp("pool2d", {{"X", {x}}}, {{"Out", {out}}}, attrs);
+  return out;
+}
+
+VariableHandle dropout(VariableHandle x,
+                       const framework::AttributeMap &attrs = {}) {
+  VariableHandle out(new Variable("dropout"));
+  VariableHandle mask(new Variable("mask"));
+  get_global_tape().AddOp(
+      "dropout", {{"X", {x}}}, {{"Out", {out}}, {"Mask", {mask}}}, attrs);
+  return out;
+}
+
+VariableHandle mean(VariableHandle x) {
+  VariableHandle out(new Variable("mean"));
+  get_global_tape().AddOp("mean", {{"X", {x}}}, {{"Out", {out}}}, {});
+  return out;
+}
+
+VariableHandle relu(VariableHandle x) {
+  VariableHandle out(new Variable("relu"));
+  get_global_tape().AddOp("relu", {{"X", {x}}}, {{"Out", {out}}}, {});
+  return out;
+}
+
+VariableHandle softmax(VariableHandle x) {
+  VariableHandle out(new Variable("softmax"));
+  get_global_tape().AddOp("softmax", {{"X", {x}}}, {{"Out", {out}}}, {});
+  return out;
+}
+
+VariableHandle cross_entropy(VariableHandle x, VariableHandle label) {
+  VariableHandle out(new Variable("cross_entropy"));
+  get_global_tape().AddOp(
+      "cross_entropy", {{"X", {x}}, {"Label", {label}}}, {{"Y", {out}}}, {});
+  return out;
+}
+
 VariableHandle CreateRecordioFileReader(std::string filename,
                                         std::vector<int> shape_concat,
                                         std::vector<int> ranks,
diff --git a/src/tape.cc b/src/tape.cc
index 5587e75..1821c7f 100644
--- a/src/tape.cc
+++ b/src/tape.cc
@@ -118,14 +118,15 @@ void Tape::AddOp(const std::string &type,
                  const VariableHandleMap &in_vars,
                  VariableHandleMap out_vars,
                  const framework::AttributeMap &attrs) {
+  PADDLE_ENFORCE(!has_been_backwarded_);
   InferShapeAndVarType(type, in_vars, &out_vars, attrs);
   tape_.emplace_back(type, in_vars, out_vars, attrs);
 }
 
 void Tape::Forward() {
   VLOG(3) << "Starting forward -------------------------";
-  PADDLE_ENFORCE(!has_been_backwarded_);
   while (current_position_ < tape_.size()) {
+    PADDLE_ENFORCE(!has_been_backwarded_);
     OpHandle &op = tape_[current_position_];
     framework::OpDesc op_desc =
         CreateOpDesc(op.type_, op.inputs_, op.outputs_, op.attrs_);
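[Reviewer note, not part of the patch] Moving PADDLE_ENFORCE(!has_been_backwarded_) from the top of Forward() into AddOp() (and into Forward()'s loop) makes a finished tape fail fast: once Backward() has run, recording any further op aborts immediately instead of surfacing later. A minimal sketch of the per-iteration lifecycle; `linear`, `input`, and `label` are illustrative:

    reset_global_tape();               // start this iteration on a fresh tape
    auto loss = mean(cross_entropy(softmax(linear(input)), label));
    get_global_tape().Backward(loss);  // runs the recorded forward ops, then backward
    // Recording anything further here, e.g. relu(loss), would now trip the
    // enforce inside AddOp().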
diff --git a/src/test_tape.cc b/src/test_tape.cc
index e37dd14..ea13113 100644
--- a/src/test_tape.cc
+++ b/src/test_tape.cc
@@ -21,6 +21,8 @@ using paddle::tape::Linear;
 using paddle::tape::Convolution2D;
 using paddle::tape::SGD;
 using paddle::tape::Fill;
+using paddle::tape::BatchNorm;
+using paddle::tape::dropout;
 using paddle::tape::mean;
 using paddle::tape::softmax;
 using paddle::tape::cross_entropy;
@@ -29,6 +31,77 @@ using paddle::tape::get_global_tape;
 using paddle::tape::CreateRecordioFileReader;
 using paddle::tape::ReadNext;
 
+TEST(Tape, TestDropout) {
+  std::string initializer = "uniform_random";
+  paddle::framework::AttributeMap attrs;
+  attrs["min"] = -1.0f;
+  attrs["max"] = 1.0f;
+  attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
+  attrs["seed"] = 123;
+  attrs["shape"] = std::vector<int>{3, 3};
+  Fill filler(initializer, attrs);
+
+  reset_global_tape();
+  VariableHandle input(new Variable("input"));
+  filler(input);
+  auto loss = dropout(input);
+  LOG(INFO) << input->Value();
+  LOG(INFO) << loss->Value();
+
+  get_global_tape().Backward(loss);
+  LOG(INFO) << input->Grad()->Value();
+}
+
+TEST(Tape, TestPool2d) {
+  std::string initializer = "uniform_random";
+  paddle::framework::AttributeMap attrs;
+  attrs["min"] = -1.0f;
+  attrs["max"] = 1.0f;
+  attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
+  attrs["seed"] = 123;
+  attrs["shape"] = std::vector<int>{1, 1, 3, 3};
+  Fill filler(initializer, attrs);
+
+  reset_global_tape();
+  VariableHandle input(new Variable("input"));
+  filler(input);
+  auto loss = pool2d(input);
+  LOG(INFO) << input->Value();
+  LOG(INFO) << loss->Value();
+
+  get_global_tape().Backward(loss);
+  LOG(INFO) << input->Grad()->Value();
+}
+
+TEST(Tape, TestBatchNorm) {
+  BatchNorm bn(4, "relu");
+  SGD sgd(0.001);
+
+  std::string initializer = "uniform_random";
+  paddle::framework::AttributeMap attrs;
+  attrs["min"] = -1.0f;
+  attrs["max"] = 1.0f;
+  attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
+  attrs["seed"] = 123;
+  attrs["shape"] = std::vector<int>{32, 4, 8, 8};
+  Fill filler(initializer, attrs);
+
+  for (int i = 0; i < 2; ++i) {
+    reset_global_tape();
+
+    VariableHandle input(new Variable("input"));
+    filler(input);
+
+    auto loss = bn(input);
+
+    get_global_tape().Backward(loss);
+
+    for (auto w : bn.Params()) {
+      sgd.Update(w);
+    }
+  }
+}
+
 TEST(Tape, TestConv) {
   Convolution2D conv1(3, 16, 3, "relu");
   Convolution2D conv2(16, 1, 3, "relu");
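[Reviewer note, not part of the patch] TestDropout and TestPool2d call the new helpers with the default empty AttributeMap; both helpers also forward an explicit AttributeMap straight to the underlying Fluid operators. A hedged sketch with illustrative values (attribute names assumed from the Fluid pool2d and dropout op definitions):

    auto pooled = pool2d(input,
                         {{"pooling_type", std::string("max")},
                          {"ksize", std::vector<int>{2, 2}}});
    auto dropped = dropout(input, {{"dropout_prob", 0.5f}});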