Skip to content

Commit

Permalink
refine test_recognize_digits_mlp and format codes (#5937)
Browse files Browse the repository at this point in the history
  • Loading branch information
QiJune authored and reyoung committed Nov 27, 2017
1 parent d89ff5b commit b28b2f1
Show file tree
Hide file tree
Showing 17 changed files with 231 additions and 273 deletions.
4 changes: 2 additions & 2 deletions paddle/capi/Matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat,
}

PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat,
paddle_real* value) {
paddle_real* value) {
if (mat == nullptr || value == nullptr) return kPD_NULLPTR;
auto ptr = cast(mat);
if (ptr->mat == nullptr) return kPD_NULLPTR;
Expand All @@ -75,7 +75,7 @@ PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat,
}

PD_API paddle_error paddle_matrix_get_value(paddle_matrix mat,
paddle_real* result) {
paddle_real* result) {
if (mat == nullptr || result == nullptr) return kPD_NULLPTR;
auto ptr = cast(mat);
if (ptr->mat == nullptr) return kPD_NULLPTR;
Expand Down
8 changes: 4 additions & 4 deletions paddle/capi/matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ PD_API paddle_error paddle_matrix_set_row(paddle_matrix mat,
* @note value should contain enough element of data to init the mat
*/
PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat,
paddle_real* value);
paddle_real* value);

/**
* @brief PDMatGetRow Get raw row buffer from matrix
Expand All @@ -93,14 +93,14 @@ PD_API paddle_error paddle_matrix_get_row(paddle_matrix mat,
paddle_real** rawRowBuffer);

/**
* @brief copy data from the matrix
* @brief copy data from the matrix
* @param [in] mat Target matrix
* @param [out] result pointer to store the matrix data
* @param [out] result pointer to store the matrix data
* @return paddle_error
* @note the space of the result should allocated before invoke this API
*/
PD_API paddle_error paddle_matrix_get_value(paddle_matrix mat,
paddle_real* result);
paddle_real* result);
/**
* @brief PDMatCreateNone Create None Matrix
* @return
Expand Down
9 changes: 4 additions & 5 deletions paddle/framework/tensor_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,18 +135,17 @@ inline void CopyToVector(const Tensor& src, const platform::DeviceContext& ctx,
auto dst_ptr = static_cast<void*>(dst->data());

if (platform::is_cpu_place(src.place())) {
memory::Copy(dst_place, dst_ptr, boost::get<platform::CPUPlace>(src.place()),
src_ptr, size);
memory::Copy(dst_place, dst_ptr,
boost::get<platform::CPUPlace>(src.place()), src_ptr, size);
}
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(src.place())) { // NOLINT
memory::Copy(
dst_place, dst_ptr, boost::get<platform::GPUPlace>(src.place()), src_ptr,
size,
dst_place, dst_ptr, boost::get<platform::GPUPlace>(src.place()),
src_ptr, size,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
}
#endif

}

} // namespace framework
Expand Down
31 changes: 13 additions & 18 deletions paddle/operators/math/maxouting.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ template <typename T>
class MaxOutFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input,
framework::Tensor * output,
const framework::Tensor& input, framework::Tensor* output,
int groups) {
const int batch_size = input.dims()[0];
const int input_height = input.dims()[2];
Expand All @@ -37,34 +36,30 @@ class MaxOutFunctor<platform::CPUPlace, T> {
T* output_data = output->mutable_data<T>(context.GetPlace());

for (int i = 0; i < batch_size; ++i) {
int new_bindex = c_size * i;
int new_bindex = c_size * i;
for (int c = 0; c < output_channels; ++c) {
int new_cindex = fea_size * c;
for (int f = 0; f < fea_size; ++f) {
T ele = static_cast<T>(-FLT_MAX);
for (int ph = 0; ph < groups; ++ph) {
T x = input_data[(new_bindex + new_cindex) * groups
+ ph * fea_size + f];
T x = input_data[(new_bindex + new_cindex) * groups +
ph * fea_size + f];
ele = ele > x ? ele : x;
}
output_data[(new_bindex+new_cindex+f)] = ele;
output_data[(new_bindex + new_cindex + f)] = ele;
}
}
}
}
};



template <class T>
class MaxOutGradFunctor<platform::CPUPlace, T> {
public:
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input,
framework::Tensor * input_grad,
const framework::Tensor& input, framework::Tensor* input_grad,
const framework::Tensor& output,
const framework::Tensor& output_grad,
int groups) {
const framework::Tensor& output_grad, int groups) {
const int batch_size = input.dims()[0];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
Expand All @@ -84,11 +79,11 @@ class MaxOutGradFunctor<platform::CPUPlace, T> {
bool continue_match = true;
int output_idx = blen + clen + f;
for (int g = 0; g < groups && continue_match; ++g) {
int input_idx = input_idx0 + fea_size * g;
if (input_data[input_idx] == output_data[output_idx]) {
input_grad_data[input_idx] += output_grad_data[output_idx];
continue_match = false;
}
int input_idx = input_idx0 + fea_size * g;
if (input_data[input_idx] == output_data[output_idx]) {
input_grad_data[input_idx] += output_grad_data[output_idx];
continue_match = false;
}
}
}
}
Expand Down
80 changes: 39 additions & 41 deletions paddle/operators/math/maxouting.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ namespace math {

template <typename T>
__global__ void KernelMaxOut(const int nthreads, const T* input_data,
const int channels,
const int input_height, const int input_width,
int groups, T* output_data ) {
const int channels, const int input_height,
const int input_width, int groups,
T* output_data) {
const int size = input_height * input_width * channels / groups;
const int feat_len = input_height * input_width;
int index = blockIdx.x * blockDim.x + threadIdx.x;
Expand All @@ -34,7 +34,7 @@ __global__ void KernelMaxOut(const int nthreads, const T* input_data,
int channel_idx = batch_offset / feat_len;
int feat_idx = batch_offset % feat_len;
int data_idx =
(batch_idx * size + channel_idx * feat_len) * groups + feat_idx;
(batch_idx * size + channel_idx * feat_len) * groups + feat_idx;
T ele = static_cast<T>(-FLT_MAX);
for (int g = 0; g < groups; ++g) {
T x = input_data[data_idx + g * feat_len];
Expand All @@ -44,34 +44,35 @@ __global__ void KernelMaxOut(const int nthreads, const T* input_data,
}
}
template <typename T>
__global__ void KernelMaxoutGrad(
const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels,
const int input_height, const int input_width, int groups) {
const int size = input_height * input_width * channels / groups;
const int feat_len = input_height * input_width;
int index = blockIdx.x * blockDim.x + threadIdx.x;
int offset = blockDim.x * gridDim.x;
for (int i = index; i < nthreads; i += offset) {
int batch_idx = i / size;
int batch_offset = i % size;
int channel_idx = batch_offset / feat_len;
int feat_idx = batch_offset % feat_len;
int data_idx =
__global__ void KernelMaxoutGrad(const int nthreads, const T* input_data,
const T* output_data, const T* output_grad,
T* input_grad, const int channels,
const int input_height, const int input_width,
int groups) {
const int size = input_height * input_width * channels / groups;
const int feat_len = input_height * input_width;
int index = blockIdx.x * blockDim.x + threadIdx.x;
int offset = blockDim.x * gridDim.x;
for (int i = index; i < nthreads; i += offset) {
int batch_idx = i / size;
int batch_offset = i % size;
int channel_idx = batch_offset / feat_len;
int feat_idx = batch_offset % feat_len;
int data_idx =
(batch_idx * size + channel_idx * feat_len) * groups + feat_idx;
int max_index = -1;
bool continue_match = true;
for (int g = 0; g < groups && continue_match; ++g) {
if (input_data[data_idx + g * feat_len] == output_data[i]) {
max_index = data_idx + g * feat_len;
continue_match = false;
break;
}
}
if (max_index != -1) {
input_grad[max_index] += output_grad[index];
int max_index = -1;
bool continue_match = true;
for (int g = 0; g < groups && continue_match; ++g) {
if (input_data[data_idx + g * feat_len] == output_data[i]) {
max_index = data_idx + g * feat_len;
continue_match = false;
break;
}
}
if (max_index != -1) {
input_grad[max_index] += output_grad[index];
}
}
}
/*
* All tensors are in NCHW format.
Expand All @@ -80,7 +81,7 @@ template <typename T>
class MaxOutFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor * output,
const framework::Tensor& input, framework::Tensor* output,
int groups) {
const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1];
Expand All @@ -92,7 +93,7 @@ class MaxOutFunctor<platform::GPUPlace, T> {

const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace());
int nthreads = output->numel();
int nthreads = output->numel();
int blocks = (nthreads + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocks, 1);
Expand All @@ -101,8 +102,7 @@ class MaxOutFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, input_data, input_channels,
input_height, input_width, groups,
output_data);
input_height, input_width, groups, output_data);
}
};
/*
Expand All @@ -112,11 +112,9 @@ template <typename T>
class MaxOutGradFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input,
framework::Tensor * input_grad,
const framework::Tensor& input, framework::Tensor* input_grad,
const framework::Tensor& output,
const framework::Tensor& output_grad,
int groups) {
const framework::Tensor& output_grad, int groups) {
const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1];
const int input_height = input.dims()[2];
Expand All @@ -129,17 +127,17 @@ class MaxOutGradFunctor<platform::GPUPlace, T> {
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = output.numel();
int nthreads = output.numel();
int blocks = (nthreads + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocks, 1);

KernelMaxoutGrad<
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data,
input_channels, input_height, input_width, groups);
.stream()>>>(nthreads, input_data, output_data,
output_grad_data, input_grad_data, input_channels,
input_height, input_width, groups);
}
};

Expand Down
8 changes: 3 additions & 5 deletions paddle/operators/math/maxouting.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,22 @@ namespace paddle {
namespace operators {
namespace math {

#define FLT_MAX \
__FLT_MAX__
#define FLT_MAX __FLT_MAX__

template <typename Place, typename T>

class MaxOutFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor * output,
const framework::Tensor& input, framework::Tensor* output,
int groups);
};

template <typename Place, class T>
class MaxOutGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input,
framework::Tensor * input_grad,
const framework::Tensor& input, framework::Tensor* input_grad,
const framework::Tensor& output,
const framework::Tensor& output_grad, int groups);
};
Expand Down
38 changes: 18 additions & 20 deletions paddle/operators/maxout_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,17 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
public:
MaxOutOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
AddInput(
"X",
"(Tensor) The input tensor of maxout operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of feature.");
AddOutput("Out",
"(Tensor) The output tensor of maxout operator."
"The format of output tensor is also NCHW."
"Where N is batch size, C is "
"the number of channels, H and W is the height and "
"width of feature.");
"(Tensor) The output tensor of maxout operator."
"The format of output tensor is also NCHW."
"Where N is batch size, C is "
"the number of channels, H and W is the height and "
"width of feature.");
AddAttr<int>(
"groups",
R"DOC("Specifies how many groups the input tensor will be split"
Expand Down Expand Up @@ -59,21 +60,19 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
}
};


class MaxOutOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of MaxoutOp"
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of MaxoutOp"
"should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of MaxoutOp should not be null.");
auto in_x_dims = ctx->GetInputDim("X");
int groups = ctx->Attrs().Get<int>("groups");
// check groups > 1
PADDLE_ENFORCE_GT(
groups, 1,
"groups should be larger than 1 in maxoutop");
PADDLE_ENFORCE_GT(groups, 1, "groups should be larger than 1 in maxoutop");
std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1] / groups});
output_shape.push_back(in_x_dims[2]);
output_shape.push_back(in_x_dims[3]);
Expand All @@ -87,18 +86,17 @@ class MaxOutOpGrad : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
"Input(X@GRAD) should not be null.");
"Input(X@GRAD) should not be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
};
} // namespace operators
} // namespace paddle
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP(maxout, ops::MaxOutOp, ops::MaxOutOpMaker, maxout_grad,
ops::MaxOutOpGrad);
REGISTER_OP_CPU_KERNEL(maxout, ops::MaxOutKernel<paddle::platform::CPUPlace,
float>);
REGISTER_OP_CPU_KERNEL(maxout_grad,
ops::MaxOutGradKernel<paddle::platform::CPUPlace,
float>);
ops::MaxOutOpGrad);
REGISTER_OP_CPU_KERNEL(maxout,
ops::MaxOutKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
maxout_grad, ops::MaxOutGradKernel<paddle::platform::CPUPlace, float>);
8 changes: 3 additions & 5 deletions paddle/operators/maxout_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(maxout,
ops::MaxOutKernel<paddle::platform::GPUPlace, float>,
ops::MaxOutKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(maxout_grad,
ops::MaxOutGradKernel<paddle::platform::GPUPlace,
float>,
ops::MaxOutGradKernel<paddle::platform::GPUPlace,
double>);
REGISTER_OP_GPU_KERNEL(
maxout_grad, ops::MaxOutGradKernel<paddle::platform::GPUPlace, float>,
ops::MaxOutGradKernel<paddle::platform::GPUPlace, double>);
Loading

0 comments on commit b28b2f1

Please sign in to comment.