-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ctc batch inference, change im2sequence_op #10923
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,8 @@ | |
limitations under the License. */ | ||
|
||
#pragma once | ||
|
||
#include <string> | ||
#include <fstream> | ||
#include "paddle/fluid/framework/data_layout.h" | ||
#include "paddle/fluid/framework/eigen.h" | ||
#include "paddle/fluid/framework/op_registry.h" | ||
|
@@ -38,8 +39,9 @@ class Im2SequenceKernel : public framework::OpKernel<T> { | |
public: | ||
void Compute(const framework::ExecutionContext& ctx) const override { | ||
const Tensor* in = ctx.Input<Tensor>("X"); | ||
// TODO(fuhailong): add new data layer to solve multibatch inference | ||
const Tensor* imgRealSize = ctx.Input<Tensor>("Image_real_size"); | ||
LoDTensor* out = ctx.Output<LoDTensor>("Out"); | ||
out->mutable_data<T>(ctx.GetPlace()); | ||
// TODO(wanghaoshuang): Add layout checker after 'set_layout' | ||
// being available for python API | ||
// PADDLE_ENFORCE_EQ(in->layout(), framework::DataLayout::kNCHW, | ||
|
@@ -49,41 +51,178 @@ class Im2SequenceKernel : public framework::OpKernel<T> { | |
int img_channels = in_dim[1]; | ||
int img_height = in_dim[2]; | ||
int img_width = in_dim[3]; | ||
|
||
auto imgRealSize_vec = imgRealSize->data<float>(); | ||
auto imgRealSize_dim = imgRealSize->dims(); | ||
auto kernels = ctx.Attr<std::vector<int>>("kernels"); | ||
auto strides = ctx.Attr<std::vector<int>>("strides"); | ||
auto paddings = ctx.Attr<std::vector<int>>("paddings"); | ||
int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0], | ||
paddings[2], strides[0]); | ||
int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], | ||
paddings[3], strides[1]); | ||
auto out_stride = ctx.Attr<std::vector<int>>("out_stride"); | ||
auto is_inference = ctx.Attr<bool>("is_inference"); | ||
if (is_inference) { | ||
if (batch_size == 1) { | ||
out->mutable_data<T>(ctx.GetPlace()); | ||
int output_height = Im2SeqOutputSize(img_height, | ||
kernels[0], | ||
paddings[0], | ||
paddings[2], | ||
strides[0]); | ||
int output_width = Im2SeqOutputSize(img_width, | ||
kernels[1], | ||
paddings[1], | ||
paddings[3], | ||
strides[1]); | ||
const std::vector<int> dilations({1, 1}); | ||
auto out_dims = out->dims(); | ||
out->Resize({batch_size, out->numel() / batch_size}); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 你这个resize会报错吧? 因为你在line 63之前好像并没有计算正确的 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. batchsize = 1 是走的train 分支 |
||
for (int i = 0; i < batch_size; i++) { | ||
const Tensor src = | ||
in->Slice(i, i + 1).Resize({img_channels, | ||
img_height, | ||
img_width}); | ||
Tensor dst = out->Slice(i, i + 1).Resize({output_height, | ||
output_width, | ||
img_channels, | ||
kernels[0], | ||
kernels[1]}); | ||
|
||
const std::vector<int> dilations({1, 1}); | ||
math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f; | ||
auto& dev_ctx = ctx.template device_context<DeviceContext>(); | ||
f(dev_ctx, src, dilations, strides, paddings, &dst); | ||
} | ||
out->Resize(out_dims); | ||
// set lod information | ||
// TODO(wanghaoshuang): Move this to InferShape | ||
framework::LoD lod(1); | ||
lod[0].reserve(batch_size + 1); | ||
int offset = 0; | ||
lod[0].push_back(offset); | ||
for (int i = 0; i < batch_size; ++i) { | ||
offset += output_height * output_width; | ||
lod[0].push_back(offset); | ||
} | ||
out->set_lod(lod); | ||
} else { | ||
std::vector<int> imgReal_H; | ||
std::vector<int> imgReal_W; | ||
for (int i = 0; i < batch_size; i++) { | ||
int tmp_real_H = int(imgRealSize_vec[2 * i]); | ||
int tmp_real_W = int(imgRealSize_vec[2 * i + 1]); | ||
for (int j = 0; j < out_stride[0]; j++) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 以我们现在的模型为例子,原图是缩小16倍,如果width可以被16整除,我们直接除是没有影响的,如果width不可以被16整除, 模型里面的逻辑是向上取整,而直接除则是向下取整,所以这个地方不能直接除,而是做了循环除。我们可以将这个out_stride属性改一下,改成做了多少次卷积,同时还得传入conv kernel 的大小,我觉得这样可能会更好一些 |
||
tmp_real_H = tmp_real_H / 2 + tmp_real_H % 2; | ||
tmp_real_W = tmp_real_W / 2 + tmp_real_W % 2; | ||
} | ||
imgReal_H.push_back(tmp_real_H); | ||
imgReal_W.push_back(tmp_real_W); | ||
} | ||
|
||
auto out_dims = out->dims(); | ||
out->Resize({batch_size, out->numel() / batch_size}); | ||
for (int i = 0; i < batch_size; i++) { | ||
const Tensor src = | ||
in->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); | ||
Tensor dst = out->Slice(i, i + 1).Resize( | ||
{output_height, output_width, img_channels, kernels[0], kernels[1]}); | ||
// TODO(fuhailong): for loop to compute real output size | ||
std::vector<int> output_height; | ||
std::vector<int> output_width; | ||
for (int i = 0; i < batch_size; i++) { | ||
output_height.push_back(Im2SeqOutputSize(imgReal_H[i], | ||
kernels[0], | ||
paddings[0], | ||
paddings[2], | ||
strides[0])); | ||
output_width.push_back(Im2SeqOutputSize(imgReal_W[i], | ||
kernels[1], | ||
paddings[1], | ||
paddings[3], | ||
strides[1])); | ||
} | ||
// TODO(fuhailong): compute dims of output | ||
// call: out->mutable_data<T>(ctx.GetPlace(), output_dims); | ||
int result = 0; | ||
for (int i = 0; i < batch_size; i++) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这里一共有三个连着的 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这里的合并应该是可以的 |
||
result += output_height[i] * output_width[i]; | ||
} | ||
|
||
math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f; | ||
auto& dev_ctx = ctx.template device_context<DeviceContext>(); | ||
f(dev_ctx, src, dilations, strides, paddings, &dst); | ||
} | ||
out->Resize(out_dims); | ||
|
||
// set lod information | ||
// TODO(wanghaoshuang): Move this to InferShape | ||
framework::LoD lod(1); | ||
lod[0].reserve(batch_size + 1); | ||
for (int i = 0, offset = 0; i < batch_size + 1; ++i) { | ||
lod[0].push_back(offset); | ||
offset += output_height * output_width; | ||
} | ||
out->set_lod(lod); | ||
} | ||
out->mutable_data<T>({result, img_channels*kernels[0]*kernels[1]}, | ||
ctx.GetPlace()); | ||
// out->numel(); | ||
const std::vector<int> dilations({1, 1}); | ||
// TODO(fuhailong): out_dims has two index, | ||
// out_dims[0] and out_dims[1], | ||
// {batchsize*output_height*output_width,channel*kernel[0],*kernel[1]}, | ||
// multi batch ,the first place is output_height[i]*output_width[i]. | ||
auto out_dims = out->dims(); | ||
int offset_out = 0; | ||
|
||
for (int i = 0; i < batch_size; i++) { | ||
const Tensor src = | ||
in->Slice(i, i + 1).Resize({img_channels, | ||
img_height, | ||
img_width}); | ||
// TODO(fuhailong): add image real size | ||
Tensor dst = out->Slice(offset_out, | ||
offset_out + output_height[i]*output_width[i]).Resize( | ||
{output_height[i], output_width[i], | ||
img_channels, kernels[0], kernels[1]}); | ||
offset_out += output_height[i]*output_width[i]; | ||
|
||
math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f; | ||
// eq, kOCF cnn to rnn format | ||
auto& dev_ctx = ctx.template device_context<DeviceContext>(); | ||
f(dev_ctx, src, dilations, strides, paddings, &dst); | ||
} | ||
out->Resize(out_dims); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这里的 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 对,这个里面在最开始的时候,out->mutable_data就已经固定大小了,我再看一下 |
||
// set lod information | ||
// TODO(wanghaoshuang): Move this to InferShape | ||
framework::LoD lod(1); | ||
lod[0].reserve(batch_size + 1); | ||
int offset = 0; | ||
lod[0].push_back(offset); | ||
for (int i = 0; i < batch_size; ++i) { | ||
offset += output_height[i] * output_width[i]; | ||
lod[0].push_back(offset); | ||
} | ||
out->set_lod(lod); | ||
} | ||
} else { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 嗯,你 |
||
out->mutable_data<T>(ctx.GetPlace()); | ||
int output_height = Im2SeqOutputSize(img_height, | ||
kernels[0], | ||
paddings[0], | ||
paddings[2], | ||
strides[0]); | ||
int output_width = Im2SeqOutputSize(img_width, | ||
kernels[1], | ||
paddings[1], | ||
paddings[3], | ||
strides[1]); | ||
|
||
const std::vector<int> dilations({1, 1}); | ||
auto out_dims = out->dims(); | ||
out->Resize({batch_size, out->numel() / batch_size}); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这个resize也有问题,在这之前并没有计算正确的out_dims, 这个和 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这个地方,我觉得没有问题的,使用mutable_data 是由于batchsize 大于1的时候,输出的维度是不一样的,在batchsize = 1 和train的逻辑中,是不存在这个过程的,out->dims()直接获取到out的dim,这个dims应该是不用自己计算的 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 在batch_size=1和train的时候,out_dims也是要自己算的。
out->dims()直接获取正确dims的前提是在infershape里计算正确的out_dims, 也就是这里: |
||
for (int i = 0; i < batch_size; i++) { | ||
const Tensor src = | ||
in->Slice(i, i + 1).Resize({img_channels, | ||
img_height, | ||
img_width}); | ||
Tensor dst = out->Slice(i, i + 1).Resize({output_height, | ||
output_width, | ||
img_channels, | ||
kernels[0], | ||
kernels[1]}); | ||
|
||
math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f; | ||
auto& dev_ctx = ctx.template device_context<DeviceContext>(); | ||
f(dev_ctx, src, dilations, strides, paddings, &dst); | ||
} | ||
out->Resize(out_dims); | ||
// set lod information | ||
// TODO(wanghaoshuang): Move this to InferShape | ||
framework::LoD lod(1); | ||
lod[0].reserve(batch_size + 1); | ||
int offset = 0; | ||
lod[0].push_back(offset); | ||
for (int i = 0; i < batch_size; ++i) { | ||
offset += output_height * output_width; | ||
lod[0].push_back(offset); | ||
} | ||
out->set_lod(lod); | ||
} | ||
} | ||
}; | ||
|
||
template <typename DeviceContext, typename T> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
batch_size=1
的时候, 图片应该是没有padding的吧,这种情况直接按is_inference=False
来算?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
batchsize = 1 的时候,直接走的原来的逻辑,我在inference的里面进行了判断