ctc batch inference, change im2sequence_op #10923

Closed
wants to merge 1 commit into from
17 changes: 13 additions & 4 deletions paddle/fluid/operators/im2sequence_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/im2sequence_op.h"
#include <vector>

namespace paddle {
namespace operators {
@@ -53,14 +54,14 @@ class Im2SequenceOp : public framework::OperatorWithKernel {

class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
public:
Im2SequenceOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
void Make() override {
AddInput("X",
"(Tensor) The input tensor has NCHW format."
"N: batch size"
"C: channels"
"H: height"
"W: width");
AddInput("Image_real_size", "Image real size.");
AddOutput("Out", "(LoDTensor) The output data of im2sequence op.");
AddAttr<std::vector<int>>("kernels",
"(vector<int>), the "
@@ -73,6 +74,13 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
"(vector<int> default:{0, 0, 0, 0}), the "
"paddings(up_pad, left_pad, down_pad, right_pad)")
.SetDefault({0, 0, 0, 0});
AddAttr<std::vector<int>>("out_stride",
"(vector<int> default:{1, 1}), the out_stride "
"(out_stride_height, out_stride_width)")
.SetDefault({1, 1});
AddAttr<bool>("is_inference",
"non-zero means inference, 0 means train")
.SetDefault(false);
AddComment(R"DOC(
This op uses kernels to scan images and converts these images to sequences.
After expanding, the number of time steps is output_height * output_width
@@ -147,8 +155,9 @@ class Im2SequenceGradOp : public framework::OperatorWithKernel {
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker,
im2sequence_grad, ops::Im2SequenceGradOp);
REGISTER_OPERATOR(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(im2sequence_grad, ops::Im2SequenceGradOp);
REGISTER_OP_CPU_KERNEL(
im2sequence,
ops::Im2SequenceKernel<paddle::platform::CPUDeviceContext, float>);
201 changes: 170 additions & 31 deletions paddle/fluid/operators/im2sequence_op.h
@@ -13,7 +13,8 @@
limitations under the License. */

#pragma once

#include <fstream>
#include <string>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
@@ -38,8 +39,9 @@ class Im2SequenceKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const Tensor* in = ctx.Input<Tensor>("X");
// TODO(fuhailong): add new data layer to solve multibatch inference
const Tensor* imgRealSize = ctx.Input<Tensor>("Image_real_size");
LoDTensor* out = ctx.Output<LoDTensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
// TODO(wanghaoshuang): Add layout checker after 'set_layout'
// being available for python API
// PADDLE_ENFORCE_EQ(in->layout(), framework::DataLayout::kNCHW,
@@ -49,41 +51,178 @@ class Im2SequenceKernel : public framework::OpKernel<T> {
int img_channels = in_dim[1];
int img_height = in_dim[2];
int img_width = in_dim[3];

auto imgRealSize_vec = imgRealSize->data<float>();
auto imgRealSize_dim = imgRealSize->dims();
auto kernels = ctx.Attr<std::vector<int>>("kernels");
auto strides = ctx.Attr<std::vector<int>>("strides");
auto paddings = ctx.Attr<std::vector<int>>("paddings");
int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0],
paddings[2], strides[0]);
int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1],
paddings[3], strides[1]);
auto out_stride = ctx.Attr<std::vector<int>>("out_stride");
auto is_inference = ctx.Attr<bool>("is_inference");
if (is_inference) {
if (batch_size == 1) {
Review comment (Contributor): When batch_size = 1, the image should have no padding; shouldn't this case just be computed as if is_inference were false?

Reply (Contributor Author): When batch_size = 1 we follow the original logic directly; I check for this case inside the inference branch.

out->mutable_data<T>(ctx.GetPlace());
int output_height = Im2SeqOutputSize(img_height,
kernels[0],
paddings[0],
paddings[2],
strides[0]);
int output_width = Im2SeqOutputSize(img_width,
kernels[1],
paddings[1],
paddings[3],
strides[1]);
const std::vector<int> dilations({1, 1});
auto out_dims = out->dims();
out->Resize({batch_size, out->numel() / batch_size});
Review comment (Contributor): Won't this Resize raise an error? The correct out_dims does not seem to be computed before line 63. You should move line 63 after line 73 and write it as out->mutable_data<T>({1 * output_height * output_width, channel * kernel[0] * kernel[1]}, ctx.GetPlace()); or simply let the batch_size = 1 case go through the original train branch.

Reply (Contributor Author): batch_size = 1 goes through the train branch.

for (int i = 0; i < batch_size; i++) {
const Tensor src =
in->Slice(i, i + 1).Resize({img_channels,
img_height,
img_width});
Tensor dst = out->Slice(i, i + 1).Resize({output_height,
output_width,
img_channels,
kernels[0],
kernels[1]});

const std::vector<int> dilations({1, 1});
math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
auto& dev_ctx = ctx.template device_context<DeviceContext>();
f(dev_ctx, src, dilations, strides, paddings, &dst);
}
out->Resize(out_dims);
// set lod information
// TODO(wanghaoshuang): Move this to InferShape
framework::LoD lod(1);
lod[0].reserve(batch_size + 1);
int offset = 0;
lod[0].push_back(offset);
for (int i = 0; i < batch_size; ++i) {
offset += output_height * output_width;
lod[0].push_back(offset);
}
out->set_lod(lod);
} else {
std::vector<int> imgReal_H;
std::vector<int> imgReal_W;
for (int i = 0; i < batch_size; i++) {
int tmp_real_H = int(imgRealSize_vec[2 * i]);
int tmp_real_W = int(imgRealSize_vec[2 * i + 1]);
for (int j = 0; j < out_stride[0]; j++) {
Review comment (Contributor): Doesn't out_stride[0] mean the factor by which the original image has been downscaled?

Reply (Contributor Author): Taking our current model as an example, the image is downscaled 16x. If the width is divisible by 16, dividing directly is fine; if it is not, the model's logic rounds up while direct division rounds down, so instead of a single division we divide in a loop. We could change the out_stride attribute to the number of convolutions applied and also pass in the conv kernel size; I think that might be better.

tmp_real_H = tmp_real_H / 2 + tmp_real_H % 2;
tmp_real_W = tmp_real_W / 2 + tmp_real_W % 2;
}
imgReal_H.push_back(tmp_real_H);
imgReal_W.push_back(tmp_real_W);
}

auto out_dims = out->dims();
out->Resize({batch_size, out->numel() / batch_size});
for (int i = 0; i < batch_size; i++) {
const Tensor src =
in->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
Tensor dst = out->Slice(i, i + 1).Resize(
{output_height, output_width, img_channels, kernels[0], kernels[1]});
// TODO(fuhailong): for loop to compute real output size
std::vector<int> output_height;
std::vector<int> output_width;
for (int i = 0; i < batch_size; i++) {
output_height.push_back(Im2SeqOutputSize(imgReal_H[i],
kernels[0],
paddings[0],
paddings[2],
strides[0]));
output_width.push_back(Im2SeqOutputSize(imgReal_W[i],
kernels[1],
paddings[1],
paddings[3],
strides[1]));
}
// TODO(fuhailong): compute dims of output
// call: out->mutable_data<T>(ctx.GetPlace(), output_dims);
int result = 0;
for (int i = 0; i < batch_size; i++) {
Review comment (Contributor): There are three consecutive for loops from 0 to batch_size here; could they be merged?

Reply (Contributor Author): Merging them should be possible.

result += output_height[i] * output_width[i];
}

math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
auto& dev_ctx = ctx.template device_context<DeviceContext>();
f(dev_ctx, src, dilations, strides, paddings, &dst);
}
out->Resize(out_dims);

// set lod information
// TODO(wanghaoshuang): Move this to InferShape
framework::LoD lod(1);
lod[0].reserve(batch_size + 1);
for (int i = 0, offset = 0; i < batch_size + 1; ++i) {
lod[0].push_back(offset);
offset += output_height * output_width;
}
out->set_lod(lod);
}
out->mutable_data<T>({result, img_channels*kernels[0]*kernels[1]},
ctx.GetPlace());
// out->numel();
const std::vector<int> dilations({1, 1});
// TODO(fuhailong): out_dims has two index,
// out_dims[0] and out_dims[1],
// {batchsize*output_height*output_width,channel*kernel[0],*kernel[1]},
// multi batch ,the first place is output_height[i]*output_width[i].
auto out_dims = out->dims();
int offset_out = 0;

for (int i = 0; i < batch_size; i++) {
const Tensor src =
in->Slice(i, i + 1).Resize({img_channels,
img_height,
img_width});
// TODO(fuhailong): add image real size
Tensor dst = out->Slice(offset_out,
offset_out + output_height[i]*output_width[i]).Resize(
{output_height[i], output_width[i],
img_channels, kernels[0], kernels[1]});
offset_out += output_height[i]*output_width[i];

math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
// eq, kOCF cnn to rnn format
auto& dev_ctx = ctx.template device_context<DeviceContext>();
f(dev_ctx, src, dilations, strides, paddings, &dst);
}
out->Resize(out_dims);
Review comment (Contributor): This Resize does not seem necessary: in the is_inference = true and batch_size > 1 case, you never resize out to another shape after line 140, do you?

Reply (Contributor Author): Right, out->mutable_data already fixes the size at the very beginning; I will take another look.

// set lod information
// TODO(wanghaoshuang): Move this to InferShape
framework::LoD lod(1);
lod[0].reserve(batch_size + 1);
int offset = 0;
lod[0].push_back(offset);
for (int i = 0; i < batch_size; ++i) {
offset += output_height[i] * output_width[i];
lod[0].push_back(offset);
}
out->set_lod(lod);
}
} else {
Review comment (Contributor): OK, batch_size = 1 goes through the train branch, but you copied the train branch's code over; could this be done another way?

out->mutable_data<T>(ctx.GetPlace());
int output_height = Im2SeqOutputSize(img_height,
kernels[0],
paddings[0],
paddings[2],
strides[0]);
int output_width = Im2SeqOutputSize(img_width,
kernels[1],
paddings[1],
paddings[3],
strides[1]);

const std::vector<int> dilations({1, 1});
auto out_dims = out->dims();
out->Resize({batch_size, out->numel() / batch_size});
Review comment (Contributor): This resize is also problematic: the correct out_dims has not been computed before this point, the same issue as the resize in the batch_size = 1 case.

Reply (Contributor Author): I think this one is fine. mutable_data is used because the output dimensions differ when batch_size > 1; in the batch_size = 1 and train logic that step does not exist, and out->dims() fetches out's dims directly, so the dims should not need to be computed by hand.

Reply (Contributor): In the batch_size = 1 and train cases, out_dims also has to be computed. "out->dims() fetches the dims directly" only holds if the correct out_dims is computed in InferShape, i.e. here: https://github.com/PaddlePaddle/Paddle/pull/10923/files#diff-6134b780f1c15f85baabeb43daf9a8cdR50. But the out_dims computed there may currently be wrong; you can verify with a print log.

for (int i = 0; i < batch_size; i++) {
const Tensor src =
in->Slice(i, i + 1).Resize({img_channels,
img_height,
img_width});
Tensor dst = out->Slice(i, i + 1).Resize({output_height,
output_width,
img_channels,
kernels[0],
kernels[1]});

math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
auto& dev_ctx = ctx.template device_context<DeviceContext>();
f(dev_ctx, src, dilations, strides, paddings, &dst);
}
out->Resize(out_dims);
// set lod information
// TODO(wanghaoshuang): Move this to InferShape
framework::LoD lod(1);
lod[0].reserve(batch_size + 1);
int offset = 0;
lod[0].push_back(offset);
for (int i = 0; i < batch_size; ++i) {
offset += output_height * output_width;
lod[0].push_back(offset);
}
out->set_lod(lod);
}
}
};

template <typename DeviceContext, typename T>