diff --git a/paddle/operators/conv2dtranspose_op.cc b/paddle/operators/conv2dtranspose_op.cc
new file mode 100644
index 0000000000000..c1b231906e2f1
--- /dev/null
+++ b/paddle/operators/conv2dtranspose_op.cc
@@ -0,0 +1,107 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/operators/conv2dtranspose_op.h"
+
+namespace paddle {
+namespace operators {
+
+void Conv2DTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
+  // Output dims: {N, C_out, (H - 1) * s_h + k_h, (W - 1) * s_w + k_w}.
+  PADDLE_ENFORCE(ctx->HasInput("Input"),
+                 "Input(Input) of Conv2DTransposeOp should not be null.");
+  PADDLE_ENFORCE(ctx->HasInput("Filter"),
+                 "Input(Filter) of Conv2DTransposeOp should not be null.");
+  PADDLE_ENFORCE(ctx->HasOutput("Output"),
+                 "Output(Output) of Conv2DTransposeOp should not be null.");
+  auto in_dims = ctx->GetInputDim("Input");
+  auto filter_dims = ctx->GetInputDim("Filter");
+  std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
+  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
+  // strides[0] and strides[1] are indexed below, so require exactly two.
+  PADDLE_ENFORCE_EQ(static_cast<int>(strides.size()), 2,
+                    "Conv2DTransposeOp strides should have 2 elements.");
+  for (size_t i = 0; i < paddings.size(); ++i) {
+    PADDLE_ENFORCE_EQ(paddings[i], 0,
+                      "No Padding allowed in conv transpose op.");
+  }
+  PADDLE_ENFORCE_EQ(in_dims.size(), 4,
+                    "Conv2DTransposeOp input should be 4-D tensor.");
+  PADDLE_ENFORCE_EQ(filter_dims.size(), 4,
+                    "Conv2DTransposeOp filter should be 4-D tensor.");
+  PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0],
+                    "input and kernel input dimension should be equal.");
+  auto output_height = (in_dims[2] - 1) * strides[0] + filter_dims[2];
+  auto output_width = (in_dims[3] - 1) * strides[1] + filter_dims[3];
+  ctx->SetOutputDim("Output",
+                    {in_dims[0], filter_dims[1], output_height, output_width});
+}
+
+Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
+    framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+    : OpProtoAndCheckerMaker(proto, op_checker) {
+  AddInput(
+      "Input",
+      "(Tensor) The input tensor of convolution transpose operator. "
+      "The format of input tensor is NCHW. Where N is batch size, C is the "
+      "number of input channels, H and W is the height and width of image.");
+  AddInput("Filter",
+           "(Tensor) The filter tensor of convolution transpose operator. "
+           "The format of the filter tensor is MCHW, where M is the number of "
+           "input image channels, C is the number of output image channels, "
+           "H and W is height and width of filter. "
+           "We enforce groups number == 1 and padding == 0 in "
+           "convolution transpose Scenario.");
+  AddOutput("Output",
+            "(Tensor) The output tensor of convolution transpose operator. "
+            "The format of output tensor is also NCHW.");
+  AddAttr<std::vector<int>>("strides",
+                            "strides of convolution transpose operator.")
+      .SetDefault({1, 1});
+  AddAttr<std::vector<int>>("paddings",
+                            "paddings of convolution transpose operator.")
+      .SetDefault({0, 0});
+  AddComment(R"DOC(
+The convolution transpose operation calculates the output based on the input,
+filter, strides and paddings (groups are not supported). The size of each
+dimension of the parameters is checked in the infer-shape.
+)DOC");
+}
+
+void Conv2DTransposeOpGrad::InferShape(
+    framework::InferShapeContext* ctx) const {
+  // Every requested gradient takes the shape of its forward counterpart.
+  auto x_dims = ctx->GetInputDim("Input");
+  auto w_dims = ctx->GetInputDim("Filter");
+  const auto dx_name = framework::GradVarName("Input");
+  const auto dw_name = framework::GradVarName("Filter");
+
+  if (ctx->HasOutput(dx_name)) ctx->SetOutputDim(dx_name, x_dims);
+  if (ctx->HasOutput(dw_name)) ctx->SetOutputDim(dw_name, w_dims);
+}
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(conv2dtranspose, ops::Conv2DTransposeOp,
+            ops::Conv2DTransposeOpMaker, conv2dtranspose_grad,
+            ops::Conv2DTransposeOpGrad);
+// CPU kernels for the forward and gradient ops, instantiated for float only.
+REGISTER_OP_CPU_KERNEL(
+    conv2dtranspose,
+    ops::GemmConv2DTransposeKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(
+    conv2dtranspose_grad,
+    ops::GemmConv2DTransposeGradKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/conv2dtranspose_op.cu b/paddle/operators/conv2dtranspose_op.cu
new file mode 100644
index 0000000000000..761bc1959e69b
--- /dev/null
+++ b/paddle/operators/conv2dtranspose_op.cu
@@ -0,0 +1,24 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/operators/conv2dtranspose_op.h"
+
+namespace ops = paddle::operators;
+// GPU kernels for the forward and gradient ops, instantiated for float only.
+REGISTER_OP_GPU_KERNEL(
+    conv2dtranspose,
+    ops::GemmConv2DTransposeKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(
+    conv2dtranspose_grad,
+    ops::GemmConv2DTransposeGradKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/conv2dtranspose_op.h b/paddle/operators/conv2dtranspose_op.h
new file mode 100644
index 0000000000000..293b7ce9ba945
--- /dev/null
+++ b/paddle/operators/conv2dtranspose_op.h
@@ -0,0 +1,254 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/im2col.h"
+#include "paddle/operators/math/math_function.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+using DDim = framework::DDim;
+
+// The Op classes are declared in this header so that other conv transpose
+// operator implementations can reuse them.
+class Conv2DTransposeOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  Conv2DTransposeOpMaker(framework::OpProto* proto,
+                         framework::OpAttrChecker* op_checker);
+};
+
+class Conv2DTransposeOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  // Forward op: reuses the base-class constructors, overrides InferShape.
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override;
+};
+
+class Conv2DTransposeOpGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  // Gradient op: reuses the base-class constructors, overrides InferShape.
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override;
+};
+
+// Forward kernel. Per sample: col_matrix = filter^T * input (gemm), then
+// col2im is called with col and the (c, o_h, o_w) output slice.
+template <typename Place, typename T>
+class GemmConv2DTransposeKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    const Tensor* input = context.Input<Tensor>("Input");
+    // Local Tensor copy of the filter, so that it can be Resize()d below.
+    Tensor filter = *context.Input<Tensor>("Filter");
+    Tensor* output = context.Output<Tensor>("Output");
+    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
+
+    // TODO(Zhuoyuan): Paddings can be added in future.
+    // groups will always be disabled in conv2dtranspose.
+
+    const int batch_size = input->dims()[0];
+    const int m = input->dims()[1];
+    const int h = input->dims()[2];
+    const int w = input->dims()[3];
+
+    const int k_h = filter.dims()[2];
+    const int k_w = filter.dims()[3];
+
+    const int c = output->dims()[1];  // output channels
+    const int o_h = output->dims()[2];
+    const int o_w = output->dims()[3];
+
+    paddle::operators::math::Col2ImFunctor<
+        paddle::operators::math::ColFormat::kCFO, Place, T>
+        col2im;
+
+    // use col_shape in the im2col and col2im calculation
+    DDim col_shape = {c, k_h, k_w, h, w};
+
+    // use col_matrix_shape in the gemm calculation
+    DDim col_matrix_shape = {c * k_h * k_w, h * w};
+
+    Tensor col;
+    col.mutable_data<T>(col_shape, context.GetPlace());
+    // col_matrix shares the same piece of data with col,
+    // but will be reshaped into a two-dimensional matrix shape
+    // to call the matrix multiplication interface.
+    Tensor col_matrix;
+    col_matrix.ShareDataWith(col);
+    col_matrix.Resize(col_matrix_shape);
+
+    DDim output_shape = {c, o_h, o_w};
+    DDim input_matrix_shape = {m, h * w};
+
+    DDim filter_matrix_shape = {m, c * k_h * k_w};
+    filter.Resize(filter_matrix_shape);
+
+    // convolution transpose: gemm + col2im (similar to conv-backward on input)
+    // The whole output is zero-filled before the per-sample loop.
+    output->mutable_data<T>(context.GetPlace());
+    auto t = framework::EigenVector<T>::Flatten(*output);
+    t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
+
+    for (int i = 0; i < batch_size; i++) {
+      // batch with size (m, h * w)
+      Tensor input_batch = input->Slice(i, i + 1).Resize(input_matrix_shape);
+      // filter size: (m, c * k_h * k_w)
+
+      // output size: (c, o_h, o_w)
+      Tensor output_batch = output->Slice(i, i + 1).Resize(output_shape);
+
+      // col_matrix = filter^T * input_batch
+      // of shape (c * k_h * k_w, h * w)
+      math::matmul<Place, T>(context.device_context(), filter, true,
+                             input_batch, false, T(1.0), &col_matrix, T(0.0));
+      col2im(context.device_context(), output_batch, col, strides[0],
+             strides[1], 0, 0);
+    }
+  }
+};
+
+// Backward kernel of conv2dtranspose. Per sample it runs im2col on the output
+// gradient and then one gemm for each requested gradient:
+//   d(Input)  = filter * col     -> (m, h * w)
+//   d(Filter) += input * col^T   -> (m, c * k_h * k_w)
+template <typename Place, typename T>
+class GemmConv2DTransposeGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    const Tensor* input = context.Input<Tensor>("Input");
+    const Tensor* output_grad =
+        context.Input<Tensor>(framework::GradVarName("Output"));
+
+    // For filter, we do not use const pointer b/c we will do reshape,
+    // but we should avoid modifying its value.
+    Tensor filter = *context.Input<Tensor>("Filter");
+
+    Tensor* input_grad =
+        context.Output<Tensor>(framework::GradVarName("Input"));
+    Tensor* filter_grad =
+        context.Output<Tensor>(framework::GradVarName("Filter"));
+
+    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
+    // Actually, no paddings and groups allowed in conv transpose.
+    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
+
+    const int batch_size = input->dims()[0];
+    const int m = input->dims()[1];
+    const int h = input->dims()[2];
+    const int w = input->dims()[3];
+
+    const int k_h = filter.dims()[2];
+    const int k_w = filter.dims()[3];
+
+    const int c = output_grad->dims()[1];  // output channels
+    const int o_h = output_grad->dims()[2];
+    const int o_w = output_grad->dims()[3];
+
+    // Only im2col functor required for bp to get to the right shape
+    paddle::operators::math::Im2ColFunctor<
+        paddle::operators::math::ColFormat::kCFO, Place, T>
+        im2col;
+
+    // use col_shape in the im2col calculation
+    DDim col_shape = {c, k_h, k_w, h, w};
+
+    // use col_matrix_shape in both gemm calculations
+    DDim col_matrix_shape = {c * k_h * k_w, h * w};
+
+    Tensor col;
+    col.mutable_data<T>(col_shape, context.GetPlace());
+    // col_matrix shares the same piece of data with col,
+    // but will be reshaped into a two-dimensional matrix shape
+    // to call the matrix multiplication interface.
+    Tensor col_matrix;
+    col_matrix.ShareDataWith(col);
+    col_matrix.Resize(col_matrix_shape);
+
+    DDim output_shape = {c, o_h, o_w};
+    DDim input_matrix_shape = {m, h * w};
+
+    DDim filter_matrix_shape = {m, c * k_h * k_w};
+    filter.Resize(filter_matrix_shape);
+
+    // convolution transpose grad on input:
+    // im2col + gemm (similar to conv-forward)
+    // input need to compute gradient
+    if (input_grad) {
+      input_grad->mutable_data<T>(context.GetPlace());
+      auto t = framework::EigenVector<T>::Flatten(*input_grad);
+      t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
+
+      for (int i = 0; i < batch_size; i++) {
+        // batch with size (c, o_h, o_w)
+        Tensor output_grad_batch =
+            output_grad->Slice(i, i + 1).Resize(output_shape);
+        // filter of size (m, c * k_h * k_w)
+
+        // batch with size (m, h * w)
+        Tensor input_grad_batch =
+            input_grad->Slice(i, i + 1).Resize(input_matrix_shape);
+
+        // im2col: dy from (c, o_h, o_w) -> (c * k_h * k_w, h * w)
+        im2col(context.device_context(), output_grad_batch, col, strides[0],
+               strides[1], paddings[0], paddings[1]);
+
+        // gemm: dx = filter * dy
+        // (m, c * k_h * k_w) * (c * k_h * k_w, h * w) -> (m, h * w)
+        math::matmul<Place, T>(context.device_context(), filter, false,
+                               col_matrix, false, T(1.0), &input_grad_batch,
+                               T(0.0));
+      }
+    }
+
+    // filter gradient required
+    if (filter_grad) {
+      filter_grad->mutable_data<T>(context.GetPlace());
+      Tensor filter_grad_ = *filter_grad;
+      filter_grad_.Resize(filter_matrix_shape);
+      // zero the accumulator before summing contributions over the batch
+      auto t = framework::EigenVector<T>::Flatten(filter_grad_);
+      t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
+
+      for (int i = 0; i < batch_size; ++i) {
+        // batch with size (c, o_h, o_w)
+        Tensor output_grad_batch =
+            output_grad->Slice(i, i + 1).Resize(output_shape);
+        // input batch with size (m, h * w)
+        Tensor in_batch = input->Slice(i, i + 1).Resize(input_matrix_shape);
+
+        // im2col: dy from (c, o_h, o_w) -> (c * k_h * k_w, h * w)
+        im2col(context.device_context(), output_grad_batch, col, strides[0],
+               strides[1], paddings[0], paddings[1]);
+
+        // gemm: d_filter += x * col^T (beta = 1 accumulates over the batch)
+        // (m, h * w) * (h * w, c * k_h * k_w) -> (m, c * k_h * k_w)
+        // col_matrix is viewed as (c * k_h * k_w, h * w) so that, once
+        // transposed, its inner dimension matches in_batch.
+        math::matmul<Place, T>(context.device_context(), in_batch, false,
+                               col_matrix, true, T(1.0), &filter_grad_,
+                               T(1.0));
+      }
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/python/paddle/v2/framework/tests/test_conv2dtranspose_op.py b/python/paddle/v2/framework/tests/test_conv2dtranspose_op.py
new file mode 100644
index 0000000000000..71ca262f00378
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_conv2dtranspose_op.py
@@ -0,0 +1,102 @@
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param):
+    """Reference transposed convolution without padding.
+
+    input_ is (N, C_in, H, W), filter_ is (C_in, C_out, k_h, k_w) and
+    conv2dtranspose_param['stride'] holds (stride_h, stride_w). Returns an
+    array of shape (N, C_out, (H-1)*stride_h + k_h, (W-1)*stride_w + k_w).
+    """
+    in_n, in_c, in_h, in_w = input_.shape
+    f_c, out_c, f_h, f_w = filter_.shape
+    assert in_c == f_c
+
+    stride = conv2dtranspose_param['stride']
+    out_h = (in_h - 1) * stride[0] + f_h
+    out_w = (in_w - 1) * stride[1] + f_w
+    out = np.zeros((in_n, out_c, out_h, out_w))
+
+    for n in range(in_n):
+        for i in range(in_h):
+            for j in range(in_w):
+                # Window of the output that input pixel (i, j) contributes to.
+                i1, j1 = i * stride[0], j * stride[1]
+                # (C_in, 1, 1) broadcasts against each (C_in, k_h, k_w) slice.
+                pixel = input_[n, :, i, j].reshape(in_c, 1, 1)
+                for k in range(out_c):
+                    patch = np.sum(pixel * filter_[:, k, :, :], axis=0)
+                    out[n, k, i1:i1 + f_h, j1:j1 + f_w] += patch
+    return out
+
+
+class TestConv2dTransposeOp(OpTest):
+    """Compares conv2dtranspose with the naive NumPy reference."""
+
+    def setUp(self):
+        """Builds random inputs and the expected output for the op."""
+        # init as conv transpose
+        self.init_op_type()
+
+        # [2, 3, 5, 5] -> kernel [3, 6, 3, 3] -> output [2, 6, 7, 7]
+        self.init_test_case()
+
+        conv2dtranspose_param = {'stride': self.stride, 'pad': self.pad}
+        input_ = np.random.random(self.input_size).astype("float32")
+        filter_ = np.random.random(self.filter_size).astype("float32")
+        output = conv2dtranspose_forward_naive(input_, filter_,
+                                               conv2dtranspose_param)
+
+        self.inputs = {'Input': input_, 'Filter': filter_}
+        self.attrs = {'strides': self.stride, 'paddings': self.pad}
+        self.outputs = {'Output': output}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(
+            set(['Input', 'Filter']), 'Output', max_relative_error=0.05)
+
+    def test_check_grad_no_filter(self):
+        self.check_grad(
+            ['Input'],
+            'Output',
+            max_relative_error=0.05,
+            no_grad_set=set(['Filter']))
+
+    def test_check_grad_no_input(self):
+        self.check_grad(
+            ['Filter'],
+            'Output',
+            max_relative_error=0.05,
+            no_grad_set=set(['Input']))
+
+    def init_test_case(self):
+        """Sets strides, paddings and tensor sizes for this test case."""
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.input_size = [2, 3, 5, 5]  # NCHW
+        f_c = self.input_size[1]
+        # filter layout: (input channels, output channels, k_h, k_w)
+        self.filter_size = [f_c, 6, 3, 3]
+
+    def init_op_type(self):
+        """Selects the operator type under test."""
+        self.op_type = "conv2dtranspose"
+
+
+"""
+class TestCudnn(TestConv2dOp):
+    def init_group(self):
+        self.groups = 1
+
+    def init_op_type(self):
+        self.op_type = "conv_cudnn"
+"""
+
+if __name__ == '__main__':
+    unittest.main()