[PHI] transpose2_grad op migration (#46139)
* op migrated, Copy(OneDNNContext, ...) added

* mutable_data & op registration in fluid removed

* refactoring

* OneDNNGetDataType to uppercase

* missing cpu check added, handler moved to .h file

* name changed to transpose_grad

* Copy changed back to TensorCopy

* Resizing corrected, Copy(OneDNNContext) removed
paulinagacek committed Oct 10, 2022
1 parent a0dffd3 commit e3407a8
Showing 4 changed files with 133 additions and 6 deletions.
5 changes: 0 additions & 5 deletions paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
@@ -210,8 +210,3 @@ REGISTER_OP_KERNEL(transpose_grad,
                    MKLDNN,
                    ::paddle::platform::CPUPlace,
                    ops::TransposeMKLDNNGradOpKernel<float>);
-
-REGISTER_OP_KERNEL(transpose2_grad,
-                   MKLDNN,
-                   ::paddle::platform::CPUPlace,
-                   ops::TransposeMKLDNNGradOpKernel<float>);
64 changes: 64 additions & 0 deletions paddle/phi/backends/onednn/onednn_reuse.h
@@ -1046,5 +1046,69 @@ class ClipOneDNNHandler
to_void_cast<T>(input_data));
}
};

// Implements transpose as a single oneDNN reorder between two memories that
// share the same logical dims but differ in stride order.
template <typename T>
class TransposeOneDNNHandler {
public:
TransposeOneDNNHandler(const OneDNNContext& dev_ctx,
std::vector<int64_t>& dims, // NOLINT
std::vector<int>& axis, // NOLINT
dnnl::engine engine)
: dev_ctx_(dev_ctx),
dims_(dims),
axis_(axis),
logical_axis_(dims.size(), 0),
engine_(engine) {}

std::shared_ptr<dnnl::memory> AcquireSrcMemory(const OneDNNMemoryFormat& fmt,
void* ptr) {
    // Build the memory descriptor from the input format, unless the format
    // cannot be trusted (nchw); in that case derive it from strides over the
    // identity axis order via Axis2MemoryDesc.
for (size_t i = 0; i < this->logical_axis_.size(); ++i) {
this->logical_axis_[i] = i;
}

auto src_md = fmt != OneDNNMemoryFormat::nchw
? OneDNNMemDesc(dims_, OneDNNGetDataType<T>(), fmt)
: Axis2MemoryDesc(dims_, logical_axis_);
return std::make_shared<dnnl::memory>(src_md, engine_, ptr);
}

std::shared_ptr<dnnl::memory> AcquireDstMemory(DenseTensor* output,
Place place) {
auto dst_md = Axis2MemoryDesc(dims_, axis_);
auto dst_data = dev_ctx_.Alloc<T>(output);
return std::make_shared<dnnl::memory>(dst_md, engine_, dst_data);
}

std::shared_ptr<dnnl::reorder> AcquireTranspose(
std::shared_ptr<dnnl::memory> dst_memory_p,
std::shared_ptr<dnnl::memory> src_memory_p) {
return std::make_shared<dnnl::reorder>(*(src_memory_p), *(dst_memory_p));
}

protected:
dnnl::memory::desc Axis2MemoryDesc(std::vector<int64_t>& nchw_tz, // NOLINT
std::vector<int>& axis // NOLINT
) {
size_t ndims = axis.size();

    // Choose strides so that axis[ndims - 1] varies fastest: the descriptor
    // keeps the logical dims but addresses them in permuted physical order.
    std::vector<int64_t> strides(ndims);
    int64_t total_stride = 1;
    for (int i = static_cast<int>(ndims) - 1; i >= 0; --i) {
      strides[axis[i]] = total_stride;
      total_stride *= nchw_tz[axis[i]];
    }
dnnl::memory::desc mem_d(nchw_tz, OneDNNGetDataType<T>(), strides);

return mem_d;
}

private:
const OneDNNContext& dev_ctx_;
std::vector<int64_t> dims_;
std::vector<int> axis_;
std::vector<int> logical_axis_;
dnnl::engine engine_;
};
} // namespace funcs
} // namespace phi
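
Worth spelling out what Axis2MemoryDesc is doing: the source and destination descriptors share the same logical dims, and the transpose happens entirely through the destination's strides, so the whole op reduces to one reorder. Below is a minimal standalone sketch of that stride arithmetic, plain C++ with no oneDNN dependency; TransposedStrides is an illustrative name, not part of the codebase.

#include <cstdint>
#include <iostream>
#include <vector>

// Same arithmetic as Axis2MemoryDesc: walk the permutation from the back,
// so dimension axis[ndims - 1] gets stride 1 (varies fastest) and each
// earlier axis entry gets the running product of the dims behind it.
std::vector<int64_t> TransposedStrides(const std::vector<int64_t>& dims,
                                       const std::vector<int>& axis) {
  std::vector<int64_t> strides(dims.size());
  int64_t total_stride = 1;
  for (int i = static_cast<int>(axis.size()) - 1; i >= 0; --i) {
    strides[axis[i]] = total_stride;
    total_stride *= dims[axis[i]];
  }
  return strides;
}

int main() {
  // A 2x3x4 tensor transposed with axis = {2, 0, 1}: dim 1 becomes
  // contiguous (stride 1), dim 0 gets stride 3, dim 2 gets stride 6.
  for (int64_t s : TransposedStrides({2, 3, 4}, {2, 0, 1})) {
    std::cout << s << ' ';  // prints: 3 1 6
  }
  std::cout << '\n';
}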
68 changes: 68 additions & 0 deletions paddle/phi/kernels/onednn/transpose_grad_kernel.cc
@@ -0,0 +1,68 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/transpose_grad_kernel.h"

#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {
template <typename T, typename Context>
void TransposeGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
const std::vector<int>& axis,
DenseTensor* x_grad) {
PADDLE_ENFORCE_EQ(dev_ctx.GetPlace().GetType() == phi::AllocationType::CPU,
true,
errors::PreconditionNotMet(
"Operator DNNL TransposeGrad must use CPUPlace"));
if (!x_grad) return;

const auto& onednn_engine = dev_ctx.GetEngine();
std::vector<int> reversed_axis(axis);
  // A rank-1 transpose is the identity, so the gradient just passes through.
  if (axis.size() == 1) {
    paddle::framework::TensorCopy(out_grad, out_grad.place(), x_grad);
    x_grad->set_format(out_grad.format());
    return;
  }

  // The gradient of a transpose is a transpose by the inverse permutation:
  // the forward op moved input dim axis[i] to output position i, so the
  // backward op maps it back.
  for (size_t i = 0; i < axis.size(); i++) {
    reversed_axis[axis[i]] = i;
  }

const T* out_grad_data = out_grad.data<T>();
dev_ctx.template Alloc<T>(x_grad);
auto nchw_tz = vectorize<int64_t>(out_grad.dims());

funcs::TransposeOneDNNHandler<T> handler(
dev_ctx, nchw_tz, reversed_axis, onednn_engine);

auto transpose_src_memory_p = handler.AcquireSrcMemory(
out_grad.format(), funcs::to_void_cast<T>(out_grad_data));
auto transpose_dst_memory_p =
handler.AcquireDstMemory(x_grad, dev_ctx.GetPlace());
auto transpose_p =
handler.AcquireTranspose(transpose_dst_memory_p, transpose_src_memory_p);

auto& astream = OneDNNContext::tls().get_stream();
transpose_p->execute(
astream, *transpose_src_memory_p, *transpose_dst_memory_p);
astream.wait();
}

} // namespace phi

PD_REGISTER_KERNEL(
transpose_grad, OneDNN, ALL_LAYOUT, phi::TransposeGradKernel, float) {}
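
The reversed_axis loop above relies on the identity that the gradient of a transpose is a transpose by the inverse permutation: the forward op moved input dim axis[i] to output position i, so the backward op must send it back. Here is a short sketch of that inversion and its round-trip property, using only the standard library; InversePermutation is an illustrative name, not part of the codebase.

#include <cassert>
#include <vector>

// If the forward transpose placed input dimension axis[i] at output
// position i, then inv[axis[i]] = i is the permutation that undoes it,
// the same loop the kernel uses to fill reversed_axis.
std::vector<int> InversePermutation(const std::vector<int>& axis) {
  std::vector<int> inv(axis.size());
  for (size_t i = 0; i < axis.size(); ++i) {
    inv[axis[i]] = static_cast<int>(i);
  }
  return inv;
}

int main() {
  const std::vector<int> axis = {2, 0, 1};
  const std::vector<int> inv = InversePermutation(axis);  // {1, 2, 0}
  // Composing the two permutations yields the identity.
  for (size_t i = 0; i < axis.size(); ++i) {
    assert(inv[axis[i]] == static_cast<int>(i));
  }
}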
2 changes: 1 addition & 1 deletion python/paddle/fluid/tests/unittests/test_transpose_op.py
@@ -14,7 +14,7 @@

import unittest
import numpy as np
-from op_test import OpTest, convert_float_to_uint16
+from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
