Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PHI]Add new Tensor type and migrate save_combine kernel #47856

Merged
merged 15 commits into from
Dec 12, 2022
33 changes: 33 additions & 0 deletions cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,35 @@ function(find_register FILENAME PATTERN OUTPUT)
PARENT_SCOPE)
endfunction()

# Scan FILENAME for the first PD_REGISTER_KERNEL(...) registration and, if one
# is found, append a matching "PD_DECLARE_KERNEL(name, backend, layout);" line
# to the generated file at ADD_PATH.
#
# FILENAME - path of the source file to scan.
# ADD_PATH - path of the generated declarations file to append to.
#
# NOTE: REGEX MATCH returns only the first occurrence, so a file containing
# several PD_REGISTER_KERNEL calls contributes a single declaration.
function(find_phi_register FILENAME ADD_PATH)
  file(READ "${FILENAME}" content)

  # Capture "PD_REGISTER_KERNEL(<name>, <backend>, <layout>" while tolerating
  # whitespace/newlines and an optional "// cuda_only" marker between the
  # macro arguments.
  string(
    REGEX
      MATCH
      "PD_REGISTER_KERNEL\\([ \t\r\n]*[a-z0-9_]*,[[ \\\t\r\n\/]*[a-z0-9_]*]?[ \\\t\r\n]*[a-zA-Z]*,[ \\\t\r\n]*[A-Z_]*"
      register
      "${content}")
  if(NOT register STREQUAL "")
    # Strip the macro name, split the arguments into a list, then remove the
    # whitespace and "//cuda_only" noise left inside the captured text.
    string(REPLACE "PD_REGISTER_KERNEL(" "" register "${register}")
    string(REPLACE "," ";" register "${register}")
    string(REGEX REPLACE "[ \\\t\r\n]+" "" register "${register}")
    string(REGEX REPLACE "//cuda_only" "" register "${register}")
    list(GET register 0 kernel_name)
    list(GET register 1 kernel_backend)
    list(GET register 2 kernel_layout)

    file(
      APPEND "${ADD_PATH}"
      "PD_DECLARE_KERNEL(${kernel_name}, ${kernel_backend}, ${kernel_layout});\n"
    )
  endif()
endfunction()

function(op_library TARGET)
# op_library is a function to create op library. The interface is same as
# cc_library. But it handle split GPU/CPU code and link some common library
Expand Down Expand Up @@ -371,6 +400,8 @@ function(op_library TARGET)
foreach(cc_src ${cc_srcs})
# pybind USE_OP_ITSELF
set(op_name "")
# Add PHI Kernel Registry Message
find_phi_register(${cc_src} ${pybind_file})
find_register(${cc_src} "REGISTER_OPERATOR" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
Expand Down Expand Up @@ -408,6 +439,8 @@ function(op_library TARGET)
# message("cu_srcs ${cu_srcs}")
foreach(cu_src ${cu_srcs})
set(op_name "")
# Add PHI Kernel Registry Message
find_phi_register(${cu_src} ${pybind_file})
find_register(${cu_src} "REGISTER_OP_CUDA_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
Expand Down
5 changes: 3 additions & 2 deletions paddle/fluid/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ proto_library(trainer_desc_proto SRCS trainer_desc.proto DEPS framework_proto
cc_library(
string_array
SRCS string_array.cc
DEPS utf8proc)
DEPS utf8proc phi_enforce)

cc_library(
data_type
Expand Down Expand Up @@ -233,7 +233,8 @@ cc_test(
cc_library(
var_type_traits
SRCS var_type_traits.cc
DEPS framework_proto scope tensor_array sparse_coo_tensor sparse_csr_tensor)
DEPS framework_proto scope tensor_array sparse_coo_tensor sparse_csr_tensor
extended_tensor)
if(WITH_GPU)
target_link_libraries(var_type_traits dynload_cuda)
endif()
Expand Down
12 changes: 12 additions & 0 deletions paddle/fluid/framework/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/fluid/framework/details/nan_inf_utils.h"
#include "paddle/fluid/framework/op_call_stack.h"
#include "paddle/fluid/framework/phi_utils.h"
#include "paddle/fluid/framework/raw_tensor.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/framework/unused_var_check.h"
Expand Down Expand Up @@ -3008,6 +3009,9 @@ void OperatorWithKernel::BuildPhiKernelContext(
need_prepare_phi_data_ = true;
tensor_in = &(var->Get<framework::LoDTensorArray>());
phi_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
} else if (var->IsType<framework::Vocab>()) {
tensor_in = &(var->Get<framework::Vocab>());
phi_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported input `%s` type when call pt kernel.",
Expand Down Expand Up @@ -3057,6 +3061,13 @@ void OperatorWithKernel::BuildPhiKernelContext(
// Note: If the input LoDTensorArray size is 0, the output
// LoDTensorArray is also 0
phi_kernel_context->EmplaceBackOutputWithoutSetRange(tensor_out);
} else if (var->template IsType<paddle::framework::RawTensor>()) {
tensor_out = var->template GetMutable<paddle::framework::RawTensor>();
phi_kernel_context->EmplaceBackOutputWithoutSetRange(tensor_out);
} else if (!var->IsInitialized()) {
// The following is for RAW type of var
tensor_out = var->template GetMutable<paddle::framework::RawTensor>();
phi_kernel_context->EmplaceBackOutputWithoutSetRange(tensor_out);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported output `%s` type when call pt kernel.",
Expand Down Expand Up @@ -3156,6 +3167,7 @@ void OperatorWithKernel::BuildPhiKernelContext(
}
}
break;

case phi::AttributeType::SCALARS: {
PADDLE_ENFORCE_NE(
attr_iter,
Expand Down
78 changes: 78 additions & 0 deletions paddle/fluid/framework/raw_tensor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>
#include <typeindex>
#include <unordered_map>

#include "paddle/phi/core/extended_tensor.h"
#include "paddle/utils/any.h"

namespace paddle {
namespace framework {

/// \brief Fluid Kernel and PHI Kernel will be unified in the future.
/// So, we need a class in PHI that can represent the RAW type in Fluid.
/// The RawTensor is for PHI Kernel that has RAW type arguments.
class RawTensor : public phi::ExtendedTensor,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fluid 下的 RawType 是一个数据类型,然而在 phi 下变成了 "Tensor",这个概念是有差异的:

  1. 后续是否考虑在 phi 下对 enum DataType 做扩展?(phi 下如何表示 RawTensor 的数据类型?)
  2. 如果 phi 下需要做数据类型转换,RawType 如何对应到 phi 下的数据类型?(是否会扩展 TransToProtoVarType函数?)

RawType in fluid is a datatype; however, it corresponds to a "Tensor" in phi. The concepts of RawType and RawTensor differ:

  1. Will phi extend enum DataType in the future? (How to represent the datatype of RawTensor? )
  2. If we need to transform the datatype between fluid and phi, how does RawType match the datatype under phi?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里RawTensor命名后边可以更改为Raw,可能理解上就更准确一点。
准确意义上来说,RawType在fluid下不是一个具体的数据类型,我的理解把它作为一个标志更为合理一点,类似于DataType里的Undefined。对于RawType的具体类型(可以是任意类型)是在执行Kernel的时候确定,Fluid由于是结构体类型的Kernel,其对于RawType的具体类型处理可以通过传入的参数「ExecutionContext」来展开进行。但是对于PHI Kernel,由于是函数式的,其函数参数类型必须明确,所以这里必须存在一个数据结构能表示不确定的数据类型。所以对于以上提出的俩点:
1,DataType里不需要扩展,从上边可以看出,Raw类型的在PHI下必须是一个类类型,DataType里存放的都是基础数据类型,所以显然是不合适的。
2,PHI下也不需要类型转换,因为RawType在Fluid下就不是一个具体的类型

public phi::TypeInfoTraits<phi::TensorBase, RawTensor> {
 public:
  RawTensor() = default;

  RawTensor(RawTensor&& other) = default;

  RawTensor(const RawTensor& other) = default;

  RawTensor& operator=(RawTensor&& other) = default;

  /// \brief Destroy the RawTensor and release exclusive resources.
  /// The object created by GetMutable() is owned by data_holder_ and is
  /// released automatically when the last copy sharing it is destroyed.
  virtual ~RawTensor() = default;

 public:
  /// \brief Returns the name of the class for type traits.
  /// \return The name of the class.
  static const char* name() { return "RawTensor"; }

  /// \brief Returns a mutable pointer to the stored object, creating a
  /// default-constructed T on first call.
  /// \throw phi::errors::InvalidArgument if the object was already created
  /// with a different type than T.
  template <typename T>
  T* GetMutable() {
    if (!data_.empty()) {
      try {
        return paddle::any_cast<T*>(data_);
      } catch (paddle::bad_any_cast&) {
        PADDLE_THROW(phi::errors::InvalidArgument(
            "Invalid data type error, expected %s, actual %s.",
            typeid(T).name(),
            data_type_.name()));
      }
    }
    T* created_data = new T();
    data_ = created_data;
    // Own the allocation through a shared_ptr with a typed deleter so it is
    // freed when the last RawTensor sharing it goes away. (The previous
    // std::function deleter was stored but never invoked — the allocation
    // leaked — and invoking it from copies would have double-freed.)
    data_holder_.reset(created_data, [](T* ptr) { delete ptr; });
    data_type_ = std::type_index(typeid(T));
    return created_data;
  }

  /// \brief Checks whether the stored object was created as type T.
  template <typename T>
  bool IsType() const {
    return std::type_index(typeid(T)) == data_type_;
  }

 private:
  // Holds the created object as a raw T* (non-owning view).
  paddle::any data_;
  // Owns the allocation created by GetMutable(); shared among copies.
  std::shared_ptr<void> data_holder_;
  // Runtime type of the created object; typeid(void) while empty.
  std::type_index data_type_ = std::type_index(typeid(void));
};

} // namespace framework
} // namespace paddle
71 changes: 70 additions & 1 deletion paddle/fluid/framework/string_array.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,82 @@ limitations under the License. */
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/phi/core/extended_tensor.h"

namespace paddle {
namespace framework {

/// \brief Vocab wraps an unordered std::wstring -> std::int32_t map as a
/// PHI-compatible tensor type (phi::ExtendedTensor) so that vocabulary
/// variables can be passed through PHI kernel arguments.
class Vocab : public phi::ExtendedTensor,
              public phi::TypeInfoTraits<phi::TensorBase, Vocab> {
 public:
  Vocab() = default;

  Vocab(Vocab&& other) = default;

  Vocab(const Vocab& other) = default;

  Vocab& operator=(const Vocab& other) = default;

  Vocab& operator=(Vocab&& other) = default;

  /// \brief Copy-assign from a plain unordered_map.
  Vocab& operator=(
      const std::unordered_map<std::wstring, std::int32_t>& other) {
    this->data_ = other;
    return *this;
  }

  /// \brief Move-assign from a plain unordered_map, avoiding a deep copy of
  /// the whole vocabulary. Leaves `other` empty.
  Vocab& operator=(std::unordered_map<std::wstring, std::int32_t>&& other) {
    this->data_.swap(other);
    other.clear();
    return *this;
  }

  /// \brief Destroy the Vocab and release exclusive resources.
  virtual ~Vocab() = default;

 public:
  /// \brief Returns the name of the class for type traits.
  /// \return The name of the class.
  static const char* name() { return "Vocab"; }

  /// \brief Number of entries in the vocabulary.
  size_t size() const { return data_.size(); }

  /// \brief Remove all entries.
  void clear() { data_.clear(); }

  /// \brief Insert a (token, id) pair; has no effect if `key` already exists
  /// (std::unordered_map::emplace semantics).
  void emplace(const std::wstring& key, std::int32_t value) {
    data_.emplace(key, value);
  }

  /// \brief Id mapped to `key`; throws std::out_of_range if absent.
  std::int32_t at(const std::wstring& key) { return data_.at(key); }

  std::int32_t at(const std::wstring& key) const { return data_.at(key); }

  /// \brief Lookup returning an iterator, or end() if `key` is absent.
  std::unordered_map<std::wstring, std::int32_t>::iterator find(
      const std::wstring& key) {
    return data_.find(key);
  }

  std::unordered_map<std::wstring, std::int32_t>::const_iterator find(
      const std::wstring& key) const {
    return data_.find(key);
  }

  // Iterator access to the underlying map (enables range-for).
  std::unordered_map<std::wstring, std::int32_t>::iterator begin() {
    return data_.begin();
  }

  std::unordered_map<std::wstring, std::int32_t>::const_iterator begin() const {
    return data_.begin();
  }

  std::unordered_map<std::wstring, std::int32_t>::iterator end() {
    return data_.end();
  }

  std::unordered_map<std::wstring, std::int32_t>::const_iterator end() const {
    return data_.end();
  }

 private:
  std::unordered_map<std::wstring, std::int32_t> data_;
};

using String = std::string;
using Strings = std::vector<std::string>;
using Vocab = std::unordered_map<std::wstring, std::int32_t>;

// Convert the std::string type to the std::wstring type.
bool ConvertStrToWstr(const std::string& src, std::wstring* res);
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/framework/var_type_traits.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "paddle/fluid/platform/device/xpu/bkcl_helper.h"
#endif

#include "paddle/fluid/framework/raw_tensor.h"
#include "paddle/fluid/operators/cuda_graph_with_in_out.h"

namespace paddle {
Expand Down
4 changes: 3 additions & 1 deletion paddle/fluid/framework/var_type_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/raw_tensor.h"
#include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
Expand Down Expand Up @@ -219,7 +220,8 @@ using VarTypeRegistry = detail::VarTypeRegistryImpl<
float,
Vocab,
std::vector<int>,
std::vector<float>>;
std::vector<float>,
RawTensor>;
template <typename T>
struct VarTypeTrait {
static_assert(VarTypeRegistry::IsRegistered<T>(), "Must be registered type");
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/framework/var_type_traits_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#if defined(PADDLE_WITH_XPU_BKCL)
#include "paddle/fluid/platform/device/xpu/bkcl_helper.h"
#endif
#include "paddle/fluid/framework/raw_tensor.h"

namespace paddle {
namespace framework {
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/variable_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ namespace framework {
TEST(Variable, GetMutable) {
std::unique_ptr<Variable> v(new Variable());

auto* t = v->GetMutable<std::string>();
auto* t = v->GetMutable<String>();
*t = "1234";

const auto& tt = v->Get<std::string>();
const auto& tt = v->Get<String>();
EXPECT_EQ("1234", tt);

try {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/imperative/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cc_library(
cc_library(
var_helper
SRCS var_helper.cc
DEPS tensor selected_rows)
DEPS tensor selected_rows extended_tensor)
if(WITH_XPU)
cc_library(
prepared_operator
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/jit/layer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ std::vector<std::string> Layer::FunctionNames() const {

PD_SPECIALZE_ATTRIBUTE_TYPE(int)
PD_SPECIALZE_ATTRIBUTE_TYPE(float)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::string)
PD_SPECIALZE_ATTRIBUTE_TYPE(framework::String)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<int>)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<float>)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<std::string>)
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/jit/layer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ TEST(CpuLayerTest, Construct) {
int ds = layer.Attribute<int>("down_sampling");
EXPECT_EQ(ds, 4);

std::string fstr = layer.Attribute<std::string>("fstr");
std::string fstr = layer.Attribute<framework::String>("fstr");
EXPECT_STREQ(fstr.c_str(), "save str property");

std::vector<int> ints = layer.Attribute<std::vector<int>>("ints");
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/jit/property.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ std::unordered_map<std::string, std::shared_ptr<Variable>> Property::Values() {
*var->GetMutable<int>() = static_cast<int>(GetInt64(n));
break;
case ValueProto::STRING:
*var->GetMutable<std::string>() = GetString(n);
*var->GetMutable<paddle::framework::String>() = GetString(n);
break;
case ValueProto::FLOATS:
*var->GetMutable<std::vector<float>>() = GetFloats(n);
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ unset(OP_LIBRARY CACHE)
set(pybind_file ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h.tmp CACHE INTERNAL "pybind.h file")
set(pybind_file_prune ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h.prune CACHE INTERNAL "pybind.h file")
set(pybind_file_final ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h)
file(WRITE ${pybind_file} "// Generated by the paddle/fluid/operators/CMakeLists.txt. DO NOT EDIT!\n\n")
file(WRITE ${pybind_file} "#include \"paddle/phi/core/kernel_registry.h\" // Generated by the paddle/fluid/operators/CMakeLists.txt. DO NOT EDIT!\n\n")

add_subdirectory(math)
add_subdirectory(controlflow)
Expand Down Expand Up @@ -109,7 +109,7 @@ register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combin
op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc run_program_op_npu.cc DEPS executor_cache ${OP_HEADER_DEPS})
target_link_libraries(run_program_op cuda_graph_with_memory_pool)
op_library(quantize_linear_op DEPS phi)
op_library(save_combine_op DEPS string_array)
op_library(save_combine_op DEPS string_array phi)
op_library(load_combine_op DEPS string_array)

if (WITH_GPU OR WITH_ROCM)
Expand Down
Loading