Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PHI]Add new Tensor type and migrate save_combine kernel #47856

Merged
merged 15 commits into from
Dec 12, 2022
33 changes: 33 additions & 0 deletions cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,35 @@ function(find_register FILENAME PATTERN OUTPUT)
PARENT_SCOPE)
endfunction()

# Scan FILENAME for the first PD_REGISTER_KERNEL(...) registration and, if one
# is found, append a matching "PD_DECLARE_KERNEL(name, backend, layout);" line
# to the generated file at ADD_PATH.
#
# FILENAME - path of the source file to scan.
# ADD_PATH - path of the generated declarations file to append to.
#
# NOTE: REGEX MATCH returns only the first occurrence, so a file containing
# several PD_REGISTER_KERNEL calls contributes a single declaration.
function(find_phi_register FILENAME ADD_PATH)
  file(READ "${FILENAME}" content)

  # Capture "PD_REGISTER_KERNEL(<name>, <backend>, <layout>" while tolerating
  # whitespace/newlines and an optional "// cuda_only" marker between the
  # macro arguments.
  string(
    REGEX
      MATCH
      "PD_REGISTER_KERNEL\\([ \t\r\n]*[a-z0-9_]*,[[ \\\t\r\n\/]*[a-z0-9_]*]?[ \\\t\r\n]*[a-zA-Z]*,[ \\\t\r\n]*[A-Z_]*"
      register
      "${content}")
  if(NOT register STREQUAL "")
    # Strip the macro name, split the arguments into a list, then remove the
    # whitespace and "//cuda_only" noise left inside the captured text.
    string(REPLACE "PD_REGISTER_KERNEL(" "" register "${register}")
    string(REPLACE "," ";" register "${register}")
    string(REGEX REPLACE "[ \\\t\r\n]+" "" register "${register}")
    string(REGEX REPLACE "//cuda_only" "" register "${register}")
    list(GET register 0 kernel_name)
    list(GET register 1 kernel_backend)
    list(GET register 2 kernel_layout)

    file(
      APPEND "${ADD_PATH}"
      "PD_DECLARE_KERNEL(${kernel_name}, ${kernel_backend}, ${kernel_layout});\n"
    )
  endif()
endfunction()

function(op_library TARGET)
# op_library is a function to create op library. The interface is same as
# cc_library. But it handle split GPU/CPU code and link some common library
Expand Down Expand Up @@ -371,6 +400,8 @@ function(op_library TARGET)
foreach(cc_src ${cc_srcs})
# pybind USE_OP_ITSELF
set(op_name "")
# Add PHI Kernel Registry Message
find_phi_register(${cc_src} ${pybind_file})
find_register(${cc_src} "REGISTER_OPERATOR" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
Expand Down Expand Up @@ -408,6 +439,8 @@ function(op_library TARGET)
# message("cu_srcs ${cu_srcs}")
foreach(cu_src ${cu_srcs})
set(op_name "")
# Add PHI Kernel Registry Message
find_phi_register(${cu_src} ${pybind_file})
find_register(${cu_src} "REGISTER_OP_CUDA_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
Expand Down
5 changes: 3 additions & 2 deletions paddle/fluid/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ proto_library(trainer_desc_proto SRCS trainer_desc.proto DEPS framework_proto
cc_library(
string_array
SRCS string_array.cc
DEPS utf8proc)
DEPS utf8proc phi_enforce)

cc_library(
data_type
Expand Down Expand Up @@ -233,7 +233,8 @@ cc_test(
cc_library(
var_type_traits
SRCS var_type_traits.cc
DEPS framework_proto scope tensor_array sparse_coo_tensor sparse_csr_tensor)
DEPS framework_proto scope tensor_array sparse_coo_tensor sparse_csr_tensor
extended_tensor)
if(WITH_GPU)
target_link_libraries(var_type_traits dynload_cuda)
endif()
Expand Down
12 changes: 12 additions & 0 deletions paddle/fluid/framework/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/fluid/framework/details/nan_inf_utils.h"
#include "paddle/fluid/framework/op_call_stack.h"
#include "paddle/fluid/framework/phi_utils.h"
#include "paddle/fluid/framework/raw_tensor.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/framework/unused_var_check.h"
Expand Down Expand Up @@ -3008,6 +3009,9 @@ void OperatorWithKernel::BuildPhiKernelContext(
need_prepare_phi_data_ = true;
tensor_in = &(var->Get<framework::LoDTensorArray>());
phi_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
} else if (var->IsType<framework::Vocab>()) {
tensor_in = &(var->Get<framework::Vocab>());
phi_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported input `%s` type when call pt kernel.",
Expand Down Expand Up @@ -3057,6 +3061,13 @@ void OperatorWithKernel::BuildPhiKernelContext(
// Note: If the input LoDTensorArray size is 0, the output
// LoDTensorArray is also 0
phi_kernel_context->EmplaceBackOutputWithoutSetRange(tensor_out);
} else if (var->template IsType<paddle::framework::RawTensor>()) {
tensor_out = var->template GetMutable<paddle::framework::RawTensor>();
phi_kernel_context->EmplaceBackOutputWithoutSetRange(tensor_out);
} else if (!var->IsInitialized()) {
// The following is for RAW type of var
tensor_out = var->template GetMutable<paddle::framework::RawTensor>();
phi_kernel_context->EmplaceBackOutputWithoutSetRange(tensor_out);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported output `%s` type when call pt kernel.",
Expand Down Expand Up @@ -3156,6 +3167,7 @@ void OperatorWithKernel::BuildPhiKernelContext(
}
}
break;

case phi::AttributeType::SCALARS: {
PADDLE_ENFORCE_NE(
attr_iter,
Expand Down
78 changes: 78 additions & 0 deletions paddle/fluid/framework/raw_tensor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>
#include <typeindex>
#include <unordered_map>

#include "paddle/phi/core/extended_tensor.h"
#include "paddle/utils/any.h"

namespace paddle {
namespace framework {

/// \brief Fluid Kernel and PHI Kernel will be unified in the future.
/// So, we need a class in PHI that can represent the RAW type in Fluid.
/// The RawTensor is for PHI Kernel that has RAW type arguments.
class RawTensor : public phi::ExtendedTensor,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fluid 下的 RawType 是一个数据类型,然而在 phi 下变成了 "Tensor",这个概念是有差异的:

  1. 后续是否考虑在 phi 下对 enum DataType 做扩展?(phi 下如何表示 RawTensor 的数据类型?)
  2. 如果 phi 下需要做数据类型转换,RawType 如何对应到 phi 下的数据类型?(是否会扩展 TransToProtoVarType函数?)

RawType in fluid is a datatype; however, it corresponds to a "Tensor" in phi. The concepts of RawType and RawTensor differ:

  1. Will phi extend enum DataType in the future? (How to represent the datatype of RawTensor? )
  2. If we need to transform the datatype between fluid and phi, how does RawType match the datatype under phi?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里RawTensor命名后边可以更改为Raw,可能理解上就更准确一点。
准确意义上来说,RawType在fluid下不是一个具体的数据类型,我的理解把它作为一个标志更为合理一点,类似于DataType里的Undefined。对于RawType的具体类型(可以是任意类型)是在执行Kernel的时候确定,Fluid由于是结构体类型的Kernel,其对于RawType的具体类型处理可以通过传入的参数「ExecutionContext」来展开进行。但是对于PHI Kernel,由于是函数式的,其函数参数类型必须明确,所以这里必须存在一个数据结构能表示不确定的数据类型。所以对于以上提出的俩点:
1,DataType里不需要扩展,从上边可以看出,Raw类型的在PHI下必须是一个类类型,DataType里存放的都是基础数据类型,所以显然是不合适的。
2,PHI下也不需要类型转换,因为RawType在Fluid下就不是一个具体的类型

public phi::TypeInfoTraits<phi::TensorBase, RawTensor> {
 public:
  RawTensor() = default;

  RawTensor(RawTensor&& other) = default;

  RawTensor(const RawTensor& other) = default;

  RawTensor& operator=(RawTensor&& other) = default;

  /// \brief Destroy the RawTensor and release exclusive resources.
  /// The object created by GetMutable() is owned by data_holder_ and is
  /// released automatically when the last copy sharing it is destroyed.
  virtual ~RawTensor() = default;

 public:
  /// \brief Returns the name of the class for type traits.
  /// \return The name of the class.
  static const char* name() { return "RawTensor"; }

  /// \brief Returns a mutable pointer to the stored object, creating a
  /// default-constructed T on first call.
  /// \throw phi::errors::InvalidArgument if the object was already created
  /// with a different type than T.
  template <typename T>
  T* GetMutable() {
    if (!data_.empty()) {
      try {
        return paddle::any_cast<T*>(data_);
      } catch (paddle::bad_any_cast&) {
        PADDLE_THROW(phi::errors::InvalidArgument(
            "Invalid data type error, expected %s, actual %s.",
            typeid(T).name(),
            data_type_.name()));
      }
    }
    T* created_data = new T();
    data_ = created_data;
    // Own the allocation through a shared_ptr with a typed deleter so it is
    // freed when the last RawTensor sharing it goes away. (The previous
    // std::function deleter was stored but never invoked — the allocation
    // leaked — and invoking it from copies would have double-freed.)
    data_holder_.reset(created_data, [](T* ptr) { delete ptr; });
    data_type_ = std::type_index(typeid(T));
    return created_data;
  }

  /// \brief Checks whether the stored object was created as type T.
  template <typename T>
  bool IsType() const {
    return std::type_index(typeid(T)) == data_type_;
  }

 private:
  // Holds the created object as a raw T* (non-owning view).
  paddle::any data_;
  // Owns the allocation created by GetMutable(); shared among copies.
  std::shared_ptr<void> data_holder_;
  // Runtime type of the created object; typeid(void) while empty.
  std::type_index data_type_ = std::type_index(typeid(void));
};

} // namespace framework
} // namespace paddle
71 changes: 70 additions & 1 deletion paddle/fluid/framework/string_array.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,82 @@ limitations under the License. */
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/phi/core/extended_tensor.h"

namespace paddle {
namespace framework {

/// \brief Vocab wraps an unordered std::wstring -> std::int32_t map as a
/// PHI-compatible tensor type (phi::ExtendedTensor) so that vocabulary
/// variables can be passed through PHI kernel arguments.
class Vocab : public phi::ExtendedTensor,
              public phi::TypeInfoTraits<phi::TensorBase, Vocab> {
 public:
  Vocab() = default;

  Vocab(Vocab&& other) = default;

  Vocab(const Vocab& other) = default;

  Vocab& operator=(const Vocab& other) = default;

  Vocab& operator=(Vocab&& other) = default;

  /// \brief Copy-assign from a plain unordered_map.
  Vocab& operator=(
      const std::unordered_map<std::wstring, std::int32_t>& other) {
    this->data_ = other;
    return *this;
  }

  /// \brief Move-assign from a plain unordered_map, avoiding a deep copy of
  /// the whole vocabulary. Leaves `other` empty.
  Vocab& operator=(std::unordered_map<std::wstring, std::int32_t>&& other) {
    this->data_.swap(other);
    other.clear();
    return *this;
  }

  /// \brief Destroy the Vocab and release exclusive resources.
  virtual ~Vocab() = default;

 public:
  /// \brief Returns the name of the class for type traits.
  /// \return The name of the class.
  static const char* name() { return "Vocab"; }

  /// \brief Number of entries in the vocabulary.
  size_t size() const { return data_.size(); }

  /// \brief Remove all entries.
  void clear() { data_.clear(); }

  /// \brief Insert a (token, id) pair; has no effect if `key` already exists
  /// (std::unordered_map::emplace semantics).
  void emplace(const std::wstring& key, std::int32_t value) {
    data_.emplace(key, value);
  }

  /// \brief Id mapped to `key`; throws std::out_of_range if absent.
  std::int32_t at(const std::wstring& key) { return data_.at(key); }

  std::int32_t at(const std::wstring& key) const { return data_.at(key); }

  /// \brief Lookup returning an iterator, or end() if `key` is absent.
  std::unordered_map<std::wstring, std::int32_t>::iterator find(
      const std::wstring& key) {
    return data_.find(key);
  }

  std::unordered_map<std::wstring, std::int32_t>::const_iterator find(
      const std::wstring& key) const {
    return data_.find(key);
  }

  // Iterator access to the underlying map (enables range-for).
  std::unordered_map<std::wstring, std::int32_t>::iterator begin() {
    return data_.begin();
  }

  std::unordered_map<std::wstring, std::int32_t>::const_iterator begin() const {
    return data_.begin();
  }

  std::unordered_map<std::wstring, std::int32_t>::iterator end() {
    return data_.end();
  }

  std::unordered_map<std::wstring, std::int32_t>::const_iterator end() const {
    return data_.end();
  }

 private:
  std::unordered_map<std::wstring, std::int32_t> data_;
};

using String = std::string;
using Strings = std::vector<std::string>;
using Vocab = std::unordered_map<std::wstring, std::int32_t>;

// Convert the std::string type to the std::wstring type.
bool ConvertStrToWstr(const std::string& src, std::wstring* res);
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/framework/var_type_traits.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "paddle/fluid/platform/device/xpu/bkcl_helper.h"
#endif

#include "paddle/fluid/framework/raw_tensor.h"
#include "paddle/fluid/operators/cuda_graph_with_in_out.h"

namespace paddle {
Expand Down
4 changes: 3 additions & 1 deletion paddle/fluid/framework/var_type_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/raw_tensor.h"
#include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
Expand Down Expand Up @@ -219,7 +220,8 @@ using VarTypeRegistry = detail::VarTypeRegistryImpl<
float,
Vocab,
std::vector<int>,
std::vector<float>>;
std::vector<float>,
RawTensor>;
template <typename T>
struct VarTypeTrait {
static_assert(VarTypeRegistry::IsRegistered<T>(), "Must be registered type");
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/framework/var_type_traits_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#if defined(PADDLE_WITH_XPU_BKCL)
#include "paddle/fluid/platform/device/xpu/bkcl_helper.h"
#endif
#include "paddle/fluid/framework/raw_tensor.h"

namespace paddle {
namespace framework {
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/variable_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ namespace framework {
TEST(Variable, GetMutable) {
std::unique_ptr<Variable> v(new Variable());

auto* t = v->GetMutable<std::string>();
auto* t = v->GetMutable<String>();
*t = "1234";

const auto& tt = v->Get<std::string>();
const auto& tt = v->Get<String>();
EXPECT_EQ("1234", tt);

try {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/imperative/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cc_library(
cc_library(
var_helper
SRCS var_helper.cc
DEPS tensor selected_rows)
DEPS tensor selected_rows extended_tensor)
if(WITH_XPU)
cc_library(
prepared_operator
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/jit/layer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ std::vector<std::string> Layer::FunctionNames() const {

PD_SPECIALZE_ATTRIBUTE_TYPE(int)
PD_SPECIALZE_ATTRIBUTE_TYPE(float)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::string)
PD_SPECIALZE_ATTRIBUTE_TYPE(framework::String)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<int>)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<float>)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<std::string>)
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/jit/layer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ TEST(CpuLayerTest, Construct) {
int ds = layer.Attribute<int>("down_sampling");
EXPECT_EQ(ds, 4);

std::string fstr = layer.Attribute<std::string>("fstr");
std::string fstr = layer.Attribute<framework::String>("fstr");
EXPECT_STREQ(fstr.c_str(), "save str property");

std::vector<int> ints = layer.Attribute<std::vector<int>>("ints");
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/jit/property.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ std::unordered_map<std::string, std::shared_ptr<Variable>> Property::Values() {
*var->GetMutable<int>() = static_cast<int>(GetInt64(n));
break;
case ValueProto::STRING:
*var->GetMutable<std::string>() = GetString(n);
*var->GetMutable<paddle::framework::String>() = GetString(n);
break;
case ValueProto::FLOATS:
*var->GetMutable<std::vector<float>>() = GetFloats(n);
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ unset(OP_LIBRARY CACHE)
set(pybind_file ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h.tmp CACHE INTERNAL "pybind.h file")
set(pybind_file_prune ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h.prune CACHE INTERNAL "pybind.h file")
set(pybind_file_final ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h)
file(WRITE ${pybind_file} "// Generated by the paddle/fluid/operators/CMakeLists.txt. DO NOT EDIT!\n\n")
file(WRITE ${pybind_file} "#include \"paddle/phi/core/kernel_registry.h\" // Generated by the paddle/fluid/operators/CMakeLists.txt. DO NOT EDIT!\n\n")

add_subdirectory(math)
add_subdirectory(controlflow)
Expand Down Expand Up @@ -109,7 +109,7 @@ register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combin
op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc run_program_op_npu.cc DEPS executor_cache ${OP_HEADER_DEPS})
target_link_libraries(run_program_op cuda_graph_with_memory_pool)
op_library(quantize_linear_op DEPS phi)
op_library(save_combine_op DEPS string_array)
op_library(save_combine_op DEPS string_array phi)
op_library(load_combine_op DEPS string_array)

if (WITH_GPU OR WITH_ROCM)
Expand Down
Loading