From 06748210d4771b37bd964e25513102cd2e0fccbf Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 12 Jul 2017 18:05:41 +0800 Subject: [PATCH 01/10] Fix some link errors about NNPACK. --- CMakeLists.txt | 3 ++- .../nnpack => cmake/external}/nnpack.cmake | 14 +++++++++++ paddle/function/CMakeLists.txt | 1 - paddle/function/nnpack/NNPACKConvOp.cpp | 23 +++++++++++-------- 4 files changed, 29 insertions(+), 12 deletions(-) rename {paddle/function/nnpack => cmake/external}/nnpack.cmake (54%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c713db3e3854..af58957ea8933 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -135,7 +135,8 @@ if(WITH_GPU) endif(WITH_GPU) if(USE_NNPACK) - list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt") + include(external/nnpack) + list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS}) endif(USE_NNPACK) add_subdirectory(proto) diff --git a/paddle/function/nnpack/nnpack.cmake b/cmake/external/nnpack.cmake similarity index 54% rename from paddle/function/nnpack/nnpack.cmake rename to cmake/external/nnpack.cmake index 7182730ae8f13..d42bcb0f32904 100644 --- a/paddle/function/nnpack/nnpack.cmake +++ b/cmake/external/nnpack.cmake @@ -7,10 +7,24 @@ set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK") find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include) find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib) find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib) +find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib) +find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib) if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB) set(NNPACK_FOUND ON) INCLUDE_DIRECTORIES(${NNPACK_INC_DIR}) + + set(NNPACK_LIBS) + list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB}) + if (NNPACK_UKERNELS_LIB) + list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB}) + endif() + if (NNPACK_CPUFEATURES_LIB) + list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB}) + endif() + if(NOT ANDROID) + list(APPEND NNPACK_LIBS "rt") + endif() else() message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})") endif() diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 1518a8a654cfb..a5b14c0c71c18 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -11,7 +11,6 @@ if(WITH_GPU) endif() if(USE_NNPACK) - include(nnpack/nnpack.cmake) list(APPEND cpp_files nnpack/NNPACKConvOp.cpp) if(WITH_TESTING) add_unittest(NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp) diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp index e8080c3d714b3..e83bca5d9f0a6 100644 --- a/paddle/function/nnpack/NNPACKConvOp.cpp +++ b/paddle/function/nnpack/NNPACKConvOp.cpp @@ -58,18 +58,10 @@ class NNPACKConvFunction : public ConvFunctionBase { workspaceBuffer_ = nullptr; workspaceSize_ = 0; - threadpool_ = nullptr; - if (FLAGS_nnpack_num_threads) { - threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads); - VLOG(3) << "Number of threads " - << pthreadpool_get_threads_count(threadpool_); - } + create_nnpack_threadpool(); } ~NNPACKConvFunction() { - if (threadpool_) { - pthreadpool_destroy(threadpool_); - } if (workspaceBuffer_) { free(workspaceBuffer_); } @@ -225,14 +217,25 @@ class NNPACKConvFunction : public ConvFunctionBase { } } + static void create_nnpack_threadpool() { + if (FLAGS_nnpack_num_threads && threadpool_ == nullptr) { + threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads); + VLOG(3) << "Number of threads " + << pthreadpool_get_threads_count(threadpool_); + } + } + private: nnp_convolution_algorithm algorithm_; nnp_convolution_transform_strategy transform_strategy_; void* workspaceBuffer_; size_t workspaceSize_; - pthreadpool_t threadpool_; + static pthreadpool_t threadpool_; }; +template +pthreadpool_t NNPACKConvFunction::threadpool_ = nullptr; + REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction); } // namespace paddle From 891e5dcc48590375d37364634838b6da260fd41e Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 12 Jul 2017 20:13:07 +0800 Subject: [PATCH 02/10] Modify the default value of nnpack_allocate_outside. --- paddle/function/nnpack/NNPACKConvOp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp index e83bca5d9f0a6..f0ec77a5d0033 100644 --- a/paddle/function/nnpack/NNPACKConvOp.cpp +++ b/paddle/function/nnpack/NNPACKConvOp.cpp @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/function/ConvOp.h" DEFINE_bool(nnpack_allocate_outside, - false, + true, "Allocate and free workspace memory outside the NNPACK interface."); DEFINE_int32(nnpack_num_threads, 0, From 68adb9541d339ffd0df43a7a45a5a4adf16f2067 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sat, 15 Jul 2017 15:00:18 +0800 Subject: [PATCH 03/10] enbale tensor memory test --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/tensor.h | 50 ++++++++------ paddle/framework/tensor_test.cc | 118 +++++++++++++++++--------------- 3 files changed, 92 insertions(+), 78 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 8415ce67e9039..f7f606e4b8cf8 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -2,7 +2,7 @@ cc_library(ddim SRCS ddim.cc) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) -cc_test(tensor_test SRCS tensor_test.cc DEPS ddim) +cc_test(tensor_test SRCS tensor_test.cc DEPS ddim paddle_memory) cc_test(variable_test SRCS variable_test.cc) cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 62e0710a8244c..81db722c99f07 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -29,8 +29,6 @@ class Tensor { public: Tensor() : numel_(0), offset_(0) {} - Tensor& operator=(const Tensor& src) = delete; - template const T* data() const { CheckDims(); @@ -39,13 +37,13 @@ class Tensor { } template - T* mutable_data(DDim dims, paddle::platform::Place place) { + T* mutable_data(DDim dims, platform::Place place) { set_dims(dims); return mutable_data(place); } template - T* mutable_data(paddle::platform::Place place) { + T* mutable_data(platform::Place place) { PADDLE_ENFORCE(numel_ > 0, "Tensor::numel_ must be larger than zero to call " "Tensor::mutable_data. Call Tensor::set_dim first."); @@ -53,7 +51,18 @@ class Tensor { !(holder_->place() == place) /* some versions of boost::variant don't have operator!= */ || holder_->size() < numel_ * sizeof(T) + offset_) { - holder_.reset(new PlaceholderImpl(place, numel_ * sizeof(T))); + switch (place.which()) { + case 0: + holder_.reset(new PlaceholderImpl( + boost::get(place), numel_ * sizeof(T))); + break; + + case 1: + holder_.reset(new PlaceholderImpl( + boost::get(place), numel_ * sizeof(T))); + break; + } + offset_ = 0; } return reinterpret_cast(reinterpret_cast(holder_->ptr()) + @@ -69,7 +78,7 @@ class Tensor { } template - void CopyFrom(const Tensor& src, paddle::platform::Place dst_place) { + void CopyFrom(const Tensor& src, platform::Place dst_place) { PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) && platform::is_cpu_place(dst_place), "Tensor::CopyFrom only support CPU now."); @@ -119,38 +128,37 @@ class Tensor { struct Placeholder { virtual ~Placeholder() {} virtual void* ptr() const = 0; - virtual paddle::platform::Place place() const = 0; + virtual platform::Place place() const = 0; virtual size_t size() const = 0; }; - template + template struct PlaceholderImpl : public Placeholder { private: + template class Deleter { public: - Deleter(platform::Place place) : place_(place) {} - void operator()(T* ptr) { - paddle::memory::Free(place_, static_cast(ptr)); - } + Deleter(PType place) : place_(place) {} + void operator()(T* ptr) { memory::Free(place_, static_cast(ptr)); } private: - paddle::platform::Place place_; + PType place_; }; public: - PlaceholderImpl(paddle::platform::Place place, size_t size) - : ptr_(static_cast(paddle::memory::Alloc(place, size)), - Deleter(place)), + PlaceholderImpl(PlaceType place, size_t size) + : ptr_(static_cast(memory::Alloc(place, size)), + Deleter(place)), place_(place), size_(size) {} virtual void* ptr() const { return static_cast(ptr_.get()); } virtual size_t size() const { return size_; } - virtual paddle::platform::Place place() const { return place_; } + virtual platform::Place place() const { return place_; } - std::unique_ptr ptr_; - paddle::platform::Place place_; // record the place of ptr_. - size_t size_; // size of the memory block. + std::unique_ptr> ptr_; + platform::Place place_; // record the place of ptr_. + size_t size_; // size of the memory block. }; template @@ -166,7 +174,7 @@ class Tensor { DDim dims_; size_t numel_; // cache of `product(dims_)` size_t offset_; // marks the begin of tensor data area. -}; +}; // namespace framework } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 255f69372f4f0..79bd0cc607b10 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -47,7 +47,7 @@ TEST(Tensor, DataAssert) { /* following tests are not available at present because Memory::Alloc() and Memory::Free() have not been ready. - +*/ TEST(Tensor, MutableData) { using namespace paddle::framework; using namespace paddle::platform; @@ -72,28 +72,29 @@ TEST(Tensor, MutableData) { p2 = src_tensor.mutable_data(make_ddim({2, 2}), CPUPlace()); EXPECT_EQ(p1, p2); } - - { - Tensor src_tensor; - float* p1 = nullptr; - float* p2 = nullptr; - // initialization - p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); - EXPECT_NE(p1, nullptr); - // set src_tensor a new dim with large size - // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(make_ddim({3, 4}), GPUPlace()); - EXPECT_NE(p2, nullptr); - EXPECT_NE(p1, p2); - // set src_tensor a new dim with same size - // momery block is supposed to be unchanged - p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), GPUPlace()); - EXPECT_EQ(p1, p2); - // set src_tensor a new dim with smaller size - // momery block is supposed to be unchanged - p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); - EXPECT_EQ(p1, p2); - } + /* + { + Tensor src_tensor; + float* p1 = nullptr; + float* p2 = nullptr; + // initialization + p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); + EXPECT_NE(p1, nullptr); + // set src_tensor a new dim with large size + // momery is supposed to be re-allocated + p2 = src_tensor.mutable_data(make_ddim({3, 4}), GPUPlace()); + EXPECT_NE(p2, nullptr); + EXPECT_NE(p1, p2); + // set src_tensor a new dim with same size + // momery block is supposed to be unchanged + p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), GPUPlace()); + EXPECT_EQ(p1, p2); + // set src_tensor a new dim with smaller size + // momery block is supposed to be unchanged + p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); + EXPECT_EQ(p1, p2); + } + */ } TEST(Tensor, ShareDataFrom) { @@ -108,9 +109,11 @@ TEST(Tensor, ShareDataFrom) { dst_tensor.ShareDataFrom(src_tensor); } catch (EnforceNotMet err) { caught = true; - std::string msg = "Tenosr holds no memory. Call Tensor::mutable_data -first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); -++i) { ASSERT_EQ(what[i], msg[i]); + std::string msg = + "Tenosr holds no memory. Call Tensor::mutable_data first."; + const char* what = err.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); } } ASSERT_TRUE(caught); @@ -120,13 +123,15 @@ first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } - { - Tensor src_tensor; - Tensor dst_tensor; - src_tensor.mutable_data(make_ddim({2, 3, 4}), GPUPlace()); - dst_tensor.ShareDataFrom(src_tensor); - ASSERT_EQ(src_tensor.data(), dst_tensor.data()); - } + /* + { + Tensor src_tensor; + Tensor dst_tensor; + src_tensor.mutable_data(make_ddim({2, 3, 4}), GPUPlace()); + dst_tensor.ShareDataFrom(src_tensor); + ASSERT_EQ(src_tensor.data(), dst_tensor.data()); + } + */ } TEST(Tensor, Slice) { @@ -155,27 +160,29 @@ TEST(Tensor, Slice) { EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address); } - { - Tensor src_tensor; - src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); - Tensor slice_tensor = src_tensor.Slice(2, 6); - DDim slice_dims = slice_tensor.dims(); - ASSERT_EQ(arity(slice_dims), 2); - EXPECT_EQ(slice_dims[0], 4); - EXPECT_EQ(slice_dims[1], 9); - - uintptr_t src_data_address = - reinterpret_cast(src_tensor.data()); - uintptr_t src_mutable_data_address = reinterpret_cast( - src_tensor.mutable_data(src_tensor.dims(), GPUPlace())); - uintptr_t slice_data_address = - reinterpret_cast(slice_tensor.data()); - uintptr_t slice_mutable_data_address = reinterpret_cast( - slice_tensor.mutable_data(slice_tensor.dims(), GPUPlace())); - EXPECT_EQ(src_data_address, src_mutable_data_address); - EXPECT_EQ(slice_data_address, slice_mutable_data_address); - EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); - } + /* + { + Tensor src_tensor; + src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); + Tensor slice_tensor = src_tensor.Slice(2, 6); + DDim slice_dims = slice_tensor.dims(); + ASSERT_EQ(arity(slice_dims), 2); + EXPECT_EQ(slice_dims[0], 4); + EXPECT_EQ(slice_dims[1], 9); + + uintptr_t src_data_address = + reinterpret_cast(src_tensor.data()); + uintptr_t src_mutable_data_address = reinterpret_cast( + src_tensor.mutable_data(src_tensor.dims(), GPUPlace())); + uintptr_t slice_data_address = + reinterpret_cast(slice_tensor.data()); + uintptr_t slice_mutable_data_address = reinterpret_cast( + slice_tensor.mutable_data(slice_tensor.dims(), GPUPlace())); + EXPECT_EQ(src_data_address, src_mutable_data_address); + EXPECT_EQ(slice_data_address, slice_mutable_data_address); + EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); + } + */ } TEST(Tensor, CopyFrom) { @@ -202,5 +209,4 @@ TEST(Tensor, CopyFrom) { for (size_t i = 0; i < 3; ++i) { EXPECT_EQ(dst_ptr[i], slice_ptr[i]); } -} -*/ \ No newline at end of file +} \ No newline at end of file From 66cf21c880fba791910dc449dfc716d11c52803f Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sat, 15 Jul 2017 07:16:11 +0000 Subject: [PATCH 04/10] fix compile error --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/tensor_test.cc | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index f7f606e4b8cf8..b8bfab5320c3f 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -2,7 +2,7 @@ cc_library(ddim SRCS ddim.cc) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) -cc_test(tensor_test SRCS tensor_test.cc DEPS ddim paddle_memory) +cc_test(tensor_test SRCS tensor_test.cc DEPS ddim place paddle_memory) cc_test(variable_test SRCS variable_test.cc) cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 79bd0cc607b10..30b1448a9b2af 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -72,7 +72,7 @@ TEST(Tensor, MutableData) { p2 = src_tensor.mutable_data(make_ddim({2, 2}), CPUPlace()); EXPECT_EQ(p1, p2); } - /* + #ifdef __CUDACC__ { Tensor src_tensor; float* p1 = nullptr; @@ -94,7 +94,7 @@ TEST(Tensor, MutableData) { p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); EXPECT_EQ(p1, p2); } - */ + #endif } TEST(Tensor, ShareDataFrom) { @@ -123,7 +123,7 @@ TEST(Tensor, ShareDataFrom) { ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } - /* + #ifdef __CUDACC__ { Tensor src_tensor; Tensor dst_tensor; @@ -131,7 +131,7 @@ TEST(Tensor, ShareDataFrom) { dst_tensor.ShareDataFrom(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } - */ + #endif } TEST(Tensor, Slice) { @@ -160,7 +160,7 @@ TEST(Tensor, Slice) { EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address); } - /* + #ifdef __CUDACC__ { Tensor src_tensor; src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); @@ -182,7 +182,7 @@ TEST(Tensor, Slice) { EXPECT_EQ(slice_data_address, slice_mutable_data_address); EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); } - */ + #endif } TEST(Tensor, CopyFrom) { @@ -209,4 +209,4 @@ TEST(Tensor, CopyFrom) { for (size_t i = 0; i < 3; ++i) { EXPECT_EQ(dst_ptr[i], slice_ptr[i]); } -} \ No newline at end of file +} From afa2a88d7896a03feb18b3cf6e6736c8ca79fcad Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sat, 15 Jul 2017 15:25:06 +0800 Subject: [PATCH 05/10] add conditional compilation for tensor --- paddle/framework/tensor.h | 5 ++ paddle/framework/tensor_test.cc | 108 ++++++++++++++++---------------- 2 files changed, 59 insertions(+), 54 deletions(-) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 81db722c99f07..29bad7a00a439 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -51,6 +51,7 @@ class Tensor { !(holder_->place() == place) /* some versions of boost::variant don't have operator!= */ || holder_->size() < numel_ * sizeof(T) + offset_) { +#ifdef __CUDACC__ switch (place.which()) { case 0: holder_.reset(new PlaceholderImpl( @@ -62,6 +63,10 @@ class Tensor { boost::get(place), numel_ * sizeof(T))); break; } +#else + holder_.reset(new PlaceholderImpl( + boost::get(place), numel_ * sizeof(T))); +#endif offset_ = 0; } diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 30b1448a9b2af..84c6f0cf65588 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -72,29 +72,29 @@ TEST(Tensor, MutableData) { p2 = src_tensor.mutable_data(make_ddim({2, 2}), CPUPlace()); EXPECT_EQ(p1, p2); } - #ifdef __CUDACC__ - { - Tensor src_tensor; - float* p1 = nullptr; - float* p2 = nullptr; - // initialization - p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); - EXPECT_NE(p1, nullptr); - // set src_tensor a new dim with large size - // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(make_ddim({3, 4}), GPUPlace()); - EXPECT_NE(p2, nullptr); - EXPECT_NE(p1, p2); - // set src_tensor a new dim with same size - // momery block is supposed to be unchanged - p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), GPUPlace()); - EXPECT_EQ(p1, p2); - // set src_tensor a new dim with smaller size - // momery block is supposed to be unchanged - p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); - EXPECT_EQ(p1, p2); - } - #endif +#ifdef __CUDACC__ + { + Tensor src_tensor; + float* p1 = nullptr; + float* p2 = nullptr; + // initialization + p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); + EXPECT_NE(p1, nullptr); + // set src_tensor a new dim with large size + // momery is supposed to be re-allocated + p2 = src_tensor.mutable_data(make_ddim({3, 4}), GPUPlace()); + EXPECT_NE(p2, nullptr); + EXPECT_NE(p1, p2); + // set src_tensor a new dim with same size + // momery block is supposed to be unchanged + p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), GPUPlace()); + EXPECT_EQ(p1, p2); + // set src_tensor a new dim with smaller size + // momery block is supposed to be unchanged + p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); + EXPECT_EQ(p1, p2); + } +#endif } TEST(Tensor, ShareDataFrom) { @@ -123,15 +123,15 @@ TEST(Tensor, ShareDataFrom) { ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } - #ifdef __CUDACC__ - { - Tensor src_tensor; - Tensor dst_tensor; - src_tensor.mutable_data(make_ddim({2, 3, 4}), GPUPlace()); - dst_tensor.ShareDataFrom(src_tensor); - ASSERT_EQ(src_tensor.data(), dst_tensor.data()); - } - #endif +#ifdef __CUDACC__ + { + Tensor src_tensor; + Tensor dst_tensor; + src_tensor.mutable_data(make_ddim({2, 3, 4}), GPUPlace()); + dst_tensor.ShareDataFrom(src_tensor); + ASSERT_EQ(src_tensor.data(), dst_tensor.data()); + } +#endif } TEST(Tensor, Slice) { @@ -160,29 +160,29 @@ TEST(Tensor, Slice) { EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address); } - #ifdef __CUDACC__ - { - Tensor src_tensor; - src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); - Tensor slice_tensor = src_tensor.Slice(2, 6); - DDim slice_dims = slice_tensor.dims(); - ASSERT_EQ(arity(slice_dims), 2); - EXPECT_EQ(slice_dims[0], 4); - EXPECT_EQ(slice_dims[1], 9); +#ifdef __CUDACC__ + { + Tensor src_tensor; + src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); + Tensor slice_tensor = src_tensor.Slice(2, 6); + DDim slice_dims = slice_tensor.dims(); + ASSERT_EQ(arity(slice_dims), 2); + EXPECT_EQ(slice_dims[0], 4); + EXPECT_EQ(slice_dims[1], 9); - uintptr_t src_data_address = - reinterpret_cast(src_tensor.data()); - uintptr_t src_mutable_data_address = reinterpret_cast( - src_tensor.mutable_data(src_tensor.dims(), GPUPlace())); - uintptr_t slice_data_address = - reinterpret_cast(slice_tensor.data()); - uintptr_t slice_mutable_data_address = reinterpret_cast( - slice_tensor.mutable_data(slice_tensor.dims(), GPUPlace())); - EXPECT_EQ(src_data_address, src_mutable_data_address); - EXPECT_EQ(slice_data_address, slice_mutable_data_address); - EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); - } - #endif + uintptr_t src_data_address = + reinterpret_cast(src_tensor.data()); + uintptr_t src_mutable_data_address = reinterpret_cast( + src_tensor.mutable_data(src_tensor.dims(), GPUPlace())); + uintptr_t slice_data_address = + reinterpret_cast(slice_tensor.data()); + uintptr_t slice_mutable_data_address = reinterpret_cast( + slice_tensor.mutable_data(slice_tensor.dims(), GPUPlace())); + EXPECT_EQ(src_data_address, src_mutable_data_address); + EXPECT_EQ(slice_data_address, slice_mutable_data_address); + EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); + } +#endif } TEST(Tensor, CopyFrom) { From 9e0c6800c53701fc50dfb69a2c8b6de19c52c559 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 15 Jul 2017 20:18:54 +0800 Subject: [PATCH 06/10] Python Generate OpCreation Methods by OpProto All OpCreation method are generated by `create_op_creation_methods::__bootstrap__` method, and stores in `op_creations` object and its methods. There are three parts to implement this feature. 1. Get all registered `OpProto` from C++ side. It is implemented in `get_all_op_protos` method. 1. Create a function to convert `kwargs` to `OpDesc` base on each op's `OpProto`. The `OpDescCreationMethod` class. 1. Convert `OpProto` to `docstring` by `get_docstring_from_op_proto` method. All three methods are unit tested. The `__bootstrap__` just combines them together and create a method in runtime. For details, please reference the doc string in `create_op_creation_methods.py` and the unit test `test_op_creation_methods.py`. --- paddle/framework/op_registry.h | 24 ++ paddle/framework/operator.cc | 28 +- paddle/framework/operator.h | 8 +- paddle/pybind/pybind.cc | 17 ++ .../framework/create_op_creation_methods.py | 235 +++++++++++++++++ .../tests/test_op_creation_methods.py | 243 +++++++++++++++++- python/paddle/v2/optimizer.py | 2 + 7 files changed, 539 insertions(+), 18 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index de20e7af05a8d..3d67541db2022 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -199,8 +200,12 @@ class OpRegistry { } static OperatorPtr CreateOp(const OpDesc& op_desc) { + //! Create a OpPtr by type. std::string op_type = op_desc.type(); OperatorPtr op(creators().at(op_type)()); + + //! Fill op's data member. Not use constructor because it will be noising + //! for Op developer. op->desc_ = op_desc; op->inputs_.reserve((size_t)op_desc.inputs_size()); std::copy(op_desc.inputs().begin(), op_desc.inputs().end(), @@ -208,10 +213,18 @@ class OpRegistry { op->outputs_.reserve((size_t)op_desc.outputs_size()); std::copy(op_desc.outputs().begin(), op_desc.outputs().end(), std::back_inserter(op->outputs_)); + + //! Fill attrs, and validate attrs. for (auto& attr : op_desc.attrs()) { op->attrs_[attr.name()] = AttrTypeHelper::GetAttrValue(attr); } op_checkers().at(op_type).Check(op->attrs_); + + //! Convert Temporary variable name to an unique variable name. + AssignTempVariable(op.get()); + + //! Other op's custom Init for a complex Op. For simple Op, the Init + //! method do nothing. op->Init(); return op; } @@ -222,6 +235,17 @@ class OpRegistry { }; private: + static void AssignTempVariable(OperatorBase* op) { + static std::atomic gUniqId(0UL); + for (auto& outname : op->outputs_) { + if (outname == OperatorBase::TMP_VAR_NAME()) { + outname += op->Type(); + outname += "@"; + outname += std::to_string(gUniqId.fetch_add(1)); + } + } + } + static std::unordered_map& creators() { static std::unordered_map creators_; return creators_; diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index d065670829ccb..a467d328e1d57 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -19,23 +19,21 @@ namespace framework { std::string OperatorBase::DebugString() const { std::stringstream ss; - ss << "=================\n"; - ss << "type = " << desc_.type() << "\n"; - ss << "inputs = ["; - for (auto& ipt : inputs_) { - ss << ipt << ", "; + ss << "Op(" << Type() << "), inputs:("; + for (size_t i = 0; i < inputs_.size(); ++i) { + ss << inputs_[i]; + if (i != inputs_.size() - 1) { + ss << ", "; + } } - ss << "]\n"; - ss << "outputs = ["; - for (auto& opt : outputs_) { - ss << opt << ", "; + ss << "), outputs:("; + for (size_t i = 0; i < outputs_.size(); ++i) { + ss << outputs_[i]; + if (i != outputs_.size() - 1) { + ss << ", "; + } } - ss << "]\n"; - ss << "attr_keys = ["; - for (auto& attr : attrs_) { - ss << attr.first << ", "; - } - ss << "]\n"; + ss << ")."; return ss.str(); } diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index cf79f379fae1e..cc166048b7bf8 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -39,6 +39,13 @@ using OperatorPtr = std::shared_ptr; */ class OperatorBase { public: + /// If a variable is a empty variable, that name will be used. + static std::string EMPTY_VAR_NAME() { return "@EMPTY@"; } + + /// If a variable is a temporary variable, that name will be set in Python, + /// but it will be convert to a unique name in scope after OpCreator. + static std::string TMP_VAR_NAME() { return "@TEMP@"; } + virtual ~OperatorBase() {} template @@ -62,7 +69,6 @@ class OperatorBase { virtual void Run(const ScopePtr& scope, const platform::DeviceContext& dev_ctx) const = 0; - protected: std::string Type() const { return desc_.type(); } public: diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index c1a025ed0492f..b5ead21fd0128 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -63,6 +63,23 @@ All parameter, weight, gradient are variables in Paddle. } return ret_values; }); + m.def_submodule( + "var_names", + "The module will return special predefined variable name in Paddle") + .def("empty", pd::OperatorBase::EMPTY_VAR_NAME) + .def("temp", pd::OperatorBase::TMP_VAR_NAME); + + py::class_(m, "Operator") + .def("__str__", &pd::OperatorBase::DebugString) + .def_static("create", [](const std::string& protobin) { + pd::OpDesc desc; + PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), + "Cannot parse user input to OpDesc"); + PADDLE_ENFORCE(desc.IsInitialized(), + "User OpDesc is not initialized, reason %s", + desc.InitializationErrorString()); + return pd::OpRegistry::CreateOp(desc); + }); return m.ptr(); } diff --git a/python/paddle/v2/framework/create_op_creation_methods.py b/python/paddle/v2/framework/create_op_creation_methods.py index 2fcdfead25414..c2a7ae7692b08 100644 --- a/python/paddle/v2/framework/create_op_creation_methods.py +++ b/python/paddle/v2/framework/create_op_creation_methods.py @@ -1,11 +1,246 @@ import paddle.v2.framework.core as core import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 +import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 +import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2 +import cStringIO def get_all_op_protos(): + """ + Get all registered op proto from Paddle C++ + :return: list of OpProto + """ protostrs = core.get_all_op_protos() ret_values = [] for pbstr in protostrs: op_proto = op_proto_pb2.OpProto.FromString(str(pbstr)) ret_values.append(op_proto) return ret_values + + +class OpDescCreationMethod(object): + """ + A Functor object to convert user input(use key word args) to OpDesc based on + OpProto. + + :param op_proto: The OpProto object. + :type op_proto: op_proto_pb2.OpProto + """ + + def __init__(self, op_proto): + if not isinstance(op_proto, op_proto_pb2.OpProto): + raise TypeError("Argument should be OpProto") + self.__op_proto__ = op_proto + + def __call__(self, *args, **kwargs): + """ + Convert user input to OpDesc. Only key-word args are supported. + :return: OpDesc based on user input + :rtype: op_desc_pb2.OpDesc + """ + if len(args) != 0: + raise ValueError("Only keyword arguments is supported by Paddle") + op_desc = op_desc_pb2.OpDesc() + + # Inputs + ipts, ipt_format, _ = OpDescCreationMethod.extract_input_or_output( + "input", kwargs, self.__op_proto__.inputs) + op_desc.inputs.extend(ipts) + if ipt_format is not None: + op_desc.attrs.extend([ipt_format]) + + # Outputs + outs, out_format, tmp_index = OpDescCreationMethod.extract_input_or_output( + "output", kwargs, self.__op_proto__.outputs) + op_desc.outputs.extend(outs) + if out_format is not None: + op_desc.attrs.extend([out_format]) + if len(tmp_index) != 0: + tmp_index_attr = op_desc.attrs.add() + tmp_index_attr.type = attr_type_pb2.INTS + tmp_index_attr.name = "temporary_index" + tmp_index_attr.ints.extend(tmp_index) + + # Types + op_desc.type = self.__op_proto__.type + + # Attrs + for attr in self.__op_proto__.attrs: + if attr.generated: + continue + user_defined_attr = kwargs.get(attr.name, None) + if user_defined_attr is not None: + new_attr = op_desc.attrs.add() + new_attr.name = attr.name + new_attr.type = attr.type + if attr.type == attr_type_pb2.INT: + new_attr.i = user_defined_attr + elif attr.type == attr_type_pb2.FLOAT: + new_attr.f = user_defined_attr + elif attr.type == attr_type_pb2.STRING: + new_attr.s = user_defined_attr + elif attr.type == attr_type_pb2.INTS: + new_attr.ints.extend(user_defined_attr) + elif attr.type == attr_type_pb2.FLOATS: + new_attr.floats.extend(user_defined_attr) + elif attr.type == attr_type_pb2.STRINGS: + new_attr.strings.extend(user_defined_attr) + else: + raise NotImplementedError("Not support attribute type " + + attr.type) + + return op_desc + + @staticmethod + def extract_input_or_output(in_out, kwargs, meta): + """ + Extract input variable names or output variable names from key-word + arguments, which base on VarProtos. + + :param in_out: "input" or "output" + :param kwargs: key-word arguments that user inputted. + :param meta: a list of VarProto + :return: The three object will be return. The variable names. The + input_format or output_format attribute(None if the input or output is + not multiple). The temporary variable index list. + """ + multiple = OpDescCreationMethod.any_is_true((m.multiple for m in meta)) + tmp_index = [] + retv = [] + if multiple: + var_format = op_desc_pb2.AttrDesc() + var_format.type = attr_type_pb2.INTS + var_format.name = "%s_format" % in_out + var_format.ints.append(0) + + for var in meta: + var_name = var.name + + if var.temporary: + var_name = [core.var_names.temp()] + tmp_index.append(len(retv)) + else: + var_name = kwargs.get(var_name, []) + if not isinstance(var_name, list): + var_name = [var_name] + retv.extend(var_name) + var_format.ints.append(len(var_name) + var_format.ints[-1]) + return retv, var_format, tmp_index + else: + for var in meta: + if var.temporary: + retv.append(kwargs.get(var.name, core.var_names.temp())) + tmp_index.append(len(retv)) + else: + retv.append(kwargs.get(var.name, core.var_names.empty())) + return retv, None, tmp_index + + @staticmethod + def any_is_true(generator): + """ + Reduce a bool array to one. If any of them is True, then return True. + """ + for flag in generator: + if flag: + return True + return False + + +def get_docstring_from_op_proto(op_proto): + """ + Generate docstring from a OpProto + :param op_proto: a OpProto instance. + :type op_proto: op_proto_pb2.OpProto + :return: docstring + """ + if not isinstance(op_proto, op_proto_pb2.OpProto): + raise TypeError("Input must be OpProto") + f = cStringIO.StringIO() + f.write(op_proto.comment) + f.write("\n") + + def __append_param__(name, comment, type): + # Maybe replace the following line with template engine is better. + f.write(":param ") + f.write(name) + f.write(": ") + f.write(comment) + f.write("\n") + f.write(":type ") + f.write(name) + f.write(": ") + f.write(type) + f.write("\n") + + for ipt in op_proto.inputs: + __append_param__(ipt.name, ipt.comment, "list | basestr" + if ipt.multiple else "basestr") + + temp_var_prefix = \ + "This is a temporary variable. It does not have to set by user. " + for opt in op_proto.outputs: + __append_param__(opt.name, opt.comment if not opt.temporary else + temp_var_prefix + opt.comment, "list | basestr" + if opt.multiple else "basestr") + + for attr in op_proto.attrs: + attr_type = None + if attr.type == attr_type_pb2.INT: + attr_type = "int" + elif attr.type == attr_type_pb2.FLOAT: + attr_type = "float" + elif attr.type == attr_type_pb2.STRING: + attr_type = "basestr" + elif attr.type == attr_type_pb2.INTS: + attr_type = "list of int" + elif attr.type == attr_type_pb2.FLOATS: + attr_type = "list of float" + elif attr.type == attr_type_pb2.STRINGS: + attr_type = "list of basestr" + + if attr_type is None: + raise RuntimeError("Not supported attribute type " + attr.type) + + __append_param__(attr.name, attr.comment, attr_type) + + return f.getvalue() + + +def create_op_creation_method(op_proto): + """ + Generate op creation method for an OpProto + """ + method = OpDescCreationMethod(op_proto) + + def __impl__(*args, **kwargs): + opdesc = method(*args, **kwargs) + return core.Operator.create(opdesc.SerializeToString()) + + __impl__.__doc__ = get_docstring_from_op_proto(op_proto) + return __impl__ + + +class OpCreationsHolder(object): + """ + A object will holds all op creation methods. + + Use `op_creations.xxx_op` to access them. + """ + pass + + +op_creations = OpCreationsHolder() + + +def __bootstrap__(): + """ + Bootstrap function for this module. It will dynamic create all op creation + methods in runtime. + """ + for op_proto in get_all_op_protos(): + func = create_op_creation_method(op_proto) + func.__name__ = str(op_proto.type) + setattr(op_creations, func.__name__, func) + + +__bootstrap__() diff --git a/python/paddle/v2/framework/tests/test_op_creation_methods.py b/python/paddle/v2/framework/tests/test_op_creation_methods.py index b205e2cabb99a..41db7c0d535aa 100644 --- a/python/paddle/v2/framework/tests/test_op_creation_methods.py +++ b/python/paddle/v2/framework/tests/test_op_creation_methods.py @@ -1,9 +1,13 @@ import unittest import paddle.v2.framework.create_op_creation_methods as creation +import paddle.v2.framework.core as core +import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 +import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 +import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2 -class TestOpCreationsMethods(unittest.TestCase): - def test_all_protos(self): +class TestGetAllProtos(unittest.TestCase): + def test_all(self): all_protos = creation.get_all_op_protos() self.assertNotEqual(0, len(all_protos)) @@ -11,5 +15,240 @@ def test_all_protos(self): self.assertTrue(each.IsInitialized()) +class TestOpDescCreationMethod(unittest.TestCase): + def test_plain_input_output(self): + op = op_proto_pb2.OpProto() + op.type = "test" + ipt = op.inputs.add() + ipt.name = "X" + ipt.comment = "not matter" + + ipt = op.inputs.add() + ipt.name = "Y" + ipt.comment = "not matter" + + opt = op.outputs.add() + opt.name = "Z" + opt.comment = "not matter" + + op.comment = "not matter" + + self.assertTrue(op.IsInitialized()) + + method = creation.OpDescCreationMethod(op) + output = method(X="a", Y="b", Z="c") + + expected = op_desc_pb2.OpDesc() + expected.type = "test" + expected.inputs.extend(["a", "b"]) + expected.outputs.append("c") + self.assertEqual(expected, output) + + def test_multiple_input_plain_output(self): + op = op_proto_pb2.OpProto() + op.type = "fc" + ipt = op.inputs.add() + ipt.name = "X" + ipt.comment = "" + ipt.multiple = True + + ipt = op.inputs.add() + ipt.name = "W" + ipt.comment = "" + ipt.multiple = True + + ipt = op.inputs.add() + ipt.name = "b" + ipt.comment = "" + + out = op.outputs.add() + out.name = "Y" + out.comment = "" + + op.comment = "" + self.assertTrue(op.IsInitialized()) + method = creation.OpDescCreationMethod(op) + + generated1 = method(X="x", W="w", b="b", Y="y") + expected1 = op_desc_pb2.OpDesc() + expected1.inputs.extend(['x', 'w', 'b']) + expected1.outputs.extend(['y']) + expected1.type = 'fc' + attr = expected1.attrs.add() + attr.name = 'input_format' + attr.type = attr_type_pb2.INTS + attr.ints.extend([0, 1, 2, 3]) + self.assertEqual(expected1, generated1) + + generated2 = method( + X=['x1', 'x2', 'x3'], b='b', W=['w1', 'w2', 'w3'], Y='y') + expected2 = op_desc_pb2.OpDesc() + expected2.inputs.extend(['x1', 'x2', 'x3', 'w1', 'w2', 'w3', 'b']) + expected2.outputs.extend(['y']) + expected2.type = 'fc' + attr = expected2.attrs.add() + attr.name = 'input_format' + attr.type = attr_type_pb2.INTS + attr.ints.extend([0, 3, 6, 7]) + self.assertEqual(expected2, generated2) + + def test_attrs(self): + op = op_proto_pb2.OpProto() + op.type = "test" + ipt = op.inputs.add() + ipt.name = 'X' + ipt.comment = "" + + def __add_attr__(name, type): + attr = op.attrs.add() + attr.name = name + attr.comment = "" + attr.type = type + + __add_attr__("int_attr", attr_type_pb2.INT) + __add_attr__("float_attr", attr_type_pb2.FLOAT) + __add_attr__("string_attr", attr_type_pb2.STRING) + __add_attr__("ints_attr", attr_type_pb2.INTS) + __add_attr__("floats_attr", attr_type_pb2.FLOATS) + __add_attr__("strings_attr", attr_type_pb2.STRINGS) + + op.comment = "" + self.assertTrue(op.IsInitialized()) + + method = creation.OpDescCreationMethod(op) + + generated = method( + X="a", + int_attr=10, + float_attr=3.2, + string_attr="test_str", + ints_attr=[0, 1, 2, 3, 4], + floats_attr=[0.2, 3.2, 4.5], + strings_attr=["a", "b", "c"]) + + expected = op_desc_pb2.OpDesc() + expected.type = "test" + expected.inputs.extend(['a']) + attr = expected.attrs.add() + attr.name = "int_attr" + attr.type = attr_type_pb2.INT + attr.i = 10 + + attr = expected.attrs.add() + attr.name = "float_attr" + attr.type = attr_type_pb2.FLOAT + attr.f = 3.2 + + attr = expected.attrs.add() + attr.name = "string_attr" + attr.type = attr_type_pb2.STRING + attr.s = "test_str" + + attr = expected.attrs.add() + attr.name = "ints_attr" + attr.type = attr_type_pb2.INTS + attr.ints.extend([0, 1, 2, 3, 4]) + + attr = expected.attrs.add() + attr.name = 'floats_attr' + attr.type = attr_type_pb2.FLOATS + attr.floats.extend([0.2, 3.2, 4.5]) + + attr = expected.attrs.add() + attr.name = 'strings_attr' + attr.type = attr_type_pb2.STRINGS + attr.strings.extend(['a', 'b', 'c']) + + self.assertEqual(expected, generated) + + def test_input_temporary_output(self): + op = op_proto_pb2.OpProto() + op.type = "test" + out = op.outputs.add() + out.name = "OUT" + out.comment = "" + + out = op.outputs.add() + out.name = "TMP" + out.comment = "" + out.temporary = True + + out = op.outputs.add() + out.name = "OUT2" + out.comment = "" + op.comment = "" + + method = creation.OpDescCreationMethod(op) + generated = method(OUT="a", OUT2="b") + desc = op_desc_pb2.OpDesc() + desc.outputs.extend(["a", core.var_names.temp(), "b"]) + desc.type = "test" + attr = desc.attrs.add() + attr.name = "temporary_index" + attr.type = attr_type_pb2.INTS + attr.ints.append(2) + self.assertEqual(generated, desc) + + +class TestOpCreationDocStr(unittest.TestCase): + def test_all(self): + op = op_proto_pb2.OpProto() + op.type = "test" + op.comment = """Test Op. + +This op is used for unit test, not a real op. +""" + a = op.inputs.add() + a.name = "a" + a.comment = "Input a for test op" + a.multiple = True + + b = op.inputs.add() + b.name = "b" + b.comment = "Input b for test op" + self.assertTrue(op.IsInitialized()) + + o1 = op.outputs.add() + o1.name = "output" + o1.comment = "The output of test op" + + o2 = op.outputs.add() + o2.name = "temp output" + o2.comment = "The temporary output of test op" + o2.temporary = True + + test_str = op.attrs.add() + test_str.name = "str_attr" + test_str.type = attr_type_pb2.STRING + test_str.comment = "A string attribute for test op" + + actual = creation.get_docstring_from_op_proto(op) + expected_docstring = '''Test Op. + +This op is used for unit test, not a real op. + +:param a: Input a for test op +:type a: list | basestr +:param b: Input b for test op +:type b: basestr +:param output: The output of test op +:type output: basestr +:param temp output: This is a temporary variable. It does not have to set by user. The temporary output of test op +:type temp output: basestr +:param str_attr: A string attribute for test op +:type str_attr: basestr +''' + self.assertEqual(expected_docstring, actual) + + +class TestOpCreations(unittest.TestCase): + def test_all(self): + add_op = creation.op_creations.add_two(X="a", Y="b", Out="z") + self.assertIsNotNone(add_op) + # Invoke C++ DebugString() + self.assertEqual('Op(add_two), inputs:(a, b), outputs:(z).', + str(add_op)) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index b6ee51cfe899f..173a30a41181c 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -25,6 +25,8 @@ def __impl__(): self.__opt_conf_proto__ = config_parser_utils.parse_optimizer_config( __impl__) + if swig_api is None: + raise RuntimeError("paddle.v2 currently need swig_paddle") self.__opt_conf__ = swig_api.OptimizationConfig.createFromProto( self.__opt_conf_proto__) From c5bc126762031231eb8a144d3318c9dcbaea68ed Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 17 Jul 2017 12:42:04 +0800 Subject: [PATCH 07/10] Follow comment, rename to `GenerateTempVariableName` --- paddle/framework/op_registry.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index b627b4a60a728..ec237950dff72 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -220,7 +220,7 @@ class OpRegistry { op_checkers().at(op_type).Check(op->attrs_); //! Convert Temporary variable name to an unique variable name. - AssignTempVariable(op.get()); + GenerateTempVariableName(op.get()); //! Other op's custom Init for a complex Op. For simple Op, the Init //! method do nothing. @@ -234,7 +234,7 @@ class OpRegistry { }; private: - static void AssignTempVariable(OperatorBase* op) { + static void GenerateTempVariableName(OperatorBase* op) { static std::atomic gUniqId(0UL); for (auto& outname : op->outputs_) { if (outname == OperatorBase::TMP_VAR_NAME()) { From c78a5e5da24e7e7edc7d5cfd92b349f3913773ac Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 17 Jul 2017 13:11:47 +0800 Subject: [PATCH 08/10] Fix merge error before --- python/paddle/v2/optimizer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index 260a5094695d4..ba581980334fe 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -1,4 +1,3 @@ -import py_paddle.swig_paddle as swig_api import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils import paddle.trainer_config_helpers.optimizers as v1_optimizers """ @@ -17,6 +16,7 @@ class Optimizer(object): def __init__(self, **kwargs): + import py_paddle.swig_paddle as swig_api if 'batch_size' in kwargs: del kwargs['batch_size'] # not important for python library. @@ -25,8 +25,6 @@ def __impl__(): self.__opt_conf_proto__ = config_parser_utils.parse_optimizer_config( __impl__) - if swig_api is None: - raise RuntimeError("paddle.v2 currently need swig_paddle") self.__opt_conf__ = swig_api.OptimizationConfig.createFromProto( self.__opt_conf_proto__) @@ -37,18 +35,22 @@ def enable_types(self): For each optimizer(SGD, Adam), GradientMachine should enable different buffers. """ + import py_paddle.swig_paddle as swig_api tmp = swig_api.ParameterOptimizer.create(self.__opt_conf__) assert isinstance(tmp, swig_api.ParameterOptimizer) return tmp.getParameterTypes() def __create_local_updater__(self): + import py_paddle.swig_paddle as swig_api return swig_api.ParameterUpdater.createLocalUpdater(self.__opt_conf__) def __create_remote_updater__(self, pass_num, use_sparse_updater): + import py_paddle.swig_paddle as swig_api return swig_api.ParameterUpdater.createRemoteUpdater( self.__opt_conf__, pass_num, use_sparse_updater) def __create_new_remote_updater__(self, pserver_spec, use_etcd): + import py_paddle.swig_paddle as swig_api return swig_api.ParameterUpdater.createNewRemoteUpdater( self.__opt_conf__, pserver_spec, use_etcd) From 78bd815e8504496ccae388bb799cc8026427084c Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 17 Jul 2017 19:48:33 +0800 Subject: [PATCH 09/10] refine conditional compilation and remove `numel_` --- paddle/framework/tensor.h | 40 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 29bad7a00a439..b405e3877c9f1 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -27,7 +27,7 @@ namespace framework { class Tensor { public: - Tensor() : numel_(0), offset_(0) {} + Tensor() : offset_(0) {} template const T* data() const { @@ -44,30 +44,26 @@ class Tensor { template T* mutable_data(platform::Place place) { - PADDLE_ENFORCE(numel_ > 0, - "Tensor::numel_ must be larger than zero to call " + PADDLE_ENFORCE(product(dims_) > 0, + "Tensor's numel must be larger than zero to call " "Tensor::mutable_data. Call Tensor::set_dim first."); if (holder_ == nullptr || !(holder_->place() == place) /* some versions of boost::variant don't have operator!= */ - || holder_->size() < numel_ * sizeof(T) + offset_) { + || holder_->size() < product(dims_) * sizeof(T) + offset_) { + if (platform::is_cpu_place(place)) { + holder_.reset(new PlaceholderImpl( + boost::get(place), product(dims_) * sizeof(T))); + } else if (platform::is_gpu_place(place)) { #ifdef __CUDACC__ - switch (place.which()) { - case 0: - holder_.reset(new PlaceholderImpl( - boost::get(place), numel_ * sizeof(T))); - break; - - case 1: - holder_.reset(new PlaceholderImpl( - boost::get(place), numel_ * sizeof(T))); - break; - } + holder_.reset(new PlaceholderImpl( + boost::get(place), product(dims_) * sizeof(T))); #else - holder_.reset(new PlaceholderImpl( - boost::get(place), numel_ * sizeof(T))); + PADDLE_ENFORCE(true, "'GPUPlace' is not supported in CPU only device."); #endif - + } else { + PADDLE_ENFORCE(true, "Unknown 'place'."); + } offset_ = 0; } return reinterpret_cast(reinterpret_cast(holder_->ptr()) + @@ -88,7 +84,7 @@ class Tensor { platform::is_cpu_place(dst_place), "Tensor::CopyFrom only support CPU now."); src.CheckDims(); - size_t size = src.numel_ * sizeof(T); + size_t size = product(src.dims_) * sizeof(T); set_dims(src.dims()); const void* src_ptr = static_cast(src.data()); void* dst_ptr = static_cast(mutable_data(dst_place)); @@ -122,7 +118,6 @@ class Tensor { return; } dims_ = dims; - numel_ = product(dims_); } DDim dims() const { return dims_; } @@ -170,16 +165,15 @@ class Tensor { inline void CheckDims() const { PADDLE_ENFORCE(holder_ != nullptr, "Tenosr holds no memory. Call Tensor::mutable_data first."); - PADDLE_ENFORCE(holder_->size() >= numel_ * sizeof(T) + offset_, + PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_, "Tensor's dims_ is out of bound. Call Tensor::mutable_data " "first to re-allocate memory."); } std::shared_ptr holder_; // holds the memory block if allocated. DDim dims_; - size_t numel_; // cache of `product(dims_)` size_t offset_; // marks the begin of tensor data area. -}; // namespace framework +}; } // namespace framework } // namespace paddle From 78fa5e307da3cb32706f396346d3db7a875b4178 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 17 Jul 2017 20:00:58 +0800 Subject: [PATCH 10/10] Add DDim::size() --- paddle/framework/ddim.cc | 2 ++ paddle/framework/ddim.h | 2 ++ paddle/framework/ddim_test.cc | 1 + 3 files changed, 5 insertions(+) diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index 73f5499ad1575..b6ad8b60aaf7b 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -117,6 +117,8 @@ int DDim::operator[](int idx) const { return boost::apply_visitor(DynamicConstIndexer(idx), var); } +ssize_t DDim::size() const { return arity(*this); } + bool DDim::operator==(DDim d) const { if (var.which() != d.getVar().which()) { return false; diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index a0c2a8a74afde..7bc21a1e3455b 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -50,6 +50,8 @@ struct DDim { DDimVar getVar() { return var; } + ssize_t size() const; + bool operator==(DDim d) const; bool operator!=(DDim d) const; diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc index 6a099f2aeb4aa..9d18a2972ce62 100644 --- a/paddle/framework/ddim_test.cc +++ b/paddle/framework/ddim_test.cc @@ -49,6 +49,7 @@ TEST(DDim, Equality) { // arity of a DDim EXPECT_EQ(paddle::framework::arity(ddim), 3); + EXPECT_EQ(ddim.size(), 3); // product of a DDim EXPECT_EQ(paddle::framework::product(vddim), 45);