Commit

merge baidu/develop
QiJune committed Jul 18, 2017
2 parents 8718966 + f146b03 · commit 4d49f1d
Showing 16 changed files with 613 additions and 70 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -137,7 +137,8 @@ if(WITH_GPU)
endif(WITH_GPU)

if(USE_NNPACK)
- list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt")
+ include(external/nnpack)
+ list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS})
endif(USE_NNPACK)

add_subdirectory(proto)
cmake/external/nnpack.cmake
@@ -7,10 +7,24 @@ set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK")
find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include)
find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib)
find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib)
+ find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib)
+ find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib)

if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB)
set(NNPACK_FOUND ON)
INCLUDE_DIRECTORIES(${NNPACK_INC_DIR})
+
+ set(NNPACK_LIBS)
+ list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB})
+ if (NNPACK_UKERNELS_LIB)
+ list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB})
+ endif()
+ if (NNPACK_CPUFEATURES_LIB)
+ list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB})
+ endif()
+ if(NOT ANDROID)
+ list(APPEND NNPACK_LIBS "rt")
+ endif()
else()
message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})")
endif()
2 changes: 2 additions & 0 deletions paddle/framework/ddim.cc
@@ -117,6 +117,8 @@ int DDim::operator[](int idx) const {
return boost::apply_visitor(DynamicConstIndexer(idx), var);
}

+ ssize_t DDim::size() const { return arity(*this); }
+
bool DDim::operator==(DDim d) const {
if (var.which() != d.getVar().which()) {
return false;
2 changes: 2 additions & 0 deletions paddle/framework/ddim.h
@@ -65,6 +65,8 @@ struct DDim {

DDimVar getVar() { return var; }

+ ssize_t size() const;
+
bool operator==(DDim d) const;

bool operator!=(DDim d) const;
1 change: 1 addition & 0 deletions paddle/framework/ddim_test.cc
@@ -49,6 +49,7 @@ TEST(DDim, Equality) {

// arity of a DDim
EXPECT_EQ(paddle::framework::arity(ddim), 3);
+ EXPECT_EQ(ddim.size(), 3);

// product of a DDim
EXPECT_EQ(paddle::framework::product(vddim), 45);
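For context: the new size() accessor forwards to arity(), so it reports the number of dimensions, not the number of elements that product() reports. A minimal sketch of the distinction, assuming the make_ddim helper the test above already uses; this snippet is not part of the commit:

#include "paddle/framework/ddim.h"

int main() {
  paddle::framework::DDim ddim = paddle::framework::make_ddim({9, 1, 5});
  // size() (== arity()) counts dimensions: 3.
  // product() counts elements: 9 * 1 * 5 == 45.
  return (ddim.size() == 3 && paddle::framework::product(ddim) == 45) ? 0 : 1;
}
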
26 changes: 24 additions & 2 deletions paddle/framework/op_registry.h
@@ -1,6 +1,7 @@
#pragma once

#include <algorithm>
+ #include <atomic>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
@@ -214,25 +215,35 @@ class OpRegistry {
}

static OperatorPtr CreateOp(const OpDesc& op_desc) {
+ //! Create a OpPtr by type.
std::string op_type = op_desc.type();
OperatorPtr op(creators().at(op_type)());
+ //! Fill op's data member. Not use constructor because it will be noising
+ //! for Op developer.
const OpProto& op_proto = protos().at(op_type);
- // set op's inputs_ from desc.
op->type_ = op_desc.type();
+ // set op's inputs_ from desc.
op->inputs_.reserve((size_t)op_desc.inputs_size());
std::copy(op_desc.inputs().begin(), op_desc.inputs().end(),
std::back_inserter(op->inputs_));
// set op's outputs_ from desc.
op->outputs_.reserve((size_t)op_desc.outputs_size());
std::copy(op_desc.outputs().begin(), op_desc.outputs().end(),
std::back_inserter(op->outputs_));
- // set op's attr;
+
+ //! Fill attrs, and validate attrs.
for (auto& attr : op_desc.attrs()) {
op->attrs_[attr.name()] = AttrTypeHelper::GetAttrValue(attr);
}
op_checkers().at(op_type).Check(op->attrs_);
+
+ //! Convert Temporary variable name to an unique variable name.
+ GenerateTempVariableName(op.get());
+
// set argument offsets stored in op.
CreateInOutOffsetMap(op, op_proto);
+ //! Other op's custom Init for a complex Op. For simple Op, the Init
+ //! method do nothing.
op->Init();
return op;
}
@@ -248,6 +259,17 @@
};

private:
+ static void GenerateTempVariableName(OperatorBase* op) {
+ static std::atomic<size_t> gUniqId(0UL);
+ for (auto& outname : op->outputs_) {
+ if (outname == OperatorBase::TMP_VAR_NAME()) {
+ outname += op->type_;
+ outname += "@";
+ outname += std::to_string(gUniqId.fetch_add(1));
+ }
+ }
+ }
+
static std::unordered_map<std::string, OpCreator>& creators() {
static std::unordered_map<std::string, OpCreator> creators_;
return creators_;
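The effect of GenerateTempVariableName above: any output still named TMP_VAR_NAME() ("@TEMP@") gets the op type plus a process-wide counter appended, so every temporary ends up unique, e.g. "@TEMP@" becomes "@TEMP@mul@0". A self-contained sketch of the same renaming rule; the op type "mul" and the output names are illustrative, not from the commit:

#include <atomic>
#include <iostream>
#include <string>
#include <vector>

int main() {
  static std::atomic<size_t> g_uniq_id(0UL);
  const std::string kTmpName = "@TEMP@";  // mirrors OperatorBase::TMP_VAR_NAME()
  std::string op_type = "mul";            // hypothetical op type
  std::vector<std::string> outputs = {"out", "@TEMP@"};
  for (auto& name : outputs) {
    if (name == kTmpName) {
      name += op_type;
      name += "@";
      name += std::to_string(g_uniq_id.fetch_add(1));
    }
  }
  std::cout << outputs[1] << "\n";  // prints "@TEMP@mul@0"
  return 0;
}
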
28 changes: 13 additions & 15 deletions paddle/framework/operator.cc
@@ -91,23 +91,21 @@ std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {

std::string OperatorBase::DebugString() const {
std::stringstream ss;
- ss << "=================\n";
- ss << "type = " << type_ << "\n";
- ss << "inputs = [";
- for (auto& ipt : inputs_) {
- ss << ipt << ", ";
+ ss << "Op(" << type_ << "), inputs:(";
+ for (size_t i = 0; i < inputs_.size(); ++i) {
+ ss << inputs_[i];
+ if (i != inputs_.size() - 1) {
+ ss << ", ";
+ }
}
- ss << "]\n";
- ss << "outputs = [";
- for (auto& opt : outputs_) {
- ss << opt << ", ";
+ ss << "), outputs:(";
+ for (size_t i = 0; i < outputs_.size(); ++i) {
+ ss << outputs_[i];
+ if (i != outputs_.size() - 1) {
+ ss << ", ";
+ }
}
- ss << "]\n";
- ss << "attr_keys = [";
- for (auto& attr : attrs_) {
- ss << attr.first << ", ";
- }
- ss << "]\n";
+ ss << ").";
return ss.str();
}

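After this rewrite an operator prints on a single line, and the attribute keys are no longer part of the output. A free-standing sketch of the new joining logic; the op name "add" and the argument names are illustrative:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Reimplements the joining logic outside OperatorBase, for illustration only.
std::string DebugString(const std::string& type,
                        const std::vector<std::string>& inputs,
                        const std::vector<std::string>& outputs) {
  std::stringstream ss;
  ss << "Op(" << type << "), inputs:(";
  for (size_t i = 0; i < inputs.size(); ++i) {
    ss << inputs[i];
    if (i != inputs.size() - 1) ss << ", ";
  }
  ss << "), outputs:(";
  for (size_t i = 0; i < outputs.size(); ++i) {
    ss << outputs[i];
    if (i != outputs.size() - 1) ss << ", ";
  }
  ss << ").";
  return ss.str();
}

int main() {
  // Prints: Op(add), inputs:(a, b), outputs:(c).
  std::cout << DebugString("add", {"a", "b"}, {"c"}) << "\n";
  return 0;
}
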
7 changes: 7 additions & 0 deletions paddle/framework/operator.h
@@ -56,6 +56,13 @@ using OperatorPtr = std::shared_ptr<OperatorBase>;
*/
class OperatorBase {
public:
+ /// If a variable is a empty variable, that name will be used.
+ static std::string EMPTY_VAR_NAME() { return "@EMPTY@"; }
+
+ /// If a variable is a temporary variable, that name will be set in Python,
+ /// but it will be convert to a unique name in scope after OpCreator.
+ static std::string TMP_VAR_NAME() { return "@TEMP@"; }
+
virtual ~OperatorBase() {}

template <typename T>
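A small sketch of how client code might compare against these sentinels, assuming the accessors declared above; the variable names are illustrative:

#include <cassert>
#include <string>

#include "paddle/framework/operator.h"

int main() {
  // An input slot left unset is expected to carry EMPTY_VAR_NAME(), and a
  // not-yet-renamed temporary output carries TMP_VAR_NAME().
  std::string unset_input = paddle::framework::OperatorBase::EMPTY_VAR_NAME();
  std::string temp_output = paddle::framework::OperatorBase::TMP_VAR_NAME();
  assert(unset_input == "@EMPTY@");
  assert(temp_output == "@TEMP@");
  return 0;
}
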
55 changes: 24 additions & 31 deletions paddle/framework/tensor.h
@@ -29,9 +29,7 @@ namespace framework {

class Tensor {
public:
- Tensor() : numel_(0), offset_(0) {}
-
- Tensor& operator=(const Tensor& src) = delete;
+ Tensor() : offset_(0) {}

template <typename T>
const T* data() const {
@@ -48,34 +46,33 @@
}

template <typename T>
- T* mutable_data(DDim dims, paddle::platform::Place place) {
+ T* mutable_data(DDim dims, platform::Place place) {
set_dims(dims);
return mutable_data<T>(place);
}

template <typename T>
- T* mutable_data(paddle::platform::Place place) {
- PADDLE_ENFORCE(numel_ > 0,
- "Tensor::numel_ must be larger than zero to call "
+ T* mutable_data(platform::Place place) {
+ PADDLE_ENFORCE(product(dims_) > 0,
+ "Tensor's numel must be larger than zero to call "
"Tensor::mutable_data. Call Tensor::set_dim first.");
if (holder_ == nullptr ||
!(holder_->place() ==
place) /* some versions of boost::variant don't have operator!= */
- || holder_->size() < numel_ * sizeof(T) + offset_) {
+ || holder_->size() < product(dims_) * sizeof(T) + offset_) {
if (platform::is_cpu_place(place)) {
holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
- boost::get<platform::CPUPlace>(place), numel_ * sizeof(T)));
- }
+ boost::get<platform::CPUPlace>(place), product(dims_) * sizeof(T)));
+ } else if (platform::is_gpu_place(place)) {
#ifdef __CUDACC__
- else if (platform::is_gpu_place(place)) {
holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
- boost::get<platform::GPUPlace>(place), numel_ * sizeof(T)));
- }
+ boost::get<platform::GPUPlace>(place), product(dims_) * sizeof(T)));
#else
- else if (platform::is_gpu_place(place)) {
- PADDLE_ENFORCE(true, "GPU not support!");
- }
+ PADDLE_ENFORCE(true, "'GPUPlace' is not supported in CPU only device.");
#endif
+ } else {
+ PADDLE_ENFORCE(true, "Unknown 'place'.");
+ }
offset_ = 0;
}
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
@@ -98,7 +95,7 @@ class Tensor {
// flat to rank = 1
template <typename T>
typename TTypes<T>::Flat flat() {
- return shaped<T, 1>(make_ddim({static_cast<int>(numel_)}));
+ return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
}

// to TensorType Vec
@@ -129,7 +126,7 @@

template <typename T>
typename TTypes<T>::ConstFlat flat() const {
- return shaped<T, 1>(make_ddim({static_cast<int>(numel_)}));
+ return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
}

template <typename T>
@@ -151,12 +148,12 @@
}

template <typename T>
- void CopyFrom(const Tensor& src, paddle::platform::Place dst_place) {
+ void CopyFrom(const Tensor& src, platform::Place dst_place) {
PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) &&
platform::is_cpu_place(dst_place),
"Tensor::CopyFrom only support CPU now.");
src.CheckDims<T>();
- size_t size = src.numel_ * sizeof(T);
+ size_t size = product(src.dims_) * sizeof(T);
set_dims(src.dims());
const void* src_ptr = static_cast<const void*>(src.data<T>());
void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
@@ -190,7 +187,6 @@
return;
}
dims_ = dims;
- numel_ = product(dims_);
}

DDim dims() const { return dims_; }
@@ -201,7 +197,7 @@
struct Placeholder {
virtual ~Placeholder() {}
virtual void* ptr() const = 0;
- virtual paddle::platform::Place place() const = 0;
+ virtual platform::Place place() const = 0;
virtual size_t size() const = 0;
};

@@ -212,42 +208,39 @@
class Deleter {
public:
Deleter(PType place) : place_(place) {}
- void operator()(T* ptr) {
- paddle::memory::Free(place_, static_cast<void*>(ptr));
- }
+ void operator()(T* ptr) { memory::Free(place_, static_cast<void*>(ptr)); }

private:
PType place_;
};

public:
PlaceholderImpl(PlaceType place, size_t size)
- : ptr_(static_cast<T*>(paddle::memory::Alloc(place, size)),
+ : ptr_(static_cast<T*>(memory::Alloc(place, size)),
Deleter<PlaceType>(place)),
place_(place),
size_(size) {}

virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
virtual size_t size() const { return size_; }
- virtual paddle::platform::Place place() const { return place_; }
+ virtual platform::Place place() const { return place_; }

std::unique_ptr<T, Deleter<PlaceType>> ptr_;
- paddle::platform::Place place_; // record the place of ptr_.
- size_t size_; // size of the memory block.
+ platform::Place place_; // record the place of ptr_.
+ size_t size_; // size of the memory block.
};

template <typename T>
inline void CheckDims() const {
PADDLE_ENFORCE(holder_ != nullptr,
"Tenosr holds no memory. Call Tensor::mutable_data first.");
- PADDLE_ENFORCE(holder_->size() >= numel_ * sizeof(T) + offset_,
+ PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.");
}

std::shared_ptr<Placeholder> holder_; // holds the memory block if allocated.
DDim dims_;
- size_t numel_; // cache of `product(dims_)`
size_t offset_; // marks the begin of tensor data area.
};

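With numel_ removed, element counts are always derived from product(dims_) on demand, so dims_ can never go stale against a cached count. A hedged usage sketch of the CPU allocation path, assuming the make_ddim and CPUPlace helpers used elsewhere in the framework; the shape is illustrative:

#include "paddle/framework/tensor.h"

int main() {
  paddle::framework::Tensor t;
  // Sets dims_ to {2, 3} and allocates product(dims_) * sizeof(float) bytes.
  float* buf = t.mutable_data<float>(paddle::framework::make_ddim({2, 3}),
                                     paddle::platform::CPUPlace());
  for (int i = 0; i < 6; ++i) {
    buf[i] = 1.0f;  // 6 == product({2, 3})
  }
  return 0;
}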
