Merge pull request #5 from PaddlePaddle/develop
Update USERNAME/paddle
AshburnLee committed Oct 20, 2020
2 parents 5be3a45 + 651dab4 commit a1d92b7
Showing 445 changed files with 20,648 additions and 10,180 deletions.
10 changes: 9 additions & 1 deletion .pre-commit-config.yaml
@@ -48,5 +48,13 @@ repos:
         name: copyright_checker
         entry: python ./tools/codestyle/copyright.hook
         language: system
-        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py|sh)$
         exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
+-   repo: local
+    hooks:
+    -   id: shellcheck
+        name: shellcheck
+        entry: shellcheck
+        language: system
+        files: .sh$
+        exclude: (paddle_build.sh|fast_install.sh|check_file_diff_approvals.sh)
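The new local hook runs the shellcheck binary over staged shell scripts; pre-commit matches the files and exclude patterns against each staged path with Python's re.search. Note that the files pattern leaves the dot unescaped, so any character before "sh" matches. A small standalone C++ sketch of the selection logic (an editor's illustration, not part of the commit; std::regex's ECMAScript grammar behaves like Python's re for these simple patterns):

#include <iostream>
#include <regex>
#include <string>
#include <vector>

int main() {
  // Patterns copied from the hook above; note the unescaped '.' in files.
  std::regex files(R"(.sh$)");
  std::regex exclude(
      R"((paddle_build.sh|fast_install.sh|check_file_diff_approvals.sh))");
  std::vector<std::string> paths = {"tools/test.sh",
                                    "paddle/scripts/paddle_build.sh",
                                    "run.csh"};
  for (const auto& p : paths) {
    bool picked = std::regex_search(p, files) && !std::regex_search(p, exclude);
    std::cout << p << (picked ? " -> shellcheck runs" : " -> skipped") << "\n";
  }
  // tools/test.sh -> shellcheck runs
  // paddle/scripts/paddle_build.sh -> skipped
  // run.csh -> shellcheck runs (the unescaped '.' matches the 'c')
}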
6 changes: 6 additions & 0 deletions CMakeLists.txt
@@ -240,6 +240,12 @@ if(WITH_AMD_GPU)
   include(hip)
 endif(WITH_AMD_GPU)
 
+if(WITH_DISTRIBUTE)
+    if(LINUX)
+        set(WITH_GLOO ON CACHE STRING "Enable GLOO when compiling WITH_DISTRIBUTE=ON." FORCE)
+    endif()
+endif()
+
 if(WITH_ARM)
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
74 changes: 30 additions & 44 deletions cmake/external/gloo.cmake
@@ -14,55 +14,41 @@
 
 INCLUDE(ExternalProject)
 
-execute_process(COMMAND bash -c "gcc -dumpversion" OUTPUT_VARIABLE GCC_VERSION)
-
-SET(GLOO_PROJECT "extern_gloo")
-IF((NOT DEFINED GLOO_VER) OR (NOT DEFINED GLOO_URL))
-  MESSAGE(STATUS "use pre defined download url")
-  SET(GLOO_VER "master" CACHE STRING "" FORCE)
-  SET(GLOO_NAME "gloo" CACHE STRING "" FORCE)
-
-  if(${GCC_VERSION} VERSION_EQUAL "8.2.0")
-    SET(GLOO_URL "https://fleet.bj.bcebos.com/gloo/gloo.tar.gz.gcc8" CACHE STRING "" FORCE)
-  else()
-    SET(GLOO_URL "https://fleet.bj.bcebos.com/gloo/gloo.tar.gz.gcc482" CACHE STRING "" FORCE)
-  endif()
-ENDIF()
-
-MESSAGE(STATUS "GLOO_NAME: ${GLOO_NAME}, GLOO_URL: ${GLOO_URL}")
-SET(GLOO_SOURCE_DIR "${THIRD_PARTY_PATH}/gloo")
-SET(GLOO_DOWNLOAD_DIR "${GLOO_SOURCE_DIR}/src/${GLOO_PROJECT}")
-SET(GLOO_DST_DIR "gloo")
-SET(GLOO_INSTALL_ROOT "${THIRD_PARTY_PATH}/install")
-SET(GLOO_INSTALL_DIR ${GLOO_INSTALL_ROOT}/${GLOO_DST_DIR})
-SET(GLOO_ROOT ${GLOO_INSTALL_DIR})
-SET(GLOO_INC_DIR ${GLOO_ROOT}/include)
-SET(GLOO_LIB_DIR ${GLOO_ROOT}/lib)
-SET(GLOO_LIB ${GLOO_LIB_DIR}/libgloo.a)
-#SET(GLOO_IOMP_LIB ${GLOO_LIB_DIR}/libiomp5.so) #todo what is this
-SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${GLOO_ROOT}/lib")
-
-INCLUDE_DIRECTORIES(${GLOO_INC_DIR})
-
-FILE(WRITE ${GLOO_DOWNLOAD_DIR}/CMakeLists.txt
-  "PROJECT(GLOO)\n"
-  "cmake_minimum_required(VERSION 3.0)\n"
-  "install(DIRECTORY ${GLOO_NAME}/include ${GLOO_NAME}/lib \n"
-  "        DESTINATION ${GLOO_DST_DIR})\n")
+SET(GLOO_PREFIX_DIR ${THIRD_PARTY_PATH}/gloo)
+SET(GLOO_SOURCE_DIR ${THIRD_PARTY_PATH}/gloo/src/extern_gloo/gloo)
+SET(GLOO_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gloo)
+SET(GLOO_INCLUDE_DIR "${GLOO_INSTALL_DIR}/include" CACHE PATH "gloo include directory." FORCE)
+SET(GLOO_LIBRARY_DIR "${GLOO_INSTALL_DIR}/lib" CACHE PATH "gloo library directory." FORCE)
+# As we add extra features for gloo, we use the non-official repo
+SET(GLOO_REPOSITORY https://github.com/sandyhouse/gloo.git)
+SET(GLOO_TAG v0.0.2)
+SET(GLOO_LIBRARIES "${GLOO_INSTALL_DIR}/lib/libgloo.a" CACHE FILEPATH "gloo library." FORCE)
+
+INCLUDE_DIRECTORIES(${GLOO_INCLUDE_DIR})
+
+cache_third_party(extern_gloo
+    REPOSITORY ${GLOO_REPOSITORY}
+    TAG ${GLOO_TAG}
+    DIR GLOO_SOURCE_DIR)
 
 ExternalProject_Add(
-  ${GLOO_PROJECT}
+  extern_gloo
   ${EXTERNAL_PROJECT_LOG_ARGS}
-  PREFIX ${GLOO_SOURCE_DIR}
-  DOWNLOAD_DIR ${GLOO_DOWNLOAD_DIR}
-  DOWNLOAD_COMMAND wget --no-check-certificate ${GLOO_URL} -c -q -O ${GLOO_NAME}.tar.gz
-                   && tar zxvf ${GLOO_NAME}.tar.gz
-  DOWNLOAD_NO_PROGRESS 1
+  ${SHALLOW_CLONE}
+  "${GLOO_DOWNLOAD_CMD}"
+  PREFIX "${GLOO_PREFIX_DIR}"
+  SOURCE_DIR "${GLOO_SOURCE_DIR}"
   UPDATE_COMMAND ""
-  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOO_INSTALL_ROOT}
-  CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOO_INSTALL_ROOT}
+  CONFIGURE_COMMAND ""
+  BUILD_COMMAND mkdir -p ${GLOO_SOURCE_DIR}/build
+                && cd ${GLOO_SOURCE_DIR}/build && cmake .. && make
+                && mkdir -p ${GLOO_LIBRARY_DIR} ${GLOO_INCLUDE_DIR}/gloo
+  INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${GLOO_SOURCE_DIR}/build/gloo/libgloo.a ${GLOO_LIBRARY_DIR}
+          COMMAND ${CMAKE_COMMAND} -E copy_directory "${GLOO_SOURCE_DIR}/gloo/" "${GLOO_INCLUDE_DIR}/gloo"
 )
 
-ADD_LIBRARY(gloo SHARED IMPORTED GLOBAL)
-SET_PROPERTY(TARGET gloo PROPERTY IMPORTED_LOCATION ${GLOO_LIB})
-
+ADD_LIBRARY(gloo STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET gloo PROPERTY IMPORTED_LOCATION ${GLOO_LIBRARIES})
 ADD_DEPENDENCIES(gloo ${GLOO_PROJECT})
3 changes: 2 additions & 1 deletion paddle/fluid/framework/CMakeLists.txt
@@ -247,7 +247,8 @@ cc_library(parallel_executor SRCS parallel_executor.cc DEPS
         graph build_strategy collective_helper
         fast_threaded_ssa_graph_executor variable_helper)
 
-cc_test(dist_multi_trainer_test SRCS dist_multi_trainer_test.cc DEPS executor)
+cc_test(dist_multi_trainer_test SRCS dist_multi_trainer_test.cc DEPS
+        conditional_block_op executor)
 cc_library(prune SRCS prune.cc DEPS framework_proto boost)
 cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
 cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry
5 changes: 5 additions & 0 deletions paddle/fluid/framework/device_worker.h
@@ -19,6 +19,7 @@ limitations under the License. */
 #include <map>
 #include <memory>
 #include <mutex>  // NOLINT
+#include <set>
 #include <string>
 #include <thread>  // NOLINT
 #include <unordered_map>  // NOLINT
@@ -313,6 +314,10 @@ class DownpourWorker : public HogwildWorker {
   std::map<uint64_t, std::vector<std::string>> dense_value_names_;
   std::map<uint64_t, uint64_t> table_dependency_;
   std::vector<std::pair<uint64_t, uint64_t>> copy_dense_tables_;
+  // multitask
+  std::map<int32_t, uint64_t> cond2table_map_;
+  std::set<uint64_t> condvalue_set_;
+  bool flag_partial_push_;
 
  private:
   // std::vector<std::string> dump_param_;
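For reference, a standalone sketch (an editor's illustration with a hypothetical type name, not part of the commit) of what the three new multitask members encode; Initialize() and TrainFiles() in downpour_worker.cc below show how they are filled and consumed:

#include <cstdint>
#include <map>
#include <set>

struct MultitaskPushState {  // hypothetical standalone rendering of the new members
  // batch condition value -> the dense table that task is allowed to push
  std::map<int32_t, uint64_t> cond2table_map;
  // every condition-gated dense table id (the values of cond2table_map)
  std::set<uint64_t> condvalue_set;
  // set once any (condition, table) pair is configured in the trainer desc
  bool flag_partial_push = false;
};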
51 changes: 44 additions & 7 deletions paddle/fluid/framework/downpour_worker.cc
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include <cstdlib>
+#include <ctime>
 #include "paddle/fluid/framework/device_worker.h"
 #include "paddle/fluid/platform/cpu_helper.h"
 
@@ -65,6 +67,13 @@ void DownpourWorker::Initialize(const TrainerDesc& desc) {
     }
   }
 
+  flag_partial_push_ = false;
+  for (auto& m : param_.program_config(0).partial_pushdense_condtable_map()) {
+    cond2table_map_[m.key()] = m.value();
+    condvalue_set_.insert(m.value());
+    flag_partial_push_ = true;
+  }
+
   skip_ops_.resize(param_.skip_ops_size());
   for (int i = 0; i < param_.skip_ops_size(); ++i) {
     skip_ops_[i] = param_.skip_ops(i);
@@ -876,14 +885,42 @@ void DownpourWorker::TrainFiles() {
 #endif
 
     if (need_to_push_dense_) {
-      for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
-           ++i) {
-        uint64_t tid = static_cast<uint64_t>(
-            param_.program_config(0).push_dense_table_id(i));
-        fleet_ptr_->PushDenseVarsAsync(
-            *thread_scope_, tid, dense_grad_names_[tid], &push_sparse_status_,
-            scale_datanorm_, cur_batch);
+      if (flag_partial_push_) {
+        Variable* var = (*thread_scope_).FindVar("cond_tag");
+        LoDTensor* tensor = var->GetMutable<LoDTensor>();
+        // check type in python code
+        int64_t* cond_value_batch = tensor->data<int64_t>();
+
+        for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
+             ++i) {
+          uint64_t tid = static_cast<uint64_t>(
+              param_.program_config(0).push_dense_table_id(i));
+          if (condvalue_set_.find(tid) != condvalue_set_.end()) {
+            // common dense table must push dense
+            if (cond2table_map_[cond_value_batch[0]] != tid) {
+              // can't push dense
+              continue;
+            }
+          }
+
+          VLOG(3) << "push multitask dense gradient " << tid;
+          fleet_ptr_->PushDenseVarsAsync(
+              *thread_scope_, tid, dense_grad_names_[tid], &push_sparse_status_,
+              scale_datanorm_, cur_batch);
+        }
+
+      } else {
+        for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
+             ++i) {
+          uint64_t tid = static_cast<uint64_t>(
+              param_.program_config(0).push_dense_table_id(i));
+
+          fleet_ptr_->PushDenseVarsAsync(
+              *thread_scope_, tid, dense_grad_names_[tid], &push_sparse_status_,
+              scale_datanorm_, cur_batch);
+        }
       }
 
       VLOG(3) << "push dense gradient done.";
 
      // the following code should be more precise and clean
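The new TrainFiles() branch implements a simple gating rule: when partial push is enabled, a condition-gated dense table is pushed only if it is the table mapped to the batch's condition value (read from the "cond_tag" variable); tables outside condvalue_set_ are common tables and are always pushed. A minimal runnable sketch of that rule (standalone, hypothetical names; an editor's illustration, not part of the commit):

#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <vector>

bool ShouldPushDense(uint64_t tid, int64_t cond_value,
                     const std::map<int32_t, uint64_t>& cond2table,
                     const std::set<uint64_t>& gated_tables) {
  if (gated_tables.count(tid) == 0) return true;  // common table: always push
  auto it = cond2table.find(static_cast<int32_t>(cond_value));
  // gated table: push only when it matches the batch's condition value
  return it != cond2table.end() && it->second == tid;
}

int main() {
  std::map<int32_t, uint64_t> cond2table{{0, 10}, {1, 11}};  // as Initialize() fills it
  std::set<uint64_t> gated_tables{10, 11};
  std::vector<uint64_t> tids{9, 10, 11};  // table 9 is a common dense table
  for (uint64_t tid : tids) {
    std::cout << "table " << tid
              << (ShouldPushDense(tid, /*cond_value=*/1, cond2table, gated_tables)
                      ? ": push\n"
                      : ": skip\n");
  }
  // table 9: push, table 10: skip, table 11: push
}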
6 changes: 6 additions & 0 deletions paddle/fluid/framework/fleet/fleet_wrapper.cc
@@ -29,6 +29,12 @@ limitations under the License. */
 #include "paddle/fluid/framework/fleet/fleet_wrapper.h"
 #include <algorithm>
 #include <utility>
+#include "paddle/fluid/framework/channel.h"
+#include "paddle/fluid/framework/data_feed.h"
+#include "paddle/fluid/framework/io/fs.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/platform/timer.h"
 
 namespace paddle {
 namespace framework {
9 changes: 9 additions & 0 deletions paddle/fluid/framework/fleet/heter_wrapper.cc
@@ -27,6 +27,15 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/framework/fleet/heter_wrapper.h"
+#include <algorithm>
+#include <utility>
+#include "paddle/fluid/framework/channel.h"
+#include "paddle/fluid/framework/data_feed.h"
+#include "paddle/fluid/framework/device_worker.h"
+#include "paddle/fluid/framework/io/fs.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/platform/timer.h"
 #ifdef PADDLE_WITH_PSLIB
 
 namespace paddle {
7 changes: 7 additions & 0 deletions paddle/fluid/framework/hetercpu_worker.cc
@@ -12,6 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include "paddle/fluid/framework/device_worker.h"
+#include "paddle/fluid/framework/device_worker_factory.h"
+#include "paddle/fluid/framework/fleet/fleet_wrapper.h"
+#include "paddle/fluid/framework/fleet/heter_wrapper.h"
+#include "paddle/fluid/platform/cpu_helper.h"
+#include "paddle/fluid/string/string_helper.h"
+
 #ifdef PADDLE_WITH_PSLIB
 
 #if defined _WIN32 || defined __APPLE__
3 changes: 3 additions & 0 deletions paddle/fluid/framework/hogwild_worker.cc
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/device_worker.h"
 #include "paddle/fluid/framework/device_worker_factory.h"
+#include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
 #include "paddle/fluid/operators/distributed/distributed.h"
 #include "paddle/fluid/platform/cpu_helper.h"
 #include "paddle/fluid/platform/lodtensor_printer.h"
@@ -47,6 +48,8 @@ void HogwildWorker::CreateThreadOperators(const ProgramDesc &program) {
     ops_.push_back(local_op_ptr);
     continue;
   }
+  operators::PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
+      program, 0, ops_);
 }
 
 void HogwildWorker::CreateThreadScope(const ProgramDesc &program) {
13 changes: 0 additions & 13 deletions paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc
@@ -84,19 +84,6 @@ void ConvBiasFusePass::ApplyImpl(ir::Graph* graph) const {
       VLOG(3) << "do not perform " + type() + "+bias fuse";
       return;
     }
-    if (conv->Op()->HasAttr("dilations")) {
-      auto dilations =
-          BOOST_GET_CONST(std::vector<int>, conv->Op()->GetAttr("dilations"));
-      for (const auto& d : dilations) {
-        if (d != 1) {
-          LOG(WARNING)
-              << "dilation conv not supported in MKLDNN, fuse not apply "
-              << "and set conv attribute use_mkldnn = false";
-          conv->Op()->SetAttr("use_mkldnn", false);
-          return;
-        }
-      }
-    }
 
     auto* eltwise_bias_tensor =
         scope->FindVar(eltwise_bias->Name())->GetMutable<LoDTensor>();
2 changes: 2 additions & 0 deletions paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <string>
+
 #include "paddle/fluid/framework/ir/shuffle_channel_detect_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 
7 changes: 2 additions & 5 deletions paddle/fluid/framework/pull_dense_worker.cc
@@ -14,14 +14,11 @@ limitations under the License. */
 #include <time.h>
 
 #include "paddle/fluid/framework/device_worker.h"
-#include "paddle/fluid/framework/fleet/fleet_wrapper.h"
 
 namespace paddle {
 namespace framework {
 
-class LoDTensor;
-class Scope;
-class Variable;
 
 std::shared_ptr<PullDenseWorker> PullDenseWorker::s_instance_ = NULL;
 std::mutex PullDenseWorker::mutex_for_version_;
 std::map<uint64_t, uint64_t> PullDenseWorker::last_versions_;
@@ -70,7 +67,7 @@ void PullDenseWorker::Initialize(const TrainerDesc& param) {
 }
 
 void PullDenseWorker::CreatePinVar() {
-#if (defined PADDLE_WITH_CUDA) || (defined PADDLE_WITH_PSLIB)
+#if (defined PADDLE_WITH_CUDA) || (defined PADDLE_WITH_XPU)
   // for (auto& v : dense_value_names_) {
   //   for (auto& name : v.second) {
   for (int i = 0; i < dwp_param_.program_config(0).pull_dense_table_id_size();
21 changes: 21 additions & 0 deletions paddle/fluid/framework/tensor_util.cc
@@ -13,11 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/framework/tensor_util.h"
+
 #include <algorithm>
 #include <limits>
 #include <memory>
+#include <string>
 #include <utility>
 #include <vector>
+
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/platform/profiler.h"
 
@@ -81,6 +84,12 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
   }
 #endif
 #ifdef PADDLE_WITH_CUDA
+  else if (platform::is_cuda_pinned_place(src_place) &&  // NOLINT
+           platform::is_cuda_pinned_place(dst_place)) {
+    memory::Copy(BOOST_GET_CONST(platform::CUDAPinnedPlace, dst_place), dst_ptr,
+                 BOOST_GET_CONST(platform::CUDAPinnedPlace, src_place), src_ptr,
+                 size);
+  }
   else if (platform::is_cuda_pinned_place(src_place) &&  // NOLINT
            platform::is_cpu_place(dst_place)) {
     memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
@@ -282,6 +291,12 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   }
 #endif
 #ifdef PADDLE_WITH_CUDA
+  else if (platform::is_cuda_pinned_place(src_place) &&  // NOLINT
+           platform::is_cuda_pinned_place(dst_place)) {
+    memory::Copy(BOOST_GET_CONST(platform::CUDAPinnedPlace, dst_place), dst_ptr,
+                 BOOST_GET_CONST(platform::CUDAPinnedPlace, src_place), src_ptr,
+                 size);
+  }
   else if (platform::is_cuda_pinned_place(src_place) &&  // NOLINT
            platform::is_cpu_place(dst_place)) {
     memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
@@ -943,6 +958,12 @@ void TensorFromDLPack(const ::DLTensor& dl_tensor, framework::Tensor* dst) {
 #endif
 }
 
+template <typename T>
+std::string format_tensor(const framework::Tensor& tensor) {
+  // TODO(zhiqiu): use the print option to format tensor.
+  return "NOT IMPLEMENTED";
+}
+
 template <typename T>
 std::ostream& print_tensor(std::ostream& os, const framework::Tensor& tensor) {
   auto inspect = tensor.data<T>();
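TensorCopy and TensorCopySync previously had no branch for a pinned-to-pinned place pair, so such a copy fell through past all the place checks; the two new blocks route it through memory::Copy on the host. A hedged usage sketch (assumes the Paddle-1.x internal API visible in the surrounding context lines and a PADDLE_WITH_CUDA build; an editor's illustration, not part of the commit):

#include "paddle/fluid/framework/tensor_util.h"

void PinnedToPinnedCopy() {
  namespace fw = paddle::framework;
  namespace plat = paddle::platform;
  fw::Tensor src, dst;
  // Allocate the source in page-locked (CUDA pinned) host memory.
  src.mutable_data<float>(fw::make_ddim({2, 3}), plat::CUDAPinnedPlace());
  // Destination place is also pinned, so this now takes the new branch
  // instead of falling through the place-pair dispatch.
  fw::TensorCopySync(src, plat::CUDAPinnedPlace(), &dst);
}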