Skip to content

Commit

Permalink
merged develop
Browse files (browse the repository at this point in the history)
  • Loading branch information
jim19930609 committed Apr 1, 2022
2 parents 022df81 + 53a62ea commit 8c4bb49
Show file tree
Hide file tree
Showing 359 changed files with 15,507 additions and 9,625 deletions.
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ ENDIF()

if(NOT DEFINED XPU_BASE_URL)
SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220327")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220331")
else()
SET(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
Expand Down
3 changes: 0 additions & 3 deletions paddle/fluid/distributed/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
add_subdirectory(collective)
add_subdirectory(store)
if(NOT WITH_PSCORE)
if(WITH_HETERPS)
add_subdirectory(ps)
endif()
add_subdirectory(fleet_executor)
return()
endif()
Expand Down
6 changes: 3 additions & 3 deletions paddle/fluid/distributed/collective/reducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ EagerReducer::EagerReducer(

if (find_unused_vars_each_step_) {
global_used_vars_ = paddle::experimental::empty(
ScalarArray({static_cast<int32_t>(tensors_.size())}), DataType::INT32,
IntArray({static_cast<int32_t>(tensors_.size())}), DataType::INT32,
inner_place_);
}
}
Expand Down Expand Up @@ -364,7 +364,7 @@ void EagerReducer::InitializeGroups(
// process the dense gradient.
InitializeDenseGroups(tensor_indices_, &group);
group.dense_contents_ = paddle::experimental::empty(
ScalarArray({group.all_length_}), group.dtype_, inner_place_);
IntArray({group.all_length_}), group.dtype_, inner_place_);
}

// map tensors to this group by VariableLocator
Expand Down Expand Up @@ -403,7 +403,7 @@ void EagerReducer::InitializeDenseGroups(
p_group->length_.push_back(size);

// for concat operator
p_group->origin_shapes_.push_back(ScalarArray(tensor.shape()));
p_group->origin_shapes_.push_back(IntArray(tensor.shape()));
p_group->dense_tensors_.push_back(phi::DenseTensor());

const auto &dtype = tensor.dtype();
Expand Down
6 changes: 3 additions & 3 deletions paddle/fluid/distributed/collective/reducer.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ namespace paddle {
namespace distributed {
using Tensor = paddle::experimental::Tensor;
using Scalar = paddle::experimental::ScalarBase<paddle::experimental::Tensor>;
using ScalarArray =
paddle::experimental::ScalarArrayBase<paddle::experimental::Tensor>;
using IntArray =
paddle::experimental::IntArrayBase<paddle::experimental::Tensor>;
using Backend = paddle::experimental::Backend;

std::vector<std::vector<size_t>> Eager_AssignGroupBySize(
Expand All @@ -52,7 +52,7 @@ class EagerGroup {
std::vector<phi::DenseTensor> dense_tensors_;
std::vector<int64_t> length_;
int64_t all_length_{0};
std::vector<ScalarArray> origin_shapes_;
std::vector<IntArray> origin_shapes_;

// Global indices of participating tensors in the group
std::vector<size_t> tensor_indices_;
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/ps/service/CMakeLists.txt
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ cc_library(server SRCS server.cc DEPS downpour_server boost ${RPC_DEPS})
cc_library(communicator SRCS communicator/communicator.cc DEPS scope client boost table math_function selected_rows_functor ${RPC_DEPS})
cc_library(ps_service SRCS ps_service/service.cc DEPS communicator client server boost ${RPC_DEPS})

cc_library(heter_server SRCS heter_server.cc DEPS brpc_utils ${COMMON_DEPS} ${RPC_DEPS})
cc_library(heter_client SRCS heter_client.cc DEPS brpc_utils ${COMMON_DEPS} ${RPC_DEPS})
cc_library(heter_server SRCS heter_server.cc DEPS heter_client brpc_utils ${COMMON_DEPS} ${RPC_DEPS})

set_source_files_properties(ps_service/graph_py_service.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_library(graph_py_service SRCS ps_service/graph_py_service.cc DEPS ps_service)
Expand Down
6 changes: 4 additions & 2 deletions paddle/fluid/distributed/ps/service/brpc_ps_client.cc
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ DEFINE_int32(pserver_sparse_merge_thread, 1, "pserver sparse merge thread num");
DEFINE_int32(pserver_sparse_table_shard_num, 1000,
"sparse table shard for save & load");

// Defines the int32 flag `heter_world_size` with a default value of 100 and
// the description "group size".
DEFINE_int32(heter_world_size, 100, "group size"); // configurable

namespace paddle {
namespace framework {
class Scope;
Expand Down Expand Up @@ -1518,7 +1520,7 @@ void sparse_local_merge(ValueAccessor *accessor, float *merge_data,
merge_data_shell[i] = merge_data + i;
another_data_shell[i] = another_data + i;
}
accessor->merge(merge_data_shell, another_data_shell, 1);
accessor->Merge(merge_data_shell, another_data_shell, 1);
}

int BrpcPsClient::push_sparse_async_shard_merge(
Expand Down Expand Up @@ -1757,7 +1759,7 @@ void BrpcPsClient::push_dense_task_consume() {
async_task]() -> int {
auto &tmp_task_vec = *(async_task->data());
const float *merge_data = tmp_task_vec.data();
accessor->merge(&total_send_data, &merge_data,
accessor->Merge(&total_send_data, &merge_data,
total_send_data_size);
#pragma optimize("", off)
auto *debug_closure = closure;
Expand Down
5 changes: 3 additions & 2 deletions paddle/fluid/distributed/ps/service/brpc_ps_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,8 @@ int32_t BrpcPsService::pull_dense(Table *table, const PsRequestMessage &request,
}

auto res_data = butil::get_object<std::vector<float>>();
res_data->resize(num * table->value_accesor()->select_size() / sizeof(float));
res_data->resize(num * table->value_accesor()->GetTableInfo(SELECT_SIZE) /
sizeof(float));
TableContext table_context;
table_context.value_type = Dense;
table_context.pull_context.values = res_data->data();
Expand Down Expand Up @@ -385,7 +386,7 @@ int32_t BrpcPsService::pull_sparse(Table *table,

CostTimer timer("pserver_server_pull_sparse");
uint32_t num = *(uint32_t *)(request.params(0).c_str());
auto dim = table->value_accesor()->select_dim();
auto dim = table->value_accesor()->GetTableInfo(SELECT_DIM);

thread_local std::string req_buffer;
req_buffer.reserve(req_buffer_size);
Expand Down

1 comment on commit 8c4bb49

@paddle-bot-old
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🕵️ CI failures summary

🔍 PR: #41282 Commit ID: 8c4bb49 contains failed CI.

🔹 Failed: PR-CI-Inference

Unknown Failed
Unknown Failed

Please sign in to comment.