Skip to content

Commit

Permalink
Async GRPC sendrecv (#7133)
Browse files Browse the repository at this point in the history
Async GRPC sendrecv
  • Loading branch information
gongweibao authored Jan 11, 2018
1 parent 020630b commit da3087a
Show file tree
Hide file tree
Showing 14 changed files with 775 additions and 335 deletions.
2 changes: 1 addition & 1 deletion paddle/operators/detail/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
grpc_library(sendrecvop_grpc SRCS recv_impl.cc send_impl.cc PROTO send_recv.proto DEPS lod_tensor selected_rows)
grpc_library(sendrecvop_grpc SRCS sendrecvop_utils.cc grpc_client.cc grpc_server.cc PROTO send_recv.proto DEPS lod_tensor selected_rows)
147 changes: 147 additions & 0 deletions paddle/operators/detail/grpc_client.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "grpc_client.h"
namespace paddle {
namespace operators {
namespace detail {

bool RPCClient::AsyncSendVariable(const std::string& ep,
const platform::DeviceContext& ctx,
const framework::Scope& scope,
const std::string& var_name,
int64_t time_out) {
sendrecv::VariableMessage req;
auto* var = scope.FindVar(var_name);
SerializeToMessage(var_name, var, ctx, &req);

// varhandle
VarHandle var_h;
var_h.ep = ep;
var_h.scope = &scope;
var_h.name = var_name;
var_h.ctx = &ctx;

// stub context
auto ch = GetChannel(ep);
SendProcessor* s = new SendProcessor(ch);
s->Prepare(var_h, time_out);
s->response_call_back_ = NULL;

auto rpc = s->stub_->AsyncSendVariable(s->context_.get(), req, &cq_);
rpc->Finish(&s->reply_, &s->status_, (void*)s);

req_count_++;

return true;
}

void ProcGetResponse(const VarHandle& var_h,
const sendrecv::VariableMessage& ret_msg) {
auto* outvar = var_h.scope->FindVar(var_h.name);

std::istringstream iss(ret_msg.serialized());
DeserializeFromMessage(ret_msg, *var_h.ctx, outvar);
}

bool RPCClient::AsyncGetVariable(const std::string& ep,
const platform::DeviceContext& ctx,
const framework::Scope& scope,
const std::string& var_name,
int64_t time_out) {
sendrecv::VariableMessage req;
req.set_varname(var_name);

auto* var = scope.FindVar(var_name);
SerializeToMessage(var_name, var, ctx, &req);

// varhandle
VarHandle var_h;
var_h.ep = ep;
var_h.scope = &scope;
var_h.name = var_name;
var_h.ctx = &ctx;

// stub context
auto ch = GetChannel(ep);
GetProcessor* s = new GetProcessor(ch);
s->Prepare(var_h, time_out);
s->response_call_back_ = ProcGetResponse;

auto rpc = s->stub_->AsyncGetVariable(s->context_.get(), req, &cq_);
rpc->Finish(&s->reply_, &s->status_, (void*)s);

req_count_++;

return true;
}

bool RPCClient::wait() {
bool ok = true;

while (true) {
if (req_count_ <= 0) {
break;
}

if (!Proceed()) {
LOG(ERROR) << "Get meets CompletionQueue error";
return false;
}
}

return ok;
}

bool RPCClient::Proceed() {
void* tag = NULL;
bool ok = false;

// request counts.
if (!cq_.Next(&tag, &ok)) {
return false;
}
req_count_--;

GPR_ASSERT(ok);
PADDLE_ENFORCE(tag);

// TODO(gongwb): add more retries.
ClientBase* c = static_cast<ClientBase*>(tag);
if (!c->status_.ok()) {
delete c;
return true;
}

c->Process();
delete c;
return true;
}

std::shared_ptr<grpc::Channel> RPCClient::GetChannel(const std::string& ep) {
auto it = channels_.find(ep);
if (it != channels_.end()) {
return it->second;
}

auto ch = std::shared_ptr<grpc::Channel>(
grpc::CreateChannel(ep, grpc::InsecureChannelCredentials()));

channels_[ep] = ch;
return ch;
}

} // namespace detail
} // namespace operators
} // namespace paddle
147 changes: 147 additions & 0 deletions paddle/operators/detail/grpc_client.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <grpc++/grpc++.h>
#include <grpc/support/log.h>
#include <time.h>
#include <chrono>
#include <ctime>
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include "paddle/framework/data_type.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/operators/detail/sendrecvop_utils.h"
#include "paddle/operators/detail/simple_block_queue.h"

namespace paddle {
namespace operators {
namespace detail {

struct VarHandle {
std::string ep;
const platform::DeviceContext* ctx;
const framework::Scope* scope;
std::string name;

std::string String() const {
std::ostringstream s;
s << "name:[" << name << "] ep:[" << ep << "]";
return s.str();
}
};

void ProcGetResponse(const VarHandle& var_h,
const sendrecv::VariableMessage& msg);

class ClientBase {
public:
explicit ClientBase(std::shared_ptr<grpc::Channel> ch) {
stub_ = sendrecv::SendRecvService::NewStub(ch);
context_ = NULL;
}

virtual ~ClientBase() {}

virtual void Prepare(const VarHandle& var_info, int64_t time_out) {
context_.reset(new grpc::ClientContext());
var_h_ = var_info;

std::chrono::system_clock::time_point deadline =
std::chrono::system_clock::now() + std::chrono::milliseconds(time_out);

context_->set_deadline(deadline);
}

virtual void Process() = 0;

std::unique_ptr<sendrecv::SendRecvService::Stub> stub_;
std::unique_ptr<grpc::ClientContext> context_;
grpc::Status status_;
VarHandle var_h_;
};

typedef std::function<void(const VarHandle&, const sendrecv::VoidMessage&)>
RequestSendCallBack;

class SendProcessor : public ClientBase {
public:
explicit SendProcessor(std::shared_ptr<grpc::Channel> ch) : ClientBase(ch) {}

virtual ~SendProcessor() {}

virtual void Process() {
if (response_call_back_) {
response_call_back_(var_h_, reply_);
}
}

sendrecv::VoidMessage reply_;
RequestSendCallBack response_call_back_ = NULL;
};

typedef std::function<void(const VarHandle&, const sendrecv::VariableMessage&)>
RequestGetCallBack;

class GetProcessor : public ClientBase {
public:
explicit GetProcessor(std::shared_ptr<grpc::Channel> ch) : ClientBase(ch) {}

virtual ~GetProcessor() {}

virtual void Process() {
if (response_call_back_) {
response_call_back_(var_h_, reply_);
}
}

sendrecv::VariableMessage reply_;
RequestGetCallBack response_call_back_ = ProcGetResponse;
};

class RPCClient {
public:
bool AsyncSendVariable(const std::string& ep,
const platform::DeviceContext& ctx,
const framework::Scope& scope,
const std::string& var_name,
int64_t time_out = 600 * 1000);

bool AsyncGetVariable(const std::string& ep,
const platform::DeviceContext& ctx,
const framework::Scope& scope,
const std::string& var_name,
int64_t time_out = 600 * 1000);
bool wait();

private:
bool Proceed();
std::shared_ptr<grpc::Channel> GetChannel(const std::string& ep);

private:
grpc::CompletionQueue cq_;
std::map<std::string, std::shared_ptr<grpc::Channel>> channels_;
int64_t req_count_ = 0;
};

} // namespace detail
} // namespace operators
} // namespace paddle
Loading

0 comments on commit da3087a

Please sign in to comment.