Change the CpuMatrix::copyFrom and CpuVector::copyFrom with the strea… #2618

Merged
merged 1 commit on Jun 27, 2017
5 changes: 5 additions & 0 deletions paddle/gserver/layers/Layer.cpp
@@ -191,6 +191,11 @@ void Layer::addOutputArgument(int deviceId) {
void Layer::copyOutputToOtherDevice() {
for (size_t i = 0; i != outputOtherDevice_.size(); i++) {
SetDevice device(outputOtherDevice_[i].deviceId);
// If outputOtherDevice_[i].value is a CpuMatrix,
// copyFrom is a synchronous interface.
// If outputOtherDevice_[i].value is a GpuMatrix, copyFrom can be an
// asynchronous interface, because the subsequent calculations all run
// on HPPL_STREAM_DEFAULT and are therefore ordered after the copy.
outputOtherDevice_[i].value->copyFrom(*getOutputValue(),
HPPL_STREAM_DEFAULT);
outputOtherDevice_[i].sequenceStartPositions =
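The comment above relies on standard stream ordering: operations enqueued on the same stream execute in order, so a device-side consumer on HPPL_STREAM_DEFAULT needs no explicit wait, while an immediate host-side reader does. A minimal sketch of that rule in plain CUDA (illustrative only; Paddle's hl_memcpy_async/hl_stream_synchronize are assumed to wrap these runtime calls):

#include <cstdio>
#include <cuda_runtime.h>

__global__ void scale(float* data, int n, float factor) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) data[i] *= factor;
}

int main() {
  const int n = 1024;
  float* host = nullptr;
  float* result = nullptr;
  cudaMallocHost(&host, n * sizeof(float));    // pinned memory, needed for
  cudaMallocHost(&result, n * sizeof(float));  // truly asynchronous copies
  for (int i = 0; i < n; ++i) host[i] = 1.0f;

  float* device = nullptr;
  cudaMalloc(&device, n * sizeof(float));
  cudaStream_t stream;
  cudaStreamCreate(&stream);

  // Host-to-device copy followed by a kernel on the SAME stream:
  // stream ordering alone makes this correct, no explicit wait is needed.
  cudaMemcpyAsync(device, host, n * sizeof(float), cudaMemcpyHostToDevice, stream);
  scale<<<(n + 255) / 256, 256, 0, stream>>>(device, n, 2.0f);

  // Device-to-host copy whose result the CPU reads right away:
  // the CPU is not on the stream, so it must wait explicitly.
  cudaMemcpyAsync(result, device, n * sizeof(float), cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);  // without this, result[] could still be stale

  printf("result[0] = %f\n", result[0]);  // 2.0 once the stream has drained
  cudaStreamDestroy(stream);
  cudaFree(device);
  cudaFreeHost(host);
  cudaFreeHost(result);
  return 0;
}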
2 changes: 2 additions & 0 deletions paddle/math/Matrix.cpp
@@ -1565,6 +1565,8 @@ void CpuMatrix::copyFrom(const Matrix& src, hl_stream_t stream) {
const_cast<real*>(src.getData()),
sizeof(real) * elementCnt_,
stream);
// Synchronize here to guarantee the data has been copied before returning;
// CpuMatrix::copyFrom must behave as a synchronous interface.
hl_stream_synchronize(stream);
} else if (typeid(src) == typeid(CpuMatrix)) {
memcpy(data_, src.getData(), sizeof(real) * elementCnt_);
} else {
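At the call site, the added hl_stream_synchronize turns CpuMatrix::copyFrom(src, stream) into a copy that is safe to read immediately. A sketch of that contract using only names visible in this PR (assumes the PaddlePaddle source tree; not compiled here):

#include "paddle/math/Matrix.h"

namespace paddle {

void copyDeviceResultToHost(const GpuMatrix& gpu, CpuMatrix& host) {
  // hl_memcpy_async is issued on HPPL_STREAM_DEFAULT inside copyFrom. Before
  // this patch copyFrom returned while the transfer could still be in flight,
  // so the read below could observe a half-written buffer; after this patch
  // copyFrom blocks on hl_stream_synchronize(stream) first.
  host.copyFrom(gpu, HPPL_STREAM_DEFAULT);
  real firstValue = host.getData()[0];  // safe: the copy has finished
  (void)firstValue;
}

}  // namespace paddle

The identical reasoning applies to the CpuVectorT change further down.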
3 changes: 2 additions & 1 deletion paddle/math/Matrix.h
@@ -239,7 +239,8 @@ class Matrix : public BaseMatrix {
LOG(FATAL) << "Not implemented";
}

- // asynchronous copy
+ // For GpuMatrix this is an asynchronous copy interface.
+ // For CpuMatrix this is a synchronous copy interface.
virtual void copyFrom(const Matrix& src, hl_stream_t stream) {
LOG(FATAL) << "Not implemented";
}
2 changes: 2 additions & 0 deletions paddle/math/Vector.cpp
@@ -657,6 +657,8 @@ void CpuVectorT<T>::copyFrom(const VectorT<T>& src, hl_stream_t stream) {
(void*)src.getData(),
sizeof(T) * this->getSize(),
stream);
// Synchronize here to guarantee the data has been copied before returning;
// CpuVectorT::copyFrom must behave as a synchronous interface.
hl_stream_synchronize(stream);
} else {
src.copyTo(this);
}
8 changes: 4 additions & 4 deletions paddle/math/Vector.h
@@ -168,11 +168,11 @@ class VectorT : public BaseVector<T> {
virtual void copyFrom(const VectorT<T>& src) = 0;

/**
- * If use_gpu, this function will push the copy-task to the specifed-stream
- * and return immediately.
+ * For GpuVector, this function is an asynchronous interface:
+ * it pushes the copy task onto the specified stream and returns immediately.
*
- * If not use GPU, this function is same as
- * the copyFrom(const VectorT<T>& src), which use stream HPPL_STREAM_DEFAULT.
+ * For CpuVector, this function is a synchronous interface,
+ * the same as copyFrom(const VectorT<T>& src).
*/
virtual void copyFrom(const VectorT<T>& src, hl_stream_t stream) = 0;

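The GpuVector overload keeps its asynchronous behaviour because device-side consumers are ordered by the stream; only the host has to wait explicitly, for example before reusing the source buffer. A hedged usage sketch (assumes the PaddlePaddle source tree and the CpuVector/GpuVector typedefs; hl_stream_synchronize is the same call added elsewhere in this PR):

#include "paddle/math/Vector.h"

namespace paddle {

void uploadAndOverlap(const CpuVector& host, GpuVector& device) {
  // Asynchronous: returns as soon as the copy is queued on HPPL_STREAM_DEFAULT.
  device.copyFrom(host, HPPL_STREAM_DEFAULT);

  // ... unrelated CPU work can overlap with the transfer here ...

  // Kernels launched later on HPPL_STREAM_DEFAULT are ordered after the copy,
  // so they need no explicit wait; the host only waits if it must reuse or
  // free the source buffer before the transfer is known to be complete.
  hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}

}  // namespace paddle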
14 changes: 14 additions & 0 deletions paddle/math/tests/test_matrixCompare.cpp
@@ -1127,4 +1127,18 @@ TEST(Matrix, MaxOutFwdBwd) {
}
}

TEST(CpuMatrix, copyFrom) {
const size_t height = 1000;
const size_t width = 1000;
CpuMatrix cpu(height, width);
GpuMatrix gpu(height, width);
CpuMatrix copy(height, width);

cpu.randomizeUniform();
gpu.copyFrom(cpu);
copy.copyFrom(gpu, HPPL_STREAM_DEFAULT);

TensorCheckEqual(cpu, copy);
}
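The new test exercises only the matrix path. A hypothetical companion test for the vector path (not part of this PR; it assumes the CpuVector/GpuVector typedefs and the gtest scaffolding already used in this file) might look like:

TEST(CpuVector, copyFrom) {
  const size_t size = 1000 * 1000;
  CpuVector cpu(size);
  GpuVector gpu(size);
  CpuVector copy(size);

  // Fill the source vector with distinct, exactly representable values.
  for (size_t i = 0; i < size; ++i) {
    cpu.getData()[i] = static_cast<real>(i);
  }
  gpu.copyFrom(cpu);
  // The synchronous CPU-destination overload exercised by this PR.
  copy.copyFrom(gpu, HPPL_STREAM_DEFAULT);

  for (size_t i = 0; i < size; ++i) {
    EXPECT_EQ(cpu.getData()[i], copy.getData()[i]);
  }
}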

#endif