Skip to content

Commit

Permalink
"relauch ci"
Browse files Browse the repository at this point in the history
  • Loading branch information
dzhwinter committed Nov 14, 2017
1 parent fc117ec commit 12858ba
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
29 changes: 24 additions & 5 deletions paddle/operators/accuracy_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ using platform::PADDLE_CUDA_NUM_THREADS;
template <int BlockSize>
__global__ void AccuracyCudaKernel(const int N, const int D,
const int64_t* Xdata,
const int64_t* labeldata, float* accuracy) {
const int64_t* labeldata, int* correct_data,
float* accuracy) {
int count = 0;
__shared__ int total[BlockSize];

Expand All @@ -43,6 +44,7 @@ __global__ void AccuracyCudaKernel(const int N, const int D,
// reduce the count with init value 0, and output accuracy.
int result = thrust::reduce(thrust::device, total, total + BlockSize, 0);
if (threadIdx.x == 0) {
*correct_data = result;
*accuracy = static_cast<float>(result) / static_cast<float>(N);
}
}
Expand All @@ -56,31 +58,48 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
auto* inference = ctx.Input<Tensor>("Out");
auto* indices = ctx.Input<Tensor>("Indices");
auto* label = ctx.Input<Tensor>("Label");

auto* accuracy = ctx.Output<Tensor>("Accuracy");
auto* correct = ctx.Output<Tensor>("Correct");
auto* total = ctx.Output<Tensor>("Total");
// FIXME(typhoonzero): only support indices currently
// if add support for output values, how to detect the data type?
const int64_t* indices_data = indices->data<int64_t>();
const int64_t* label_data = label->data<int64_t>();

int* correct_data = correct->mutable_data<int>(ctx.GetPlace());
int* total_data = total->mutable_data<int>(ctx.GetPlace());
float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace());

size_t num_samples = inference->dims()[0];
int num_samples = static_cast<int>(inference->dims()[0]);
size_t infer_width = inference->dims()[1];
PADDLE_ENFORCE(cudaMemset(accuracy_data, 0, sizeof(float)));
// cudaMemset((void**)&correct_data, 0, sizeof(float));

if (num_samples == 0) {
return;
}
cudaMemcpy(total_data, &num_samples, sizeof(int), cudaMemcpyHostToDevice);

AccuracyCudaKernel<PADDLE_CUDA_NUM_THREADS><<<
1, PADDLE_CUDA_NUM_THREADS, 0, ctx.cuda_device_context().stream()>>>(
num_samples, infer_width, indices_data, label_data, accuracy_data);
num_samples, infer_width, indices_data, label_data, correct_data,
accuracy_data);

int d_num_samples, d_num_correct;
float d_accuracy;
cudaMemcpy(&d_num_correct, correct_data, sizeof(int),
cudaMemcpyDeviceToHost);
cudaMemcpy(&d_num_samples, total_data, sizeof(int), cudaMemcpyDeviceToHost);
cudaMemcpy(&d_accuracy, accuracy_data, sizeof(float),
cudaMemcpyDeviceToHost);
}
};

} // namespace operators
} // namespace paddle

// FIXME(typhoonzero): types of T is for infernece data.
// label data is always int
// FIXME(typhoonzero): types of T is for inference data.
// label data is always int64
REGISTER_OP_GPU_KERNEL(accuracy, paddle::operators::AccuracyOpCUDAKernel<float>,
paddle::operators::AccuracyOpCUDAKernel<double>);
8 changes: 4 additions & 4 deletions python/paddle/v2/framework/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def reset(self, executor, reset_program=None):
"""
Clear metric states at the begin of each pass/user specified batch
"""
if program == None:
if reset_program == None:
reset_program = Program()
else:
reset_program = program
Expand Down Expand Up @@ -147,9 +147,9 @@ def _update_ops(self, input, label, k=1, **kwargs):

return acc_out

def eval(self, executor, program=None):
if program != None:
eval_program = program
def eval(self, executor, eval_program=None):
if eval_program != None:
eval_program = eval_program
else:
eval_program = Program()
block = eval_program.global_block()
Expand Down

0 comments on commit 12858ba

Please sign in to comment.