From 56de7683c33e2e20533facfb4d782c21ba5f8dfe Mon Sep 17 00:00:00 2001 From: zhangkaihuo Date: Thu, 15 Aug 2019 16:51:05 +0800 Subject: [PATCH] reduce cpu occupancy --- solution/miner/libcuckoo/src/cuda/trimmer.cu | 21 ++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/solution/miner/libcuckoo/src/cuda/trimmer.cu b/solution/miner/libcuckoo/src/cuda/trimmer.cu index 87e4781259..270f8361d0 100644 --- a/solution/miner/libcuckoo/src/cuda/trimmer.cu +++ b/solution/miner/libcuckoo/src/cuda/trimmer.cu @@ -689,7 +689,7 @@ __global__ void Round2(const int round, const int part, const siphash_keys &sipk cudaMemset (indexesE[1], 0, indexesSize); cudaMemcpy (dipkeys, &sipkeys, sizeof (sipkeys), cudaMemcpyHostToDevice); - checkCudaErrors (cudaDeviceSynchronize ()); +// checkCudaErrors (cudaDeviceSynchronize ()); #ifdef TIMER float durationA, durationB; @@ -706,7 +706,7 @@ __global__ void Round2(const int round, const int part, const siphash_keys &sipk else Cuckaroo_SeedA < EDGES_A ><<< tp.genA.blocks, tp.genA.tpb >>> (*dipkeys, (ulonglong4 *) bufferAB, (int *) indexesE[1]); - checkCudaErrors (cudaDeviceSynchronize ()); +// checkCudaErrors (cudaDeviceSynchronize ()); #ifdef TIMER cudaEventRecord (stop, NULL); @@ -780,7 +780,7 @@ __global__ void Round2(const int round, const int part, const siphash_keys &sipk Round<<>>(3, part, *dipkeys, (uint2 *)bufferB, (uint2 *)bufferA, indexesE[1], indexesE[0]); // to .117 } - cudaDeviceSynchronize(); +// cudaDeviceSynchronize(); for (int round = 4; round < tp.ntrims; round += 2) { cudaMemset(indexesE[1], 0, indexesSize); @@ -797,9 +797,18 @@ __global__ void Round2(const int round, const int part, const siphash_keys &sipk cudaDeviceSynchronize(); Tail<<>>((const uint2 *)bufferA, (uint2 *)bufferB, (const u32 *)indexesE[0], (u32 *)indexesE[1]); - cudaMemcpy(&nedges, indexesE[1], sizeof(u32), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - return nedges; +// cudaMemcpy(&nedges, indexesE[1], sizeof(u32), cudaMemcpyDeviceToHost); +// cudaDeviceSynchronize(); + bool ready = false; + while(1){ + usleep(1000); + ready = cudaSuccess == cudaStreamQuery(0); + if(ready){ + cudaMemcpy(&nedges, indexesE[1], sizeof(u32), cudaMemcpyDeviceToHost); + break; + } + } + return nedges; } };