-
Notifications
You must be signed in to change notification settings - Fork 18.7k
/
gpu_util.cuh
35 lines (29 loc) · 1.06 KB
/
gpu_util.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#ifndef CAFFE_UTIL_GPU_UTIL_H_
#define CAFFE_UTIL_GPU_UTIL_H_
namespace caffe {
template <typename Dtype>
inline __device__ Dtype caffe_gpu_atomic_add(const Dtype val, Dtype* address);
template <>
inline __device__
float caffe_gpu_atomic_add(const float val, float* address) {
return atomicAdd(address, val);
}
// double atomicAdd implementation taken from:
// http://docs.nvidia.com/cuda/cuda-c-programming-guide/#axzz3PVCpVsEG
template <>
inline __device__
double caffe_gpu_atomic_add(const double val, double* address) {
unsigned long long int* address_as_ull = // NOLINT(runtime/int)
// NOLINT_NEXT_LINE(runtime/int)
reinterpret_cast<unsigned long long int*>(address);
unsigned long long int old = *address_as_ull; // NOLINT(runtime/int)
unsigned long long int assumed; // NOLINT(runtime/int)
do {
assumed = old;
old = atomicCAS(address_as_ull, assumed,
__double_as_longlong(val + __longlong_as_double(assumed)));
} while (assumed != old);
return __longlong_as_double(old);
}
} // namespace caffe
#endif // CAFFE_UTIL_GPU_UTIL_H_