Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions gpu/containers/include/pcl/gpu/containers/device_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
#include <pcl/gpu/containers/kernel_containers.h>
#include <pcl/pcl_exports.h>

#include <atomic>

namespace pcl {
namespace gpu {
///////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -167,7 +169,7 @@ class PCL_EXPORTS DeviceMemory {
std::size_t sizeBytes_;

/** \brief Pointer to reference counter in CPU memory. */
int* refcount_;
std::atomic<int>* refcount_;
};

///////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -308,7 +310,7 @@ class PCL_EXPORTS DeviceMemory2D {
int rows_;

/** \brief Pointer to reference counter in CPU memory. */
int* refcount_;
std::atomic<int>* refcount_;
};
} // namespace gpu

Expand Down
49 changes: 13 additions & 36 deletions gpu/containers/src/device_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,13 @@

#include <pcl/gpu/containers/device_memory.h>
#include <pcl/gpu/utils/safe_call.hpp>
#include <pcl/pcl_config.h> // used for HAVE_CUDA
#include <pcl/pcl_macros.h> // used for PCL_DEPRECATED

#include <cuda_runtime_api.h>

#include <cassert>

#define HAVE_CUDA
//#include <pcl_config.h>

#if !defined(HAVE_CUDA)

void
Expand Down Expand Up @@ -163,30 +162,14 @@ pcl::gpu::DeviceMemory2D::empty() const

////////////////////////// XADD ///////////////////////////////
Comment thread
kunaltyagi marked this conversation as resolved.

#ifdef __GNUC__
#if !defined WIN32 && (defined __i486__ || defined __i586__ || defined __i686__ || \
defined __MMX__ || defined __SSE__ || defined __ppc__)
#define CV_XADD __sync_fetch_and_add
#else
#include <ext/atomicity.h>
#define CV_XADD __gnu_cxx::__exchange_and_add
#endif
#elif defined WIN32 || defined _WIN32
#include <intrin.h>
#define CV_XADD(addr, delta) _InterlockedExchangeAdd((long volatile*)(addr), (delta))
#else

template <typename _Tp>
static inline _Tp
CV_XADD(_Tp* addr, _Tp delta)
PCL_DEPRECATED(1, 16, "Removed in favour of c++11 atomics")
static inline _Tp CV_XADD(std::atomic<_Tp>* addr, std::atomic<_Tp> delta)
{
int tmp = *addr;
*addr += delta;
_Tp tmp = addr->fetch_add(delta);
return tmp;
}

#endif

//////////////////////// DeviceArray /////////////////////////////

pcl::gpu::DeviceMemory::DeviceMemory()
Expand All @@ -211,15 +194,15 @@ pcl::gpu::DeviceMemory::DeviceMemory(const DeviceMemory& other_arg)
, refcount_(other_arg.refcount_)
{
if (refcount_)
CV_XADD(refcount_, 1);
refcount_->fetch_add(1);
}

pcl::gpu::DeviceMemory&
pcl::gpu::DeviceMemory::operator=(const pcl::gpu::DeviceMemory& other_arg)
{
if (this != &other_arg) {
if (other_arg.refcount_)
CV_XADD(other_arg.refcount_, 1);
other_arg.refcount_->fetch_add(1);
release();

data_ = other_arg.data_;
Expand All @@ -243,9 +226,7 @@ pcl::gpu::DeviceMemory::create(std::size_t sizeBytes_arg)

cudaSafeCall(cudaMalloc(&data_, sizeBytes_));

// refcount_ = (int*)cv::fastMalloc(sizeof(*refcount_));
refcount_ = new int;
*refcount_ = 1;
refcount_ = new std::atomic<int>(1);
}
}

Expand All @@ -264,8 +245,7 @@ pcl::gpu::DeviceMemory::copyTo(DeviceMemory& other) const
void
pcl::gpu::DeviceMemory::release()
{
if (refcount_ && CV_XADD(refcount_, -1) == 1) {
// cv::fastFree(refcount);
if (refcount_ && refcount_->fetch_sub(1) == 1) {
delete refcount_;
cudaSafeCall(cudaFree(data_));
}
Expand Down Expand Up @@ -369,15 +349,15 @@ pcl::gpu::DeviceMemory2D::DeviceMemory2D(const DeviceMemory2D& other_arg)
, refcount_(other_arg.refcount_)
{
if (refcount_)
CV_XADD(refcount_, 1);
refcount_->fetch_add(1);
}

pcl::gpu::DeviceMemory2D&
pcl::gpu::DeviceMemory2D::operator=(const pcl::gpu::DeviceMemory2D& other_arg)
{
if (this != &other_arg) {
if (other_arg.refcount_)
CV_XADD(other_arg.refcount_, 1);
other_arg.refcount_->fetch_add(1);
release();

colsBytes_ = other_arg.colsBytes_;
Expand Down Expand Up @@ -405,17 +385,14 @@ pcl::gpu::DeviceMemory2D::create(int rows_arg, int colsBytes_arg)

cudaSafeCall(cudaMallocPitch((void**)&data_, &step_, colsBytes_, rows_));

// refcount = (int*)cv::fastMalloc(sizeof(*refcount));
refcount_ = new int;
*refcount_ = 1;
refcount_ = new std::atomic<int>(1);
}
}

void
pcl::gpu::DeviceMemory2D::release()
{
if (refcount_ && CV_XADD(refcount_, -1) == 1) {
// cv::fastFree(refcount);
if (refcount_ && refcount_->fetch_sub(1) == 1) {
delete refcount_;
cudaSafeCall(cudaFree(data_));
}
Expand Down