Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion source/MRCuda/MRCudaBasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@ size_t getCudaAvailableMemory()
return memFree - 128 * 1024 * 1024;
}

void logError( cudaError_t code, const char * file, int line )
// converts the given CUDA error code into a user-readable string (implements the declaration in MRCudaBasic.hpp)
std::string getError( cudaError_t code )
{
// cudaGetErrorString returns a static human-readable description for the code
return fmt::format( "CUDA error: {}", cudaGetErrorString( code ) );
}

cudaError_t logError( cudaError_t code, const char * file, int line )
{
if ( file )
{
Expand All @@ -41,6 +46,7 @@ void logError( cudaError_t code, const char * file, int line )
{
spdlog::error( "CUDA error {}: {}", cudaGetErrorName( code ), cudaGetErrorString( code ) );
}
return code;
}

} //namespace Cuda
Expand Down
16 changes: 9 additions & 7 deletions source/MRCuda/MRCudaBasic.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#pragma once

#include "cuda_runtime.h"
#include <cstdint>
#include <vector>
#include <stdint.h>

namespace MR
{
Expand Down Expand Up @@ -28,20 +30,20 @@ public:

// copy given vector to GPU (if this array was allocated with inconsistent size, free it and then malloc again)
template <typename U>
void fromVector( const std::vector<U>& vec );
cudaError_t fromVector( const std::vector<U>& vec );

// copy given data to GPU (if this array was allocated with inconsistent size, free it and then malloc again)
void fromBytes( const uint8_t* data, size_t numBytes );
cudaError_t fromBytes( const uint8_t* data, size_t numBytes );

// copy given data to CPU (data should be already allocated)
void toBytes( uint8_t* data );
cudaError_t toBytes( uint8_t* data );

// copy this GPU array to given vector
template <typename U>
void toVector( std::vector<U>& vec ) const;
cudaError_t toVector( std::vector<U>& vec ) const;

// resize (free and malloc again if size inconsistent) this GPU array (if size == 0 free it (if needed))
void resize( size_t size );
cudaError_t resize( size_t size );

// pointer to GPU array
T* data()
Expand Down Expand Up @@ -73,7 +75,7 @@ using DynamicArrayU16 = MR::Cuda::DynamicArray<uint16_t>;
using DynamicArrayF = MR::Cuda::DynamicArray<float>;

// Sets all float values of GPU array to zero
void setToZero( DynamicArrayF& devArray );
cudaError_t setToZero( DynamicArrayF& devArray );

}

Expand Down
55 changes: 33 additions & 22 deletions source/MRCuda/MRCudaBasic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@

#include "MRCudaBasic.cuh"
#include "exports.h"
#include "cuda_runtime.h"
#include <assert.h>
#include <stdint.h>
#include <cassert>
#include <string>

namespace MR
{

namespace Cuda
{

/// converts given code in user-readable error string
[[nodiscard]] MRCUDA_API std::string getError( cudaError_t code );

/// spdlog::error the information about some CUDA error including optional filename and line number
MRCUDA_API void logError( cudaError_t code, const char * file = nullptr, int line = 0 );
MRCUDA_API cudaError_t logError( cudaError_t code, const char * file = nullptr, int line = 0 );

/// executes given CUDA function and checks the error code after
#define CUDA_EXEC( func )\
Expand Down Expand Up @@ -44,56 +46,65 @@ DynamicArray<T>::~DynamicArray()

template<typename T>
template<typename U>
inline void DynamicArray<T>::fromVector( const std::vector<U>& vec )
inline cudaError_t DynamicArray<T>::fromVector( const std::vector<U>& vec )
{
static_assert ( sizeof( T ) == sizeof( U ) );
resize( vec.size() );
CUDA_EXEC( cudaMemcpy( data_, vec.data(), size_ * sizeof( T ), cudaMemcpyHostToDevice ) );
if ( auto code = resize( vec.size() ) )
return code;
return logError( cudaMemcpy( data_, vec.data(), size_ * sizeof( T ), cudaMemcpyHostToDevice ), __FILE__ , __LINE__ );
}


template <typename T>
void DynamicArray<T>::fromBytes( const uint8_t* data, size_t numBytes )
inline cudaError_t DynamicArray<T>::fromBytes( const uint8_t* data, size_t numBytes )
{
assert( numBytes % sizeof( T ) == 0 );
resize( numBytes / sizeof( T ) );
CUDA_EXEC( cudaMemcpy( data_, data, numBytes, cudaMemcpyHostToDevice ) );
return logError( cudaMemcpy( data_, data, numBytes, cudaMemcpyHostToDevice ), __FILE__ , __LINE__ );
}

template <typename T>
void DynamicArray<T>::toBytes( uint8_t* data )
inline cudaError_t DynamicArray<T>::toBytes( uint8_t* data )
{
CUDA_EXEC( cudaMemcpy( data, data_, size_ * sizeof( T ), cudaMemcpyDeviceToHost ) );
return logError( cudaMemcpy( data, data_, size_ * sizeof( T ), cudaMemcpyDeviceToHost ), __FILE__ , __LINE__ );
}

template<typename T>
void DynamicArray<T>::resize( size_t size )
cudaError_t DynamicArray<T>::resize( size_t size )
{
if ( size == size_ )
return;
return cudaSuccess;
if ( size_ != 0 )
CUDA_EXEC( cudaFree( data_ ) );
{
if ( auto code = logError( cudaFree( data_ ), __FILE__ , __LINE__ ) )
return code;
}

size_ = size;
if ( size_ != 0 )
CUDA_EXEC( cudaMalloc( ( void** )&data_, size_ * sizeof( T ) ) );
{
if ( auto code = logError( cudaMalloc( ( void** )&data_, size_ * sizeof( T ) ), __FILE__ , __LINE__ ) )
return code;
}
return cudaSuccess;
}

template<typename T>
template<typename U>
void DynamicArray<T>::toVector( std::vector<U>& vec ) const
cudaError_t DynamicArray<T>::toVector( std::vector<U>& vec ) const
{
static_assert ( sizeof( T ) == sizeof( U ) );
vec.resize( size_ );
CUDA_EXEC( cudaMemcpy( vec.data(), data_, size_ * sizeof( T ), cudaMemcpyDeviceToHost ) );
return logError( cudaMemcpy( vec.data(), data_, size_ * sizeof( T ), cudaMemcpyDeviceToHost ), __FILE__ , __LINE__ );
}

inline void setToZero( DynamicArrayF& devArray )
inline cudaError_t setToZero( DynamicArrayF& devArray )
{
if ( devArray.size() == 0 )
return;
CUDA_EXEC( cudaMemset( devArray.data(), 0, devArray.size() * sizeof( float ) ) );
return cudaSuccess;
return logError( cudaMemset( devArray.data(), 0, devArray.size() * sizeof( float ) ), __FILE__ , __LINE__ );
}

}
}
} // namespace Cuda

} // namespace MR