Skip to content

Commit

Permalink
Use read only cache instead of texture cache for CC >= 3.5
Browse files Browse the repository at this point in the history
  • Loading branch information
daniel-jasinski committed Jan 5, 2020
1 parent b1772a3 commit 6b40d01
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 40 deletions.
1 change: 1 addition & 0 deletions src/OpenFOAM/Make/files
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ $(ranges)/labelRange/labelRanges.C
$(ranges)/scalarRange/scalarRange.C
$(ranges)/scalarRange/scalarRanges.C

containers/Lists/gpuList/gpuConfig.C
containers/HashTables/HashTable/HashTableCore.C
containers/HashTables/StaticHashTable/StaticHashTableCore.C
containers/Lists/PackedList/PackedListCore.C
Expand Down
42 changes: 42 additions & 0 deletions src/OpenFOAM/containers/Lists/gpuList/gpuConfig.C
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include "gpuConfig.H"

namespace Foam {

int deviceCount()
{
int num_devices;
CUDA_CALL(cudaGetDeviceCount(&num_devices));
return num_devices;
}

int currentDevice()
{
int device;
CUDA_CALL(cudaGetDevice(&device));
return device;
}

void setCurrentDevice(int device)
{
CUDA_CALL(cudaSetDevice(device));
}

int deviceComputeCapability(int device)
{
cudaDeviceProp deviceProp;
CUDA_CALL(cudaGetDeviceProperties(&deviceProp, device));
return 10*deviceProp.major + deviceProp.minor;
}

int currentComputeCapability()
{
return deviceComputeCapability(currentDevice());
}

bool needTextureBind()
{
static bool needBind = currentComputeCapability() < 35;
return needBind;
}

}
24 changes: 9 additions & 15 deletions src/OpenFOAM/containers/Lists/gpuList/gpuConfig.H
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/discard_iterator.h>

#include <thrust/transform.h>
#include <thrust/transform_reduce.h>
Expand All @@ -37,26 +38,19 @@ namespace gpu_api = thrust;

#define GPU_ERROR_CHECK() \
cudaDeviceSynchronize(); \
CUDA_CALL( cudaPeekAtLastError());
CUDA_CALL( cudaPeekAtLastError());

#define GPU_ERROR_CHECK_ASYNC() \
CUDA_CALL(cudaPeekAtLastError());
CUDA_CALL(cudaPeekAtLastError());

namespace Foam
{

inline int getGpuDeviceCount()
{
int num_devices;
CUDA_CALL(cudaGetDeviceCount(&num_devices));
return num_devices;
}

inline void setGpuDevice(int device)
{
CUDA_CALL(cudaSetDevice(device));
}

int deviceCount();
int currentDevice();
void setCurrentDevice(int device);
int deviceComputeCapability(int device);
int currentComputeCapability();
bool needTextureBind();
}

#else
Expand Down
54 changes: 36 additions & 18 deletions src/OpenFOAM/containers/Lists/gpuList/textures.H
Original file line number Diff line number Diff line change
Expand Up @@ -13,46 +13,45 @@ private:
const T* data;

inline void initResourceDesc(cudaResourceDesc& resDesc);
void init(int n, T* data_);
void bind(int n);

public:
textures(int n, T* _data):
tex(0),
data(_data)
{
init(n,_data);
if(needTextureBind())
bind(n);
}

textures(const gpuList<T>& list):
tex(0)
tex(0),
data(list.data())
{
init(list.size(),const_cast<T*>(list.data()));
if(needTextureBind())
bind(list.size());
}

inline __device__ T operator[](const int& i) const;

void destroy()
{
cudaDestroyTextureObject(tex);
if(needTextureBind())
cudaDestroyTextureObject(tex);
}
};

template<class T>
inline void textures<T>::initResourceDesc(cudaResourceDesc& resDesc)
{
}

template<class T>
inline void textures<T>::init(int n, T* _data)
inline void textures<T>::bind(int n)
{
cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(cudaResourceDesc));

resDesc.resType = cudaResourceTypeLinear;
resDesc.res.linear.devPtr = _data;
resDesc.res.linear.devPtr = const_cast<T*>(data);
resDesc.res.linear.sizeInBytes = n*sizeof(T);

initResourceDesc(resDesc);
initResourceDesc(resDesc);

cudaTextureDesc texDesc;
memset(&texDesc, 0, sizeof(cudaTextureDesc));
Expand All @@ -61,29 +60,46 @@ inline void textures<T>::init(int n, T* _data)
cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
}


template<class T>
inline void textures<T>::initResourceDesc(cudaResourceDesc& resDesc)
{
}


template<>
inline void textures<float>::initResourceDesc(cudaResourceDesc& resDesc)
{
resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
resDesc.res.linear.desc.x = 32;
resDesc.res.linear.desc.x = 32;
}


template<>
inline void textures<int>::initResourceDesc(cudaResourceDesc& resDesc)
{
resDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
resDesc.res.linear.desc.x = 32;
resDesc.res.linear.desc.x = 32;
}

template<>
inline void textures<double>::initResourceDesc(cudaResourceDesc& resDesc)
{
resDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
resDesc.res.linear.desc.x = 32;
resDesc.res.linear.desc.y = 32;
resDesc.res.linear.desc.x = 32;
resDesc.res.linear.desc.y = 32;
}

#if __CUDA_ARCH__ >= 350

template<class T>
inline __device__ T textures<T>::operator[](const int& i) const
{
return __ldg(data + i);
}

#else

template<>
inline __device__ float textures<float>::operator[](const int& i) const
{
Expand All @@ -103,4 +119,6 @@ inline __device__ double textures<double>::operator[](const int& i) const
return __hiloint2double(v.y, v.x);
}

#endif

}
14 changes: 7 additions & 7 deletions src/OpenFOAM/global/argList/argList.C
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ void Foam::argList::parse
// Case is a single processor run unless it is running parallel
int nProcs = 1;

int deviceCount = getGpuDeviceCount();
int nDeviceCount = deviceCount();

// Roots if running distributed
fileNameList roots;
Expand Down Expand Up @@ -787,27 +787,27 @@ void Foam::argList::parse
{
int device = devices[Pstream::myProcNo()];

if(device < 0 || device >= deviceCount)
if(device < 0 || device >= nDeviceCount)
{
FatalError
<<"Invalid device ID: "<<device
<<" for processor "<<Pstream::myProcNo()<<endl;
FatalError.exit();
}

setGpuDevice(device);
setCurrentDevice(device);
}
}
else
{
if(Pstream::myProcNo() >= deviceCount)
if(Pstream::myProcNo() >= nDeviceCount)
{
FatalError
<<"Specify device IDs with 'devices' argument"<<endl;
FatalError.exit();
}

setGpuDevice(Pstream::myProcNo());
setCurrentDevice(Pstream::myProcNo());
}
}
else
Expand All @@ -820,14 +820,14 @@ void Foam::argList::parse
{
int device = optionRead<int>("device");

if(device < 0 || device >= deviceCount)
if(device < 0 || device >= nDeviceCount)
{
FatalError
<<"Invalid device ID: "<<device<<endl;
FatalError.exit();
}

setGpuDevice(device);
setCurrentDevice(device);

}
}
Expand Down

0 comments on commit 6b40d01

Please sign in to comment.