cms-sw · cmsbuild · Apr 29, 2021 · Apr 28, 2021
diff --git a/HeterogeneousCore/CUDAServices/bin/BuildFile.xml b/HeterogeneousCore/CUDAServices/bin/BuildFile.xml
@@ -1,10 +1,9 @@
 <iftool name="cuda-gcc-support">
-  <bin name="cudaComputeCapabilities" file="cudaComputeCapabilities.cpp">
+  <bin name="cudaComputeCapabilities" file="cudaComputeCapabilities.cpp isCudaDeviceSupported.cu">
     <use name="cuda"/>
   </bin>
 
-  <bin name="cudaIsEnabled" file="cudaIsEnabled.cpp">
+  <bin name="cudaIsEnabled" file="cudaIsEnabled.cpp isCudaDeviceSupported.cu">
     <use name="cuda"/>
   </bin>
-
 </iftool>
diff --git a/HeterogeneousCore/CUDAServices/bin/cudaComputeCapabilities.cpp b/HeterogeneousCore/CUDAServices/bin/cudaComputeCapabilities.cpp
@@ -1,23 +1,40 @@
-// C++ standard headers
+// C/C++ standard headers
+#include <cstdlib>
 #include <iomanip>
 #include <iostream>
 
 // CUDA headers
 #include <cuda_runtime.h>
 
 // CMSSW headers
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+#include "isCudaDeviceSupported.h"
 
+// Lists the visible CUDA devices, one per line: index, compute capability, name, and an
+// "(unsupported)" tag for the devices that fail isCudaDeviceSupported().
+// Returns EXIT_FAILURE if the devices cannot be enumerated, EXIT_SUCCESS otherwise.
 int main() {
   int devices = 0;
-  cudaCheck(cudaGetDeviceCount(&devices));
+  cudaError_t status = cudaGetDeviceCount(&devices);
+  if (status != cudaSuccess) {
+    std::cerr << "cudaComputeCapabilities: " << cudaGetErrorString(status) << std::endl;
+    return EXIT_FAILURE;
+  }
 
   for (int i = 0; i < devices; ++i) {
     cudaDeviceProp properties;
-    cudaGetDeviceProperties(&properties, i);
+    // do not print an uninitialised cudaDeviceProp if the query fails: report the error and move on
+    status = cudaGetDeviceProperties(&properties, i);
+    if (status != cudaSuccess) {
+      std::cerr << "cudaComputeCapabilities: device " << i << ": " << cudaGetErrorString(status) << std::endl;
+      continue;
+    }
     std::cout << std::setw(4) << i << "    " << std::setw(2) << properties.major << "." << properties.minor << "    "
-              << properties.name << std::endl;
+              << properties.name;
+    if (not isCudaDeviceSupported(i)) {
+      std::cout << " (unsupported)";
+    }
+    std::cout << std::endl;
   }
 
-  return 0;
+  return EXIT_SUCCESS;
 }
diff --git a/HeterogeneousCore/CUDAServices/bin/cudaIsEnabled.cpp b/HeterogeneousCore/CUDAServices/bin/cudaIsEnabled.cpp
@@ -1,31 +1,26 @@
-#include <algorithm>
-#include <array>
+// C/C++ headers
 #include <cstdlib>
-#include <iostream>
 
+// CUDA headers
 #include <cuda_runtime.h>
 
+// local headers
+#include "isCudaDeviceSupported.h"
+
+// returns EXIT_SUCCESS if any visible CUDA device can be used, EXIT_FAILURE if none can or they cannot be counted
 int main() {
   int devices = 0;
   auto status = cudaGetDeviceCount(&devices);
   if (status != cudaSuccess) {
     return EXIT_FAILURE;
   }
 
-  int minimumMajor = 6;  // min minor is implicitly 0
-
-  // This approach (requiring all devices are supported) is rather
-  // conservative. In principle we could consider just dropping the
-  // unsupported devices. Currently that would be easiest to achieve
-  // in CUDAService though.
+  // succeed as soon as one visible CUDA device passes isCudaDeviceSupported()
   for (int i = 0; i < devices; ++i) {
-    cudaDeviceProp properties;
-    cudaGetDeviceProperties(&properties, i);
-
-    if ((not(properties.major == 3 and properties.minor == 5)) and properties.major < minimumMajor) {
-      return EXIT_FAILURE;
-    }
+    if (isCudaDeviceSupported(i))
+      return EXIT_SUCCESS;
   }
 
-  return EXIT_SUCCESS;
+  // the loop found no usable device (this includes the case of zero visible devices)
+  return EXIT_FAILURE;
 }
diff --git a/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.cu b/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.cu
@@ -0,0 +1,61 @@
+#include <cuda_runtime.h>
+
+#include "isCudaDeviceSupported.h"
+
+// trivial kernel, launched below with a single thread: sets the flag pointed to by `result`
+__global__ static void setSupported(bool* result) { *result = true; }
+
+// Run setSupported on the current device and read the flag back.
+// Once the device buffer has been allocated it is freed on every path, including the error ones.
+static bool runProbeOnCurrentDevice() {
+  bool supported = false;
+  bool* supported_d = nullptr;
+
+  // allocate memory for the flag on the device
+  cudaError_t status = cudaMalloc(&supported_d, sizeof(bool));
+  if (status != cudaSuccess)
+    return false;
+
+  // initialise the flag on the device
+  status = cudaMemset(supported_d, 0x00, sizeof(bool));
+
+  if (status == cudaSuccess) {
+    // try to set the flag on the device
+    setSupported<<<1, 1>>>(supported_d);
+
+    // check for an eventual error from launching the kernel on an unsupported device
+    status = cudaGetLastError();
+  }
+
+  // wait for the kernel to run
+  if (status == cudaSuccess)
+    status = cudaDeviceSynchronize();
+
+  // copy the flag back to the host
+  if (status == cudaSuccess)
+    status = cudaMemcpy(&supported, supported_d, sizeof(bool), cudaMemcpyDeviceToHost);
+
+  // free the device memory, also when one of the steps above has failed
+  cudaError_t freeStatus = cudaFree(supported_d);
+
+  return status == cudaSuccess && freeStatus == cudaSuccess && supported;
+}
+
+// Check whether `device` can run a kernel built into this binary.
+// Returns true only if every CUDA call succeeds and the kernel sets the flag.
+// Side effects: `device` becomes the current device and, once selected, is reset before returning.
+bool isCudaDeviceSupported(int device) {
+  // select the requested device - will fail if the index is invalid
+  cudaError_t status = cudaSetDevice(device);
+  if (status != cudaSuccess)
+    return false;
+
+  bool supported = runProbeOnCurrentDevice();
+
+  // reset the device, whether or not the probe succeeded
+  status = cudaDeviceReset();
+  if (status != cudaSuccess)
+    return false;
+
+  return supported;
+}
diff --git a/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.h b/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.h
@@ -0,0 +1,6 @@
+#ifndef HeterogeneousCore_CUDAServices_bin_isCudaDeviceSupported_h
+#define HeterogeneousCore_CUDAServices_bin_isCudaDeviceSupported_h
+
+bool isCudaDeviceSupported(int device);
+
+#endif  // HeterogeneousCore_CUDAServices_bin_isCudaDeviceSupported_h