diff --git a/examples/compute/cuda/CMakeLists.txt b/examples/compute/cuda/CMakeLists.txt
index 50530967af1f..49cd8be28542 100644
--- a/examples/compute/cuda/CMakeLists.txt
+++ b/examples/compute/cuda/CMakeLists.txt
@@ -13,8 +13,13 @@ if(HPX_WITH_CUDA)
       hello_compute
       partitioned_vector
     )
-  set(cublas_matmul_FLAGS DEPENDENCIES cublas)
-#  set_source_files_properties(cublas_matmul.cpp PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
+
+  include_directories(${CUDA_INCLUDE_DIRS})
+
+  set(cublas_matmul_FLAGS
+      DEPENDENCIES ${CUDA_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES})
+
+  set(cublas_matmul_CUDA Off)
   set(data_copy_CUDA On)
   set(hello_compute_CUDA On)
   set(partitioned_vector_CUDA ON)
diff --git a/hpx/components/containers/partitioned_vector/partitioned_vector_component.hpp b/hpx/components/containers/partitioned_vector/partitioned_vector_component.hpp
index 48aeeca297c3..05affc811d82 100644
--- a/hpx/components/containers/partitioned_vector/partitioned_vector_component.hpp
+++ b/hpx/components/containers/partitioned_vector/partitioned_vector_component.hpp
@@ -442,12 +442,9 @@ namespace hpx { namespace server
     HPX_REGISTER_VECTOR_DECLARATION_IMPL(                                     \
         BOOST_PP_CAT(__partitioned_vector_, BOOST_PP_CAT(type, name)), name)  \
 /**/
-#ifndef __CUDA_ARCH__
+
 #define HPX_REGISTER_PARTITIONED_VECTOR(...)                                  \
-    HPX_REGISTER_VECTOR_(__VA_ARGS__)
-#else
-#define HPX_REGISTER_PARTITIONED_VECTOR(...)
-#endif
+    HPX_REGISTER_VECTOR_(__VA_ARGS__)                                         \
 /**/
 #define HPX_REGISTER_VECTOR_(...)                                             \
     HPX_UTIL_EXPAND_(BOOST_PP_CAT(                                            \
diff --git a/hpx/parallel/algorithms/for_each.hpp b/hpx/parallel/algorithms/for_each.hpp
index 9591832506f4..2a4208353967 100644
--- a/hpx/parallel/algorithms/for_each.hpp
+++ b/hpx/parallel/algorithms/for_each.hpp
@@ -432,7 +432,7 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
 
 
     // FIXME : is_indirect_callable does not work properly when compiling
-    //         Cuda device code
+    //         CUDA host code (nvcc pass without __CUDA_ARCH__ defined)
 
     template <typename ExPolicy, typename InIter, typename F,
         typename Proj = util::projection_identity,
@@ -440,7 +440,7 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
         execution::is_execution_policy<ExPolicy>::value &&
         hpx::traits::is_iterator<InIter>::value &&
         parallel::traits::is_projected<Proj, InIter>::value)
-#if !defined(__CUDA_ARCH__)
+#if (!defined(__NVCC__) && !defined(__CUDACC__)) || defined(__CUDA_ARCH__)
   , HPX_CONCEPT_REQUIRES_(
         parallel::traits::is_indirect_callable<
             ExPolicy, F, traits::projected<Proj, InIter>