add version.h to cover version problem
MuGdxy committed Feb 26, 2023
1 parent 1b14da9 commit d28ec9c
Showing 20 changed files with 119 additions and 64 deletions.
16 changes: 8 additions & 8 deletions example/profile/kernel_name.cu
@@ -2,17 +2,17 @@
 #include <muda/muda.h>
 #include <muda/container.h>
 #include "../example_common.h"
+#include <numeric>  // iota
 using namespace muda;
 
 void kernel_name()
 {
     example_desc("use <profile> and <range_name> to help debugging and profiling.");
-    universal_vector<int> h(100);
-    universal_var<int>    v(1);
-    for(size_t i = 0; i < h.size(); i++)
-    {
-        h[i] = i;
-    }
+    host_vector<int>   h_vec(100);
+    device_vector<int> vec(100);
+    device_var<int>    v(1);
+    std::iota(h_vec.begin(), h_vec.end(), 0);
+    vec = h_vec;
 
     {  // give a scope for RAII of profile and range_name
@@ -22,8 +22,8 @@ void kernel_name()
 
         range_name r("kernel apply");  // give a name to this scope.
         parallel_for(32, 32)
-            .apply(h.size(),
-                   [s = make_viewer(h), v = make_viewer(v)] __device__(int i) mutable
+            .apply(vec.size(),
+                   [s = make_viewer(vec), v = make_viewer(v)] __device__(int i) mutable
                    {
                        __shared__ int b[64];
                        s(i) = v;
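For context, the change above swaps unified-memory containers for the explicit host-fill-then-copy idiom. A minimal standalone sketch of that idiom, assuming muda's host_vector/device_vector are the usual thrust aliases:

    #include <thrust/host_vector.h>
    #include <thrust/device_vector.h>
    #include <numeric>  // std::iota

    int main()
    {
        thrust::host_vector<int> h_vec(100);
        std::iota(h_vec.begin(), h_vec.end(), 0);  // fill 0..99 on the host
        thrust::device_vector<int> vec = h_vec;    // one explicit host-to-device copy
        return 0;
    }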
2 changes: 1 addition & 1 deletion src/core/muda/check/checkCusparse.h
@@ -35,4 +35,4 @@ __host__ __device__ inline const char* mudaCudaGetErrorEnum(cusparseStatus_t err)
     }
 
     return "<unknown>";
-}
\ No newline at end of file
+}
1 change: 1 addition & 0 deletions src/core/muda/graph/base.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#pragma once
#include <muda/tools/version.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <cuda_device_runtime_api.h>
Expand Down
28 changes: 15 additions & 13 deletions src/core/muda/graph/graph.h
@@ -52,19 +52,6 @@ class graph
         return ret;
     }
 
-    template <typename T>
-    auto addMemAllocNode(sptr<memAllocNodeParms<T>>& memAllocParms,
-                         const std::vector<sptr<graphNode>>& deps = {})
-    {
-        auto node = std::make_shared<memAllocNode>();
-        std::vector<cudaGraphNode_t> nodes = mapDependencies(deps);
-        checkCudaErrors(cudaGraphAddMemAllocNode(
-            &node->m_handle, m_handle, nodes.data(), nodes.size(), memAllocParms->getRaw()));
-        auto ptr     = reinterpret_cast<T*>(memAllocParms->getRaw()->dptr);
-        node->m_dptr = ptr;
-        return std::make_tuple(node, ptr);
-    }
-
     template <typename T>
     auto addMemcpyNode(T* dst,
                        const T* src,
@@ -79,6 +66,20 @@ class graph
         return ret;
     }
 
+#ifdef MUDA_WITH_GRAPH_MEMORY_ALLOC_FREE
+    template <typename T>
+    auto addMemAllocNode(sptr<memAllocNodeParms<T>>& memAllocParms,
+                         const std::vector<sptr<graphNode>>& deps = {})
+    {
+        auto node = std::make_shared<memAllocNode>();
+        std::vector<cudaGraphNode_t> nodes = mapDependencies(deps);
+        checkCudaErrors(cudaGraphAddMemAllocNode(
+            &node->m_handle, m_handle, nodes.data(), nodes.size(), memAllocParms->getRaw()));
+        auto ptr     = reinterpret_cast<T*>(memAllocParms->getRaw()->dptr);
+        node->m_dptr = ptr;
+        return std::make_tuple(node, ptr);
+    }
+
     auto addMemFreeNode(sptr<memAllocNode> allocNode,
                         const std::vector<sptr<graphNode>>& deps = {})
     {
@@ -97,6 +98,7 @@ class graph
             &ret->m_handle, m_handle, nodes.data(), nodes.size(), ptr));
         return ret;
     }
+#endif
 
     auto addEventRecordNode(cudaEvent_t e, const std::vector<sptr<graphNode>>& deps = {})
     {
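Under the new guard, the alloc/free nodes participate in a graph only when version.h enables them. A hypothetical usage sketch based on the signatures above (graph construction and the sptr alias are assumptions, not shown in this diff):

    #ifdef MUDA_WITH_GRAPH_MEMORY_ALLOC_FREE
        muda::graph g;  // assumption: default-constructible
        auto parms = muda::memory::asAllocNodeParms<int>(1024);
        auto [allocNode, dptr] = g.addMemAllocNode(parms);         // returns (node, device pointer)
        auto freeNode = g.addMemFreeNode(allocNode, {allocNode});  // free depends on alloc
    #endif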
2 changes: 2 additions & 0 deletions src/core/muda/graph/memory_node.h
@@ -3,6 +3,7 @@
 
 namespace muda
 {
+#ifdef MUDA_WITH_GRAPH_MEMORY_ALLOC_FREE
 class memAllocNode : public graphNode
 {
     void* m_dptr;
@@ -43,6 +44,7 @@ class memFreeNode : public graphNode
     using this_type = memFreeNode;
     friend class graph;
 };
+#endif
 
 class memcpyNode : public graphNode
 {
16 changes: 10 additions & 6 deletions src/core/muda/launch/memory.h
@@ -22,18 +22,22 @@ class memory : public launch_base<memory>
         return *this;
     }
 
+    memory& copy(void* dst, const void* src, size_t byte_size, cudaMemcpyKind kind)
+    {
+        checkCudaErrors(cudaMemcpyAsync(dst, src, byte_size, kind, m_stream));
+        return *this;
+    }
+
+#ifdef MUDA_WITH_GRAPH_MEMORY_ALLOC_FREE
     template <typename T>
     MUDA_NODISCARD static auto asAllocNodeParms(size_t count)
     {
         auto parms = std::make_shared<memAllocNodeParms<T>>(count);
        return parms;
     }
+#endif
 
-    memory& copy(void* dst, const void* src, size_t byte_size, cudaMemcpyKind kind)
-    {
-        checkCudaErrors(cudaMemcpyAsync(dst, src, byte_size, kind, m_stream));
-        return *this;
-    }
-
 
 
     memory& set(void* data, size_t byte_size, char byte = 0)
     {
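The stream-ordered copy is now available regardless of the feature macros. A brief usage sketch; constructing muda::memory from a stream is an assumption inferred from the launch_base pattern, not confirmed by this diff:

    int *dst = nullptr, *src = nullptr;
    checkCudaErrors(cudaMalloc(&dst, 100 * sizeof(int)));
    checkCudaErrors(cudaMalloc(&src, 100 * sizeof(int)));
    // enqueue an async device-to-device copy on the default stream
    muda::memory(nullptr).copy(dst, src, 100 * sizeof(int), cudaMemcpyDeviceToDevice);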
13 changes: 13 additions & 0 deletions src/core/muda/tools/version.h
@@ -0,0 +1,13 @@
+#pragma once
+// muda's baseline cuda version is 11.6
+#define MUDA_BASELINE_CUDACC_VER_MAJOR 11
+#define MUDA_BASELINE_CUDACC_VER_MINOR 6
+
+#if (__CUDACC_VER_MAJOR__ > MUDA_BASELINE_CUDACC_VER_MAJOR) ||                \
+    (__CUDACC_VER_MAJOR__ == MUDA_BASELINE_CUDACC_VER_MAJOR && __CUDACC_VER_MINOR__ >= MUDA_BASELINE_CUDACC_VER_MINOR)
+
+#define MUDA_BASELINE_CUDACC_VER_SATISFIED
+#define MUDA_WITH_THRUST_UNIVERSAL
+#define MUDA_WITH_GRAPH_MEMORY_ALLOC_FREE
+
+#endif
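Note that the guard compares major and minor components separately: a naive (major >= 11) && (minor >= 6) check would wrongly reject CUDA 12.0. A quick constexpr sanity check of the intended semantics (a sketch, not part of the commit; the helper merely mirrors the #if condition):

    // hypothetical helper mirroring the preprocessor condition above
    constexpr bool baseline_satisfied(int major, int minor)
    {
        return major > 11 || (major == 11 && minor >= 6);
    }
    static_assert(baseline_satisfied(11, 6), "the baseline itself qualifies");
    static_assert(baseline_satisfied(12, 0), "12.0 is newer than 11.6 and must qualify");
    static_assert(!baseline_satisfied(11, 4), "11.4 predates the baseline");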
19 changes: 12 additions & 7 deletions src/ext/muda/blas/blas.h
@@ -1,4 +1,6 @@
 #pragma once
+#include <muda/tools/version.h>
+
 #include <muda/check/checkCusparse.h>
 #include <muda/check/checkCublas.h>
 #include <muda/check/checkCudaErrors.h>
@@ -24,22 +26,25 @@ class blasContext
     blasContext(cudaStream_t stream = nullptr)
         : m_stream(stream)
     {
-        checkCudaErrors(cusparseCreate(&m_csh));
         checkCudaErrors(cublasCreate_v2(&m_cbh));
-        checkCudaErrors(cusparseSetStream(m_csh, stream));
         checkCudaErrors(cublasSetStream_v2(m_cbh, stream));
+
+        checkCudaErrors(cusparseCreate(&m_csh));
+        checkCudaErrors(cusparseSetStream(m_csh, stream));
     }
 
     ~blasContext()
     {
-        checkCudaErrors(cusparseDestroy(m_csh));
         checkCudaErrors(cublasDestroy_v2(m_cbh));
+
+        checkCudaErrors(cusparseDestroy(m_csh));
     }
 
     operator cusparseHandle_t() { return m_csh; }
-    cusparseHandle_t spHandle() { return m_csh; };
-
     operator cublasHandle_t() { return m_cbh; }
     operator cudaStream_t() { return m_stream; }
+
+    cusparseHandle_t spHandle() { return m_csh; };
     cublasHandle_t   dnHandle() { return m_cbh; };
     cudaStream_t     stream() { return m_stream; };
 };
@@ -144,7 +149,7 @@ class blas : public launch_base<blas>
                                 b.data(),
                                 y_inout,
                                 details::cudaDataTypeMap_v<value_type>,
-                                cusparseSpMVAlg_t::CUSPARSE_SPMV_ALG_DEFAULT,
+                                cusparseSpMVAlg_t::CUSPARSE_MV_ALG_DEFAULT,
                                 &bufferSize));
 
         details::set_stream_check(external_buffer, m_ctx);
@@ -158,7 +163,7 @@ class blas : public launch_base<blas>
                              b.data(),
                              y_inout,
                              details::cudaDataTypeMap_v<value_type>,
-                             cusparseSpMVAlg_t::CUSPARSE_SPMV_ALG_DEFAULT,
+                             cusparseSpMVAlg_t::CUSPARSE_MV_ALG_DEFAULT,
                              external_buffer.data()));
         return *this;
     }
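blasContext bundles the two library handles behind one RAII object. A short usage sketch grounded in the constructor and accessors shown above:

    // creates both handles on the default stream; both destroyed in ~blasContext()
    muda::blasContext ctx;
    cusparseHandle_t sp = ctx.spHandle();  // sparse (cusparse) handle
    cublasHandle_t   dn = ctx.dnHandle();  // dense (cublas) handle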
1 change: 0 additions & 1 deletion src/ext/muda/blas/data_type_map.h
@@ -54,7 +54,6 @@ namespace details
 //template<> constexpr cudaDataType cudaDataTypeMap_v< > = CUDA_C_64I;
 //template<> constexpr cudaDataType cudaDataTypeMap_v< > = CUDA_C_64U;
 
-
 template <typename T>
 struct cusparseIndexTypeMap
 {
14 changes: 11 additions & 3 deletions src/ext/muda/blas/dense.h
@@ -1,4 +1,6 @@
 #pragma once
+#include <muda/tools/version.h>
+
 #include <cublas.h>
 #include "data_type_map.h"
 #include <muda/check/checkCublas.h>
@@ -18,13 +20,19 @@ class dense_vec
     {
         checkCudaErrors(cusparseCreateDnVec(&m_dnvec, n, data, details::cudaDataTypeMap_v<T>));
     }
-    ~dense_vec() { checkCudaErrors(cusparseDestroyDnVec(m_dnvec)); }
+
+    ~dense_vec()
+    {
+        checkCudaErrors(cusparseDestroyDnVec(m_dnvec));
+    }
 
     value_type*       data() { return m_data; }
     const value_type* data() const { return m_data; }
     size_t            size() const { return m_size; }
-    operator cusparseDnVecDescr_t() { return m_dnvec; }
+
+    operator cusparseDnVecDescr_t() { return m_dnvec; }
 
   private:
+
    cusparseDnVecDescr_t m_dnvec;
    value_type*          m_data;
    size_t               m_size;
@@ -51,4 +59,4 @@ inline __host__ auto make_dense_vec(device_buffer<T>& buf)
 {
     return dense_vec<T>(buf.data(), buf.size());
 }
-}  // namespace muda
\ No newline at end of file
+}  // namespace muda
3 changes: 2 additions & 1 deletion src/ext/muda/blas/mat_view.h
@@ -16,12 +16,13 @@ class mat_view
         , m_trans(CUSPARSE_OPERATION_NON_TRANSPOSE)
     {
     }
+
     mat_view(matrix_type& mat, cusparseOperation_t trans)
         : m_mat(mat)
         , m_trans(trans)
     {
     }
+
 
-
     matrix_type&        m_mat;
     cusparseOperation_t m_trans;
3 changes: 2 additions & 1 deletion src/ext/muda/blas/sparse.h
@@ -1,4 +1,5 @@
 #pragma once
+#include <muda/tools/version.h>
 #include <cublas.h>
 #include <cusparse.h>
 #include <muda/container/vector.h>
@@ -143,4 +144,4 @@ MUDA_INLINE MUDA_HOST auto make_viewer(matCSR<T>& m) MUDA_NOEXCEPT
 {
     return make_csr(m);
 }
-}  // namespace muda
\ No newline at end of file
+}  // namespace muda
3 changes: 3 additions & 0 deletions src/ext/muda/composite/cse.h
@@ -64,9 +64,12 @@ template <typename T>
 using host_cse =
     compressed_sparse_elements<host_vector<T>, host_vector<int>, host_vector<int>>;
 
+
+#ifdef MUDA_WITH_THRUST_UNIVERSAL
 template <typename T>
 using universal_cse =
     compressed_sparse_elements<universal_vector<T>, universal_vector<int>, universal_vector<int>>;
+#endif
 
 template <typename T>
 class device_buffer_cse
13 changes: 12 additions & 1 deletion src/ext/muda/container/var.h
@@ -1,11 +1,17 @@
 #pragma once
+#include <muda/tools/version.h>
 #include <vector>
 #include <thrust/device_allocator.h>
+
+#ifdef MUDA_WITH_THRUST_UNIVERSAL
 #include <thrust/universal_allocator.h>
+#endif
+
 #include <thrust/detail/raw_pointer_cast.h>
 #include <thrust/fill.h>
 #include <thrust/copy.h>
 #include <muda/muda_def.h>
+#include <muda/viewer/dense.h>
 
 namespace muda
 {
@@ -59,13 +65,17 @@ namespace details
 template <typename T, typename Allocator = thrust::device_allocator<T>>
 using device_var = details::var_base<T, Allocator>;
 
+#ifdef MUDA_WITH_THRUST_UNIVERSAL
 template <typename T, typename Allocator = thrust::universal_allocator<T>>
 using universal_var = details::var_base<T, Allocator>;
+#endif
 
 template <typename T, typename Allocator = std::allocator<T>>
 using host_var = details::var_base<T, Allocator>;
 }  // namespace muda
 
+
+// cast
 namespace muda
 {
 template <typename T, typename Allocator>
@@ -81,7 +91,8 @@ MUDA_INLINE T* data(details::var_base<T, Allocator>& v) MUDA_NOEXCEPT
 }
 }  // namespace muda
 
-#include <muda/viewer/dense.h>
+
+// viewer
 namespace muda
 {
 template <typename T, typename Allocator>
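With these guards in place, downstream code can use device_var unconditionally and fall back when universal memory is unavailable. A minimal consumer sketch (value construction assumed from the examples earlier in this commit):

    #include <muda/container/var.h>

    void vars()
    {
        muda::device_var<int> dv(1);     // always available
    #ifdef MUDA_WITH_THRUST_UNIVERSAL
        muda::universal_var<int> uv(1);  // only when CUDA >= 11.6 per version.h
    #endif
    }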
(6 more changed files not shown)
