Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions libcudacxx/include/cuda/__device/arch_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ struct traits_t
int max_grid_dim_z = 64 * 1024 - 1;

// Maximum amount of shared memory available to a thread block in bytes
int max_shared_memory_per_block = 48 * 1024;
::cuda::std::size_t max_shared_memory_per_block = 48 * 1024;

// Memory available on device for __constant__ variables in a CUDA C kernel in bytes
int total_constant_memory = 64 * 1024;
::cuda::std::size_t total_constant_memory = 64 * 1024;

// Warp size in threads
int warp_size = 32;
Expand Down Expand Up @@ -146,7 +146,7 @@ struct traits_t
// Maximum amount of shared memory available to a multiprocessor in bytes;
// this amount is shared by all thread blocks simultaneously resident on a
// multiprocessor
int max_shared_memory_per_multiprocessor;
::cuda::std::size_t max_shared_memory_per_multiprocessor;

// Maximum number of thread blocks that can reside on a multiprocessor
int max_blocks_per_multiprocessor;
Expand All @@ -158,11 +158,11 @@ struct traits_t
int max_warps_per_multiprocessor;

// Shared memory reserved by CUDA driver per block in bytes
int reserved_shared_memory_per_block;
::cuda::std::size_t reserved_shared_memory_per_block;

// Maximum per block shared memory size on the device. This value can be opted
// into when using dynamic_shared_memory with NonPortableSize set to true
int max_shared_memory_per_block_optin;
::cuda::std::size_t max_shared_memory_per_block_optin;

// TODO: Do we want these?:
// true if architecture supports clusters
Expand Down
49 changes: 46 additions & 3 deletions libcudacxx/include/cuda/__device/attributes.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
# include <cuda/__device/device_ref.h>
# include <cuda/__driver/driver_api.h>
# include <cuda/__fwd/devices.h>
# include <cuda/std/__cstddef/types.h>

# include <cuda/std/__cccl/prologue.h>

Expand All @@ -51,13 +52,36 @@ struct __dev_attr_impl
// Primary template: every attribute that has no explicit specialization
// derives from __dev_attr_impl with int as the second template argument.
template <::cudaDeviceAttr _Attr>
struct __dev_attr //
    : public __dev_attr_impl<_Attr, int>
{};

// Explicit specialization for ::cudaDevAttrMaxSharedMemoryPerBlock: passes
// ::cuda::std::size_t (instead of the primary template's int) to __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrMaxSharedMemoryPerBlock>
    : public __dev_attr_impl<::cudaDevAttrMaxSharedMemoryPerBlock, ::cuda::std::size_t>
{};
// Explicit specialization for ::cudaDevAttrTotalConstantMemory: passes
// ::cuda::std::size_t (instead of the primary template's int) to __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrTotalConstantMemory>
    : public __dev_attr_impl<::cudaDevAttrTotalConstantMemory, ::cuda::std::size_t>
{};
// Explicit specialization for ::cudaDevAttrMaxPitch: passes
// ::cuda::std::size_t (instead of the primary template's int) to __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrMaxPitch>
    : public __dev_attr_impl<::cudaDevAttrMaxPitch, ::cuda::std::size_t>
{};
// Explicit specialization for ::cudaDevAttrMaxTexture2DLinearPitch: passes
// ::cuda::std::size_t (instead of the primary template's int) to __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrMaxTexture2DLinearPitch>
    : public __dev_attr_impl<::cudaDevAttrMaxTexture2DLinearPitch, ::cuda::std::size_t>
{};
// Explicit specialization for ::cudaDevAttrClockRate. It still passes int to
// __dev_attr_impl, exactly like the primary template.
// TODO: give this a strong type for kilohertz
template <>
struct __dev_attr<::cudaDevAttrClockRate>
    : public __dev_attr_impl<::cudaDevAttrClockRate, int>
{};
// Explicit specialization for ::cudaDevAttrTextureAlignment: passes
// ::cuda::std::size_t (instead of the primary template's int) to __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrTextureAlignment>
    : public __dev_attr_impl<::cudaDevAttrTextureAlignment, ::cuda::std::size_t>
{};
// Explicit specialization for ::cudaDevAttrTexturePitchAlignment: passes
// ::cuda::std::size_t (instead of the primary template's int) to __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrTexturePitchAlignment>
    : public __dev_attr_impl<::cudaDevAttrTexturePitchAlignment, ::cuda::std::size_t>
{};
// Explicit specialization for ::cudaDevAttrGpuOverlap: passes bool
// (instead of the primary template's int) to __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrGpuOverlap>
    : public __dev_attr_impl<::cudaDevAttrGpuOverlap, bool>
{};
Expand Down Expand Up @@ -103,10 +127,9 @@ template <>
struct __dev_attr<::cudaDevAttrGlobalMemoryBusWidth> //
: __dev_attr_impl<::cudaDevAttrGlobalMemoryBusWidth, int>
{};
// TODO: give this a strong type for bytes
// Explicit specialization for ::cudaDevAttrL2CacheSize: passes
// ::cuda::std::size_t (instead of the primary template's int) to
// __dev_attr_impl. A class may have only one base clause, so exactly one
// __dev_attr_impl base is listed here.
template <>
struct __dev_attr<::cudaDevAttrL2CacheSize> //
    : __dev_attr_impl<::cudaDevAttrL2CacheSize, ::cuda::std::size_t>
{};
template <>
struct __dev_attr<::cudaDevAttrUnifiedAddressing> //
Expand All @@ -125,6 +148,10 @@ struct __dev_attr<::cudaDevAttrLocalL1CacheSupported> //
: __dev_attr_impl<::cudaDevAttrLocalL1CacheSupported, bool>
{};
// Explicit specialization for ::cudaDevAttrMaxSharedMemoryPerMultiprocessor:
// passes ::cuda::std::size_t (instead of the primary template's int) to
// __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrMaxSharedMemoryPerMultiprocessor>
    : public __dev_attr_impl<::cudaDevAttrMaxSharedMemoryPerMultiprocessor, ::cuda::std::size_t>
{};
// Explicit specialization for ::cudaDevAttrManagedMemory: passes bool
// (instead of the primary template's int) to __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrManagedMemory>
    : public __dev_attr_impl<::cudaDevAttrManagedMemory, bool>
{};
Expand Down Expand Up @@ -169,6 +196,22 @@ struct __dev_attr<::cudaDevAttrDirectManagedMemAccessFromHost> //
: __dev_attr_impl<::cudaDevAttrDirectManagedMemAccessFromHost, bool>
{};
// Explicit specialization for ::cudaDevAttrMaxSharedMemoryPerBlockOptin:
// passes ::cuda::std::size_t (instead of the primary template's int) to
// __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrMaxSharedMemoryPerBlockOptin>
    : public __dev_attr_impl<::cudaDevAttrMaxSharedMemoryPerBlockOptin, ::cuda::std::size_t>
{};
// Explicit specialization for ::cudaDevAttrMaxPersistingL2CacheSize: passes
// ::cuda::std::size_t (instead of the primary template's int) to __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrMaxPersistingL2CacheSize>
    : public __dev_attr_impl<::cudaDevAttrMaxPersistingL2CacheSize, ::cuda::std::size_t>
{};
// Explicit specialization for ::cudaDevAttrMaxAccessPolicyWindowSize: passes
// ::cuda::std::size_t (instead of the primary template's int) to __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrMaxAccessPolicyWindowSize>
    : public __dev_attr_impl<::cudaDevAttrMaxAccessPolicyWindowSize, ::cuda::std::size_t>
{};
// Explicit specialization for ::cudaDevAttrReservedSharedMemoryPerBlock:
// passes ::cuda::std::size_t (instead of the primary template's int) to
// __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrReservedSharedMemoryPerBlock>
    : public __dev_attr_impl<::cudaDevAttrReservedSharedMemoryPerBlock, ::cuda::std::size_t>
{};
// Explicit specialization for ::cudaDevAttrSparseCudaArraySupported: passes
// bool (instead of the primary template's int) to __dev_attr_impl.
template <>
struct __dev_attr<::cudaDevAttrSparseCudaArraySupported>
    : public __dev_attr_impl<::cudaDevAttrSparseCudaArraySupported, bool>
{};
Expand Down
33 changes: 21 additions & 12 deletions libcudacxx/test/libcudacxx/cuda/ccclrt/device/device_smoke.c2h.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <cuda/__driver/driver_api.h>
#include <cuda/devices>
#include <cuda/std/__type_traits/is_same.h>
#include <cuda/std/cstddef>

#include <testing.cuh>

Expand Down Expand Up @@ -62,18 +63,22 @@ C2H_CCCLRT_TEST("Smoke", "[device]")
::test_device_attribute<attributes::max_grid_dim_x, ::cudaDevAttrMaxGridDimX, int>();
::test_device_attribute<attributes::max_grid_dim_y, ::cudaDevAttrMaxGridDimY, int>();
::test_device_attribute<attributes::max_grid_dim_z, ::cudaDevAttrMaxGridDimZ, int>();
::test_device_attribute<attributes::max_shared_memory_per_block, ::cudaDevAttrMaxSharedMemoryPerBlock, int>();
::test_device_attribute<attributes::total_constant_memory, ::cudaDevAttrTotalConstantMemory, int>();
::test_device_attribute<attributes::max_shared_memory_per_block,
::cudaDevAttrMaxSharedMemoryPerBlock,
cuda::std::size_t>();
::test_device_attribute<attributes::total_constant_memory, ::cudaDevAttrTotalConstantMemory, cuda::std::size_t>();
::test_device_attribute<attributes::warp_size, ::cudaDevAttrWarpSize, int>();
::test_device_attribute<attributes::max_pitch, ::cudaDevAttrMaxPitch, int>();
::test_device_attribute<attributes::max_pitch, ::cudaDevAttrMaxPitch, cuda::std::size_t>();
::test_device_attribute<attributes::max_texture_1d_width, ::cudaDevAttrMaxTexture1DWidth, int>();
::test_device_attribute<attributes::max_texture_1d_linear_width, ::cudaDevAttrMaxTexture1DLinearWidth, int>();
::test_device_attribute<attributes::max_texture_1d_mipmapped_width, ::cudaDevAttrMaxTexture1DMipmappedWidth, int>();
::test_device_attribute<attributes::max_texture_2d_width, ::cudaDevAttrMaxTexture2DWidth, int>();
::test_device_attribute<attributes::max_texture_2d_height, ::cudaDevAttrMaxTexture2DHeight, int>();
::test_device_attribute<attributes::max_texture_2d_linear_width, ::cudaDevAttrMaxTexture2DLinearWidth, int>();
::test_device_attribute<attributes::max_texture_2d_linear_height, ::cudaDevAttrMaxTexture2DLinearHeight, int>();
::test_device_attribute<attributes::max_texture_2d_linear_pitch, ::cudaDevAttrMaxTexture2DLinearPitch, int>();
::test_device_attribute<attributes::max_texture_2d_linear_pitch,
::cudaDevAttrMaxTexture2DLinearPitch,
cuda::std::size_t>();
::test_device_attribute<attributes::max_texture_2d_mipmapped_width, ::cudaDevAttrMaxTexture2DMipmappedWidth, int>();
::test_device_attribute<attributes::max_texture_2d_mipmapped_height, ::cudaDevAttrMaxTexture2DMipmappedHeight, int>();
::test_device_attribute<attributes::max_texture_3d_width, ::cudaDevAttrMaxTexture3DWidth, int>();
Expand Down Expand Up @@ -114,8 +119,8 @@ C2H_CCCLRT_TEST("Smoke", "[device]")
int>();
::test_device_attribute<attributes::max_registers_per_block, ::cudaDevAttrMaxRegistersPerBlock, int>();
::test_device_attribute<attributes::clock_rate, ::cudaDevAttrClockRate, int>();
::test_device_attribute<attributes::texture_alignment, ::cudaDevAttrTextureAlignment, int>();
::test_device_attribute<attributes::texture_pitch_alignment, ::cudaDevAttrTexturePitchAlignment, int>();
::test_device_attribute<attributes::texture_alignment, ::cudaDevAttrTextureAlignment, cuda::std::size_t>();
::test_device_attribute<attributes::texture_pitch_alignment, ::cudaDevAttrTexturePitchAlignment, cuda::std::size_t>();
::test_device_attribute<attributes::gpu_overlap, ::cudaDevAttrGpuOverlap, bool>();
::test_device_attribute<attributes::multiprocessor_count, ::cudaDevAttrMultiProcessorCount, int>();
::test_device_attribute<attributes::kernel_exec_timeout, ::cudaDevAttrKernelExecTimeout, bool>();
Expand All @@ -127,7 +132,7 @@ C2H_CCCLRT_TEST("Smoke", "[device]")
::test_device_attribute<attributes::pci_bus_id, ::cudaDevAttrPciBusId, int>();
::test_device_attribute<attributes::pci_device_id, ::cudaDevAttrPciDeviceId, int>();
::test_device_attribute<attributes::tcc_driver, ::cudaDevAttrTccDriver, bool>();
::test_device_attribute<attributes::l2_cache_size, ::cudaDevAttrL2CacheSize, int>();
::test_device_attribute<attributes::l2_cache_size, ::cudaDevAttrL2CacheSize, cuda::std::size_t>();
::test_device_attribute<attributes::max_threads_per_multiprocessor, ::cudaDevAttrMaxThreadsPerMultiProcessor, int>();
::test_device_attribute<attributes::unified_addressing, ::cudaDevAttrUnifiedAddressing, bool>();
::test_device_attribute<attributes::compute_capability_major, ::cudaDevAttrComputeCapabilityMajor, int>();
Expand All @@ -137,7 +142,7 @@ C2H_CCCLRT_TEST("Smoke", "[device]")
::test_device_attribute<attributes::local_l1_cache_supported, ::cudaDevAttrLocalL1CacheSupported, bool>();
::test_device_attribute<attributes::max_shared_memory_per_multiprocessor,
::cudaDevAttrMaxSharedMemoryPerMultiprocessor,
int>();
cuda::std::size_t>();
::test_device_attribute<attributes::max_registers_per_multiprocessor,
::cudaDevAttrMaxRegistersPerMultiprocessor,
int>();
Expand All @@ -164,13 +169,17 @@ C2H_CCCLRT_TEST("Smoke", "[device]")
bool>();
::test_device_attribute<attributes::max_shared_memory_per_block_optin,
::cudaDevAttrMaxSharedMemoryPerBlockOptin,
int>();
cuda::std::size_t>();
::test_device_attribute<attributes::max_blocks_per_multiprocessor, ::cudaDevAttrMaxBlocksPerMultiprocessor, int>();
::test_device_attribute<attributes::max_persisting_l2_cache_size, ::cudaDevAttrMaxPersistingL2CacheSize, int>();
::test_device_attribute<attributes::max_access_policy_window_size, ::cudaDevAttrMaxAccessPolicyWindowSize, int>();
::test_device_attribute<attributes::max_persisting_l2_cache_size,
::cudaDevAttrMaxPersistingL2CacheSize,
cuda::std::size_t>();
::test_device_attribute<attributes::max_access_policy_window_size,
::cudaDevAttrMaxAccessPolicyWindowSize,
cuda::std::size_t>();
::test_device_attribute<attributes::reserved_shared_memory_per_block,
::cudaDevAttrReservedSharedMemoryPerBlock,
int>();
cuda::std::size_t>();
::test_device_attribute<attributes::sparse_cuda_array_supported, ::cudaDevAttrSparseCudaArraySupported, bool>();
::test_device_attribute<attributes::host_register_read_only_supported,
::cudaDevAttrHostRegisterReadOnlySupported,
Expand Down
Loading