Use `size_t` for byte count in device attributes #6151

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

davebayer merged 1 commit into NVIDIA:main from davebayer:use_size_t_for_num_bytes_dev_attrs

Oct 8, 2025

libcudacxx/include/cuda/__device/arch_traits.h

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -86,10 +86,10 @@ struct traits_t
  
      int max_grid_dim_z = 64 * 1024 - 1;

      // Maximum amount of shared memory available to a thread block in bytes

-     int max_shared_memory_per_block = 48 * 1024;

+     ::cuda::std::size_t max_shared_memory_per_block = 48 * 1024;

      // Memory available on device for __constant__ variables in a CUDA C kernel in bytes

-     int total_constant_memory = 64 * 1024;

+     ::cuda::std::size_t total_constant_memory = 64 * 1024;

      // Warp size in threads

      int warp_size = 32;

    @@ -146,7 +146,7 @@ struct traits_t
  
      // Maximum amount of shared memory available to a multiprocessor in bytes;

      // this amount is shared by all thread blocks simultaneously resident on a

      // multiprocessor

-     int max_shared_memory_per_multiprocessor;

+     ::cuda::std::size_t max_shared_memory_per_multiprocessor;

      // Maximum number of thread blocks that can reside on a multiprocessor

      int max_blocks_per_multiprocessor;

    @@ -158,11 +158,11 @@ struct traits_t
  
      int max_warps_per_multiprocessor;

      // Shared memory reserved by CUDA driver per block in bytes

-     int reserved_shared_memory_per_block;

+     ::cuda::std::size_t reserved_shared_memory_per_block;

      // Maximum per block shared memory size on the device. This value can be opted

      // into when using dynamic_shared_memory with NonPortableSize set to true

-     int max_shared_memory_per_block_optin;

+     ::cuda::std::size_t max_shared_memory_per_block_optin;

      // TODO: Do we want these?:

      // true if architecture supports clusters

libcudacxx/include/cuda/__device/attributes.h

-Original file line number
+Diff line change
@@ -26,6 +26,7 @@
     #  include <cuda/__device/device_ref.h>
     #  include <cuda/__driver/driver_api.h>
     #  include <cuda/__fwd/devices.h>
+    #  include <cuda/std/__cstddef/types.h>
     #  include <cuda/std/__cccl/prologue.h>
@@ -51,13 +52,36 @@ struct __dev_attr_impl
     template <::cudaDeviceAttr _Attr>
     struct __dev_attr : __dev_attr_impl<_Attr, int>
     {};
+    template <>
+    struct __dev_attr<::cudaDevAttrMaxSharedMemoryPerBlock> //
+        : __dev_attr_impl<::cudaDevAttrMaxSharedMemoryPerBlock, ::cuda::std::size_t>
+    {};
+    template <>
+    struct __dev_attr<::cudaDevAttrTotalConstantMemory> //
+        : __dev_attr_impl<::cudaDevAttrTotalConstantMemory, ::cuda::std::size_t>
+    {};
+    template <>
+    struct __dev_attr<::cudaDevAttrMaxPitch> //
+        : __dev_attr_impl<::cudaDevAttrMaxPitch, ::cuda::std::size_t>
+    {};
+    template <>
+    struct __dev_attr<::cudaDevAttrMaxTexture2DLinearPitch> //
+        : __dev_attr_impl<::cudaDevAttrMaxTexture2DLinearPitch, ::cuda::std::size_t>
+    {};
     // TODO: give this a strong type for kilohertz
     template <>
     struct __dev_attr<::cudaDevAttrClockRate> //
         : __dev_attr_impl<::cudaDevAttrClockRate, int>
     {};
     template <>
+    struct __dev_attr<::cudaDevAttrTextureAlignment> //
+        : __dev_attr_impl<::cudaDevAttrTextureAlignment, ::cuda::std::size_t>
+    {};
+    template <>
+    struct __dev_attr<::cudaDevAttrTexturePitchAlignment> //
+        : __dev_attr_impl<::cudaDevAttrTexturePitchAlignment, ::cuda::std::size_t>
+    {};
+    template <>
     struct __dev_attr<::cudaDevAttrGpuOverlap> //
         : __dev_attr_impl<::cudaDevAttrGpuOverlap, bool>
     {};
@@ -103,10 +127,9 @@ template <>
     struct __dev_attr<::cudaDevAttrGlobalMemoryBusWidth> //
         : __dev_attr_impl<::cudaDevAttrGlobalMemoryBusWidth, int>
     {};
-    // TODO: give this a strong type for bytes
     template <>
     struct __dev_attr<::cudaDevAttrL2CacheSize> //
-        : __dev_attr_impl<::cudaDevAttrL2CacheSize, int>
+        : __dev_attr_impl<::cudaDevAttrL2CacheSize, ::cuda::std::size_t>
     {};
     template <>
     struct __dev_attr<::cudaDevAttrUnifiedAddressing> //
@@ -125,6 +148,10 @@ struct __dev_attr<::cudaDevAttrLocalL1CacheSupported> //
         : __dev_attr_impl<::cudaDevAttrLocalL1CacheSupported, bool>
     {};
     template <>
+    struct __dev_attr<::cudaDevAttrMaxSharedMemoryPerMultiprocessor> //
+        : __dev_attr_impl<::cudaDevAttrMaxSharedMemoryPerMultiprocessor, ::cuda::std::size_t>
+    {};
+    template <>
     struct __dev_attr<::cudaDevAttrManagedMemory> //
         : __dev_attr_impl<::cudaDevAttrManagedMemory, bool>
     {};
@@ Expand Down Expand Up @@
         : __dev_attr_impl<::cudaDevAttrDirectManagedMemAccessFromHost, bool>
     {};
     template <>
+    struct __dev_attr<::cudaDevAttrMaxSharedMemoryPerBlockOptin> //
+        : __dev_attr_impl<::cudaDevAttrMaxSharedMemoryPerBlockOptin, ::cuda::std::size_t>
+    {};
+    template <>
+    struct __dev_attr<::cudaDevAttrMaxPersistingL2CacheSize> //
+        : __dev_attr_impl<::cudaDevAttrMaxPersistingL2CacheSize, ::cuda::std::size_t>
+    {};
+    template <>
+    struct __dev_attr<::cudaDevAttrMaxAccessPolicyWindowSize> //
+        : __dev_attr_impl<::cudaDevAttrMaxAccessPolicyWindowSize, ::cuda::std::size_t>
+    {};
+    template <>
+    struct __dev_attr<::cudaDevAttrReservedSharedMemoryPerBlock> //
+        : __dev_attr_impl<::cudaDevAttrReservedSharedMemoryPerBlock, ::cuda::std::size_t>
+    {};
+    template <>
     struct __dev_attr<::cudaDevAttrSparseCudaArraySupported> //
         : __dev_attr_impl<::cudaDevAttrSparseCudaArraySupported, bool>
     {};
@@ Expand Down @@

libcudacxx/test/libcudacxx/cuda/ccclrt/device/device_smoke.c2h.cu

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -11,6 +11,7 @@
  
    #include <cuda/__driver/driver_api.h>

    #include <cuda/devices>

    #include <cuda/std/__type_traits/is_same.h>

    #include <cuda/std/cstddef>

    #include <testing.cuh>

    @@ -62,18 +63,22 @@ C2H_CCCLRT_TEST("Smoke", "[device]")
  
        ::test_device_attribute<attributes::max_grid_dim_x, ::cudaDevAttrMaxGridDimX, int>();

        ::test_device_attribute<attributes::max_grid_dim_y, ::cudaDevAttrMaxGridDimY, int>();

        ::test_device_attribute<attributes::max_grid_dim_z, ::cudaDevAttrMaxGridDimZ, int>();

-       ::test_device_attribute<attributes::max_shared_memory_per_block, ::cudaDevAttrMaxSharedMemoryPerBlock, int>();

-       ::test_device_attribute<attributes::total_constant_memory, ::cudaDevAttrTotalConstantMemory, int>();

+       ::test_device_attribute<attributes::max_shared_memory_per_block,

+                               ::cudaDevAttrMaxSharedMemoryPerBlock,

+                               cuda::std::size_t>();

+       ::test_device_attribute<attributes::total_constant_memory, ::cudaDevAttrTotalConstantMemory, cuda::std::size_t>();

        ::test_device_attribute<attributes::warp_size, ::cudaDevAttrWarpSize, int>();

-       ::test_device_attribute<attributes::max_pitch, ::cudaDevAttrMaxPitch, int>();

+       ::test_device_attribute<attributes::max_pitch, ::cudaDevAttrMaxPitch, cuda::std::size_t>();

        ::test_device_attribute<attributes::max_texture_1d_width, ::cudaDevAttrMaxTexture1DWidth, int>();

        ::test_device_attribute<attributes::max_texture_1d_linear_width, ::cudaDevAttrMaxTexture1DLinearWidth, int>();

        ::test_device_attribute<attributes::max_texture_1d_mipmapped_width, ::cudaDevAttrMaxTexture1DMipmappedWidth, int>();

        ::test_device_attribute<attributes::max_texture_2d_width, ::cudaDevAttrMaxTexture2DWidth, int>();

        ::test_device_attribute<attributes::max_texture_2d_height, ::cudaDevAttrMaxTexture2DHeight, int>();

        ::test_device_attribute<attributes::max_texture_2d_linear_width, ::cudaDevAttrMaxTexture2DLinearWidth, int>();

        ::test_device_attribute<attributes::max_texture_2d_linear_height, ::cudaDevAttrMaxTexture2DLinearHeight, int>();

-       ::test_device_attribute<attributes::max_texture_2d_linear_pitch, ::cudaDevAttrMaxTexture2DLinearPitch, int>();

+       ::test_device_attribute<attributes::max_texture_2d_linear_pitch,

+                               ::cudaDevAttrMaxTexture2DLinearPitch,

+                               cuda::std::size_t>();

        ::test_device_attribute<attributes::max_texture_2d_mipmapped_width, ::cudaDevAttrMaxTexture2DMipmappedWidth, int>();

        ::test_device_attribute<attributes::max_texture_2d_mipmapped_height, ::cudaDevAttrMaxTexture2DMipmappedHeight, int>();

        ::test_device_attribute<attributes::max_texture_3d_width, ::cudaDevAttrMaxTexture3DWidth, int>();

    @@ -114,8 +119,8 @@ C2H_CCCLRT_TEST("Smoke", "[device]")
  
                                int>();

        ::test_device_attribute<attributes::max_registers_per_block, ::cudaDevAttrMaxRegistersPerBlock, int>();

        ::test_device_attribute<attributes::clock_rate, ::cudaDevAttrClockRate, int>();

-       ::test_device_attribute<attributes::texture_alignment, ::cudaDevAttrTextureAlignment, int>();

-       ::test_device_attribute<attributes::texture_pitch_alignment, ::cudaDevAttrTexturePitchAlignment, int>();

+       ::test_device_attribute<attributes::texture_alignment, ::cudaDevAttrTextureAlignment, cuda::std::size_t>();

+       ::test_device_attribute<attributes::texture_pitch_alignment, ::cudaDevAttrTexturePitchAlignment, cuda::std::size_t>();

        ::test_device_attribute<attributes::gpu_overlap, ::cudaDevAttrGpuOverlap, bool>();

        ::test_device_attribute<attributes::multiprocessor_count, ::cudaDevAttrMultiProcessorCount, int>();

        ::test_device_attribute<attributes::kernel_exec_timeout, ::cudaDevAttrKernelExecTimeout, bool>();

    @@ -127,7 +132,7 @@ C2H_CCCLRT_TEST("Smoke", "[device]")
  
        ::test_device_attribute<attributes::pci_bus_id, ::cudaDevAttrPciBusId, int>();

        ::test_device_attribute<attributes::pci_device_id, ::cudaDevAttrPciDeviceId, int>();

        ::test_device_attribute<attributes::tcc_driver, ::cudaDevAttrTccDriver, bool>();

-       ::test_device_attribute<attributes::l2_cache_size, ::cudaDevAttrL2CacheSize, int>();

+       ::test_device_attribute<attributes::l2_cache_size, ::cudaDevAttrL2CacheSize, cuda::std::size_t>();

        ::test_device_attribute<attributes::max_threads_per_multiprocessor, ::cudaDevAttrMaxThreadsPerMultiProcessor, int>();

        ::test_device_attribute<attributes::unified_addressing, ::cudaDevAttrUnifiedAddressing, bool>();

        ::test_device_attribute<attributes::compute_capability_major, ::cudaDevAttrComputeCapabilityMajor, int>();

    @@ -137,7 +142,7 @@ C2H_CCCLRT_TEST("Smoke", "[device]")
  
        ::test_device_attribute<attributes::local_l1_cache_supported, ::cudaDevAttrLocalL1CacheSupported, bool>();

        ::test_device_attribute<attributes::max_shared_memory_per_multiprocessor,

                                ::cudaDevAttrMaxSharedMemoryPerMultiprocessor,

-                               int>();

+                               cuda::std::size_t>();

        ::test_device_attribute<attributes::max_registers_per_multiprocessor,

                                ::cudaDevAttrMaxRegistersPerMultiprocessor,

                                int>();

    @@ -164,13 +169,17 @@ C2H_CCCLRT_TEST("Smoke", "[device]")
  
                                bool>();

        ::test_device_attribute<attributes::max_shared_memory_per_block_optin,

                                ::cudaDevAttrMaxSharedMemoryPerBlockOptin,

-                               int>();

+                               cuda::std::size_t>();

        ::test_device_attribute<attributes::max_blocks_per_multiprocessor, ::cudaDevAttrMaxBlocksPerMultiprocessor, int>();

-       ::test_device_attribute<attributes::max_persisting_l2_cache_size, ::cudaDevAttrMaxPersistingL2CacheSize, int>();

-       ::test_device_attribute<attributes::max_access_policy_window_size, ::cudaDevAttrMaxAccessPolicyWindowSize, int>();

+       ::test_device_attribute<attributes::max_persisting_l2_cache_size,

+                               ::cudaDevAttrMaxPersistingL2CacheSize,

+                               cuda::std::size_t>();

+       ::test_device_attribute<attributes::max_access_policy_window_size,

+                               ::cudaDevAttrMaxAccessPolicyWindowSize,

+                               cuda::std::size_t>();

        ::test_device_attribute<attributes::reserved_shared_memory_per_block,

                                ::cudaDevAttrReservedSharedMemoryPerBlock,

-                               int>();

+                               cuda::std::size_t>();

        ::test_device_attribute<attributes::sparse_cuda_array_supported, ::cudaDevAttrSparseCudaArraySupported, bool>();

        ::test_device_attribute<attributes::host_register_read_only_supported,

                                ::cudaDevAttrHostRegisterReadOnlySupported,

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Use `size_t` for byte count in device attributes #6151

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

Use size_t for byte count in device attributes #6151

Uh oh!

Use size_t for byte count in device attributes #6151

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

Use `size_t` for byte count in device attributes #6151

Use `size_t` for byte count in device attributes #6151