From 84dcd4374021aaf1c159a1c1c0efb1616cec0e14 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 Aug 2025 02:58:28 +0000 Subject: [PATCH 1/5] Initial plan From c3610f6f7461b49cc004e34f8f4b506a5e881ab8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 Aug 2025 03:04:27 +0000 Subject: [PATCH 2/5] Add Device.arch property and update examples to use it Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/cuda/core/experimental/_device.py | 5 +++++ cuda_core/examples/cuda_graphs.py | 2 +- cuda_core/examples/memory_ops.py | 2 +- cuda_core/examples/pytorch_example.py | 2 +- cuda_core/examples/saxpy.py | 2 +- cuda_core/examples/simple_multi_gpu_example.py | 4 ++-- cuda_core/examples/strided_memory_view_gpu.py | 2 +- cuda_core/examples/vector_add.py | 2 +- cuda_core/tests/test_device.py | 12 ++++++++++++ 9 files changed, 25 insertions(+), 8 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py index 384db9195c..c8e0de189a 100644 --- a/cuda_core/cuda/core/experimental/_device.py +++ b/cuda_core/cuda/core/experimental/_device.py @@ -1112,6 +1112,11 @@ def compute_capability(self) -> ComputeCapability: self.properties._cache["compute_capability"] = cc return cc + @property + def arch(self) -> str: + """Return compute capability as a string (e.g., '75' for CC 7.5).""" + return "".join(f"{i}" for i in self.compute_capability) + @property def context(self) -> Context: """Return the current :obj:`~_context.Context` associated with this device. diff --git a/cuda_core/examples/cuda_graphs.py b/cuda_core/examples/cuda_graphs.py index b6c5edbe18..3f195d1d9d 100644 --- a/cuda_core/examples/cuda_graphs.py +++ b/cuda_core/examples/cuda_graphs.py @@ -53,7 +53,7 @@ def main(): cp.cuda.ExternalStream(int(stream.handle)).use() # Compile the program - arch = "".join(f"{i}" for i in dev.compute_capability) + arch = dev.arch program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}") prog = Program(code, code_type="c++", options=program_options) mod = prog.compile( diff --git a/cuda_core/examples/memory_ops.py b/cuda_core/examples/memory_ops.py index 6f3de7a670..71fb3bf54d 100644 --- a/cuda_core/examples/memory_ops.py +++ b/cuda_core/examples/memory_ops.py @@ -54,7 +54,7 @@ cp.cuda.ExternalStream(int(stream.handle)).use() # Compile kernel -arch = "".join(f"{i}" for i in dev.compute_capability) +arch = dev.arch program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}") prog = Program(code, code_type="c++", options=program_options) mod = prog.compile("cubin") diff --git a/cuda_core/examples/pytorch_example.py b/cuda_core/examples/pytorch_example.py index 11f049443d..5aada9f04f 100644 --- a/cuda_core/examples/pytorch_example.py +++ b/cuda_core/examples/pytorch_example.py @@ -51,7 +51,7 @@ def __cuda_stream__(self): s = dev.create_stream(PyTorchStreamWrapper(pt_stream)) # prepare program -arch = "".join(f"{i}" for i in dev.compute_capability) +arch = dev.arch program_options = ProgramOptions(std="c++11", arch=f"sm_{arch}") prog = Program(code, code_type="c++", options=program_options) mod = prog.compile( diff --git a/cuda_core/examples/saxpy.py b/cuda_core/examples/saxpy.py index 6048c6d5df..a2b1de82a9 100644 --- a/cuda_core/examples/saxpy.py +++ b/cuda_core/examples/saxpy.py @@ -38,7 +38,7 @@ s = dev.create_stream() # prepare program -arch = "".join(f"{i}" for i in dev.compute_capability) +arch = dev.arch program_options = ProgramOptions(std="c++11", arch=f"sm_{arch}") prog = Program(code, code_type="c++", options=program_options) diff --git a/cuda_core/examples/simple_multi_gpu_example.py b/cuda_core/examples/simple_multi_gpu_example.py index 456c7cacaa..4af360ae26 100644 --- a/cuda_core/examples/simple_multi_gpu_example.py +++ b/cuda_core/examples/simple_multi_gpu_example.py @@ -40,7 +40,7 @@ } } """ -arch0 = "".join(f"{i}" for i in dev0.compute_capability) +arch0 = dev0.arch prog_add = Program(code_add, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch0}"}) mod_add = prog_add.compile("cubin") ker_add = mod_add.get_kernel("vector_add") @@ -63,7 +63,7 @@ } } """ -arch1 = "".join(f"{i}" for i in dev1.compute_capability) +arch1 = dev1.arch prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch1}"}) mod_sub = prog_sub.compile("cubin") ker_sub = mod_sub.get_kernel("vector_sub") diff --git a/cuda_core/examples/strided_memory_view_gpu.py b/cuda_core/examples/strided_memory_view_gpu.py index 58bc9634bd..178d8874a8 100644 --- a/cuda_core/examples/strided_memory_view_gpu.py +++ b/cuda_core/examples/strided_memory_view_gpu.py @@ -103,7 +103,7 @@ def run(): # To know the GPU's compute capability, we need to identify which GPU to use. dev = Device(0) dev.set_current() - arch = "".join(f"{i}" for i in dev.compute_capability) + arch = dev.arch gpu_prog = Program(gpu_code, code_type="c++", options=ProgramOptions(arch=f"sm_{arch}", std="c++11")) mod = gpu_prog.compile(target_type="cubin") gpu_ker = mod.get_kernel(func_name) diff --git a/cuda_core/examples/vector_add.py b/cuda_core/examples/vector_add.py index a5b9b036fb..76730f895a 100644 --- a/cuda_core/examples/vector_add.py +++ b/cuda_core/examples/vector_add.py @@ -33,7 +33,7 @@ s = dev.create_stream() # prepare program -arch = "".join(f"{i}" for i in dev.compute_capability) +arch = dev.arch program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}") prog = Program(code, code_type="c++", options=program_options) mod = prog.compile("cubin", name_expressions=("vector_add",)) diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index 2a135c49a9..1eebd784f1 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -105,6 +105,18 @@ def test_compute_capability(): assert device.compute_capability == expected_cc +def test_arch(): + device = Device() + # Test that arch returns the same as the old pattern + expected_arch = "".join(f"{i}" for i in device.compute_capability) + assert device.arch == expected_arch + # Test that it's a string + assert isinstance(device.arch, str) + # Test that it matches the expected format (e.g., "75" for CC 7.5) + cc = device.compute_capability + assert device.arch == f"{cc.major}{cc.minor}" + + cuda_base_properties = [ ("max_threads_per_block", int), ("max_block_dim_x", int), From c940ecab337c28fc1968280f0014360f51a4c717 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 Aug 2025 03:20:49 +0000 Subject: [PATCH 3/5] Inline dev.arch calls in f-strings per PR feedback Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/examples/cuda_graphs.py | 3 +-- cuda_core/examples/memory_ops.py | 3 +-- cuda_core/examples/pytorch_example.py | 3 +-- cuda_core/examples/saxpy.py | 3 +-- cuda_core/examples/simple_multi_gpu_example.py | 6 ++---- cuda_core/examples/strided_memory_view_gpu.py | 3 +-- cuda_core/examples/vector_add.py | 3 +-- 7 files changed, 8 insertions(+), 16 deletions(-) diff --git a/cuda_core/examples/cuda_graphs.py b/cuda_core/examples/cuda_graphs.py index 3f195d1d9d..38c48fb111 100644 --- a/cuda_core/examples/cuda_graphs.py +++ b/cuda_core/examples/cuda_graphs.py @@ -53,8 +53,7 @@ def main(): cp.cuda.ExternalStream(int(stream.handle)).use() # Compile the program - arch = dev.arch - program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}") + program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}") prog = Program(code, code_type="c++", options=program_options) mod = prog.compile( "cubin", name_expressions=("vector_add", "vector_multiply", "vector_subtract") diff --git a/cuda_core/examples/memory_ops.py b/cuda_core/examples/memory_ops.py index 71fb3bf54d..b12bc5039a 100644 --- a/cuda_core/examples/memory_ops.py +++ b/cuda_core/examples/memory_ops.py @@ -54,8 +54,7 @@ cp.cuda.ExternalStream(int(stream.handle)).use() # Compile kernel -arch = dev.arch -program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}") +program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}") prog = Program(code, code_type="c++", options=program_options) mod = prog.compile("cubin") kernel = mod.get_kernel("memory_ops") diff --git a/cuda_core/examples/pytorch_example.py b/cuda_core/examples/pytorch_example.py index 5aada9f04f..37288ebab3 100644 --- a/cuda_core/examples/pytorch_example.py +++ b/cuda_core/examples/pytorch_example.py @@ -51,8 +51,7 @@ def __cuda_stream__(self): s = dev.create_stream(PyTorchStreamWrapper(pt_stream)) # prepare program -arch = dev.arch -program_options = ProgramOptions(std="c++11", arch=f"sm_{arch}") +program_options = ProgramOptions(std="c++11", arch=f"sm_{dev.arch}") prog = Program(code, code_type="c++", options=program_options) mod = prog.compile( "cubin", diff --git a/cuda_core/examples/saxpy.py b/cuda_core/examples/saxpy.py index a2b1de82a9..4e4d548bbc 100644 --- a/cuda_core/examples/saxpy.py +++ b/cuda_core/examples/saxpy.py @@ -38,8 +38,7 @@ s = dev.create_stream() # prepare program -arch = dev.arch -program_options = ProgramOptions(std="c++11", arch=f"sm_{arch}") +program_options = ProgramOptions(std="c++11", arch=f"sm_{dev.arch}") prog = Program(code, code_type="c++", options=program_options) # Note the use of the `name_expressions` argument to specify the template diff --git a/cuda_core/examples/simple_multi_gpu_example.py b/cuda_core/examples/simple_multi_gpu_example.py index 4af360ae26..1f9e43c03a 100644 --- a/cuda_core/examples/simple_multi_gpu_example.py +++ b/cuda_core/examples/simple_multi_gpu_example.py @@ -40,8 +40,7 @@ } } """ -arch0 = dev0.arch -prog_add = Program(code_add, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch0}"}) +prog_add = Program(code_add, code_type="c++", options={"std": "c++17", "arch": f"sm_{dev0.arch}"}) mod_add = prog_add.compile("cubin") ker_add = mod_add.get_kernel("vector_add") @@ -63,8 +62,7 @@ } } """ -arch1 = dev1.arch -prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch1}"}) +prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{dev1.arch}"}) mod_sub = prog_sub.compile("cubin") ker_sub = mod_sub.get_kernel("vector_sub") diff --git a/cuda_core/examples/strided_memory_view_gpu.py b/cuda_core/examples/strided_memory_view_gpu.py index 178d8874a8..5fb723ac70 100644 --- a/cuda_core/examples/strided_memory_view_gpu.py +++ b/cuda_core/examples/strided_memory_view_gpu.py @@ -103,8 +103,7 @@ def run(): # To know the GPU's compute capability, we need to identify which GPU to use. dev = Device(0) dev.set_current() - arch = dev.arch - gpu_prog = Program(gpu_code, code_type="c++", options=ProgramOptions(arch=f"sm_{arch}", std="c++11")) + gpu_prog = Program(gpu_code, code_type="c++", options=ProgramOptions(arch=f"sm_{dev.arch}", std="c++11")) mod = gpu_prog.compile(target_type="cubin") gpu_ker = mod.get_kernel(func_name) diff --git a/cuda_core/examples/vector_add.py b/cuda_core/examples/vector_add.py index 76730f895a..303c774180 100644 --- a/cuda_core/examples/vector_add.py +++ b/cuda_core/examples/vector_add.py @@ -33,8 +33,7 @@ s = dev.create_stream() # prepare program -arch = dev.arch -program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}") +program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}") prog = Program(code, code_type="c++", options=program_options) mod = prog.compile("cubin", name_expressions=("vector_add",)) From 7fc9d1be62327e8a56ccf58d892e42a3c100a6d9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 Aug 2025 03:29:38 +0000 Subject: [PATCH 4/5] Add release note for Device.arch property Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/docs/source/release/0.X.Y-notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst index 3a9c7076a7..7ad3f616d6 100644 --- a/cuda_core/docs/source/release/0.X.Y-notes.rst +++ b/cuda_core/docs/source/release/0.X.Y-notes.rst @@ -24,7 +24,7 @@ Breaking Changes New features ------------ -None. +- Added :attr:`Device.arch` property that returns the compute capability as a string (e.g., '75' for CC 7.5), providing a convenient alternative to manually concatenating the compute capability tuple. New examples From 491fb8922788edd6f656f8869aa909549eefbb5e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 Aug 2025 04:25:43 +0000 Subject: [PATCH 5/5] Use f-string instead of "".join for Device.arch property Co-authored-by: kkraus14 <3665167+kkraus14@users.noreply.github.com> --- cuda_core/cuda/core/experimental/_device.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py index c8e0de189a..0499baa581 100644 --- a/cuda_core/cuda/core/experimental/_device.py +++ b/cuda_core/cuda/core/experimental/_device.py @@ -1115,7 +1115,7 @@ def compute_capability(self) -> ComputeCapability: @property def arch(self) -> str: """Return compute capability as a string (e.g., '75' for CC 7.5).""" - return "".join(f"{i}" for i in self.compute_capability) + return f"{self.compute_capability.major}{self.compute_capability.minor}" @property def context(self) -> Context: