Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cuda_core/cuda/core/experimental/_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -1112,6 +1112,11 @@ def compute_capability(self) -> ComputeCapability:
self.properties._cache["compute_capability"] = cc
return cc

@property
def arch(self) -> str:
    """Return the compute capability as a concatenated string.

    Returns
    -------
    str
        The major and minor compute capability digits joined together
        (e.g. ``'75'`` for CC 7.5), suitable for building ``sm_XX``
        architecture strings passed to the compiler.
    """
    # Fetch the compute_capability property once instead of twice —
    # each access goes through the property (and its cache lookup).
    cc = self.compute_capability
    return f"{cc.major}{cc.minor}"

@property
def context(self) -> Context:
"""Return the current :obj:`~_context.Context` associated with this device.
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/docs/source/release/0.X.Y-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Breaking Changes
New features
------------

None.
- Added :attr:`Device.arch` property that returns the compute capability as a string (e.g., '75' for CC 7.5), providing a convenient alternative to manually concatenating the compute capability tuple.


New examples
Expand Down
3 changes: 1 addition & 2 deletions cuda_core/examples/cuda_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ def main():
cp.cuda.ExternalStream(int(stream.handle)).use()

# Compile the program
arch = "".join(f"{i}" for i in dev.compute_capability)
program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}")
program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}")
prog = Program(code, code_type="c++", options=program_options)
mod = prog.compile(
"cubin", name_expressions=("vector_add<float>", "vector_multiply<float>", "vector_subtract<float>")
Expand Down
3 changes: 1 addition & 2 deletions cuda_core/examples/memory_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@
cp.cuda.ExternalStream(int(stream.handle)).use()

# Compile kernel
arch = "".join(f"{i}" for i in dev.compute_capability)
program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}")
program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}")
prog = Program(code, code_type="c++", options=program_options)
mod = prog.compile("cubin")
kernel = mod.get_kernel("memory_ops")
Expand Down
3 changes: 1 addition & 2 deletions cuda_core/examples/pytorch_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ def __cuda_stream__(self):
s = dev.create_stream(PyTorchStreamWrapper(pt_stream))

# prepare program
arch = "".join(f"{i}" for i in dev.compute_capability)
program_options = ProgramOptions(std="c++11", arch=f"sm_{arch}")
program_options = ProgramOptions(std="c++11", arch=f"sm_{dev.arch}")
prog = Program(code, code_type="c++", options=program_options)
mod = prog.compile(
"cubin",
Expand Down
3 changes: 1 addition & 2 deletions cuda_core/examples/saxpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@
s = dev.create_stream()

# prepare program
arch = "".join(f"{i}" for i in dev.compute_capability)
program_options = ProgramOptions(std="c++11", arch=f"sm_{arch}")
program_options = ProgramOptions(std="c++11", arch=f"sm_{dev.arch}")
prog = Program(code, code_type="c++", options=program_options)

# Note the use of the `name_expressions` argument to specify the template
Expand Down
6 changes: 2 additions & 4 deletions cuda_core/examples/simple_multi_gpu_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@
}
}
"""
arch0 = "".join(f"{i}" for i in dev0.compute_capability)
prog_add = Program(code_add, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch0}"})
prog_add = Program(code_add, code_type="c++", options={"std": "c++17", "arch": f"sm_{dev0.arch}"})
mod_add = prog_add.compile("cubin")
ker_add = mod_add.get_kernel("vector_add")

Expand All @@ -63,8 +62,7 @@
}
}
"""
arch1 = "".join(f"{i}" for i in dev1.compute_capability)
prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch1}"})
prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{dev1.arch}"})
mod_sub = prog_sub.compile("cubin")
ker_sub = mod_sub.get_kernel("vector_sub")

Expand Down
3 changes: 1 addition & 2 deletions cuda_core/examples/strided_memory_view_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,7 @@ def run():
# To know the GPU's compute capability, we need to identify which GPU to use.
dev = Device(0)
dev.set_current()
arch = "".join(f"{i}" for i in dev.compute_capability)
gpu_prog = Program(gpu_code, code_type="c++", options=ProgramOptions(arch=f"sm_{arch}", std="c++11"))
gpu_prog = Program(gpu_code, code_type="c++", options=ProgramOptions(arch=f"sm_{dev.arch}", std="c++11"))
mod = gpu_prog.compile(target_type="cubin")
gpu_ker = mod.get_kernel(func_name)

Expand Down
3 changes: 1 addition & 2 deletions cuda_core/examples/vector_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@
s = dev.create_stream()

# prepare program
arch = "".join(f"{i}" for i in dev.compute_capability)
program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}")
program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}")
prog = Program(code, code_type="c++", options=program_options)
mod = prog.compile("cubin", name_expressions=("vector_add<float>",))

Expand Down
12 changes: 12 additions & 0 deletions cuda_core/tests/test_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,18 @@ def test_compute_capability():
assert device.compute_capability == expected_cc


def test_arch():
    """Device.arch must be the compute-capability digits joined as one string."""
    device = Device()
    cc = device.compute_capability
    arch = device.arch
    # The property yields a plain string, not a tuple or int.
    assert isinstance(arch, str)
    # It is formatted as major followed by minor (e.g. "75" for CC 7.5).
    assert arch == f"{cc.major}{cc.minor}"
    # And it matches the manual concatenation pattern it replaces.
    assert arch == "".join(f"{i}" for i in cc)


cuda_base_properties = [
("max_threads_per_block", int),
("max_block_dim_x", int),
Expand Down
Loading