Conversation
Contributor
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.
diff --git a/lib/cudadrv/state.jl b/lib/cudadrv/state.jl
index 5fc70d29d..fb0722f14 100644
--- a/lib/cudadrv/state.jl
+++ b/lib/cudadrv/state.jl
@@ -218,12 +218,14 @@ function context(dev::CuDevice)
# check if the device isn't too old
if capability(dev) < v"5.0"
@error("""Your $(name(dev)) GPU (compute capability $(capability(dev).major).$(capability(dev).minor)) is not supported by CUDA.jl.
- Please use a device with at least capability 5.0, or downgrade CUDA.jl (see the README for compatibility details).""",
+ Please use a device with at least capability 5.0, or downgrade CUDA.jl (see the README for compatibility details).""",
maxlog=1, _id=devidx)
elseif runtime_version() >= v"13" && capability(dev) <= v"7.5"
- @error("""Your $(name(dev)) GPU (compute capability $(capability(dev).major).$(capability(dev).minor)) is not supported on CUDA 13+.
- Please use a device with at least capability 7.5, or downgrade your NVIDIA driver to below v580.""",
- maxlog=1, _id=devidx)
+ @error(
+ """Your $(name(dev)) GPU (compute capability $(capability(dev).major).$(capability(dev).minor)) is not supported on CUDA 13+.
+ Please use a device with at least capability 7.5, or downgrade your NVIDIA driver to below v580.""",
+ maxlog = 1, _id = devidx
+ )
end
# ... or too new
if !in(capability(dev), cuda_compat().cap)
diff --git a/src/compatibility.jl b/src/compatibility.jl
index 272a51ebf..8a06bdaad 100644
--- a/src/compatibility.jl
+++ b/src/compatibility.jl
@@ -41,15 +41,15 @@ const cuda_cap_db = Dict(
v"3.0" => between(v"4.2", v"10.2"),
v"3.2" => between(v"6.0", v"10.2"),
v"3.5" => between(v"5.0", v"11.8"),
- v"3.7" => between(v"6.5", v"11.8"),
- v"5.0" => between(v"6.0", v"12.9"),
- v"5.2" => between(v"7.0", v"12.9"),
- v"5.3" => between(v"7.5", v"12.9"),
- v"6.0" => between(v"8.0", v"12.9"),
- v"6.1" => between(v"8.0", v"12.9"),
- v"6.2" => between(v"8.0", v"12.9"),
- v"7.0" => between(v"9.0", v"12.9"),
- v"7.2" => between(v"9.2", v"12.9"),
+ v"3.7" => between(v"6.5", v"11.8"),
+ v"5.0" => between(v"6.0", v"12.9"),
+ v"5.2" => between(v"7.0", v"12.9"),
+ v"5.3" => between(v"7.5", v"12.9"),
+ v"6.0" => between(v"8.0", v"12.9"),
+ v"6.1" => between(v"8.0", v"12.9"),
+ v"6.2" => between(v"8.0", v"12.9"),
+ v"7.0" => between(v"9.0", v"12.9"),
+ v"7.2" => between(v"9.2", v"12.9"),
v"7.5" => between(v"10.0", highest),
v"8.0" => between(v"11.0", highest),
v"8.6" => between(v"11.1", highest), |
Contributor
There was a problem hiding this comment.
CUDA.jl Benchmarks
Details
| Benchmark suite | Current: 00141af | Previous: 6c6977f | Ratio |
|---|---|---|---|
latency/precompile |
43747535057.5 ns |
43775122238.5 ns |
1.00 |
latency/ttfp |
7306977522 ns |
7276778491 ns |
1.00 |
latency/import |
3865063206 ns |
3836255124 ns |
1.01 |
integration/volumerhs |
9625850 ns |
9623790.5 ns |
1.00 |
integration/byval/slices=1 |
146811 ns |
146826 ns |
1.00 |
integration/byval/slices=3 |
425646 ns |
426011 ns |
1.00 |
integration/byval/reference |
144934 ns |
145073 ns |
1.00 |
integration/byval/slices=2 |
286202 ns |
286240 ns |
1.00 |
integration/cudadevrt |
103644 ns |
103460 ns |
1.00 |
kernel/indexing |
14328 ns |
14196 ns |
1.01 |
kernel/indexing_checked |
15116 ns |
15033 ns |
1.01 |
kernel/occupancy |
663.5375 ns |
670.506329113924 ns |
0.99 |
kernel/launch |
2199.6666666666665 ns |
2162.5555555555557 ns |
1.02 |
kernel/rand |
18468 ns |
16555 ns |
1.12 |
array/reverse/1d |
19699 ns |
19646 ns |
1.00 |
array/reverse/2dL_inplace |
66763 ns |
66804 ns |
1.00 |
array/reverse/1dL |
69936 ns |
69923 ns |
1.00 |
array/reverse/2d |
21653 ns |
21677 ns |
1.00 |
array/reverse/1d_inplace |
9649 ns |
9658 ns |
1.00 |
array/reverse/2d_inplace |
11005 ns |
13323 ns |
0.83 |
array/reverse/2dL |
73860.5 ns |
73803 ns |
1.00 |
array/reverse/1dL_inplace |
66808 ns |
66812 ns |
1.00 |
array/copy |
20723 ns |
20306 ns |
1.02 |
array/iteration/findall/int |
157751.5 ns |
157164 ns |
1.00 |
array/iteration/findall/bool |
139359 ns |
139633 ns |
1.00 |
array/iteration/findfirst/int |
160407 ns |
160554.5 ns |
1.00 |
array/iteration/findfirst/bool |
161907.5 ns |
160957 ns |
1.01 |
array/iteration/scalar |
74241.5 ns |
72124 ns |
1.03 |
array/iteration/logical |
216961.5 ns |
215036.5 ns |
1.01 |
array/iteration/findmin/1d |
49809.5 ns |
49445 ns |
1.01 |
array/iteration/findmin/2d |
96400 ns |
96493.5 ns |
1.00 |
array/reductions/reduce/Int64/1d |
42944 ns |
42960 ns |
1.00 |
array/reductions/reduce/Int64/dims=1 |
44783 ns |
44742.5 ns |
1.00 |
array/reductions/reduce/Int64/dims=2 |
61378.5 ns |
61453 ns |
1.00 |
array/reductions/reduce/Int64/dims=1L |
88824 ns |
88951 ns |
1.00 |
array/reductions/reduce/Int64/dims=2L |
88093 ns |
88014.5 ns |
1.00 |
array/reductions/reduce/Float32/1d |
36277 ns |
35769 ns |
1.01 |
array/reductions/reduce/Float32/dims=1 |
45881 ns |
51586 ns |
0.89 |
array/reductions/reduce/Float32/dims=2 |
59535 ns |
59511 ns |
1.00 |
array/reductions/reduce/Float32/dims=1L |
52305 ns |
52474 ns |
1.00 |
array/reductions/reduce/Float32/dims=2L |
72046 ns |
71419 ns |
1.01 |
array/reductions/mapreduce/Int64/1d |
43535 ns |
43189 ns |
1.01 |
array/reductions/mapreduce/Int64/dims=1 |
53216.5 ns |
46540.5 ns |
1.14 |
array/reductions/mapreduce/Int64/dims=2 |
61398 ns |
61279.5 ns |
1.00 |
array/reductions/mapreduce/Int64/dims=1L |
88811 ns |
88854 ns |
1.00 |
array/reductions/mapreduce/Int64/dims=2L |
87956 ns |
88014 ns |
1.00 |
array/reductions/mapreduce/Float32/1d |
36670.5 ns |
36287 ns |
1.01 |
array/reductions/mapreduce/Float32/dims=1 |
41606 ns |
41466 ns |
1.00 |
array/reductions/mapreduce/Float32/dims=2 |
59638 ns |
59744 ns |
1.00 |
array/reductions/mapreduce/Float32/dims=1L |
52436 ns |
52550 ns |
1.00 |
array/reductions/mapreduce/Float32/dims=2L |
71915 ns |
71985 ns |
1.00 |
array/broadcast |
20004 ns |
20047 ns |
1.00 |
array/copyto!/gpu_to_gpu |
13004 ns |
11191 ns |
1.16 |
array/copyto!/cpu_to_gpu |
215322 ns |
213964 ns |
1.01 |
array/copyto!/gpu_to_cpu |
284829 ns |
284661.5 ns |
1.00 |
array/accumulate/Int64/1d |
124632 ns |
124888 ns |
1.00 |
array/accumulate/Int64/dims=1 |
83493 ns |
83130 ns |
1.00 |
array/accumulate/Int64/dims=2 |
157849 ns |
157680 ns |
1.00 |
array/accumulate/Int64/dims=1L |
1709343 ns |
1709578 ns |
1.00 |
array/accumulate/Int64/dims=2L |
966272.5 ns |
966045 ns |
1.00 |
array/accumulate/Float32/1d |
109267 ns |
108910 ns |
1.00 |
array/accumulate/Float32/dims=1 |
80442 ns |
80564 ns |
1.00 |
array/accumulate/Float32/dims=2 |
147384 ns |
147715 ns |
1.00 |
array/accumulate/Float32/dims=1L |
1618007.5 ns |
1618612 ns |
1.00 |
array/accumulate/Float32/dims=2L |
697763 ns |
698318 ns |
1.00 |
array/construct |
1280.7 ns |
1287.5 ns |
0.99 |
array/random/randn/Float32 |
45216.5 ns |
43976 ns |
1.03 |
array/random/randn!/Float32 |
24668 ns |
24816 ns |
0.99 |
array/random/rand!/Int64 |
27287 ns |
27267 ns |
1.00 |
array/random/rand!/Float32 |
8946 ns |
8653.333333333334 ns |
1.03 |
array/random/rand/Int64 |
38233 ns |
38285 ns |
1.00 |
array/random/rand/Float32 |
13128 ns |
13026 ns |
1.01 |
array/permutedims/4d |
60540 ns |
60152.5 ns |
1.01 |
array/permutedims/2d |
54289.5 ns |
53934 ns |
1.01 |
array/permutedims/3d |
55099.5 ns |
54649.5 ns |
1.01 |
array/sorting/1d |
2757149.5 ns |
2757180 ns |
1.00 |
array/sorting/by |
3344173.5 ns |
3343619 ns |
1.00 |
array/sorting/2d |
1080103 ns |
1080730 ns |
1.00 |
cuda/synchronization/stream/auto |
1034.3 ns |
1029.6 ns |
1.00 |
cuda/synchronization/stream/nonblocking |
7569.5 ns |
7270.700000000001 ns |
1.04 |
cuda/synchronization/stream/blocking |
850.5135135135135 ns |
850.7415730337078 ns |
1.00 |
cuda/synchronization/context/auto |
1162.8 ns |
1157 ns |
1.01 |
cuda/synchronization/context/nonblocking |
8437.6 ns |
6964 ns |
1.21 |
cuda/synchronization/context/blocking |
901.4444444444445 ns |
888.5208333333334 ns |
1.01 |
This comment was automatically generated by workflow using github-action-benchmark.
Member
Author
|
Timeout happens on master too. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
#2834 missed the compat bits and device detection.
Closes #2896