Skip to content

Commit

Permalink
Bump architecture baseline from sm_35 to sm_50 [#30]
Browse files Browse the repository at this point in the history
..., so that things keep working with CUDA 12.0+.
  • Loading branch information
tschwinge committed Sep 4, 2023
1 parent 1b5946d commit 1321670
Show file tree
Hide file tree
Showing 21 changed files with 107 additions and 84 deletions.
15 changes: 5 additions & 10 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -53,22 +53,17 @@ Individual nvptx-tools depend on CUDA/PTX as follows:

Unless overridden with the '-m' option, the target architecture used for
'ptxas' verification ('--gpu-name' option to 'ptxas') is deduced from the
'.target' directive in the input's preamble. This means that it's the
responsibility of the producer (nvptx-none GCC toolchain) that the PTX code
works with the CUDA version used.
'.target' directive in the input's preamble.  As 'ptxas' support for the
Kepler microarchitecture has been removed in two steps (sm_30 and sm_32 in
CUDA 11.0, sm_35 and sm_37 in CUDA 12.0), sm_3x code (that is, sm_30, sm_32,
sm_35, sm_37) is verified with '--gpu-name sm_50' instead.

* nvptx-none-run

Uses the CUDA Driver library ('libcuda') to run PTX binaries.

Support for sm_50 was introduced with PTX ISA 4.0, CUDA 6.0, driver r331, and
remains supported at this time (CUDA 12.2.2).

For the testsuite (only), the baseline dependency is sm_35 functionality (or
higher).

Support for sm_35 has been introduced with PTX ISA 3.1, CUDA 5.0, driver r302,
has been deprecated in CUDA 10.2 (see
<https://docs.nvidia.com/cuda/archive/10.2/cuda-toolkit-release-notes/#deprecated-features>),
but has not yet been removed.

Certain testcases for the nvptx-tools have dependencies as follows:

Expand Down
20 changes: 14 additions & 6 deletions nvptx-as.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1249,26 +1249,34 @@ This program has absolutely no warranty.\n",
target_arg = preamble_target_arg;

if ((strcmp ("sm_30", target_arg) == 0)
|| (strcmp ("sm_32", target_arg) == 0))
|| (strcmp ("sm_32", target_arg) == 0)
|| (strcmp ("sm_35", target_arg) == 0)
|| (strcmp ("sm_37", target_arg) == 0))
{
/* Starting with CUDA 11.0, "Support for Kepler 'sm_30' and
'sm_32' architecture based products is dropped", and these may
no longer be specified in '--gpu-name' of 'ptxas':
/* In CUDA 11.0, "Support for Kepler 'sm_30' and 'sm_32'
architecture based products is dropped", and in CUDA 12.0,
"Kepler architecture support is removed" (that is, sm_35,
sm_37), and these may no longer be specified in '--gpu-name'
of 'ptxas':
ptxas fatal : Value 'sm_30' is not defined for option 'gpu-name'
ptxas fatal : Value 'sm_32' is not defined for option 'gpu-name'
ptxas fatal : Value 'sm_35' is not defined for option 'gpu-name'
ptxas fatal : Value 'sm_37' is not defined for option 'gpu-name'
..., but we need to continue supporting GCC emitting
'.target sm_30' code, for example.
Detecting the CUDA/'ptxas' version and the supported
'--gpu-name' options is clumsy, so in this case, just use
'sm_35', which is the baseline supported by all current CUDA
'sm_50', which is the baseline supported by all current CUDA
versions down to CUDA 6.5, at least. */
if (verbose)
fprintf (stderr, "Verifying %s code", target_arg);
target_arg = "sm_35";
target_arg = "sm_50";
if (verbose)
fprintf (stderr, " with %s code generation.\n", target_arg);
}
Expand Down
4 changes: 2 additions & 2 deletions test/GLOBAL_FUNCTION_DECL_f.s
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// BEGIN PREAMBLE
.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
// END PREAMBLE

Expand Down
4 changes: 2 additions & 2 deletions test/GLOBAL_FUNCTION_DEF_f.s
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// BEGIN PREAMBLE
.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
// END PREAMBLE

Expand Down
4 changes: 2 additions & 2 deletions test/as/bare-1.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
4 changes: 2 additions & 2 deletions test/as/debug/directive-file-1.o.golden
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// $ [nvptx-none-gcc] directive-file-1.c -o directive-file-1.s -S -g
// $ [nvptx-none-as] directive-file-1.s -o directive-file-1.o.golden
// BEGIN PREAMBLE
.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
// END PREAMBLE
// BEGIN GLOBAL FUNCTION DECL: f
Expand Down
4 changes: 2 additions & 2 deletions test/as/debug/directive-file-1.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@


// BEGIN PREAMBLE
.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
// END PREAMBLE

Expand Down
2 changes: 1 addition & 1 deletion test/as/ptxas/dummy_ptxas_log.golden
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ dummy ptxas
arg: /dev/null
arg: /dev/null
arg: --gpu-name
arg: sm_35
arg: sm_50
arg: -O0
88 changes: 54 additions & 34 deletions test/as/ptxas/invoke-1.test
Original file line number Diff line number Diff line change
Expand Up @@ -55,105 +55,125 @@ Implicit '--verify', '-m sm_2020'

RUN: rm -f %t*
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %S/../bare-1.s -m sm_2020 > %t.stdout 2> %t.stderr
RUN: sed -e 's|sm_35|sm_2020|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: sed -e 's|sm_50|sm_2020|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: cmp %t.dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: ! test -s %t.stdout
RUN: sed -e 's|sm_35|sm_2020|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: sed -e 's|sm_50|sm_2020|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: cmp %t.stderr.golden %t.stderr


Implicit '--verify', preamble '.target sm_2022'

RUN: rm -f %t*
RUN: sed -e 's|sm_35|sm_2022|g' < %S/../bare-1.s > %t.bare-1.s
RUN: sed -e 's|sm_50|sm_2022|g' < %S/../bare-1.s > %t.bare-1.s
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %t.bare-1.s > %t.stdout 2> %t.stderr
RUN: sed -e 's|sm_35|sm_2022|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: sed -e 's|sm_50|sm_2022|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: cmp %t.dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: ! test -s %t.stdout
RUN: sed -e 's|sm_35|sm_2022|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: sed -e 's|sm_50|sm_2022|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: cmp %t.stderr.golden %t.stderr


Special handling re CUDA 11.0, "Support for Kepler 'sm_30' and 'sm_32' architecture based products is dropped"
Special handling re CUDA 11.0, "Support for Kepler 'sm_30' and 'sm_32' architecture based products is dropped", and CUDA 12.0, "Kepler architecture support is removed" (that is, sm_35, sm_37).

Preamble '.target sm_3': doesn't exist; not special-cased.

RUN: rm -f %t*
RUN: sed -e 's|sm_35|sm_3|g' < %S/../bare-1.s > %t.bare-1.s
RUN: sed -e 's|sm_50|sm_3|g' < %S/../bare-1.s > %t.bare-1.s
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %t.bare-1.s > %t.stdout 2> %t.stderr
RUN: sed -e 's|sm_35|sm_3|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: sed -e 's|sm_50|sm_3|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: cmp %t.dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: ! test -s %t.stdout
RUN: sed -e 's|sm_35|sm_3|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: sed -e 's|sm_50|sm_3|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: cmp %t.stderr.golden %t.stderr

Preamble '.target sm_30': special-cased to '--gpu-name sm_35'.
Preamble '.target sm_30': special-cased to '--gpu-name sm_50'.

RUN: rm -f %t*
RUN: sed -e 's|sm_35|sm_30|g' < %S/../bare-1.s > %t.bare-1.s
RUN: sed -e 's|sm_50|sm_30|g' < %S/../bare-1.s > %t.bare-1.s
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %t.bare-1.s > %t.stdout 2> %t.stderr
RUN: cmp %S/dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: ! test -s %t.stdout
RUN: cmp %S/verbose_stderr-sm_30.golden %t.stderr

Preamble '.target sm_30x': doesn't exist; not special-cased.

RUN: rm -f %t*
RUN: sed -e 's|sm_50|sm_30x|g' < %S/../bare-1.s > %t.bare-1.s
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %t.bare-1.s > %t.stdout 2> %t.stderr
RUN: sed -e 's|sm_50|sm_30x|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: cmp %t.dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: ! test -s %t.stdout
RUN: sed -e 's|sm_50|sm_30x|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: cmp %t.stderr.golden %t.stderr

Preamble '.target sm_31': doesn't exist; not special-cased.

RUN: rm -f %t*
RUN: sed -e 's|sm_35|sm_31|g' < %S/../bare-1.s > %t.bare-1.s
RUN: sed -e 's|sm_50|sm_31|g' < %S/../bare-1.s > %t.bare-1.s
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %t.bare-1.s > %t.stdout 2> %t.stderr
RUN: sed -e 's|sm_35|sm_31|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: sed -e 's|sm_50|sm_31|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: cmp %t.dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: ! test -s %t.stdout
RUN: sed -e 's|sm_35|sm_31|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: sed -e 's|sm_50|sm_31|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: cmp %t.stderr.golden %t.stderr

Preamble '.target sm_32': special-cased to '--gpu-name sm_35'.
Preamble '.target sm_32': special-cased to '--gpu-name sm_50'.

RUN: rm -f %t*
RUN: sed -e 's|sm_35|sm_32|g' < %S/../bare-1.s > %t.bare-1.s
RUN: sed -e 's|sm_50|sm_32|g' < %S/../bare-1.s > %t.bare-1.s
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %t.bare-1.s > %t.stdout 2> %t.stderr
RUN: cmp %S/dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: ! test -s %t.stdout
RUN: sed -e 's|sm_30|sm_32|g' < %S/verbose_stderr-sm_30.golden > %t.stderr.golden
RUN: cmp %t.stderr.golden %t.stderr

Preamble '.target sm_35': not special-cased.
Preamble '.target sm_32x': doesn't exist; not special-cased.

RUN: rm -f %t*
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %S/../bare-1.s > %t.stdout 2> %t.stderr
RUN: cmp %S/dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: sed -e 's|sm_50|sm_32x|g' < %S/../bare-1.s > %t.bare-1.s
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %t.bare-1.s > %t.stdout 2> %t.stderr
RUN: sed -e 's|sm_50|sm_32x|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: cmp %t.dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: ! test -s %t.stdout
RUN: cmp %S/verbose_stderr.golden %t.stderr
RUN: sed -e 's|sm_50|sm_32x|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: cmp %t.stderr.golden %t.stderr

Preamble '.target sm_37': not special-cased.
Preamble '.target sm_35': special-cased to '--gpu-name sm_50'.

RUN: rm -f %t*
RUN: sed -e 's|sm_35|sm_37|g' < %S/../bare-1.s > %t.bare-1.s
RUN: sed -e 's|sm_50|sm_35|g' < %S/../bare-1.s > %t.bare-1.s
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %t.bare-1.s > %t.stdout 2> %t.stderr
RUN: sed -e 's|sm_35|sm_37|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: cmp %t.dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: cmp %S/dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: ! test -s %t.stdout
RUN: sed -e 's|sm_35|sm_37|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: sed -e 's|sm_30|sm_35|g' < %S/verbose_stderr-sm_30.golden > %t.stderr.golden
RUN: cmp %t.stderr.golden %t.stderr

Preamble '.target sm_30x': doesn't exist; not special-cased.
Preamble '.target sm_37': special-cased to '--gpu-name sm_50'.

RUN: rm -f %t*
RUN: sed -e 's|sm_35|sm_30x|g' < %S/../bare-1.s > %t.bare-1.s
RUN: sed -e 's|sm_50|sm_37|g' < %S/../bare-1.s > %t.bare-1.s
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %t.bare-1.s > %t.stdout 2> %t.stderr
RUN: sed -e 's|sm_35|sm_30x|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: cmp %t.dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: cmp %S/dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: ! test -s %t.stdout
RUN: sed -e 's|sm_35|sm_30x|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: sed -e 's|sm_30|sm_37|g' < %S/verbose_stderr-sm_30.golden > %t.stderr.golden
RUN: cmp %t.stderr.golden %t.stderr

Preamble '.target sm_32x': doesn't exist; not special-cased.
Preamble '.target sm_50': not special-cased.

RUN: rm -f %t*
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %S/../bare-1.s > %t.stdout 2> %t.stderr
RUN: cmp %S/dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: ! test -s %t.stdout
RUN: cmp %S/verbose_stderr.golden %t.stderr

Preamble '.target sm_52': not special-cased.

RUN: rm -f %t*
RUN: sed -e 's|sm_35|sm_32x|g' < %S/../bare-1.s > %t.bare-1.s
RUN: sed -e 's|sm_50|sm_52|g' < %S/../bare-1.s > %t.bare-1.s
RUN: %dummy_ptxas_path DUMMY_PTXAS_LOG=%t.dummy_ptxas_log %target_as_cmd -v -o /dev/null %t.bare-1.s > %t.stdout 2> %t.stderr
RUN: sed -e 's|sm_35|sm_32x|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: sed -e 's|sm_50|sm_52|g' < %S/dummy_ptxas_log.golden > %t.dummy_ptxas_log.golden
RUN: cmp %t.dummy_ptxas_log.golden %t.dummy_ptxas_log
RUN: ! test -s %t.stdout
RUN: sed -e 's|sm_35|sm_32x|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: sed -e 's|sm_50|sm_52|g' < %S/verbose_stderr.golden > %t.stderr.golden
RUN: cmp %t.stderr.golden %t.stderr
4 changes: 2 additions & 2 deletions test/as/ptxas/verbose_stderr-sm_30.golden
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Verifying sm_30 code with sm_35 code generation.
ptxas -c -o /dev/null /dev/null --gpu-name sm_35 -O0
Verifying sm_30 code with sm_50 code generation.
ptxas -c -o /dev/null /dev/null --gpu-name sm_50 -O0
2 changes: 1 addition & 1 deletion test/as/ptxas/verbose_stderr.golden
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ptxas -c -o /dev/null /dev/null --gpu-name sm_35 -O0
ptxas -c -o /dev/null /dev/null --gpu-name sm_50 -O0
4 changes: 2 additions & 2 deletions test/nm/1-1-C++.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// ..., and then manually simplify.

// BEGIN PREAMBLE
.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
// END PREAMBLE

Expand Down
4 changes: 2 additions & 2 deletions test/nm/1-1-C.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// ..., and then manually simplify.

// BEGIN PREAMBLE
.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
// END PREAMBLE

Expand Down
4 changes: 2 additions & 2 deletions test/nm/1-2-C++.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// ..., and then manually simplify.

// BEGIN PREAMBLE
.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
// END PREAMBLE

Expand Down
4 changes: 2 additions & 2 deletions test/nm/1-2-C.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// ..., and then manually simplify.

// BEGIN PREAMBLE
.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
// END PREAMBLE

Expand Down
4 changes: 2 additions & 2 deletions test/nm/libc/libc_a-dtoa.o
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// BEGIN PREAMBLE
.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
// END PREAMBLE
// BEGIN FUNCTION DECL: quorem
Expand Down
4 changes: 2 additions & 2 deletions test/nm/libc/libc_a-environ.o
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// BEGIN PREAMBLE
.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
// END PREAMBLE
// BEGIN VAR DEF: initial_env
Expand Down
4 changes: 2 additions & 2 deletions test/nm/libc/libc_a-memset.o
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// BEGIN PREAMBLE
.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
// END PREAMBLE
// BEGIN GLOBAL FUNCTION DECL: memset
Expand Down
4 changes: 2 additions & 2 deletions test/run/exit-argc_minus_five.nvptx
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// $ [nvptx-none-gcc] exit-argc_minus_five.c -o exit-argc_minus_five.nvptx -O3
// ..., and then manually simplify.

.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
.visible .entry __main (.param .u64 %in_ar0, .param .u32 %in_ar1, .param .u64 %in_ar2)
{
Expand Down
4 changes: 2 additions & 2 deletions test/run/print-argv-1.nvptx
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// $ [nvptx-none-gcc] print-argv-1.c -o print-argv-1.nvptx -O3
// ..., and then manually simplify.

.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
.extern .func (.param .u32 %value_out) vprintf (.param .u64 %in_ar0, .param .u64 %in_ar1);
.const .align 1 .u8 $LC0[8] =
Expand Down
4 changes: 2 additions & 2 deletions test/run/sigsegv-1.nvptx
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// $ [nvptx-none-gcc] sigsegv-1.c -o sigsegv-1.nvptx -O3
// ..., and then manually simplify.

.version 3.1
.target sm_35
.version 4.0
.target sm_50
.address_size 64
.visible .entry __main (.param .u64 %in_ar0, .param .u32 %in_ar1, .param .u64 %in_ar2)
{
Expand Down

0 comments on commit 1321670

Please sign in to comment.