From d2fe136cbc7fe53adc5d391f8e823eca059463af Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 20 Nov 2025 11:49:14 +1100 Subject: [PATCH 1/5] Remove duplicate documentation. The docs on the `CudaBuilder::arch` method are a subset of those on the `CudaBuilder::arch` field. So just remove all the method docs except for the pointer to the arch docs. --- crates/cuda_builder/src/lib.rs | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs index 47df458d..2a974f96 100644 --- a/crates/cuda_builder/src/lib.rs +++ b/crates/cuda_builder/src/lib.rs @@ -257,23 +257,6 @@ impl CudaBuilder { self } - /// The virtual compute architecture to target for PTX generation. This - /// dictates how certain things are codegenned and may affect performance - /// and/or which gpus the code can run on. - /// - /// You should generally try to pick an arch that will work with most - /// GPUs you want your program to work with. - /// - /// If you are unsure, either leave this option to default, or pick something around 5.2 to 7.x. - /// - /// You can find a list of features supported on each arch and a list of GPUs for every - /// arch [`here`](https://en.wikipedia.org/wiki/CUDA#Version_features_and_specifications). - /// - /// NOTE that this does not necessarily mean that code using a certain capability - /// will not work on older capabilities. It means that if it uses certain - /// features it may not work. - /// - /// The chosen architecture enables target features for conditional compilation. /// See the documentation on the `arch` field for more details. pub fn arch(mut self, arch: NvvmArch) -> Self { self.arch = arch; From b3b4dea2a76cf41a4e8da45703aae425f4fda95f Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 20 Nov 2025 14:41:48 +1100 Subject: [PATCH 2/5] Add `impl FromStr for NvvmArch`. It can be used in `NvvmOption::from_str`, and will also be used in a subsequent commit in compiletests.
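Not part of the patch itself, but as a quick reference, here is a minimal sketch of what the new impl allows at call sites. It assumes only what the diff below shows (the `NvvmArch` variants, the `&'static str` error type, and the `nvvm` crate name):

```rust
use std::str::FromStr;

use nvvm::NvvmArch;

fn main() {
    // Direct use, as `NvvmOption::from_str` now does for `-arch=` values.
    let arch = NvvmArch::from_str("compute_75").unwrap();
    assert_eq!(arch, NvvmArch::Compute75);

    // `FromStr` also enables `str::parse`, which is what the compiletests
    // `--target-arch` flag relies on later in this series.
    let parsed: NvvmArch = "compute_90a".parse().unwrap();
    assert_eq!(parsed, NvvmArch::Compute90a);

    // Unknown targets are rejected.
    assert!(NvvmArch::from_str("compute_10").is_err());
}
```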
--- crates/nvvm/src/lib.rs | 88 ++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index 0ff7a017..43b5ecfa 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -243,46 +243,10 @@ impl FromStr for NvvmOption { } _ if s.starts_with("-arch=") => { let slice = &s[6..]; - if !slice.starts_with("compute_") { - return Err(format!("unknown -arch value: {slice}")); + match NvvmArch::from_str(slice) { + Ok(arch) => Self::Arch(arch), + Err(_) => return Err(format!("unknown -arch value: {slice}")), } - let arch_num = &slice[8..]; - let arch = match arch_num { - "35" => NvvmArch::Compute35, - "37" => NvvmArch::Compute37, - "50" => NvvmArch::Compute50, - "52" => NvvmArch::Compute52, - "53" => NvvmArch::Compute53, - "60" => NvvmArch::Compute60, - "61" => NvvmArch::Compute61, - "62" => NvvmArch::Compute62, - "70" => NvvmArch::Compute70, - "72" => NvvmArch::Compute72, - "75" => NvvmArch::Compute75, - "80" => NvvmArch::Compute80, - "86" => NvvmArch::Compute86, - "87" => NvvmArch::Compute87, - "89" => NvvmArch::Compute89, - "90" => NvvmArch::Compute90, - "90a" => NvvmArch::Compute90a, - "100" => NvvmArch::Compute100, - "100f" => NvvmArch::Compute100f, - "100a" => NvvmArch::Compute100a, - "101" => NvvmArch::Compute101, - "101f" => NvvmArch::Compute101f, - "101a" => NvvmArch::Compute101a, - "103" => NvvmArch::Compute103, - "103f" => NvvmArch::Compute103f, - "103a" => NvvmArch::Compute103a, - "120" => NvvmArch::Compute120, - "120f" => NvvmArch::Compute120f, - "120a" => NvvmArch::Compute120a, - "121" => NvvmArch::Compute121, - "121f" => NvvmArch::Compute121f, - "121a" => NvvmArch::Compute121a, - _ => return Err(format!("unknown -arch=compute_NN value: {arch_num}")), - }; - Self::Arch(arch) } _ => return Err(format!("unknown option: {s}")), }) @@ -340,6 +304,48 @@ impl Display for NvvmArch { } } +impl FromStr for NvvmArch { + type Err = &'static str; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + Ok(match s { + "compute_35" => NvvmArch::Compute35, + "compute_37" => NvvmArch::Compute37, + "compute_50" => NvvmArch::Compute50, + "compute_52" => NvvmArch::Compute52, + "compute_53" => NvvmArch::Compute53, + "compute_60" => NvvmArch::Compute60, + "compute_61" => NvvmArch::Compute61, + "compute_62" => NvvmArch::Compute62, + "compute_70" => NvvmArch::Compute70, + "compute_72" => NvvmArch::Compute72, + "compute_75" => NvvmArch::Compute75, + "compute_80" => NvvmArch::Compute80, + "compute_86" => NvvmArch::Compute86, + "compute_87" => NvvmArch::Compute87, + "compute_89" => NvvmArch::Compute89, + "compute_90" => NvvmArch::Compute90, + "compute_90a" => NvvmArch::Compute90a, + "compute_100" => NvvmArch::Compute100, + "compute_100f" => NvvmArch::Compute100f, + "compute_100a" => NvvmArch::Compute100a, + "compute_101" => NvvmArch::Compute101, + "compute_101f" => NvvmArch::Compute101f, + "compute_101a" => NvvmArch::Compute101a, + "compute_103" => NvvmArch::Compute103, + "compute_103f" => NvvmArch::Compute103f, + "compute_103a" => NvvmArch::Compute103a, + "compute_120" => NvvmArch::Compute120, + "compute_120f" => NvvmArch::Compute120f, + "compute_120a" => NvvmArch::Compute120a, + "compute_121" => NvvmArch::Compute121, + "compute_121f" => NvvmArch::Compute121f, + "compute_121a" => NvvmArch::Compute121a, + _ => return Err("unknown compile target"), + }) + } +} + impl Default for NvvmArch { fn default() -> Self { Self::Compute52 } } @@ -1116,8 +1122,8 @@ mod tests { err("blah", "unknown option: blah"); err("-aardvark", "unknown
option: -aardvark"); err("-arch=compute75", "unknown -arch value: compute75"); - err("-arch=compute_10", "unknown -arch=compute_NN value: 10"); - err("-arch=compute_100x", "unknown -arch=compute_NN value: 100x"); + err("-arch=compute_10", "unknown -arch value: compute_10"); + err("-arch=compute_100x", "unknown -arch value: compute_100x"); err("-opt=3", "-opt=3 is the default"); err("-opt=99", "unknown -opt value: 99"); } From 41da9b0431a8191ff467c742900beb810ae6f5b4 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 20 Nov 2025 14:44:23 +1100 Subject: [PATCH 3/5] Use `derive(Default)` for `NvvmArch`. It's possible with the `#[default]` attribute. --- crates/nvvm/src/lib.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index 43b5ecfa..179721cb 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -254,11 +254,12 @@ impl FromStr for NvvmOption { } /// Nvvm architecture, default is `Compute52` -#[derive(Debug, Clone, Copy, PartialEq, Eq, strum::EnumIter)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, strum::EnumIter)] pub enum NvvmArch { Compute35, Compute37, Compute50, + #[default] Compute52, Compute53, Compute60, @@ -346,12 +347,6 @@ impl FromStr for NvvmArch { } } -impl Default for NvvmArch { - fn default() -> Self { - Self::Compute52 - } -} - impl NvvmArch { /// Get the numeric capability value (e.g., 35 for Compute35) pub fn capability_value(&self) -> u32 { From f27df130dc31cfb273345cd92233eb1802b4754f Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 20 Nov 2025 14:44:49 +1100 Subject: [PATCH 4/5] Increase the minimum CUDA version to 12.0 and the default arch to `compute_75`. CUDA 12.0 was released in December 2022, and CUDA 13.0 was released in August 2025. It feels like a good time to drop CUDA 11.x support. This means later Kepler devices with compute capabilities of 3.5 and 3.7 will no longer be supported. The minimum version of NVVM IR increases from 1.6 to 2.0, because that's what CUDA 12.0 uses. Along with this, change the default compiler target to `compute_75`. This is a good choice because it's the minimum supported by CUDA 13.0, and gets Rust CUDA a step closer to working with CUDA 13.0. The existing defaults were all over the place. - `NvvmArch::default()` was `compute_52`. - `CudaBuilder`'s default was `compute_61`. - compiletest's default was `compute_70`. This commit makes the latter two determined by `NvvmArch::default()`, which is changed to `compute_75`. Currently CI runs compiletests on `compute_61`, `compute_70`, and `compute_90`; this commit changes the `compute_70` to `compute_75`. It seems sensible to have the default value as one of the things tested by CI. This comment also adds a comment on NvvmArch with a table of CUDA/`compute_*` values, which I found very useful. Resources: - https://en.wikipedia.org/wiki/CUDA#GPUs_supported for compute capabilities supported by different CUDA versions. - https://docs.nvidia.com/cuda/archive/12.0.0/cuda-toolkit-release-notes/index.html for NVVM IR version information. 
--- .github/workflows/ci_linux.yml | 2 +- .github/workflows/ci_windows.yml | 4 +-- Cargo.lock | 1 + crates/cuda_builder/src/lib.rs | 7 ++-- crates/nvvm/src/lib.rs | 50 +++++++++++++++++++++++++-- crates/rustc_codegen_nvvm/src/nvvm.rs | 10 +++--- guide/src/guide/getting_started.md | 4 ++- tests/compiletests/Cargo.toml | 1 + tests/compiletests/README.md | 2 +- tests/compiletests/src/main.rs | 26 +++++++------- 10 files changed, 78 insertions(+), 29 deletions(-) diff --git a/.github/workflows/ci_linux.yml b/.github/workflows/ci_linux.yml index e60d2f3f..e7a85ad1 100644 --- a/.github/workflows/ci_linux.yml +++ b/.github/workflows/ci_linux.yml @@ -289,4 +289,4 @@ jobs: shell: bash run: shopt -s globstar && rustfmt --check tests/compiletests/ui/**/*.rs - name: Compiletest - run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_70,compute_90 + run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_75,compute_90 diff --git a/.github/workflows/ci_windows.yml b/.github/workflows/ci_windows.yml index a5910cf4..cb7e7167 100644 --- a/.github/workflows/ci_windows.yml +++ b/.github/workflows/ci_windows.yml @@ -105,6 +105,6 @@ jobs: RUSTDOCFLAGS: -Dwarnings run: cargo doc --workspace --all-features --document-private-items --no-deps --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex0*" --exclude "cudnn*" --exclude "sha2*" --exclude "cust_raw" - # Disabled due to dll issues, someone with Windows knowledge needed + # Disabled due to dll issues, someone with Windows knowledge needed # - name: Compiletest - # run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_70,compute_90 + # run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_75,compute_90 diff --git a/Cargo.lock b/Cargo.lock index 1af88d55..f21cd1d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -599,6 +599,7 @@ dependencies = [ "clap 4.5.45", "compiletest_rs", "cuda_builder", + "nvvm", "tracing", "tracing-subscriber", ] diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs index 2a974f96..f187ede2 100644 --- a/crates/cuda_builder/src/lib.rs +++ b/crates/cuda_builder/src/lib.rs @@ -91,10 +91,7 @@ pub struct CudaBuilder { /// will not work on older capabilities. It means that if it uses certain features /// it may not work. /// - /// This currently defaults to `6.1`. Which corresponds to Pascal, GPUs such as the - /// GTX 1030, GTX 1050, GTX 1080, Tesla P40, etc. We default to this because Maxwell - /// (5.x) will be deprecated in CUDA 12 and we anticipate for that. Moreover, `6.x` - /// contains support for things like f64 atomic add and half precision float ops. + /// This defaults to the default value of `NvvmArch`. /// /// Starting with CUDA 12.9, architectures can have suffixes: /// @@ -207,7 +204,7 @@ impl CudaBuilder { ptx_file_copy_path: None, generate_line_info: true, nvvm_opts: true, - arch: NvvmArch::Compute61, + arch: NvvmArch::default(), ftz: false, fast_sqrt: false, fast_div: false, diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index 179721cb..d3f34c4e 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -253,13 +253,53 @@ impl FromStr for NvvmOption { } } -/// Nvvm architecture, default is `Compute52` +/// Nvvm architecture. +/// +/// The following table indicates which `compute_*` values are supported by which CUDA versions. 
+/// +/// ```text +/// ----------------------------------------------------------------------------- +/// | Supported `compute_*` values (written vertically) +/// ----------------------------------------------------------------------------- +/// CUDA | 1 1 1 1 1 1 +/// Toolkit | 5 5 5 6 6 6 7 7 7 7 8 8 8 8 8 9 0 0 0 1 2 2 +/// version | 0 2 3 0 1 2 0 2 3 5 0 6 7 8 9 0 0 1 3 0 0 1 +/// ----------------------------------------------------------------------------- +/// 12.[01].0 | b b b b b b b b b b b b - - b b - - - - - - +/// 12.2.0 | b b b b b b b b b b b b - - b a - - - - - - +/// 12.[3456].0 | b b b b b b b b b b b b b - b a - - - - - - +/// 12.8.0 | b b b b b b b b b b b b b - b a a a - - a - +/// 12.9.0 | b b b b b b b b - b b b b - b a f f f - f f +/// 13.0.0 | - - - - - - - - - b b b b b b a f - f f f f +/// ----------------------------------------------------------------------------- +/// Legend: +/// - 'b': baseline features only +/// - 'a': baseline + architecture-specific features +/// - 'f': baseline + architecture-specific + family-specific features +/// +/// Note: there was no 12.7 release. +/// ``` +/// +/// For example, CUDA 12.9.0 supports `compute_89`, `compute_90{,a}`, `compute_100{,a,f}`. +/// +/// This information is from the "PTX Compiler APIs" documents under +/// <https://docs.nvidia.com/cuda/>; per-version copies live under +/// <https://docs.nvidia.com/cuda/archive/> (adjust the version in that URL as necessary). +/// Specifically, the `compute_*` values allowed with the `--gpu-name` option. +/// +/// # Example +/// +/// ``` +/// // The default value is `NvvmArch::Compute75`. +/// # use nvvm::NvvmArch; +/// assert_eq!(NvvmArch::default(), NvvmArch::Compute75); +/// ``` #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, strum::EnumIter)] pub enum NvvmArch { Compute35, Compute37, Compute50, - #[default] Compute52, Compute53, Compute60, @@ -267,6 +307,12 @@ pub enum NvvmArch { Compute62, Compute70, Compute72, + /// This default value of 7.5 corresponds to Turing and later devices. We default to this + /// because it is the minimum supported by CUDA 13.0 while being in the middle of the range + /// supported by CUDA 12.x. + // WARNING: If you change the default, consider updating the `--target-arch` values used for + // compiletests in `.github/workflows/ci_{linux,windows}.yml`. + #[default] Compute75, Compute80, Compute86, diff --git a/crates/rustc_codegen_nvvm/src/nvvm.rs b/crates/rustc_codegen_nvvm/src/nvvm.rs index 2c1ae5b2..e165225f 100644 --- a/crates/rustc_codegen_nvvm/src/nvvm.rs +++ b/crates/rustc_codegen_nvvm/src/nvvm.rs @@ -44,7 +44,7 @@ impl Display for CodegenErr { } /// Take a list of bitcode module bytes and their names and codegen it -/// into ptx bytes. The final PTX *should* be utf8, but just to be on the safe side +/// into PTX bytes. The final PTX *should* be utf8, but just to be on the safe side /// it returns a vector of bytes. /// /// Note that this will implicitly try to find libdevice and add it, so don't do that @@ -57,15 +57,15 @@ pub fn codegen_bitcode_modules( ) -> Result<Vec<u8>, CodegenErr> { debug!("Codegenning bitcode to PTX"); - // make sure the nvvm version is high enough so users don't get confusing compilation errors. + // Make sure the nvvm version is high enough so users don't get confusing compilation errors.
let (major, minor) = nvvm::ir_version(); - if major <= 1 && minor < 6 { + if (major, minor) < (2, 0) { sess.dcx() - .fatal("rustc_codegen_nvvm requires at least libnvvm 1.6 (CUDA 11.2)"); + .fatal("rustc_codegen_nvvm requires at least libnvvm 2.0 (CUDA 12.0)"); } - // first, create the nvvm program we will add modules to. + // First, create the nvvm program we will add modules to. let prog = NvvmProgram::new()?; let module = merge_llvm_modules(modules, llcx); diff --git a/guide/src/guide/getting_started.md b/guide/src/guide/getting_started.md index be30c946..e7a8f728 100644 --- a/guide/src/guide/getting_started.md +++ b/guide/src/guide/getting_started.md @@ -6,7 +6,9 @@ This section covers how to get started writing GPU crates with `cuda_std` and `c Before you can use the project to write GPU crates, you will need a couple of prerequisites: -- [The CUDA SDK](https://developer.nvidia.com/cuda-downloads), version 11.2 or later (and the appropriate driver - [see CUDA release notes](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html)). +- [The CUDA SDK](https://developer.nvidia.com/cuda-downloads), version 12.0 or later (and the + appropriate driver - [see CUDA release + notes](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html)). This is only for building GPU crates, to execute built PTX you only need CUDA `9+`. diff --git a/tests/compiletests/Cargo.toml b/tests/compiletests/Cargo.toml index 48102ec7..989726d6 100644 --- a/tests/compiletests/Cargo.toml +++ b/tests/compiletests/Cargo.toml @@ -10,6 +10,7 @@ path = "src/main.rs" [dependencies] compiletest_rs = "0.11" clap = { version = "4.5", features = ["derive"] } +nvvm = { path = "../../crates/nvvm" } tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } cuda_builder = { workspace = true } diff --git a/tests/compiletests/README.md b/tests/compiletests/README.md index 55f4bcd1..2f638816 100644 --- a/tests/compiletests/README.md +++ b/tests/compiletests/README.md @@ -22,7 +22,7 @@ cargo run --release ### Options - `--bless` - Update expected output files -- `--target-arch=compute_61,compute_70,compute_90` - Test multiple CUDA compute capabilities (comma-separated) +- `--target-arch=compute_61,compute_75,compute_90` - Test multiple CUDA compute capabilities (comma-separated) - Filter by test name: `cargo compiletest simple` - `RUST_LOG=info` - Enable progress logging - `RUST_LOG=debug` - Enable detailed debug logging diff --git a/tests/compiletests/src/main.rs b/tests/compiletests/src/main.rs index 927a8c06..e797f11b 100644 --- a/tests/compiletests/src/main.rs +++ b/tests/compiletests/src/main.rs @@ -1,4 +1,5 @@ use clap::Parser; +use nvvm::NvvmArch; use std::env; use std::io; use std::path::{Path, PathBuf}; @@ -13,8 +14,9 @@ struct Opt { /// The CUDA compute capability to target (e.g., compute_70, compute_80, compute_90). /// Can specify multiple architectures comma-separated. - #[arg(long, default_value = "compute_70", value_delimiter = ',')] - target_arch: Vec<String>, + // WARNING: This should be kept in sync with the default on `CudaBuilder::arch`. + #[arg(long, default_values_t = [NvvmArch::default()], value_delimiter = ',')] + target_arch: Vec<NvvmArch>, /// Only run tests that match these filters.
#[arg(name = "FILTER")] @@ -22,8 +24,8 @@ struct Opt { } impl Opt { - pub fn architectures(&self) -> impl Iterator { - self.target_arch.iter().map(|s| s.as_str()) + pub fn architectures(&self) -> impl Iterator + use<'_> { + self.target_arch.iter().copied() } } @@ -136,18 +138,18 @@ impl Runner { extra_flags: "", }]; - for (arch, variation) in self - .opt - .architectures() - .flat_map(|arch| VARIATIONS.iter().map(move |variation| (arch, variation))) - { + for (arch, variation) in self.opt.architectures().flat_map(|arch| { + VARIATIONS + .iter() + .map(move |variation| (arch.target_feature(), variation)) + }) { // HACK(eddyb) in order to allow *some* tests to have separate output // in different testing variations (i.e. experimental features), while // keeping *most* of the tests unchanged, we make use of "stage IDs", // which offer `// only-S` and `// ignore-S` for any stage ID `S`. let stage_id = if variation.name == "default" { // Use the architecture name as the stage ID. - arch.to_string() + arch.clone() } else { // Include the variation name in the stage ID. format!("{}-{}", arch, variation.name) @@ -159,7 +161,7 @@ impl Runner { &self.deps_target_dir, &self.codegen_backend_path, CUDA_TARGET, - arch, + &arch, ); let mut flags = test_rustc_flags( &self.codegen_backend_path, @@ -172,7 +174,7 @@ impl Runner { .deps_target_dir .join(DepKind::ProcMacro.target_dir_suffix(CUDA_TARGET)), ], - arch, + &arch, ); flags += variation.extra_flags; From 5204d427ff6e3af0024c8f783ff54bfab465c5eb Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 21 Nov 2025 20:55:21 +1100 Subject: [PATCH 5/5] Remove CUDA 11 Dockerfile and related CI tasks. --- .github/workflows/ci_linux.yml | 6 -- .github/workflows/container_images.yml | 5 -- container/ubuntu22-cuda11/Dockerfile | 89 -------------------------- 3 files changed, 100 deletions(-) delete mode 100644 container/ubuntu22-cuda11/Dockerfile diff --git a/.github/workflows/ci_linux.yml b/.github/workflows/ci_linux.yml index e7a85ad1..92b1f70b 100644 --- a/.github/workflows/ci_linux.yml +++ b/.github/workflows/ci_linux.yml @@ -28,9 +28,6 @@ jobs: fail-fast: false matrix: variance: - # - name: Ubuntu-22.04/CUDA-11.8.0 - # image: "ghcr.io/rust-gpu/rust-cuda-ubuntu22-cuda11:latest" - # runner: ubuntu-latest - name: Ubuntu-22.04 / CUDA-12.8.1 / x86_64 image: "ghcr.io/rust-gpu/rust-cuda-ubuntu22-cuda12:latest" runner: ubuntu-latest @@ -235,9 +232,6 @@ jobs: matrix: variance: # Must match the build job's matrix definition - # - name: Ubuntu-22.04 / CUDA-11.8.0 - # image: "ghcr.io/rust-gpu/rust-cuda-ubuntu22-cuda11:latest" - # runner: ubuntu-latest - name: Ubuntu-22.04 / CUDA-12.8.1 / x86_64 image: "ghcr.io/rust-gpu/rust-cuda-ubuntu22-cuda12:latest" runner: ubuntu-latest diff --git a/.github/workflows/container_images.yml b/.github/workflows/container_images.yml index eb66d98a..e3fcd8af 100644 --- a/.github/workflows/container_images.yml +++ b/.github/workflows/container_images.yml @@ -30,9 +30,6 @@ jobs: - runner: ubuntu-24.04-arm arch: arm64 variance: - - name: Ubuntu-22.04/CUDA-11.8.0 - image: "rust-gpu/rust-cuda-ubuntu22-cuda11" - dockerfile: ./container/ubuntu22-cuda11/Dockerfile - name: Ubuntu-22.04/CUDA-12.8.1 image: "rust-gpu/rust-cuda-ubuntu22-cuda12" dockerfile: ./container/ubuntu22-cuda12/Dockerfile @@ -157,8 +154,6 @@ jobs: fail-fast: false matrix: variance: - - name: Ubuntu-22.04/CUDA-11.8.0 - image: "rust-gpu/rust-cuda-ubuntu22-cuda11" - name: Ubuntu-22.04/CUDA-12.8.1 image: "rust-gpu/rust-cuda-ubuntu22-cuda12" - name: Ubuntu-24.04/CUDA-12.8.1 
diff --git a/container/ubuntu22-cuda11/Dockerfile b/container/ubuntu22-cuda11/Dockerfile deleted file mode 100644 index 6f4996ec..00000000 --- a/container/ubuntu22-cuda11/Dockerfile +++ /dev/null @@ -1,89 +0,0 @@ -FROM nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS llvm-builder - -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \ - build-essential \ - clang \ - curl \ - libffi-dev \ - libedit-dev \ - libncurses5-dev \ - libssl-dev \ - libtinfo-dev \ - libxml2-dev \ - cmake \ - ninja-build \ - pkg-config \ - python3 \ - xz-utils \ - zlib1g-dev && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR /data/llvm7 - -# Download and build LLVM 7.1.0 for all architectures. -RUN curl -sSf -L -O https://github.com/llvm/llvm-project/releases/download/llvmorg-7.1.0/llvm-7.1.0.src.tar.xz && \ - tar -xf llvm-7.1.0.src.tar.xz && \ - cd llvm-7.1.0.src && \ - mkdir build && cd build && \ - ARCH=$(dpkg --print-architecture) && \ - if [ "$ARCH" = "amd64" ]; then \ - TARGETS="X86;NVPTX"; \ - else \ - TARGETS="AArch64;NVPTX"; \ - fi && \ - cmake -G Ninja \ - -DCMAKE_BUILD_TYPE=Release \ - -DLLVM_TARGETS_TO_BUILD="$TARGETS" \ - -DLLVM_BUILD_LLVM_DYLIB=ON \ - -DLLVM_LINK_LLVM_DYLIB=ON \ - -DLLVM_ENABLE_ASSERTIONS=OFF \ - -DLLVM_ENABLE_BINDINGS=OFF \ - -DLLVM_INCLUDE_EXAMPLES=OFF \ - -DLLVM_INCLUDE_TESTS=OFF \ - -DLLVM_INCLUDE_BENCHMARKS=OFF \ - -DLLVM_ENABLE_ZLIB=ON \ - -DLLVM_ENABLE_TERMINFO=ON \ - -DCMAKE_INSTALL_PREFIX=/opt/llvm-7 \ - .. && \ - ninja -j$(nproc) && \ - ninja install && \ - cd ../.. && \ - rm -rf llvm-7.1.0.src* - -FROM nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 - -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \ - build-essential \ - clang \ - curl \ - libssl-dev \ - libtinfo-dev \ - pkg-config \ - xz-utils \ - zlib1g-dev \ - cmake \ - libfontconfig-dev \ - libx11-xcb-dev \ - libxcursor-dev \ - libxi-dev \ - libxinerama-dev \ - libxrandr-dev && \ - rm -rf /var/lib/apt/lists/* - -COPY --from=llvm-builder /opt/llvm-7 /opt/llvm-7 -RUN ln -s /opt/llvm-7/bin/llvm-config /usr/bin/llvm-config && \ - ln -s /opt/llvm-7/bin/llvm-config /usr/bin/llvm-config-7 - -# Get Rust (install rustup; toolchain installed from rust-toolchain.toml below) -RUN curl -sSf -L https://sh.rustup.rs | bash -s -- -y --profile minimal --default-toolchain none -ENV PATH="/root/.cargo/bin:${PATH}" - -# Setup the workspace -WORKDIR /data/rust-cuda -RUN --mount=type=bind,source=rust-toolchain.toml,target=/data/rust-cuda/rust-toolchain.toml \ - rustup show - -# Add nvvm to LD_LIBRARY_PATH. -ENV LD_LIBRARY_PATH="/usr/local/cuda/nvvm/lib64:${LD_LIBRARY_PATH}" -ENV LLVM_LINK_STATIC=1 -ENV RUST_LOG=info