Enable cuda_compat by default on Jetson devices

Some NVIDIA devices, such as the Jetson family, support the NVIDIA compatibility package (cuda_compat), which allows running executables built against a higher CUDA major version on a system with an older CUDA driver. On such platforms, the consensus among CUDA maintainers is that there is no downside to always enabling it by default. This commit links to the relevant cuda_compat shared libraries by patching the CUDA core packages' runpaths when cuda_compat is available, in the same way as we currently do for OpenGL drivers.
NixOS · Dec 12, 2023 · b685883 · b685883
1 parent a8dac2f
commit b685883
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 0 deletions.
diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix
@@ -1,6 +1,7 @@
 {
   # General callPackage-supplied arguments
   autoAddOpenGLRunpathHook,
+  autoAddCudaCompatRunpathHook,
   autoPatchelfHook,
   backendStdenv,
   fetchurl,
@@ -126,6 +127,14 @@ backendStdenv.mkDerivation (
       # Check e.g. with `patchelf --print-rpath path/to/my/binary
       autoAddOpenGLRunpathHook
       markForCudatoolkitRootHook
+    ]
+    # autoAddCudaCompatRunpathHook depends on cuda_compat and would cause
+    # infinite recursion if applied to `cuda_compat` itself (besides the fact
+    # that it doesn't make sense in the first place)
+    ++ lib.optionals (pname != "cuda_compat" && flags.isJetsonBuild) [
+      # autoAddCudaCompatRunpathHook must appear AFTER autoAddOpenGLRunpathHook.
+      # See its documentation in ./setup-hooks/extension.nix.
+      autoAddCudaCompatRunpathHook
     ];
 
     buildInputs =

diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath.sh b/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath.sh
@@ -0,0 +1,27 @@
+# shellcheck shell=bash
+# Patch all dynamically linked ELF files so that they find the CUDA driver
+# (libcuda.so) coming from the cuda_compat package, by adding it to the RUNPATH.
+echo "Sourcing auto-add-cuda-compat-runpath-hook"
+
+# Succeed if and only if patchelf is able to print the RUNPATH of the file
+# given as $1. Both stdout and stderr of patchelf are discarded; only the exit
+# status matters. Used below to tell dynamically linked ELF files apart from
+# the ones patchelf cannot handle.
+elfHasDynamicSection() {
+    local elfFile="$1"
+    patchelf --print-rpath "$elfFile" > /dev/null 2>&1
+}
+
+autoAddCudaCompatRunpathPhase() (
+  local outputPaths
+  mapfile -t outputPaths < <(for o in $(getAllOutputNames); do echo "${!o}"; done)
+  find "${outputPaths[@]}" -type f -executable -print0  | while IFS= read -rd "" f; do
+    if isELF "$f"; then
+      # patchelf returns an error on statically linked ELF files
+      if elfHasDynamicSection "$f" ; then
+        echo "autoAddCudaCompatRunpathHook: patching $f"
+        local origRpath="$(patchelf --print-rpath "$f")"
+        patchelf --set-rpath "@libcudaPath@:$origRpath" "$f"
+      elif (( "${NIX_DEBUG:-0}" >= 1 )) ; then
+        echo "autoAddCudaCompatRunpathHook: skipping a statically-linked ELF file $f"
+      fi
+    fi
+  done
+)
+
+# Append the phase to the postFixupHooks array (presumably executed by stdenv
+# once the fixup phase is done -- confirm against the stdenv setup script).
+postFixupHooks+=(autoAddCudaCompatRunpathPhase)
diff --git a/pkgs/development/cuda-modules/setup-hooks/extension.nix b/pkgs/development/cuda-modules/setup-hooks/extension.nix
@@ -44,4 +44,24 @@ final: _: {
           ./auto-add-opengl-runpath-hook.sh
       )
       {};
+
+  # Setup hook built from ./auto-add-cuda-compat-runpath.sh, with
+  # `${cuda_compat}/compat` substituted in as the library path to add.
+  #
+  # autoAddCudaCompatRunpathHook must be added AFTER `setupCudaHook`. Both
+  # hooks prepend a path containing `libcuda.so` to the `DT_RUNPATH` section of
+  # patched ELF files, but the `cuda_compat` path must take precedence
+  # (otherwise it has no effect) and thus appear first, which means this hook
+  # must be executed last.
+  # NOTE(review): the generic manifest builder states this ordering relative to
+  # `autoAddOpenGLRunpathHook` rather than `setupCudaHook` -- confirm which
+  # hook is meant here.
+  autoAddCudaCompatRunpathHook =
+    final.callPackage
+      (
+        {makeSetupHook, cuda_compat}:
+        makeSetupHook
+          {
+            name = "auto-add-cuda-compat-runpath-hook";
+            substitutions = {
+              libcudaPath = "${cuda_compat}/compat";
+            };
+          }
+          ./auto-add-cuda-compat-runpath.sh
+      )
+      {};
 }