Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opencv: misc CUDA-related updates and fixes; add enableLto #221370

Merged
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
68 changes: 58 additions & 10 deletions pkgs/development/libraries/opencv/4.x.nix
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,14 @@
, enableContrib ? true

, enableCuda ? (config.cudaSupport or false) && stdenv.hostPlatform.isx86_64
, cudaPackages ? { }
, enableCublas ? enableCuda
, enableCudnn ? false # NOTE: CUDNN has a large impact on closure size so we disable it by default
, enableCufft ? enableCuda
, cudaPackages ? {}
, symlinkJoin
, nvidia-optical-flow-sdk

, enableLto ? true
, enableUnfree ? false
, enableIpp ? false
, enablePython ? false
Expand Down Expand Up @@ -79,9 +84,6 @@
}:

let
inherit (cudaPackages) cudatoolkit;
inherit (cudaPackages.cudaFlags) cudaCapabilities;

version = "4.7.0";

src = fetchFromGitHub {
Expand Down Expand Up @@ -227,6 +229,33 @@ let
# Multithreaded OpenBLAS conflicts with OpenCV's own multithreading, which
# manifests itself as hung tests, so the BLAS provider is overridden to be
# single-threaded.
# https://github.com/xianyi/OpenBLAS/wiki/Faq/4bded95e8dc8aadc70ce65267d1093ca7bdefc4c#multi-threaded
openblas_ = blas.provider.override { singleThreaded = true; };

# Bring CUDA attributes into scope from the `cudaPackages` argument.
# NOTE(review): `cudaPackages` defaults to an empty set, so these attributes
# presumably only exist when a real CUDA package set is passed in — confirm
# that every use is guarded by `enableCuda`.
inherit (cudaPackages) cudaFlags cudatoolkit cudaVersion;
# `cudaCapabilities` is taken from the `cudaFlags` set inherited above.
inherit (cudaFlags) cudaCapabilities;

# CUDA redistributable packages shared by `cuda-native-redist` and
# `cuda-redist`. The trailing comment on each entry names the header it is
# included for; the optional libraries follow their enable* arguments.
cuda-common-redist =
  with cudaPackages;
  [
    cuda_cccl # <thrust/*>
    libnpp # npp.h
  ]
  ++ lib.optional enableCublas libcublas # cublas_v2.h
  ++ lib.optional enableCudnn cudnn # cudnn.h
  ++ lib.optional enableCufft libcufft; # cufft.h

# Build-time CUDA tree: the CUDA runtime package and nvcc, joined with the
# common redistributable packages into a single output via symlinkJoin.
# It is added to nativeBuildInputs when CUDA support is enabled.
cuda-native-redist = symlinkJoin {
  name = "cuda-native-redist-${cudaVersion}";
  paths = with cudaPackages; [
    cuda_cudart # cuda_runtime.h
    cuda_nvcc
  ] ++ cuda-common-redist;
};

# Joined tree holding only the common CUDA redistributable packages
# (without cuda_cudart or cuda_nvcc); added to buildInputs when CUDA support
# is enabled.
cuda-redist = symlinkJoin {
  paths = cuda-common-redist;
  name = "cuda-redist-${cudaVersion}";
};
in

stdenv.mkDerivation {
Expand Down Expand Up @@ -298,17 +327,18 @@ stdenv.mkDerivation {
++ lib.optionals enableTesseract [ tesseract leptonica ]
++ lib.optional enableTbb tbb
++ lib.optionals stdenv.isDarwin [ bzip2 AVFoundation Cocoa VideoDecodeAcceleration CoreMedia MediaToolbox ]
++ lib.optionals enableDocs [ doxygen graphviz-nox ];
++ lib.optionals enableDocs [ doxygen graphviz-nox ]
++ lib.optionals enableCuda [ cuda-redist ];

propagatedBuildInputs = lib.optional enablePython pythonPackages.numpy
++ lib.optionals enableCuda [ cudatoolkit nvidia-optical-flow-sdk ];
++ lib.optionals enableCuda [ nvidia-optical-flow-sdk ];

nativeBuildInputs = [ cmake pkg-config unzip ]
++ lib.optionals enablePython [
pythonPackages.pip
pythonPackages.wheel
pythonPackages.setuptools
];
] ++ lib.optionals enableCuda [ cuda-native-redist ];

env.NIX_CFLAGS_COMPILE = lib.optionalString enableEXR "-I${ilmbase.dev}/include/OpenEXR";

Expand Down Expand Up @@ -338,12 +368,30 @@ stdenv.mkDerivation {
(opencvFlag "OPENEXR" enableEXR)
(opencvFlag "OPENJPEG" enableJPEG2000)
"-DWITH_JASPER=OFF" # OpenCV falls back to a vendored copy of Jasper when OpenJPEG is disabled
(opencvFlag "CUDA" enableCuda)
(opencvFlag "CUBLAS" enableCuda)
(opencvFlag "TBB" enableTbb)

# CUDA options
(opencvFlag "CUDA" enableCuda)
(opencvFlag "CUDA_FAST_MATH" enableCuda)
(opencvFlag "CUBLAS" enableCublas)
(opencvFlag "CUDNN" enableCudnn)
(opencvFlag "CUFFT" enableCufft)

# LTO options
(opencvFlag "ENABLE_LTO" enableLto)
(opencvFlag "ENABLE_THIN_LTO" (
enableLto && (
# Only clang supports thin LTO, so we must either be using clang through the stdenv,
stdenv.cc.isClang ||
# or through cudatoolkit.
(enableCuda && cudatoolkit.cc.isClang)
)
))
] ++ lib.optionals enableCuda [
"-DCUDA_FAST_MATH=ON"
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
# We need to set the C and C++ host compilers for CUDA to the same compiler.
"-DCMAKE_C_COMPILER=${cudatoolkit.cc}/bin/cc"
ConnorBaker marked this conversation as resolved.
Show resolved Hide resolved
"-DCMAKE_CXX_COMPILER=${cudatoolkit.cc}/bin/c++"
"-DCUDA_NVCC_FLAGS=--expt-relaxed-constexpr"

# OpenCV respects at least three variables:
Expand Down