Commit 09ea472: undo changes to python torch module

ConnorBaker committed Jul 10, 2023
Parent: 75abed8
Showing 1 changed file with 8 additions and 141 deletions.

pkgs/development/python-modules/torch/default.nix (149 changes: 8 additions & 141 deletions)
@@ -1,5 +1,6 @@
-{ pkgs, stdenv, lib, fetchFromGitHub, buildPythonPackage, python,
+{ stdenv, lib, fetchFromGitHub, buildPythonPackage, python,
   cudaSupport ? false, cudaPackages, magma,
+  useSystemNccl ? true,
   MPISupport ? false, mpi,
   buildDocs ? false,
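Note on this first hunk: the pkgs argument (no longer referenced once the overrides below are reverted) is dropped, and a useSystemNccl flag defaulting to true is restored, so consumers can opt back into PyTorch's vendored NCCL without patching the file. A minimal sketch of such an override through an overlay; the python3/packageOverrides plumbing is an assumption about how the package set is consumed, not something this commit prescribes:

    # Hypothetical overlay: build torch against PyTorch's bundled
    # third_party NCCL instead of the system copy.
    final: prev: {
      python3 = prev.python3.override {
        packageOverrides = pyFinal: pyPrev: {
          torch = pyPrev.torch.override { useSystemNccl = false; };
        };
      };
    }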

@@ -130,74 +131,6 @@ let
       rocm-runtime rocm-opencl-runtime hipify
     ];
   };
-
-  # Not all of these are used by PyTorch, but they are all used by
-  # one of the transitive dependencies.
-  # TODO(@connorbaker): Static linking?
-  clog = pkgs.clog.override { inherit cpuinfo; };
-
-  # TODO(@connorbaker): Seems cpuinfo isn't re-using clog?
-  # TODO(@connorbaker): Static builds don't seem to export clog symbols?
-  cpuinfo = (pkgs.cpuinfo.overrideAttrs (oldAttrs: {
-    # src = oldAttrs.src.override {
-    #   rev = "8ec7bd91ad0470e61cf38f618cc1f270dede599c";
-    #   hash = "sha256-d9/Enm5lh27dSPOnMHblATuxNRY/ssEiE129TwbDTf0=";
-    # };
-  })).override { buildTools = false; buildSharedLibs = true; };
-
-  fxdiv = pkgs.fxdiv.overrideAttrs (oldAttrs: {
-    # src = oldAttrs.src.override {
-    #   rev = "b408327ac2a15ec3e43352421954f5b1967701d1";
-    #   hash = "sha256-BEjscsejYVhRxDAmah5DT3+bglp8G5wUTTYL7+HjWds=";
-    # };
-  });
-
-  pthreadpool = (pkgs.pthreadpool.overrideAttrs (oldAttrs: {
-    # src = oldAttrs.src.override {
-    #   rev = "a134dd5d4cee80cce15db81a72e7f929d71dd413";
-    #   hash = "sha256-vzNrcVDkcJeqpHNNO0IAg+vzn1smJfTyGwzp18kUu/I=";
-    # };
-  })).override { inherit fxdiv; buildSharedLibs = true; };
-
-  psimd = pkgs.psimd.overrideAttrs (oldAttrs: {
-    # src = oldAttrs.src.override {
-    #   rev = "072586a71b55b7f8c584153d223e95687148a900";
-    #   hash = "sha256-lV+VZi2b4SQlRYrhKx9Dxc6HlDEFz3newvcBjTekupo=";
-    # };
-  });
-
-  # TODO(@connorbaker): CMake patch fails to apply cleanly.
-  fp16 = (pkgs.fp16.overrideAttrs (oldAttrs: {
-    # src = oldAttrs.src.override {
-    #   rev = "4dfe081cf6bcd15db339cf2680b9281b8451eeb3";
-    #   hash = "sha256-B27LtVnL52niaFgPW0pp5Uulub/Q3NvtSDkJNahrSBk=";
-    # };
-  })).override { inherit psimd; };
-
-  xnnpack = (pkgs.xnnpack.overrideAttrs (oldAttrs: {
-    # API changes in the latest version break the build; invalid conversions/casts.
-    src = oldAttrs.src.override {
-      rev = "51a987591a6fc9f0fc0707077f53d763ac132cbf";
-      hash = "sha256-NZeSKz6xpvH84V4sxArXlyUoEfoekdACzaAh3AXy6+c=";
-    };
-  })).override { inherit cpuinfo fp16 fxdiv pthreadpool; buildSharedLibs = false; };
-
-  # TODO(@connorbaker): Patches are incompatible with current version
-  sleef = (pkgs.sleef.overrideAttrs (oldAttrs: {
-    patches = [];
-    src = oldAttrs.src.override {
-      rev = "e0a003ee838b75d11763aa9c3ef17bf71a725bff";
-      hash = "sha256-0atbkbLqyMVdZDZiSvGNp7vgZ6/dAQz9BL4Wu2kURlY=";
-    };
-  })).override { buildSharedLibs = false; };
-
-  # TODO(@connorbaker): Seems like though `gloo_cuda` is available and in the exported config, PyTorch isn't picking it up. Is this a quirk of using SYSTEM_LIBS? Are we excluded from a path which does the include?
-  gloo = (pkgs.gloo.overrideAttrs (oldAttrs: {
-    # src = oldAttrs.src.override {
-    #   rev = "10909297fedab0a680799211a299203e53515032";
-    #   hash = "sha256-jDn6AkvkkmlrdFKHxG+ObHnGZNp8x2+CatZJlMmvOuI=";
-    # };
-  })).override { buildSharedLibs = false; };
 in buildPythonPackage rec {
   pname = "torch";
   # Don't forget to update torch-bin to the same version.
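All of the reverted let-bindings above follow one composition pattern: overrideAttrs rewrites derivation attributes such as src or patches, while override re-calls the package function with different arguments (buildSharedLibs, inherit-ed dependencies). The .override on oldAttrs.src works because nixpkgs fetchers like fetchFromGitHub are made overridable. A generic sketch of the shape, with somePkg, the rev, and the hash as placeholders rather than real values:

    somePkg = (pkgs.somePkg.overrideAttrs (oldAttrs: {
      # overrideAttrs: swap derivation attributes, e.g. pin a different source.
      src = oldAttrs.src.override {
        rev = "0000000000000000000000000000000000000000"; # hypothetical commit
        hash = lib.fakeHash; # placeholder; substitute the real hash after a trial build
      };
    })).override {
      # override: change the arguments the package function was called with.
      buildSharedLibs = false;
    };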
@@ -270,28 +203,6 @@ in buildPythonPackage rec {
     export PYTORCH_ROCM_ARCH="${gpuTargetString}"
     export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
     python tools/amd_build/build_amd.py
-  ''
-  # TODO(@connorbaker): For some reason, does not allow using system provided header.
-  # TODO(@connorbaker): Find a way to package Onnx so it can still be found by CMake.
-  # TODO(@connorbaker): PyTorch can also use Mimalloc -- is there a performance gain there?
-  + ''
-    substituteInPlace cmake/Dependencies.cmake \
-      --replace \
-        'set(POCKETFFT_INCLUDE_DIR "''${Torch_SOURCE_DIR}/third_party/pocketfft/")' \
-        'set(POCKETFFT_INCLUDE_DIR "${pkgs.pocketfft}/include/")'
-    substituteInPlace CMakeLists.txt \
-      --replace \
-        'set(USE_SYSTEM_ONNX ON)' \
-        'set(USE_SYSTEM_ONNX OFF)'
-  ''
-  # TODO(@connorbaker): Generate a list of all the targets we could build.
-  + ''
-    mkdir build
-    pushd build
-    cmake -G Ninja ..
-    cmake --build . --target help
-    popd
-    rm -rf build
   '';

   # Use pytorch's custom configurations
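A detail worth keeping in mind when reading the reverted substituteInPlace call above: inside a Nix indented string, ''${ escapes interpolation, so CMake's ${Torch_SOURCE_DIR} reaches the shell literally, while the unescaped ${pkgs.pocketfft} is expanded to a store path at evaluation time. A minimal, self-contained sketch of the two forms (the store path is a hypothetical stand-in):

    let
      pocketfft = "/nix/store/example-pocketfft"; # hypothetical stand-in for ${pkgs.pocketfft}
    in ''
      kept literal:  ''${Torch_SOURCE_DIR}
      interpolated:  ${pocketfft}/include/
    ''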
@@ -309,42 +220,9 @@ in buildPythonPackage rec {
   USE_MKLDNN = setBool mklDnnSupport;
   USE_MKLDNN_CBLAS = setBool mklDnnSupport;

-  USE_SYSTEM_LIBS = setBool true;
-  USE_NCCL = setBool cudaSupport;
-  # TODO(@connorbaker): Even though USE_SYSTEM_LIBS is set to true, we still need to set
-  # USE_SYSTEM_NCCL! Likely need to do something similar to what PyTorch does for TBB:
-  # https://github.com/pytorch/pytorch/blob/e9ebda29d87ce0916ab08c06ab26fd3766a870e5/CMakeLists.txt#L426-L428
-  USE_SYSTEM_NCCL = setBool cudaSupport;
-  BUILD_CUSTOM_PROTOBUF = setBool false;
-
-  # TODO(@connorbaker): Statically link CUDA?
-  # Seems like PyTorch stopped doing that due to size limitations of PyPi wheels?
-  # https://github.com/pytorch/pytorch/issues/96595#issuecomment-1468636755
-  # Also possibly due to the size of the resulting binary?
-  # https://github.com/pytorch/pytorch/pull/87502#issue-1418802284
-  # With respect to CUDNN, it's also possible it reduces performance?
-  # https://github.com/pytorch/pytorch/issues/87389#issuecomment-1286267629
-  # https://github.com/pytorch/pytorch/issues/50153
-  # Useful patches for static linking?
-  # https://github.com/pytorch/pytorch/pull/95153
-  # See the static linking master issue:
-  # https://github.com/pytorch/pytorch/issues/21737
-  USE_CUDA_STATIC_LINK = setBool cudaSupport;
-  CAFFE2_STATIC_LINK_CUDA = setBool cudaSupport;
-
-  # TODO(@connorbaker): Should we expose options to avoid building (Q|X)?NNPACK and MKLDNN when
-  # building with CUDA? Even better -- can we fully split up compilation so we're able to reuse
-  # these components?
-  # TODO(@connorbaker): Perhaps UCC/UCX would be interesting to add?
-  # https://github.com/pytorch/pytorch/pull/81583
-  # TODO(@connorbaker): Look at adding ZSTD?
-  # https://github.com/pytorch/pytorch/issues/44255
-  # TODO(@connorbaker): Fix https://github.com/pytorch/pytorch/issues/41999
-  # TODO(@connorbaker): Useful resources for how previous "use system" options were added:
-  # - https://github.com/pytorch/pytorch/pull/37137
-  # - https://github.com/pytorch/pytorch/pull/37277
-  # - https://github.com/pytorch/pytorch/pull/37501
-
+  # Avoid using pybind11 from git submodule
+  # Also avoids pytorch exporting the headers of pybind11
+  USE_SYSTEM_BIND11 = true;

   preBuild = ''
     export MAX_JOBS=$NIX_BUILD_CORES
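Two notes on the flags above, both hedged since the relevant definitions sit outside this hunk. First, the USE_* attributes become environment variables read by PyTorch's setup.py, which expects the strings "1" and "0"; setBool presumably does that rendering, along the lines of this sketch:

    let
      setBool = v: if v then "1" else "0"; # assumed shape of the helper; its definition is not shown in the diff
      mklDnnSupport = true;
    in {
      USE_MKLDNN = setBool mklDnnSupport; # evaluates to "1", exported as USE_MKLDNN=1
    }

Second, USE_SYSTEM_BIND11 can stay a raw boolean because stdenv's environment coercion turns true into "1" (and false into the empty string) when the attribute is passed to the builder.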
@@ -373,6 +251,8 @@ in buildPythonPackage rec {
   PYTORCH_BUILD_VERSION = version;
   PYTORCH_BUILD_NUMBER = 0;

+  USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL
+
   # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
   # (upstream seems to have fixed this in the wrong place?)
   # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
@@ -405,20 +285,7 @@ in buildPythonPackage rec {
   ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ]
     ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

-  buildInputs = [
-    # pkgs.onnx
-    blas
-    blas.provider
-    clog
-    cpuinfo
-    fp16
-    fxdiv
-    gloo
-    psimd
-    pthreadpool
-    sleef
-    xnnpack
-  ]
+  buildInputs = [ blas blas.provider pybind11 ]
     ++ lib.optionals stdenv.isLinux [ linuxHeaders_5_19 ] # TMP: avoid "flexible array member" errors for now
     ++ lib.optionals cudaSupport [ cudnn nccl ]
     ++ lib.optionals rocmSupport [ openmp ]
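The restored buildInputs line leans on lib.optionals for the backend-specific entries: lib.optionals cond xs evaluates to xs when cond is true and to [ ] otherwise, so the ++ chain always concatenates flat lists. A self-contained sketch of the semantics (optionals is inlined here so the snippet evaluates on its own):

    let
      optionals = cond: xs: if cond then xs else [ ]; # same behavior as lib.optionals
      cudaSupport = true;
      rocmSupport = false;
    in
      [ "blas" "pybind11" ]
      ++ optionals cudaSupport [ "cudnn" "nccl" ] # kept: cudaSupport is true
      ++ optionals rocmSupport [ "openmp" ]       # dropped: evaluates to [ ]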
