Commit 09ea472: undo changes to python torch module

ConnorBaker committed Jul 10, 2023
Parent: 75abed8
Showing 1 changed file with 8 additions and 141 deletions.

pkgs/development/python-modules/torch/default.nix (149 changes: 8 additions & 141 deletions)
@@ -1,5 +1,6 @@
-{ pkgs, stdenv, lib, fetchFromGitHub, buildPythonPackage, python,
+{ stdenv, lib, fetchFromGitHub, buildPythonPackage, python,
   cudaSupport ? false, cudaPackages, magma,
+  useSystemNccl ? true,
   MPISupport ? false, mpi,
   buildDocs ? false,
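Note on this first hunk: the pkgs argument (no longer referenced once the overrides below are reverted) is dropped, and a useSystemNccl flag defaulting to true is restored, so consumers can opt back into PyTorch's vendored NCCL without patching the file. A minimal sketch of such an override through an overlay; the python3/packageOverrides plumbing is an assumption about how the package set is consumed, not something this commit prescribes:

    # Hypothetical overlay: build torch against PyTorch's bundled
    # third_party NCCL instead of the system copy.
    final: prev: {
      python3 = prev.python3.override {
        packageOverrides = pyFinal: pyPrev: {
          torch = pyPrev.torch.override { useSystemNccl = false; };
        };
      };
    }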

@@ -130,74 +131,6 @@ let
       rocm-runtime rocm-opencl-runtime hipify
     ];
   };
-
-  # Not all of these are used by PyTorch, but they are all used by
-  # one of the transitive dependencies.
-  # TODO(@connorbaker): Static linking?
-  clog = pkgs.clog.override { inherit cpuinfo; };
-
-  # TODO(@connorbaker): Seems cpuinfo isn't re-using clog?
-  # TODO(@connorbaker): Static builds don't seem to export clog symbols?
-  cpuinfo = (pkgs.cpuinfo.overrideAttrs (oldAttrs: {
-    # src = oldAttrs.src.override {
-    #   rev = "8ec7bd91ad0470e61cf38f618cc1f270dede599c";
-    #   hash = "sha256-d9/Enm5lh27dSPOnMHblATuxNRY/ssEiE129TwbDTf0=";
-    # };
-  })).override { buildTools = false; buildSharedLibs = true; };
-
-  fxdiv = pkgs.fxdiv.overrideAttrs (oldAttrs: {
-    # src = oldAttrs.src.override {
-    #   rev = "b408327ac2a15ec3e43352421954f5b1967701d1";
-    #   hash = "sha256-BEjscsejYVhRxDAmah5DT3+bglp8G5wUTTYL7+HjWds=";
-    # };
-  });
-
-  pthreadpool = (pkgs.pthreadpool.overrideAttrs (oldAttrs: {
-    # src = oldAttrs.src.override {
-    #   rev = "a134dd5d4cee80cce15db81a72e7f929d71dd413";
-    #   hash = "sha256-vzNrcVDkcJeqpHNNO0IAg+vzn1smJfTyGwzp18kUu/I=";
-    # };
-  })).override { inherit fxdiv; buildSharedLibs = true; };
-
-  psimd = pkgs.psimd.overrideAttrs (oldAttrs: {
-    # src = oldAttrs.src.override {
-    #   rev = "072586a71b55b7f8c584153d223e95687148a900";
-    #   hash = "sha256-lV+VZi2b4SQlRYrhKx9Dxc6HlDEFz3newvcBjTekupo=";
-    # };
-  });
-
-  # TODO(@connorbaker): CMake patch fails to apply cleanly.
-  fp16 = (pkgs.fp16.overrideAttrs (oldAttrs: {
-    # src = oldAttrs.src.override {
-    #   rev = "4dfe081cf6bcd15db339cf2680b9281b8451eeb3";
-    #   hash = "sha256-B27LtVnL52niaFgPW0pp5Uulub/Q3NvtSDkJNahrSBk=";
-    # };
-  })).override { inherit psimd; };
-
-  xnnpack = (pkgs.xnnpack.overrideAttrs (oldAttrs: {
-    # API changes in the latest version break the build; invalid conversions/casts.
-    src = oldAttrs.src.override {
-      rev = "51a987591a6fc9f0fc0707077f53d763ac132cbf";
-      hash = "sha256-NZeSKz6xpvH84V4sxArXlyUoEfoekdACzaAh3AXy6+c=";
-    };
-  })).override { inherit cpuinfo fp16 fxdiv pthreadpool; buildSharedLibs = false; };
-
-  # TODO(@connorbaker): Patches are incompatible with current version
-  sleef = (pkgs.sleef.overrideAttrs (oldAttrs: {
-    patches = [];
-    src = oldAttrs.src.override {
-      rev = "e0a003ee838b75d11763aa9c3ef17bf71a725bff";
-      hash = "sha256-0atbkbLqyMVdZDZiSvGNp7vgZ6/dAQz9BL4Wu2kURlY=";
-    };
-  })).override { buildSharedLibs = false; };
-
-  # TODO(@connorbaker): Seems like though `gloo_cuda` is available and in the exported config, PyTorch isn't picking it up. Is this a quirk of using SYSTEM_LIBS? Are we excluded from a path which does the include?
-  gloo = (pkgs.gloo.overrideAttrs (oldAttrs: {
-    # src = oldAttrs.src.override {
-    #   rev = "10909297fedab0a680799211a299203e53515032";
-    #   hash = "sha256-jDn6AkvkkmlrdFKHxG+ObHnGZNp8x2+CatZJlMmvOuI=";
-    # };
-  })).override { buildSharedLibs = false; };
 in buildPythonPackage rec {
   pname = "torch";
   # Don't forget to update torch-bin to the same version.
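All of the reverted let-bindings above follow one composition pattern: overrideAttrs rewrites derivation attributes such as src or patches, while override re-calls the package function with different arguments (buildSharedLibs, inherit-ed dependencies). The .override on oldAttrs.src works because nixpkgs fetchers like fetchFromGitHub are made overridable. A generic sketch of the shape, with somePkg, the rev, and the hash as placeholders rather than real values:

    somePkg = (pkgs.somePkg.overrideAttrs (oldAttrs: {
      # overrideAttrs: swap derivation attributes, e.g. pin a different source.
      src = oldAttrs.src.override {
        rev = "0000000000000000000000000000000000000000"; # hypothetical commit
        hash = lib.fakeHash; # placeholder; substitute the real hash after a trial build
      };
    })).override {
      # override: change the arguments the package function was called with.
      buildSharedLibs = false;
    };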
@@ -270,28 +203,6 @@ in buildPythonPackage rec {
     export PYTORCH_ROCM_ARCH="${gpuTargetString}"
     export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
     python tools/amd_build/build_amd.py
-  ''
-  # TODO(@connorbaker): For some reason, does not allow using system provided header.
-  # TODO(@connorbaker): Find a way to package Onnx so it can still be found by CMake.
-  # TODO(@connorbaker): PyTorch can also use Mimalloc -- is there a performance gain there?
-  + ''
-    substituteInPlace cmake/Dependencies.cmake \
-      --replace \
-        'set(POCKETFFT_INCLUDE_DIR "''${Torch_SOURCE_DIR}/third_party/pocketfft/")' \
-        'set(POCKETFFT_INCLUDE_DIR "${pkgs.pocketfft}/include/")'
-    substituteInPlace CMakeLists.txt \
-      --replace \
-        'set(USE_SYSTEM_ONNX ON)' \
-        'set(USE_SYSTEM_ONNX OFF)'
-  ''
-  # TODO(@connorbaker): Generate a list of all the targets we could build.
-  + ''
-    mkdir build
-    pushd build
-    cmake -G Ninja ..
-    cmake --build . --target help
-    popd
-    rm -rf build
   '';

   # Use pytorch's custom configurations
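A detail worth keeping in mind when reading the reverted substituteInPlace call above: inside a Nix indented string, ''${ escapes interpolation, so CMake's ${Torch_SOURCE_DIR} reaches the shell literally, while the unescaped ${pkgs.pocketfft} is expanded to a store path at evaluation time. A minimal, self-contained sketch of the two forms (the store path is a hypothetical stand-in):

    let
      pocketfft = "/nix/store/example-pocketfft"; # hypothetical stand-in for ${pkgs.pocketfft}
    in ''
      kept literal:  ''${Torch_SOURCE_DIR}
      interpolated:  ${pocketfft}/include/
    ''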
@@ -309,42 +220,9 @@ in buildPythonPackage rec {
   USE_MKLDNN = setBool mklDnnSupport;
   USE_MKLDNN_CBLAS = setBool mklDnnSupport;

-  USE_SYSTEM_LIBS = setBool true;
-  USE_NCCL = setBool cudaSupport;
-  # TODO(@connorbaker): Even though USE_SYSTEM_LIBS is set to true, we still need to set
-  # USE_SYSTEM_NCCL! Likely need to do something similar to what PyTorch does for TBB:
-  # https://github.com/pytorch/pytorch/blob/e9ebda29d87ce0916ab08c06ab26fd3766a870e5/CMakeLists.txt#L426-L428
-  USE_SYSTEM_NCCL = setBool cudaSupport;
-  BUILD_CUSTOM_PROTOBUF = setBool false;
-
-  # TODO(@connorbaker): Statically link CUDA?
-  # Seems like PyTorch stopped doing that due to size limitations of PyPi wheels?
-  # https://github.com/pytorch/pytorch/issues/96595#issuecomment-1468636755
-  # Also possibly due to the size of the resulting binary?
-  # https://github.com/pytorch/pytorch/pull/87502#issue-1418802284
-  # With respect to CUDNN, it's also possible it reduces performance?
-  # https://github.com/pytorch/pytorch/issues/87389#issuecomment-1286267629
-  # https://github.com/pytorch/pytorch/issues/50153
-  # Useful patches for static linking?
-  # https://github.com/pytorch/pytorch/pull/95153
-  # See the static linking master issue:
-  # https://github.com/pytorch/pytorch/issues/21737
-  USE_CUDA_STATIC_LINK = setBool cudaSupport;
-  CAFFE2_STATIC_LINK_CUDA = setBool cudaSupport;
-
-  # TODO(@connorbaker): Should we expose options to avoid building (Q|X)?NNPACK and MKLDNN when
-  # building with CUDA? Even better -- can we fully split up compilation so we're able to reuse
-  # these components?
-  # TODO(@connorbaker): Perhaps UCC/UCX would be interesting to add?
-  # https://github.com/pytorch/pytorch/pull/81583
-  # TODO(@connorbaker): Look at adding ZSTD?
-  # https://github.com/pytorch/pytorch/issues/44255
-  # TODO(@connorbaker): Fix https://github.com/pytorch/pytorch/issues/41999
-  # TODO(@connorbaker): Useful resources for how previous "use system" options were added:
-  # - https://github.com/pytorch/pytorch/pull/37137
-  # - https://github.com/pytorch/pytorch/pull/37277
-  # - https://github.com/pytorch/pytorch/pull/37501
-
+  # Avoid using pybind11 from git submodule
+  # Also avoids pytorch exporting the headers of pybind11
+  USE_SYSTEM_BIND11 = true;

   preBuild = ''
     export MAX_JOBS=$NIX_BUILD_CORES
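Two notes on the flags above, both hedged since the relevant definitions sit outside this hunk. First, the USE_* attributes become environment variables read by PyTorch's setup.py, which expects the strings "1" and "0"; setBool presumably does that rendering, along the lines of this sketch:

    let
      setBool = v: if v then "1" else "0"; # assumed shape of the helper; its definition is not shown in the diff
      mklDnnSupport = true;
    in {
      USE_MKLDNN = setBool mklDnnSupport; # evaluates to "1", exported as USE_MKLDNN=1
    }

Second, USE_SYSTEM_BIND11 can stay a raw boolean because stdenv's environment coercion turns true into "1" (and false into the empty string) when the attribute is passed to the builder.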
@@ -373,6 +251,8 @@ in buildPythonPackage rec {
   PYTORCH_BUILD_VERSION = version;
   PYTORCH_BUILD_NUMBER = 0;

+  USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL
+
   # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
   # (upstream seems to have fixed this in the wrong place?)
   # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
@@ -405,20 +285,7 @@ in buildPythonPackage rec {
   ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ]
     ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

-  buildInputs = [
-    # pkgs.onnx
-    blas
-    blas.provider
-    clog
-    cpuinfo
-    fp16
-    fxdiv
-    gloo
-    psimd
-    pthreadpool
-    sleef
-    xnnpack
-  ]
+  buildInputs = [ blas blas.provider pybind11 ]
     ++ lib.optionals stdenv.isLinux [ linuxHeaders_5_19 ] # TMP: avoid "flexible array member" errors for now
     ++ lib.optionals cudaSupport [ cudnn nccl ]
     ++ lib.optionals rocmSupport [ openmp ]
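The restored buildInputs line leans on lib.optionals for the backend-specific entries: lib.optionals cond xs evaluates to xs when cond is true and to [ ] otherwise, so the ++ chain always concatenates flat lists. A self-contained sketch of the semantics (optionals is inlined here so the snippet evaluates on its own):

    let
      optionals = cond: xs: if cond then xs else [ ]; # same behavior as lib.optionals
      cudaSupport = true;
      rocmSupport = false;
    in
      [ "blas" "pybind11" ]
      ++ optionals cudaSupport [ "cudnn" "nccl" ] # kept: cudaSupport is true
      ++ optionals rocmSupport [ "openmp" ]       # dropped: evaluates to [ ]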
