python3Packages.pytorch: 1.2.0 -> 1.4.1, python3Packages.ignite: 0.2.1 -> 0.3.0 #75827

Merged · 4 commits · May 9, 2020
15 changes: 8 additions & 7 deletions pkgs/development/python-modules/ignite/default.nix
@@ -2,6 +2,7 @@
 , buildPythonPackage
 , fetchFromGitHub
 , pytest
+, matplotlib
 , mock
 , pytorch
 , pynvml
@@ -11,23 +12,23 @@
 
 buildPythonPackage rec {
   pname = "ignite";
-  version = "0.2.1";
+  version = "0.3.0";
 
   src = fetchFromGitHub {
     owner = "pytorch";
     repo = pname;
     rev = "v${version}";
-    sha256 = "15k6dd11yxn4923llcpmw4srl1by5ljhh7aw5pnkn4n4qpywh6cm";
+    sha256 = "0i863kxi1r1hspj19lhn6r8256vdazjcyvis0s33fgzrf7kxi08x";
   };
 
-  checkInputs = [ pytest mock ];
+  checkInputs = [ pytest matplotlib mock ];
+  propagatedBuildInputs = [ pytorch scikitlearn tqdm pynvml ];
 
+  # Some packages are not in NixPkgs; other tests try to build distributed
+  # models, which doesn't work in the sandbox.
   checkPhase = ''
-    pytest -k 'not visdom and not tensorboard and not mlflow and not polyaxon' tests/
+    pytest -k 'not visdom and not tensorboard and not mlflow and not polyaxon and not conftest and not engines and not distrib_' tests/
   '';
-  # these packages are not currently in nixpkgs
-
-  propagatedBuildInputs = [ pytorch scikitlearn tqdm pynvml ];
 
   meta = with lib; {
     description = "High-level training library for PyTorch";
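Note (not part of the diff): the tightened pytest -k filter above composes with further exclusions when iterating locally. Below is a minimal sketch of extending it with overridePythonAttrs, assuming evaluation from the root of a nixpkgs checkout; the trailing "not my_slow_test" clause is a hypothetical placeholder, not a real ignite test.

# ignite-local.nix -- illustration only, not part of this PR.
# Build with: nix-build ignite-local.nix
let
  pkgs = import ./. { };
in
pkgs.python3Packages.ignite.overridePythonAttrs (old: {
  # Same exclusions as the checkPhase above, plus one hypothetical extra
  # ("my_slow_test") to show how the pytest -k filter composes.
  checkPhase = ''
    pytest -k 'not visdom and not tensorboard and not mlflow and not polyaxon and not conftest and not engines and not distrib_ and not my_slow_test' tests/
  '';
})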
112 changes: 78 additions & 34 deletions pkgs/development/python-modules/pytorch/default.nix
@@ -1,23 +1,25 @@
-{ stdenv, fetchurl, fetchgit, buildPythonPackage, python, pythonOlder,
+{ stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python,
   cudaSupport ? false, cudatoolkit ? null, cudnn ? null, nccl ? null, magma ? null,
-  mklSupport ? false, mkl ? null,
+  mklDnnSupport ? true, useSystemNccl ? true,
   openMPISupport ? false, openmpi ? null,
-  buildNamedTensor ? false,
   buildBinaries ? false,
   buildDocs ? false,
   cudaArchList ? null,
-  fetchFromGitHub, lib, numpy, pyyaml, cffi, click, typing, cmake, hypothesis, numactl,
+  numpy, pyyaml, cffi, click, typing, cmake, dnnl, hypothesis, numactl, psutil,
   linkFarm, symlinkJoin,
 
+  # virtual pkg that consistently instantiates blas across nixpkgs
+  # See https://github.com/NixOS/nixpkgs/pull/83888
+  blas,
+
   # ninja (https://ninja-build.org) must be available to run C++ extensions tests,
   ninja,
 
   # dependencies for torch.utils.tensorboard
-  tensorboardSupport ? true, pillow, six, future, tensorflow-tensorboard,
+  pillow, six, future, tensorflow-tensorboard, protobuf,
 
   utillinux, which, isPy3k }:
 
 assert !openMPISupport || openmpi != null;
-assert !tensorboardSupport || tensorflow-tensorboard != null;
 
 # assert that everything needed for cuda is present and that the correct cuda versions are used
 assert !cudaSupport || cudatoolkit != null;
@@ -28,17 +30,11 @@ assert !cudaSupport || (let majorIs = lib.versions.major cudatoolkit.version;
 let
   hasDependency = dep: pkg: lib.lists.any (inp: inp == dep) pkg.buildInputs;
   matchesCudatoolkit = hasDependency cudatoolkit;
-  matchesMkl = hasDependency mkl;
 in
 # confirm that cudatoolkits are sync'd across dependencies
 assert !(openMPISupport && cudaSupport) || matchesCudatoolkit openmpi;
 assert !cudaSupport || matchesCudatoolkit magma;
 
-# confirm that mkl is sync'd across dependencies
-assert !mklSupport || mkl != null;
-assert !(mklSupport && cudaSupport) || matchesMkl magma;
-assert !mklSupport || (numpy.blasImplementation == "mkl" && numpy.blas == mkl);
-
 let
   cudatoolkit_joined = symlinkJoin {
     name = "${cudatoolkit.name}-unsplit";
@@ -108,7 +104,7 @@ let
     "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ";
 
 in buildPythonPackage rec {
-  version = "1.2.0";
+  version = "1.4.1";
   pname = "pytorch";
   disabled = !isPy3k;
 
@@ -122,18 +118,54 @@ in buildPythonPackage rec {
     repo = "pytorch";
     rev = "v${version}";
     fetchSubmodules = true;
-    sha256 = "1biyq2p48chakf2xw7hazzqmr5ps1nx475ql8vkmxjg5zaa071cz";
+    sha256 = "1aa1il4f98pswfj20cv27yfb91l1jcq4515i7mvq7sh5647yzwms";
   };
 
-  dontUseCmakeConfigure = true;
-
   preConfigure = lib.optionalString cudaSupport ''
     export TORCH_CUDA_ARCH_LIST="${lib.strings.concatStringsSep ";" final_cudaArchList}"
     export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
   '' + lib.optionalString (cudaSupport && cudnn != null) ''
     export CUDNN_INCLUDE_DIR=${cudnn}/include
   '';
 
+  patches = [
+    # Prevents a race condition which would be introduced by pull 30333.
+    # See https://github.com/pytorch/pytorch/issues/32277
+    # Can be removed >1.5.0.
+    (fetchpatch {
+      url = "https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/30332.patch";
+      sha256 = "1v9dwbhz3rdxcx6sz8y8j9n3bj6nqs78b1r8yg89yc15n6l4cqx2";
+    })
+
+    # Fixes errors with gcc-9 compilation. Cherry-picked on advice from ezyang.
+    # See https://github.com/pytorch/pytorch/issues/32277
+    # Can be removed >1.5.0.
+    (fetchpatch {
+      url = "https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/30333.patch";
+      sha256 = "139413fl37h2fnil0cv99a67mqqnsh02k74b92by1qyr6pcfyg3q";
+    })
+  ];
+
+  # Use pytorch's custom configurations
+  dontUseCmakeConfigure = true;
+
+  BUILD_NAMEDTENSOR = true;
+  BUILD_DOCS = buildDocs;
+
+  USE_MKL = blas.implementation == "mkl";
+
+  # Unlike MKL, MKLDNN is FOSS, so we enable support for it by default. Note
+  # that this was renamed to dnnl and then renamed again to oneDNN upstream, but
+  # pytorch still calls it by the old name mkldnn.
+  USE_MKLDNN = mklDnnSupport;
+  USE_MKLDNN_CBLAS = mklDnnSupport;
+
+  preBuild = ''
+    export MAX_JOBS=$NIX_BUILD_CORES
+    ${python.interpreter} setup.py build --cmake-only
+    ${cmake}/bin/cmake build
+  '';
+
   preFixup = ''
     function join_by { local IFS="$1"; shift; echo "$*"; }
     function strip2 {
@@ -155,8 +187,7 @@ in buildPythonPackage rec {
   PYTORCH_BUILD_VERSION = version;
   PYTORCH_BUILD_NUMBER = 0;
 
-  BUILD_NAMEDTENSOR = buildNamedTensor; # experimental feature
-  USE_SYSTEM_NCCL=true; # don't build pytorch's third_party NCCL
+  USE_SYSTEM_NCCL=useSystemNccl; # don't build pytorch's third_party NCCL
 
   # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
   # (upstream seems to have fixed this in the wrong place?)
@@ -165,7 +196,7 @@ in buildPythonPackage rec {
   #
   # Also of interest: pytorch ignores CXXFLAGS uses CFLAGS for both C and C++:
   # https://github.com/pytorch/pytorch/blob/v1.2.0/setup.py#L17
-  NIX_CFLAGS_COMPILE = lib.optionals (numpy.blas == mkl) [ "-Wno-error=array-bounds" ];
+  NIX_CFLAGS_COMPILE = lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ];
 
   nativeBuildInputs = [
     cmake
@@ -174,33 +205,46 @@ in buildPythonPackage rec {
     ninja
   ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ];
 
-  buildInputs = [
-    numpy.blas
-  ] ++ lib.optionals cudaSupport [ cudnn magma nccl ]
+  buildInputs = [ blas blas.provider dnnl ]
+    ++ lib.optionals cudaSupport [ cudnn magma nccl ]
     ++ lib.optionals stdenv.isLinux [ numactl ];
 
   propagatedBuildInputs = [
     cffi
     click
     numpy
     pyyaml
-  ] ++ lib.optionals openMPISupport [ openmpi ]
-    ++ lib.optional (pythonOlder "3.5") typing
-    ++ lib.optionals tensorboardSupport [pillow six future tensorflow-tensorboard];
+    # the following are required for tensorboard support
+    pillow six future tensorflow-tensorboard protobuf
+  ] ++ lib.optionals openMPISupport [ openmpi ];
 
-  checkInputs = [ hypothesis ninja ];
+  checkInputs = [ hypothesis ninja psutil ];
 
-  doCheck = false; # tests take a long time for channel release, so doCheck should be overridden only when developing
-  checkPhase = "${cudaStubEnv}python test/run_test.py"
-    + " --exclude utils" # utils requires git, which is not allowed in the check phase
+  # Tests take a long time and may be flaky, so just sanity-check imports
+  doCheck = false;
+  pythonImportsCheck = [
+    "torch"
+  ];
+
+  checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [
+    cudaStubEnv
+    "${python.interpreter} test/run_test.py"
+    "--exclude"
+    (concatStringsSep " " [
+      "utils" # utils requires git, which is not allowed in the check phase
 
-    # Other tests which have been disabled in previous nix derivations of pytorch.
-    # --exclude dataloader sparse torch utils thd_distributed distributed cpp_extensions
-    ;
+      # "dataloader" # psutils correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors
+      # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build
+
+      # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins
+      (optionalString (majorMinor version == "1.3" ) "tensorboard")
+    ])
+  ];
   postInstall = ''
     mkdir $dev
     cp -r $out/${python.sitePackages}/torch/lib $dev/lib
     cp -r $out/${python.sitePackages}/torch/include $dev/include
+    cp -r $out/${python.sitePackages}/torch/share $dev/share
   '';
 
   postFixup = stdenv.lib.optionalString stdenv.isDarwin ''
@@ -233,6 +277,6 @@ in buildPythonPackage rec {
     homepage = "https://pytorch.org/";
     license = lib.licenses.bsd3;
     platforms = with lib.platforms; linux ++ lib.optionals (!cudaSupport) darwin;
-    maintainers = with lib.maintainers; [ teh thoughtpolice stites tscholak ]; # tscholak esp. for darwin-related builds
+    maintainers = with lib.maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds
   };
 }
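Usage note (not part of the diff): with the mklSupport/mkl arguments gone, an MKL-backed pytorch now follows the nixpkgs-wide blas implementation from PR #83888, while CUDA and MKL-DNN remain per-derivation arguments. A minimal sketch of the resulting override surface, assuming evaluation from the root of a nixpkgs checkout that contains this PR:

# pytorch-variants.nix -- illustration only, not part of this PR.
let
  pkgs = import ./. {
    config.allowUnfree = true; # only needed for the CUDA variant (cudatoolkit is unfree)
  };
in
{
  # CUDA build: cudaSupport pulls in cudatoolkit, cudnn, magma and nccl.
  pytorch-cuda = pkgs.python3Packages.pytorch.override {
    cudaSupport = true;
  };

  # CPU build with oneDNN (still called mkldnn by pytorch) switched off,
  # via the mklDnnSupport argument introduced in this diff.
  pytorch-no-mkldnn = pkgs.python3Packages.pytorch.override {
    mklDnnSupport = false;
  };
}

A single variant can then be built with: nix-build pytorch-variants.nix -A pytorch-cuda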