Merge from upstream #121

Merged: 77 commits, Aug 16, 2018

Commits
f59cce9
Some symbol annotation fixes for Windows
peterjc123 Aug 13, 2018
216961b
Remove is_zero_dim_ bool in THTensor.
gchanan Aug 13, 2018
c6fc3ab
fixes printing non-contiguous tensors
Aug 13, 2018
ffb59e5
adding stochastic quantization caffe2 operators (encoder and decoder …
Aug 13, 2018
f6eb966
Fix TanhGradientOperator linker errors (#10426)
wesolwsk Aug 14, 2018
9a9224e
Remove "locally" from CONTRIBUTING.md (#10495)
goldsborough Aug 14, 2018
099a545
Hipify Caffe2 binaries (#10468)
bddppq Aug 14, 2018
fed05cf
Fix prim::FusedConcat bug (#10466)
zou3519 Aug 14, 2018
facb293
Fix FindMKL.cmake for Windows (#10453)
peterjc123 Aug 14, 2018
7d16e87
Fix byte ordering issue in from_numpy (#9508)
vishwakftw Aug 14, 2018
c618df1
Add intrinsic support for external_input/output to nomnigraph (#10100)
bwasti Aug 14, 2018
329d901
Fold AffineChannel to Conv, the same way as BN (for Detectron models)…
jgong5 Aug 14, 2018
8d626b6
Merge remote-tracking branch 'upstream/master'
iotamudelta Aug 14, 2018
ef44fae
check attribute existence in torch.legacy.nn.SpatialFullConvolution in…
Aug 14, 2018
3a40baa
fix a grammatical error: accelerate compute (#10204)
ladyrick Aug 14, 2018
61bedc9
Schema-based creation of graph nodes (#10198)
zdevito Aug 14, 2018
9497383
Fix some warnings (#10297)
orionr Aug 14, 2018
01a7213
Merge remote-tracking branch 'upstream/master'
iotamudelta Aug 14, 2018
e5811be
Add tags for onnx tensor descriptors (#10502)
Aug 14, 2018
b446251
Add LSTMCell backward pass expect tests (#10506)
zou3519 Aug 14, 2018
d043f83
Add tests for Tensor.* nn.* F.* docs (#10311)
ssnl Aug 14, 2018
520f4f6
Added some unit test for box_with_nms_limit_op. (#10389)
newstzpz Aug 14, 2018
d1442b3
add a rebuild_libtorch command for speedier iteration. (#10036)
anderspapitto Aug 14, 2018
2400512
Remove unnecessary include
ezyang Aug 14, 2018
bd49780
CAFFE_ENFORCE -> CAFFE_ENFORCE_EQ for error with more information (#1…
jspark1105 Aug 14, 2018
eea8ab1
Move common code to RNNCellBase. (#10399)
striajan Aug 14, 2018
bdb11e7
Split the dependence of ONNX from test_operators.py (#10151)
houseroad Aug 14, 2018
13814d6
Remove use of data() in optimizers (#10490)
goldsborough Aug 14, 2018
3f3a30f
Added Reduce,AllGather,Gather,Scatter Ops for NCCL and MPI process gr…
teng-li Aug 14, 2018
b6ebf5a
Merge remote-tracking branch 'upstream/master'
iotamudelta Aug 14, 2018
16ecd6f
Fix Debug Build On Windows (#10359)
lara-hdr Aug 14, 2018
3c39e85
Python binding for reduce,allgather,scatter,gather ops and python tes…
teng-li Aug 14, 2018
39bfc2d
Nomnigraph - add diagnostic ability for Subgraph matching API (#10267)
duc0 Aug 14, 2018
b69b1c4
Adding python binding for MPI process group (#10199)
teng-li Aug 14, 2018
32bb404
Unified type annotation parsing for script frontends (#10279)
Aug 15, 2018
60aa416
Re-purpose setup_caffe2.py for faster caffe2 build iterations (#10520)
bddppq Aug 15, 2018
c69fa6d
Workaround with explicit casts for ROCm compiler issue.
iotamudelta Aug 15, 2018
17ecc06
static casting TIndex (#10514)
Aug 15, 2018
26e40fa
Tensor.accessor now fails on rvalue reference (#10518)
ebetica Aug 15, 2018
b6cc65a
Send, Recv, RecvAnysource, Barrier Op for MPI PG and Python Bindings …
teng-li Aug 15, 2018
964e30d
Workaround for Cuda9.2 and GCC7 compilation errors (#10510)
mingzhe09088 Aug 15, 2018
19ad55c
set coalesced=false at sparse transpose() and removed transpose invar…
weiyangfb Aug 15, 2018
f1631c3
Modify build.sh and test.sh scripts for ppc64le jenkins build and tes…
avmgithub Aug 15, 2018
e41528a
Also set stdin to subprocess pipe in FindCUDA windows popen call (#10…
mattdawkins Aug 15, 2018
ce8e8fe
Fixed a bug in box_with_nms_limit where it may produce more bounding …
newstzpz Aug 15, 2018
6a4893a
Multiplication by stride not necessary.
iotamudelta Aug 15, 2018
0f05f5f
ATen layer norm symbolic (#10513)
Aug 15, 2018
2786198
Rework this a bit for better performance and readability.
iotamudelta Aug 15, 2018
d8ff7ad
generalize order switch ops for 1-3d (#10395)
jspark1105 Aug 15, 2018
f0b6a5f
Strides are not needed any longer.
iotamudelta Aug 15, 2018
23965bb
Fix miscast.
iotamudelta Aug 15, 2018
f5a4dd8
Implements volumetric (5d) affine grid generation. (#8322)
elistevens Aug 15, 2018
44b029f
move matrix formation for dot products to precompute/request-only (#1…
Aug 15, 2018
87cbc98
Merge remote-tracking branch 'upstream/master'
iotamudelta Aug 15, 2018
f967e15
Revert "Rework this a bit for better performance and readability." and a
iotamudelta Aug 15, 2018
0f144bc
Apply the reinterpret_cast fix also to fn(a,b).
iotamudelta Aug 15, 2018
f59bcea
parallel max and min for ATen on CPU (#10343)
mingfeima Aug 15, 2018
d93e8ab
Nomnigraph - Refactor SubtreeMatchCriteria to become a Graph of Match…
duc0 Aug 15, 2018
9cffe78
relax tolerance for two torch.half (float16) tests (#10519)
hartb Aug 15, 2018
5d27d68
remove implicit conversion to cpu (#10416)
Aug 15, 2018
05a260d
Bump gloo to latest master (#10545)
orionr Aug 15, 2018
9646d68
support broadcasting in _kl_categorical_categorical (#10533)
joh4n Aug 15, 2018
85408e7
Move filler interface to operator schema (#10522)
highker Aug 15, 2018
0bbcc7b
Don't assume curl version in Windows build script (#10476)
Aug 15, 2018
254dedf
Propagate NaN through threshold (#10277)
ssnl Aug 15, 2018
fb09292
Increase tolerance in ConvBN test
bwasti Aug 15, 2018
484395e
Fix corner case with torch.multinomial (#9960)
t-vi Aug 15, 2018
86363e1
Move RNN implementations to C++ (#10481)
apaszke Aug 15, 2018
5adcac3
Cuda half macros cleanup (#10147)
syed-ahmed Aug 15, 2018
6f14202
Revert D9276252: [pytorch][PR] remove implicit conversion to cpu
ezyang Aug 15, 2018
c5b1aa9
Export uint8 tensors as byte string in mobile_exporter and add GivenT…
3l1 Aug 15, 2018
021b488
Remove setup_requires and tests_require from setup.py for FULL_CAFFE2…
orionr Aug 15, 2018
26c764a
Update FP16 submodule. Close #10523 (#10548)
Aug 15, 2018
fb45ec5
Don't set DEBUG=1 in ASAN build (#9902)
Aug 15, 2018
e78954a
Strides not needed.
iotamudelta Aug 15, 2018
7987f8f
Merge remote-tracking branch 'upstream/master'
iotamudelta Aug 15, 2018
e408df1
Try another way to workaround the compiler bug on ROCm.
iotamudelta Aug 16, 2018
2 changes: 1 addition & 1 deletion .jenkins/pytorch/build-asan.sh
@@ -17,5 +17,5 @@ export ASAN_OPTIONS=detect_leaks=0:symbolize=1
# TODO: Make the ASAN flags a more unified env var
CC="clang" CXX="clang++" LDSHARED="clang --shared" \
CFLAGS="-fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all -shared-libasan" \
-NO_CUDA=1 DEBUG=1 \
+NO_CUDA=1 \
python setup.py install
20 changes: 16 additions & 4 deletions .jenkins/pytorch/build.sh
@@ -42,11 +42,11 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
# This environment variable enabled HCC Optimizations that speed up the linking stage.
# https://github.com/RadeonOpenCompute/hcc#hcc-with-thinlto-linking
export KMTHINLTO=1

# Need the libc++1 and libc++abi1 libraries to allow torch._C to load at runtime
sudo apt-get install libc++1
sudo apt-get install libc++abi1

python tools/amd_build/build_pytorch_amd.py
USE_ROCM=1 python setup.py install --user
exit 0
@@ -64,13 +64,25 @@ if ([[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]
fi

# Target only our CI GPU machine's CUDA arch to speed up the build
-export TORCH_CUDA_ARCH_LIST=5.2
+export TORCH_CUDA_ARCH_LIST="5.2"

if [[ "$BUILD_ENVIRONMENT" == *ppc64le* ]]; then
export TORCH_CUDA_ARCH_LIST="6.0"
fi

if [[ "$BUILD_ENVIRONMENT" == *trusty-py3.6-gcc5.4* ]]; then
export DEBUG=1
fi

-WERROR=1 python setup.py install
+# ppc64le build fails when WERROR=1
+# set only when building other architectures
+# only use for "python setup.py install" line
+if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]]; then
+  WERROR=1 python setup.py install
+elif [[ "$BUILD_ENVIRONMENT" == *ppc64le* ]]; then
+  python setup.py install
+fi


# Add the test binaries so that they won't be git clean'ed away
git add -f build/bin
4 changes: 4 additions & 0 deletions .jenkins/pytorch/enabled-configs.txt
@@ -42,3 +42,7 @@ short-perf-test-cpu
short-perf-test-gpu
py2-clang3.8-rocm1.7.1-ubuntu16.04-build
py2-clang3.8-rocm1.7.1-ubuntu16.04-test
+pytorch-ppc64le-cuda9.2-cudnn7-py3-build
+pytorch-ppc64le-cuda9.2-cudnn7-py3-test
+pytorch-ppc64le-cuda9.1-cudnn7-py3-build
+pytorch-ppc64le-cuda9.1-cudnn7-py3-test
9 changes: 7 additions & 2 deletions .jenkins/pytorch/test.sh
@@ -80,8 +80,13 @@ test_aten() {
# NB: the ATen test binaries don't have RPATH set, so it's necessary to
# put the dynamic libraries somewhere were the dynamic linker can find them.
# This is a bit of a hack.
-ln -s "$TORCH_LIB_PATH"/libcaffe2* build/bin
-ln -s "$TORCH_LIB_PATH"/libnccl* build/bin
+if [[ "$BUILD_ENVIRONMENT" == *ppc64le* ]]; then
+  SUDO=sudo
+fi
+
+${SUDO} ln -s "$TORCH_LIB_PATH"/libcaffe2* build/bin
+${SUDO} ln -s "$TORCH_LIB_PATH"/libnccl* build/bin
+
ls build/bin
aten/tools/run_tests.sh build/bin
fi
2 changes: 1 addition & 1 deletion .jenkins/pytorch/win-build.sh
@@ -38,7 +38,7 @@ EOL

cat >ci_scripts/build_pytorch.bat <<EOL

-set PATH=C:\\Program Files\\CMake\\bin;C:\\Program Files\\7-Zip;C:\\curl-7.57.0-win64-mingw\\bin;C:\\Program Files\\Git\\cmd;C:\\Program Files\\Amazon\\AWSCLI;%PATH%
+set PATH=C:\\Program Files\\CMake\\bin;C:\\Program Files\\7-Zip;C:\\ProgramData\\chocolatey\\bin;C:\\Program Files\\Git\\cmd;C:\\Program Files\\Amazon\\AWSCLI;%PATH%

:: Install MKL
if "%REBUILD%"=="" (
2 changes: 1 addition & 1 deletion .jenkins/pytorch/win-test.sh
@@ -36,7 +36,7 @@ EOL

cat >ci_scripts/setup_pytorch_env.bat <<EOL

-set PATH=C:\\Program Files\\CMake\\bin;C:\\Program Files\\7-Zip;C:\\curl-7.57.0-win64-mingw\\bin;C:\\Program Files\\Git\\cmd;C:\\Program Files\\Amazon\\AWSCLI;%PATH%
+set PATH=C:\\Program Files\\CMake\\bin;C:\\Program Files\\7-Zip;C:\\ProgramData\\chocolatey\\bin;C:\\Program Files\\Git\\cmd;C:\\Program Files\\Amazon\\AWSCLI;%PATH%

:: Install Miniconda3
IF EXIST C:\\Jenkins\\Miniconda3 ( rd /s /q C:\\Jenkins\\Miniconda3 )
15 changes: 9 additions & 6 deletions CONTRIBUTING.md
@@ -19,9 +19,9 @@ If you are not familiar with creating a Pull Request, here are some guides:
- https://help.github.com/articles/creating-a-pull-request/


-## Developing locally with PyTorch
+## Developing PyTorch

-To locally develop with PyTorch, here are some tips:
+To develop PyTorch on your machine, here are some tips:

1. Uninstall all existing pytorch installs
```
@@ -30,7 +30,7 @@ pip uninstall torch
pip uninstall torch # run this command twice
```

-2. Locally clone a copy of PyTorch from source:
+2. Clone a copy of PyTorch from source:
2. Clone a copy of PyTorch from source:

```
git clone https://github.com/pytorch/pytorch
@@ -142,10 +142,13 @@ working on:
- Working on the Python bindings? Run `python setup.py develop` to rebuild
(NB: no `build` here!)

-- Working on `torch/csrc` or `aten`? Run `python setup.py build_caffe2` to
+- Working on `torch/csrc` or `aten`? Run `python setup.py rebuild_libtorch` to
  rebuild and avoid having to rebuild other dependent libraries we
-  depend on. The other valid targets are listed in `dep_libs` in `setup.py`
-  (prepend `build_` to get a target).
+  depend on.
+
+- Working on one of the other dependent libraries? The other valid
+  targets are listed in `dep_libs` in `setup.py`. Prepend `build_` to
+  get a target, and run as e.g. `python setup.py build_gloo`.

- Working on a test binary? Run `(cd build && ninja bin/test_binary_name)` to
rebuild only that test binary (without rerunning cmake). (Replace `ninja` with
4 changes: 2 additions & 2 deletions README.md
@@ -56,8 +56,8 @@ If you use NumPy, then you have used Tensors (a.k.a ndarray).

![Tensor illustration](https://github.com/pytorch/pytorch/blob/master/docs/source/_static/img/tensor_illustration.png)

-PyTorch provides Tensors that can live either on the CPU or the GPU, and accelerate
-compute by a huge amount.
+PyTorch provides Tensors that can live either on the CPU or the GPU, and accelerates the
+computation by a huge amount.

We provide a wide variety of tensor routines to accelerate and fit your scientific computation needs
such as slicing, indexing, math operations, linear algebra, reductions.
1 change: 0 additions & 1 deletion aten/src/ATen/Allocator.h
@@ -4,7 +4,6 @@
#include <stddef.h>

#include <ATen/Device.h>
-#include <ATen/Retainable.h>
#include <ATen/core/Error.h>
#include <ATen/core/UniqueVoidPtr.h>

2 changes: 1 addition & 1 deletion aten/src/ATen/Layout.h
@@ -1,6 +1,6 @@
#pragma once

-#include <ATen/ScalarType.h>
+#include <ATen/Backend.h>
#include <ATen/core/Error.h>

#include <iostream>
2 changes: 1 addition & 1 deletion aten/src/ATen/Registry.h
@@ -114,7 +114,7 @@ class AT_API Registry {
};

template <class SrcType, class ObjectPtrType, class... Args>
-class AT_API Registerer {
+class Registerer {
public:
Registerer(
const SrcType& key,
2 changes: 0 additions & 2 deletions aten/src/ATen/TensorAccessor.h
@@ -3,8 +3,6 @@
#include <cstddef>
#include <stdint.h>

-#include "ATen/ScalarType.h"
-
namespace at {


3 changes: 0 additions & 3 deletions aten/src/ATen/TensorImpl.cpp
@@ -91,9 +91,6 @@ void TensorImpl::release_resources() {
}

int64_t TensorImpl::dim() const {
-  if(THTensor_isZeroDim(tensor)) {
-    return 0;
-  }
return tensor->dim();
}

48 changes: 24 additions & 24 deletions aten/src/ATen/core/typeid.h
@@ -457,34 +457,34 @@ class Tensor;

struct _CaffeHighestPreallocatedTypeId final {};

-CAFFE_DECLARE_KNOWN_TYPE(0, uint8_t);
-CAFFE_DECLARE_KNOWN_TYPE(1, int8_t);
-CAFFE_DECLARE_KNOWN_TYPE(2, int16_t);
-CAFFE_DECLARE_KNOWN_TYPE(3, int);
-CAFFE_DECLARE_KNOWN_TYPE(4, int64_t);
-CAFFE_DECLARE_KNOWN_TYPE(5, at::Half);
-CAFFE_DECLARE_KNOWN_TYPE(6, float);
-CAFFE_DECLARE_KNOWN_TYPE(7, double);
+CAFFE_DECLARE_KNOWN_TYPE(0, uint8_t)
+CAFFE_DECLARE_KNOWN_TYPE(1, int8_t)
+CAFFE_DECLARE_KNOWN_TYPE(2, int16_t)
+CAFFE_DECLARE_KNOWN_TYPE(3, int)
+CAFFE_DECLARE_KNOWN_TYPE(4, int64_t)
+CAFFE_DECLARE_KNOWN_TYPE(5, at::Half)
+CAFFE_DECLARE_KNOWN_TYPE(6, float)
+CAFFE_DECLARE_KNOWN_TYPE(7, double)
// 8 = undefined type id

-CAFFE_DECLARE_KNOWN_TYPE(9, Tensor);
-CAFFE_DECLARE_KNOWN_TYPE(10, std::string);
-CAFFE_DECLARE_KNOWN_TYPE(11, bool);
-CAFFE_DECLARE_KNOWN_TYPE(12, uint16_t);
-CAFFE_DECLARE_KNOWN_TYPE(13, char);
-CAFFE_DECLARE_KNOWN_TYPE(14, std::unique_ptr<std::mutex>);
-CAFFE_DECLARE_KNOWN_TYPE(15, std::unique_ptr<std::atomic<bool>>);
-CAFFE_DECLARE_KNOWN_TYPE(16, std::vector<int32_t>);
-CAFFE_DECLARE_KNOWN_TYPE(17, std::vector<int64_t>);
-CAFFE_DECLARE_KNOWN_TYPE(18, std::vector<unsigned long>);
-CAFFE_DECLARE_KNOWN_TYPE(19, bool*);
-CAFFE_DECLARE_KNOWN_TYPE(20, char*);
-CAFFE_DECLARE_KNOWN_TYPE(21, int*);
+CAFFE_DECLARE_KNOWN_TYPE(9, Tensor)
+CAFFE_DECLARE_KNOWN_TYPE(10, std::string)
+CAFFE_DECLARE_KNOWN_TYPE(11, bool)
+CAFFE_DECLARE_KNOWN_TYPE(12, uint16_t)
+CAFFE_DECLARE_KNOWN_TYPE(13, char)
+CAFFE_DECLARE_KNOWN_TYPE(14, std::unique_ptr<std::mutex>)
+CAFFE_DECLARE_KNOWN_TYPE(15, std::unique_ptr<std::atomic<bool>>)
+CAFFE_DECLARE_KNOWN_TYPE(16, std::vector<int32_t>)
+CAFFE_DECLARE_KNOWN_TYPE(17, std::vector<int64_t>)
+CAFFE_DECLARE_KNOWN_TYPE(18, std::vector<unsigned long>)
+CAFFE_DECLARE_KNOWN_TYPE(19, bool*)
+CAFFE_DECLARE_KNOWN_TYPE(20, char*)
+CAFFE_DECLARE_KNOWN_TYPE(21, int*)

#ifdef CAFFE2_UNIQUE_LONG_TYPEMETA
-CAFFE_DECLARE_KNOWN_TYPE(22, long);
-CAFFE_DECLARE_KNOWN_TYPE(23, std::vector<long>);
+CAFFE_DECLARE_KNOWN_TYPE(22, long)
+CAFFE_DECLARE_KNOWN_TYPE(23, std::vector<long>)
#endif // CAFFE2_UNIQUE_LONG_TYPEMETA

-CAFFE_DECLARE_KNOWN_TYPE(24, _CaffeHighestPreallocatedTypeId);
+CAFFE_DECLARE_KNOWN_TYPE(24, _CaffeHighestPreallocatedTypeId)
} // namespace caffe2
3 changes: 2 additions & 1 deletion aten/src/ATen/function_wrapper.py
@@ -119,7 +119,7 @@ def TypedDict(name, attrs, total=True):  # type: ignore

# add non-virtual declaration to Tensor.h
TENSOR_METHOD_DECLARATION = CodeTemplate("""\
-${return_type} ${api_name}(${method_formals_with_defaults})${const_mark};
+AT_API ${return_type} ${api_name}(${method_formals_with_defaults})${const_mark};
""")
# add non-virtual declaration to Tensor.cpp
TENSOR_METHOD_DEFINITION = CodeTemplate("""\
@@ -295,6 +295,7 @@ def __init__(self, reason):
CodeTemplate(
'check_generator<${Backend}Generator>(${arg_name}, &globalContext().defaultGenerator(backend()))'),
# This is a cast done via direct-construction
+'IntListStride': CodeTemplate('at::IntList ${result_name} = get_intlist_stride_th(${arg_name});'),
'real': CodeTemplate('${arg_name}.to${ScalarName}()'),
'accreal': CodeTemplate('${arg_name}.to${AccScalarName}()'),
'TensorList': CodeTemplate(