Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge from upstream #67

Merged
merged 24 commits into from
Jul 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7b9d891
Fix integral type dispatch error message (#9625)
vishwakftw Jul 20, 2018
f84fdc7
Remove unnecessary functions from StorageDerived.h
cpuhrsch Jul 20, 2018
bbb30ad
Use THTensor/Storage for THVoidTensor/Storage (#9588)
cpuhrsch Jul 20, 2018
b9e89cf
Revert "Extend DispatchStub to support CUDA dispatch (#9579)" (#9614)
colesbury Jul 20, 2018
1d4d9fc
Prepare to stop using attributes in the JIT (#9505)
apaszke Jul 20, 2018
a3a6ab6
Fix the error in UnpackSegmentsOp when calculating the gradient with …
nateanl Jul 20, 2018
d368886
Fixed a missing '=' in LPPoolNd repr function (#9629)
vmirly Jul 20, 2018
bae156a
Support (some) CUDA Lapack on n-dimensional empty tensors.
gchanan Jul 20, 2018
8a0fe0a
set_input_record() should always add external input (#9636)
kittipatv Jul 20, 2018
1003ccf
Creates CUDAContext (#9435)
mruberry Jul 20, 2018
0158103
Add workspace.RunPlanInBackground (#9637)
kittipatv Jul 20, 2018
45e5c17
ONNXIFI transform (#9569)
Jul 20, 2018
3efdece
Support n-dimensional empty tensors in take/put.
gchanan Jul 20, 2018
5e84403
Fix for half conversion for ROCm 1.8.2 (#9663)
Jul 21, 2018
23ed26a
Guard include of cuda-only header comm.h (#9656)
ezyang Jul 21, 2018
b5c8d59
Add a CUDAContext header include
ezyang Jul 21, 2018
3bb8c5e
Allow MKLDNN on macOS, and any other OS where CMake is able to detect…
Jul 21, 2018
a01d6f0
Update channel_shuffle_op and transpose 2d to speed up ShuffleNet (#9…
xiaomengy Jul 21, 2018
769cb5a
Add new ways of matching nodes with schemas in the JIT (#9567)
apaszke Jul 22, 2018
f3d72b2
Modify barrier net to allow better control over its initialization an…
ygdx Jul 22, 2018
1afdc57
Hide all other fields in THTensor (#9683)
ezyang Jul 22, 2018
9ee5133
Fix dataloader hang when it is not completely iterated (#9655)
ssnl Jul 23, 2018
53083b8
Remove CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS and fix CUDA 8 build on Windo…
peterjc123 Jul 23, 2018
8a3634d
Merge remote-tracking branch 'upstream/master'
iotamudelta Jul 23, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ torch/csrc/nn/THNN.cpp
torch/csrc/nn/THNN.cwrap
torch/lib/*.a*
torch/lib/*.dll*
torch/lib/*.exe*
torch/lib/*.dylib*
torch/lib/*.h
torch/lib/*.lib
Expand Down
9 changes: 6 additions & 3 deletions .jenkins/pytorch/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,13 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
(cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_aten_asan(3)")
fi

export ATEN_DISABLE_AVX=
export ATEN_DISABLE_AVX2=
if [[ "${JOB_BASE_NAME}" == *-NO_AVX-* ]]; then
export ATEN_CPU_CAPABILITY=default
elif [[ "${JOB_BASE_NAME}" == *-NO_AVX2-* ]]; then
export ATEN_CPU_CAPABILITY=avx
export ATEN_DISABLE_AVX=1
fi
if [[ "${JOB_BASE_NAME}" == *-NO_AVX2-* ]]; then
export ATEN_DISABLE_AVX2=1
fi

test_python_nn() {
Expand Down
4 changes: 0 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,6 @@ endif()
# ---[ CMake scripts + modules
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)

if (MSVC AND ${BUILD_SHARED_LIBS})
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()

# ---[ CMake build directories
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
Expand Down
1 change: 1 addition & 0 deletions aten/src/ATen/Backtrace.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <ATen/optional.h>
#include <ATen/Backtrace.h>

#include <functional>
#include <memory>
Expand Down
6 changes: 4 additions & 2 deletions aten/src/ATen/Backtrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
#include <string>
#include <typeinfo>

#include <ATen/ATenGeneral.h>

namespace at {
/// Utility to demangle a C++ symbol name.
std::string demangle(const char* name);
AT_API std::string demangle(const char* name);

/// Returns the printable name of the type.
template <typename T>
Expand All @@ -19,7 +21,7 @@ inline const char* demangle_type() {
#endif // __GXX_RTTI
}

std::string get_backtrace(
AT_API std::string get_backtrace(
size_t frames_to_skip = 0,
size_t maximum_number_of_frames = 64,
bool skip_python_frames = true);
Expand Down
110 changes: 0 additions & 110 deletions aten/src/ATen/CUDAGuard.h
Original file line number Diff line number Diff line change
@@ -1,110 +0,0 @@
#pragma once

#include <ATen/ArrayRef.h>
#include <ATen/CUDAStream.h>
#include <ATen/Context.h>
#include <ATen/DeviceGuard.h>

#include <cstddef>
#include <vector>

namespace at {

/// A variant of `DeviceGuard` that augments it with an understanding of CUDA
/// streams. This guard can not only set and reset the current CUDA device, but
/// also set and reset the current CUDA stream. It is important to note that
/// because a CUDA stream is intrinsically associated with the CUDA device to
/// which it is bound, setting the CUDA stream *also* sets the current CUDA
/// device to that of the stream.
struct CUDAGuard {
  /// Default constructor, does nothing and causes no change in the current
  /// stream or device until `set_stream` or `set_device` is called.
  CUDAGuard() = default;

  /// Sets the CUDA stream and its associated device as the current one (calls
  /// `set_stream`).
  explicit CUDAGuard(const CUDAStream& stream) {
    set_stream(stream);
  }

  /// Calls `set_device` with the given index.
  explicit CUDAGuard(int32_t device) {
    set_device(device);
  }

  CUDAGuard(const CUDAGuard&) = delete;
  CUDAGuard& operator=(const CUDAGuard&) = delete;

  /// Move-constructs this `CUDAGuard` from another `CUDAGuard`. The
  /// moved-from `CUDAGuard` is modified such that its destruction has no
  /// effect (does not reset the stream or device).
  /// NOTE(review): the defaulted move relies on the moved-from std::vector
  /// being left empty, which holds on all major implementations but is not
  /// formally guaranteed by the standard.
  CUDAGuard(CUDAGuard&& other) noexcept = default;

  /// Move-assigns this `CUDAGuard` from another `CUDAGuard`. The
  /// moved-from `CUDAGuard` is modified such that its destruction has no
  /// effect (does not reset the stream or device).
  CUDAGuard& operator=(CUDAGuard&& other) {
    // Guard against self-move-assignment: without this check we would
    // move-from and then clear() our own original_streams_, and the
    // destructor could no longer restore the saved streams.
    if (this != &other) {
      device_guard_ = std::move(other.device_guard_);
      original_streams_ = std::move(other.original_streams_);
      other.original_streams_.clear();
    }
    return *this;
  }

  /// Resets the CUDA stream on each device to the one that was active upon
  /// construction.
  ~CUDAGuard() {
    if (!original_streams_.empty()) {
      for (size_t device = 0; device < original_streams_.size(); ++device) {
        // Unchecked variant: destructors must not throw on CUDA errors.
        globalContext().uncheckedSetCurrentCUDAStreamOnDevice(
            device, original_streams_[device]);
      }
    }
  }

  /// Sets the current CUDA device to the device associated with the given
  /// stream, and then sets the current stream on that device to the one given.
  void set_stream(const CUDAStream& stream) {
    device_guard_.set_index(stream.device());
    // If we haven't stored the current stream yet, store it now — one saved
    // stream per device, so the destructor can restore all of them.
    if (original_streams_.empty()) {
      const size_t device_count = globalContext().getNumGPUs();
      original_streams_.reserve(device_count);
      for (size_t device = 0; device < device_count; ++device) {
        original_streams_.push_back(
            globalContext().getCurrentCUDAStreamOnDevice(device));
      }
    }
    globalContext().setCurrentCUDAStreamOnDevice(
        device_guard_.last_index(), stream);
  }

  /// Sets the CUDA device to the given one.
  void set_device(int32_t device) {
    device_guard_.set_index(device);
  }

  /// Returns the CUDA streams that were active in the first call to
  /// `set_stream`. If there was no such call, the returned container is
  /// empty.
  ArrayRef<CUDAStream> original_streams() const noexcept {
    return original_streams_;
  }

  /// Returns the device that was set upon construction of the guard.
  int32_t original_device() const noexcept {
    return device_guard_.original_index();
  }

  /// Returns the last device that was set via `set_device`, if any.
  int32_t last_device() const noexcept {
    return device_guard_.last_index();
  }

 private:
  /// The guard for the current device.
  DeviceGuard device_guard_;
  /// The original streams that were active on all devices.
  std::vector<CUDAStream> original_streams_;
};

} // namespace at
197 changes: 0 additions & 197 deletions aten/src/ATen/CUDAStream.cpp
Original file line number Diff line number Diff line change
@@ -1,197 +0,0 @@
#include "ATen/CUDAStream.h"
#include "ATen/Error.h"
#include "ATen/detail/CUDAHooksInterface.h"

#include <mutex>

// Internal implementation is entirely hidden
// Refcounted stream record. The default stream for each device is marked
// is_destructible == false and is never destroyed or refcounted.
struct CUDAStreamInternals {
  bool is_destructible;
  std::atomic<int> refcount;
  int64_t device; // Note: cudaGetDevice works with int32_t, not int64_t
  cudaStream_t stream;
};

namespace at {

namespace detail {

/*
 * Stream state
 *
 * One indestructible default stream is kept per device (created lazily,
 * once, globally); the "current" stream pointer per device is thread local.
 */
static constexpr cudaStream_t DEFAULT_STREAM = 0;

static std::once_flag init_flag;
static int64_t num_gpus;
static CUDAStreamInternals* default_streams;
static thread_local CUDAStreamInternals** current_streams = nullptr;

// Creates a(n indestructible) default stream for each device
// Note: the default stream on each device is signified by a zero
// value for the pointer, and so is not actually created as usual.
// In particular, we don't need to switch devices when creating the
// streams.
static void initDefaultCUDAStreams() {
  num_gpus = getCUDAHooks().getNumGPUs();
  default_streams = (CUDAStreamInternals*) malloc(num_gpus * sizeof(CUDAStreamInternals));
  for (auto i = decltype(num_gpus){0}; i < num_gpus; ++i) {
    default_streams[i].is_destructible = false;
    default_streams[i].refcount = 0;
    default_streams[i].device = i;
    default_streams[i].stream = DEFAULT_STREAM;
  }
}

// Init front-end to ensure initialization only occurs once
static void initCUDAStreamsOnce() {
  // Inits default streams (once, globally)
  std::call_once(init_flag, initDefaultCUDAStreams);

  // Inits current streams (thread local) to default streams
  if (current_streams) return;
  current_streams = (CUDAStreamInternals**) malloc(num_gpus * sizeof(CUDAStreamInternals*));
  for (auto i = decltype(num_gpus){0}; i < num_gpus; ++i) {
    current_streams[i] = &default_streams[i];
  }
}

/*
 * Pointer-based stream API
 */

// Helper to return the current device
static inline int64_t current_device() {
  int cur_device;
  DynamicCUDAInterface::get_device(&cur_device);
  return cur_device;
}

// Helper to verify the GPU index is valid
static inline void check_gpu(int64_t device) {
  AT_ASSERT(device >= 0 && device < num_gpus);
}

// Returns the (non-refcounted) default stream record for the given device.
CUDAStreamInternals* CUDAStream_getDefaultStreamOnDevice(int64_t device) {
  initCUDAStreamsOnce();
  check_gpu(device);
  return &default_streams[device];
}
CUDAStreamInternals* CUDAStream_getDefaultStream() {
  return CUDAStream_getDefaultStreamOnDevice(current_device());
}

// Creates (and retains) a new cuda stream on the current device.
CUDAStreamInternals* CUDAStream_createAndRetainWithOptions(int32_t flags, int32_t priority) {
  CUDAStreamInternals* internals = (CUDAStreamInternals*) malloc(sizeof(CUDAStreamInternals));
  internals->is_destructible = true;
  internals->refcount = 1;
  internals->device = current_device();
  DynamicCUDAInterface::cuda_stream_create_with_priority(&internals->stream, flags, priority);
  return internals;
}

// Note: despite these methods not being named "unsafe," when using them in a
// multithreaded environment the caller must be sure that streams are valid
// when they're requested. These methods will throw an error if an
// invalid stream is requested.
CUDAStreamInternals* CUDAStream_getAndRetainCurrentStreamOnDevice(int64_t device) {
  initCUDAStreamsOnce();
  check_gpu(device);
  auto cur = current_streams[device];
  AT_ASSERT(CUDAStream_retain(cur));
  return cur;
}
CUDAStreamInternals* CUDAStream_getAndRetainCurrentStream() {
  return CUDAStream_getAndRetainCurrentStreamOnDevice(current_device());
}

// Note: these unsafe methods do not retain the stream before returning it.
// This is unsafe behavior and these methods SHOULD NOT BE USED.
// They are here only for legacy compatibility.
CUDAStreamInternals* CUDAStream_getCurrentStreamOnDeviceUnsafe(int64_t device) {
  initCUDAStreamsOnce();
  check_gpu(device);
  return current_streams[device];
}
CUDAStreamInternals* CUDAStream_getCurrentStreamUnsafe() {
  return CUDAStream_getCurrentStreamOnDeviceUnsafe(current_device());
}

// Makes ptr current on its device: retains ptr, then releases the
// previously-current stream.
void CUDAStream_setStreamOnDevice(int64_t device, CUDAStreamInternals* ptr) {
  initCUDAStreamsOnce();
  check_gpu(device);
  AT_ASSERT(ptr);
  AT_ASSERT(ptr->device == device);
  AT_ASSERT(CUDAStream_retain(ptr));

  CUDAStream_free(current_streams[device]);
  current_streams[device] = ptr;
}

// Unchecked variant: skips device validation, does not retain ptr, and frees
// the old stream without CUDA error checking (see CUDAStream_uncheckedFree).
void CUDAStream_uncheckedSetStreamOnDevice(int64_t device, CUDAStreamInternals* ptr) {
  initCUDAStreamsOnce();
  CUDAStream_uncheckedFree(current_streams[device]);
  current_streams[device] = ptr;
}

void CUDAStream_setStream(CUDAStreamInternals* ptr) {
  CUDAStream_setStreamOnDevice(current_device(), ptr);
}

// Getters
cudaStream_t CUDAStream_stream(CUDAStreamInternals* ptr) {
  AT_ASSERT(ptr);
  return ptr->stream;
}

int64_t CUDAStream_device(CUDAStreamInternals* ptr) {
  AT_ASSERT(ptr);
  return ptr->device;
}

// Memory management
// Note: only destructible (non-default) streams are ref counted
bool CUDAStream_retain(CUDAStreamInternals* ptr) {
  AT_ASSERT(ptr);
  // Returns true iff the record was already owned by someone else
  // (refcount was >= 1 before the increment); default streams always succeed.
  if (ptr->is_destructible) return(++ptr->refcount > 1);
  return true;
}

// Decrements the refcount and destroys the stream once it reaches zero.
// Default streams (stream == 0, is_destructible == false) are never freed.
void CUDAStream_free(CUDAStreamInternals*& ptr) {
  if (ptr && ptr->stream && ptr->is_destructible && --ptr->refcount <= 0) {
    AT_ASSERT(ptr->refcount == 0);
    DynamicCUDAInterface::cuda_stream_destroy(ptr->stream);
    free(ptr);
    ptr = nullptr;
  }
}
// As CUDAStream_free, but destroys the stream via the unchecked interface
// (no CUDA error checking) and without asserting on the refcount.
void CUDAStream_uncheckedFree(CUDAStreamInternals*& ptr) {
  if (ptr && ptr->stream && ptr->is_destructible && --ptr->refcount <= 0) {
    DynamicCUDAInterface::unchecked_cuda_stream_destroy(ptr->stream);
    free(ptr);
    ptr = nullptr;
  }
}

} // namespace detail

/*
* CUDAStream functions
*/

// Copy constructor
// Retains the source's internals (refcount bump for non-default streams)
// before sharing the pointer, so both objects co-own the stream record.
CUDAStream::CUDAStream(const CUDAStream& other) {
  AT_ASSERT(other.internals_);
  // The retain happens inside the assert's argument; AT_ASSERT in ATen is
  // always evaluated, so the side effect is not compiled out.
  AT_ASSERT(detail::CUDAStream_retain(other.internals_));

  internals_ = other.internals_;
}

// Move constructor
// No retain/free needed: ownership of the internals transfers via swap.
// NOTE(review): assumes internals_ has a member initializer (presumably
// nullptr) in CUDAStream.h, so the moved-from object ends up holding that
// value after the swap — confirm against the header.
CUDAStream::CUDAStream(CUDAStream&& other) {
  AT_ASSERT(other.internals_);

  std::swap(internals_, other.internals_);
}

} // namespace at
Loading