Skip to content

Commit

Permalink
Migrate PyTorch to C++17 (pytorch#85969)
Browse files Browse the repository at this point in the history
With CUDA-10.2 gone we can finally do it!

This PR mostly contains build system related changes, invasive functional ones are to be followed.
Among many expected tweaks to the build system, here are a few unexpected ones:
 - Force onnx_proto project to be updated to C++17 to avoid `duplicate symbols` error when compiled by gcc-7.5.0, as storage rule for `constexpr` changed in C++17, but gcc does not seem to follow it
 - Do not use `std::apply` on CUDA but rely on the built-in variant, as it results in test failures when CUDA runtime picks host rather than device function when `std::apply` is invoked from CUDA code.
 - `std::decay_t` -> `::std::decay_t` and `std::move`->`::std::move` as VC++ for some reason claims that the `std` symbol is ambiguous
 - Disable use of `std::aligned_alloc` on Android, as its `libc++` does not implement it.

Some prerequisites:
 - pytorch#89297
 - pytorch#89605
 - pytorch#90228
 - pytorch#90389
 - pytorch#90379
 - pytorch#89570
 - facebookincubator/gloo#336
 - facebookincubator/gloo#343
 - pytorch/builder@919676f

Fixes pytorch#56055

Pull Request resolved: pytorch#85969
Approved by: https://github.com/ezyang, https://github.com/kulinseth
  • Loading branch information
malfet authored and pruthvistony committed Jan 3, 2023
1 parent 6934745 commit dc93247
Show file tree
Hide file tree
Showing 25 changed files with 64 additions and 49 deletions.
5 changes: 5 additions & 0 deletions .circleci/scripts/build_android_gradle.sh
Expand Up @@ -20,6 +20,11 @@ do
touch "$file" || true
done < <(find /var/lib/jenkins/.gradle -type f -print0)

# Patch pocketfft (as Android does not have aligned_alloc even if compiled with c++17
if [ -f ~/workspace/third_party/pocketfft/pocketfft_hdronly.h ]; then
sed -i -e "s/#if __cplusplus >= 201703L/#if 0/" ~/workspace/third_party/pocketfft/pocketfft_hdronly.h
fi

export GRADLE_LOCAL_PROPERTIES=~/workspace/android/local.properties
rm -f $GRADLE_LOCAL_PROPERTIES
echo "sdk.dir=/opt/android/sdk" >> $GRADLE_LOCAL_PROPERTIES
Expand Down
6 changes: 2 additions & 4 deletions CMakeLists.txt
Expand Up @@ -31,9 +31,9 @@ string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
if(env_cxx_standard GREATER -1)
message(
WARNING "C++ standard version definition detected in environment variable."
"PyTorch requires -std=c++14. Please remove -std=c++ settings in your environment.")
"PyTorch requires -std=c++17. Please remove -std=c++ settings in your environment.")
endif()
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_C_STANDARD 11 CACHE STRING "The C standard whose features are requested to build this target.")

if(DEFINED GLIBCXX_USE_CXX11_ABI)
Expand Down Expand Up @@ -885,7 +885,6 @@ if(NOT MSVC)
append_cxx_flag_if_supported("-Wno-unused-private-field" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-inconsistent-missing-override" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-aligned-allocation-unavailable" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-c++14-extensions" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-constexpr-not-const" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wunused-lambda-capture" CMAKE_CXX_FLAGS)
Expand Down Expand Up @@ -988,7 +987,6 @@ if(APPLE)
endif()
append_cxx_flag_if_supported("-Wno-unused-private-field" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-c++14-extensions" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-constexpr-not-const" CMAKE_CXX_FLAGS)
endif()

Expand Down
2 changes: 1 addition & 1 deletion android/pytorch_android/CMakeLists.txt
Expand Up @@ -14,7 +14,7 @@ endif()

include(GNUInstallDirs)

set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_VERBOSE_MAKEFILE ON)
message(STATUS "ANDROID_STL:${ANDROID_STL}")

Expand Down
2 changes: 1 addition & 1 deletion android/pytorch_android_torchvision/CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.4.1)
project(pytorch_vision_jni CXX)
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_VERBOSE_MAKEFILE ON)

set(pytorch_vision_cpp_DIR ${CMAKE_CURRENT_LIST_DIR}/src/main/cpp)
Expand Down
2 changes: 1 addition & 1 deletion android/test_app/app/CMakeLists.txt
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.4.1)
set(PROJECT_NAME pytorch_testapp_jni)
project(${PROJECT_NAME} CXX)
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_VERBOSE_MAKEFILE ON)

set(build_DIR ${CMAKE_SOURCE_DIR}/build)
Expand Down
Expand Up @@ -577,14 +577,16 @@ namespace impl {
// Decay ReturnType to ReturnType_ so that if a reference gets returned, we actually store it by value
// and don't get a dangling reference. This is only required because some kernels still return `Tensor&`.
#ifdef __cpp_if_constexpr
using ReturnType_ = std::decay_t<ReturnType>;
// [Note: VC++ and 'std': ambiguous symbol]
using ReturnType_ = ::std::decay_t<ReturnType>;
ReturnType_ output = call_functor_with_args_from_stack<KernelFunctor, AllowDeprecatedTypes>(functor, dispatchKeySet, stack);
#else
using ReturnType_ = std::decay_t<typename decltype(delay_check)::template type_identity<ReturnType>>;
ReturnType_ output = call_functor_with_args_from_stack<KernelFunctor, AllowDeprecatedTypes>(functor, dispatchKeySet, delay_check(stack));
#endif
torch::jit::drop(*stack, num_inputs);
push_outputs<ReturnType_, AllowDeprecatedTypes>::call(std::move(output), stack);
// See note [ VC++ and 'std': ambiguous symbol]
push_outputs<ReturnType_, AllowDeprecatedTypes>::call(::std::move(output), stack);
#ifdef __cpp_if_constexpr
} else {
#else
Expand Down
4 changes: 2 additions & 2 deletions aten/src/ATen/native/cpu/GridSamplerKernel.cpp
Expand Up @@ -511,8 +511,8 @@ struct ApplyGridSample<scalar_t, 2, GridSamplerInterpolation::Bilinear,
auto sw = n * e;
auto se = n * w;

auto i_x_w = convert_to_int_of_same_size(x_w);
auto i_y_n = convert_to_int_of_same_size(y_n);
auto i_x_w = convert_to_int_of_same_size<scalar_t>(x_w);
auto i_y_n = convert_to_int_of_same_size<scalar_t>(y_n);
auto i_x_e = i_x_w + iVec(1);
auto i_y_s = i_y_n + iVec(1);

Expand Down
4 changes: 2 additions & 2 deletions aten/src/ATen/native/cuda/jit_utils.cpp
Expand Up @@ -1533,7 +1533,7 @@ NvrtcFunction jit_pwise_function(
&program, code.c_str(), nullptr, 0, nullptr, nullptr));

#ifdef USE_ROCM
std::vector<const char*> args = {"--std=c++14"};
std::vector<const char*> args = {"--std=c++17"};
#else
// Constructs nvrtc build arguments
// CUDA 11.1 allows going directly to SASS (sm_) instead of PTX (compute_)
Expand All @@ -1548,7 +1548,7 @@ NvrtcFunction jit_pwise_function(
std::to_string(cuda_minor);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::vector<const char*> args = {
"--std=c++14", compute.c_str(), "-default-device"};
"--std=c++17", compute.c_str(), "-default-device"};
#endif

#ifndef NDEBUG
Expand Down
2 changes: 1 addition & 1 deletion c10/CMakeLists.txt
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
project(c10 CXX)

set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Main build file for the C10 library.
Expand Down
2 changes: 1 addition & 1 deletion c10/util/C++17.h
Expand Up @@ -127,7 +127,7 @@ using void_t = typename make_void<Ts...>::type;
#define CUDA_HOST_DEVICE C10_HOST_DEVICE
#endif

#ifdef __cpp_lib_apply
#if defined(__cpp_lib_apply) && !defined(__CUDA_ARCH__)

template <class F, class Tuple>
CUDA_HOST_DEVICE inline constexpr decltype(auto) apply(F&& f, Tuple&& t) {
Expand Down
7 changes: 5 additions & 2 deletions cmake/Dependencies.cmake
Expand Up @@ -1307,7 +1307,7 @@ if(USE_ROCM)
list(APPEND HIP_CXX_FLAGS -Wno-implicit-int-float-conversion)
list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
list(APPEND HIP_CXX_FLAGS -std=c++14)
list(APPEND HIP_CXX_FLAGS -std=c++17)
add_definitions(-DROCM_VERSION=${ROCM_VERSION_DEV_INT})
add_definitions(-DTORCH_HIP_VERSION=${TORCH_HIP_VERSION})
message("TORCH_HIP_VERSION=${TORCH_HIP_VERSION} is added as a compiler defines")
Expand Down Expand Up @@ -1571,6 +1571,9 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../caffe2/onnx/torch_ops")
if(NOT USE_SYSTEM_ONNX)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/onnx EXCLUDE_FROM_ALL)
if(NOT MSVC)
set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
endif()
endif()
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)

Expand Down Expand Up @@ -1673,7 +1676,7 @@ if(NOT INTERN_BUILD_MOBILE)
string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets --expt-extended-lambda")

if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
endif()

# use cub in a safe manner, see:
Expand Down
14 changes: 8 additions & 6 deletions cmake/ProtoBufPatch.cmake
Expand Up @@ -31,12 +31,14 @@ if(NOT SYSTEM_PROTOBUF)
# https://github.com/protocolbuffers/protobuf/commit/0400cca3236de1ca303af38bf81eab332d042b7c
# changes PROTOBUF_CONSTEXPR to constexpr, which breaks windows
# build.
string(
REGEX REPLACE
"static constexpr ([^ ]+) ([^ ]+) ="
"static \\1 const \\2 ="
content
"${content}")
if(MSVC)
string(
REGEX REPLACE
"static constexpr ([^ ]+) ([^ ]+) ="
"static \\1 const \\2 ="
content
"${content}")
endif()

foreach(ns ${NAMESPACES})
# Insert "const ::std::string& GetEmptyStringAlreadyInited();" within
Expand Down
2 changes: 1 addition & 1 deletion cmake/public/utils.cmake
Expand Up @@ -407,7 +407,7 @@ endmacro()
# Usage:
# torch_compile_options(lib_name)
function(torch_compile_options libname)
set_property(TARGET ${libname} PROPERTY CXX_STANDARD 14)
set_property(TARGET ${libname} PROPERTY CXX_STANDARD 17)
set(private_compile_options "")

# ---[ Check if warnings should be errors.
Expand Down
2 changes: 1 addition & 1 deletion functorch/CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.12)
project(functorch)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD 17)

include(GNUInstallDirs)
include(CMakePackageConfigHelpers)
Expand Down
4 changes: 2 additions & 2 deletions ios/TestApp/TestApp.xcodeproj/project.pbxproj
Expand Up @@ -253,7 +253,7 @@
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LANGUAGE_STANDARD = "gnu++17";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
Expand Down Expand Up @@ -312,7 +312,7 @@
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LANGUAGE_STANDARD = "gnu++17";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
Expand Down
5 changes: 5 additions & 0 deletions scripts/build_android.sh
Expand Up @@ -165,6 +165,11 @@ fi
# Use-specified CMake arguments go last to allow overridding defaults
CMAKE_ARGS+=($@)

# Patch pocketfft (as Android does not have aligned_alloc even if compiled with c++17
if [ -f third_party/pocketfft/pocketfft_hdronly.h ]; then
sed -i -e "s/#if __cplusplus >= 201703L/#if 0/" third_party/pocketfft/pocketfft_hdronly.h
fi

# Now, actually build the Android target.
BUILD_ROOT=${BUILD_ROOT:-"$CAFFE2_ROOT/build_android"}
INSTALL_PREFIX=${BUILD_ROOT}/install
Expand Down
4 changes: 2 additions & 2 deletions test/custom_backend/CMakeLists.txt
Expand Up @@ -9,9 +9,9 @@ endif()
find_package(Torch REQUIRED)

add_library(custom_backend SHARED custom_backend.cpp)
set_property(TARGET custom_backend PROPERTY CXX_STANDARD 14)
set_property(TARGET custom_backend PROPERTY CXX_STANDARD 17)
target_link_libraries(custom_backend "${TORCH_LIBRARIES}")

add_executable(test_custom_backend test_custom_backend.cpp)
set_property(TARGET test_custom_backend PROPERTY CXX_STANDARD 14)
set_property(TARGET test_custom_backend PROPERTY CXX_STANDARD 17)
target_link_libraries(test_custom_backend custom_backend)
4 changes: 2 additions & 2 deletions test/custom_operator/CMakeLists.txt
Expand Up @@ -9,12 +9,12 @@ endif()
find_package(Torch REQUIRED)

add_library(custom_ops SHARED op.cpp)
set_property(TARGET custom_ops PROPERTY CXX_STANDARD 14)
set_property(TARGET custom_ops PROPERTY CXX_STANDARD 17)

target_compile_features(custom_ops PUBLIC cxx_range_for)
target_link_libraries(custom_ops "${TORCH_LIBRARIES}")
target_compile_definitions(custom_ops PRIVATE custom_ops_EXPORTS)

add_executable(test_custom_ops test_custom_ops.cpp)
set_property(TARGET test_custom_ops PROPERTY CXX_STANDARD 14)
set_property(TARGET test_custom_ops PROPERTY CXX_STANDARD 17)
target_link_libraries(test_custom_ops custom_ops)
2 changes: 1 addition & 1 deletion test/jit_hooks/CMakeLists.txt
Expand Up @@ -9,5 +9,5 @@ endif()
find_package(Torch REQUIRED)

add_executable(test_jit_hooks test_jit_hooks.cpp)
set_property(TARGET test_jit_hooks PROPERTY CXX_STANDARD 14)
set_property(TARGET test_jit_hooks PROPERTY CXX_STANDARD 17)
target_link_libraries(test_jit_hooks "${TORCH_LIBRARIES}")
2 changes: 1 addition & 1 deletion test/mobile/custom_build/CMakeLists.txt
Expand Up @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.1)

project(custom_build_project)

set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")

# Find torch library
find_package(Torch REQUIRED)
Expand Down
4 changes: 2 additions & 2 deletions torch/csrc/jit/codegen/cuda/executor_utils.cpp
Expand Up @@ -1015,7 +1015,7 @@ std::pair<NvrtcFunction, std::string> nvrtcCompile(
});

#ifdef USE_ROCM
std::vector<const char*> args = {"--std=c++14"};
std::vector<const char*> args = {"--std=c++17"};
#if ROCM_VERSION >= 40200
args.push_back("-hip-pch");
#endif
Expand All @@ -1036,7 +1036,7 @@ std::pair<NvrtcFunction, std::string> nvrtcCompile(
std::to_string(minor);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::vector<const char*> args = {
"--std=c++14", compute.c_str(), "-default-device"};
"--std=c++17", compute.c_str(), "-default-device"};
#endif

const bool disable_fma = isOptionDisabled(DisableOption::Fma);
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp
Expand Up @@ -263,7 +263,7 @@ static const std::string compile_string =
#ifndef __PPC64__
// "-march=native "
#endif
"-std=c++14 -fPIC ${fopenmp} -shared \"${cpp_file}\" -o \"${so_file}\" -lm";
"-std=c++17 -fPIC ${fopenmp} -shared \"${cpp_file}\" -o \"${so_file}\" -lm";
#endif
static void runCompiler(
const std::string& cpp_file,
Expand Down
4 changes: 2 additions & 2 deletions torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
Expand Up @@ -127,7 +127,7 @@ FusedKernelCUDA::FusedKernelCUDA(
&program, code_.c_str(), nullptr, 0, nullptr, nullptr));

#if defined(USE_ROCM)
std::vector<const char*> args = {"--std=c++14"};
std::vector<const char*> args = {"--std=c++17"};
#if ROCM_VERSION >= 40200
args.push_back("-hip-pch");
#endif
Expand All @@ -148,7 +148,7 @@ FusedKernelCUDA::FusedKernelCUDA(
std::to_string(major) + std::to_string(minor);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
const std::vector<const char*> args = {
"--std=c++14", compute.c_str(), "-default-device"};
"--std=c++17", compute.c_str(), "-default-device"};
#endif
const auto result =
nvrtc().nvrtcCompileProgram(program, args.size(), args.data());
Expand Down
4 changes: 2 additions & 2 deletions torch/csrc/jit/tensorexpr/cuda_codegen.cpp
Expand Up @@ -1314,7 +1314,7 @@ void CudaCodeGen::CompileToNVRTC(
&program, code.c_str(), nullptr, 0, nullptr, nullptr));

#if defined(USE_ROCM)
std::vector<const char*> args = {"--std=c++14"};
std::vector<const char*> args = {"--std=c++17"};
#if ROCM_VERSION >= 40200
args.push_back("-hip-pch");
#endif
Expand All @@ -1335,7 +1335,7 @@ void CudaCodeGen::CompileToNVRTC(
std::to_string(major) + std::to_string(minor);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
const std::vector<const char*> args = {
"--std=c++14", compute.c_str(), "-default-device"};
"--std=c++17", compute.c_str(), "-default-device"};
#endif

auto result = nvrtc().nvrtcCompileProgram(program, args.size(), args.data());
Expand Down

0 comments on commit dc93247

Please sign in to comment.