Skip to content

Commit

Permalink
Migrate PyTorch to C++17 (pytorch#85969)
Browse files Browse the repository at this point in the history
With CUDA-10.2 gone we can finally do it!

This PR mostly contains build system related changes, invasive functional ones are to be followed.
Among many expected tweaks to the build system, here are a few unexpected ones:
 - Force onnx_proto project to be updated to C++17 to avoid `duplicate symbols` error when compiled by gcc-7.5.0, as storage rule for `constexpr` changed in C++17, but gcc does not seem to follow it
 - Do not use `std::apply` on CUDA but rely on the built-in variant, as it results in test failures when CUDA runtime picks host rather than device function when `std::apply` is invoked from CUDA code.
 - `std::decay_t` -> `::std::decay_t` and `std::move`->`::std::move` as VC++ for some reason claims that the `std` symbol is ambiguous
 - Disable use of `std::aligned_alloc` on Android, as its `libc++` does not implement it.

Some prerequisites:
 - pytorch#89297
 - pytorch#89605
 - pytorch#90228
 - pytorch#90389
 - pytorch#90379
 - pytorch#89570
 - facebookincubator/gloo#336
 - facebookincubator/gloo#343
 - pytorch/builder@919676f

Fixes pytorch#56055

Pull Request resolved: pytorch#85969
Approved by: https://github.com/ezyang, https://github.com/kulinseth
  • Loading branch information
malfet authored and pruthvistony committed Jan 3, 2023
1 parent 6934745 commit dc93247
Show file tree
Hide file tree
Showing 25 changed files with 64 additions and 49 deletions.
5 changes: 5 additions & 0 deletions .circleci/scripts/build_android_gradle.sh
Expand Up @@ -20,6 +20,11 @@ do
touch "$file" || true
done < <(find /var/lib/jenkins/.gradle -type f -print0)

# Patch pocketfft (as Android does not have aligned_alloc even if compiled with c++17
if [ -f ~/workspace/third_party/pocketfft/pocketfft_hdronly.h ]; then
sed -i -e "s/#if __cplusplus >= 201703L/#if 0/" ~/workspace/third_party/pocketfft/pocketfft_hdronly.h
fi

export GRADLE_LOCAL_PROPERTIES=~/workspace/android/local.properties
rm -f $GRADLE_LOCAL_PROPERTIES
echo "sdk.dir=/opt/android/sdk" >> $GRADLE_LOCAL_PROPERTIES
Expand Down
6 changes: 2 additions & 4 deletions CMakeLists.txt
Expand Up @@ -31,9 +31,9 @@ string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
if(env_cxx_standard GREATER -1)
message(
WARNING "C++ standard version definition detected in environment variable."
"PyTorch requires -std=c++14. Please remove -std=c++ settings in your environment.")
"PyTorch requires -std=c++17. Please remove -std=c++ settings in your environment.")
endif()
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_C_STANDARD 11 CACHE STRING "The C standard whose features are requested to build this target.")

if(DEFINED GLIBCXX_USE_CXX11_ABI)
Expand Down Expand Up @@ -885,7 +885,6 @@ if(NOT MSVC)
append_cxx_flag_if_supported("-Wno-unused-private-field" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-inconsistent-missing-override" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-aligned-allocation-unavailable" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-c++14-extensions" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-constexpr-not-const" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wunused-lambda-capture" CMAKE_CXX_FLAGS)
Expand Down Expand Up @@ -988,7 +987,6 @@ if(APPLE)
endif()
append_cxx_flag_if_supported("-Wno-unused-private-field" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-c++14-extensions" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-constexpr-not-const" CMAKE_CXX_FLAGS)
endif()

Expand Down
2 changes: 1 addition & 1 deletion android/pytorch_android/CMakeLists.txt
Expand Up @@ -14,7 +14,7 @@ endif()

include(GNUInstallDirs)

set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_VERBOSE_MAKEFILE ON)
message(STATUS "ANDROID_STL:${ANDROID_STL}")

Expand Down
2 changes: 1 addition & 1 deletion android/pytorch_android_torchvision/CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.4.1)
project(pytorch_vision_jni CXX)
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_VERBOSE_MAKEFILE ON)

set(pytorch_vision_cpp_DIR ${CMAKE_CURRENT_LIST_DIR}/src/main/cpp)
Expand Down
2 changes: 1 addition & 1 deletion android/test_app/app/CMakeLists.txt
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.4.1)
set(PROJECT_NAME pytorch_testapp_jni)
project(${PROJECT_NAME} CXX)
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_VERBOSE_MAKEFILE ON)

set(build_DIR ${CMAKE_SOURCE_DIR}/build)
Expand Down
Expand Up @@ -577,14 +577,16 @@ namespace impl {
// Decay ReturnType to ReturnType_ so that if a reference gets returned, we actually store it by value
// and don't get a dangling reference. This is only required because some kernels still return `Tensor&`.
#ifdef __cpp_if_constexpr
using ReturnType_ = std::decay_t<ReturnType>;
// [Note: VC++ and 'std': ambiguous symbol]
using ReturnType_ = ::std::decay_t<ReturnType>;
ReturnType_ output = call_functor_with_args_from_stack<KernelFunctor, AllowDeprecatedTypes>(functor, dispatchKeySet, stack);
#else
using ReturnType_ = std::decay_t<typename decltype(delay_check)::template type_identity<ReturnType>>;
ReturnType_ output = call_functor_with_args_from_stack<KernelFunctor, AllowDeprecatedTypes>(functor, dispatchKeySet, delay_check(stack));
#endif
torch::jit::drop(*stack, num_inputs);
push_outputs<ReturnType_, AllowDeprecatedTypes>::call(std::move(output), stack);
// See note [ VC++ and 'std': ambiguous symbol]
push_outputs<ReturnType_, AllowDeprecatedTypes>::call(::std::move(output), stack);
#ifdef __cpp_if_constexpr
} else {
#else
Expand Down
4 changes: 2 additions & 2 deletions aten/src/ATen/native/cpu/GridSamplerKernel.cpp
Expand Up @@ -511,8 +511,8 @@ struct ApplyGridSample<scalar_t, 2, GridSamplerInterpolation::Bilinear,
auto sw = n * e;
auto se = n * w;

auto i_x_w = convert_to_int_of_same_size(x_w);
auto i_y_n = convert_to_int_of_same_size(y_n);
auto i_x_w = convert_to_int_of_same_size<scalar_t>(x_w);
auto i_y_n = convert_to_int_of_same_size<scalar_t>(y_n);
auto i_x_e = i_x_w + iVec(1);
auto i_y_s = i_y_n + iVec(1);

Expand Down
4 changes: 2 additions & 2 deletions aten/src/ATen/native/cuda/jit_utils.cpp
Expand Up @@ -1533,7 +1533,7 @@ NvrtcFunction jit_pwise_function(
&program, code.c_str(), nullptr, 0, nullptr, nullptr));

#ifdef USE_ROCM
std::vector<const char*> args = {"--std=c++14"};
std::vector<const char*> args = {"--std=c++17"};
#else
// Constructs nvrtc build arguments
// CUDA 11.1 allows going directly to SASS (sm_) instead of PTX (compute_)
Expand All @@ -1548,7 +1548,7 @@ NvrtcFunction jit_pwise_function(
std::to_string(cuda_minor);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::vector<const char*> args = {
"--std=c++14", compute.c_str(), "-default-device"};
"--std=c++17", compute.c_str(), "-default-device"};
#endif

#ifndef NDEBUG
Expand Down
2 changes: 1 addition & 1 deletion c10/CMakeLists.txt
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
project(c10 CXX)

set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Main build file for the C10 library.
Expand Down
2 changes: 1 addition & 1 deletion c10/util/C++17.h
Expand Up @@ -127,7 +127,7 @@ using void_t = typename make_void<Ts...>::type;
#define CUDA_HOST_DEVICE C10_HOST_DEVICE
#endif

#ifdef __cpp_lib_apply
#if defined(__cpp_lib_apply) && !defined(__CUDA_ARCH__)

template <class F, class Tuple>
CUDA_HOST_DEVICE inline constexpr decltype(auto) apply(F&& f, Tuple&& t) {
Expand Down
7 changes: 5 additions & 2 deletions cmake/Dependencies.cmake
Expand Up @@ -1307,7 +1307,7 @@ if(USE_ROCM)
list(APPEND HIP_CXX_FLAGS -Wno-implicit-int-float-conversion)
list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
list(APPEND HIP_CXX_FLAGS -std=c++14)
list(APPEND HIP_CXX_FLAGS -std=c++17)
add_definitions(-DROCM_VERSION=${ROCM_VERSION_DEV_INT})
add_definitions(-DTORCH_HIP_VERSION=${TORCH_HIP_VERSION})
message("TORCH_HIP_VERSION=${TORCH_HIP_VERSION} is added as a compiler defines")
Expand Down Expand Up @@ -1571,6 +1571,9 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../caffe2/onnx/torch_ops")
if(NOT USE_SYSTEM_ONNX)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/onnx EXCLUDE_FROM_ALL)
if(NOT MSVC)
set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
endif()
endif()
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)

Expand Down Expand Up @@ -1673,7 +1676,7 @@ if(NOT INTERN_BUILD_MOBILE)
string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets --expt-extended-lambda")

if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
endif()

# use cub in a safe manner, see:
Expand Down
14 changes: 8 additions & 6 deletions cmake/ProtoBufPatch.cmake
Expand Up @@ -31,12 +31,14 @@ if(NOT SYSTEM_PROTOBUF)
# https://github.com/protocolbuffers/protobuf/commit/0400cca3236de1ca303af38bf81eab332d042b7c
# changes PROTOBUF_CONSTEXPR to constexpr, which breaks windows
# build.
string(
REGEX REPLACE
"static constexpr ([^ ]+) ([^ ]+) ="
"static \\1 const \\2 ="
content
"${content}")
if(MSVC)
string(
REGEX REPLACE
"static constexpr ([^ ]+) ([^ ]+) ="
"static \\1 const \\2 ="
content
"${content}")
endif()

foreach(ns ${NAMESPACES})
# Insert "const ::std::string& GetEmptyStringAlreadyInited();" within
Expand Down
2 changes: 1 addition & 1 deletion cmake/public/utils.cmake
Expand Up @@ -407,7 +407,7 @@ endmacro()
# Usage:
# torch_compile_options(lib_name)
function(torch_compile_options libname)
set_property(TARGET ${libname} PROPERTY CXX_STANDARD 14)
set_property(TARGET ${libname} PROPERTY CXX_STANDARD 17)
set(private_compile_options "")

# ---[ Check if warnings should be errors.
Expand Down
2 changes: 1 addition & 1 deletion functorch/CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.12)
project(functorch)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD 17)

include(GNUInstallDirs)
include(CMakePackageConfigHelpers)
Expand Down
4 changes: 2 additions & 2 deletions ios/TestApp/TestApp.xcodeproj/project.pbxproj
Expand Up @@ -253,7 +253,7 @@
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LANGUAGE_STANDARD = "gnu++17";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
Expand Down Expand Up @@ -312,7 +312,7 @@
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LANGUAGE_STANDARD = "gnu++17";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
Expand Down
5 changes: 5 additions & 0 deletions scripts/build_android.sh
Expand Up @@ -165,6 +165,11 @@ fi
# Use-specified CMake arguments go last to allow overridding defaults
CMAKE_ARGS+=($@)

# Patch pocketfft (as Android does not have aligned_alloc even if compiled with c++17
if [ -f third_party/pocketfft/pocketfft_hdronly.h ]; then
sed -i -e "s/#if __cplusplus >= 201703L/#if 0/" third_party/pocketfft/pocketfft_hdronly.h
fi

# Now, actually build the Android target.
BUILD_ROOT=${BUILD_ROOT:-"$CAFFE2_ROOT/build_android"}
INSTALL_PREFIX=${BUILD_ROOT}/install
Expand Down
4 changes: 2 additions & 2 deletions test/custom_backend/CMakeLists.txt
Expand Up @@ -9,9 +9,9 @@ endif()
find_package(Torch REQUIRED)

add_library(custom_backend SHARED custom_backend.cpp)
set_property(TARGET custom_backend PROPERTY CXX_STANDARD 14)
set_property(TARGET custom_backend PROPERTY CXX_STANDARD 17)
target_link_libraries(custom_backend "${TORCH_LIBRARIES}")

add_executable(test_custom_backend test_custom_backend.cpp)
set_property(TARGET test_custom_backend PROPERTY CXX_STANDARD 14)
set_property(TARGET test_custom_backend PROPERTY CXX_STANDARD 17)
target_link_libraries(test_custom_backend custom_backend)
4 changes: 2 additions & 2 deletions test/custom_operator/CMakeLists.txt
Expand Up @@ -9,12 +9,12 @@ endif()
find_package(Torch REQUIRED)

add_library(custom_ops SHARED op.cpp)
set_property(TARGET custom_ops PROPERTY CXX_STANDARD 14)
set_property(TARGET custom_ops PROPERTY CXX_STANDARD 17)

target_compile_features(custom_ops PUBLIC cxx_range_for)
target_link_libraries(custom_ops "${TORCH_LIBRARIES}")
target_compile_definitions(custom_ops PRIVATE custom_ops_EXPORTS)

add_executable(test_custom_ops test_custom_ops.cpp)
set_property(TARGET test_custom_ops PROPERTY CXX_STANDARD 14)
set_property(TARGET test_custom_ops PROPERTY CXX_STANDARD 17)
target_link_libraries(test_custom_ops custom_ops)
2 changes: 1 addition & 1 deletion test/jit_hooks/CMakeLists.txt
Expand Up @@ -9,5 +9,5 @@ endif()
find_package(Torch REQUIRED)

add_executable(test_jit_hooks test_jit_hooks.cpp)
set_property(TARGET test_jit_hooks PROPERTY CXX_STANDARD 14)
set_property(TARGET test_jit_hooks PROPERTY CXX_STANDARD 17)
target_link_libraries(test_jit_hooks "${TORCH_LIBRARIES}")
2 changes: 1 addition & 1 deletion test/mobile/custom_build/CMakeLists.txt
Expand Up @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.1)

project(custom_build_project)

set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")

# Find torch library
find_package(Torch REQUIRED)
Expand Down
4 changes: 2 additions & 2 deletions torch/csrc/jit/codegen/cuda/executor_utils.cpp
Expand Up @@ -1015,7 +1015,7 @@ std::pair<NvrtcFunction, std::string> nvrtcCompile(
});

#ifdef USE_ROCM
std::vector<const char*> args = {"--std=c++14"};
std::vector<const char*> args = {"--std=c++17"};
#if ROCM_VERSION >= 40200
args.push_back("-hip-pch");
#endif
Expand All @@ -1036,7 +1036,7 @@ std::pair<NvrtcFunction, std::string> nvrtcCompile(
std::to_string(minor);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::vector<const char*> args = {
"--std=c++14", compute.c_str(), "-default-device"};
"--std=c++17", compute.c_str(), "-default-device"};
#endif

const bool disable_fma = isOptionDisabled(DisableOption::Fma);
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp
Expand Up @@ -263,7 +263,7 @@ static const std::string compile_string =
#ifndef __PPC64__
// "-march=native "
#endif
"-std=c++14 -fPIC ${fopenmp} -shared \"${cpp_file}\" -o \"${so_file}\" -lm";
"-std=c++17 -fPIC ${fopenmp} -shared \"${cpp_file}\" -o \"${so_file}\" -lm";
#endif
static void runCompiler(
const std::string& cpp_file,
Expand Down
4 changes: 2 additions & 2 deletions torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
Expand Up @@ -127,7 +127,7 @@ FusedKernelCUDA::FusedKernelCUDA(
&program, code_.c_str(), nullptr, 0, nullptr, nullptr));

#if defined(USE_ROCM)
std::vector<const char*> args = {"--std=c++14"};
std::vector<const char*> args = {"--std=c++17"};
#if ROCM_VERSION >= 40200
args.push_back("-hip-pch");
#endif
Expand All @@ -148,7 +148,7 @@ FusedKernelCUDA::FusedKernelCUDA(
std::to_string(major) + std::to_string(minor);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
const std::vector<const char*> args = {
"--std=c++14", compute.c_str(), "-default-device"};
"--std=c++17", compute.c_str(), "-default-device"};
#endif
const auto result =
nvrtc().nvrtcCompileProgram(program, args.size(), args.data());
Expand Down
4 changes: 2 additions & 2 deletions torch/csrc/jit/tensorexpr/cuda_codegen.cpp
Expand Up @@ -1314,7 +1314,7 @@ void CudaCodeGen::CompileToNVRTC(
&program, code.c_str(), nullptr, 0, nullptr, nullptr));

#if defined(USE_ROCM)
std::vector<const char*> args = {"--std=c++14"};
std::vector<const char*> args = {"--std=c++17"};
#if ROCM_VERSION >= 40200
args.push_back("-hip-pch");
#endif
Expand All @@ -1335,7 +1335,7 @@ void CudaCodeGen::CompileToNVRTC(
std::to_string(major) + std::to_string(minor);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
const std::vector<const char*> args = {
"--std=c++14", compute.c_str(), "-default-device"};
"--std=c++17", compute.c_str(), "-default-device"};
#endif

auto result = nvrtc().nvrtcCompileProgram(program, args.size(), args.data());
Expand Down

0 comments on commit dc93247

Please sign in to comment.