diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000..e7d6fd6d93d99 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,12 @@ +# binary files +test/Object/Inputs/*.a-* binary +test/tools/dsymutil/Inputs/* binary +test/tools/llvm-ar/Inputs/*.lib binary +test/tools/llvm-objdump/Inputs/*.a binary +test/tools/llvm-rc/Inputs/* binary +test/tools/llvm-strings/Inputs/numbers binary +test/MC/AsmParser/incbin_abcd binary +test/YAMLParser/spec-09-02.test binary + +# Windows line ending test +test/MC/AsmParser/preserve-comments-crlf.s text eol=crlf diff --git a/CMakeLists.txt b/CMakeLists.txt index 1435859851a27..8cd9d053c63bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,22 +2,20 @@ cmake_minimum_required(VERSION 3.4.3) -if(POLICY CMP0022) - cmake_policy(SET CMP0022 NEW) # automatic when 2.8.12 is required -endif() +cmake_policy(SET CMP0022 NEW) -if (POLICY CMP0051) - # CMake 3.1 and higher include generator expressions of the form - # $ in the SOURCES property. These need to be - # stripped everywhere that access the SOURCES property, so we just - # defer to the OLD behavior of not including generator expressions - # in the output for now. - cmake_policy(SET CMP0051 OLD) -endif() +cmake_policy(SET CMP0048 NEW) -if(POLICY CMP0057) - cmake_policy(SET CMP0057 NEW) -endif() +# CMake 3.1 and higher include generator expressions of the form +# $ in the SOURCES property. These need to be +# stripped everywhere that access the SOURCES property, so we just +# defer to the OLD behavior of not including generator expressions +# in the output for now. 
+cmake_policy(SET CMP0051 OLD) + +cmake_policy(SET CMP0056 NEW) + +cmake_policy(SET CMP0057 NEW) if(NOT DEFINED LLVM_VERSION_MAJOR) set(LLVM_VERSION_MAJOR 6) @@ -32,13 +30,6 @@ if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX svn) endif() -if (POLICY CMP0048) - cmake_policy(SET CMP0048 NEW) - set(cmake_3_0_PROJ_VERSION - VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}) - set(cmake_3_0_LANGUAGES LANGUAGES) -endif() - if (NOT PACKAGE_VERSION) set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}") @@ -52,9 +43,8 @@ if ((CMAKE_GENERATOR MATCHES "Visual Studio") AND (CMAKE_GENERATOR_TOOLSET STREQ endif() project(LLVM - ${cmake_3_0_PROJ_VERSION} - ${cmake_3_0_LANGUAGES} - C CXX ASM) + VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH} + LANGUAGES C CXX ASM) if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) message(STATUS "No build type selected, default to Debug") @@ -177,6 +167,10 @@ if(LLVM_DEPENDENCY_DEBUGGING) endif() option(LLVM_ENABLE_DAGISEL_COV "Debug: Prints tablegen patterns that were used for selecting" OFF) +option(LLVM_ENABLE_GISEL_COV "Enable collection of GlobalISel rule coverage" OFF) +if(LLVM_ENABLE_GISEL_COV) + set(LLVM_GISEL_COV_PREFIX "${CMAKE_BINARY_DIR}/gisel-coverage-" CACHE STRING "Provide a filename prefix to collect the GlobalISel rule coverage") +endif() # Add path for custom modules set(CMAKE_MODULE_PATH @@ -189,6 +183,9 @@ set(CMAKE_MODULE_PATH # for use by clang_complete, YouCompleteMe, etc. set(CMAKE_EXPORT_COMPILE_COMMANDS 1) +option(LLVM_INSTALL_BINUTILS_SYMLINKS + "Install symlinks from the binutils tool names to the corresponding LLVM tools." OFF) + option(LLVM_INSTALL_UTILS "Include utility binaries in the 'install' target." OFF) option(LLVM_INSTALL_TOOLCHAIN_ONLY "Only include toolchain files in the 'install' target." 
OFF) @@ -354,7 +351,7 @@ set(LLVM_TARGET_ARCH "host" option(LLVM_ENABLE_TERMINFO "Use terminfo database if available." ON) -option(LLVM_ENABLE_LIBXML2 "Use libxml2 if available." ON) +set(LLVM_ENABLE_LIBXML2 "ON" CACHE STRING "Use libxml2 if available. Can be ON, OFF, or FORCE_ON") option(LLVM_ENABLE_LIBEDIT "Use libedit if available." ON) @@ -762,6 +759,7 @@ configure_file( add_custom_target(srpm COMMAND cpack -G TGZ --config CPackSourceConfig.cmake -B ${LLVM_SRPM_DIR}/SOURCES COMMAND rpmbuild -bs --define '_topdir ${LLVM_SRPM_DIR}' ${LLVM_SRPM_BINARY_SPECFILE}) +set_target_properties(srpm PROPERTIES FOLDER "Misc") # They are not referenced. See set_output_directory(). @@ -974,6 +972,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) # Installing the headers needs to depend on generating any public # tablegen'd headers. add_custom_target(llvm-headers DEPENDS intrinsics_gen) + set_target_properties(llvm-headers PROPERTIES FOLDER "Misc") if (NOT CMAKE_CONFIGURATION_TYPES) add_custom_target(install-llvm-headers @@ -997,13 +996,13 @@ if(LLVM_DISTRIBUTION_COMPONENTS) if(TARGET ${target}) add_dependencies(distribution ${target}) else() - message(FATAL_ERROR "Specified distribution component '${target}' doesn't have a target") + message(SEND_ERROR "Specified distribution component '${target}' doesn't have a target") endif() if(TARGET install-${target}) add_dependencies(install-distribution install-${target}) else() - message(FATAL_ERROR "Specified distribution component '${target}' doesn't have an install target") + message(SEND_ERROR "Specified distribution component '${target}' doesn't have an install target") endif() endforeach() endif() diff --git a/bindings/go/llvm/DIBuilderBindings.cpp b/bindings/go/llvm/DIBuilderBindings.cpp index a43bf97821fd0..ea53694b9c136 100644 --- a/bindings/go/llvm/DIBuilderBindings.cpp +++ b/bindings/go/llvm/DIBuilderBindings.cpp @@ -29,25 +29,6 @@ void LLVMDIBuilderDestroy(LLVMDIBuilderRef dref) { delete d; } -void 
LLVMDIBuilderFinalize(LLVMDIBuilderRef dref) { unwrap(dref)->finalize(); } - -LLVMMetadataRef LLVMDIBuilderCreateCompileUnit(LLVMDIBuilderRef Dref, - unsigned Lang, const char *File, - const char *Dir, - const char *Producer, - int Optimized, const char *Flags, - unsigned RuntimeVersion) { - DIBuilder *D = unwrap(Dref); - return wrap(D->createCompileUnit(Lang, D->createFile(File, Dir), Producer, - Optimized, Flags, RuntimeVersion)); -} - -LLVMMetadataRef LLVMDIBuilderCreateFile(LLVMDIBuilderRef Dref, const char *File, - const char *Dir) { - DIBuilder *D = unwrap(Dref); - return wrap(D->createFile(File, Dir)); -} - LLVMMetadataRef LLVMDIBuilderCreateLexicalBlock(LLVMDIBuilderRef Dref, LLVMMetadataRef Scope, LLVMMetadataRef File, diff --git a/bindings/go/llvm/DIBuilderBindings.h b/bindings/go/llvm/DIBuilderBindings.h index b097f2ec40059..cc5d2c1177f64 100644 --- a/bindings/go/llvm/DIBuilderBindings.h +++ b/bindings/go/llvm/DIBuilderBindings.h @@ -16,6 +16,7 @@ #include "IRBindings.h" #include "llvm-c/Core.h" +#include "llvm-c/DebugInfo.h" #ifdef __cplusplus extern "C" { @@ -30,16 +31,6 @@ typedef struct LLVMOpaqueDIBuilder *LLVMDIBuilderRef; LLVMDIBuilderRef LLVMNewDIBuilder(LLVMModuleRef m); void LLVMDIBuilderDestroy(LLVMDIBuilderRef d); -void LLVMDIBuilderFinalize(LLVMDIBuilderRef d); - -LLVMMetadataRef -LLVMDIBuilderCreateCompileUnit(LLVMDIBuilderRef D, unsigned Language, - const char *File, const char *Dir, - const char *Producer, int Optimized, - const char *Flags, unsigned RuntimeVersion); - -LLVMMetadataRef LLVMDIBuilderCreateFile(LLVMDIBuilderRef D, const char *File, - const char *Dir); LLVMMetadataRef LLVMDIBuilderCreateLexicalBlock(LLVMDIBuilderRef D, LLVMMetadataRef Scope, diff --git a/bindings/go/llvm/dibuilder.go b/bindings/go/llvm/dibuilder.go index 9d59e83f40a65..475fa685cc4b7 100644 --- a/bindings/go/llvm/dibuilder.go +++ b/bindings/go/llvm/dibuilder.go @@ -132,12 +132,17 @@ func (d *DIBuilder) CreateCompileUnit(cu DICompileUnit) Metadata { defer 
C.free(unsafe.Pointer(flags)) result := C.LLVMDIBuilderCreateCompileUnit( d.ref, - C.unsigned(cu.Language), - file, dir, - producer, - boolToCInt(cu.Optimized), - flags, + C.LLVMDWARFSourceLanguage(cu.Language), + C.LLVMDIBuilderCreateFile(d.ref, file, C.size_t(len(cu.File)), dir, C.size_t(len(cu.Dir))), + producer, C.size_t(len(cu.Producer)), + C.LLVMBool(boolToCInt(cu.Optimized)), + flags, C.size_t(len(cu.Flags)), C.unsigned(cu.RuntimeVersion), + /*SplitName=*/ nil, 0, + C.LLVMDWARFEmissionFull, + /*DWOId=*/ 0, + /*SplitDebugInlining*/ C.LLVMBool(boolToCInt(true)), + /*DebugInfoForProfiling*/ C.LLVMBool(boolToCInt(false)), ) return Metadata{C: result} } @@ -148,7 +153,9 @@ func (d *DIBuilder) CreateFile(filename, dir string) Metadata { defer C.free(unsafe.Pointer(cfilename)) cdir := C.CString(dir) defer C.free(unsafe.Pointer(cdir)) - result := C.LLVMDIBuilderCreateFile(d.ref, cfilename, cdir) + result := C.LLVMDIBuilderCreateFile(d.ref, + cfilename, C.size_t(len(filename)), + cdir, C.size_t(len(dir))) return Metadata{C: result} } diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index a1b4846f19ab1..23494fb96c6ba 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -157,7 +157,7 @@ if( NOT PURE_WINDOWS AND NOT LLVM_USE_SANITIZER MATCHES "Memory.*") set(HAVE_TERMINFO 0) endif() - find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2) + find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2 c) set(LLVM_LIBXML2_ENABLED 0) set(LIBXML2_FOUND 0) if((LLVM_ENABLE_LIBXML2) AND ((CMAKE_SYSTEM_NAME MATCHES "Linux") AND (ICONV_LIBRARY_PATH) OR APPLE)) @@ -170,6 +170,10 @@ if( NOT PURE_WINDOWS AND NOT LLVM_USE_SANITIZER MATCHES "Memory.*") endif() endif() +if (LLVM_ENABLE_LIBXML2 STREQUAL "FORCE_ON" AND NOT LLVM_LIBXML2_ENABLED) + message(FATAL_ERROR "Failed to congifure libxml2") +endif() + check_library_exists(xar xar_open "" HAVE_LIBXAR) if(HAVE_LIBXAR) set(XAR_LIB xar) diff --git a/cmake/modules/AddLLVM.cmake 
b/cmake/modules/AddLLVM.cmake index 3952d041344b4..908e7ee51ca8f 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -149,8 +149,13 @@ endfunction(add_llvm_symbol_exports) if(NOT WIN32 AND NOT APPLE) # Detect what linker we have here + if( LLVM_USE_LINKER ) + set(command ${CMAKE_C_COMPILER} -fuse-ld=${LLVM_USE_LINKER} -Wl,--version) + else() + set(command ${CMAKE_C_COMPILER} -Wl,--version) + endif() execute_process( - COMMAND ${CMAKE_C_COMPILER} -Wl,--version + COMMAND ${command} OUTPUT_VARIABLE stdout ERROR_VARIABLE stderr ) @@ -164,7 +169,8 @@ if(NOT WIN32 AND NOT APPLE) elseif("${stdout}" MATCHES "GNU ld") set(LLVM_LINKER_IS_GNULD ON) message(STATUS "Linker detection: GNU ld") - elseif("${stderr}" MATCHES "Solaris Link Editors") + elseif("${stderr}" MATCHES "Solaris Link Editors" OR + "${stdout}" MATCHES "Solaris Link Editors") set(LLVM_LINKER_IS_SOLARISLD ON) message(STATUS "Linker detection: Solaris ld") else() @@ -1037,6 +1043,13 @@ function(add_unittest test_suite test_name) set(EXCLUDE_FROM_ALL ON) endif() + # Our current version of gtest does not properly recognize C++11 support + # with MSVC, so it falls back to tr1 / experimental classes. Since LLVM + # itself requires C++11, we can safely force it on unconditionally so that + # we don't have to fight with the buggy gtest check. 
+ add_definitions(-DGTEST_LANG_CXX11=1) + add_definitions(-DGTEST_HAS_TR1_TUPLE=0) + include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include) include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googlemock/include) if (NOT LLVM_ENABLE_THREADS) @@ -1112,6 +1125,15 @@ function(llvm_canonicalize_cmake_booleans) endforeach() endfunction(llvm_canonicalize_cmake_booleans) +macro(set_llvm_build_mode) + # Configuration-time: See Unit/lit.site.cfg.in + if (CMAKE_CFG_INTDIR STREQUAL ".") + set(LLVM_BUILD_MODE ".") + else () + set(LLVM_BUILD_MODE "%(build_mode)s") + endif () +endmacro() + # This function provides an automatic way to 'configure'-like generate a file # based on a set of common and custom variables, specifically targeting the # variables needed for the 'lit.site.cfg' files. This function bundles the @@ -1135,12 +1157,7 @@ function(configure_lit_site_cfg site_in site_out) set(SHLIBEXT "${LTDL_SHLIB_EXT}") - # Configuration-time: See Unit/lit.site.cfg.in - if (CMAKE_CFG_INTDIR STREQUAL ".") - set(LLVM_BUILD_MODE ".") - else () - set(LLVM_BUILD_MODE "%(build_mode)s") - endif () + set_llvm_build_mode() # They below might not be the build tree but provided binary tree. 
set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}) diff --git a/cmake/modules/AddOCaml.cmake b/cmake/modules/AddOCaml.cmake index 1d8094cc505f5..02bab6846376b 100644 --- a/cmake/modules/AddOCaml.cmake +++ b/cmake/modules/AddOCaml.cmake @@ -221,3 +221,4 @@ add_custom_target(ocaml_make_directory COMMAND "${CMAKE_COMMAND}" "-E" "make_directory" "${LLVM_LIBRARY_DIR}/ocaml/llvm") add_custom_target("ocaml_all") set_target_properties(ocaml_all PROPERTIES FOLDER "Misc") +set_target_properties(ocaml_make_directory PROPERTIES FOLDER "Misc") diff --git a/cmake/modules/CheckLinkerFlag.cmake b/cmake/modules/CheckLinkerFlag.cmake index e96d35e7721ec..fe9d01a349cd8 100644 --- a/cmake/modules/CheckLinkerFlag.cmake +++ b/cmake/modules/CheckLinkerFlag.cmake @@ -1,8 +1,6 @@ include(CheckCXXCompilerFlag) function(check_linker_flag flag out_var) - set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${flag}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${flag}") check_cxx_compiler_flag("" ${out_var}) - set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) endfunction() diff --git a/cmake/modules/CrossCompile.cmake b/cmake/modules/CrossCompile.cmake index b0726ebd4de23..ff092b257ab72 100644 --- a/cmake/modules/CrossCompile.cmake +++ b/cmake/modules/CrossCompile.cmake @@ -16,12 +16,15 @@ function(llvm_create_cross_target_internal target_name toolchain buildtype) COMMAND ${CMAKE_COMMAND} -E make_directory ${LLVM_${target_name}_BUILD} COMMENT "Creating ${LLVM_${target_name}_BUILD}...") + add_custom_target(CREATE_LLVM_${target_name} + DEPENDS ${LLVM_${target_name}_BUILD}) + add_custom_command(OUTPUT ${LLVM_${target_name}_BUILD}/CMakeCache.txt COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" ${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_SOURCE_DIR} -DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE WORKING_DIRECTORY ${LLVM_${target_name}_BUILD} - DEPENDS ${LLVM_${target_name}_BUILD} + DEPENDS CREATE_LLVM_${target_name} COMMENT "Configuring 
${target_name} LLVM...") add_custom_target(CONFIGURE_LLVM_${target_name} diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index 76683c351d812..b5059a8a60e72 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -151,6 +151,14 @@ if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin" OR WIN32 OR CYGWIN OR set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,defs") endif() +# Pass -Wl,-z,nodelete. This makes sure our shared libraries are not unloaded +# by dlclose(). We need that since the CLI API relies on cross-references +# between global objects which became horribly broken when one of the libraries +# is unloaded. +if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,nodelete") +endif() + function(append value) foreach(variable ${ARGN}) @@ -194,10 +202,13 @@ if( LLVM_ENABLE_LLD ) endif() if( LLVM_USE_LINKER ) - check_cxx_compiler_flag("-fuse-ld=${LLVM_USE_LINKER}" CXX_SUPPORTS_CUSTOM_LINKER) + set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -fuse-ld=${LLVM_USE_LINKER}") + check_cxx_source_compiles("int main() { return 0; }" CXX_SUPPORTS_CUSTOM_LINKER) if ( NOT CXX_SUPPORTS_CUSTOM_LINKER ) message(FATAL_ERROR "Host compiler does not support '-fuse-ld=${LLVM_USE_LINKER}'") endif() + set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) append("-fuse-ld=${LLVM_USE_LINKER}" CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) endif() @@ -229,6 +240,10 @@ if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -m32") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -m32") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -m32") + + # FIXME: CMAKE_SIZEOF_VOID_P is still 8 + add_definitions(-D_LARGEFILE_SOURCE) + add_definitions(-D_FILE_OFFSET_BITS=64) endif( LLVM_BUILD_32_BITS 
) endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) @@ -239,6 +254,7 @@ if (ANDROID AND (ANDROID_NATIVE_API_LEVEL LESS 24)) set(LLVM_FORCE_SMALLFILE_FOR_ANDROID TRUE) endif() if( CMAKE_SIZEOF_VOID_P EQUAL 4 AND NOT LLVM_FORCE_SMALLFILE_FOR_ANDROID) + # FIXME: It isn't handled in LLVM_BUILD_32_BITS. add_definitions( -D_LARGEFILE_SOURCE ) add_definitions( -D_FILE_OFFSET_BITS=64 ) endif() @@ -730,14 +746,15 @@ if(LLVM_ENABLE_EH AND NOT LLVM_ENABLE_RTTI) message(FATAL_ERROR "Exception handling requires RTTI. You must set LLVM_ENABLE_RTTI to ON") endif() -option(LLVM_ENABLE_IR_PGO "Build LLVM and tools with IR PGO instrumentation (experimental)" Off) +option(LLVM_ENABLE_IR_PGO "Build LLVM and tools with IR PGO instrumentation (deprecated)" Off) mark_as_advanced(LLVM_ENABLE_IR_PGO) -option(LLVM_BUILD_INSTRUMENTED "Build LLVM and tools with PGO instrumentation" Off) +set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR or Frontend") mark_as_advanced(LLVM_BUILD_INSTRUMENTED) +string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED) if (LLVM_BUILD_INSTRUMENTED) - if (LLVM_ENABLE_IR_PGO) + if (LLVM_ENABLE_IR_PGO OR uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "IR") append("-fprofile-generate='${LLVM_PROFILE_DATA_DIR}'" CMAKE_CXX_FLAGS CMAKE_C_FLAGS @@ -760,6 +777,10 @@ append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate='${LLVM_PRO CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) +if (LLVM_BUILD_INSTRUMENTED AND LLVM_BUILD_INSTRUMENTED_COVERAGE) + message(FATAL_ERROR "LLVM_BUILD_INSTRUMENTED and LLVM_BUILD_INSTRUMENTED_COVERAGE cannot both be specified") +endif() + if(LLVM_ENABLE_LTO AND LLVM_ON_WIN32 AND NOT LINKER_IS_LLD_LINK) message(FATAL_ERROR "When compiling for Windows, LLVM_ENABLE_LTO requires using lld as the linker (point CMAKE_LINKER at lld-link.exe)") endif() diff --git a/cmake/modules/LLVMExternalProjectUtils.cmake b/cmake/modules/LLVMExternalProjectUtils.cmake 
index c3560836f7ed4..8ecf42acfee10 100644 --- a/cmake/modules/LLVMExternalProjectUtils.cmake +++ b/cmake/modules/LLVMExternalProjectUtils.cmake @@ -95,7 +95,7 @@ function(llvm_ExternalProject_Add name source_dir) foreach(prefix ${ARG_PASSTHROUGH_PREFIXES}) foreach(variableName ${variableNames}) if(variableName MATCHES "^${prefix}") - string(REPLACE ";" "\;" value "${${variableName}}") + string(REPLACE ";" "," value "${${variableName}}") list(APPEND PASSTHROUGH_VARIABLES -D${variableName}=${value}) endif() @@ -143,9 +143,11 @@ function(llvm_ExternalProject_Add name source_dir) CMAKE_ARGS ${${nameCanon}_CMAKE_ARGS} ${compiler_args} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX} + -DCMAKE_SYSROOT=${CMAKE_SYSROOT} -DLLVM_BINARY_DIR=${PROJECT_BINARY_DIR} -DLLVM_CONFIG_PATH=$ -DLLVM_ENABLE_WERROR=${LLVM_ENABLE_WERROR} + -DLLVM_HOST_TRIPLE=${LLVM_HOST_TRIPLE} -DPACKAGE_VERSION=${PACKAGE_VERSION} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} @@ -158,6 +160,7 @@ function(llvm_ExternalProject_Add name source_dir) USES_TERMINAL_CONFIGURE 1 USES_TERMINAL_BUILD 1 USES_TERMINAL_INSTALL 1 + LIST_SEPARATOR , ) if(ARG_USE_TOOLCHAIN) diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake index 7f17f7016b69a..d1afcb42f9de7 100644 --- a/cmake/modules/TableGen.cmake +++ b/cmake/modules/TableGen.cmake @@ -52,6 +52,13 @@ function(tablegen project ofn) list(APPEND LLVM_TABLEGEN_FLAGS "-instrument-coverage") endif() endif() + if (LLVM_ENABLE_GISEL_COV) + list(FIND ARGN "-gen-global-isel" idx) + if( NOT idx EQUAL -1 ) + list(APPEND LLVM_TABLEGEN_FLAGS "-instrument-gisel-coverage") + list(APPEND LLVM_TABLEGEN_FLAGS "-gisel-coverage-file=${LLVM_GISEL_COV_PREFIX}all") + endif() + endif() # We need both _TABLEGEN_TARGET and _TABLEGEN_EXE in the DEPENDS list # (both the target and the file) to have .inc files rebuilt on @@ -110,19 +117,6 @@ function(add_public_tablegen_target target) set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} ${target} 
PARENT_SCOPE) endfunction() -if(LLVM_USE_HOST_TOOLS AND NOT TARGET NATIVE_LIB_LLVMTABLEGEN) - llvm_ExternalProject_BuildCmd(tblgen_build_cmd LLVMSupport - ${LLVM_NATIVE_BUILD} - CONFIGURATION Release) - add_custom_command(OUTPUT LIB_LLVMTABLEGEN - COMMAND ${tblgen_build_cmd} - DEPENDS CONFIGURE_LLVM_NATIVE - WORKING_DIRECTORY ${LLVM_NATIVE_BUILD} - COMMENT "Building libLLVMTableGen for native TableGen..." - USES_TERMINAL) - add_custom_target(NATIVE_LIB_LLVMTABLEGEN DEPENDS LIB_LLVMTABLEGEN) -endif() - macro(add_tablegen target project) set(${target}_OLD_LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS}) set(LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS} TableGen) @@ -166,7 +160,7 @@ macro(add_tablegen target project) CONFIGURATION Release) add_custom_command(OUTPUT ${${project}_TABLEGEN_EXE} COMMAND ${tblgen_build_cmd} - DEPENDS ${target} NATIVE_LIB_LLVMTABLEGEN + DEPENDS CONFIGURE_LLVM_NATIVE ${target} WORKING_DIRECTORY ${LLVM_NATIVE_BUILD} COMMENT "Building native TableGen..." USES_TERMINAL) diff --git a/cmake/platforms/WinMsvc.cmake b/cmake/platforms/WinMsvc.cmake new file mode 100644 index 0000000000000..9ea9ff78a2550 --- /dev/null +++ b/cmake/platforms/WinMsvc.cmake @@ -0,0 +1,219 @@ +# Cross toolchain configuration for using clang-cl on non-Windows hosts to +# target MSVC. +# +# Usage: +# cmake -G Ninja +# -DCMAKE_TOOLCHAIN_FILE=/path/to/this/file +# -DLLVM_NATIVE_TOOLCHAIN=/path/to/llvm/installation +# -DMSVC_BASE=/path/to/MSVC/system/libraries/and/includes +# -DWINSDK_BASE=/path/to/windows-sdk +# -DWINSDK_VER=windows sdk version folder name +# +# LLVM_NATIVE_TOOLCHAIN: +# *Absolute path* to a folder containing the toolchain which will be used to +# build. At a minimum, this folder should have a bin directory with a +# copy of clang-cl, clang, clang++, and lld-link, as well as a lib directory +# containing clang's system resource directory. +# +# MSVC_BASE: +# *Absolute path* to the folder containing MSVC headers and system libraries. 
+# The layout of the folder matches that which is installed by MSVC 2017 on +# Windows, and should look like this: +# +# ${MSVC_BASE} +# include +# vector +# stdint.h +# etc... +# lib +# x64 +# libcmt.lib +# msvcrt.lib +# etc... +# x86 +# libcmt.lib +# msvcrt.lib +# etc... +# +# For versions of MSVC < 2017, or where you have a hermetic toolchain in a +# custom format, you must use symlinks or restructure it to look like the above. +# +# WINSDK_BASE: +# Together with WINSDK_VER, determines the location of Windows SDK headers +# and libraries. +# +# WINSDK_VER: +# Together with WINSDK_BASE, determines the locations of Windows SDK headers +# and libraries. +# +# WINSDK_BASE and WINSDK_VER work together to define a folder layout that matches +# that of the Windows SDK installation on a standard Windows machine. It should +# match the layout described below. +# +# Note that if you install Windows SDK to a windows machine and simply copy the +# files, it will already be in the correct layout. +# +# ${WINSDK_BASE} +# Include +# ${WINSDK_VER} +# shared +# ucrt +# um +# windows.h +# etc... +# Lib +# ${WINSDK_VER} +# ucrt +# x64 +# x86 +# ucrt.lib +# etc... +# um +# x64 +# x86 +# kernel32.lib +# etc +# +# IMPORTANT: In order for this to work, you will need a valid copy of the Windows +# SDK and C++ STL headers and libraries on your host. Additionally, since the +# Windows libraries and headers are not case-correct, you will need to have these +# mounted in a case-insensitive mount. This requires one command to set up. +# +# ~/src: mkdir winsdk +# ~/src: mkdir winsdk.icase +# ~/src: ciopfs winsdk/ winsdk.icase +# +# Now copy or otherwise install your headers and libraries to the winsdk.icase folder +# and use *that* folder as the path when configuring CMake. +# +# TODO: We could also provide a CMake option -DUSE_ICASE_VFS_OVERLAY=ON/OFF that would +# make this optional. For now, we require ciopfs. 
+ + +# When configuring CMake with a toolchain file against a top-level CMakeLists.txt, +# it will actually run CMake many times, once for each small test program used to +# determine what features a compiler supports. Unfortunately, none of these +# invocations share a CMakeCache.txt with the top-level invocation, meaning they +# won't see the value of any arguments the user passed via -D. Since these are +# necessary to properly configure MSVC in both the top-level configuration as well as +# all feature-test invocations, we set environment variables with the values so that +# these environments get inherited by child invocations. +function(init_user_prop prop) + if(${prop}) + set(ENV{_${prop}} "${${prop}}") + else() + set(${prop} "$ENV{_${prop}}" PARENT_SCOPE) + endif() +endfunction() + +# FIXME: We should support target architectures other than x64 +set(CMAKE_SYSTEM_NAME Windows) +set(CMAKE_SYSTEM_VERSION 10.0) +set(CMAKE_SYSTEM_PROCESSOR AMD64) + +init_user_prop(LLVM_NATIVE_TOOLCHAIN) +init_user_prop(MSVC_BASE) +init_user_prop(WINSDK_BASE) +init_user_prop(WINSDK_VER) + +set(MSVC_INCLUDE "${MSVC_BASE}/include") +set(MSVC_LIB "${MSVC_BASE}/lib") +set(WINSDK_INCLUDE "${WINSDK_BASE}/Include/${WINSDK_VER}") +set(WINSDK_LIB "${WINSDK_BASE}/Lib/${WINSDK_VER}") + +# Do some sanity checking to make sure we can find a native toolchain and +# that the Windows SDK / MSVC STL directories look kosher. 
+if(NOT EXISTS "${LLVM_NATIVE_TOOLCHAIN}/bin/clang-cl" OR + NOT EXISTS "${LLVM_NATIVE_TOOLCHAIN}/bin/lld-link") + message(SEND_ERROR + "LLVM_NATIVE_TOOLCHAIN folder '${LLVM_NATIVE_TOOLCHAIN}' does not " + "point to a valid directory containing bin/clang-cl and bin/lld-link " + "binaries") +endif() + +if(NOT EXISTS "${MSVC_BASE}" OR + NOT EXISTS "${MSVC_INCLUDE}" OR + NOT EXISTS "${MSVC_LIB}") + message(SEND_ERROR + "CMake variable MSVC_BASE must point to a folder containing MSVC " + "system headers and libraries") +endif() + +if(NOT EXISTS "${WINSDK_BASE}" OR + NOT EXISTS "${WINSDK_INCLUDE}" OR + NOT EXISTS "${WINSDK_LIB}") + message(SEND_ERROR + "CMake variable WINSDK_BASE and WINSDK_VER must resolve to a valid " + "Windows SDK installation") +endif() + +set(CMAKE_C_COMPILER "${LLVM_NATIVE_TOOLCHAIN}/bin/clang-cl" CACHE FILEPATH "") +set(CMAKE_CXX_COMPILER "${LLVM_NATIVE_TOOLCHAIN}/bin/clang-cl" CACHE FILEPATH "") +set(CMAKE_LINKER "${LLVM_NATIVE_TOOLCHAIN}/bin/lld-link" CACHE FILEPATH "") + +# Even though we're cross-compiling, we need some native tools (e.g. llvm-tblgen), and those +# native tools have to be built before we can start doing the cross-build. LLVM supports +# a CROSS_TOOLCHAIN_FLAGS_NATIVE argument which consists of a list of flags to pass to CMake +# when configuring the NATIVE portion of the cross-build. By default we construct this so +# that it points to the tools in the same location as the native clang-cl that we're using. 
+list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_ASM_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang") +list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_C_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang") +list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_CXX_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang++") + +set(CROSS_TOOLCHAIN_FLAGS_NATIVE "${_CTF_NATIVE_DEFAULT}" CACHE STRING "") + +set(COMPILE_FLAGS + -D_CRT_SECURE_NO_WARNINGS + -imsvc "${MSVC_INCLUDE}" + -imsvc "${WINSDK_INCLUDE}/ucrt" + -imsvc "${WINSDK_INCLUDE}/shared" + -imsvc "${WINSDK_INCLUDE}/um" + -imsvc "${WINSDK_INCLUDE}/winrt") + +string(REPLACE ";" " " COMPILE_FLAGS "${COMPILE_FLAGS}") + +# We need to preserve any flags that were passed in by the user. However, we +# can't append to CMAKE_C_FLAGS and friends directly, because toolchain files +# will be re-invoked on each reconfigure and therefore need to be idempotent. +# The assignments to the _INITIAL cache variables don't use FORCE, so they'll +# only be populated on the initial configure, and their values won't change +# afterward. +set(_CMAKE_C_FLAGS_INITIAL "${CMAKE_C_FLAGS}" CACHE STRING "") +set(CMAKE_C_FLAGS "${_CMAKE_C_FLAGS_INITIAL} ${COMPILE_FLAGS}" CACHE STRING "" FORCE) + +set(_CMAKE_CXX_FLAGS_INITIAL "${CMAKE_CXX_FLAGS}" CACHE STRING "") +set(CMAKE_CXX_FLAGS "${_CMAKE_CXX_FLAGS_INITIAL} ${COMPILE_FLAGS}" CACHE STRING "" FORCE) + +set(LINK_FLAGS + # Prevent CMake from attempting to invoke mt.exe. It only recognizes the slashed form and not the dashed form. + /manifest:no + + # FIXME: We should support target architectures other than x64. + -libpath:"${MSVC_LIB}/x64" + -libpath:"${WINSDK_LIB}/ucrt/x64" + -libpath:"${WINSDK_LIB}/um/x64") + +string(REPLACE ";" " " LINK_FLAGS "${LINK_FLAGS}") + +# See explanation for compiler flags above for the _INITIAL variables. 
+set(_CMAKE_EXE_LINKER_FLAGS_INITIAL "${CMAKE_EXE_LINKER_FLAGS}" CACHE STRING "") +set(CMAKE_EXE_LINKER_FLAGS "${_CMAKE_EXE_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE) + +set(_CMAKE_MODULE_LINKER_FLAGS_INITIAL "${CMAKE_MODULE_LINKER_FLAGS}" CACHE STRING "") +set(CMAKE_MODULE_LINKER_FLAGS "${_CMAKE_MODULE_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE) + +set(_CMAKE_SHARED_LINKER_FLAGS_INITIAL "${CMAKE_SHARED_LINKER_FLAGS}" CACHE STRING "") +set(CMAKE_SHARED_LINKER_FLAGS "${_CMAKE_SHARED_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE) + +# CMake populates these with a bunch of unnecessary libraries, which requires +# extra case-correcting symlinks and what not. Instead, let projects explicitly +# control which libraries they require. +set(CMAKE_C_STANDARD_LIBRARIES "" CACHE STRING "" FORCE) +set(CMAKE_CXX_STANDARD_LIBRARIES "" CACHE STRING "" FORCE) + +# CMake's InstallRequiredSystemLibraries module searches for a Visual Studio +# installation in order to determine where to copy the required DLLs. This +# installation won't exist when cross-compiling, of course, so silence the +# resulting warnings about missing libraries. +set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON) + diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst index c135aec73fc27..1cf30304dfc84 100644 --- a/docs/AMDGPUUsage.rst +++ b/docs/AMDGPUUsage.rst @@ -23,50 +23,55 @@ Target Triples Use the ``clang -target ---`` option to specify the target triple: - .. 
table:: AMDGPU Target Triples - :name: amdgpu-target-triples-table - - ============ ======== ========= =========== - Architecture Vendor OS Environment - ============ ======== ========= =========== - r600 amd - amdgcn amd - amdgcn amd amdhsa - amdgcn amd amdhsa opencl - amdgcn amd amdhsa amdgizcl - amdgcn amd amdhsa amdgiz - amdgcn amd amdhsa hcc - ============ ======== ========= =========== - -``r600-amd--`` - Supports AMD GPUs HD2XXX-HD6XXX for graphics and compute shaders executed on - the MESA runtime. - -``amdgcn-amd--`` - Supports AMD GPUs GCN GFX6 onwards for graphics and compute shaders executed on - the MESA runtime. - -``amdgcn-amd-amdhsa-`` - Supports AMD GCN GPUs GFX6 onwards for compute kernels executed on HSA [HSA]_ - compatible runtimes such as AMD's ROCm [AMD-ROCm]_. - -``amdgcn-amd-amdhsa-opencl`` - Supports AMD GCN GPUs GFX6 onwards for OpenCL compute kernels executed on HSA - [HSA]_ compatible runtimes such as AMD's ROCm [AMD-ROCm]_. See - :ref:`amdgpu-opencl`. - -``amdgcn-amd-amdhsa-amdgizcl`` - Same as ``amdgcn-amd-amdhsa-opencl`` except a different address space mapping - is used (see :ref:`amdgpu-address-spaces`). - -``amdgcn-amd-amdhsa-amdgiz`` - Same as ``amdgcn-amd-amdhsa-`` except a different address space mapping is - used (see :ref:`amdgpu-address-spaces`). - -``amdgcn-amd-amdhsa-hcc`` - Supports AMD GCN GPUs GFX6 onwards for AMD HC language compute kernels - executed on HSA [HSA]_ compatible runtimes such as AMD's ROCm [AMD-ROCm]_. See - :ref:`amdgpu-hcc`. + .. table:: AMDGPU Architectures + :name: amdgpu-architecture-table + + ============ ============================================================== + Architecture Description + ============ ============================================================== + ``r600`` AMD GPUs HD2XXX-HD6XXX for graphics and compute shaders. + ``amdgcn`` AMD GPUs GCN GFX6 onwards for graphics and compute shaders. + ============ ============================================================== + + .. 
table:: AMDGPU Vendors + :name: amdgpu-vendor-table + + ============ ============================================================== + Vendor Description + ============ ============================================================== + ``amd`` Can be used for all AMD GPU usage. + ``mesa3d`` Can be used if the OS is ``mesa3d``. + ============ ============================================================== + + .. table:: AMDGPU Operating Systems + :name: amdgpu-os-table + + ============== ============================================================ + OS Description + ============== ============================================================ + *<empty>* Defaults to the *unknown* OS. + ``amdhsa`` Compute kernels executed on HSA [HSA]_ compatible runtimes + such as AMD's ROCm [AMD-ROCm]_. + ``amdpal`` Graphic shaders and compute kernels executed on AMD PAL + runtime. + ``mesa3d`` Graphic shaders and compute kernels executed on Mesa 3D + runtime. + ============== ============================================================ + + .. table:: AMDGPU Environments + :name: amdgpu-environment-table + + ============ ============================================================== + Environment Description + ============ ============================================================== + *<empty>* Defaults to ``opencl``. + ``opencl`` OpenCL compute kernel (see :ref:`amdgpu-opencl`). + ``amdgizcl`` Same as ``opencl`` except a different address space mapping is + used (see :ref:`amdgpu-address-spaces`). + ``amdgiz`` Same as ``opencl`` except a different address space mapping is + used (see :ref:`amdgpu-address-spaces`). + ``hcc`` AMD HC language compute kernel (see :ref:`amdgpu-hcc`). + ============ ============================================================== .. _amdgpu-processors: @@ -77,139 +82,179 @@ Use the ``clang -mcpu <Processor>`` option to specify the AMD GPU processor. The names from both the *Processor* and *Alternative Processor* can be used. .. 
table:: AMDGPU Processors - :name: amdgpu-processors-table + :name: amdgpu-processor-table - ========== =========== ============ ===== ======= ================== - Processor Alternative Target dGPU/ Runtime Example - Processor Triple APU Support Products - Architecture - ========== =========== ============ ===== ======= ================== + =========== =============== ============ ===== ======= ================== + Processor Alternative Target dGPU/ ROCm Example + Processor Triple APU Support Products + Architecture + =========== =============== ============ ===== ======= ================== **Radeon HD 2000/3000 Series (R600)** [AMD-RADEON-HD-2000-3000]_ - -------------------------------------------------------------------- - r600 r600 dGPU - r630 r600 dGPU - rs880 r600 dGPU - rv670 r600 dGPU + ------------------------------------------------------------------------- + ``r600`` ``r600`` dGPU + ``r630`` ``r600`` dGPU + ``rs880`` ``r600`` dGPU + ``rv670`` ``r600`` dGPU **Radeon HD 4000 Series (R700)** [AMD-RADEON-HD-4000]_ - -------------------------------------------------------------------- - rv710 r600 dGPU - rv730 r600 dGPU - rv770 r600 dGPU + ------------------------------------------------------------------------- + ``rv710`` ``r600`` dGPU + ``rv730`` ``r600`` dGPU + ``rv770`` ``r600`` dGPU **Radeon HD 5000 Series (Evergreen)** [AMD-RADEON-HD-5000]_ - -------------------------------------------------------------------- - cedar r600 dGPU - redwood r600 dGPU - sumo r600 dGPU - juniper r600 dGPU - cypress r600 dGPU + ------------------------------------------------------------------------- + ``cedar`` ``r600`` dGPU + ``redwood`` ``r600`` dGPU + ``sumo`` ``r600`` dGPU + ``juniper`` ``r600`` dGPU + ``cypress`` ``r600`` dGPU **Radeon HD 6000 Series (Northern Islands)** [AMD-RADEON-HD-6000]_ - -------------------------------------------------------------------- - barts r600 dGPU - turks r600 dGPU - caicos r600 dGPU - cayman r600 dGPU + 
------------------------------------------------------------------------- + ``barts`` ``r600`` dGPU + ``turks`` ``r600`` dGPU + ``caicos`` ``r600`` dGPU + ``cayman`` ``r600`` dGPU **GCN GFX6 (Southern Islands (SI))** [AMD-GCN-GFX6]_ - -------------------------------------------------------------------- - gfx600 - tahiti amdgcn dGPU - gfx601 - pitcairn amdgcn dGPU - - verde - - oland - - hainan + ------------------------------------------------------------------------- + ``gfx600`` - ``tahiti`` ``amdgcn`` dGPU + ``gfx601`` - ``pitcairn`` ``amdgcn`` dGPU + - ``verde`` + - ``oland`` + - ``hainan`` **GCN GFX7 (Sea Islands (CI))** [AMD-GCN-GFX7]_ - -------------------------------------------------------------------- - gfx700 - bonaire amdgcn dGPU - Radeon HD 7790 - - Radeon HD 8770 - - R7 260 - - R7 260X - \ - kaveri amdgcn APU - A6-7000 - - A6 Pro-7050B - - A8-7100 - - A8 Pro-7150B - - A10-7300 - - A10 Pro-7350B - - FX-7500 - - A8-7200P - - A10-7400P - - FX-7600P - gfx701 - hawaii amdgcn dGPU ROCm - FirePro W8100 - - FirePro W9100 - - FirePro S9150 - - FirePro S9170 - gfx702 dGPU ROCm - Radeon R9 290 - - Radeon R9 290x - - Radeon R390 - - Radeon R390x - gfx703 - kabini amdgcn APU - E1-2100 - - mullins - E1-2200 - - E1-2500 - - E2-3000 - - E2-3800 - - A4-5000 - - A4-5100 - - A6-5200 - - A4 Pro-3340B + ------------------------------------------------------------------------- + ``gfx700`` - ``bonaire`` ``amdgcn`` dGPU - Radeon HD 7790 + - Radeon HD 8770 + - R7 260 + - R7 260X + \ - ``kaveri`` ``amdgcn`` APU - A6-7000 + - A6 Pro-7050B + - A8-7100 + - A8 Pro-7150B + - A10-7300 + - A10 Pro-7350B + - FX-7500 + - A8-7200P + - A10-7400P + - FX-7600P + ``gfx701`` - ``hawaii`` ``amdgcn`` dGPU ROCm - FirePro W8100 + - FirePro W9100 + - FirePro S9150 + - FirePro S9170 + ``gfx702`` ``amdgcn`` dGPU ROCm - Radeon R9 290 + - Radeon R9 290x + - Radeon R390 + - Radeon R390x + ``gfx703`` - ``kabini`` ``amdgcn`` APU - E1-2100 + - ``mullins`` - E1-2200 + - E1-2500 + - E2-3000 + - E2-3800 + 
- A4-5000 + - A4-5100 + - A6-5200 + - A4 Pro-3340B **GCN GFX8 (Volcanic Islands (VI))** [AMD-GCN-GFX8]_ - -------------------------------------------------------------------- - gfx800 - iceland amdgcn dGPU - FirePro S7150 - - FirePro S7100 - - FirePro W7100 - - Radeon R285 - - Radeon R9 380 - - Radeon R9 385 - - Mobile FirePro - M7170 - gfx801 - carrizo amdgcn APU - A6-8500P - - Pro A6-8500B - - A8-8600P - - Pro A8-8600B - - FX-8800P - - Pro A12-8800B - \ amdgcn APU ROCm - A10-8700P - - Pro A10-8700B - - A10-8780P - \ amdgcn APU - A10-9600P - - A10-9630P - - A12-9700P - - A12-9730P - - FX-9800P - - FX-9830P - \ amdgcn APU - E2-9010 - - A6-9210 - - A9-9410 - gfx802 - tonga amdgcn dGPU ROCm Same as gfx800 - gfx803 - fiji amdgcn dGPU ROCm - Radeon R9 Nano - - Radeon R9 Fury - - Radeon R9 FuryX - - Radeon Pro Duo - - FirePro S9300x2 - - Radeon Instinct MI8 - \ - polaris10 amdgcn dGPU ROCm - Radeon RX 470 - - Radeon RX 480 - - Radeon Instinct MI6 - \ - polaris11 amdgcn dGPU ROCm - Radeon RX 460 - gfx804 amdgcn dGPU Same as gfx803 - gfx810 - stoney amdgcn APU + ------------------------------------------------------------------------- + ``gfx800`` - ``iceland`` ``amdgcn`` dGPU - FirePro S7150 + - FirePro S7100 + - FirePro W7100 + - Radeon R285 + - Radeon R9 380 + - Radeon R9 385 + - Mobile FirePro + M7170 + ``gfx801`` - ``carrizo`` ``amdgcn`` APU - A6-8500P + - Pro A6-8500B + - A8-8600P + - Pro A8-8600B + - FX-8800P + - Pro A12-8800B + \ ``amdgcn`` APU ROCm - A10-8700P + - Pro A10-8700B + - A10-8780P + \ ``amdgcn`` APU - A10-9600P + - A10-9630P + - A12-9700P + - A12-9730P + - FX-9800P + - FX-9830P + \ ``amdgcn`` APU - E2-9010 + - A6-9210 + - A9-9410 + ``gfx802`` - ``tonga`` ``amdgcn`` dGPU ROCm Same as gfx800 + ``gfx803`` - ``fiji`` ``amdgcn`` dGPU ROCm - Radeon R9 Nano + - Radeon R9 Fury + - Radeon R9 FuryX + - Radeon Pro Duo + - FirePro S9300x2 + - Radeon Instinct MI8 + \ - ``polaris10`` ``amdgcn`` dGPU ROCm - Radeon RX 470 + - Radeon RX 480 + - Radeon Instinct MI6 + \ 
- ``polaris11`` ``amdgcn`` dGPU ROCm - Radeon RX 460 + ``gfx810`` - ``stoney`` ``amdgcn`` APU **GCN GFX9** [AMD-GCN-GFX9]_ - -------------------------------------------------------------------- - gfx900 amdgcn dGPU - Radeon Vega - Frontier Edition - - Radeon RX Vega 56 - - Radeon RX Vega 64 - - Radeon RX Vega 64 - Liquid - - Radeon Instinct MI25 - gfx901 amdgcn dGPU ROCm Same as gfx900 - except XNACK is - enabled - gfx902 amdgcn APU *TBA* - - .. TODO - Add product - names. - gfx903 amdgcn APU Same as gfx902 - except XNACK is - enabled - ========== =========== ============ ===== ======= ================== + ------------------------------------------------------------------------- + ``gfx900`` ``amdgcn`` dGPU ROCm - Radeon Vega + Frontier Edition + - Radeon RX Vega 56 + - Radeon RX Vega 64 + - Radeon RX Vega 64 + Liquid + - Radeon Instinct MI25 + ``gfx902`` ``amdgcn`` APU *TBA* + + .. TODO + Add product + names. + =========== =============== ============ ===== ======= ================== + +.. _amdgpu-target-features: + +Target Features +--------------- + +Target features control how code is generated to support certain +features. Not all target features are supported by all processors. The +runtime must ensure that the features supported by the device used to +execute the code match the features enabled when generating the +code. A mismatch of features may result in incorrect execution, or a +reduction in performance. + +Use the ``clang -m[no-]<TargetFeature>`` option to specify the AMD GPU +target features. + +For example: + +``-mxnack`` + Enable the *XNACK* feature. +``-mno-xnack`` + Disable the *XNACK* feature. + + .. 
table:: AMDGPU Target Features + :name: amdgpu-target-feature-table + + ============== ======== ================================================== + Target Feature Default Description + ============== ======== ================================================== + -m[no-]xnack disabled Enable/disable generating code that has + memory clauses that are compatible with + having XNACK replay enabled. + + This is used for demand paging and page + migration. If XNACK replay is enabled in + the device, then if a page fault occurs + the code may execute incorrectly if the + XNACK feature is not enabled. Executing + code that has the feature enabled on a + device that does not have XNACK replay + enabled will execute correctly, but may + be less performant than code with the + feature disabled. + + This feature is supported by the + ``amdgcn`` architecture for GFX8-GFX9. + ============== ======== ================================================== .. _amdgpu-address-spaces: @@ -268,14 +313,14 @@ The memory model supported is based on the HSA memory model [HSA]_ which is based in turn on HRF-indirect with scope inclusion [HRF]_. The happens-before relation is transitive over the synchonizes-with relation independent of scope, and synchonizes-with allows the memory scope instances to be inclusive (see -table :ref:`amdgpu-amdhsa-llvm-sync-scopes-amdhsa-table`). +table :ref:`amdgpu-amdhsa-llvm-sync-scopes-table`). This is different to the OpenCL [OpenCL]_ memory model which does not have scope inclusion and requires the memory scopes to exactly match. However, this is conservatively correct for OpenCL. - .. table:: AMDHSA LLVM Sync Scopes for AMDHSA - :name: amdgpu-amdhsa-llvm-sync-scopes-amdhsa-table + .. 
table:: AMDHSA LLVM Sync Scopes + :name: amdgpu-amdhsa-llvm-sync-scopes-table ================ ========================================================== LLVM Sync Scope Description @@ -364,16 +409,18 @@ The AMDGPU backend uses the following ELF header: ========================== =============================== ``e_ident[EI_CLASS]`` ``ELFCLASS64`` ``e_ident[EI_DATA]`` ``ELFDATA2LSB`` - ``e_ident[EI_OSABI]`` ``ELFOSABI_AMDGPU_HSA``, - ``ELFOSABI_AMDGPU_PAL`` or - ``ELFOSABI_AMDGPU_MESA3D`` - ``e_ident[EI_ABIVERSION]`` ``ELFABIVERSION_AMDGPU_HSA``, - ``ELFABIVERSION_AMDGPU_PAL`` or - ``ELFABIVERSION_AMDGPU_MESA3D`` - ``e_type`` ``ET_REL`` or ``ET_DYN`` + ``e_ident[EI_OSABI]`` - ``ELFOSABI_NONE`` + - ``ELFOSABI_AMDGPU_HSA`` + - ``ELFOSABI_AMDGPU_PAL`` + - ``ELFOSABI_AMDGPU_MESA3D`` + ``e_ident[EI_ABIVERSION]`` - ``ELFABIVERSION_AMDGPU_HSA`` + - ``ELFABIVERSION_AMDGPU_PAL`` + - ``ELFABIVERSION_AMDGPU_MESA3D`` + ``e_type`` - ``ET_REL`` + - ``ET_DYN`` ``e_machine`` ``EM_AMDGPU`` ``e_entry`` 0 - ``e_flags`` 0 + ``e_flags`` See :ref:`amdgpu-elf-header-e_flags-table` ========================== =============================== .. @@ -385,6 +432,7 @@ The AMDGPU backend uses the following ELF header: Name Value =============================== ===== ``EM_AMDGPU`` 224 + ``ELFOSABI_NONE`` 0 ``ELFOSABI_AMDGPU_HSA`` 64 ``ELFOSABI_AMDGPU_PAL`` 65 ``ELFOSABI_AMDGPU_MESA3D`` 66 @@ -394,23 +442,27 @@ The AMDGPU backend uses the following ELF header: =============================== ===== ``e_ident[EI_CLASS]`` - The ELF class is always ``ELFCLASS64``. The AMDGPU backend only supports 64 - bit applications. + The ELF class is: + + * ``ELFCLASS32`` for ``r600`` architecture. + + * ``ELFCLASS64`` for ``amdgcn`` architecture which only supports 64 + bit applications. ``e_ident[EI_DATA]`` - All AMDGPU targets use ELFDATA2LSB for little-endian byte ordering. + All AMDGPU targets use ``ELFDATA2LSB`` for little-endian byte ordering. 
``e_ident[EI_OSABI]`` - One of the following AMD GPU architecture specific OS ABIs: + One of the following AMD GPU architecture specific OS ABIs + (see :ref:`amdgpu-os-table`): - * ``ELFOSABI_AMDGPU_HSA`` is used to specify that the code object conforms to - the AMD HSA runtime ABI [HSA]_. + * ``ELFOSABI_NONE`` for *unknown* OS. - * ``ELFOSABI_AMDGPU_PAL`` is used to specify that the code object conforms to - the AMD PAL runtime ABI. + * ``ELFOSABI_AMDGPU_HSA`` for ``amdhsa`` OS. - * ``ELFOSABI_AMDGPU_MESA3D`` is used to specify that the code object conforms - to the AMD MESA runtime ABI. + * ``ELFOSABI_AMDGPU_PAL`` for ``amdpal`` OS. + + * ``ELFOSABI_AMDGPU_MESA3D`` for ``mesa3d`` OS. ``e_ident[EI_ABIVERSION]`` The ABI version of the AMD GPU architecture specific OS ABI to which the code @@ -423,7 +475,7 @@ The AMDGPU backend uses the following ELF header: runtime ABI. * ``ELFABIVERSION_AMDGPU_MESA3D`` is used to specify the version of AMD MESA - runtime ABI. + 3D runtime ABI. ``e_type`` Can be one of the following values: @@ -439,17 +491,74 @@ The AMDGPU backend uses the following ELF header: The AMD HSA runtime loader requires a ``ET_DYN`` code object. ``e_machine`` - The value ``EM_AMDGPU`` is used for the machine for all members of the AMD GPU - architecture family. The specific member is specified in the - ``NT_AMD_AMDGPU_ISA`` entry in the ``.note`` section (see - :ref:`amdgpu-note-records`). + The value ``EM_AMDGPU`` is used for the machine for all processors supported + by the ``r600`` and ``amdgcn`` architectures (see + :ref:`amdgpu-processor-table`). The specific processor is specified in the + ``EF_AMDGPU_MACH`` bit field of the ``e_flags`` (see + :ref:`amdgpu-elf-header-e_flags-table`). ``e_entry`` The entry point is 0 as the entry points for individual kernels must be selected in order to invoke them through AQL packets. ``e_flags`` - The value is 0 as no flags are used. + The AMDGPU backend uses the following ELF header flags: + + .. 
table:: AMDGPU ELF Header ``e_flags`` + :name: amdgpu-elf-header-e_flags-table + + ================================= ========== ============================= + Name Value Description + ================================= ========== ============================= + **AMDGPU Processor Flag** See :ref:`amdgpu-processor-table`. + -------------------------------------------- ----------------------------- + ``EF_AMDGPU_MACH`` 0x000000ff AMDGPU processor selection + mask for + ``EF_AMDGPU_MACH_xxx`` values + defined in + :ref:`amdgpu-ef-amdgpu-mach-table`. + ================================= ========== ============================= + + .. table:: AMDGPU ``EF_AMDGPU_MACH`` Values + :name: amdgpu-ef-amdgpu-mach-table + + ================================= ========== ============================= + Name Value Description (see + :ref:`amdgpu-processor-table`) + ================================= ========== ============================= + ``EF_AMDGPU_MACH_NONE`` 0 *not specified* + ``EF_AMDGPU_MACH_R600_R600`` 1 ``r600`` + ``EF_AMDGPU_MACH_R600_R630`` 2 ``r630`` + ``EF_AMDGPU_MACH_R600_RS880`` 3 ``rs880`` + ``EF_AMDGPU_MACH_R600_RV670`` 4 ``rv670`` + ``EF_AMDGPU_MACH_R600_RV710`` 5 ``rv710`` + ``EF_AMDGPU_MACH_R600_RV730`` 6 ``rv730`` + ``EF_AMDGPU_MACH_R600_RV770`` 7 ``rv770`` + ``EF_AMDGPU_MACH_R600_CEDAR`` 8 ``cedar`` + ``EF_AMDGPU_MACH_R600_REDWOOD`` 9 ``redwood`` + ``EF_AMDGPU_MACH_R600_SUMO`` 10 ``sumo`` + ``EF_AMDGPU_MACH_R600_JUNIPER`` 11 ``juniper`` + ``EF_AMDGPU_MACH_R600_CYPRESS`` 12 ``cypress`` + ``EF_AMDGPU_MACH_R600_BARTS`` 13 ``barts`` + ``EF_AMDGPU_MACH_R600_TURKS`` 14 ``turks`` + ``EF_AMDGPU_MACH_R600_CAICOS`` 15 ``caicos`` + ``EF_AMDGPU_MACH_R600_CAYMAN`` 16 ``cayman`` + *reserved* 17-31 Reserved for ``r600`` + architecture processors. 
+ ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 32 ``gfx600`` + ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 33 ``gfx601`` + ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 34 ``gfx700`` + ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 35 ``gfx701`` + ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 36 ``gfx702`` + ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 37 ``gfx703`` + ``EF_AMDGPU_MACH_AMDGCN_GFX800`` 38 ``gfx800`` + ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 39 ``gfx801`` + ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 40 ``gfx802`` + ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 41 ``gfx803`` + ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 42 ``gfx810`` + ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 43 ``gfx900`` + ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 44 ``gfx902`` + ================================= ========== ============================= Sections -------- @@ -515,11 +624,11 @@ if needed. Note Records ------------ -As required by ``ELFCLASS64``, minimal zero byte padding must be generated after -the ``name`` field to ensure the ``desc`` field is 4 byte aligned. In addition, -minimal zero byte padding must be generated to ensure the ``desc`` field size is -a multiple of 4 bytes. The ``sh_addralign`` field of the ``.note`` section must -be at least 4 to indicate at least 8 byte alignment. +As required by ``ELFCLASS32`` and ``ELFCLASS64``, minimal zero byte padding must +be generated after the ``name`` field to ensure the ``desc`` field is 4 byte +aligned. In addition, minimal zero byte padding must be generated to ensure the +``desc`` field size is a multiple of 4 bytes. The ``sh_addralign`` field of the +``.note`` section must be at least 4 to indicate at least 4 byte alignment. The AMDGPU backend code object uses the following ELF note records in the ``.note`` section. The *Description* column specifies the layout of the note @@ -537,7 +646,6 @@ Additional note records can be present. 
Name Type Description ===== ============================== ====================================== "AMD" ``NT_AMD_AMDGPU_HSA_METADATA`` - "AMD" ``NT_AMD_AMDGPU_ISA`` ===== ============================== ====================================== .. @@ -550,49 +658,9 @@ Additional note records can be present. ============================== ===== *reserved* 0-9 ``NT_AMD_AMDGPU_HSA_METADATA`` 10 - ``NT_AMD_AMDGPU_ISA`` 11 + *reserved* 11 ============================== ===== -``NT_AMD_AMDGPU_ISA`` - Specifies the instruction set architecture used by the machine code contained - in the code object. - - This note record is required for code objects containing machine code for - processors matching the ``amdgcn`` architecture in table - :ref:`amdgpu-processors`. - - The null terminated string has the following syntax: - - *architecture*\ ``-``\ *vendor*\ ``-``\ *os*\ ``-``\ *environment*\ ``-``\ *processor* - - where: - - *architecture* - The architecture from table :ref:`amdgpu-target-triples-table`. - - This is always ``amdgcn`` when the target triple OS is ``amdhsa`` (see - :ref:`amdgpu-target-triples`). - - *vendor* - The vendor from table :ref:`amdgpu-target-triples-table`. - - For the AMDGPU backend this is always ``amd``. - - *os* - The OS from table :ref:`amdgpu-target-triples-table`. - - *environment* - An environment from table :ref:`amdgpu-target-triples-table`, or blank if - the environment has no affect on the execution of the code object. - - For the AMDGPU backend this is currently always blank. - *processor* - The processor from table :ref:`amdgpu-processors-table`. - - For example: - - ``amdgcn-amd-amdhsa--gfx901`` - ``NT_AMD_AMDGPU_HSA_METADATA`` Specifies extensible metadata associated with the code objects executed on HSA [HSA]_ compatible runtimes such as AMD's ROCm [AMD-ROCm]_. It is required when @@ -904,10 +972,6 @@ non-AMD key names should be prefixed by "*vendor-name*.". the kernel code. 
See :ref:`amdgpu-amdhsa-code-object-kernel-code-properties-metadata-mapping-table` for the mapping definition. - "DebugProps" mapping Mapping of properties related to - the kernel debugging. See - :ref:`amdgpu-amdhsa-code-object-kernel-debug-properties-metadata-mapping-table` - for the mapping definition. ================= ============== ========= ================================ .. @@ -1039,10 +1103,10 @@ non-AMD key names should be prefixed by "*vendor-name*.". passed in the kernarg. "HiddenCompletionAction" - *TBD* - - .. TODO - Add description. + A global address space pointer + to help link enqueued kernels into + the ancestor tree for determining + when the parent kernel has finished. "ValueType" string Required Kernel argument value type. Only present if "ValueKind" is @@ -1175,9 +1239,9 @@ non-AMD key names should be prefixed by "*vendor-name*.". private address space memory required for a work-item in - bytes. If - IsDynamicCallstack - is 1 then additional + bytes. If the kernel + uses a dynamic call + stack then additional space must be added to this value for the call stack. @@ -1188,7 +1252,7 @@ non-AMD key names should be prefixed by "*vendor-name*.". be a power of 2. "WavefrontSize" integer Required Wavefront size. Must be a power of 2. - "NumSGPRs" integer Number of scalar + "NumSGPRs" integer Required Number of scalar registers used by a wavefront for GFX6-GFX9. This @@ -1204,44 +1268,37 @@ non-AMD key names should be prefixed by "*vendor-name*.". rounded up to the allocation granularity. - "NumVGPRs" integer Number of vector + "NumVGPRs" integer Required Number of vector registers used by each work-item for GFX6-GFX9 - "MaxFlatWorkGroupSize" integer Maximum flat + "MaxFlatWorkGroupSize" integer Required Maximum flat work-group size supported by the kernel in work-items. - "IsDynamicCallStack" boolean Indicates if the - generated machine - code is using a - dynamically sized - call stack. 
+ Must be >=1 and + consistent with any + non-0 values in + FixedWorkGroupSize. + "FixedWorkGroupSize" sequence of Corresponds to the + 3 integers dispatch work-group + size X, Y, Z. If + omitted, defaults to + 0, 0, 0. If an + element is non-0 then + the kernel must only + be launched with a + matching corresponding + work-group size. "IsXNACKEnabled" boolean Indicates if the generated machine code is capable of - supporting XNACK. + supporting XNACK. See + :ref:`amdgpu-target-features`. ============================ ============== ========= ===================== .. - .. table:: AMDHSA Code Object Kernel Debug Properties Metadata Mapping - :name: amdgpu-amdhsa-code-object-kernel-debug-properties-metadata-mapping-table - - =================================== ============== ========= ============== - String Key Value Type Required? Description - =================================== ============== ========= ============== - "DebuggerABIVersion" sequence of - 2 integers - "ReservedNumVGPRs" integer - "ReservedFirstVGPR" integer - "PrivateSegmentBufferSGPR" integer - "WavefrontPrivateSegmentOffsetSGPR" integer - =================================== ============== ========= ============== - -.. TODO - Plan to remove the debug properties metadata. - Kernel Dispatch ~~~~~~~~~~~~~~~ @@ -1451,17 +1508,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. then additional space must be added to this value for the call stack. - 95:64 4 bytes MaxFlatWorkGroupSize Maximum flat work-group - size supported by the - kernel in work-items. - 96 1 bit IsDynamicCallStack Indicates if the generated - machine code is using a - dynamically sized call - stack. - 97 1 bit IsXNACKEnabled Indicates if the generated - machine code is capable of - suppoting XNACK. - 127:98 30 bits Reserved, must be 0. + 127:64 8 bytes Reserved, must be 0. 
191:128 8 bytes KernelCodeEntryByteOffset Byte offset (possibly negative) from base address of kernel @@ -1469,7 +1516,30 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. entry point instruction which must be 256 byte aligned. - 383:192 24 Reserved, must be 0. + 223:192 4 bytes MaxFlatWorkGroupSize Maximum flat work-group + size supported by the + kernel in work-items. If + an exact work-group size + is required then must be + omitted or 0 and + ReqdWorkGroupSize* must + be set to non-0. + 239:224 2 bytes ReqdWorkGroupSizeX If present and non-0 then + the kernel + must be executed with the + specified work-group size + for X. + 255:240 2 bytes ReqdWorkGroupSizeY If present and non-0 then + the kernel + must be executed with the + specified work-group size + for Y. + 271:256 2 bytes ReqdWorkGroupSizeZ If present and non-0 then + the kernel + must be executed with the + specified work-group size + for Z. + 383:272 14 Reserved, must be 0. bytes 415:384 4 bytes ComputePgmRsrc1 Compute Shader (CS) program settings used by @@ -1509,7 +1579,10 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. should always be 0. 457 1 bit EnableSGPRGridWorkgroupCountZ Not implemented in CP and should always be 0. - 463:458 6 bits Reserved, must be 0. + 462:458 5 bits Reserved, must be 0. + 463 1 bit IsXNACKEnabled Indicates if the generated + machine code is capable of + supporting XNACK. 511:464 6 Reserved, must be 0. bytes 512 **Total size 64 bytes.** @@ -1528,7 +1601,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. granularity is device specific: - GFX6-9 + GFX6-GFX9 - max_vgpr 1..256 - roundup((max_vgpg + 1) / 4) - 1 @@ -1540,7 +1613,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. 
granularity is device specific: - GFX6-8 + GFX6-GFX8 - max_sgpr 1..112 - roundup((max_sgpg + 1) / 8) - 1 @@ -1679,7 +1752,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. CP is responsible for filling in ``COMPUTE_PGM_RSRC1.CDBG_USER``. - 26 1 bit FP16_OVFL GFX6-8 + 26 1 bit FP16_OVFL GFX6-GFX8 Reserved, must be 0. GFX9 Wavefront starts execution @@ -2026,7 +2099,9 @@ SGPR register initial state is defined in instructions. Having CP load it once avoids loading it at the beginning of every - wavefront. GFX9 This is the + wavefront. + GFX9 + This is the 64 bit base address of the per SPI scratch backing memory managed by SPI for @@ -2043,18 +2118,17 @@ SGPR register initial state is defined in SGPR which is SGPRn-6 and SGPRn-5. It is used as the FLAT SCRATCH BASE in flat - memory instructions. then - Private Segment Size 1 The - 32 bit byte size of a - (enable_sgpr_private single - work-item's - scratch_segment_size) memory - allocation. This is the - value from the kernel - dispatch packet Private - Segment Byte Size rounded up - by CP to a multiple of - DWORD. + memory instructions. + then Private Segment Size 1 The 32 bit byte size of a + (enable_sgpr_private single + work-item's + scratch_segment_size) memory + allocation. This is the + value from the kernel + dispatch packet Private + Segment Byte Size rounded up + by CP to a multiple of + DWORD. Having CP load it once avoids loading it at the beginning of @@ -2166,7 +2240,7 @@ Flat Scratch register pair are adjacent SGRRs so they can be moved as a 64 bit value to the hardware required SGPRn-3 and SGPRn-4 respectively. The global segment can be accessed either using buffer instructions (GFX6 which -has V# 64 bit address support), flat instructions (GFX7-9), or global +has V# 64 bit address support), flat instructions (GFX7-GFX9), or global instructions (GFX9). 
If buffer operations are used then the compiler can generate a V# with the @@ -2212,7 +2286,7 @@ Offset SGPR registers (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`): GFX6 Flat scratch is not supported. -GFX7-8 +GFX7-GFX8 1. The low word of Flat Scratch Init is 32 bit byte offset from ``SH_HIDDEN_PRIVATE_BASE_VIMID`` to the base of scratch backing memory being managed by SPI for the queue executing the kernel dispatch. This is @@ -2227,6 +2301,7 @@ GFX7-8 DWORD. Having CP load it once avoids loading it at the beginning of every wavefront. The prolog must move it to FLAT_SCRATCH_LO for use as FLAT SCRATCH SIZE. + GFX9 The Flat Scratch Init is the 64 bit address of the base of scratch backing memory being managed by SPI for the queue executing the kernel dispatch. The @@ -2305,7 +2380,7 @@ For GFX6-GFX9: same wavefront. * The vector memory operations are performed as wavefront wide operations and completion is reported to a wavefront in execution order. The exception is - that for GFX7-9 ``flat_load/store/atomic`` instructions can report out of + that for GFX7-GFX9 ``flat_load/store/atomic`` instructions can report out of vector memory order if they access LDS memory, and out of LDS operation order if they access global memory. * The vector memory operations access a single vector L1 cache shared by all @@ -2332,7 +2407,7 @@ For GFX6-GFX9: * The L2 cache can be kept coherent with other agents on some targets, or ranges of virtual addresses can be set up to bypass it to ensure system coherence. -Private address space uses ``buffer_load/store`` using the scratch V# (GFX6-8), +Private address space uses ``buffer_load/store`` using the scratch V# (GFX6-GFX8), or ``scratch_load/store`` (GFX9). Since only a single thread is accessing the memory, atomic memory orderings are not meaningful and all accesses are treated as non-atomic. @@ -3727,7 +3802,7 @@ Assembler --------- AMDGPU backend has LLVM-MC based assembler which is currently in development. 
-It supports AMDGCN GFX6-GFX8. +It supports AMDGCN GFX6-GFX9. This section describes general syntax for instructions and operands. For more information about instructions, their semantics and supported combinations of diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index 6ee3842c8d908..98a3156e08251 100644 --- a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst @@ -681,7 +681,7 @@ for each library name referenced. MODULE_CODE_GLOBALVAR Record ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``[GLOBALVAR, strtab offset, strtab size, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat]`` +``[GLOBALVAR, strtab offset, strtab size, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat, attributes, preemptionspecifier]`` The ``GLOBALVAR`` record (code 7) marks the declaration or definition of a global variable. The operand fields are: @@ -761,12 +761,21 @@ global variable. The operand fields are: * *comdat*: An encoding of the COMDAT of this function +* *attributes*: If nonzero, the 1-based index into the table of AttributeLists. + +.. _bcpreemptionspecifier: + +* *preemptionspecifier*: If present, an encoding of the runtime preemption specifier of this variable: + + * ``dso_preemptable``: code 0 + * ``dso_local``: code 1 + .. _FUNCTION: MODULE_CODE_FUNCTION Record ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``[FUNCTION, strtab offset, strtab size, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, prologuedata, dllstorageclass, comdat, prefixdata, personalityfn]`` +``[FUNCTION, strtab offset, strtab size, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, prologuedata, dllstorageclass, comdat, prefixdata, personalityfn, preemptionspecifier]`` The ``FUNCTION`` record (code 8) marks the declaration or definition of a function. 
The operand fields are: @@ -828,10 +837,12 @@ function. The operand fields are: * *personalityfn*: If non-zero, the value index of the personality function for this function, plus 1. +* *preemptionspecifier*: If present, an encoding of the :ref:`runtime preemption specifier` of this function. + MODULE_CODE_ALIAS Record ^^^^^^^^^^^^^^^^^^^^^^^^ -``[ALIAS, strtab offset, strtab size, alias type, aliasee val#, linkage, visibility, dllstorageclass, threadlocal, unnamed_addr]`` +``[ALIAS, strtab offset, strtab size, alias type, aliasee val#, linkage, visibility, dllstorageclass, threadlocal, unnamed_addr, preemptionspecifier]`` The ``ALIAS`` record (code 9) marks the definition of an alias. The operand fields are @@ -856,6 +867,8 @@ fields are * *unnamed_addr*: If present, an encoding of the :ref:`unnamed_addr` attribute of this alias +* *preemptionspecifier*: If present, an encoding of the :ref:`runtime preemption specifier` of this alias. + .. _MODULE_CODE_GCNAME: MODULE_CODE_GCNAME Record diff --git a/docs/Bugpoint.rst b/docs/Bugpoint.rst index 6bd7ff99564ff..27732e0fffbdd 100644 --- a/docs/Bugpoint.rst +++ b/docs/Bugpoint.rst @@ -151,6 +151,11 @@ non-obvious ways. Here are some hints and tips: optimizations to be randomized and applied to the program. This process will repeat until a bug is found or the user kills ``bugpoint``. +* ``bugpoint`` can produce IR which contains long names. Run ``opt + -metarenamer`` over the IR to rename everything using easy-to-read, + metasyntactic names. Alternatively, run ``opt -strip -instnamer`` to rename + everything with very short (often purely numeric) names. + What to do when bugpoint isn't enough ===================================== diff --git a/docs/CMake.rst b/docs/CMake.rst index 473672b5f736f..05edec64da332 100644 --- a/docs/CMake.rst +++ b/docs/CMake.rst @@ -224,6 +224,10 @@ LLVM-specific variables Generate build targets for the LLVM tools. Defaults to ON. 
You can use this option to disable the generation of build targets for the LLVM tools. +**LLVM_INSTALL_BINUTILS_SYMLINKS**:BOOL + Install symlinks from the binutils tool names to the corresponding LLVM tools. + For example, ar will be symlinked to llvm-ar. + **LLVM_BUILD_EXAMPLES**:BOOL Build LLVM examples. Defaults to OFF. Targets for building each example are generated in any case. See documentation for *LLVM_BUILD_TOOLS* above for more diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index f1f93c7a228b0..0f2681e0cd86b 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -3,7 +3,7 @@ if (DOXYGEN_FOUND) if (LLVM_ENABLE_DOXYGEN) set(abs_top_srcdir ${CMAKE_CURRENT_SOURCE_DIR}) set(abs_top_builddir ${CMAKE_CURRENT_BINARY_DIR}) - + if (HAVE_DOT) set(DOT ${LLVM_PATH_DOT}) endif() @@ -21,20 +21,20 @@ if (LLVM_ENABLE_DOXYGEN) set(enable_external_search "NO") set(extra_search_mappings "") endif() - + # If asked, configure doxygen for the creation of a Qt Compressed Help file. option(LLVM_ENABLE_DOXYGEN_QT_HELP "Generate a Qt Compressed Help file." 
OFF) if (LLVM_ENABLE_DOXYGEN_QT_HELP) set(LLVM_DOXYGEN_QCH_FILENAME "org.llvm.qch" CACHE STRING "Filename of the Qt Compressed help file") - set(LLVM_DOXYGEN_QHP_NAMESPACE "org.llvm" CACHE STRING + set(LLVM_DOXYGEN_QHP_NAMESPACE "org.llvm" CACHE STRING "Namespace under which the intermediate Qt Help Project file lives") set(LLVM_DOXYGEN_QHP_CUST_FILTER_NAME "${PACKAGE_STRING}" CACHE STRING "See http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters") set(LLVM_DOXYGEN_QHP_CUST_FILTER_ATTRS "${PACKAGE_NAME},${PACKAGE_VERSION}" CACHE STRING "See http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes") - find_program(LLVM_DOXYGEN_QHELPGENERATOR_PATH qhelpgenerator + find_program(LLVM_DOXYGEN_QHELPGENERATOR_PATH qhelpgenerator DOC "Path to the qhelpgenerator binary") if (NOT LLVM_DOXYGEN_QHELPGENERATOR_PATH) message(FATAL_ERROR "Failed to find qhelpgenerator binary") @@ -55,7 +55,7 @@ if (LLVM_ENABLE_DOXYGEN) set(llvm_doxygen_qhp_cust_filter_name "") set(llvm_doxygen_qhp_cust_filter_attrs "") endif() - + option(LLVM_DOXYGEN_SVG "Use svg instead of png files for doxygen graphs." OFF) if (LLVM_DOXYGEN_SVG) @@ -113,6 +113,7 @@ if (LLVM_ENABLE_SPHINX) if (${SPHINX_OUTPUT_MAN}) add_sphinx_target(man llvm) add_sphinx_target(man llvm-dwarfdump) + add_sphinx_target(man dsymutil) endif() endif() diff --git a/docs/CodingStandards.rst b/docs/CodingStandards.rst index 0de2fc6b1a8bb..231c034be19d7 100644 --- a/docs/CodingStandards.rst +++ b/docs/CodingStandards.rst @@ -203,7 +203,7 @@ this means are `Effective Go`_ and `Go Code Review Comments`_. https://golang.org/doc/effective_go.html .. 
_Go Code Review Comments: - https://code.google.com/p/go-wiki/wiki/CodeReviewComments + https://github.com/golang/go/wiki/CodeReviewComments Mechanical Source Issues ======================== diff --git a/docs/CommandGuide/FileCheck.rst b/docs/CommandGuide/FileCheck.rst index 44cc57cebafe6..9078f65e01c52 100644 --- a/docs/CommandGuide/FileCheck.rst +++ b/docs/CommandGuide/FileCheck.rst @@ -86,6 +86,11 @@ OPTIONS All other variables get undefined after each encountered ``CHECK-LABEL``. +.. option:: -D + + Sets a filecheck variable ``VAR`` with value ``VALUE`` that can be used in + ``CHECK:`` lines. + .. option:: -version Show the version number of this program. diff --git a/docs/CommandGuide/dsymutil.rst b/docs/CommandGuide/dsymutil.rst new file mode 100644 index 0000000000000..a29bc3c295c7a --- /dev/null +++ b/docs/CommandGuide/dsymutil.rst @@ -0,0 +1,89 @@ +dsymutil - manipulate archived DWARF debug symbol files +======================================================= + +SYNOPSIS +-------- + +| :program:`dsymutil` [*options*] *executable* + +DESCRIPTION +----------- + +:program:`dsymutil` links the DWARF debug information found in the object files +for an executable *executable* by using debug symbols information contained in +its symbol table. By default, the linked debug information is placed in a +``.dSYM`` bundle with the same name as the executable. + +OPTIONS +------- +.. option:: --arch= + + Link DWARF debug information only for specified CPU architecture types. + Architectures may be specified by name. When using this option, an error will + be returned if any architectures can not be properly linked. This option can + be specified multiple times, once for each desired architecture. All CPU + architectures will be linked by default and any architectures that can't be + properly linked will cause :program:`dsymutil` to return an error. + +.. 
option:: --dump-debug-map + + Dump the *executable*'s debug-map (the list of the object files containing the + debug information) in YAML format and exit. No DWARF link will take place. + +.. option:: -f, --flat + + Produce a flat dSYM file. A ``.dwarf`` extension will be appended to the + executable name unless the output file is specified using the -o option. + +.. option:: --no-odr + + Do not use ODR (One Definition Rule) for uniquing C++ types. + +.. option:: --no-output + + Do the link in memory, but do not emit the result file. + +.. option:: --no-swiftmodule-timestamp + + Don't check the timestamp for swiftmodule files. + +.. option:: -j <n>, --num-threads=<n> + + Specifies the maximum number (``n``) of simultaneous threads to use when + linking multiple architectures. + +.. option:: -o <path> + + Specifies an alternate ``path`` to place the dSYM bundle. The default dSYM + bundle path is created by appending ``.dSYM`` to the executable name. + +.. option:: --oso-prepend-path=<path> + + Specifies a ``path`` to prepend to all debug symbol object file paths. + +.. option:: -s, --symtab + + Dumps the symbol table found in *executable* or object file(s) and exits. + +.. option:: -v, --verbose + + Display verbose information when linking. + +.. option:: --version + + Display the version of the tool. + +.. option:: -y + + Treat *executable* as a YAML debug-map rather than an executable. + +EXIT STATUS +----------- + +:program:`dsymutil` returns 0 if the DWARF debug information was linked +successfully. Otherwise, it returns 1.
+ +SEE ALSO +-------- + +:manpage:`llvm-dwarfdump(1)` diff --git a/docs/CommandGuide/index.rst b/docs/CommandGuide/index.rst index 5a0a98ceb1f98..805df00c1738c 100644 --- a/docs/CommandGuide/index.rst +++ b/docs/CommandGuide/index.rst @@ -30,6 +30,7 @@ Basic Commands llvm-stress llvm-symbolizer llvm-dwarfdump + dsymutil Debugging Tools ~~~~~~~~~~~~~~~ diff --git a/docs/CommandGuide/lli.rst b/docs/CommandGuide/lli.rst index 9da13ee47e0e1..58481073d0699 100644 --- a/docs/CommandGuide/lli.rst +++ b/docs/CommandGuide/lli.rst @@ -122,7 +122,7 @@ CODE GENERATION OPTIONS Choose the code model from: - .. code-block:: perl + .. code-block:: text default: Target default code model small: Small code model @@ -154,7 +154,7 @@ CODE GENERATION OPTIONS Instruction schedulers available (before register allocation): - .. code-block:: perl + .. code-block:: text =default: Best scheduler for the target =none: No scheduling: breadth first sequencing @@ -168,7 +168,7 @@ CODE GENERATION OPTIONS Register allocator to use (default=linearscan) - .. code-block:: perl + .. code-block:: text =bigblock: Big-block register allocator =linearscan: linear scan register allocator =local - local register allocator @@ -178,7 +178,7 @@ CODE GENERATION OPTIONS Choose relocation model from: - .. code-block:: perl + .. code-block:: text =default: Target default relocation model =static: Non-relocatable code =pic - Fully relocatable, position independent code @@ -188,7 +188,7 @@ CODE GENERATION OPTIONS Spiller to use (default=local) - .. code-block:: perl + .. code-block:: text =simple: simple spiller =local: local spiller @@ -197,7 +197,7 @@ CODE GENERATION OPTIONS Choose style of code to emit from X86 backend: - .. code-block:: perl + .. 
code-block:: text =att: Emit AT&T-style assembly =intel: Emit Intel-style assembly diff --git a/docs/CommandGuide/llvm-pdbutil.rst b/docs/CommandGuide/llvm-pdbutil.rst index 8836f3a3eb4cf..29d487e0e7409 100644 --- a/docs/CommandGuide/llvm-pdbutil.rst +++ b/docs/CommandGuide/llvm-pdbutil.rst @@ -142,7 +142,7 @@ Symbol Type Options Displays class definitions in the specified format. - .. code-block:: perl + .. code-block:: text =all - Display all class members including data, constants, typedefs, functions, etc (default) =layout - Only display members that contribute to class size. @@ -152,7 +152,7 @@ Symbol Type Options Displays classes in the specified order. - .. code-block:: perl + .. code-block:: text =none - Undefined / no particular sort order (default) =name - Sort classes by name @@ -200,7 +200,7 @@ Symbol Type Options Type of symbols to dump when -globals, -externals, or -module-syms is specified. (default all) - .. code-block:: perl + .. code-block:: text =thunks - Display thunk symbols =data - Display data symbols @@ -212,7 +212,7 @@ Symbol Type Options For symbols dumped via the -module-syms, -globals, or -externals options, sort the results in specified order. - .. code-block:: perl + .. code-block:: text =none - Undefined / no particular sort order =name - Sort symbols by name diff --git a/docs/FuzzingLLVM.rst b/docs/FuzzingLLVM.rst index e6ebeaf80cb47..b3cf719f275b2 100644 --- a/docs/FuzzingLLVM.rst +++ b/docs/FuzzingLLVM.rst @@ -100,6 +100,28 @@ mode, the same example could be run like so: % bin/llvm-isel-fuzzer--aarch64-O0-gisel +llvm-opt-fuzzer +--------------- + +A |LLVM IR fuzzer| aimed at finding bugs in optimization passes. + +It receives an optimization pipeline and runs it for each fuzzer input. + +The interface of this fuzzer almost directly mirrors ``llvm-isel-fuzzer``. Both +``mtriple`` and ``passes`` arguments are required. Passes are specified in a +format suitable for the new pass manager. + +..
code-block:: shell + + % bin/llvm-opt-fuzzer -ignore_remaining_args=1 -mtriple x86_64 -passes instcombine + +Similarly to the ``llvm-isel-fuzzer`` arguments in some predefined configurations +might be embedded directly into the binary file name: + +.. code-block:: shell + + % bin/llvm-opt-fuzzer--x86_64-instcombine + llvm-mc-assemble-fuzzer ----------------------- diff --git a/docs/GetElementPtr.rst b/docs/GetElementPtr.rst index c2da640fe0678..b593871695fac 100644 --- a/docs/GetElementPtr.rst +++ b/docs/GetElementPtr.rst @@ -196,7 +196,7 @@ illegal. In order to access the 18th integer in the array, you would need to do the following: -.. code-block:: llvm +.. code-block:: text %idx = getelementptr { [40 x i32]* }, { [40 x i32]* }* %, i64 0, i32 0 %arr = load [40 x i32]** %idx diff --git a/docs/GettingStartedVS.rst b/docs/GettingStartedVS.rst index 50f7aa123c558..a4ff2b822fc35 100644 --- a/docs/GettingStartedVS.rst +++ b/docs/GettingStartedVS.rst @@ -76,6 +76,11 @@ Here's the short story for getting up and running quickly with LLVM: * With anonymous Subversion access: + *Note:* some regression tests require Unix-style line ending (``\n``). To + pass all regression tests, please add two lines *enable-auto-props = yes* + and *\* = svn:mime-type=application/octet-stream* to + ``C:\Users\\AppData\Roaming\Subversion\config``. + 1. ``cd `` 2. ``svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm`` 3. 
``cd llvm`` diff --git a/docs/HowToCrossCompileBuiltinsOnArm.rst b/docs/HowToCrossCompileBuiltinsOnArm.rst new file mode 100644 index 0000000000000..4b4d563a5a968 --- /dev/null +++ b/docs/HowToCrossCompileBuiltinsOnArm.rst @@ -0,0 +1,201 @@ +=================================================================== +How to Cross Compile Compiler-rt Builtins For Arm +=================================================================== + +Introduction +============ + +This document contains information about building and testing the builtins part +of compiler-rt for an Arm target, from an x86_64 Linux machine. + +While this document concentrates on Arm and Linux the general principles should +apply to other targets supported by compiler-rt. Further contributions for other +targets are welcome. + +The instructions in this document depend on libraries and programs external to +LLVM; there are many ways to install and configure these dependencies so you +may need to adapt the instructions here to fit your own local situation. + +Prerequisites +============= + +In this use case we'll be using CMake on a Debian-based Linux system, +cross-compiling from an x86_64 host to a hard-float Armv7-A target. We'll be +using as many of the LLVM tools as we can, but it is possible to use GNU +equivalents. + + * ``A build of LLVM/clang for the llvm-tools and llvm-config`` + * ``The qemu-arm user mode emulator`` + * ``An arm-linux-gnueabihf sysroot`` + +See https://compiler-rt.llvm.org/ for more information about the dependencies +on clang and LLVM. + +``qemu-arm`` should be available as a package for your Linux distribution. + +The most complicated of the prerequisites to satisfy is the arm-linux-gnueabihf +sysroot. The :doc:`HowToCrossCompileLLVM` has information about how to use the +Linux distribution's multiarch support to fulfill the dependencies for building +LLVM.
Alternatively, as building and testing just the compiler-rt builtins +requires fewer dependencies than LLVM, it is possible to use the Linaro +arm-linux-gnueabihf gcc installation as our sysroot. + +Building compiler-rt builtins for Arm +===================================== +We will be doing a standalone build of compiler-rt using the following cmake +options. + +* ``path/to/llvm/projects/compiler-rt`` +* ``-DCOMPILER_RT_BUILD_BUILTINS=ON`` +* ``-DCOMPILER_RT_BUILD_SANITIZERS=OFF`` +* ``-DCOMPILER_RT_BUILD_XRAY=OFF`` +* ``-DCOMPILER_RT_BUILD_LIBFUZZER=OFF`` +* ``-DCOMPILER_RT_BUILD_PROFILE=OFF`` +* ``-DCMAKE_C_COMPILER=/path/to/clang`` +* ``-DCMAKE_AR=/path/to/llvm-ar`` +* ``-DCMAKE_NM=/path/to/llvm-nm`` +* ``-DCMAKE_RANLIB=/path/to/llvm-ranlib`` +* ``-DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=lld"`` +* ``-DCMAKE_C_COMPILER_TARGET="arm-linux-gnueabihf"`` +* ``-DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON`` +* ``-DLLVM_CONFIG_PATH=/path/to/llvm-config`` +* ``-DCMAKE_C_FLAGS="build-c-flags"`` + +The build-c-flags need to be sufficient to pass the CMake compiler check and +to compile compiler-rt. When using a GCC 7 Linaro arm-linux-gnueabihf +installation the following flags are needed: + +* ``--target=arm-linux-gnueabihf`` +* ``--march=armv7a`` +* ``--gcc-toolchain=/path/to/dir/toolchain`` +* ``--sysroot=/path/to/toolchain/arm-linux-gnueabihf/libc`` + +Depending on how your sysroot is laid out, you may not need ``--gcc-toolchain``. +For example if you have added armhf as an architecture using your Linux +distribution's multiarch support then you should be able to use ``--sysroot=/``. + +Once cmake has completed the builtins can be built with ``ninja builtins``. + +Testing compiler-rt builtins using qemu-arm +=========================================== +To test the builtins library we need to add a few more cmake flags to enable +testing and set up the compiler and flags for the test cases. We must also tell +cmake that we wish to run the tests on ``qemu-arm``.
+ +* ``-DCOMPILER_RT_EMULATOR="qemu-arm -L /path/to/armhf/sysroot"`` +* ``-DCOMPILER_RT_INCLUDE_TESTS=ON`` +* ``-DCOMPILER_RT_TEST_COMPILER="/path/to/clang"`` +* ``-DCOMPILER_RT_TEST_COMPILER_CFLAGS="test-c-flags"`` + +The ``/path/to/armhf/sysroot`` should be the same as the one passed to +``--sysroot`` in the "build-c-flags". + +The "test-c-flags" can be the same as the "build-c-flags", with the addition +of ``-fuse-ld=lld`` if you wish to use lld to link the tests. + +Once cmake has completed the tests can be built and run using +``ninja check-builtins`` + +Modifications for other Targets +=============================== + +Arm Soft-Float Target +--------------------- +The instructions for the Arm hard-float target can be used for the soft-float +target by substituting soft-float equivalents for the sysroot and target. The +target to use is: + +* ``-DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi`` + +Depending on whether you want to use floating point instructions or not you +may need extra c-flags such as ``-mfloat-abi=softfp`` for use of floating-point +instructions, and ``-mfloat-abi=soft -mfpu=none`` for software floating-point +emulation. + +AArch64 Target +-------------- +The instructions for Arm can be used for AArch64 by substituting AArch64 +equivalents for the sysroot, emulator and target. + +* ``-DCMAKE_C_COMPILER_TARGET=aarch64-linux-gnu`` +* ``-DCOMPILER_RT_EMULATOR="qemu-aarch64 -L /path/to/aarch64/sysroot"`` + +The CMAKE_C_FLAGS and COMPILER_RT_TEST_COMPILER_CFLAGS may also need: +``"--sysroot=/path/to/aarch64/sysroot --gcc-toolchain=/path/to/gcc-toolchain"`` + +Armv6-M, Armv7-M and Armv7E-M targets +------------------------------------- +If you wish to build, but not test compiler-rt for Armv6-M, Armv7-M or Armv7E-M +then the easiest way is to use the BaremetalARM.cmake recipe in +clang/cmake/caches. + +You will need a bare metal sysroot such as that provided by the GNU ARM +Embedded toolchain.
+ +The libraries can be built with the cmake options: + +* ``-DBAREMETAL_ARMV6M_SYSROOT=/path/to/bare/metal/sysroot`` +* ``-DBAREMETAL_ARMV7M_SYSROOT=/path/to/bare/metal/sysroot`` +* ``-DBAREMETAL_ARMV7EM_SYSROOT=/path/to/bare/metal/sysroot`` +* ``-C /path/to/llvm/source/tools/clang/cmake/caches/BaremetalARM.cmake`` + +**Note** that for the recipe to work the compiler-rt source must be checked out +into the directory llvm/runtimes and not llvm/projects. + +Building and testing the libraries using a method similar to that for Armv7-A is possible +but more difficult. The main problems are: + +* There isn't a ``qemu-arm`` user-mode emulator for bare-metal systems. The ``qemu-system-arm`` can be used but this is significantly more difficult to set up. +* The targets used to compile compiler-rt have the suffix -none-eabi. These use the BareMetal driver in clang and by default won't find the libraries needed to pass the cmake compiler check. + +As the Armv6-M, Armv7-M and Armv7E-M builds of compiler-rt only use instructions +that are supported on Armv7-A we can still get most of the value of running the +tests using the same ``qemu-arm`` that we used for Armv7-A by building and +running the test cases for Armv7-A but using the builtins compiled for +Armv6-M, Armv7-M or Armv7E-M. This will not catch instructions that are +supported on Armv7-A but not Armv6-M, Armv7-M and Armv7E-M. + +To get the cmake compile test to pass the libraries needed to successfully link +the test application will need to be manually added to ``CMAKE_C_FLAGS``. +Alternatively if you are using version 3.6 or above of cmake you can use +``CMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY`` to skip the link step.
+ +* ``-DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY`` +* ``-DCOMPILER_RT_OS_DIR="baremetal"`` +* ``-DCOMPILER_RT_BUILD_BUILTINS=ON`` +* ``-DCOMPILER_RT_BUILD_SANITIZERS=OFF`` +* ``-DCOMPILER_RT_BUILD_XRAY=OFF`` +* ``-DCOMPILER_RT_BUILD_LIBFUZZER=OFF`` +* ``-DCOMPILER_RT_BUILD_PROFILE=OFF`` +* ``-DCMAKE_C_COMPILER=${host_install_dir}/bin/clang`` +* ``-DCMAKE_C_COMPILER_TARGET="your *-none-eabi target"`` +* ``-DCMAKE_AR=/path/to/llvm-ar`` +* ``-DCMAKE_NM=/path/to/llvm-nm`` +* ``-DCMAKE_RANLIB=/path/to/llvm-ranlib`` +* ``-DCOMPILER_RT_BAREMETAL_BUILD=ON`` +* ``-DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON`` +* ``-DLLVM_CONFIG_PATH=/path/to/llvm-config`` +* ``-DCMAKE_C_FLAGS="build-c-flags"`` +* ``-DCMAKE_ASM_FLAGS="${arm_cflags}"`` +* ``-DCOMPILER_RT_EMULATOR="qemu-arm -L /path/to/armv7-A/sysroot"`` +* ``-DCOMPILER_RT_INCLUDE_TESTS=ON`` +* ``-DCOMPILER_RT_TEST_COMPILER="/path/to/clang"`` +* ``-DCOMPILER_RT_TEST_COMPILER_CFLAGS="test-c-flags"`` + +The Armv6-M builtins will use the soft-float ABI. When compiling the tests for +Armv7-A we must include ``"-mthumb -mfloat-abi=soft -mfpu=none"`` in the +test-c-flags. We must use an Armv7-A soft-float abi sysroot for ``qemu-arm``. + +Unfortunately at time of writing the Armv7-M and Armv7E-M builds of +compiler-rt will always include assembler files including floating point +instructions. This means that building for a cpu without a floating point unit +requires something like removing the arm_Thumb1_VFPv2_SOURCES from the +arm_Thumb1_SOURCES in builtins/CMakeLists.txt. The float-abi of the compiler-rt +library must be matched by the float abi of the Armv7-A sysroot used by +qemu-arm. + +Depending on the linker used for the test cases you may encounter BuildAttribute +mismatches between the M-profile objects from compiler-rt and the A-profile +objects from the test. The lld linker does not check the BuildAttributes so it +can be used to link the tests by adding -fuse-ld=lld to the +``COMPILER_RT_TEST_COMPILER_CFLAGS``. 
diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 99a2ffa40d60a..a091cc1dd2ae7 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -527,6 +527,24 @@ the alias is accessed. It will not have any effect in the aliasee. For platforms without linker support of ELF TLS model, the -femulated-tls flag can be used to generate GCC compatible emulated TLS code. +.. _runtime_preemption_model: + +Runtime Preemption Specifiers +----------------------------- + +Global variables, functions and aliases may have an optional runtime preemption +specifier. If a preemption specifier isn't given explicitly, then a +symbol is assumed to be ``dso_preemptable``. + +``dso_preemptable`` + Indicates that the function or variable may be replaced by a symbol from + outside the linkage unit at runtime. + +``dso_local`` + The compiler may assume that a function or variable marked as ``dso_local`` + will resolve to a symbol within the same linkage unit. Direct access will + be generated even if the definition is not within this compilation unit. + .. _namedtypes: Structure Types @@ -579,9 +597,9 @@ Global variables in other translation units can also be declared, in which case they don't have an initializer. Either global variable definitions or declarations may have an explicit section -to be placed in and may have an optional explicit alignment specified. If there -is a mismatch between the explicit or inferred section information for the -variable declaration and its definition the resulting behavior is undefined. +to be placed in and may have an optional explicit alignment specified. If there +is a mismatch between the explicit or inferred section information for the +variable declaration and its definition the resulting behavior is undefined. A variable may be defined as a global ``constant``, which indicates that the contents of the variable will **never** be modified (enabling better @@ -624,11 +642,11 @@ target supports it, it will emit globals to the section specified. 
Additionally, the global can placed in a comdat if the target has the necessary support. -External declarations may have an explicit section specified. Section -information is retained in LLVM IR for targets that make use of this -information. Attaching section information to an external declaration is an -assertion that its definition is located in the specified section. If the -definition is located in a different section, the behavior is undefined. +External declarations may have an explicit section specified. Section +information is retained in LLVM IR for targets that make use of this +information. Attaching section information to an external declaration is an +assertion that its definition is located in the specified section. If the +definition is located in a different section, the behavior is undefined. By default, global initializers are optimized by assuming that global variables defined within the module are not modified from their @@ -650,6 +668,7 @@ iterate over them as an array, alignment padding would break this iteration. The maximum alignment is ``1 << 29``. Globals can also have a :ref:`DLL storage class `, +an optional :ref:`runtime preemption specifier `, an optional :ref:`global attributes ` and an optional list of attached :ref:`metadata `. 
@@ -658,7 +677,8 @@ Variables and aliases can have a Syntax:: - @ = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] + @ = [Linkage] [PreemptionSpecifier] [Visibility] + [DLLStorageClass] [ThreadLocal] [(unnamed_addr|local_unnamed_addr)] [AddrSpace] [ExternallyInitialized] [] @@ -691,7 +711,8 @@ Functions --------- LLVM function definitions consist of the "``define``" keyword, an -optional :ref:`linkage type `, an optional :ref:`visibility +optional :ref:`linkage type `, an optional :ref:`runtime preemption +specifier `, an optional :ref:`visibility style `, an optional :ref:`DLL storage class `, an optional :ref:`calling convention `, an optional ``unnamed_addr`` attribute, a return type, an optional @@ -750,7 +771,7 @@ not be significant within the module. Syntax:: - define [linkage] [visibility] [DLLStorageClass] + define [linkage] [PreemptionSpecifier] [visibility] [DLLStorageClass] [cconv] [ret attrs] @ ([argument list]) [(unnamed_addr|local_unnamed_addr)] [fn Attrs] [section "name"] @@ -777,12 +798,13 @@ Aliases have a name and an aliasee that is either a global value or a constant expression. Aliases may have an optional :ref:`linkage type `, an optional +:ref:`runtime preemption specifier `, an optional :ref:`visibility style `, an optional :ref:`DLL storage class ` and an optional :ref:`tls model `. Syntax:: - @ = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] [(unnamed_addr|local_unnamed_addr)] alias , * @ + @ = [Linkage] [PreemptionSpecifier] [Visibility] [DLLStorageClass] [ThreadLocal] [(unnamed_addr|local_unnamed_addr)] alias , * @ The linkage must be one of ``private``, ``internal``, ``linkonce``, ``weak``, ``linkonce_odr``, ``weak_odr``, ``external``. 
Note that some system linkers @@ -2250,11 +2272,11 @@ seq\_cst total orderings of other operations that are not marked Fast-Math Flags --------------- -LLVM IR floating-point binary ops (:ref:`fadd `, +LLVM IR floating-point operations (:ref:`fadd `, :ref:`fsub `, :ref:`fmul `, :ref:`fdiv `, :ref:`frem `, :ref:`fcmp `) and :ref:`call ` -instructions have the following flags that can be set to enable -otherwise unsafe floating point transformations. +may use the following flags to enable otherwise unsafe +floating-point transformations. ``nnan`` No NaNs - Allow optimizations to assume the arguments and result are not @@ -2278,10 +2300,17 @@ otherwise unsafe floating point transformations. Allow floating-point contraction (e.g. fusing a multiply followed by an addition into a fused multiply-and-add). +``afn`` + Approximate functions - Allow substitution of approximate calculations for + functions (sin, log, sqrt, etc). See floating-point intrinsic definitions + for places where this can apply to LLVM's intrinsic math functions. + +``reassoc`` + Allow reassociation transformations for floating-point instructions. + This may dramatically change results in floating point. + ``fast`` - Fast - Allow algebraically equivalent transformations that may - dramatically change results in floating point (e.g. reassociate). This - flag implies all the others. + This flag implies all of the others. .. _uselistorder: @@ -4477,7 +4506,7 @@ source variable. DIExpressions also follow this model: A DIExpression that doesn't have a trailing ``DW_OP_stack_value`` will describe an *address* when combined with a concrete location. -.. code-block:: llvm +.. 
code-block:: text !0 = !DIExpression(DW_OP_deref) !1 = !DIExpression(DW_OP_plus_uconst, 3) @@ -4617,13 +4646,13 @@ As a concrete example, the type descriptor graph for the following program int i; // offset 0 float f; // offset 4 }; - + struct Outer { float f; // offset 0 double d; // offset 4 struct Inner inner_a; // offset 12 }; - + void f(struct Outer* outer, struct Inner* inner, float* f, int* i, char* c) { outer->f = 0; // tag0: (OuterStructTy, FloatScalarTy, 0) outer->inner_a.i = 0; // tag1: (OuterStructTy, IntScalarTy, 12) @@ -5172,14 +5201,37 @@ the loop identifier metadata node directly: !1 = !{!1} ; an identifier for the inner loop !2 = !{!2} ; an identifier for the outer loop +'``irr_loop``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^ + +``irr_loop`` metadata may be attached to the terminator instruction of a basic +block that's an irreducible loop header (note that an irreducible loop has more +than one header basic block). If ``irr_loop`` metadata is attached to the +terminator instruction of a basic block that is not really an irreducible loop +header, the behavior is undefined. The intent of this metadata is to improve the +accuracy of the block frequency propagation. For example, in the code below, the +block ``header0`` may have a loop header weight (relative to the other headers of +the irreducible loop) of 100: + +.. code-block:: llvm + + header0: + ... + br i1 %cmp, label %t1, label %t2, !irr_loop !0 + + ... + !0 = !{"loop_header_weight", i64 100} + +Irreducible loop header weights are typically based on profile data. + '``invariant.group``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``invariant.group`` metadata may be attached to ``load``/``store`` instructions.
-The existence of the ``invariant.group`` metadata on the instruction tells -the optimizer that every ``load`` and ``store`` to the same pointer operand -within the same invariant group can be assumed to load or store the same -value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects +The existence of the ``invariant.group`` metadata on the instruction tells +the optimizer that every ``load`` and ``store`` to the same pointer operand +within the same invariant group can be assumed to load or store the same +value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects when two pointers are considered the same). Pointers returned by bitcast or getelementptr with only zero indices are considered the same. @@ -5192,26 +5244,26 @@ Examples: %ptr = alloca i8 store i8 42, i8* %ptr, !invariant.group !0 call void @foo(i8* %ptr) - + %a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change call void @foo(i8* %ptr) %b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed - - %newPtr = call i8* @getPointer(i8* %ptr) + + %newPtr = call i8* @getPointer(i8* %ptr) %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr - + %unknownValue = load i8, i8* @unknownPtr store i8 %unknownValue, i8* %ptr, !invariant.group !0 ; Can assume that %unknownValue == 42 - + call void @foo(i8* %ptr) %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) %d = load i8, i8* %newPtr2, !invariant.group !0 ; Can't step through invariant.group.barrier to get value of %ptr - + ... declare void @foo(i8*) declare i8* @getPointer(i8*) declare i8* @llvm.invariant.group.barrier(i8*) - + !0 = !{!"magic ptr"} !1 = !{!"other ptr"} @@ -5220,7 +5272,7 @@ another based on aliasing information. This is because invariant.group is tied to the SSA value of the pointer operand. .. 
code-block:: llvm - + %v = load i8, i8* %x, !invariant.group !0 ; if %x mustalias %y then we can replace the above instruction with %v = load i8, i8* %y @@ -5250,7 +5302,7 @@ It does not have any effect on non-ELF targets. Example: -.. code-block:: llvm +.. code-block:: text $a = comdat any @a = global i32 1, comdat $a @@ -6678,9 +6730,9 @@ remainder. Note that unsigned integer remainder and signed integer remainder are distinct operations; for signed integer remainder, use '``srem``'. - + Taking the remainder of a division by zero is undefined behavior. -For vectors, if any element of the divisor is zero, the operation has +For vectors, if any element of the divisor is zero, the operation has undefined behavior. Example: @@ -6732,7 +6784,7 @@ Note that signed integer remainder and unsigned integer remainder are distinct operations; for unsigned integer remainder, use '``urem``'. Taking the remainder of a division by zero is undefined behavior. -For vectors, if any element of the divisor is zero, the operation has +For vectors, if any element of the divisor is zero, the operation has undefined behavior. Overflow also leads to undefined behavior; this is a rare case, but can occur, for example, by taking the remainder of a 32-bit division of @@ -7605,7 +7657,7 @@ be reused in the cache. The code generator may select special instructions to save cache bandwidth, such as the ``MOVNT`` instruction on x86. -The optional ``!invariant.group`` metadata must reference a +The optional ``!invariant.group`` metadata must reference a single metadata name ````. See ``invariant.group`` metadata. Semantics: @@ -7679,7 +7731,7 @@ A ``fence`` instruction can also take an optional Example: """""""" -.. code-block:: llvm +.. 
code-block:: text fence acquire ; yields void fence syncscope("singlethread") seq_cst ; yields void @@ -7711,10 +7763,10 @@ There are three arguments to the '``cmpxchg``' instruction: an address to operate on, a value to compare to the value currently be at that address, and a new value to place at that address if the compared values are equal. The type of '' must be an integer or pointer type whose -bit width is a power of two greater than or equal to eight and less +bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. '' and '' must -have the same type, and the type of '' must be a pointer to -that type. If the ``cmpxchg`` is marked as ``volatile``, then the +have the same type, and the type of '' must be a pointer to +that type. If the ``cmpxchg`` is marked as ``volatile``, then the optimizer is not allowed to modify the number or order of execution of this ``cmpxchg`` with other :ref:`volatile operations `. @@ -9008,7 +9060,7 @@ This instruction requires several arguments: ``tail`` or ``musttail`` markers to the call. It is used to prevent tail call optimization from being performed on the call. -#. The optional ``fast-math flags`` marker indicates that the call has one or more +#. The optional ``fast-math flags`` marker indicates that the call has one or more :ref:`fast-math flags `, which are optimization hints to enable otherwise unsafe floating-point optimizations. Fast-math flags are only valid for calls that return a floating-point scalar or vector type. @@ -10438,7 +10490,7 @@ Syntax: """"""" This is an overloaded intrinsic. You can use ``llvm.sqrt`` on any -floating point or vector of floating point type. Not all targets support +floating-point or vector of floating-point type. Not all targets support all types however. :: @@ -10452,20 +10504,22 @@ all types however. 
Overview: """"""""" -The '``llvm.sqrt``' intrinsics return the square root of the specified value, -returning the same value as the libm '``sqrt``' functions would, but without -trapping or setting ``errno``. +The '``llvm.sqrt``' intrinsics return the square root of the specified value. Arguments: """""""""" -The argument and return value are floating point numbers of the same type. +The argument and return value are floating-point numbers of the same type. Semantics: """""""""" -This function returns the square root of the operand if it is a nonnegative -floating point number. +Return the same value as a corresponding libm '``sqrt``' function but without +trapping or setting ``errno``. For types specified by IEEE-754, the result +matches a conforming libm implementation. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. '``llvm.powi.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10512,7 +10566,7 @@ Syntax: """"""" This is an overloaded intrinsic. You can use ``llvm.sin`` on any -floating point or vector of floating point type. Not all targets support +floating-point or vector of floating-point type. Not all targets support all types however. :: @@ -10531,14 +10585,16 @@ The '``llvm.sin.*``' intrinsics return the sine of the operand. Arguments: """""""""" -The argument and return value are floating point numbers of the same type. +The argument and return value are floating-point numbers of the same type. Semantics: """""""""" -This function returns the sine of the specified operand, returning the -same values as the libm ``sin`` functions would, and handles error -conditions in the same way. +Return the same value as a corresponding libm '``sin``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. 
'``llvm.cos.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10547,7 +10603,7 @@ Syntax: """"""" This is an overloaded intrinsic. You can use ``llvm.cos`` on any -floating point or vector of floating point type. Not all targets support +floating-point or vector of floating-point type. Not all targets support all types however. :: @@ -10566,14 +10622,16 @@ The '``llvm.cos.*``' intrinsics return the cosine of the operand. Arguments: """""""""" -The argument and return value are floating point numbers of the same type. +The argument and return value are floating-point numbers of the same type. Semantics: """""""""" -This function returns the cosine of the specified operand, returning the -same values as the libm ``cos`` functions would, and handles error -conditions in the same way. +Return the same value as a corresponding libm '``cos``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. '``llvm.pow.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10582,7 +10640,7 @@ Syntax: """"""" This is an overloaded intrinsic. You can use ``llvm.pow`` on any -floating point or vector of floating point type. Not all targets support +floating-point or vector of floating-point type. Not all targets support all types however. :: @@ -10602,15 +10660,16 @@ specified (positive or negative) power. Arguments: """""""""" -The second argument is a floating point power, and the first is a value -to raise to that power. +The arguments and return value are floating-point numbers of the same type. Semantics: """""""""" -This function returns the first value raised to the second power, -returning the same values as the libm ``pow`` functions would, and -handles error conditions in the same way. +Return the same value as a corresponding libm '``pow``' function but without +trapping or setting ``errno``. 
+ +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. '``llvm.exp.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10619,7 +10678,7 @@ Syntax: """"""" This is an overloaded intrinsic. You can use ``llvm.exp`` on any -floating point or vector of floating point type. Not all targets support +floating-point or vector of floating-point type. Not all targets support all types however. :: @@ -10639,13 +10698,16 @@ value. Arguments: """""""""" -The argument and return value are floating point numbers of the same type. +The argument and return value are floating-point numbers of the same type. Semantics: """""""""" -This function returns the same values as the libm ``exp`` functions -would, and handles error conditions in the same way. +Return the same value as a corresponding libm '``exp``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. '``llvm.exp2.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10654,7 +10716,7 @@ Syntax: """"""" This is an overloaded intrinsic. You can use ``llvm.exp2`` on any -floating point or vector of floating point type. Not all targets support +floating-point or vector of floating-point type. Not all targets support all types however. :: @@ -10674,13 +10736,16 @@ specified value. Arguments: """""""""" -The argument and return value are floating point numbers of the same type. +The argument and return value are floating-point numbers of the same type. Semantics: """""""""" -This function returns the same values as the libm ``exp2`` functions -would, and handles error conditions in the same way. +Return the same value as a corresponding libm '``exp2``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. 
'``llvm.log.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10689,7 +10754,7 @@ Syntax: """"""" This is an overloaded intrinsic. You can use ``llvm.log`` on any -floating point or vector of floating point type. Not all targets support +floating-point or vector of floating-point type. Not all targets support all types however. :: @@ -10709,13 +10774,16 @@ value. Arguments: """""""""" -The argument and return value are floating point numbers of the same type. +The argument and return value are floating-point numbers of the same type. Semantics: """""""""" -This function returns the same values as the libm ``log`` functions -would, and handles error conditions in the same way. +Return the same value as a corresponding libm '``log``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. '``llvm.log10.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10724,7 +10792,7 @@ Syntax: """"""" This is an overloaded intrinsic. You can use ``llvm.log10`` on any -floating point or vector of floating point type. Not all targets support +floating-point or vector of floating-point type. Not all targets support all types however. :: @@ -10744,13 +10812,16 @@ specified value. Arguments: """""""""" -The argument and return value are floating point numbers of the same type. +The argument and return value are floating-point numbers of the same type. Semantics: """""""""" -This function returns the same values as the libm ``log10`` functions -would, and handles error conditions in the same way. +Return the same value as a corresponding libm '``log10``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. '``llvm.log2.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10759,7 +10830,7 @@ Syntax: """"""" This is an overloaded intrinsic. 
You can use ``llvm.log2`` on any -floating point or vector of floating point type. Not all targets support +floating-point or vector of floating-point type. Not all targets support all types however. :: @@ -10779,13 +10850,16 @@ value. Arguments: """""""""" -The argument and return value are floating point numbers of the same type. +The argument and return value are floating-point numbers of the same type. Semantics: """""""""" -This function returns the same values as the libm ``log2`` functions -would, and handles error conditions in the same way. +Return the same value as a corresponding libm '``log2``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. '``llvm.fma.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10794,7 +10868,7 @@ Syntax: """"""" This is an overloaded intrinsic. You can use ``llvm.fma`` on any -floating point or vector of floating point type. Not all targets support +floating-point or vector of floating-point type. Not all targets support all types however. :: @@ -10808,20 +10882,21 @@ all types however. Overview: """"""""" -The '``llvm.fma.*``' intrinsics perform the fused multiply-add -operation. +The '``llvm.fma.*``' intrinsics perform the fused multiply-add operation. Arguments: """""""""" -The argument and return value are floating point numbers of the same -type. +The arguments and return value are floating-point numbers of the same type. Semantics: """""""""" -This function returns the same values as the libm ``fma`` functions -would, and does not set errno. +Return the same value as a corresponding libm '``fma``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. '``llvm.fabs.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -12742,15 +12817,18 @@ This intrinsic indicates that the memory is mutable again. 
Syntax: """"""" +This is an overloaded intrinsic. The memory object can belong to any address +space. The returned pointer must belong to the same address space as the +argument. :: - declare i8* @llvm.invariant.group.barrier(i8* ) + declare i8* @llvm.invariant.group.barrier.p0i8(i8* ) Overview: """"""""" -The '``llvm.invariant.group.barrier``' intrinsic can be used when an invariant +The '``llvm.invariant.group.barrier``' intrinsic can be used when an invariant established by invariant.group metadata no longer holds, to obtain a new pointer value that does not carry the invariant information. @@ -12764,7 +12842,7 @@ the pointer to the memory for which the ``invariant.group`` no longer holds. Semantics: """""""""" -Returns another pointer that aliases its argument but which is considered different +Returns another pointer that aliases its argument but which is considered different for the purposes of ``load``/``store`` ``invariant.group`` metadata. Constrained Floating Point Intrinsics @@ -12842,7 +12920,7 @@ strictly preserve the floating point exception semantics of the original code. Any FP exception that would have been raised by the original code must be raised by the transformed code, and the transformed code must not raise any FP exceptions that would not have been raised by the original code. This is the -exception behavior argument that will be used if the code being compiled reads +exception behavior argument that will be used if the code being compiled reads the FP exception status flags, but this mode can also be used with code that unmasks FP exceptions. 
@@ -12860,7 +12938,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.fadd( , , metadata , metadata ) @@ -12897,7 +12975,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.fsub( , , metadata , metadata ) @@ -12934,7 +13012,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.fmul( , , metadata , metadata ) @@ -12971,7 +13049,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.fdiv( , , metadata , metadata ) @@ -13008,7 +13086,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.frem( , , metadata , metadata ) @@ -13037,7 +13115,7 @@ Semantics: The value produced is the floating point remainder from the division of the two value operands and has the same type as the operands. The remainder has the -same sign as the dividend. +same sign as the dividend. '``llvm.experimental.constrained.fma``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -13097,7 +13175,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.sqrt( , metadata , metadata ) @@ -13134,7 +13212,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.pow( , , metadata , metadata ) @@ -13171,7 +13249,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.powi( , i32 , metadata , metadata ) @@ -13210,7 +13288,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.sin( , metadata , metadata ) @@ -13246,7 +13324,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.cos( , metadata , metadata ) @@ -13282,7 +13360,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.exp( , metadata , metadata ) @@ -13317,7 +13395,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.exp2( , metadata , metadata ) @@ -13353,7 +13431,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.log( , metadata , metadata ) @@ -13389,7 +13467,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.log10( , metadata , metadata ) @@ -13424,7 
+13502,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.log2( , metadata , metadata ) @@ -13459,7 +13537,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.rint( , metadata , metadata ) @@ -13498,7 +13576,7 @@ Syntax: :: - declare + declare @llvm.experimental.constrained.nearbyint( , metadata , metadata ) @@ -14192,6 +14270,36 @@ not overflow at link time under the medium code model if ``x`` is an a constant initializer folded into a function body. This intrinsic can be used to avoid the possibility of overflows when loading from such a constant. +'``llvm.sideeffect``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.sideeffect() inaccessiblememonly nounwind + +Overview: +""""""""" + +The ``llvm.sideeffect`` intrinsic doesn't perform any operation. Optimizers +treat it as having side effects, so it can be inserted into a loop to +indicate that the loop shouldn't be assumed to terminate (which could +potentially lead to the loop being optimized away entirely), even if it's +an infinite loop with no other side effects. + +Arguments: +"""""""""" + +None. + +Semantics: +"""""""""" + +This intrinsic actually does nothing, but optimizers must assume that it +has externally observable side effects. + Stack Map Intrinsics -------------------- @@ -14259,7 +14367,7 @@ The '``llvm.memcpy.element.unordered.atomic.*``' intrinsic copies ``len`` bytes memory from the source location to the destination location. These locations are not allowed to overlap. The memory copy is performed as a sequence of load/store operations where each access is guaranteed to be a multiple of ``element_size`` bytes wide and -aligned at an ``element_size`` boundary. +aligned at an ``element_size`` boundary. The order of the copy is unspecified. 
The same value may be read from the source buffer many times, but only one write is issued to the destination buffer per @@ -14334,7 +14442,7 @@ The '``llvm.memmove.element.unordered.atomic.*``' intrinsic copies ``len`` bytes of memory from the source location to the destination location. These locations are allowed to overlap. The memory copy is performed as a sequence of load/store operations where each access is guaranteed to be a multiple of ``element_size`` -bytes wide and aligned at an ``element_size`` boundary. +bytes wide and aligned at an ``element_size`` boundary. The order of the copy is unspecified. The same value may be read from the source buffer many times, but only one write is issued to the destination buffer per @@ -14409,7 +14517,7 @@ Semantics: The '``llvm.memset.element.unordered.atomic.*``' intrinsic sets the ``len`` bytes of memory starting at the destination location to the given ``value``. The memory is set with a sequence of store operations where each access is guaranteed to be a -multiple of ``element_size`` bytes wide and aligned at an ``element_size`` boundary. +multiple of ``element_size`` bytes wide and aligned at an ``element_size`` boundary. The order of the assignment is unspecified. Only one write is issued to the destination buffer per element. It is well defined to have concurrent reads and diff --git a/docs/LibFuzzer.rst b/docs/LibFuzzer.rst index 2ae84afeed84c..d4e33cb0670e6 100644 --- a/docs/LibFuzzer.rst +++ b/docs/LibFuzzer.rst @@ -24,28 +24,9 @@ Versions ======== LibFuzzer is under active development so you will need the current -(or at least a very recent) version of the Clang compiler. +(or at least a very recent) version of the Clang compiler (see `building Clang from trunk`_) -(If `building Clang from trunk`_ is too time-consuming or difficult, then -the Clang binaries that the Chromium developers build are likely to be -fairly recent: - -.. 
code-block:: console - - mkdir TMP_CLANG - cd TMP_CLANG - git clone https://chromium.googlesource.com/chromium/src/tools/clang - cd .. - TMP_CLANG/clang/scripts/update.py - -This installs the Clang binary as -``./third_party/llvm-build/Release+Asserts/bin/clang``) - -The libFuzzer code resides in the LLVM repository, and requires a recent Clang -compiler to build (and is used to :doc:`fuzz various parts of LLVM itself -`). However the fuzzer itself does not (and should not) depend on -any part of LLVM infrastructure and can be used for other projects without -requiring the rest of LLVM. +Refer to https://releases.llvm.org/5.0.0/docs/LibFuzzer.html for documentation on the older version. Getting Started @@ -90,15 +71,19 @@ Some important things to remember about fuzz targets: Fuzzer Usage ------------ -Very recent versions of Clang (after April 20 2017) include libFuzzer, -and no installation is necessary. -In order to fuzz your binary, use the `-fsanitize=fuzzer` flag during the compilation:: +Recent versions of Clang (starting from 6.0) include libFuzzer, and no extra installation is necessary. + +In order to build your fuzzer binary, use the `-fsanitize=fuzzer` flag during the +compilation and linking. In most cases you may want to combine libFuzzer with +AddressSanitizer_ (ASAN), UndefinedBehaviorSanitizer_ (UBSAN), or both:: + + clang -g -O1 -fsanitize=fuzzer mytarget.c # Builds the fuzz target w/o sanitizers + clang -g -O1 -fsanitize=fuzzer,address mytarget.c # Builds the fuzz target with ASAN + clang -g -O1 -fsanitize=fuzzer,signed-integer-overflow mytarget.c # Builds the fuzz target with a part of UBSAN - clang -fsanitize=fuzzer,address mytarget.c +This will perform the necessary instrumentation, as well as linking with the libFuzzer library. +Note that ``-fsanitize=fuzzer`` links in the libFuzzer's ``main()`` symbol. -This will perform the necessary instrumentation, as well as linking in libFuzzer -library. 
-Note that linking in libFuzzer defines the ``main`` symbol. If modifying ``CFLAGS`` of a large project, which also compiles executables requiring their own ``main`` symbol, it may be desirable to request just the instrumentation without linking:: @@ -108,37 +93,12 @@ instrumentation without linking:: Then libFuzzer can be linked to the desired driver by passing in ``-fsanitize=fuzzer`` during the linking stage. -Otherwise, build the libFuzzer library as a static archive, without any sanitizer -options. Note that the libFuzzer library contains the ``main()`` function: - -.. code-block:: console - - svn co http://llvm.org/svn/llvm-project/llvm/trunk/lib/Fuzzer # or git clone https://chromium.googlesource.com/chromium/llvm-project/llvm/lib/Fuzzer - ./Fuzzer/build.sh # Produces libFuzzer.a - -Then build the fuzzing target function and the library under test using -the SanitizerCoverage_ option, which instruments the code so that the fuzzer -can retrieve code coverage information (to guide the fuzzing). Linking with -the libFuzzer code then gives a fuzzer executable. - -You should also enable one or more of the *sanitizers*, which help to expose -latent bugs by making incorrect behavior generate errors at runtime: - - - AddressSanitizer_ (ASAN) detects memory access errors. Use `-fsanitize=address`. - - UndefinedBehaviorSanitizer_ (UBSAN) detects the use of various features of C/C++ that are explicitly - listed as resulting in undefined behavior. Use `-fsanitize=undefined -fno-sanitize-recover=undefined` - or any individual UBSAN check, e.g. `-fsanitize=signed-integer-overflow -fno-sanitize-recover=undefined`. - You may combine ASAN and UBSAN in one build. - - MemorySanitizer_ (MSAN) detects uninitialized reads: code whose behavior relies on memory - contents that have not been initialized to a specific value. Use `-fsanitize=memory`. - MSAN can not be combined with other sanirizers and should be used as a seprate build. 
- -Finally, link with ``libFuzzer.a``:: - - clang -fsanitize-coverage=trace-pc-guard -fsanitize=address your_lib.cc fuzz_target.cc libFuzzer.a -o my_fuzzer +Using MemorySanitizer_ (MSAN) with libFuzzer is possible too, but tricky. +The exact details are out of scope, we expect to simplify this in future +versions. .. _libfuzzer-corpus: - + Corpus ------ @@ -175,7 +135,6 @@ Only the inputs that trigger new coverage will be added to the first corpus. ./my_fuzzer -merge=1 CURRENT_CORPUS_DIR NEW_POTENTIALLY_INTERESTING_INPUTS_DIR - Running ------- @@ -222,6 +181,33 @@ running with ``-jobs=30`` on a 12-core machine would run 6 workers by default, with each worker averaging 5 bugs by completion of the entire process. +Resuming merge +-------------- + +Merging large corpora may be time consuming, and it is often desirable to do it +on preemptable VMs, where the process may be killed at any time. +In order to seamlessly resume the merge, use the ``-merge_control_file`` flag +and use ``killall -SIGUSR1 /path/to/fuzzer/binary`` to stop the merge gracefully. Example: + +.. code-block:: console + + % rm -f SomeLocalPath + % ./my_fuzzer CORPUS1 CORPUS2 -merge=1 -merge_control_file=SomeLocalPath + ... + MERGE-INNER: using the control file 'SomeLocalPath' + ... + # While this is running, do `killall -SIGUSR1 my_fuzzer` in another console + ==9015== INFO: libFuzzer: exiting as requested + + # This will leave the file SomeLocalPath with the partial state of the merge. + # Now, you can continue the merge by executing the same command. The merge + # will continue from where it has been interrupted. + % ./my_fuzzer CORPUS1 CORPUS2 -merge=1 -merge_control_file=SomeLocalPath + ... + MERGE-OUTER: non-empty control file provided: 'SomeLocalPath' + MERGE-OUTER: control file ok, 32 files total, first not processed file 20 + ... + Options ======= @@ -271,6 +257,10 @@ The most important command line options are: If set to 1, any corpus inputs from the 2nd, 3rd etc. 
corpus directories that trigger new code coverage will be merged into the first corpus directory. Defaults to 0. This flag can be used to minimize a corpus. +``-merge_control_file`` + Specify a control file used for the merge process. + If a merge process gets killed it tries to leave this file in a state + suitable for resuming the merge. By default a temporary file will be used. ``-minimize_crash`` If 1, minimizes the provided crash input. Use with -runs=N or -max_total_time=N to limit the number of attempts. @@ -292,6 +282,9 @@ The most important command line options are: ``-use_counters`` Use `coverage counters`_ to generate approximate counts of how often code blocks are hit; defaults to 1. +``-reduce_inputs`` + Try to reduce the size of inputs while preserving their full feature sets; + defaults to 1. ``-use_value_profile`` Use `value profile`_ to guide corpus expansion; defaults to 0. ``-only_ascii`` @@ -478,7 +471,7 @@ Tracing CMP instructions ------------------------ With an additional compiler flag ``-fsanitize-coverage=trace-cmp`` -(see SanitizerCoverageTraceDataFlow_) +(on by default as part of ``-fsanitize=fuzzer``, see SanitizerCoverageTraceDataFlow_) libFuzzer will intercept CMP instructions and guide mutations based on the arguments of intercepted CMP instructions. This may slow down the fuzzing but is very likely to improve the results. @@ -486,7 +479,6 @@ the fuzzing but is very likely to improve the results. Value Profile ------------- -*EXPERIMENTAL*. With ``-fsanitize-coverage=trace-cmp`` and extra run-time flag ``-use_value_profile=1`` the fuzzer will collect value profiles for the parameters of compare instructions @@ -548,7 +540,7 @@ Periodically restart both fuzzers so that they can use each other's findings. Currently, there is no simple way to run both fuzzing engines in parallel while sharing the same corpus dir. You may also use AFL on your target function ``LLVMFuzzerTestOneInput``: -see an example `here `__.
+see an example `here `__. How good is my fuzzer? ---------------------- @@ -687,6 +679,8 @@ network, crypto. Trophies ======== +* Thousands of bugs found on OSS-Fuzz: https://opensource.googleblog.com/2017/05/oss-fuzz-five-months-later-and.html + * GLIBC: https://sourceware.org/glibc/wiki/FuzzingLibc * MUSL LIBC: `[1] `__ `[2] `__ @@ -741,7 +735,7 @@ Trophies .. _AddressSanitizer: http://clang.llvm.org/docs/AddressSanitizer.html .. _LeakSanitizer: http://clang.llvm.org/docs/LeakSanitizer.html .. _Heartbleed: http://en.wikipedia.org/wiki/Heartbleed -.. _FuzzerInterface.h: https://github.com/llvm-mirror/llvm/blob/master/lib/Fuzzer/FuzzerInterface.h +.. _FuzzerInterface.h: https://github.com/llvm-mirror/compiler-rt/blob/master/lib/fuzzer/FuzzerInterface.h .. _3.7.0: http://llvm.org/releases/3.7.0/docs/LibFuzzer.html .. _building Clang from trunk: http://clang.llvm.org/get_started.html .. _MemorySanitizer: http://clang.llvm.org/docs/MemorySanitizer.html diff --git a/docs/Proposals/VectorizationPlan.rst b/docs/Proposals/VectorizationPlan.rst index aed8e3d2b7935..f9700d177d235 100644 --- a/docs/Proposals/VectorizationPlan.rst +++ b/docs/Proposals/VectorizationPlan.rst @@ -82,8 +82,14 @@ The design of VPlan follows several high-level guidelines: replicated VF*UF times to handle scalarized and predicated instructions. Innerloops are also modelled as SESE regions. -Low-level Design -================ +7. Support instruction-level analysis and transformation, as part of Planning + Step 2.b: During vectorization instructions may need to be traversed, moved, + replaced by other instructions or be created. For example, vector idiom + detection and formation involves searching for and optimizing instruction + patterns. + +Definitions +=========== The low-level design of VPlan comprises of the following classes. :LoopVectorizationPlanner: @@ -139,11 +145,64 @@ The low-level design of VPlan comprises of the following classes. 
instructions; e.g., cloned once, replicated multiple times or widened according to selected VF. +:VPValue: + The base of VPlan's def-use relations class hierarchy. When instantiated, it + models a constant or a live-in Value in VPlan. It has users, which are of type + VPUser, but no operands. + +:VPUser: + A VPValue representing a general vertex in the def-use graph of VPlan. It has + operands which are of type VPValue. When instantiated, it represents a + live-out Instruction that exists outside VPlan. VPUser is similar in some + aspects to LLVM's User class. + +:VPInstruction: + A VPInstruction is both a VPRecipe and a VPUser. It models a single + VPlan-level instruction to be generated if the VPlan is executed, including + its opcode and possibly additional characteristics. It is the basis for + writing instruction-level analyses and optimizations in VPlan as creating, + replacing or moving VPInstructions record both def-use and scheduling + decisions. VPInstructions also extend LLVM IR's opcodes with idiomatic + operations that enrich the Vectorizer's semantics. + :VPTransformState: Stores information used for generating output IR, passed from LoopVectorizationPlanner to its selected VPlan for execution, and used to pass additional information down to VPBlocks and VPRecipes. +The Planning Process and VPlan Roadmap +====================================== + +Transforming the Loop Vectorizer to use VPlan follows a staged approach. First, +VPlan is used to record the final vectorization decisions, and to execute them: +the Hierarchical CFG models the planned control-flow, and Recipes capture +decisions taken inside basic-blocks. Next, VPlan will be used also as the basis +for taking these decisions, effectively turning them into a series of +VPlan-to-VPlan algorithms. Finally, VPlan will support the planning process +itself including cost-based analyses for making these decisions, to fully +support compositional and iterative decision making. 
+ +Some decisions are local to an instruction in the loop, such as whether to widen +it into a vector instruction or replicate it, keeping the generated instructions +in place. Other decisions, however, involve moving instructions, replacing them +with other instructions, and/or introducing new instructions. For example, a +cast may sink past a later instruction and be widened to handle first-order +recurrence; an interleave group of strided gathers or scatters may effectively +move to one place where they are replaced with shuffles and a common wide vector +load or store; new instructions may be introduced to compute masks, shuffle the +elements of vectors, and pack scalar values into vectors or vice-versa. + +In order for VPlan to support making instruction-level decisions and analyses, +it needs to model the relevant instructions along with their def/use relations. +This too follows a staged approach: first, the new instructions that compute +masks are modeled as VPInstructions, along with their induced def/use subgraph. +This effectively models masks in VPlan, facilitating VPlan-based predication. +Next, the logic embedded within each Recipe for generating its instructions at +VPlan execution time, will instead take part in the planning process by modeling +them as VPInstructions. Finally, only logic that applies to instructions as a +group will remain in Recipes, such as interleave groups and potentially other +idiom groups having synergistic cost. + Related LLVM components ----------------------- 1. SLP Vectorizer: one can compare the VPlan model with LLVM's existing SLP @@ -152,6 +211,9 @@ Related LLVM components 2. RegionInfo: one can compare VPlan's H-CFG with the Region Analysis as used by Polly [7]_. +3. Loop Vectorizer: the Vectorization Plan aims to upgrade the infrastructure of + the Loop Vectorizer and extend it to handle outer loops [8,9]_. + References ---------- .. 
[1] "Outer-loop vectorization: revisited for short SIMD architectures", Dorit @@ -180,3 +242,6 @@ References .. [8] "Introducing VPlan to the Loop Vectorizer", Gil Rapaport and Ayal Zaks, European LLVM Developers' Meeting 2017. + +.. [9] "Extending LoopVectorizer: OpenMP4.5 SIMD and Outer Loop + Auto-Vectorization", Intel Vectorizer Team, LLVM Developers' Meeting 2016. diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 6e747873ecae0..4b6d7931e8487 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -44,6 +44,11 @@ Non-comprehensive list of changes in this release ``llvm::sys::ExecuteNoWait`` was changed to an ``ArrayRef`` of optional ``StringRef``'s to make it safer and more convenient to use. +* The backend name was added to the Target Registry to allow run-time + information to be fed back into TableGen. Out-of-tree targets will need to add + the name used in the `def X : Target` definition to the call to + `RegisterTarget`. + * Note.. .. NOTE diff --git a/docs/SourceLevelDebugging.rst b/docs/SourceLevelDebugging.rst index c46b51c4d8115..103c6e0365bad 100644 --- a/docs/SourceLevelDebugging.rst +++ b/docs/SourceLevelDebugging.rst @@ -188,7 +188,7 @@ the variable. The third argument is a `complex expression `_. An `llvm.dbg.addr` intrinsic describes the *address* of a source variable. -.. code-block:: llvm +.. code-block:: text %i.addr = alloca i32, align 4 call void @llvm.dbg.addr(metadata i32* %i.addr, metadata !1, @@ -239,9 +239,9 @@ and we plan to deprecate it in future LLVM releases. void @llvm.dbg.value(metadata, metadata, metadata) This intrinsic provides information when a user source variable is set to a new -value. The first argument is the new value (wrapped as metadata). The third +value. The first argument is the new value (wrapped as metadata). The second argument is a `local variable `_ containing a -description of the variable. The fourth argument is a `complex expression +description of the variable. 
The third argument is a `complex expression `_. Object lifetimes and scoping diff --git a/docs/WritingAnLLVMPass.rst b/docs/WritingAnLLVMPass.rst index 54b3630e655ff..41f400740e84e 100644 --- a/docs/WritingAnLLVMPass.rst +++ b/docs/WritingAnLLVMPass.rst @@ -1032,7 +1032,7 @@ implementation for the interface. Pass Statistics =============== -The `Statistic `_ class is +The `Statistic `_ class is designed to be an easy way to expose various success metrics from passes. These statistics are printed at the end of a run, when the :option:`-stats` command line option is enabled on the command line. See the :ref:`Statistics @@ -1043,7 +1043,7 @@ section ` in the Programmer's Manual for details. What PassManager does --------------------- -The `PassManager `_ `class +The `PassManager `_ `class `_ takes a list of passes, ensures their :ref:`prerequisites ` are set up correctly, and then schedules passes to run efficiently. All of the diff --git a/docs/XRay.rst b/docs/XRay.rst index 9e08c35880396..e9ecc13e3b286 100644 --- a/docs/XRay.rst +++ b/docs/XRay.rst @@ -75,11 +75,11 @@ GCC-style attributes or C++11-style attributes. .. code-block:: c++ - [[clang::xray_always_intrument]] void always_instrumented(); + [[clang::xray_always_instrument]] void always_instrumented(); [[clang::xray_never_instrument]] void never_instrumented(); - void alt_always_instrumented() __attribute__((xray_always_intrument)); + void alt_always_instrumented() __attribute__((xray_always_instrument)); void alt_never_instrumented() __attribute__((xray_never_instrument)); diff --git a/docs/XRayExample.rst b/docs/XRayExample.rst index 718b302a50327..56f17507d82f1 100644 --- a/docs/XRayExample.rst +++ b/docs/XRayExample.rst @@ -275,11 +275,11 @@ application. 
#include #include - [[clang::xray_always_intrument]] void f() { + [[clang::xray_always_instrument]] void f() { std::cerr << '.'; } - [[clang::xray_always_intrument]] void g() { + [[clang::xray_always_instrument]] void g() { for (int i = 0; i < 1 << 10; ++i) { std::cerr << '-'; } diff --git a/docs/index.rst b/docs/index.rst index 955607a751cd9..47c2f04739313 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -68,6 +68,7 @@ representation. CMakePrimer AdvancedBuilds HowToBuildOnARM + HowToCrossCompileBuiltinsOnArm HowToCrossCompileLLVM CommandGuide/index GettingStarted @@ -105,6 +106,9 @@ representation. :doc:`HowToBuildOnARM` Notes on building and testing LLVM/Clang on ARM. +:doc:`HowToCrossCompileBuiltinsOnArm` + Notes on cross-building and testing the compiler-rt builtins for Arm. + :doc:`HowToCrossCompileLLVM` Notes on cross-building and testing LLVM/Clang. diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 22cef23007c36..8238c09f9dd0b 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -1136,6 +1136,16 @@ LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C); */ LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C); +/** + * Create a token type in a context. + */ +LLVMTypeRef LLVMTokenTypeInContext(LLVMContextRef C); + +/** + * Create a metadata type in a context. + */ +LLVMTypeRef LLVMMetadataTypeInContext(LLVMContextRef C); + /** * These are similar to the above functions except they operate on the * global context. diff --git a/include/llvm-c/DebugInfo.h b/include/llvm-c/DebugInfo.h new file mode 100644 index 0000000000000..d17c690be4dad --- /dev/null +++ b/include/llvm-c/DebugInfo.h @@ -0,0 +1,232 @@ +//===------------ DebugInfo.h - LLVM C API Debug Info API -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// This file declares the C API endpoints for generating DWARF Debug Info +/// +/// Note: This interface is experimental. It is *NOT* stable, and may be +/// changed without warning. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_C_DEBUGINFO_H +#define LLVM_C_DEBUGINFO_H + +#include "llvm-c/Core.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Debug info flags. + */ +typedef enum { + LLVMDIFlagZero = 0, + LLVMDIFlagPrivate = 1, + LLVMDIFlagProtected = 2, + LLVMDIFlagPublic = 3, + LLVMDIFlagFwdDecl = 1 << 2, + LLVMDIFlagAppleBlock = 1 << 3, + LLVMDIFlagBlockByrefStruct = 1 << 4, + LLVMDIFlagVirtual = 1 << 5, + LLVMDIFlagArtificial = 1 << 6, + LLVMDIFlagExplicit = 1 << 7, + LLVMDIFlagPrototyped = 1 << 8, + LLVMDIFlagObjcClassComplete = 1 << 9, + LLVMDIFlagObjectPointer = 1 << 10, + LLVMDIFlagVector = 1 << 11, + LLVMDIFlagStaticMember = 1 << 12, + LLVMDIFlagLValueReference = 1 << 13, + LLVMDIFlagRValueReference = 1 << 14, + LLVMDIFlagReserved = 1 << 15, + LLVMDIFlagSingleInheritance = 1 << 16, + LLVMDIFlagMultipleInheritance = 2 << 16, + LLVMDIFlagVirtualInheritance = 3 << 16, + LLVMDIFlagIntroducedVirtual = 1 << 18, + LLVMDIFlagBitField = 1 << 19, + LLVMDIFlagNoReturn = 1 << 20, + LLVMDIFlagMainSubprogram = 1 << 21, + LLVMDIFlagIndirectVirtualBase = (1 << 2) | (1 << 5), + LLVMDIFlagAccessibility = LLVMDIFlagPrivate | LLVMDIFlagProtected | + LLVMDIFlagPublic, + LLVMDIFlagPtrToMemberRep = LLVMDIFlagSingleInheritance | + LLVMDIFlagMultipleInheritance | + LLVMDIFlagVirtualInheritance +} LLVMDIFlags; + +/** + * Source languages known by DWARF. 
+ */ +typedef enum { + LLVMDWARFSourceLanguageC89, + LLVMDWARFSourceLanguageC, + LLVMDWARFSourceLanguageAda83, + LLVMDWARFSourceLanguageC_plus_plus, + LLVMDWARFSourceLanguageCobol74, + LLVMDWARFSourceLanguageCobol85, + LLVMDWARFSourceLanguageFortran77, + LLVMDWARFSourceLanguageFortran90, + LLVMDWARFSourceLanguagePascal83, + LLVMDWARFSourceLanguageModula2, + // New in DWARF v3: + LLVMDWARFSourceLanguageJava, + LLVMDWARFSourceLanguageC99, + LLVMDWARFSourceLanguageAda95, + LLVMDWARFSourceLanguageFortran95, + LLVMDWARFSourceLanguagePLI, + LLVMDWARFSourceLanguageObjC, + LLVMDWARFSourceLanguageObjC_plus_plus, + LLVMDWARFSourceLanguageUPC, + LLVMDWARFSourceLanguageD, + // New in DWARF v4: + LLVMDWARFSourceLanguagePython, + // New in DWARF v5: + LLVMDWARFSourceLanguageOpenCL, + LLVMDWARFSourceLanguageGo, + LLVMDWARFSourceLanguageModula3, + LLVMDWARFSourceLanguageHaskell, + LLVMDWARFSourceLanguageC_plus_plus_03, + LLVMDWARFSourceLanguageC_plus_plus_11, + LLVMDWARFSourceLanguageOCaml, + LLVMDWARFSourceLanguageRust, + LLVMDWARFSourceLanguageC11, + LLVMDWARFSourceLanguageSwift, + LLVMDWARFSourceLanguageJulia, + LLVMDWARFSourceLanguageDylan, + LLVMDWARFSourceLanguageC_plus_plus_14, + LLVMDWARFSourceLanguageFortran03, + LLVMDWARFSourceLanguageFortran08, + LLVMDWARFSourceLanguageRenderScript, + LLVMDWARFSourceLanguageBLISS, + // Vendor extensions: + LLVMDWARFSourceLanguageMips_Assembler, + LLVMDWARFSourceLanguageGOOGLE_RenderScript, + LLVMDWARFSourceLanguageBORLAND_Delphi +} LLVMDWARFSourceLanguage; + +/** + * The amount of debug information to emit. + */ +typedef enum { + LLVMDWARFEmissionNone = 0, + LLVMDWARFEmissionFull, + LLVMDWARFEmissionLineTablesOnly +} LLVMDWARFEmissionKind; + +/** + * The current debug metadata version number. + */ +unsigned LLVMDebugMetadataVersion(void); + +/** + * The version of debug metadata that's present in the provided \c Module. 
+ */ +unsigned LLVMGetModuleDebugMetadataVersion(LLVMModuleRef Module); + +/** + * Strip debug info in the module if it exists. + * To do this, we remove all calls to the debugger intrinsics and any named + * metadata for debugging. We also remove debug locations for instructions. + * Return true if module is modified. + */ +LLVMBool LLVMStripModuleDebugInfo(LLVMModuleRef Module); + +/** + * Construct a builder for a module, and do not allow for unresolved nodes + * attached to the module. + */ +LLVMDIBuilderRef LLVMCreateDIBuilderDisallowUnresolved(LLVMModuleRef M); + +/** + * Construct a builder for a module and collect unresolved nodes attached + * to the module in order to resolve cycles during a call to + * \c LLVMDIBuilderFinalize. + */ +LLVMDIBuilderRef LLVMCreateDIBuilder(LLVMModuleRef M); + +/** + * Deallocates the \c DIBuilder and everything it owns. + * @note You must call \c LLVMDIBuilderFinalize before this + */ +void LLVMDisposeDIBuilder(LLVMDIBuilderRef Builder); + +/** + * Construct any deferred debug info descriptors. + */ +void LLVMDIBuilderFinalize(LLVMDIBuilderRef Builder); + +/** + * A CompileUnit provides an anchor for all debugging + * information generated during this instance of compilation. + * \param Lang Source programming language, eg. + * \c LLVMDWARFSourceLanguageC99 + * \param FileRef File info. + * \param Producer Identify the producer of debugging information + * and code. Usually this is a compiler + * version string. + * \param ProducerLen The length of the C string passed to \c Producer. + * \param isOptimized A boolean flag which indicates whether optimization + * is enabled or not. + * \param Flags This string lists command line options. This + * string is directly embedded in debug info + * output which may be used by a tool + * analyzing generated debugging information. + * \param FlagsLen The length of the C string passed to \c Flags. + * \param RuntimeVer This indicates runtime version for languages like + * Objective-C. 
+ * \param SplitName The name of the file that we'll split debug info + * out into. + * \param SplitNameLen The length of the C string passed to \c SplitName. + * \param Kind The kind of debug information to generate. + * \param DWOId The DWOId if this is a split skeleton compile unit. + * \param SplitDebugInlining Whether to emit inline debug info. + * \param DebugInfoForProfiling Whether to emit extra debug info for + * profile collection. + */ +LLVMMetadataRef LLVMDIBuilderCreateCompileUnit( + LLVMDIBuilderRef Builder, LLVMDWARFSourceLanguage Lang, + LLVMMetadataRef FileRef, const char *Producer, size_t ProducerLen, + LLVMBool isOptimized, const char *Flags, size_t FlagsLen, + unsigned RuntimeVer, const char *SplitName, size_t SplitNameLen, + LLVMDWARFEmissionKind Kind, unsigned DWOId, LLVMBool SplitDebugInlining, + LLVMBool DebugInfoForProfiling); + +/** + * Create a file descriptor to hold debugging information for a file. + * \param Builder The \c DIBuilder. + * \param Filename File name. + * \param FilenameLen The length of the C string passed to \c Filename. + * \param Directory Directory. + * \param DirectoryLen The length of the C string passed to \c Directory. + */ +LLVMMetadataRef +LLVMDIBuilderCreateFile(LLVMDIBuilderRef Builder, const char *Filename, + size_t FilenameLen, const char *Directory, + size_t DirectoryLen); + +/** + * Creates a new DebugLocation that describes a source location. + * \param Line The line in the source file. + * \param Column The column in the source file. + * \param Scope The scope in which the location resides. + * \param InlinedAt The scope where this location was inlined, if at all. + * (optional). + * \note If the item to which this location is attached cannot be + * attributed to a source line, pass 0 for the line and column. 
+ */ +LLVMMetadataRef +LLVMDIBuilderCreateDebugLocation(LLVMContextRef Ctx, unsigned Line, + unsigned Column, LLVMMetadataRef Scope, + LLVMMetadataRef InlinedAt); + +#ifdef __cplusplus +} /* end extern "C" */ +#endif + +#endif diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h index b9612b9cec044..8991e0904849c 100644 --- a/include/llvm-c/Transforms/Scalar.h +++ b/include/llvm-c/Transforms/Scalar.h @@ -44,9 +44,6 @@ void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM); /** See llvm::createCFGSimplificationPass function. */ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM); -/** See llvm::createLateCFGSimplificationPass function. */ -void LLVMAddLateCFGSimplificationPass(LLVMPassManagerRef PM); - /** See llvm::createDeadStoreEliminationPass function. */ void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM); diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h index af293d4c1422a..e3f48433c69fd 100644 --- a/include/llvm/ADT/EquivalenceClasses.h +++ b/include/llvm/ADT/EquivalenceClasses.h @@ -239,6 +239,16 @@ class EquivalenceClasses { return L1; } + // isEquivalent - Return true if V1 is equivalent to V2. This can happen if + // V1 is equal to V2 or if they belong to one equivalence class. + bool isEquivalent(const ElemTy &V1, const ElemTy &V2) const { + // Fast path: any element is equivalent to itself. 
+ if (V1 == V2) + return true; + auto It = findLeader(V1); + return It != member_end() && It == findLeader(V2); + } + class member_iterator : public std::iterator { friend class EquivalenceClasses; diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h index c5987a947e182..e363e69d032aa 100644 --- a/include/llvm/ADT/FoldingSet.h +++ b/include/llvm/ADT/FoldingSet.h @@ -1,4 +1,4 @@ -//===-- llvm/ADT/FoldingSet.h - Uniquing Hash Set ---------------*- C++ -*-===// +//===- llvm/ADT/FoldingSet.h - Uniquing Hash Set ----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -115,11 +115,9 @@ class FoldingSetBase { protected: /// Buckets - Array of bucket chains. - /// void **Buckets; /// NumBuckets - Length of the Buckets array. Always a power of 2. - /// unsigned NumBuckets; /// NumNodes - Number of nodes in the folding set. Growth occurs when NumNodes @@ -135,14 +133,13 @@ class FoldingSetBase { //===--------------------------------------------------------------------===// /// Node - This class is used to maintain the singly linked bucket list in /// a folding set. - /// class Node { private: // NextInFoldingSetBucket - next link in the bucket list. - void *NextInFoldingSetBucket; + void *NextInFoldingSetBucket = nullptr; public: - Node() : NextInFoldingSetBucket(nullptr) {} + Node() = default; // Accessors void *getNextInBucket() const { return NextInFoldingSetBucket; } @@ -221,7 +218,6 @@ class FoldingSetBase { /// DefaultFoldingSetTrait - This class provides default implementations /// for FoldingSetTrait implementations. -/// template struct DefaultFoldingSetTrait { static void Profile(const T &X, FoldingSetNodeID &ID) { X.Profile(ID); @@ -307,7 +303,6 @@ class FoldingSetNodeIDRef { /// FoldingSetNodeID - This class is used to gather all the unique data bits of /// a node. When all the bits are gathered this class is used to produce a /// hash value for the node. 
-/// class FoldingSetNodeID { /// Bits - Vector of all the data bits that make the node unique. /// Use a SmallVector to avoid a heap allocation in the common case. @@ -320,7 +315,6 @@ class FoldingSetNodeID { : Bits(Ref.getData(), Ref.getData() + Ref.getSize()) {} /// Add* - Add various data types to Bit data. - /// void AddPointer(const void *Ptr); void AddInteger(signed I); void AddInteger(unsigned I); @@ -344,7 +338,6 @@ class FoldingSetNodeID { unsigned ComputeHash() const; /// operator== - Used to compare two nodes to each other. - /// bool operator==(const FoldingSetNodeID &RHS) const; bool operator==(const FoldingSetNodeIDRef RHS) const; @@ -363,7 +356,7 @@ class FoldingSetNodeID { }; // Convenience type to hide the implementation of the folding set. -typedef FoldingSetBase::Node FoldingSetNode; +using FoldingSetNode = FoldingSetBase::Node; template class FoldingSetIterator; template class FoldingSetBucketIterator; @@ -415,15 +408,17 @@ template class FoldingSetImpl : public FoldingSetBase { ~FoldingSetImpl() = default; public: - typedef FoldingSetIterator iterator; + using iterator = FoldingSetIterator; + iterator begin() { return iterator(Buckets); } iterator end() { return iterator(Buckets+NumBuckets); } - typedef FoldingSetIterator const_iterator; + using const_iterator = FoldingSetIterator; + const_iterator begin() const { return const_iterator(Buckets); } const_iterator end() const { return const_iterator(Buckets+NumBuckets); } - typedef FoldingSetBucketIterator bucket_iterator; + using bucket_iterator = FoldingSetBucketIterator; bucket_iterator bucket_begin(unsigned hash) { return bucket_iterator(Buckets + (hash & (NumBuckets-1))); @@ -503,9 +498,7 @@ template class FoldingSet final : public FoldingSetImpl { } public: - explicit FoldingSet(unsigned Log2InitSize = 6) - : Super(Log2InitSize) {} - + explicit FoldingSet(unsigned Log2InitSize = 6) : Super(Log2InitSize) {} FoldingSet(FoldingSet &&Arg) = default; FoldingSet &operator=(FoldingSet &&RHS) = 
default; }; @@ -552,8 +545,7 @@ class ContextualFoldingSet final : public FoldingSetImpl { public: explicit ContextualFoldingSet(Ctx Context, unsigned Log2InitSize = 6) - : Super(Log2InitSize), Context(Context) - {} + : Super(Log2InitSize), Context(Context) {} Ctx getContext() const { return Context; } }; @@ -569,15 +561,15 @@ class FoldingSetVector { VectorT Vector; public: - explicit FoldingSetVector(unsigned Log2InitSize = 6) - : Set(Log2InitSize) { - } + explicit FoldingSetVector(unsigned Log2InitSize = 6) : Set(Log2InitSize) {} + + using iterator = pointee_iterator; - typedef pointee_iterator iterator; iterator begin() { return Vector.begin(); } iterator end() { return Vector.end(); } - typedef pointee_iterator const_iterator; + using const_iterator = pointee_iterator; + const_iterator begin() const { return Vector.begin(); } const_iterator end() const { return Vector.end(); } @@ -667,15 +659,13 @@ template class FoldingSetIterator : public FoldingSetIteratorImpl { /// FoldingSetBucketIteratorImpl - This is the common bucket iterator support /// shared by all folding sets, which knows how to walk a particular bucket /// of a folding set hash table. - class FoldingSetBucketIteratorImpl { protected: void *Ptr; explicit FoldingSetBucketIteratorImpl(void **Bucket); - FoldingSetBucketIteratorImpl(void **Bucket, bool) - : Ptr(Bucket) {} + FoldingSetBucketIteratorImpl(void **Bucket, bool) : Ptr(Bucket) {} void advance() { void *Probe = static_cast(Ptr)->getNextInBucket(); diff --git a/include/llvm/ADT/MapVector.h b/include/llvm/ADT/MapVector.h index 26a555ee1d3bd..3d78f4b203c87 100644 --- a/include/llvm/ADT/MapVector.h +++ b/include/llvm/ADT/MapVector.h @@ -56,6 +56,13 @@ class MapVector { size_type size() const { return Vector.size(); } + /// Grow the MapVector so that it can contain at least \p NumEntries items + /// before resizing again. 
+ void reserve(size_type NumEntries) { + Map.reserve(NumEntries); + Vector.reserve(NumEntries); + } + iterator begin() { return Vector.begin(); } const_iterator begin() const { return Vector.begin(); } iterator end() { return Vector.end(); } diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h index b782d9da17ac4..2811d5c1e21ba 100644 --- a/include/llvm/ADT/Optional.h +++ b/include/llvm/ADT/Optional.h @@ -27,8 +27,7 @@ namespace llvm { -template -class Optional { +template class Optional { AlignedCharArrayUnion storage; bool hasVal = false; @@ -38,18 +37,14 @@ class Optional { Optional(NoneType) {} explicit Optional() {} - Optional(const T &y) : hasVal(true) { - new (storage.buffer) T(y); - } + Optional(const T &y) : hasVal(true) { new (storage.buffer) T(y); } Optional(const Optional &O) : hasVal(O.hasVal) { if (hasVal) new (storage.buffer) T(*O); } - Optional(T &&y) : hasVal(true) { - new (storage.buffer) T(std::forward(y)); - } + Optional(T &&y) : hasVal(true) { new (storage.buffer) T(std::forward(y)); } Optional(Optional &&O) : hasVal(O) { if (O) { @@ -58,9 +53,7 @@ class Optional { } } - ~Optional() { - reset(); - } + ~Optional() { reset(); } Optional &operator=(T &&y) { if (hasVal) @@ -83,14 +76,13 @@ class Optional { } /// Create a new object by constructing it in place with the given arguments. - template - void emplace(ArgTypes &&...Args) { + template void emplace(ArgTypes &&... Args) { reset(); hasVal = true; new (storage.buffer) T(std::forward(Args)...); } - static inline Optional create(const T* y) { + static inline Optional create(const T *y) { return y ? 
Optional(*y) : Optional(); } @@ -124,17 +116,35 @@ class Optional { } } - const T* getPointer() const { assert(hasVal); return reinterpret_cast(storage.buffer); } - T* getPointer() { assert(hasVal); return reinterpret_cast(storage.buffer); } - const T& getValue() const LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); } - T& getValue() LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); } + const T *getPointer() const { + assert(hasVal); + return reinterpret_cast(storage.buffer); + } + T *getPointer() { + assert(hasVal); + return reinterpret_cast(storage.buffer); + } + const T &getValue() const LLVM_LVALUE_FUNCTION { + assert(hasVal); + return *getPointer(); + } + T &getValue() LLVM_LVALUE_FUNCTION { + assert(hasVal); + return *getPointer(); + } explicit operator bool() const { return hasVal; } bool hasValue() const { return hasVal; } - const T* operator->() const { return getPointer(); } - T* operator->() { return getPointer(); } - const T& operator*() const LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); } - T& operator*() LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); } + const T *operator->() const { return getPointer(); } + T *operator->() { return getPointer(); } + const T &operator*() const LLVM_LVALUE_FUNCTION { + assert(hasVal); + return *getPointer(); + } + T &operator*() LLVM_LVALUE_FUNCTION { + assert(hasVal); + return *getPointer(); + } template constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION { @@ -142,8 +152,14 @@ class Optional { } #if LLVM_HAS_RVALUE_REFERENCE_THIS - T&& getValue() && { assert(hasVal); return std::move(*getPointer()); } - T&& operator*() && { assert(hasVal); return std::move(*getPointer()); } + T &&getValue() && { + assert(hasVal); + return std::move(*getPointer()); + } + T &&operator*() && { + assert(hasVal); + return std::move(*getPointer()); + } template T getValueOr(U &&value) && { diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h index 
eb5a336990007..884d05155bffa 100644 --- a/include/llvm/ADT/PointerIntPair.h +++ b/include/llvm/ADT/PointerIntPair.h @@ -14,15 +14,14 @@ #ifndef LLVM_ADT_POINTERINTPAIR_H #define LLVM_ADT_POINTERINTPAIR_H -#include "llvm/Support/Compiler.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include +#include #include namespace llvm { template struct DenseMapInfo; - template struct PointerIntPairInfo; @@ -39,25 +38,24 @@ struct PointerIntPairInfo; /// for something else. For example, this allows: /// PointerIntPair, 1, bool> /// ... and the two bools will land in different bits. -/// template , typename Info = PointerIntPairInfo> class PointerIntPair { - intptr_t Value; + intptr_t Value = 0; public: - constexpr PointerIntPair() : Value(0) {} + constexpr PointerIntPair() = default; + PointerIntPair(PointerTy PtrVal, IntType IntVal) { setPointerAndInt(PtrVal, IntVal); } + explicit PointerIntPair(PointerTy PtrVal) { initWithPointer(PtrVal); } PointerTy getPointer() const { return Info::getPointer(Value); } - IntType getInt() const { - return (IntType)Info::getInt(Value); - } + IntType getInt() const { return (IntType)Info::getInt(Value); } void setPointer(PointerTy PtrVal) { Value = Info::updatePointer(Value, PtrVal); @@ -88,6 +86,7 @@ class PointerIntPair { } void *getOpaqueValue() const { return reinterpret_cast(Value); } + void setFromOpaqueValue(void *Val) { Value = reinterpret_cast(Val); } @@ -108,14 +107,18 @@ class PointerIntPair { bool operator==(const PointerIntPair &RHS) const { return Value == RHS.Value; } + bool operator!=(const PointerIntPair &RHS) const { return Value != RHS.Value; } + bool operator<(const PointerIntPair &RHS) const { return Value < RHS.Value; } bool operator>(const PointerIntPair &RHS) const { return Value > RHS.Value; } + bool operator<=(const PointerIntPair &RHS) const { return Value <= RHS.Value; } + bool operator>=(const PointerIntPair &RHS) const { return Value >= RHS.Value; } @@ -180,21 +183,25 @@ struct isPodLike> { // Provide 
specialization of DenseMapInfo for PointerIntPair. template struct DenseMapInfo> { - typedef PointerIntPair Ty; + using Ty = PointerIntPair; + static Ty getEmptyKey() { uintptr_t Val = static_cast(-1); Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; return Ty::getFromOpaqueValue(reinterpret_cast(Val)); } + static Ty getTombstoneKey() { uintptr_t Val = static_cast(-2); Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; return Ty::getFromOpaqueValue(reinterpret_cast(Val)); } + static unsigned getHashValue(Ty V) { uintptr_t IV = reinterpret_cast(V.getOpaqueValue()); return unsigned(IV) ^ unsigned(IV >> 9); } + static bool isEqual(const Ty &LHS, const Ty &RHS) { return LHS == RHS; } }; @@ -207,16 +214,20 @@ struct PointerLikeTypeTraits< getAsVoidPointer(const PointerIntPair &P) { return P.getOpaqueValue(); } + static inline PointerIntPair getFromVoidPointer(void *P) { return PointerIntPair::getFromOpaqueValue(P); } + static inline PointerIntPair getFromVoidPointer(const void *P) { return PointerIntPair::getFromOpaqueValue(P); } + enum { NumLowBitsAvailable = PtrTraits::NumLowBitsAvailable - IntBits }; }; } // end namespace llvm -#endif + +#endif // LLVM_ADT_POINTERINTPAIR_H diff --git a/include/llvm/ADT/PointerSumType.h b/include/llvm/ADT/PointerSumType.h index 1a49e062dc2a6..e37957160d981 100644 --- a/include/llvm/ADT/PointerSumType.h +++ b/include/llvm/ADT/PointerSumType.h @@ -11,8 +11,10 @@ #define LLVM_ADT_POINTERSUMTYPE_H #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/PointerLikeTypeTraits.h" +#include +#include +#include namespace llvm { @@ -24,16 +26,15 @@ template > struct PointerSumTypeMember { enum { Tag = N }; - typedef PointerArgT PointerT; - typedef TraitsArgT TraitsT; + using PointerT = PointerArgT; + using TraitsT = TraitsArgT; }; namespace detail { -template -struct PointerSumTypeHelper; +template struct PointerSumTypeHelper; -} +} // end namespace detail /// A sum type over pointer-like types. 
/// @@ -60,12 +61,12 @@ struct PointerSumTypeHelper; /// There is no support for constructing or accessing with a dynamic tag as /// that would fundamentally violate the type safety provided by the sum type. template class PointerSumType { - uintptr_t Value; + uintptr_t Value = 0; - typedef detail::PointerSumTypeHelper HelperT; + using HelperT = detail::PointerSumTypeHelper; public: - constexpr PointerSumType() : Value(0) {} + constexpr PointerSumType() = default; /// A typed constructor for a specific tagged member of the sum type. template @@ -128,14 +129,14 @@ struct PointerSumTypeHelper : MemberTs... { template static void LookupOverload(...); template struct Lookup { // Compute a particular member type by resolving the lookup helper ovorload. - typedef decltype(LookupOverload( - static_cast(nullptr))) MemberT; + using MemberT = decltype( + LookupOverload(static_cast(nullptr))); /// The Nth member's pointer type. - typedef typename MemberT::PointerT PointerT; + using PointerT = typename MemberT::PointerT; /// The Nth member's traits type. - typedef typename MemberT::TraitsT TraitsT; + using TraitsT = typename MemberT::TraitsT; }; // Next we need to compute the number of bits available for the discriminant @@ -171,35 +172,36 @@ struct PointerSumTypeHelper : MemberTs... { "Each member must pass the checker."); }; -} +} // end namespace detail // Teach DenseMap how to use PointerSumTypes as keys. 
template struct DenseMapInfo> { - typedef PointerSumType SumType; - - typedef detail::PointerSumTypeHelper HelperT; + using SumType = PointerSumType; + using HelperT = detail::PointerSumTypeHelper; enum { SomeTag = HelperT::MinTag }; - typedef typename HelperT::template Lookup::PointerT - SomePointerT; - typedef DenseMapInfo SomePointerInfo; + using SomePointerT = + typename HelperT::template Lookup::PointerT; + using SomePointerInfo = DenseMapInfo; static inline SumType getEmptyKey() { return SumType::create(SomePointerInfo::getEmptyKey()); } + static inline SumType getTombstoneKey() { - return SumType::create( - SomePointerInfo::getTombstoneKey()); + return SumType::create(SomePointerInfo::getTombstoneKey()); } + static unsigned getHashValue(const SumType &Arg) { uintptr_t OpaqueValue = Arg.getOpaqueValue(); return DenseMapInfo::getHashValue(OpaqueValue); } + static bool isEqual(const SumType &LHS, const SumType &RHS) { return LHS == RHS; } }; -} +} // end namespace llvm -#endif +#endif // LLVM_ADT_POINTERSUMTYPE_H diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h index d019edb57e569..4276859e9254c 100644 --- a/include/llvm/ADT/PointerUnion.h +++ b/include/llvm/ADT/PointerUnion.h @@ -25,7 +25,7 @@ namespace llvm { template struct PointerUnionTypeSelectorReturn { - typedef T Return; + using Return = T; }; /// Get a type based on whether two types are the same or not. @@ -33,25 +33,25 @@ template struct PointerUnionTypeSelectorReturn { /// For: /// /// \code -/// typedef typename PointerUnionTypeSelector::Return Ret; +/// using Ret = typename PointerUnionTypeSelector::Return; /// \endcode /// /// Ret will be EQ type if T1 is same as T2 or NE type otherwise. 
template struct PointerUnionTypeSelector { - typedef typename PointerUnionTypeSelectorReturn::Return Return; + using Return = typename PointerUnionTypeSelectorReturn::Return; }; template struct PointerUnionTypeSelector { - typedef typename PointerUnionTypeSelectorReturn::Return Return; + using Return = typename PointerUnionTypeSelectorReturn::Return; }; template struct PointerUnionTypeSelectorReturn< PointerUnionTypeSelector> { - typedef - typename PointerUnionTypeSelector::Return Return; + using Return = + typename PointerUnionTypeSelector::Return; }; /// Provide PointerLikeTypeTraits for void* that is used by PointerUnion @@ -86,8 +86,8 @@ template class PointerUnionUIntTraits { /// X = P.get(); // runtime assertion failure. template class PointerUnion { public: - typedef PointerIntPair> - ValTy; + using ValTy = + PointerIntPair>; private: ValTy Val; @@ -102,7 +102,6 @@ template class PointerUnion { public: PointerUnion() = default; - PointerUnion(PT1 V) : Val(const_cast( PointerLikeTypeTraits::getAsVoidPointer(V))) {} @@ -117,14 +116,15 @@ template class PointerUnion { // we recursively strip off low bits if we have a nested PointerUnion. return !PointerLikeTypeTraits::getFromVoidPointer(Val.getPointer()); } + explicit operator bool() const { return !isNull(); } /// Test if the Union currently holds the type matching T. 
template int is() const { - typedef typename ::llvm::PointerUnionTypeSelector< - PT1, T, IsPT1, ::llvm::PointerUnionTypeSelector< - PT2, T, IsPT2, UNION_DOESNT_CONTAIN_TYPE>>::Return - Ty; + using Ty = typename ::llvm::PointerUnionTypeSelector< + PT1, T, IsPT1, + ::llvm::PointerUnionTypeSelector>>::Return; int TyNo = Ty::Num; return static_cast(Val.getInt()) == TyNo; } @@ -158,7 +158,8 @@ template class PointerUnion { assert( get() == Val.getPointer() && "Can't get the address because PointerLikeTypeTraits changes the ptr"); - return const_cast(reinterpret_cast(Val.getAddrOfPointer())); + return const_cast( + reinterpret_cast(Val.getAddrOfPointer())); } /// Assignment from nullptr which just clears the union. @@ -227,19 +228,22 @@ struct PointerLikeTypeTraits> { /// for usage. template class PointerUnion3 { public: - typedef PointerUnion InnerUnion; - typedef PointerUnion ValTy; + using InnerUnion = PointerUnion; + using ValTy = PointerUnion; private: ValTy Val; struct IsInnerUnion { ValTy Val; + IsInnerUnion(ValTy val) : Val(val) {} + template int is() const { return Val.template is() && Val.template get().template is(); } + template T get() const { return Val.template get().template get(); } @@ -247,14 +251,15 @@ template class PointerUnion3 { struct IsPT3 { ValTy Val; + IsPT3(ValTy val) : Val(val) {} + template int is() const { return Val.template is(); } template T get() const { return Val.template get(); } }; public: PointerUnion3() = default; - PointerUnion3(PT1 V) { Val = InnerUnion(V); } PointerUnion3(PT2 V) { Val = InnerUnion(V); } PointerUnion3(PT3 V) { Val = V; } @@ -267,10 +272,9 @@ template class PointerUnion3 { /// Test if the Union currently holds the type matching T. template int is() const { // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. 
- typedef typename ::llvm::PointerUnionTypeSelector< + using Ty = typename ::llvm::PointerUnionTypeSelector< PT1, T, IsInnerUnion, - ::llvm::PointerUnionTypeSelector>::Return - Ty; + ::llvm::PointerUnionTypeSelector>::Return; return Ty(Val).template is(); } @@ -280,10 +284,9 @@ template class PointerUnion3 { template T get() const { assert(is() && "Invalid accessor called"); // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. - typedef typename ::llvm::PointerUnionTypeSelector< + using Ty = typename ::llvm::PointerUnionTypeSelector< PT1, T, IsInnerUnion, - ::llvm::PointerUnionTypeSelector>::Return - Ty; + ::llvm::PointerUnionTypeSelector>::Return; return Ty(Val).template get(); } @@ -348,16 +351,15 @@ struct PointerLikeTypeTraits> { template class PointerUnion4 { public: - typedef PointerUnion InnerUnion1; - typedef PointerUnion InnerUnion2; - typedef PointerUnion ValTy; + using InnerUnion1 = PointerUnion; + using InnerUnion2 = PointerUnion; + using ValTy = PointerUnion; private: ValTy Val; public: PointerUnion4() = default; - PointerUnion4(PT1 V) { Val = InnerUnion1(V); } PointerUnion4(PT2 V) { Val = InnerUnion1(V); } PointerUnion4(PT3 V) { Val = InnerUnion2(V); } @@ -371,9 +373,10 @@ class PointerUnion4 { /// Test if the Union currently holds the type matching T. template int is() const { // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2. - typedef typename ::llvm::PointerUnionTypeSelector< - PT1, T, InnerUnion1, ::llvm::PointerUnionTypeSelector< - PT2, T, InnerUnion1, InnerUnion2>>::Return Ty; + using Ty = typename ::llvm::PointerUnionTypeSelector< + PT1, T, InnerUnion1, + ::llvm::PointerUnionTypeSelector>::Return; return Val.template is() && Val.template get().template is(); } @@ -383,9 +386,10 @@ class PointerUnion4 { template T get() const { assert(is() && "Invalid accessor called"); // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2. 
- typedef typename ::llvm::PointerUnionTypeSelector< - PT1, T, InnerUnion1, ::llvm::PointerUnionTypeSelector< - PT2, T, InnerUnion1, InnerUnion2>>::Return Ty; + using Ty = typename ::llvm::PointerUnionTypeSelector< + PT1, T, InnerUnion1, + ::llvm::PointerUnionTypeSelector>::Return; return Val.template get().template get(); } @@ -452,18 +456,21 @@ struct PointerLikeTypeTraits> { // Teach DenseMap how to use PointerUnions as keys. template struct DenseMapInfo> { - typedef PointerUnion Pair; - typedef DenseMapInfo FirstInfo; - typedef DenseMapInfo SecondInfo; + using Pair = PointerUnion; + using FirstInfo = DenseMapInfo; + using SecondInfo = DenseMapInfo; static inline Pair getEmptyKey() { return Pair(FirstInfo::getEmptyKey()); } + static inline Pair getTombstoneKey() { return Pair(FirstInfo::getTombstoneKey()); } + static unsigned getHashValue(const Pair &PairVal) { intptr_t key = (intptr_t)PairVal.getOpaqueValue(); return DenseMapInfo::getHashValue(key); } + static bool isEqual(const Pair &LHS, const Pair &RHS) { return LHS.template is() == RHS.template is() && (LHS.template is() ? 
FirstInfo::isEqual(LHS.template get(), diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index 1d1eb601a334b..bcd992b4a7163 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -17,23 +17,24 @@ #ifndef LLVM_ADT_STLEXTRAS_H #define LLVM_ADT_STLEXTRAS_H -#include // for std::all_of +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/ErrorHandling.h" +#include #include -#include // for std::size_t -#include // for qsort +#include +#include +#include #include +#include #include #include #include #include -#include // for std::pair - -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/iterator.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorHandling.h" +#include +#include namespace llvm { @@ -50,7 +51,7 @@ template using ValueOfRange = typename std::remove_reference()))>::type; -} // End detail namespace +} // end namespace detail //===----------------------------------------------------------------------===// // Extra additions to @@ -58,6 +59,7 @@ using ValueOfRange = typename std::remove_reference struct identity { using argument_type = Ty; + Ty &operator()(Ty &self) const { return self; } @@ -88,7 +90,7 @@ template class function_ref; template class function_ref { - Ret (*callback)(intptr_t callable, Params ...params); + Ret (*callback)(intptr_t callable, Params ...params) = nullptr; intptr_t callable; template @@ -98,7 +100,7 @@ class function_ref { } public: - function_ref() : callback(nullptr) {} + function_ref() = default; template function_ref(Callable &&callable, @@ -107,6 +109,7 @@ class function_ref { function_ref>::value>::type * = nullptr) : callback(callback_fn::type>), callable(reinterpret_cast(&callable)) {} + Ret operator()(Params ...params) const { return callback(callable, std::forward(params)...); } @@ 
-118,114 +121,95 @@ class function_ref { // delete on something. It is used like this: // // for_each(V.begin(), B.end(), deleter); -// template inline void deleter(T *Ptr) { delete Ptr; } - - //===----------------------------------------------------------------------===// // Extra additions to //===----------------------------------------------------------------------===// -// mapped_iterator - This is a simple iterator adapter that causes a function to -// be applied whenever operator* is invoked on the iterator. -// -template -class mapped_iterator { - RootIt current; - UnaryFunc Fn; -public: - typedef typename std::iterator_traits::iterator_category - iterator_category; - typedef typename std::iterator_traits::difference_type - difference_type; - typedef decltype(std::declval()(*std::declval())) - value_type; +namespace adl_detail { - typedef void pointer; - //typedef typename UnaryFunc::result_type *pointer; - typedef void reference; // Can't modify value returned by fn +using std::begin; - typedef RootIt iterator_type; +template +auto adl_begin(ContainerTy &&container) + -> decltype(begin(std::forward(container))) { + return begin(std::forward(container)); +} - inline const RootIt &getCurrent() const { return current; } - inline const UnaryFunc &getFunc() const { return Fn; } +using std::end; - inline explicit mapped_iterator(const RootIt &I, UnaryFunc F) - : current(I), Fn(F) {} +template +auto adl_end(ContainerTy &&container) + -> decltype(end(std::forward(container))) { + return end(std::forward(container)); +} - inline value_type operator*() const { // All this work to do this - return Fn(*current); // little change - } +using std::swap; - mapped_iterator &operator++() { - ++current; - return *this; - } - mapped_iterator &operator--() { - --current; - return *this; - } - mapped_iterator operator++(int) { - mapped_iterator __tmp = *this; - ++current; - return __tmp; - } - mapped_iterator operator--(int) { - mapped_iterator __tmp = *this; - --current; - 
return __tmp; - } - mapped_iterator operator+(difference_type n) const { - return mapped_iterator(current + n, Fn); - } - mapped_iterator &operator+=(difference_type n) { - current += n; - return *this; - } - mapped_iterator operator-(difference_type n) const { - return mapped_iterator(current - n, Fn); - } - mapped_iterator &operator-=(difference_type n) { - current -= n; - return *this; - } - reference operator[](difference_type n) const { return *(*this + n); } +template +void adl_swap(T &&lhs, T &&rhs) noexcept(noexcept(swap(std::declval(), + std::declval()))) { + swap(std::forward(lhs), std::forward(rhs)); +} - bool operator!=(const mapped_iterator &X) const { return !operator==(X); } - bool operator==(const mapped_iterator &X) const { - return current == X.current; - } - bool operator<(const mapped_iterator &X) const { return current < X.current; } +} // end namespace adl_detail - difference_type operator-(const mapped_iterator &X) const { - return current - X.current; - } -}; +template +auto adl_begin(ContainerTy &&container) + -> decltype(adl_detail::adl_begin(std::forward(container))) { + return adl_detail::adl_begin(std::forward(container)); +} -template -inline mapped_iterator -operator+(typename mapped_iterator::difference_type N, - const mapped_iterator &X) { - return mapped_iterator(X.getCurrent() - N, X.getFunc()); +template +auto adl_end(ContainerTy &&container) + -> decltype(adl_detail::adl_end(std::forward(container))) { + return adl_detail::adl_end(std::forward(container)); } +template +void adl_swap(T &&lhs, T &&rhs) noexcept( + noexcept(adl_detail::adl_swap(std::declval(), std::declval()))) { + adl_detail::adl_swap(std::forward(lhs), std::forward(rhs)); +} + +// mapped_iterator - This is a simple iterator adapter that causes a function to +// be applied whenever operator* is invoked on the iterator. 
+ +template ()(*std::declval()))> +class mapped_iterator + : public iterator_adaptor_base< + mapped_iterator, ItTy, + typename std::iterator_traits::iterator_category, + typename std::remove_reference::type> { +public: + mapped_iterator(ItTy U, FuncTy F) + : mapped_iterator::iterator_adaptor_base(std::move(U)), F(std::move(F)) {} + + ItTy getCurrent() { return this->I; } + + FuncReturnTy operator*() { return F(*this->I); } + +private: + FuncTy F; +}; // map_iterator - Provide a convenient way to create mapped_iterators, just like // make_pair is useful for creating pairs... -// template -inline mapped_iterator map_iterator(const ItTy &I, FuncTy F) { - return mapped_iterator(I, F); +inline mapped_iterator map_iterator(ItTy I, FuncTy F) { + return mapped_iterator(std::move(I), std::move(F)); } /// Helper to determine if type T has a member called rbegin(). template class has_rbegin_impl { - typedef char yes[1]; - typedef char no[2]; + using yes = char[1]; + using no = char[2]; template static yes& test(Inner *I, decltype(I->rbegin()) * = nullptr); @@ -363,12 +347,13 @@ template struct index_sequence; template struct index_sequence_for; namespace detail { + using std::declval; // We have to alias this since inlining the actual type at the usage site // in the parameter list of iterator_facade_base<> below ICEs MSVC 2017. template struct ZipTupleType { - typedef std::tuple())...> type; + using type = std::tuple())...>; }; template @@ -454,11 +439,11 @@ class zip_shortest : public zip_common, Iters...> { public: using Base = zip_common, Iters...>; + zip_shortest(Iters &&... ts) : Base(std::forward(ts)...) {} + bool operator==(const zip_shortest &other) const { return !test(other, index_sequence_for{}); } - - zip_shortest(Iters &&... ts) : Base(std::forward(ts)...) {} }; template