Skip to content

Commit

Permalink
Use cpuinfo to get cpu instruction set extension information (#1600)
Browse files Browse the repository at this point in the history
* Add cpuinfo third-party library for CPU feature detection (licensed under BSD)
  • Loading branch information
startrekdude committed Nov 29, 2020
1 parent 025e361 commit fedc119
Show file tree
Hide file tree
Showing 27 changed files with 4,012 additions and 113 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -276,5 +276,6 @@ if(BUILD_TESTING)
endif()

add_subdirectory(third_party/fastfeat)
add_subdirectory(third_party/cpuinfo)

install(DIRECTORY ${PROJECT_SOURCE_DIR}/Source/API/ DESTINATION "${CMAKE_INSTALL_FULL_INCLUDEDIR}/svt-av1" FILES_MATCHING PATTERN "*.h")
6 changes: 4 additions & 2 deletions Source/Lib/Common/Codec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,16 @@ include_directories(${PROJECT_SOURCE_DIR}/Source/API/
${PROJECT_SOURCE_DIR}/Source/Lib/Common/ASM_AVX2/
${PROJECT_SOURCE_DIR}/Source/Lib/Common/ASM_AVX512/
${PROJECT_SOURCE_DIR}/Source/Lib/Encoder/Codec/
${PROJECT_SOURCE_DIR}/third_party/fastfeat/)
${PROJECT_SOURCE_DIR}/third_party/fastfeat/
${PROJECT_SOURCE_DIR}/third_party/cpuinfo/include/)
else ()
# Include Encoder Subdirectories
include_directories(${PROJECT_SOURCE_DIR}/Source/API/
${PROJECT_SOURCE_DIR}/Source/Lib/Common/Codec/
${PROJECT_SOURCE_DIR}/Source/Lib/Common/C_DEFAULT/
${PROJECT_SOURCE_DIR}/Source/Lib/Encoder/Codec/
${PROJECT_SOURCE_DIR}/third_party/fastfeat/)
${PROJECT_SOURCE_DIR}/third_party/fastfeat/
${PROJECT_SOURCE_DIR}/third_party/cpuinfo/include/)
endif ()
file(GLOB all_files
"*.h"
Expand Down
130 changes: 21 additions & 109 deletions Source/Lib/Common/Codec/common_dsp_rtcd.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
#include "EbPackUnPack_C.h"
#include "EbAvcStyleMcp.h"

// for get_cpu_flags
#include "cpuinfo.h"

/*
* DSP deprecated flags
*/
Expand Down Expand Up @@ -66,120 +69,30 @@ int64_t svt_av1_block_error_c(const TranLow *coeff, const TranLow *dqcoeff,
/**************************************
* Instruction Set Support
**************************************/
#ifdef ARCH_X86_64
// Helper Functions
/*
 * Executes the CPUID instruction for leaf `eax` / sub-leaf `ecx` and stores the
 * resulting register values in `abcd`.
 *
 * On the non-Windows path the four slots are filled in the order
 * EAX, EBX, ECX, EDX (abcd[0]..abcd[3]); the Windows path hands `abcd`
 * directly to the __cpuidex intrinsic.
 */
static INLINE void run_cpuid(int eax, int ecx, int* abcd) {
#ifdef _WIN32
__cpuidex(abcd, eax, ecx);
#else
int ebx = 0, edx = 0;
/* The two preprocessor branches below only choose the opening of the asm
   statement (instruction string + EBX operand); the remaining operands are
   shared by both variants. */
#if defined(__i386__) && defined(__PIC__)
/* in case of PIC under 32-bit EBX cannot be clobbered: EBX is parked in EDI
   around CPUID and the EBX result is read back through EDI ("=D") */
__asm__("movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi"
: "=D"(ebx),
#else
__asm__("cpuid"
: "+b"(ebx),
#endif
"+a"(eax),
"+c"(ecx),
"=d"(edx));
/* publish the register values to the caller */
abcd[0] = eax;
abcd[1] = ebx;
abcd[2] = ecx;
abcd[3] = edx;
#endif
}

/*
 * Reads the low 32 bits of XCR0 and reports whether both the XMM and the YMM
 * state bits (bits 1 and 2) are set.
 * Returns 1 when both are enabled, 0 otherwise.
 */
static INLINE int32_t check_xcr0_ymm() {
    const uint32_t xmm_ymm_state = 6; /* bit 1 (xmm) | bit 2 (ymm) */
    uint32_t       xcr0_low;

#ifdef _WIN32
    /* min VS2010 SP1 compiler is required */
    xcr0_low = (uint32_t)_xgetbv(0);
#else
    __asm__("xgetbv" : "=a"(xcr0_low) : "c"(0) : "%edx");
#endif

    return (xcr0_low & xmm_ymm_state) == xmm_ymm_state;
}

/*
 * Checks the feature set this file associates with 4th generation Intel Core
 * processors: FMA, MOVBE and OSXSAVE (CPUID leaf 1), XMM/YMM state enabled in
 * XCR0, AVX2, BMI1 and BMI2 (CPUID leaf 7), and LZCNT (CPUID leaf 0x80000001).
 * Returns 1 only when every probe succeeds, 0 as soon as one fails.
 */
static int32_t check_4thgen_intel_core_features() {
    /* CPUID.(EAX=01H, ECX=0H):ECX -- FMA[bit 12], MOVBE[bit 22], OSXSAVE[bit 27] */
    const int leaf1_ecx_required = (1 << 12) | (1 << 22) | (1 << 27);
    /* CPUID.(EAX=07H, ECX=0H):EBX -- BMI1[bit 3], AVX2[bit 5], BMI2[bit 8] */
    const int leaf7_ebx_required = (1 << 5) | (1 << 3) | (1 << 8);
    /* CPUID.(EAX=80000001H):ECX -- LZCNT[bit 5] */
    const int ext_ecx_lzcnt = (1 << 5);
    int       regs[4];

    run_cpuid(1, 0, regs);
    if ((regs[2] & leaf1_ecx_required) != leaf1_ecx_required) return 0;

    /* XCR0 is only read after the OSXSAVE bit has been confirmed above */
    if (!check_xcr0_ymm()) return 0;

    run_cpuid(7, 0, regs);
    if ((regs[1] & leaf7_ebx_required) != leaf7_ebx_required) return 0;

    run_cpuid(0x80000001, 0, regs);
    return (regs[2] & ext_ecx_lzcnt) != 0;
}

/*
 * Reads the low 32 bits of XCR0 and reports whether the xmm, ymm and zmm state
 * bits (bits 1, 2 and 5..7) are all set.
 * Returns 1 when every one of them is enabled, 0 otherwise.
 */
static INLINE int check_xcr0_zmm() {
    const uint32_t required_state = (7 << 5) | (1 << 2) | (1 << 1);
    uint32_t       xcr0_low;

#ifdef _WIN32
    /* min VS2010 SP1 compiler is required */
    xcr0_low = (uint32_t)_xgetbv(0);
#else
    __asm__("xgetbv" : "=a"(xcr0_low) : "c"(0) : "%edx");
#endif

    return (xcr0_low & required_state) == required_state;
}

/*
 * Reports whether the AVX-512 subsets used by this library are usable:
 * the 4th-gen Core baseline must pass, XCR0 must have the xmm/ymm/zmm state
 * bits enabled, and CPUID leaf 7 must advertise F, DQ, CD, BW and VL.
 * Returns 1 when all of that holds, 0 otherwise.
 */
static int32_t can_use_intel_avx512() {
    /* CPUID.(EAX=07H, ECX=0):EBX feature bits that must all be present */
    const int required_ebx_bits = (1u << 16) // AVX-512F
        | (1u << 17) // AVX-512DQ
        | (1u << 28) // AVX-512CD
        | (1u << 30) // AVX-512BW
        | (1u << 31); // AVX-512VL
    int regs[4];

    /* baseline first, then make sure the OS supports ZMM registers (and YMM, and XMM);
       the short-circuit keeps the probes in that order */
    if (!check_4thgen_intel_core_features() || !check_xcr0_zmm()) return 0;

    run_cpuid(7, 0, regs);
    return (regs[1] & required_ebx_bits) == required_ebx_bits;
}

/*
 * Builds and returns the CPU_FLAGS bitmask describing the instruction-set
 * extensions detected on the running machine. Starts from 0 and ORs in one
 * CPU_FLAGS_* bit per positive check.
 *
 * NOTE(review): as it appears here the body contains both the cpuinfo_has_x86_*
 * queries and the check_4thgen_intel_core_features() / can_use_intel_avx512()
 * probes; the flags they set overlap -- confirm which set is meant to remain.
 */
CPU_FLAGS get_cpu_flags() {
CPU_FLAGS flags = 0;

/* Testing CPU features in more detail would require a more accurate implementation.
Helpful documentation:
https://docs.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex?redirectedfrom=MSDN&view=vs-2019
*/
// safe to call multiple times, and threadsafe
// also correctly checks whether the OS saves AVX(2|512) registers
// NOTE(review): any result returned by cpuinfo_initialize() is discarded here.
cpuinfo_initialize();

// one flag per SSE-family (and MMX) feature reported by cpuinfo
flags |= cpuinfo_has_x86_mmx() ? CPU_FLAGS_MMX : 0;
flags |= cpuinfo_has_x86_sse() ? CPU_FLAGS_SSE : 0;
flags |= cpuinfo_has_x86_sse2() ? CPU_FLAGS_SSE2 : 0;
flags |= cpuinfo_has_x86_sse3() ? CPU_FLAGS_SSE3 : 0;
flags |= cpuinfo_has_x86_ssse3() ? CPU_FLAGS_SSSE3 : 0;
flags |= cpuinfo_has_x86_sse4_1() ? CPU_FLAGS_SSE4_1 : 0;
flags |= cpuinfo_has_x86_sse4_2() ? CPU_FLAGS_SSE4_2 : 0;

// CPUID-based probe: a pass sets everything from MMX up to AVX2 in one go
if (check_4thgen_intel_core_features()) {
flags |= CPU_FLAGS_MMX | CPU_FLAGS_SSE | CPU_FLAGS_SSE2 | CPU_FLAGS_SSE3 | CPU_FLAGS_SSSE3 |
CPU_FLAGS_SSE4_1 | CPU_FLAGS_SSE4_2 | CPU_FLAGS_AVX | CPU_FLAGS_AVX2;
}
flags |= cpuinfo_has_x86_avx() ? CPU_FLAGS_AVX : 0;
flags |= cpuinfo_has_x86_avx2() ? CPU_FLAGS_AVX2 : 0;

// CPUID-based probe: a pass sets all five AVX-512 subset flags at once
if (can_use_intel_avx512()) {
flags |= CPU_FLAGS_AVX512F | CPU_FLAGS_AVX512DQ | CPU_FLAGS_AVX512CD | CPU_FLAGS_AVX512BW |
CPU_FLAGS_AVX512VL;
}
// per-subset AVX-512 flags as reported by cpuinfo
flags |= cpuinfo_has_x86_avx512f() ? CPU_FLAGS_AVX512F : 0;
flags |= cpuinfo_has_x86_avx512dq() ? CPU_FLAGS_AVX512DQ : 0;
flags |= cpuinfo_has_x86_avx512cd() ? CPU_FLAGS_AVX512CD : 0;
flags |= cpuinfo_has_x86_avx512bw() ? CPU_FLAGS_AVX512BW : 0;
flags |= cpuinfo_has_x86_avx512vl() ? CPU_FLAGS_AVX512VL : 0;

return flags;
}
Expand All @@ -192,7 +105,6 @@ CPU_FLAGS get_cpu_flags_to_use() {
#endif
return flags;
}
#endif

#ifdef ARCH_X86_64
#ifndef NON_AVX512_SUPPORT
Expand Down
3 changes: 2 additions & 1 deletion Source/Lib/Decoder/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ endif()
set_target_properties(SvtAv1Dec PROPERTIES VERSION ${DEC_VERSION})
set_target_properties(SvtAv1Dec PROPERTIES SOVERSION ${DEC_VERSION_MAJOR})
add_dependencies(SvtAv1Dec EbVersionHeaderGen)
target_link_libraries(SvtAv1Dec ${PLATFORM_LIBS})
target_link_libraries(SvtAv1Dec PUBLIC ${PLATFORM_LIBS})
target_link_libraries(SvtAv1Dec PRIVATE cpuinfo_public)
install(TARGETS SvtAv1Dec DESTINATION "${CMAKE_INSTALL_LIBDIR}")

configure_file(pkg-config.pc.in ${CMAKE_BINARY_DIR}/SvtAv1Dec.pc @ONLY)
Expand Down
3 changes: 2 additions & 1 deletion Source/Lib/Encoder/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ endif()

set_target_properties(SvtAv1Enc PROPERTIES VERSION ${ENC_VERSION})
set_target_properties(SvtAv1Enc PROPERTIES SOVERSION ${ENC_VERSION_MAJOR})
target_link_libraries(SvtAv1Enc ${PLATFORM_LIBS})
target_link_libraries(SvtAv1Enc PUBLIC ${PLATFORM_LIBS})
target_link_libraries(SvtAv1Enc PRIVATE cpuinfo_public)
install(TARGETS SvtAv1Enc DESTINATION "${CMAKE_INSTALL_LIBDIR}")

configure_file(pkg-config.pc.in ${CMAKE_BINARY_DIR}/SvtAv1Enc.pc @ONLY)
Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ set(lib_list
$<TARGET_OBJECTS:ENCODER_ASM_AVX2>
$<TARGET_OBJECTS:ENCODER_ASM_AVX512>
$<TARGET_OBJECTS:ENCODER_GLOBALS>
cpuinfo_public
gtest_all)
if(UNIX)
# App Source Files
Expand Down
165 changes: 165 additions & 0 deletions third_party/cpuinfo/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# Build script for the cpuinfo library kept under third_party/.
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)

include(GNUInstallDirs)

# ---[ Options.
# Both settings are cache strings; the STRINGS property lists the accepted
# values.
set(CPUINFO_RUNTIME_TYPE
    "default"
    CACHE STRING
    "Type of runtime library (shared, static, or default) to use")
set_property(
  CACHE CPUINFO_RUNTIME_TYPE
  PROPERTY STRINGS default static shared)

set(CPUINFO_LOG_LEVEL
    "default"
    CACHE STRING
    "Minimum logging level (info with lower severity will be ignored)")
set_property(
  CACHE CPUINFO_LOG_LEVEL
  PROPERTY STRINGS default debug info warning error fatal none)

# CPUINFO_TARGET_ENABLE_C99(<target>)
#
# Requests C99 without compiler extensions for <target>.
#   * CMake 3.1 or newer: through the C_STANDARD / C_EXTENSIONS properties.
#   * Older CMake: by adding -std=c99 as a private compile option, except
#     with MSVC, where nothing is added.
macro(CPUINFO_TARGET_ENABLE_C99 target)
  if(NOT ${CMAKE_VERSION} VERSION_LESS "3.1")
    set_target_properties(${target}
      PROPERTIES
        C_STANDARD 99
        C_EXTENSIONS NO)
  else()
    if(NOT MSVC)
      target_compile_options(${target} PRIVATE -std=c99)
    endif()
  endif()
endmacro()

# CPUINFO_TARGET_ENABLE_CXX11(<target>)
#
# Requests C++11 without compiler extensions for <target>.
#   * CMake 3.1 or newer: through the CXX_STANDARD / CXX_EXTENSIONS properties.
#   * Older CMake: by adding -std=c++11 as a private compile option, except
#     with MSVC, where nothing is added.
macro(CPUINFO_TARGET_ENABLE_CXX11 target)
  if(NOT ${CMAKE_VERSION} VERSION_LESS "3.1")
    set_target_properties(${target}
      PROPERTIES
        CXX_STANDARD 11
        CXX_EXTENSIONS NO)
  else()
    if(NOT MSVC)
      target_compile_options(${target} PRIVATE -std=c++11)
    endif()
  endif()
endmacro()

# CPUINFO_TARGET_RUNTIME_LIBRARY(<target>)
#
# With MSVC, and only when CPUINFO_RUNTIME_TYPE is not "default", adds the
# runtime-library switch to <target> as a private compile option:
#   shared -> /MD  (/MDd in the Debug configuration)
#   static -> /MT  (/MTd in the Debug configuration)
# In every other situation the target is left untouched.
macro(CPUINFO_TARGET_RUNTIME_LIBRARY target)
  if(MSVC AND NOT CPUINFO_RUNTIME_TYPE STREQUAL "default")
    if(CPUINFO_RUNTIME_TYPE STREQUAL "shared")
      target_compile_options(${target}
        PRIVATE "/MD$<$<CONFIG:Debug>:d>")
    elseif(CPUINFO_RUNTIME_TYPE STREQUAL "static")
      target_compile_options(${target}
        PRIVATE "/MT$<$<CONFIG:Debug>:d>")
    endif()
  endif()
endmacro()

# ---[ Build flags
# Decide whether cpuinfo knows how to probe the target. The result is stored in
# CPUINFO_SUPPORTED_PLATFORM, which starts out TRUE and is cleared (with a
# warning) when the target processor or operating system is missing or not in
# the lists below.
set(CPUINFO_SUPPORTED_PLATFORM TRUE)

# Target processor: an unset value is tolerated only when IOS is set.
if(NOT CMAKE_SYSTEM_PROCESSOR)
  if(NOT IOS)
    message(WARNING
      "Target processor architecture is not specified. "
      "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
    set(CPUINFO_SUPPORTED_PLATFORM FALSE)
  endif()
elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64)$")
  message(WARNING
    "Target processor architecture \"${CMAKE_SYSTEM_PROCESSOR}\" is not supported in cpuinfo. "
    "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
  set(CPUINFO_SUPPORTED_PLATFORM FALSE)
endif()

# Target operating system.
if(NOT CMAKE_SYSTEM_NAME)
  message(WARNING
    "Target operating system is not specified. "
    "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
  set(CPUINFO_SUPPORTED_PLATFORM FALSE)
elseif(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|Android)$")
  # The version test is spelled "NOT ... VERSION_LESS" on purpose: the
  # VERSION_GREATER_EQUAL operator only exists in CMake 3.7 and newer, so using
  # it would abort configuration on older CMake releases that this file's
  # declared minimum still allows. The condition is otherwise unchanged:
  # with CMake >= 3.14 every system name outside the list except "iOS" is
  # reported as unsupported.
  if((NOT ${CMAKE_VERSION} VERSION_LESS "3.14") AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
    message(WARNING
      "Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. "
      "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
    set(CPUINFO_SUPPORTED_PLATFORM FALSE)
  endif()
endif()

# ---[ cpuinfo library
# Sources that are always compiled.
set(CPUINFO_SRCS
  src/init.c
  src/api.c)

# Platform-specific sources are only added when the platform checks passed.
if(CPUINFO_SUPPORTED_PLATFORM)
  # x86 / x86-64 back end (never for Emscripten).
  if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$"))
    list(APPEND CPUINFO_SRCS
      src/x86/x86_init.c
      src/x86/vendor.c
      src/x86/isa.c)

    # One OS-specific init file on top of the common x86 sources.
    if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
      list(APPEND CPUINFO_SRCS
        src/x86/linux/init.c)
    elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
      list(APPEND CPUINFO_SRCS
        src/x86/mach/x86_mach_init.c)
    elseif(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
      list(APPEND CPUINFO_SRCS
        src/x86/windows/init.c)
    endif()
  endif()

  # On Linux and Android a threads package is required; pthreads is preferred.
  if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
    set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
    set(THREADS_PREFER_PTHREAD_FLAG TRUE)
    find_package(Threads REQUIRED)
  endif()
endif()

# The library itself is built as an OBJECT library from the collected sources.
add_library(cpuinfo OBJECT ${CPUINFO_SRCS})

CPUINFO_TARGET_ENABLE_C99(cpuinfo)

# Directory-scoped on purpose or not, this makes the clog headers visible to
# everything defined in this directory.
include_directories(deps/clog/include)

CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo)

# Position-independent objects and the single public header.
set_target_properties(cpuinfo
  PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    PUBLIC_HEADER include/cpuinfo.h)

if(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
  # Target Windows 7+ API
  target_compile_definitions(cpuinfo PRIVATE _WIN32_WINNT=0x0601)
endif()

# Both calls use BEFORE, so the later one (src) ends up ahead of include.
target_include_directories(cpuinfo BEFORE PUBLIC include)
target_include_directories(cpuinfo BEFORE PRIVATE src)

# Translate the textual CPUINFO_LOG_LEVEL option into the numeric
# CPUINFO_LOG_LEVEL compile definition:
#   none=0  fatal=1  error=2  warning=3  info=4  debug=5
# Any other value stops configuration.
if(CPUINFO_LOG_LEVEL STREQUAL "default")
  # default logging level: error (subject to change)
  target_compile_definitions(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=2)
elseif(CPUINFO_LOG_LEVEL STREQUAL "debug")
  target_compile_definitions(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=5)
elseif(CPUINFO_LOG_LEVEL STREQUAL "info")
  target_compile_definitions(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=4)
elseif(CPUINFO_LOG_LEVEL STREQUAL "warning")
  target_compile_definitions(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=3)
elseif(CPUINFO_LOG_LEVEL STREQUAL "error")
  target_compile_definitions(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=2)
elseif(CPUINFO_LOG_LEVEL STREQUAL "fatal")
  target_compile_definitions(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=1)
elseif(CPUINFO_LOG_LEVEL STREQUAL "none")
  target_compile_definitions(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=0)
else()
  message(FATAL_ERROR "Unsupported logging level ${CPUINFO_LOG_LEVEL}")
endif()

# Expose the outcome of the platform checks as an interface definition
# (1 or 0); on Linux/Android the sources are additionally built with
# _GNU_SOURCE.
if(CPUINFO_SUPPORTED_PLATFORM)
  target_compile_definitions(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=1)
  if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
    target_compile_definitions(cpuinfo PRIVATE _GNU_SOURCE=1)
  endif()
else()
  target_compile_definitions(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=0)
endif()

# ---[ cpuinfo dependencies: clog
# CLOG_SOURCE_DIR may be provided from outside; otherwise the bundled copy
# under deps/clog is used.
if(NOT DEFINED CLOG_SOURCE_DIR)
  set(CLOG_SOURCE_DIR "deps/clog")
endif()

# Only pull in clog when no target of that name exists yet.
if(NOT TARGET clog)
  # clog is configured without its tests and with the same runtime type as
  # cpuinfo.
  set(CLOG_BUILD_TESTS OFF CACHE BOOL "")
  set(CLOG_RUNTIME_TYPE "${CPUINFO_RUNTIME_TYPE}" CACHE STRING "")
  add_subdirectory("${CLOG_SOURCE_DIR}")
  # We build static version of clog but a dynamic library may indirectly depend on it
  set_property(TARGET clog PROPERTY POSITION_INDEPENDENT_CODE ON)
endif()

# cpuinfo_public is an INTERFACE target that carries no code of its own: it
# hands the object files of cpuinfo and clog to whatever target links it.
add_library(cpuinfo_public INTERFACE)
target_sources(cpuinfo_public
  INTERFACE
    $<TARGET_OBJECTS:cpuinfo>
    $<TARGET_OBJECTS:clog>)
Loading

0 comments on commit fedc119

Please sign in to comment.