Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ IndentCaseLabels: true
SpaceAfterTemplateKeyword: true
SpaceBeforeCpp11BracedList: false
SortIncludes: true
IndentPPDirectives: None
IndentPPDirectives: AfterHash
IncludeCategories:
- Regex: '^<[a-z_]+>'
Priority: 1
Expand Down
25 changes: 11 additions & 14 deletions .claude/skills/perf-report/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ Do NOT summarize, collapse, or hide entries. Do NOT dismiss anything as "noise"

### 3. Cross-reference with master history

**This step is MANDATORY.** For every test that shows as slower or faster above 1.10x, check the public CI database to determine if this is a known flaky test or a genuine change introduced by the PR. Do NOT skip this step or use alternative approaches (like manually fetching perf reports from other PRs).
For every test that shows as slower or faster above 1.10x, check the public CI database to determine if this is a known flaky test or a genuine change introduced by the PR.

Query the `default.checks` table on `play.clickhouse.com` with `user=explorer`:
Query the public CI database:

```bash
clickhouse client --format PrettyCompactNoEscapes --host play.clickhouse.com --user explorer --secure --query "
clickhouse client --host play.clickhouse.com --user explorer --secure -q "
SELECT
replaceRegexpOne(test_name, '::(new|old)$', '') AS test,
countIf(test_status = 'slower') AS slower_count,
Expand All @@ -58,25 +58,22 @@ SELECT
count() AS total_runs
FROM default.checks
WHERE pull_request_number = 0
AND check_name LIKE '%Performance%amd%'
AND check_name LIKE '%Performance%amd%' -- or arm
AND check_start_time >= now() - INTERVAL 30 DAY
AND test_name IN (
'norm_distance #2::new',
'array_sort #0::new'
'test_name #N::new',
...
)
GROUP BY test
ORDER BY slower_count DESC, test
"
```

Run **one query per architecture** — use `'%Performance%amd%'` for x86 and `'%Performance%arm%'` for ARM.

**Critical details:**
- **Host:** `play.clickhouse.com`, **user:** `explorer` (NOT `play`)
- **Table:** `default.checks` (NOT `perftest` or other tables)
- `pull_request_number = 0` filters to master-only commits (no PR noise)
- Test names in the DB have `::new` and `::old` suffixes — always query with `::new`
- Include ALL changed tests in a single IN clause to minimize round-trips
**Important:**
- `pull_request_number = 0` means master commits
- Test names in the DB have `::new` and `::old` suffixes — query with `::new`
- Use `%amd%` for AMD results and `%arm%` for ARM results
- Query both architectures separately

### 4. Classify each change

Expand Down
38 changes: 12 additions & 26 deletions cmake/cpu_features.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -85,42 +85,27 @@ elseif (ARCH_PPC64LE)

elseif (ARCH_AMD64)
# x86-64 microarchitecture levels (https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels):
# 1 - SSE2 baseline, maximum compatibility with older/embedded hardware
# 2 - SSE4.2, SSSE3, POPCNT (ClickHouse's historical baseline)
# 3 - AVX2, BMI1/2, FMA, F16C, LZCNT, MOVBE etc. (default)
# 4 - AVX-512F/BW/CD/DQ/VL
set (X86_ARCH_LEVEL "3" CACHE STRING "x86-64 microarchitecture level (1, 2, 3, 4)")
# 1 SSE2 baseline, maximum compatibility with older/embedded hardware
# 2 SSE4.2, SSSE3, POPCNT (default, matches ClickHouse's historical baseline)
# 3 AVX2, BMI1/2, FMA, F16C etc.
# 4 AVX-512F/BW/CD/DQ/VL
set (X86_ARCH_LEVEL "2" CACHE STRING "x86-64 microarchitecture level (1, 2, 3, 4)")
set_property (CACHE X86_ARCH_LEVEL PROPERTY STRINGS "1" "2" "3" "4")

if (NOT X86_ARCH_LEVEL MATCHES "^[1-4]$")
message (FATAL_ERROR "X86_ARCH_LEVEL must be one of: 1, 2, 3, 4 (got '${X86_ARCH_LEVEL}')")
endif ()

# Best-effort check: verify that the build host supports the requested microarchitecture level. Build-time tools
# (tablegen, code generators) are compiled with these flags and will crash with SIGILL otherwise.
# Same best-effort check for x86 as above for ARM.
if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64" AND X86_ARCH_LEVEL VERSION_GREATER_EQUAL 2)
# Test for a representative flag at each level. We intentionally keep this simple - no real CPU has avx2 without
# fma/bmi2, so checking the headline flag is enough while avoiding false positives from /proc/cpuinfo quirks
# (containers, nested virtualization, unusual flag naming).
set (X86_REPRESENTATIVE_FLAG "sse4_2")
if (X86_ARCH_LEVEL VERSION_GREATER_EQUAL 3)
set (X86_REPRESENTATIVE_FLAG "avx2")
endif ()
if (X86_ARCH_LEVEL VERSION_GREATER_EQUAL 4)
# `x86-64-v4` requires AVX-512 F/BW/CD/DQ/VL. Knights Landing has `avx512f` but lacks BW/DQ/VL, so check one
# of those instead - any CPU with `avx512vl` will also have F.
set (X86_REPRESENTATIVE_FLAG "avx512vl")
endif ()
# Test for flags in the default v2 profile.
execute_process(
COMMAND grep -P "(?=.*${X86_REPRESENTATIVE_FLAG})" /proc/cpuinfo
COMMAND grep -P "^(?=.*ssse3)(?=.*sse4_1)(?=.*sse4_2)" /proc/cpuinfo
OUTPUT_VARIABLE FLAGS)
if (NOT FLAGS)
message (FATAL_ERROR
"The build machine does not support x86-64-v${X86_ARCH_LEVEL} "
"(${X86_REPRESENTATIVE_FLAG} not found in /proc/cpuinfo). "
"Run cmake with -DX86_ARCH_LEVEL=<level> to lower the requirement.")
endif ()
endif ()
MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DX86_ARCH_LEVEL=1")
endif()
endif()

# ClickHouse can be cross-compiled (e.g. on an ARM host for x86), but it is also possible to build ClickHouse on an x86 host without
# AVX for an x86 target with AVX. We only assume that the compiler can emit certain SIMD instructions; we don't care whether the host
# system is able to run the binary.
Expand All @@ -134,6 +119,7 @@ elseif (ARCH_AMD64)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -mpclmul")
list (APPEND RUSTFLAGS_CPU "-C" "target-feature=+pclmulqdq")
endif ()

else ()
# RISC-V + exotic platforms
endif ()
17 changes: 15 additions & 2 deletions cmake/linux/default_libs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,21 @@ endif()

if (ENABLE_LLVM_LIBC_MATH)
link_directories("${CMAKE_BINARY_DIR}/contrib/libllvmlibc-cmake")
target_link_libraries(global-libs INTERFACE libllvmlibc)
set (DEFAULT_LIBS "${DEFAULT_LIBS} -llibllvmlibc")

if (ARCH_AMD64)
if (X86_ARCH_LEVEL VERSION_LESS 2)
# Compat mode: single library, no dispatch
target_link_libraries(global-libs INTERFACE libllvmlibc)
set (DEFAULT_LIBS "${DEFAULT_LIBS} -llibllvmlibc")
else()
# Dispatch mode: v2/v3 variants with runtime CPU detection
target_link_libraries(global-libs INTERFACE llvmlibc_dispatch libllvmlibc_x86_64_v2 libllvmlibc_x86_64_v3)
set (DEFAULT_LIBS "${DEFAULT_LIBS} -lllvmlibc_dispatch -llibllvmlibc_x86_64_v2 -llibllvmlibc_x86_64_v3")
endif()
elseif (ARCH_AARCH64)
target_link_libraries(global-libs INTERFACE libllvmlibc)
set (DEFAULT_LIBS "${DEFAULT_LIBS} -llibllvmlibc")
endif()
endif()

if (OS_ANDROID)
Expand Down
126 changes: 0 additions & 126 deletions cmake/localize_rust_c_symbols.sh

This file was deleted.

43 changes: 0 additions & 43 deletions contrib/corrosion-cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,6 @@ function(clickhouse_import_crate)
enable_dummy_launchers_if_needed()
endfunction()

# Configure a Rust crate target with ClickHouse-specific flags.
function(clickhouse_config_crate_flags target_name)
corrosion_set_env_vars(${target_name} "CFLAGS=${RUST_CFLAGS}")
corrosion_set_env_vars(${target_name} "CXXFLAGS=${RUST_CXXFLAGS}")
Expand Down Expand Up @@ -258,46 +257,4 @@ function(clickhouse_config_crate_flags target_name)
# Refs: https://github.com/rust-lang/cargo/issues/15099#issuecomment-2732847355
corrosion_set_env_vars(${target_name} "CARGO_NET_OFFLINE=true")
corrosion_link_libraries(${target_name} cxx)

# Localize all C-namespace symbols from Rust static libraries so they
# don't shadow our own implementations (libllvmlibc, glibc-compat).
# Rust's compiler_builtins exports cbrt, fma, sin, etc. as global symbols;
# without this, they silently win at link time and bypass our -falign-functions=64
# and -march flags, causing performance regressions.
#
# For workspace crates the .a may land in a parent directory rather than
# CMAKE_CURRENT_BINARY_DIR. Set IMPORTED_LOCATION on the target before
# calling this function if the default path is wrong (see wasmtime for an
# example).
get_target_property(_rust_lib_path ${target_name} IMPORTED_LOCATION)
if (NOT _rust_lib_path OR _rust_lib_path MATCHES "NOTFOUND")
set(_rust_lib_path "${CMAKE_CURRENT_BINARY_DIR}/lib${target_name}.a")
endif()

# Skip on Darwin (Mach-O), ppc64le (broken .eh_frame), and dummy builds.
if (CMAKE_OBJCOPY AND NOT OS_DARWIN
AND NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le"
AND NOT CMAKE_AR MATCHES "dummy_compiler_linker")
# Verify the library path looks plausible at configure time. The file
# won't exist yet (cargo hasn't run), but the parent directory must exist
# and the filename must end in .a — otherwise someone forgot to set
# IMPORTED_LOCATION for a workspace subcrate.
get_filename_component(_rust_lib_dir "${_rust_lib_path}" DIRECTORY)
if (NOT IS_DIRECTORY "${_rust_lib_dir}")
message(FATAL_ERROR
"localize_rust_c_symbols: output directory '${_rust_lib_dir}' for "
"target '${target_name}' does not exist. If this is a workspace "
"subcrate, set IMPORTED_LOCATION on the target before calling "
"clickhouse_config_crate_flags(). Expected library: ${_rust_lib_path}")
endif()

add_custom_target(localize_rust_c_${target_name}
COMMAND bash "${ClickHouse_SOURCE_DIR}/cmake/localize_rust_c_symbols.sh"
"${_rust_lib_path}" "${CMAKE_AR}" "${CMAKE_OBJCOPY}" "${NM_PATH}"
COMMENT "Localizing C-namespace symbols in ${target_name}"
VERBATIM
)
add_dependencies(localize_rust_c_${target_name} cargo-build_${target_name})
add_dependencies(${target_name} localize_rust_c_${target_name})
endif()
endfunction()
28 changes: 11 additions & 17 deletions contrib/fastops-cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,27 @@ endif()

set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/fastops")

set(SRCS
"${LIBRARY_DIR}/fastops/fastops.cpp"
)
set(SRCS "")

if (ARCH_AMD64)
list(APPEND SRCS "${LIBRARY_DIR}/fastops/avx2/ops_avx2.cpp")
endif()

if (ARCH_AMD64 AND X86_ARCH_LEVEL VERSION_LESS 3)
# Below v3: need runtime dispatch with AVX-only and Plain fallbacks.
list(APPEND SRCS
"${LIBRARY_DIR}/fastops/avx/ops_avx.cpp"
"${LIBRARY_DIR}/fastops/plain/ops_plain.cpp"
"${LIBRARY_DIR}/fastops/core/avx_id.cpp"
)
if(ARCH_AMD64)
set (SRCS ${SRCS} "${LIBRARY_DIR}/fastops/avx/ops_avx.cpp")
set_source_files_properties("${LIBRARY_DIR}/fastops/avx/ops_avx.cpp" PROPERTIES COMPILE_FLAGS "-mavx -DNO_AVX2")
set_source_files_properties("${LIBRARY_DIR}/fastops/avx2/ops_avx2.cpp" PROPERTIES COMPILE_FLAGS "-mavx2 -mfma -DNO_AVX")

set (SRCS ${SRCS} "${LIBRARY_DIR}/fastops/avx2/ops_avx2.cpp")
set_source_files_properties("${LIBRARY_DIR}/fastops/avx2/ops_avx2.cpp" PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")

set (SRCS ${SRCS} "${LIBRARY_DIR}/fastops/plain/ops_plain.cpp" "${LIBRARY_DIR}/fastops/core/avx_id.cpp")
elseif(ARCH_AARCH64)
list(APPEND SRCS "${LIBRARY_DIR}/fastops/neon/ops_neon.cpp")
set (SRCS ${SRCS} "${LIBRARY_DIR}/fastops/neon/ops_neon.cpp")

# SVE requires arm_sve.h which is only available on Linux toolchains (not Darwin/FreeBSD).
if(OS_LINUX)
list(APPEND SRCS "${LIBRARY_DIR}/fastops/sve/ops_sve.cpp")
set (SRCS ${SRCS} "${LIBRARY_DIR}/fastops/sve/ops_sve.cpp")
set_source_files_properties("${LIBRARY_DIR}/fastops/sve/ops_sve.cpp" PROPERTIES COMPILE_FLAGS "-march=armv8-a+sve")
endif()
endif()

set (SRCS ${SRCS} "${LIBRARY_DIR}/fastops/fastops.cpp")

add_library(_fastops ${SRCS})

Expand Down
6 changes: 3 additions & 3 deletions contrib/libdivide-cmake/libdivide-config.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#if defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__)
#if defined(__SSE2__)
# define LIBDIVIDE_SSE2
#elif defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__)
# define LIBDIVIDE_AVX512
#elif defined(__AVX2__)
# define LIBDIVIDE_AVX2
#elif defined(__SSE2__)
# define LIBDIVIDE_SSE2
#elif defined(__aarch64__) && defined(__ARM_NEON)
# define LIBDIVIDE_NEON
#endif
Loading
Loading