diff --git a/.github/workflows/arm.yml b/.github/workflows/arm.yml index e1fad3af1..1362b57b2 100644 --- a/.github/workflows/arm.yml +++ b/.github/workflows/arm.yml @@ -34,7 +34,6 @@ jobs: uses: ./.github/workflows/task-unit-test.yml with: env: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - run-valgrind: false # run the job without valgrind stop-runner: name: Stop self-hosted EC2 runner diff --git a/.github/workflows/debian11.yml b/.github/workflows/debian11.yml index b4cdaea7f..50d8d3826 100644 --- a/.github/workflows/debian11.yml +++ b/.github/workflows/debian11.yml @@ -1,9 +1,9 @@ name: debian bullseye flow -on: [workflow_dispatch] +on: [workflow_dispatch, workflow_call] jobs: bullseye: uses: ./.github/workflows/task-unit-test.yml with: - container: debian:bullseye + container: gcc:11-bullseye diff --git a/.github/workflows/event-merge-to-queue.yml b/.github/workflows/event-merge-to-queue.yml index d54e9920f..eb99c280c 100644 --- a/.github/workflows/event-merge-to-queue.yml +++ b/.github/workflows/event-merge-to-queue.yml @@ -29,9 +29,7 @@ jobs: # with: # container: ubuntu:xenial bullseye: - uses: ./.github/workflows/task-unit-test.yml - with: - container: debian:bullseye + uses: ./.github/workflows/debian11.yml # amazonlinux2: # needs: [check-if-docs-only] # if: ${{ needs.check-if-docs-only.outputs.only-docs-changed == 'false' }} diff --git a/.github/workflows/event-nightly.yml b/.github/workflows/event-nightly.yml index 9a378c441..97ff189e0 100644 --- a/.github/workflows/event-nightly.yml +++ b/.github/workflows/event-nightly.yml @@ -16,48 +16,38 @@ jobs: uses: ./.github/workflows/task-unit-test.yml with: container: ubuntu:jammy - run-valgrind: false focal: uses: ./.github/workflows/task-unit-test.yml with: container: ubuntu:focal - run-valgrind: false # bionic: # uses: ./.github/workflows/task-unit-test.yml # with: # container: ubuntu:bionic -# run-valgrind: false -# xenial: -# uses: ./.github/workflows/task-unit-test.yml -# with: -# container: ubuntu:xenial -# run-valgrind: false bullseye: - uses: ./.github/workflows/task-unit-test.yml - with: - container: debian:bullseye - run-valgrind: false -# centos7: -# uses: ./.github/workflows/task-unit-test.yml -# with: -# container: centos:7 -# run-valgrind: false -# amazonlinux2: -# uses: ./.github/workflows/amazon2.yml + uses: ./.github/workflows/debian11.yml + # amazonlinux2: + # uses: ./.github/workflows/amazon2.yml mariner2: uses: ./.github/workflows/mariner2.yml rocky8: uses: ./.github/workflows/task-unit-test.yml with: container: rockylinux:8 - run-valgrind: false rocky9: uses: ./.github/workflows/task-unit-test.yml with: container: rockylinux:9 - run-valgrind: false macos: uses: ./.github/workflows/macos.yml arm: uses: ./.github/workflows/arm.yml secrets: inherit + coverage: + uses: ./.github/workflows/coverage.yml + secrets: inherit + sanitizer: + uses: ./.github/workflows/task-unit-test.yml + with: + san: address + secrets: inherit diff --git a/.github/workflows/event-pull_request.yml b/.github/workflows/event-pull_request.yml index 190520812..c39eb7f69 100644 --- a/.github/workflows/event-pull_request.yml +++ b/.github/workflows/event-pull_request.yml @@ -30,8 +30,13 @@ jobs: - name: flow tests run: make flow_test + sanitizer: + uses: ./.github/workflows/task-unit-test.yml + with: + san: address + secrets: inherit + coverage: - needs: [basic-tests] if: ${{ !github.event.pull_request.draft}} uses: ./.github/workflows/coverage.yml secrets: inherit @@ -54,6 +59,7 @@ jobs: pr-validation: needs: - basic-tests + - sanitizer - coverage - codeql-analysis - spellcheck diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 64ab0224e..ec93149aa 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -7,4 +7,8 @@ jobs: uses: ./.github/workflows/task-unit-test.yml with: env: macos-latest - run-valgrind: false + macos-san: + uses: ./.github/workflows/task-unit-test.yml + with: + env: macos-latest + san: address diff --git a/.github/workflows/mariner2.yml b/.github/workflows/mariner2.yml index 109c8c178..4bc582596 100644 --- a/.github/workflows/mariner2.yml +++ b/.github/workflows/mariner2.yml @@ -8,4 +8,3 @@ jobs: with: container: mcr.microsoft.com/cbl-mariner/base/core:2.0 pre-checkout-script: tdnf install -y --noplugins --skipsignature tar gzip - run-valgrind: false # TODO: enable valgrind? (requires to install valgrind) diff --git a/.github/workflows/task-unit-test.yml b/.github/workflows/task-unit-test.yml index 49ccaa295..b2a011e0e 100644 --- a/.github/workflows/task-unit-test.yml +++ b/.github/workflows/task-unit-test.yml @@ -13,14 +13,15 @@ on: pre-checkout-script: description: 'Script to run before checkout' type: string - run-valgrind: - description: 'Run valgrind tests' - type: boolean - default: true + san: + type: string + +env: + SAN_VALUE: ${{ inputs.san }} jobs: test: - name: Test ${{ inputs.container && format('{0} (on {1})', inputs.container, inputs.env) || inputs.env }} + name: Test ${{ inputs.container && format('{0} (on {1})', inputs.container, inputs.env) || inputs.env }}${{ inputs.san && ' sanitizer' || '' }} runs-on: ${{ inputs.env }} container: ${{ inputs.container || null }} defaults: @@ -47,13 +48,16 @@ jobs: echo "name=$NAME" >> $GITHUB_OUTPUT - name: unit tests - run: make unit_test - - name: valgrind - if: ${{ inputs.run-valgrind }} - run: make valgrind - - name: Archive valgrind tests reports - if: ${{ inputs.run-valgrind && failure() }} + run: make unit_test SAN="$SAN_VALUE" + - name: Set test path + if: ${{ inputs.san == 'address' && failure() }} + id: tests-artifact-path + run: | + FULL_VARIANT=$(uname)-$(uname -m)-debug-asan + echo "path=bin/${FULL_VARIANT}/unit_tests/Testing/Temporary/" >> $GITHUB_OUTPUT + - name: Archive san tests reports + if: ${{ inputs.san == 'address' && failure() }} uses: actions/upload-artifact@v4 with: - name: valgrind tests reports on ${{ steps.artifact-name.outputs.name }} - path: bin/Linux-x86_64-debug/unit_tests/Testing/Temporary/ + name: san tests reports on ${{ steps.artifact-name.outputs.name }} + path: ${{ steps.tests-artifact-path.outputs.path }} diff --git a/.install/ubuntu_22.04.sh b/.install/ubuntu_22.04.sh index 399708c1c..0a5f1af96 100755 --- a/.install/ubuntu_22.04.sh +++ b/.install/ubuntu_22.04.sh @@ -4,5 +4,5 @@ export DEBIAN_FRONTEND=noninteractive MODE=$1 # whether to install using sudo or not $MODE apt-get update -qq -$MODE apt-get install -yqq git wget build-essential valgrind lcov +$MODE apt-get install -yqq git wget build-essential lcov source install_cmake.sh $MODE diff --git a/Makefile b/Makefile index 55c39d647..2b2727baf 100644 --- a/Makefile +++ b/Makefile @@ -8,14 +8,6 @@ ifneq ($(filter coverage show-cov upload-cov,$(MAKECMDGOALS)),) COV=1 endif -ifneq ($(VG),) -VALGRIND=$(VG) -endif - -ifeq ($(VALGRIND),1) -override DEBUG ?= 1 -endif - ifeq ($(COV),1) override DEBUG ?= 1 CMAKE_COV += -DUSE_COVERAGE=ON @@ -26,30 +18,18 @@ CMAKE_TESTS += -DVECSIM_BUILD_TESTS=off endif ifneq ($(SAN),) -override DEBUG ?= 1 -export ASAN_OPTIONS=detect_odr_violation=0:allocator_may_return_null=1 -export MSAN_OPTIONS=allocator_may_return_null=1 -ifeq ($(SAN),mem) -override SAN=memory -else ifeq ($(SAN),addr) +ifeq ($(SAN),address) override SAN=address -endif - -ifeq ($(SAN),memory) -CMAKE_SAN=-DUSE_MSAN=ON -override CTEST_ARGS += --exclude-regex BruteForceTest.sanity_rinsert_1280 - -else ifeq ($(SAN),address) +DEBUG=1 +export ASAN_OPTIONS=detect_odr_violation=0:allocator_may_return_null=1 CMAKE_SAN=-DUSE_ASAN=ON -else ifeq ($(SAN),leak) -else ifeq ($(SAN),thread) else -$(error SAN=mem|addr|leak|thread) +$(error SAN=address is currently the only supported option) endif export SAN -endif # SAN +endif # SAN != '' ROOT=. export ROOT @@ -60,18 +40,18 @@ make build DEBUG=1 # build debug variant COV=1 # build for code coverage VERBOSE=1 # print detailed build info - VG|VALGRIND=1 # build for Valgrind - SAN=type # build with LLVM sanitizer (type=address|memory|leak|thread) - SLOW=1 # don't run build in parallel (for diagnostics) - PROFILE=1 # enable profiling compile flags (and debug symbols) for release type. + SAN=address # build with AddressSanitizer (clang) + SLOW=1 # don't run build in parallel (for diagnostics) + PROFILE=1 # enable profiling compile flags (and debug symbols) for release type. make pybind # build Python bindings make clean # remove binary files ALL=1 # remove binary directories make unit_test # run unit tests CTEST_ARGS=args # extra CTest arguments - VG|VALGRIND=1 # run tests with valgrind -make valgrind # build for Valgrind and run tests + SAN=address # run tests with AddressSanitizer + FP_64=1 # run tests with 64-bit floating point +make asan # build with AddressSanitizer and run unit tests make flow_test # run flow tests (with pytest) TEST=file::name # run specific test make mod_test # run Redis module intergration tests (with RLTest) @@ -94,6 +74,11 @@ FLAVOR=debug else FLAVOR=release endif + +ifeq ($(SAN),address) +FLAVOR := ${FLAVOR}-asan +endif + FULL_VARIANT:=$(shell uname)-$(shell uname -m)-$(FLAVOR) BINROOT=$(ROOT)/bin/$(FULL_VARIANT) BINDIR=$(BINROOT) @@ -166,11 +151,7 @@ ifeq ($(VERBOSE),1) _CTEST_ARGS += -V endif -ifeq ($(VALGRIND),1) -_CTEST_ARGS += \ - -T memcheck \ - --overwrite MemoryCheckCommandOptions="--leak-check=full --error-exitcode=255" -endif +# AddressSanitizer is handled via SAN=address in cmake/san.cmake unit_test: $(SHOW)mkdir -p $(BINDIR) @@ -178,10 +159,10 @@ unit_test: @make --no-print-directory -C $(BINDIR) $(MAKE_J) $(SHOW)cd $(TESTDIR) && GTEST_COLOR=1 ctest $(_CTEST_ARGS) -valgrind: - $(SHOW)$(MAKE) VG=1 unit_test +asan: + $(SHOW)$(MAKE) SAN=address unit_test -.PHONY: unit_test valgrind +.PHONY: unit_test asan #---------------------------------------------------------------------------------------------- diff --git a/cmake/san.cmake b/cmake/san.cmake index 4e4aa049b..9415be8d7 100644 --- a/cmake/san.cmake +++ b/cmake/san.cmake @@ -1,23 +1,9 @@ option(USE_ASAN "Use AddressSanitizer (clang)" OFF) -option(USE_MSAN "Use MemorySanitizer (clang)" OFF) -if (USE_ASAN OR USE_MSAN) +if (USE_ASAN) # define this before project() find_file(CMAKE_C_COMPILER "clang") find_file(CMAKE_CXX_COMPILER "clang++") set(CMAKE_LINKER "${CMAKE_C_COMPILER}") + set(CLANG_SAN_FLAGS "-fno-omit-frame-pointer -fsanitize=address -fsized-deallocation") - if (USE_ASAN) - set(CLANG_SAN_FLAGS "-fno-omit-frame-pointer -fsanitize=address") - - elseif (USE_MSAN) - set(CLANG_SAN_FLAGS "-fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins=2") - if (NOT MSAN_PREFIX) - set(MSAN_PREFIX "/opt/llvm-project/build-msan") - if (NOT EXISTS ${MSAN_PREFIX}) - message(FATAL_ERROR "LLVM/MSAN stdlibc++ directory '${MSAN_PREFIX}' is missing") - endif() - endif() - set(LLVM_CXX_FLAGS "-stdlib=libc++ -I${MSAN_PREFIX}/include -I${MSAN_PREFIX}/include/c++/v1") - set(LLVM_LD_FLAGS "-stdlib=libc++ -Wl,-rpath=${MSAN_PREFIX}/lib -L${MSAN_PREFIX}/lib -lc++abi") - endif() -endif() \ No newline at end of file +endif() diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 6098fc99d..a6418c550 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -389,7 +389,8 @@ void HNSWIndex::removeExtraLinks( size_t removed_idx = 0; size_t link_idx = 0; - while (orig_candidates.size() > 0) { + // candidates <= orig_candidates + while (candidates.size() > 0) { if (orig_candidates.top().second != candidates.top().second) { if (neighbors_bitmap[orig_candidates.top().second]) { removed_links[removed_idx++] = orig_candidates.top().second; @@ -401,6 +402,15 @@ void HNSWIndex::removeExtraLinks( orig_candidates.pop(); } } + + assert(candidates.empty() && "candidates should be empty"); + // Handle remaining elements in orig_candidates that were rejected by heuristic + while (orig_candidates.size() > 0) { + if (neighbors_bitmap[orig_candidates.top().second]) { + removed_links[removed_idx++] = orig_candidates.top().second; + } + orig_candidates.pop(); + } setListCount(node_ll, link_idx); *removed_links_num = removed_idx; } @@ -466,7 +476,10 @@ DistType HNSWIndex::processCandidate( } // Pre-fetch the neighbours list of the top candidate (the one that is going // to be processed in the next iteration) into memory cache, to improve performance. - __builtin_prefetch(get_linklist_at_level(candidate_set.top().second, layer)); + if (!candidate_set.empty()) { + assert(links_num); + __builtin_prefetch(get_linklist_at_level(candidate_set.top().second, layer)); + } return lowerBound; } @@ -515,7 +528,10 @@ void HNSWIndex::processCandidate_RangeSearch( } // Pre-fetch the neighbours list of the top candidate (the one that is going // to be processed in the next iteration) into memory cache, to improve performance. - __builtin_prefetch(get_linklist_at_level(candidate_set.top().second, layer)); + if (!candidate_set.empty()) { + assert(links_num); + __builtin_prefetch(get_linklist_at_level(candidate_set.top().second, layer)); + } } template