From 166ddcae6077beee449886ff63ab1da03a0c86cc Mon Sep 17 00:00:00 2001
From: meiravgri <109056284+meiravgri@users.noreply.github.com>
Date: Wed, 15 Oct 2025 18:37:37 +0300
Subject: [PATCH 1/2] [0.8] Replace Valgrind with address-sanitizer [MOD-10409]
 (#803)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Replace Valgrind with address-sanitizer [MOD-10409] (#734)

* add asan using augment - wip (code with svs cannot compile with llvm clang currently)

* rename run-valgrind to asan

* fix buffer overflow in test

* fix bad testSizeEstimation in int8 and uint8 + cleanup

* fix cmake install for macos

* ran sanitizer as part of every flow + remove from non latest OSs + only run debian11 with gcc11 (alignment with search)

* revert 18 from clang format

* fix syntax errors

* * rename  dataSize -> storedDataSize

*refine distinction between storedDataSize and inputBlobSize: inputBlobSize not necessarly dim *sizeof(type)

* vecsimAbstract: move data members to private when possible and not too complicated

* remove effective_input_size check from VecSimIndexAbstract<DataType, DistType>::preprocessQuery: That's logically wrong.

The previous logic incorrectly used force_copy to determine blob size:
- force_copy only indicates whether to copy the blob for memory management
- It doesn't determine the actual size of the input blob

In tiered indexes, when the backend creates a batch iterator, it receives
the query blob from the frontend's batch iterator (already preprocessed).
This preprocessed blob has the processed size (e.g., with cosine norm
appended), not the original dim * sizeof(type) size.

The old logic accidentally worked when inputBlobSize == dim * sizeof(type)
because using dataSize (processed size) happened to match the preprocessed
blob size from the frontend. However, this was a coincidence that broke
when inputBlobSize was updated to reflect actual input blob sizes.

Now preprocessQuery correctly uses inputBlobSize, which represents the
actual size of the input blob being processed.

BruteForce Factory Changes:
• Templated  NewAbstractInitParams to be reusable across different algorithm parameter types

All factories changes:
 storedDataSize: Size after preprocessing (e.g., with cosine norm appended)
 inputBlobSize: Size of input blobs the index expects to receive (depends on whether vectors are pre-normalized)
• Added  inputBlobSize calculation based on normalization status: uses  storedDataSize if normalized, otherwise dim * sizeof(type)

*svs tests testSizeEstimation: using constexpr to eliminate compiler warnings about potential division by zero when  quantBits could be 0 (VecSimSvsQuant_NONE)

* fix merge

* another fix

* have leak

* run first san

* build with debug

* upload san srtifacts

* fix condition

* fix path

* clean up

* run wuth san macos

* real leak

* fix pah

* fix security issue

* remove san from event pr

run cov with san
run mac with san

* trigger again

* unit test flow name

* add space

* split sanitizer in intel machine

* fix cpu info

* fix alpine pre deps

* add to needs

* unify codecov and sanitizer again

* disable flow temp

add cov and sanitize to nightly

* revert unnecessary changes

* remove bin before building sanitizer in codecov to use clanmg

* install clang

* install clang 18

* instsll clang 18

* remove tools

* install llvm on cov

* fix deps

---------

Co-authored-by: meiravgri <meirav.grimberg@redis.com>
(cherry picked from commit 2fcc6690aa465b35855c11d199e182f34b483b46)

* fix debian and nightly

* remove slack notify on nightly

* change cov machine

* revert cov changes - no sanitizer

* no pre deps

* remove

---------

Co-authored-by: alonre24 <alon.reshef@redis.com>
(cherry picked from commit 44df9db329c10e8fe6abb1db5a9285ece6e53ba0)
---
 .github/workflows/arm.yml                  |  1 -
 .github/workflows/debian11.yml             |  4 +-
 .github/workflows/event-merge-to-queue.yml |  4 +-
 .github/workflows/event-nightly.yml        | 32 ++++--------
 .github/workflows/event-pull_request.yml   |  8 ++-
 .github/workflows/macos.yml                |  6 ++-
 .github/workflows/mariner2.yml             |  1 -
 .github/workflows/task-unit-test.yml       | 30 ++++++-----
 .install/ubuntu_22.04.sh                   |  2 +-
 Makefile                                   | 59 ++++++++--------------
 cmake/san.cmake                            | 20 ++------
 11 files changed, 67 insertions(+), 100 deletions(-)

diff --git a/.github/workflows/arm.yml b/.github/workflows/arm.yml
index e1fad3af1..1362b57b2 100644
--- a/.github/workflows/arm.yml
+++ b/.github/workflows/arm.yml
@@ -34,7 +34,6 @@ jobs:
     uses: ./.github/workflows/task-unit-test.yml
     with:
       env: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
-      run-valgrind: false # run the job without valgrind
 
   stop-runner:
     name: Stop self-hosted EC2 runner
diff --git a/.github/workflows/debian11.yml b/.github/workflows/debian11.yml
index b4cdaea7f..50d8d3826 100644
--- a/.github/workflows/debian11.yml
+++ b/.github/workflows/debian11.yml
@@ -1,9 +1,9 @@
 name: debian bullseye flow
 
-on: [workflow_dispatch]
+on: [workflow_dispatch, workflow_call]
 
 jobs:
   bullseye:
     uses: ./.github/workflows/task-unit-test.yml
     with:
-      container: debian:bullseye
+      container: gcc:11-bullseye
diff --git a/.github/workflows/event-merge-to-queue.yml b/.github/workflows/event-merge-to-queue.yml
index d54e9920f..eb99c280c 100644
--- a/.github/workflows/event-merge-to-queue.yml
+++ b/.github/workflows/event-merge-to-queue.yml
@@ -29,9 +29,7 @@ jobs:
 #    with:
 #      container: ubuntu:xenial
   bullseye:
-    uses: ./.github/workflows/task-unit-test.yml
-    with:
-      container: debian:bullseye
+    uses: ./.github/workflows/debian11.yml
 #  amazonlinux2:
 #    needs: [check-if-docs-only]
 #    if: ${{ needs.check-if-docs-only.outputs.only-docs-changed == 'false' }}
diff --git a/.github/workflows/event-nightly.yml b/.github/workflows/event-nightly.yml
index 9a378c441..97ff189e0 100644
--- a/.github/workflows/event-nightly.yml
+++ b/.github/workflows/event-nightly.yml
@@ -16,48 +16,38 @@ jobs:
     uses: ./.github/workflows/task-unit-test.yml
     with:
       container: ubuntu:jammy
-      run-valgrind: false
   focal:
     uses: ./.github/workflows/task-unit-test.yml
     with:
       container: ubuntu:focal
-      run-valgrind: false
 #  bionic:
 #    uses: ./.github/workflows/task-unit-test.yml
 #    with:
 #      container: ubuntu:bionic
-#      run-valgrind: false
-#  xenial:
-#    uses: ./.github/workflows/task-unit-test.yml
-#    with:
-#      container: ubuntu:xenial
-#      run-valgrind: false
   bullseye:
-    uses: ./.github/workflows/task-unit-test.yml
-    with:
-      container: debian:bullseye
-      run-valgrind: false
-#  centos7:
-#    uses: ./.github/workflows/task-unit-test.yml
-#    with:
-#      container: centos:7
-#      run-valgrind: false
-#  amazonlinux2:
-#    uses: ./.github/workflows/amazon2.yml
+    uses: ./.github/workflows/debian11.yml
+  #  amazonlinux2:
+  #    uses: ./.github/workflows/amazon2.yml
   mariner2:
     uses: ./.github/workflows/mariner2.yml
   rocky8:
     uses: ./.github/workflows/task-unit-test.yml
     with:
       container: rockylinux:8
-      run-valgrind: false
   rocky9:
     uses: ./.github/workflows/task-unit-test.yml
     with:
       container: rockylinux:9
-      run-valgrind: false
   macos:
     uses: ./.github/workflows/macos.yml
   arm:
     uses: ./.github/workflows/arm.yml
     secrets: inherit
+  coverage:
+    uses: ./.github/workflows/coverage.yml
+    secrets: inherit
+  sanitizer:
+    uses: ./.github/workflows/task-unit-test.yml
+    with:
+      san: address
+    secrets: inherit
diff --git a/.github/workflows/event-pull_request.yml b/.github/workflows/event-pull_request.yml
index 190520812..c39eb7f69 100644
--- a/.github/workflows/event-pull_request.yml
+++ b/.github/workflows/event-pull_request.yml
@@ -30,8 +30,13 @@ jobs:
       - name: flow tests
         run: make flow_test
 
+  sanitizer:
+    uses: ./.github/workflows/task-unit-test.yml
+    with:
+      san: address
+    secrets: inherit
+
   coverage:
-    needs: [basic-tests]
     if: ${{ !github.event.pull_request.draft}}
     uses: ./.github/workflows/coverage.yml
     secrets: inherit
@@ -54,6 +59,7 @@ jobs:
   pr-validation:
     needs:
       - basic-tests
+      - sanitizer
       - coverage
       - codeql-analysis
       - spellcheck
diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml
index 64ab0224e..ec93149aa 100644
--- a/.github/workflows/macos.yml
+++ b/.github/workflows/macos.yml
@@ -7,4 +7,8 @@ jobs:
     uses: ./.github/workflows/task-unit-test.yml
     with:
       env: macos-latest
-      run-valgrind: false
+  macos-san:
+    uses: ./.github/workflows/task-unit-test.yml
+    with:
+      env: macos-latest
+      san: address
diff --git a/.github/workflows/mariner2.yml b/.github/workflows/mariner2.yml
index 109c8c178..4bc582596 100644
--- a/.github/workflows/mariner2.yml
+++ b/.github/workflows/mariner2.yml
@@ -8,4 +8,3 @@ jobs:
     with:
       container: mcr.microsoft.com/cbl-mariner/base/core:2.0
       pre-checkout-script: tdnf install -y --noplugins --skipsignature tar gzip
-      run-valgrind: false # TODO: enable valgrind? (requires to install valgrind)
diff --git a/.github/workflows/task-unit-test.yml b/.github/workflows/task-unit-test.yml
index 49ccaa295..b2a011e0e 100644
--- a/.github/workflows/task-unit-test.yml
+++ b/.github/workflows/task-unit-test.yml
@@ -13,14 +13,15 @@ on:
       pre-checkout-script:
         description: 'Script to run before checkout'
         type: string
-      run-valgrind:
-        description: 'Run valgrind tests'
-        type: boolean
-        default: true
+      san:
+        type: string
+
+env:
+  SAN_VALUE: ${{ inputs.san }}
 
 jobs:
   test:
-    name: Test ${{ inputs.container && format('{0} (on {1})', inputs.container, inputs.env) || inputs.env }}
+    name: Test ${{ inputs.container && format('{0} (on {1})', inputs.container, inputs.env) || inputs.env }}${{ inputs.san && ' sanitizer' || '' }}
     runs-on: ${{ inputs.env }}
     container: ${{ inputs.container || null }}
     defaults:
@@ -47,13 +48,16 @@ jobs:
           echo "name=$NAME" >> $GITHUB_OUTPUT
 
       - name: unit tests
-        run: make unit_test
-      - name: valgrind
-        if: ${{ inputs.run-valgrind }}
-        run: make valgrind
-      - name: Archive valgrind tests reports
-        if: ${{ inputs.run-valgrind && failure() }}
+        run: make unit_test SAN="$SAN_VALUE"
+      - name: Set test path
+        if: ${{ inputs.san  == 'address' && failure() }}
+        id: tests-artifact-path
+        run: |
+          FULL_VARIANT=$(uname)-$(uname -m)-debug-asan
+          echo "path=bin/${FULL_VARIANT}/unit_tests/Testing/Temporary/" >> $GITHUB_OUTPUT
+      - name: Archive san tests reports
+        if: ${{ inputs.san  == 'address' && failure() }}
         uses: actions/upload-artifact@v4
         with:
-          name: valgrind tests reports on ${{ steps.artifact-name.outputs.name }}
-          path: bin/Linux-x86_64-debug/unit_tests/Testing/Temporary/
+          name: san tests reports on ${{ steps.artifact-name.outputs.name }}
+          path: ${{ steps.tests-artifact-path.outputs.path }}
diff --git a/.install/ubuntu_22.04.sh b/.install/ubuntu_22.04.sh
index 399708c1c..0a5f1af96 100755
--- a/.install/ubuntu_22.04.sh
+++ b/.install/ubuntu_22.04.sh
@@ -4,5 +4,5 @@ export DEBIAN_FRONTEND=noninteractive
 MODE=$1 # whether to install using sudo or not
 
 $MODE apt-get update -qq
-$MODE apt-get install -yqq git wget build-essential valgrind lcov
+$MODE apt-get install -yqq git wget build-essential lcov
 source install_cmake.sh $MODE
diff --git a/Makefile b/Makefile
index 55c39d647..2b2727baf 100644
--- a/Makefile
+++ b/Makefile
@@ -8,14 +8,6 @@ ifneq ($(filter coverage show-cov upload-cov,$(MAKECMDGOALS)),)
 COV=1
 endif
 
-ifneq ($(VG),)
-VALGRIND=$(VG)
-endif
-
-ifeq ($(VALGRIND),1)
-override DEBUG ?= 1
-endif
-
 ifeq ($(COV),1)
 override DEBUG ?= 1
 CMAKE_COV += -DUSE_COVERAGE=ON
@@ -26,30 +18,18 @@ CMAKE_TESTS += -DVECSIM_BUILD_TESTS=off
 endif
 
 ifneq ($(SAN),)
-override DEBUG ?= 1
-export ASAN_OPTIONS=detect_odr_violation=0:allocator_may_return_null=1
-export MSAN_OPTIONS=allocator_may_return_null=1
 
-ifeq ($(SAN),mem)
-override SAN=memory
-else ifeq ($(SAN),addr)
+ifeq ($(SAN),address)
 override SAN=address
-endif
-
-ifeq ($(SAN),memory)
-CMAKE_SAN=-DUSE_MSAN=ON
-override CTEST_ARGS += --exclude-regex BruteForceTest.sanity_rinsert_1280
-
-else ifeq ($(SAN),address)
+DEBUG=1
+export ASAN_OPTIONS=detect_odr_violation=0:allocator_may_return_null=1
 CMAKE_SAN=-DUSE_ASAN=ON
-else ifeq ($(SAN),leak)
-else ifeq ($(SAN),thread)
 else
-$(error SAN=mem|addr|leak|thread)
+$(error SAN=address is currently the only supported option)
 endif
 
 export SAN
-endif # SAN
+endif # SAN != ''
 
 ROOT=.
 export ROOT
@@ -60,18 +40,18 @@ make build
   DEBUG=1          # build debug variant
   COV=1			   # build for code coverage
   VERBOSE=1        # print detailed build info
-  VG|VALGRIND=1    # build for Valgrind
-  SAN=type         # build with LLVM sanitizer (type=address|memory|leak|thread)
-  SLOW=1           # don't run build in parallel (for diagnostics)
-  PROFILE=1		   # enable profiling compile flags (and debug symbols) for release type.
+  SAN=address          # build with AddressSanitizer (clang)
+  SLOW=1               # don't run build in parallel (for diagnostics)
+  PROFILE=1            # enable profiling compile flags (and debug symbols) for release type.
 make pybind        # build Python bindings
 make clean         # remove binary files
   ALL=1            # remove binary directories
 
 make unit_test     # run unit tests
   CTEST_ARGS=args    # extra CTest arguments
-  VG|VALGRIND=1      # run tests with valgrind
-make valgrind      # build for Valgrind and run tests
+  SAN=address        # run tests with AddressSanitizer
+  FP_64=1			# run tests with 64-bit floating point
+make asan            # build with AddressSanitizer and run unit tests
 make flow_test     # run flow tests (with pytest)
   TEST=file::name    # run specific test
 make mod_test      # run Redis module intergration tests (with RLTest)
@@ -94,6 +74,11 @@ FLAVOR=debug
 else
 FLAVOR=release
 endif
+
+ifeq ($(SAN),address)
+FLAVOR := ${FLAVOR}-asan
+endif
+
 FULL_VARIANT:=$(shell uname)-$(shell uname -m)-$(FLAVOR)
 BINROOT=$(ROOT)/bin/$(FULL_VARIANT)
 BINDIR=$(BINROOT)
@@ -166,11 +151,7 @@ ifeq ($(VERBOSE),1)
 _CTEST_ARGS += -V
 endif
 
-ifeq ($(VALGRIND),1)
-_CTEST_ARGS += \
-	-T memcheck \
-	--overwrite MemoryCheckCommandOptions="--leak-check=full --error-exitcode=255"
-endif
+# AddressSanitizer is handled via SAN=address in cmake/san.cmake
 
 unit_test:
 	$(SHOW)mkdir -p $(BINDIR)
@@ -178,10 +159,10 @@ unit_test:
 	@make --no-print-directory -C $(BINDIR) $(MAKE_J)
 	$(SHOW)cd $(TESTDIR) && GTEST_COLOR=1 ctest $(_CTEST_ARGS)
 
-valgrind:
-	$(SHOW)$(MAKE) VG=1 unit_test
+asan:
+	$(SHOW)$(MAKE) SAN=address unit_test
 
-.PHONY: unit_test valgrind
+.PHONY: unit_test asan
 
 #----------------------------------------------------------------------------------------------
 
diff --git a/cmake/san.cmake b/cmake/san.cmake
index 4e4aa049b..9415be8d7 100644
--- a/cmake/san.cmake
+++ b/cmake/san.cmake
@@ -1,23 +1,9 @@
 option(USE_ASAN "Use AddressSanitizer (clang)" OFF)
-option(USE_MSAN "Use MemorySanitizer (clang)" OFF)
-if (USE_ASAN OR USE_MSAN)
+if (USE_ASAN)
 	# define this before project()
 	find_file(CMAKE_C_COMPILER "clang")
 	find_file(CMAKE_CXX_COMPILER "clang++")
 	set(CMAKE_LINKER "${CMAKE_C_COMPILER}")
+	set(CLANG_SAN_FLAGS "-fno-omit-frame-pointer -fsanitize=address -fsized-deallocation")
 
-	if (USE_ASAN)
-		set(CLANG_SAN_FLAGS "-fno-omit-frame-pointer -fsanitize=address")
-
-	elseif (USE_MSAN)
-		set(CLANG_SAN_FLAGS "-fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins=2")
-		if (NOT MSAN_PREFIX)
-			set(MSAN_PREFIX "/opt/llvm-project/build-msan")
-			if (NOT EXISTS ${MSAN_PREFIX})
-				message(FATAL_ERROR "LLVM/MSAN stdlibc++ directory '${MSAN_PREFIX}' is missing")
-			endif()
-		endif()
-		set(LLVM_CXX_FLAGS "-stdlib=libc++ -I${MSAN_PREFIX}/include -I${MSAN_PREFIX}/include/c++/v1")
-		set(LLVM_LD_FLAGS "-stdlib=libc++ -Wl,-rpath=${MSAN_PREFIX}/lib -L${MSAN_PREFIX}/lib -lc++abi")
-	endif()
-endif()
\ No newline at end of file
+endif()

From 5fcf6ee7f78b85c962d30d4da94b9f8ba1dd2abd Mon Sep 17 00:00:00 2001
From: meiravgri <109056284+meiravgri@users.noreply.github.com>
Date: Thu, 16 Oct 2025 15:11:23 +0300
Subject: [PATCH 2/2] [MOD-11881] Fix HNSW undefined behavior in container
 access (#806)

fix invalid reads
---
 src/VecSim/algorithms/hnsw/hnsw.h | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h
index 6098fc99d..a6418c550 100644
--- a/src/VecSim/algorithms/hnsw/hnsw.h
+++ b/src/VecSim/algorithms/hnsw/hnsw.h
@@ -389,7 +389,8 @@ void HNSWIndex<DataType, DistType>::removeExtraLinks(
     size_t removed_idx = 0;
     size_t link_idx = 0;
 
-    while (orig_candidates.size() > 0) {
+    // candidates <= orig_candidates
+    while (candidates.size() > 0) {
         if (orig_candidates.top().second != candidates.top().second) {
             if (neighbors_bitmap[orig_candidates.top().second]) {
                 removed_links[removed_idx++] = orig_candidates.top().second;
@@ -401,6 +402,15 @@ void HNSWIndex<DataType, DistType>::removeExtraLinks(
             orig_candidates.pop();
         }
     }
+
+    assert(candidates.empty() && "candidates should be empty");
+    // Handle remaining elements in orig_candidates that were rejected by heuristic
+    while (orig_candidates.size() > 0) {
+        if (neighbors_bitmap[orig_candidates.top().second]) {
+            removed_links[removed_idx++] = orig_candidates.top().second;
+        }
+        orig_candidates.pop();
+    }
     setListCount(node_ll, link_idx);
     *removed_links_num = removed_idx;
 }
@@ -466,7 +476,10 @@ DistType HNSWIndex<DataType, DistType>::processCandidate(
     }
     // Pre-fetch the neighbours list of the top candidate (the one that is going
     // to be processed in the next iteration) into memory cache, to improve performance.
-    __builtin_prefetch(get_linklist_at_level(candidate_set.top().second, layer));
+    if (!candidate_set.empty()) {
+        assert(links_num);
+        __builtin_prefetch(get_linklist_at_level(candidate_set.top().second, layer));
+    }
 
     return lowerBound;
 }
@@ -515,7 +528,10 @@ void HNSWIndex<DataType, DistType>::processCandidate_RangeSearch(
     }
     // Pre-fetch the neighbours list of the top candidate (the one that is going
     // to be processed in the next iteration) into memory cache, to improve performance.
-    __builtin_prefetch(get_linklist_at_level(candidate_set.top().second, layer));
+    if (!candidate_set.empty()) {
+        assert(links_num);
+        __builtin_prefetch(get_linklist_at_level(candidate_set.top().second, layer));
+    }
 }
 
 template <typename DataType, typename DistType>