From 3cec5cb907416cce5c5bf1b3b9a03dc12118bf05 Mon Sep 17 00:00:00 2001 From: alon Date: Sun, 13 Mar 2022 17:33:00 +0200 Subject: [PATCH 1/3] change the resize index factor to 1.1 instead of 2 --- src/VecSim/algorithms/brute_force/brute_force.cpp | 3 ++- src/VecSim/algorithms/hnsw/hnsw_wrapper.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/VecSim/algorithms/brute_force/brute_force.cpp b/src/VecSim/algorithms/brute_force/brute_force.cpp index 0f6a97043..e61035413 100644 --- a/src/VecSim/algorithms/brute_force/brute_force.cpp +++ b/src/VecSim/algorithms/brute_force/brute_force.cpp @@ -10,6 +10,7 @@ #include #include #include +#include <cmath> using namespace std; @@ -76,7 +77,7 @@ int BruteForceIndex::addVector(const void *vector_data, size_t label) { // See if new id is bigger than current vector count. Needs to resize the index. if (id >= this->idToVectorBlockMemberMapping.size()) { - this->idToVectorBlockMemberMapping.resize(this->count * 2); + this->idToVectorBlockMemberMapping.resize(std::ceil(this->count * 1.1)); } // Get vector block to store the vector in. 
diff --git a/src/VecSim/algorithms/hnsw/hnsw_wrapper.cpp b/src/VecSim/algorithms/hnsw/hnsw_wrapper.cpp index de2e48f0d..aec9b69dd 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_wrapper.cpp +++ b/src/VecSim/algorithms/hnsw/hnsw_wrapper.cpp @@ -40,7 +40,7 @@ int HNSWIndex::addVector(const void *vector_data, size_t id) { vector_data = normalized_data; } if (hnsw->getIndexSize() == this->hnsw->getIndexCapacity()) { - this->hnsw->resizeIndex(std::max<size_t>(this->hnsw->getIndexCapacity() * 2, 2)); + this->hnsw->resizeIndex(std::max<size_t>(std::ceil(this->hnsw->getIndexCapacity() * 1.1), 2)); } this->hnsw->addPoint(vector_data, id); return true; From dc38c236b8d45333960e8e7b4e607ba2491af610 Mon Sep 17 00:00:00 2001 From: alon Date: Sun, 13 Mar 2022 18:44:22 +0200 Subject: [PATCH 2/3] formatting --- src/VecSim/algorithms/hnsw/hnsw_wrapper.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw_wrapper.cpp b/src/VecSim/algorithms/hnsw/hnsw_wrapper.cpp index aec9b69dd..6d3127d98 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_wrapper.cpp +++ b/src/VecSim/algorithms/hnsw/hnsw_wrapper.cpp @@ -40,7 +40,8 @@ int HNSWIndex::addVector(const void *vector_data, size_t id) { vector_data = normalized_data; } if (hnsw->getIndexSize() == this->hnsw->getIndexCapacity()) { - this->hnsw->resizeIndex(std::max<size_t>(std::ceil(this->hnsw->getIndexCapacity() * 1.1), 2)); + this->hnsw->resizeIndex( + std::max<size_t>(std::ceil(this->hnsw->getIndexCapacity() * 1.1), 2)); } this->hnsw->addPoint(vector_data, id); return true; From 9433ab18518a118231039bee88903f202ec2f54b Mon Sep 17 00:00:00 2001 From: alon Date: Mon, 14 Mar 2022 10:06:40 +0200 Subject: [PATCH 3/3] Add tests --- .../algorithms/brute_force/brute_force.h | 3 +- tests/unit/test_bruteforce.cpp | 29 +++++++++++++++++++ tests/unit/test_hnswlib.cpp | 29 +++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/VecSim/algorithms/brute_force/brute_force.h 
b/src/VecSim/algorithms/brute_force/brute_force.h index 7b12e4681..39a1b5e28 100644 --- a/src/VecSim/algorithms/brute_force/brute_force.h +++ b/src/VecSim/algorithms/brute_force/brute_force.h @@ -44,8 +44,9 @@ class BruteForceIndex : public VecSimIndex { DISTFUNC dist_func; VecSearchMode last_mode; #ifdef BUILD_TESTS - // Allow the following tests to access the index size private member. + // Allow the following tests to access the index private members. friend class BruteForceTest_preferAdHocOptimization_Test; friend class BruteForceTest_test_dynamic_bf_info_iterator_Test; + friend class BruteForceTest_resizeIndex_Test; #endif }; diff --git a/tests/unit/test_bruteforce.cpp b/tests/unit/test_bruteforce.cpp index df285518c..15f207bfb 100644 --- a/tests/unit/test_bruteforce.cpp +++ b/tests/unit/test_bruteforce.cpp @@ -35,6 +35,35 @@ TEST_F(BruteForceTest, brute_force_vector_add_test) { VecSimIndex_Free(index); } +TEST_F(BruteForceTest, resizeIndex) { + size_t dim = 4; + size_t n = 15; + VecSimParams params{.algo = VecSimAlgo_BF, + .bfParams = BFParams{.type = VecSimType_FLOAT32, + .dim = dim, + .metric = VecSimMetric_L2, + .initialCapacity = n}}; + VecSimIndex *index = VecSimIndex_New(&params); + ASSERT_EQ(VecSimIndex_IndexSize(index), 0); + + float a[dim]; + for (size_t i = 0; i < n; i++) { + for (size_t j = 0; j < dim; j++) { + a[j] = (float)i; + } + VecSimIndex_AddVector(index, (const void *)a, i); + } + ASSERT_EQ(reinterpret_cast<BruteForceIndex *>(index)->idToVectorBlockMemberMapping.size(), n); + + // Add another vector, since index size equals to the capacity, this should cause resizing + // (by 10% factor from the new index size). 
+ VecSimIndex_AddVector(index, (const void *)a, n + 1); + ASSERT_EQ(VecSimIndex_IndexSize(index), n + 1); + ASSERT_EQ(reinterpret_cast<BruteForceIndex *>(index)->idToVectorBlockMemberMapping.size(), + std::ceil(1.1 * (n + 1))); + VecSimIndex_Free(index); +} + TEST_F(BruteForceTest, brute_force_vector_search_test_ip) { size_t dim = 4; size_t n = 100; diff --git a/tests/unit/test_hnswlib.cpp b/tests/unit/test_hnswlib.cpp index 82bc6f498..9dc68607c 100644 --- a/tests/unit/test_hnswlib.cpp +++ b/tests/unit/test_hnswlib.cpp @@ -40,6 +40,35 @@ TEST_F(HNSWLibTest, hnswlib_vector_add_test) { VecSimIndex_Free(index); } +TEST_F(HNSWLibTest, resizeIndex) { + size_t dim = 4; + size_t n = 15; + VecSimParams params{.algo = VecSimAlgo_HNSWLIB, + .hnswParams = HNSWParams{.type = VecSimType_FLOAT32, + .dim = dim, + .metric = VecSimMetric_L2, + .initialCapacity = n}}; + VecSimIndex *index = VecSimIndex_New(&params); + ASSERT_EQ(VecSimIndex_IndexSize(index), 0); + + float a[dim]; + for (size_t i = 0; i < n; i++) { + for (size_t j = 0; j < dim; j++) { + a[j] = (float)i; + } + VecSimIndex_AddVector(index, (const void *)a, i); + } + ASSERT_EQ(reinterpret_cast<HNSWIndex *>(index)->getHNSWIndex()->getIndexCapacity(), n); + + // Add another vector, since index size equals to the capacity, this should cause resizing + // (by 10% factor from the index size). + VecSimIndex_AddVector(index, (const void *)a, n + 1); + ASSERT_EQ(VecSimIndex_IndexSize(index), n + 1); + ASSERT_EQ(reinterpret_cast<HNSWIndex *>(index)->getHNSWIndex()->getIndexCapacity(), + std::ceil(1.1 * n)); + VecSimIndex_Free(index); +} + TEST_F(HNSWLibTest, hnswlib_vector_search_test) { size_t n = 100; size_t k = 11;