Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 8 additions & 17 deletions src/VecSim/algorithms/svs/svs.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
}

int deleteVectorsImpl(const labelType *labels, size_t n) {
if (indexSize() == 0) {
if (indexLabelCount() == 0) {
return 0;
}

Expand Down Expand Up @@ -280,22 +280,13 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
return;

// SVS index instance should not be empty
if (indexSize() == 0) {
if (indexLabelCount() == 0) {
this->impl_.reset();
num_marked_deleted = 0;
return;
}

num_marked_deleted += n;
// consolidate index if number of changes bigger than 50% of index size
const float consolidation_threshold = .5f;
// indexSize() should not be 0 see above lines
assert(indexSize() > 0);
// Note: if this function is called after deleteVectorsImpl, indexSize is already updated
if (static_cast<float>(num_marked_deleted) / indexSize() > consolidation_threshold) {
impl_->consolidate();
num_marked_deleted = 0;
}
}

bool isTwoLevelLVQ(const VecSimSvsQuantBits &qbits) {
Expand Down Expand Up @@ -330,7 +321,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl

~SVSIndex() = default;

size_t indexSize() const override { return impl_ ? impl_->size() : 0; }
size_t indexSize() const override { return indexStorageSize(); }

size_t indexStorageSize() const override { return impl_ ? impl_->view_data().size() : 0; }

Expand All @@ -342,7 +333,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
if constexpr (isMulti) {
return impl_ ? impl_->labelcount() : 0;
} else {
return indexSize();
return impl_ ? impl_->size() : 0;
}
}

Expand Down Expand Up @@ -524,7 +515,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
VecSimQueryParams *queryParams) const override {
auto rep = new VecSimQueryReply(this->allocator);
this->lastMode = STANDARD_KNN;
if (k == 0 || this->indexSize() == 0) {
if (k == 0 || this->indexLabelCount() == 0) {
return rep;
}

Expand Down Expand Up @@ -569,7 +560,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
VecSimQueryParams *queryParams) const override {
auto rep = new VecSimQueryReply(this->allocator);
this->lastMode = RANGE_QUERY;
if (radius == 0 || this->indexSize() == 0) {
if (radius == 0 || this->indexLabelCount() == 0) {
return rep;
}

Expand Down Expand Up @@ -642,7 +633,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
// take ownership of the blob copy and pass it to the batch iterator.
auto *queryBlobCopyPtr = queryBlobCopy.release();
// Ownership of queryBlobCopy moves to VecSimBatchIterator that will free it at the end.
if (indexSize() == 0) {
if (indexLabelCount() == 0) {
return new (this->getAllocator())
NullSVS_BatchIterator(queryBlobCopyPtr, queryParams, this->getAllocator());
} else {
Expand All @@ -652,7 +643,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
}

bool preferAdHocSearch(size_t subsetSize, size_t k, bool initial_check) const override {
size_t index_size = this->indexSize();
size_t index_size = this->indexLabelCount();

// Calculate the ratio of the subset size to the total index size.
double subsetRatio = (index_size == 0) ? 0.f : static_cast<double>(subsetSize) / index_size;
Expand Down
2 changes: 1 addition & 1 deletion tests/flow/test_svs_tiered.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ def test_recall_after_deletion(test_logger):
test_logger.info(f"Done deleting half of the index")
assert index.svs_label_count() >= (num_elements // 2) - indices_ctx.tiered_svs_params.updateTriggerThreshold
assert index.svs_label_count() <= (num_elements // 2) + indices_ctx.tiered_svs_params.updateTriggerThreshold
assert svs_index.index_size() == (num_elements // 2)
assert svs_index.index_size() == num_elements

# Create a list of tuples of the vectors that left.
vectors = [vectors[i] for i in range(1, num_elements, 2)]
Expand Down
44 changes: 26 additions & 18 deletions tests/unit/test_svs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,20 +262,18 @@ TYPED_TEST(SVSTest, svs_bulk_vectors_add_delete_test) {
runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID);

// Delete almost all vectors
// First delete small amount of vector to prevent consolidation.
const size_t first_batch_deletion = 10;
ASSERT_EQ(svs_index->deleteVectors(ids.data(), first_batch_deletion), first_batch_deletion);
ASSERT_EQ(VecSimIndex_IndexSize(index), n - first_batch_deletion);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), first_batch_deletion);

// Now delete enough vectors to trigger consolidation.
const size_t keep_num = 1;
ASSERT_EQ(svs_index->deleteVectors(ids.data() + first_batch_deletion,
n - keep_num - first_batch_deletion),
n - keep_num - first_batch_deletion);
ASSERT_EQ(VecSimIndex_IndexSize(index), keep_num);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 0);
ASSERT_EQ(svs_index->deleteVectors(ids.data(), n - keep_num), n - keep_num);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(index->indexLabelCount(), keep_num);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - keep_num);

// Delete rest of the vectors
// num_marked_deleted should reset.
ASSERT_EQ(svs_index->deleteVectors(ids.data() + n - keep_num, keep_num), keep_num);
ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
ASSERT_EQ(index->indexLabelCount(), 0);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 0);
VecSimIndex_Free(index);
}

Expand Down Expand Up @@ -453,14 +451,18 @@ TYPED_TEST(SVSTest, svs_reindexing_same_vector) {
for (size_t i = 0; i < n - 1; i++) {
VecSimIndex_DeleteVector(index, i);
}
ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(index->indexLabelCount(), 1);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - 1);

// Reinsert the same vectors under the same ids.
for (size_t i = 0; i < n; i++) {
// i / 10 is an integer division (take the "floor" value).
GenerateAndAddVector<TEST_DATA_T>(index, dim, i, i / 10);
}
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(VecSimIndex_IndexSize(index), 2 * n);
ASSERT_EQ(index->indexLabelCount(), n);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n);

// Run the same query again.
runTopKSearchTest(index, query, k, verify_res);
Expand Down Expand Up @@ -513,14 +515,18 @@ TYPED_TEST(SVSTest, svs_reindexing_same_vector_different_id) {
for (size_t i = 0; i < n - 1; i++) {
VecSimIndex_DeleteVector(index, i);
}
ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(index->indexLabelCount(), 1);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - 1);

// Reinsert the same vectors under different ids than before.
for (size_t i = 0; i < n; i++) {
GenerateAndAddVector<TEST_DATA_T>(index, dim, i + 10,
i / 10); // i / 10 is in integer (take the "floor" value).
}
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(VecSimIndex_IndexSize(index), 2 * n);
ASSERT_EQ(index->indexLabelCount(), n);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n);

// Run the same query again.
auto verify_res_different_id = [&](size_t id, double score, size_t index) {
Expand Down Expand Up @@ -920,7 +926,8 @@ TYPED_TEST(SVSTest, test_delete_vector) {

// Here the shift should happen.
VecSimIndex_DeleteVector(index, 1);
ASSERT_EQ(VecSimIndex_IndexSize(index), n - 1);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(index->indexLabelCount(), n - 1);

TEST_DATA_T query[] = {0.0, 0.0};
auto verify_res = [&](size_t id, double score, size_t index) {
Expand Down Expand Up @@ -3024,7 +3031,8 @@ TYPED_TEST(SVSTest, logging_runtime_params) {
index->addVector(v[i].data(), ids[i]);
}
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 10);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(VecSimIndex_IndexSize(index), n + 10);
ASSERT_EQ(index->indexLabelCount(), n);

float query[] = {50, 50, 50, 50};
auto verify_res = [&](size_t id, double score, size_t index) { EXPECT_EQ(id, (index + 45)); };
Expand Down
35 changes: 26 additions & 9 deletions tests/unit/test_svs_fp16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,9 @@ TYPED_TEST(FP16SVSTest, svs_bulk_vectors_add_delete_test) {
// Delete almost all vectors
const size_t keep_num = 10;
ASSERT_EQ(svs_index->deleteVectors(ids.data(), n - keep_num), n - keep_num);
ASSERT_EQ(VecSimIndex_IndexSize(index), keep_num);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(index->indexLabelCount(), keep_num);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - keep_num);

auto verify_res_after_delete = [&](size_t id, double score, size_t index) {
EXPECT_EQ(id, n - keep_num + index);
Expand All @@ -252,6 +254,12 @@ TYPED_TEST(FP16SVSTest, svs_bulk_vectors_add_delete_test) {
// Thread 0: Couldn't find key.
runTopKSearchTest(index, query, keep_num, verify_res_after_delete, nullptr, BY_ID);

// Delete rest of the vectors
// num_marked_deleted should reset.
ASSERT_EQ(svs_index->deleteVectors(ids.data() + n - keep_num, keep_num), keep_num);
ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
ASSERT_EQ(index->indexLabelCount(), 0);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 0);
VecSimIndex_Free(index);
}

Expand Down Expand Up @@ -334,14 +342,18 @@ TYPED_TEST(FP16SVSTest, svs_reindexing_same_vector) {
for (size_t i = 0; i < n - 1; i++) {
VecSimIndex_DeleteVector(index, i);
}
ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(index->indexLabelCount(), 1);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - 1);

// Reinsert the same vectors under the same ids.
for (size_t i = 0; i < n; i++) {
// i / 10 is an integer division (take the "floor" value).
this->GenerateAndAddVector(index, dim, i, i / 10);
}
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(VecSimIndex_IndexSize(index), 2 * n);
ASSERT_EQ(index->indexLabelCount(), n);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n);

// Run the same query again.
runTopKSearchTest(index, query, k, verify_res);
Expand Down Expand Up @@ -388,14 +400,18 @@ TYPED_TEST(FP16SVSTest, svs_reindexing_same_vector_different_id) {
for (size_t i = 0; i < n - 1; i++) {
VecSimIndex_DeleteVector(index, i);
}
ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(index->indexLabelCount(), 1);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - 1);

// Reinsert the same vectors under different ids than before.
for (size_t i = 0; i < n; i++) {
this->GenerateAndAddVector(index, dim, i + 10,
i / 10); // i / 10 is in integer (take the "floor" value).
}
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(VecSimIndex_IndexSize(index), 2 * n);
ASSERT_EQ(index->indexLabelCount(), n);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n);

// Run the same query again.
auto verify_res_different_id = [&](size_t id, double score, size_t index) {
Expand Down Expand Up @@ -787,7 +803,8 @@ TYPED_TEST(FP16SVSTest, test_delete_vector) {

// Here the shift should happen.
VecSimIndex_DeleteVector(index, 1);
ASSERT_EQ(VecSimIndex_IndexSize(index), n - 1);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(index->indexLabelCount(), n - 1);

float16 query[dim];
this->GenerateVector(query, dim, 0.0);
Expand Down Expand Up @@ -2627,7 +2644,7 @@ TYPED_TEST(FP16SVSTieredIndexTest, KNNSearch) {
VecSimIndex_DeleteVector(svs_index, i);
}
ASSERT_EQ(flat_index->indexSize(), n * 2 / 3);
ASSERT_EQ(svs_index->indexSize(), n / 2);
ASSERT_EQ(svs_index->indexLabelCount(), n / 2);
k = n * 2 / 3;
cur_memory_usage = allocator->getAllocationSize();
runTopKSearchTest(tiered_index, query_0, k, ver_res_0);
Expand All @@ -2642,7 +2659,7 @@ TYPED_TEST(FP16SVSTieredIndexTest, KNNSearch) {
VecSimIndex_DeleteVector(flat_index, i);
}
ASSERT_EQ(flat_index->indexSize(), n / 6);
ASSERT_EQ(svs_index->indexSize(), n / 2);
ASSERT_EQ(svs_index->indexLabelCount(), n / 2);
k = n / 4;
cur_memory_usage = allocator->getAllocationSize();
runTopKSearchTest(tiered_index, query_0, k, ver_res_0);
Expand All @@ -2656,7 +2673,7 @@ TYPED_TEST(FP16SVSTieredIndexTest, KNNSearch) {
this->GenerateAndAddVector(flat_index, dim, i, i);
}
ASSERT_EQ(flat_index->indexSize(), n * 2 / 3);
ASSERT_EQ(svs_index->indexSize(), 0);
ASSERT_EQ(svs_index->indexLabelCount(), 0);
k = n / 3;
cur_memory_usage = allocator->getAllocationSize();
runTopKSearchTest(tiered_index, query_0, k, ver_res_0);
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_svs_multi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ TYPED_TEST(SVSMultiTest, test_dynamic_svs_info_iterator) {
VecSimIndex_DeleteVector(index, 0);
info = VecSimIndex_DebugInfo(index);
infoIter = VecSimIndex_DebugInfoIterator(index);
ASSERT_EQ(2, info.commonInfo.indexSize);
ASSERT_EQ(4, info.commonInfo.indexSize);
ASSERT_EQ(1, info.commonInfo.indexLabelCount);
compareSVSIndexInfoToIterator(info, infoIter);
VecSimDebugInfoIterator_Free(infoIter);
Expand Down
Loading
Loading