Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/VecSim/algorithms/brute_force/brute_force.h
Original file line number Diff line number Diff line change
Expand Up @@ -425,9 +425,9 @@ bool BruteForceIndex<DataType, DistType>::preferAdHocSearch(size_t subsetSize, s
// This heuristic is based on an sklearn decision tree classifier (with 10 leaf nodes) -
// see scripts/BF_batches_clf.py
size_t index_size = this->indexSize();
if (subsetSize > index_size) {
throw std::runtime_error("internal error: subset size cannot be larger than index size");
}
// Treat a subset size that is too large as if it were the maximum possible size.
subsetSize = std::min(subsetSize, index_size);

size_t d = this->dim;
float r = (index_size == 0) ? 0.0f : (float)(subsetSize) / (float)this->indexLabelCount();
bool res;
Expand Down
6 changes: 3 additions & 3 deletions src/VecSim/algorithms/hnsw/hnsw.h
Original file line number Diff line number Diff line change
Expand Up @@ -2249,9 +2249,9 @@ bool HNSWIndex<DataType, DistType>::preferAdHocSearch(size_t subsetSize, size_t
// This heuristic is based on an sklearn decision tree classifier (with 20 leaf nodes) -
// see scripts/HNSW_batches_clf.py
size_t index_size = this->indexSize();
if (subsetSize > index_size) {
throw std::runtime_error("internal error: subset size cannot be larger than index size");
}
// Treat a subset size that is too large as if it were the maximum possible size.
subsetSize = std::min(subsetSize, index_size);

size_t d = this->dim;
size_t M = this->getM();
float r = (index_size == 0) ? 0.0f : (float)(subsetSize) / (float)this->indexLabelCount();
Expand Down
6 changes: 0 additions & 6 deletions src/VecSim/algorithms/hnsw/hnsw_tiered.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,6 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {
return new (this->allocator)
TieredHNSW_BatchIterator(queryBlobCopy, this, queryParams, this->allocator);
}
// Decides between ad-hoc and batched search by delegating to whichever underlying index
// (backend or frontend) currently reports the larger indexSize().
bool preferAdHocSearch(size_t subsetSize, size_t k, bool initial_check) override {
    const bool backendIsBigger =
        this->backendIndex->indexSize() > this->frontendIndex->indexSize();
    if (backendIsBigger) {
        return this->backendIndex->preferAdHocSearch(subsetSize, k, initial_check);
    }
    // The frontend is at least as large as the backend (ties go to the frontend).
    return this->frontendIndex->preferAdHocSearch(subsetSize, k, initial_check);
}
// Forwards the last-search-mode update to the backend index.
inline void setLastSearchMode(VecSearchMode mode) override {
    this->backendIndex->setLastSearchMode(mode);
}
Expand Down
1 change: 1 addition & 0 deletions src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,4 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_deleteVectorMulti_Test)
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_deleteVectorMultiFromFlatAdvanced_Test)
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_overwriteVectorBasic_Test)
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_overwriteVectorAsync_Test)
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_preferAdHocOptimization_Test)
7 changes: 7 additions & 0 deletions src/VecSim/vec_sim_tiered_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ class VecSimTieredIndex : public VecSimIndexInterface {
VecSimQueryParams *queryParams,
VecSimQueryResult_Order order) override;

// Delegates the ad-hoc vs. batches decision to one of the two underlying indexes:
// the backend when its indexSize() is strictly larger, otherwise the frontend.
bool preferAdHocSearch(size_t subsetSize, size_t k, bool initial_check) override {
    if (this->backendIndex->indexSize() > this->frontendIndex->indexSize()) {
        return this->backendIndex->preferAdHocSearch(subsetSize, k, initial_check);
    }
    return this->frontendIndex->preferAdHocSearch(subsetSize, k, initial_check);
}

// Reports the global write mode (async/in-place) that is currently in effect.
static VecSimWriteMode getWriteMode() {
    return VecSimIndexInterface::asyncWriteMode;
}

Expand Down
10 changes: 3 additions & 7 deletions tests/unit/test_bruteforce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1246,13 +1246,9 @@ TYPED_TEST(BruteForceTest, preferAdHocOptimization) {
ASSERT_TRUE(VecSimIndex_PreferAdHocSearch(index, 0, 50, true));

// Corner cases - subset size is greater than index size.
try {
VecSimIndex_PreferAdHocSearch(index, 1, 50, true);
FAIL() << "Expected std::runtime error";
} catch (std::runtime_error const &err) {
EXPECT_EQ(err.what(),
std::string("internal error: subset size cannot be larger than index size"));
}
ASSERT_EQ(VecSimIndex_PreferAdHocSearch(index, 42, 50, true),
VecSimIndex_PreferAdHocSearch(index, 0, 50, true));

VecSimIndex_Free(index);
}

Expand Down
10 changes: 3 additions & 7 deletions tests/unit/test_hnsw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1503,13 +1503,9 @@ TYPED_TEST(HNSWTest, preferAdHocOptimization) {
ASSERT_TRUE(VecSimIndex_PreferAdHocSearch(index, 0, 50, true));

// Corner cases - subset size is greater than index size.
try {
VecSimIndex_PreferAdHocSearch(index, 1, 50, true);
FAIL() << "Expected std::runtime error";
} catch (std::runtime_error const &err) {
EXPECT_EQ(err.what(),
std::string("internal error: subset size cannot be larger than index size"));
}
ASSERT_EQ(VecSimIndex_PreferAdHocSearch(index, 42, 50, true),
VecSimIndex_PreferAdHocSearch(index, 0, 50, true));

VecSimIndex_Free(index);
}

Expand Down
10 changes: 3 additions & 7 deletions tests/unit/test_hnsw_multi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,13 +627,9 @@ TYPED_TEST(HNSWMultiTest, preferAdHocOptimization) {
ASSERT_TRUE(VecSimIndex_PreferAdHocSearch(index, 0, 50, true));

// Corner cases - subset size is greater than index size.
try {
VecSimIndex_PreferAdHocSearch(index, 1, 50, true);
FAIL() << "Expected std::runtime error";
} catch (std::runtime_error const &err) {
EXPECT_EQ(err.what(),
std::string("internal error: subset size cannot be larger than index size"));
}
ASSERT_EQ(VecSimIndex_PreferAdHocSearch(index, 42, 50, true),
VecSimIndex_PreferAdHocSearch(index, 0, 50, true));

VecSimIndex_Free(index);
}
TYPED_TEST(HNSWMultiTest, search_empty_index) {
Expand Down
42 changes: 42 additions & 0 deletions tests/unit/test_hnsw_tiered.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3374,3 +3374,45 @@ TYPED_TEST(HNSWTieredIndexTest, parallelRangeSearch) {
// Cleanup.
delete index_ctx;
}

// Checks that the tiered index's preferAdHocSearch follows the decision of whichever
// underlying index is larger, and that a subset bigger than either underlying index
// does not make the call throw.
TYPED_TEST(HNSWTieredIndexTestBasic, preferAdHocOptimization) {
    size_t dim = 4;
    size_t n_backend_vectors = 5;
    size_t n_frontend_vectors = 6;

    HNSWParams hnsw_config = {
        .type = TypeParam::get_index_type(),
        .dim = dim,
        .metric = VecSimMetric_L2,
    };
    VecSimParams index_params = CreateParams(hnsw_config);
    auto job_queue = JobQueue();
    auto ext_ctx = new IndexExtCtx();
    size_t mem_ctx = 0;

    // Build the tiered index through the fixture helper.
    auto *tiered_index =
        this->CreateTieredHNSWIndex(index_params, &job_queue, ext_ctx, &mem_ctx);
    // NOTE(review): `allocator` is not referenced again in this test - confirm whether
    // holding it here is required.
    auto allocator = tiered_index->getAllocator();

    auto backend = tiered_index->backendIndex;
    auto frontend = tiered_index->frontendIndex;

    // Populate the backend (HNSW) index directly.
    for (size_t label = 0; label < n_backend_vectors; ++label) {
        GenerateAndAddVector<TEST_DATA_T>(backend, dim, label, label);
    }
    // With only the backend populated, the tiered decision should match the backend's.
    ASSERT_EQ(tiered_index->preferAdHocSearch(5, 5, true),
              backend->preferAdHocSearch(5, 5, true));

    // Populate the frontend (flat) index directly, with one more vector than the backend.
    for (size_t label = 0; label < n_frontend_vectors; ++label) {
        GenerateAndAddVector<TEST_DATA_T>(frontend, dim, label, label);
    }
    // The frontend is now the larger index, so the tiered decision should match it.
    ASSERT_EQ(tiered_index->preferAdHocSearch(5, 5, true),
              frontend->preferAdHocSearch(5, 5, true));

    // A subset of 10 is smaller than the tiered index size (11) but larger than either
    // underlying index; asking for the preference must not throw.
    ASSERT_NO_THROW(tiered_index->preferAdHocSearch(10, 5, false));

    // Cleanup.
    delete ext_ctx;
}