Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/VecSim/algorithms/brute_force/bf_batch_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ VecSimQueryResult *BF_BatchIterator::selectBasedSearch(size_t n_res) {
return results;
}

BF_BatchIterator::BF_BatchIterator(const void *query_vector, const BruteForceIndex *bf_index,
BF_BatchIterator::BF_BatchIterator(void *query_vector, const BruteForceIndex *bf_index,
std::shared_ptr<VecSimAllocator> allocator)
: VecSimBatchIterator(query_vector, allocator), index(bf_index), scores_valid_start_pos(0) {
BF_BatchIterator::next_id++;
Expand Down
2 changes: 1 addition & 1 deletion src/VecSim/algorithms/brute_force/bf_batch_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class BF_BatchIterator : public VecSimBatchIterator {
void swapScores(const unordered_map<size_t, size_t> &TopCandidatesIndices, size_t res_num);

public:
BF_BatchIterator(const void *query_vector, const BruteForceIndex *index,
BF_BatchIterator(void *query_vector, const BruteForceIndex *index,
std::shared_ptr<VecSimAllocator> allocator);

inline const BruteForceIndex *getIndex() const { return index; };
Expand Down
14 changes: 12 additions & 2 deletions src/VecSim/algorithms/brute_force/brute_force.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <memory>
#include <cstring>
#include <queue>
#include <cassert>

using namespace std;

Expand Down Expand Up @@ -229,7 +230,7 @@ VecSimQueryResult_List BruteForceIndex::topKQuery(const void *queryBlob, size_t
return results;
}

VecSimIndexInfo BruteForceIndex::info() {
VecSimIndexInfo BruteForceIndex::info() const {

VecSimIndexInfo info;
info.algo = VecSimAlgo_BF;
Expand Down Expand Up @@ -281,7 +282,16 @@ VecSimInfoIterator *BruteForceIndex::infoIterator() {
}

// Creates a batch iterator over this brute-force index for the query in `queryBlob`.
//
// NOTE(review): this text is a rendered diff hunk (-281,7 / +282,16) without +/- markers. By the
// hunk's line counts, the first `return` below is the removed pre-change line and the statements
// after it are the added replacement; they are not meant to coexist in the real file.
VecSimBatchIterator *BruteForceIndex::newBatchIterator(const void *queryBlob) {
// Pre-change line: passes the caller's blob to the iterator as-is, with no copy.
return new (this->allocator) BF_BatchIterator(queryBlob, this, this->allocator);
// As this is the only supported type, we always allocate 4 bytes for every element in the
// vector.
// NOTE(review): `assert` is compiled out when NDEBUG is defined, so a non-FLOAT32 index is only
// caught in debug builds.
assert(this->vecType == VecSimType_FLOAT32);
// Private copy of the query: `dim` floats taken from the index's allocator.
// NOTE(review): the pointer returned by allocate() is passed to memcpy unchecked; confirm that
// allocate() cannot return nullptr.
auto *queryBlobCopy = this->allocator->allocate(sizeof(float) * this->dim);
memcpy(queryBlobCopy, queryBlob, dim * sizeof(float));
// Only the copy is handed to float_vector_normalize (presumably an in-place normalization --
// TODO confirm); the caller's `queryBlob` is read by memcpy only.
if (metric == VecSimMetric_Cosine) {
float_vector_normalize((float *)queryBlobCopy, dim);
}
// Ownership of queryBlobCopy moves to BF_BatchIterator that will free it at the end.
return new (this->allocator) BF_BatchIterator(queryBlobCopy, this, this->allocator);
}

bool BruteForceIndex::preferAdHocSearch(size_t subsetSize, size_t k) {
Expand Down
2 changes: 1 addition & 1 deletion src/VecSim/algorithms/brute_force/brute_force.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class BruteForceIndex : public VecSimIndex {
VecSimQueryParams *qparams) override;
virtual VecSimQueryResult_List topKQuery(const void *queryBlob, size_t k,
VecSimQueryParams *queryParams) override;
virtual VecSimIndexInfo info() override;
virtual VecSimIndexInfo info() const override;
virtual VecSimInfoIterator *infoIterator() override;
virtual VecSimBatchIterator *newBatchIterator(const void *queryBlob) override;
bool preferAdHocSearch(size_t subsetSize, size_t k) override;
Expand Down
2 changes: 1 addition & 1 deletion src/VecSim/algorithms/hnsw/hnsw_batch_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ candidatesMaxHeap HNSW_BatchIterator::scanGraph(candidatesMinHeap &candidates,
return top_candidates;
}

HNSW_BatchIterator::HNSW_BatchIterator(const void *query_vector, HNSWIndex *index_wrapper,
HNSW_BatchIterator::HNSW_BatchIterator(void *query_vector, HNSWIndex *index_wrapper,
std::shared_ptr<VecSimAllocator> allocator)
: VecSimBatchIterator(query_vector, std::move(allocator)), index_wrapper(index_wrapper),
depleted(false), top_candidates_extras(this->allocator), candidates(this->allocator) {
Expand Down
2 changes: 1 addition & 1 deletion src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class HNSW_BatchIterator : public VecSimBatchIterator {
inline bool hasVisitedNode(idType node_id) const;

public:
HNSW_BatchIterator(const void *query_vector, HNSWIndex *index,
HNSW_BatchIterator(void *query_vector, HNSWIndex *index,
std::shared_ptr<VecSimAllocator> allocator);

VecSimQueryResult_List getNextResults(size_t n_res, VecSimQueryResult_Order order) override;
Expand Down
13 changes: 11 additions & 2 deletions src/VecSim/algorithms/hnsw/hnsw_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ VecSimQueryResult_List HNSWIndex::topKQuery(const void *query_data, size_t k,
}
}

VecSimIndexInfo HNSWIndex::info() {
VecSimIndexInfo HNSWIndex::info() const {

VecSimIndexInfo info;
info.algo = VecSimAlgo_HNSWLIB;
Expand All @@ -152,7 +152,16 @@ VecSimIndexInfo HNSWIndex::info() {
}

// Creates a batch iterator over this HNSW index for the query in `queryBlob`.
//
// NOTE(review): this text is a rendered diff hunk (-152,7 / +152,16) without +/- markers. By the
// hunk's line counts, the first `return` below is the removed pre-change line and the statements
// after it are the added replacement; they are not meant to coexist in the real file.
VecSimBatchIterator *HNSWIndex::newBatchIterator(const void *queryBlob) {
// Pre-change line: passes the caller's blob to the iterator as-is, with no copy.
return new (this->allocator) HNSW_BatchIterator(queryBlob, this, this->allocator);
// As this is the only supported type, we always allocate 4 bytes for every element in the
// vector.
// NOTE(review): `assert` is compiled out when NDEBUG is defined, so a non-FLOAT32 index is only
// caught in debug builds.
assert(this->vecType == VecSimType_FLOAT32);
// Private copy of the query: `dim` floats taken from the index's allocator.
// NOTE(review): the pointer returned by allocate() is passed to memcpy unchecked; confirm that
// allocate() cannot return nullptr.
auto *queryBlobCopy = this->allocator->allocate(sizeof(float) * this->dim);
memcpy(queryBlobCopy, queryBlob, dim * sizeof(float));
// Only the copy is handed to float_vector_normalize (presumably an in-place normalization --
// TODO confirm); the caller's `queryBlob` is read by memcpy only.
if (metric == VecSimMetric_Cosine) {
float_vector_normalize((float *)queryBlobCopy, dim);
}
// Ownership of queryBlobCopy moves to HNSW_BatchIterator that will free it at the end.
return new (this->allocator) HNSW_BatchIterator(queryBlobCopy, this, this->allocator);
}

VecSimInfoIterator *HNSWIndex::infoIterator() {
Expand Down
2 changes: 1 addition & 1 deletion src/VecSim/algorithms/hnsw/hnsw_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class HNSWIndex : public VecSimIndex {
VecSimQueryParams *qparams) override;
virtual VecSimQueryResult_List topKQuery(const void *queryBlob, size_t k,
VecSimQueryParams *queryParams) override;
virtual VecSimIndexInfo info() override;
virtual VecSimIndexInfo info() const override;
virtual VecSimInfoIterator *infoIterator() override;
virtual VecSimBatchIterator *newBatchIterator(const void *queryBlob) override;
bool preferAdHocSearch(size_t subsetSize, size_t k) override;
Expand Down
7 changes: 3 additions & 4 deletions src/VecSim/batch_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@
*/
struct VecSimBatchIterator : public VecsimBaseObject {
private:
const void *query_vector;
void *query_vector;
size_t returned_results_count;

public:
explicit VecSimBatchIterator(const void *query_vector,
std::shared_ptr<VecSimAllocator> allocator)
explicit VecSimBatchIterator(void *query_vector, std::shared_ptr<VecSimAllocator> allocator)
: VecsimBaseObject(allocator), query_vector(query_vector), returned_results_count(0){};

inline const void *getQueryBlob() const { return query_vector; }
Expand All @@ -35,5 +34,5 @@ struct VecSimBatchIterator : public VecsimBaseObject {
// Reset the iterator to the initial state, before any results have been returned.
virtual void reset() = 0;

virtual ~VecSimBatchIterator() = default;
virtual ~VecSimBatchIterator() { allocator->free_allocation(this->query_vector); };
};
2 changes: 1 addition & 1 deletion src/VecSim/vec_sim_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ struct VecSimIndex : public VecsimBaseObject {
*
* @return Index general and specific meta-data.
*/
virtual VecSimIndexInfo info() = 0;
virtual VecSimIndexInfo info() const = 0;

/**
* @brief Returns an index information in an iterable structure.
Expand Down
67 changes: 67 additions & 0 deletions tests/unit/test_bruteforce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1124,3 +1124,70 @@ TEST_F(BruteForceTest, batchIteratorSwapIndices) {
VecSimBatchIterator_Free(batchIterator);
VecSimIndex_Free(index);
}

// Checks the cosine scores reported by a brute-force index, first through the top-k query path
// and then through the batch iterator.
//
// Data set: n vectors of dimension dim; the vector with id i (1..n) has i/n as its first
// coordinate and 1.0 everywhere else. The query is the all-ones vector, so a larger id is
// expected to come back earlier in the results.
TEST_F(BruteForceTest, testCosine) {
    // Compile-time constants, so the buffers below are ordinary fixed-size arrays rather than
    // variable-length arrays (a compiler extension, not standard C++).
    constexpr size_t dim = 128;
    constexpr size_t n = 100;

    VecSimParams params{.algo = VecSimAlgo_BF,
                        .bfParams = BFParams{.type = VecSimType_FLOAT32,
                                             .dim = dim,
                                             .metric = VecSimMetric_Cosine,
                                             .initialCapacity = n}};
    VecSimIndex *index = VecSimIndex_New(&params);

    for (size_t i = 1; i <= n; i++) {
        float f[dim];
        f[0] = static_cast<float>(i) / n;
        for (size_t j = 1; j < dim; j++) {
            f[j] = 1.0f;
        }
        VecSimIndex_AddVector(index, static_cast<const void *>(f), i);
    }
    ASSERT_EQ(VecSimIndex_IndexSize(index), n);

    float query[dim];
    for (size_t i = 0; i < dim; i++) {
        query[i] = 1.0f;
    }

    // By cosine definition: 1 - ((A \dot B) / (norm(A)*norm(B))), where A is the query vector
    // and B is the stored vector with the given id.
    auto expected_score_for = [&](size_t id) {
        float first_coordinate = static_cast<float>(id) / n;
        return 1.0f - ((first_coordinate + static_cast<float>(dim) - 1.0f) /
                       (sqrtf(static_cast<float>(dim)) *
                        sqrtf(static_cast<float>(dim - 1) + first_coordinate * first_coordinate)));
    };

    // The third callback argument is checked as the result's position in the returned list.
    auto verify_res = [&](size_t id, float score, size_t result_pos) {
        ASSERT_EQ(id, (n - result_pos));
        // Verify that abs difference between the actual and expected score is at most 1/10^6.
        ASSERT_NEAR(score, expected_score_for(id), 1e-6);
    };
    runTopKSearchTest(index, query, 10, verify_res);

    // Test with batch iterator.
    VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(index, query);
    size_t iteration_num = 0;

    // Get the 10 vectors whose ids are the maximal among those that haven't been returned yet,
    // in every iteration. The order should be from the largest to the lowest id.
    size_t n_res = 10;
    while (VecSimBatchIterator_HasNext(batchIterator)) {
        auto verify_res_batch = [&](size_t id, float score, size_t result_pos) {
            ASSERT_EQ(id, (n - n_res * iteration_num - result_pos));
            // Verify that abs difference between the actual and expected score is at most
            // 1/10^6.
            ASSERT_NEAR(score, expected_score_for(id), 1e-6);
        };
        runBatchIteratorSearchTest(batchIterator, n_res, verify_res_batch);
        iteration_num++;
    }
    // Every batch returned n_res results, so all n vectors were consumed in n / n_res batches.
    ASSERT_EQ(iteration_num, n / n_res);
    VecSimBatchIterator_Free(batchIterator);
    VecSimIndex_Free(index);
}
67 changes: 67 additions & 0 deletions tests/unit/test_hnswlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1242,4 +1242,71 @@ TEST_F(HNSWLibTest, preferAdHocOptimization) {
}
VecSimIndex_Free(index);
}

// Checks the cosine scores reported by an HNSW index, first through the top-k query path and
// then through the batch iterator.
//
// Data set: n vectors of dimension dim; the vector with id i (1..n) has i/n as its first
// coordinate and 1.0 everywhere else. The query is the all-ones vector, so a larger id is
// expected to come back earlier in the results.
TEST_F(HNSWLibTest, testCosine) {
    // Compile-time constants, so the buffers below are ordinary fixed-size arrays rather than
    // variable-length arrays (a compiler extension, not standard C++).
    constexpr size_t dim = 128;
    constexpr size_t n = 100;

    VecSimParams params{.algo = VecSimAlgo_HNSWLIB,
                        .hnswParams = HNSWParams{.type = VecSimType_FLOAT32,
                                                 .dim = dim,
                                                 .metric = VecSimMetric_Cosine,
                                                 .initialCapacity = n}};
    VecSimIndex *index = VecSimIndex_New(&params);

    for (size_t i = 1; i <= n; i++) {
        float f[dim];
        f[0] = static_cast<float>(i) / n;
        for (size_t j = 1; j < dim; j++) {
            f[j] = 1.0f;
        }
        VecSimIndex_AddVector(index, static_cast<const void *>(f), i);
    }
    ASSERT_EQ(VecSimIndex_IndexSize(index), n);

    float query[dim];
    for (size_t i = 0; i < dim; i++) {
        query[i] = 1.0f;
    }

    // By cosine definition: 1 - ((A \dot B) / (norm(A)*norm(B))), where A is the query vector
    // and B is the stored vector with the given id.
    auto expected_score_for = [&](size_t id) {
        float first_coordinate = static_cast<float>(id) / n;
        return 1.0f - ((first_coordinate + static_cast<float>(dim) - 1.0f) /
                       (sqrtf(static_cast<float>(dim)) *
                        sqrtf(static_cast<float>(dim - 1) + first_coordinate * first_coordinate)));
    };

    // The third callback argument is checked as the result's position in the returned list.
    auto verify_res = [&](size_t id, float score, size_t result_pos) {
        ASSERT_EQ(id, (n - result_pos));
        // Verify that abs difference between the actual and expected score is at most 1/10^6.
        ASSERT_NEAR(score, expected_score_for(id), 1e-6);
    };
    runTopKSearchTest(index, query, 10, verify_res);

    // Test with batch iterator.
    VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(index, query);
    size_t iteration_num = 0;

    // Get the 10 vectors whose ids are the maximal among those that haven't been returned yet,
    // in every iteration. The order should be from the largest to the lowest id.
    size_t n_res = 10;
    while (VecSimBatchIterator_HasNext(batchIterator)) {
        auto verify_res_batch = [&](size_t id, float score, size_t result_pos) {
            ASSERT_EQ(id, (n - n_res * iteration_num - result_pos));
            // Verify that abs difference between the actual and expected score is at most
            // 1/10^6.
            ASSERT_NEAR(score, expected_score_for(id), 1e-6);
        };
        runBatchIteratorSearchTest(batchIterator, n_res, verify_res_batch);
        iteration_num++;
    }
    // Every batch returned n_res results, so all n vectors were consumed in n / n_res batches.
    ASSERT_EQ(iteration_num, n / n_res);
    VecSimBatchIterator_Free(batchIterator);
    VecSimIndex_Free(index);
}
} // namespace hnswlib