Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions src/VecSim/algorithms/brute_force/brute_force.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,13 +157,7 @@ double BruteForceIndex::getDistanceFrom(size_t label, const void *vector_data) {
}
idType id = optionalId->second;
VectorBlockMember *vector_index = this->idToVectorBlockMemberMapping[id];
float normalized_blob[this->dim]; // This will be use only if metric == VecSimMetric_Cosine
if (this->metric == VecSimMetric_Cosine) {
// TODO: need more generic
memcpy(normalized_blob, vector_data, this->dim * sizeof(float));
float_vector_normalize(normalized_blob, this->dim);
vector_data = normalized_blob;
}

return this->dist_func(vector_index->block->getVector(vector_index->index), vector_data,
&this->dim);
}
Expand Down
7 changes: 0 additions & 7 deletions src/VecSim/algorithms/hnsw/hnsw_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,6 @@ VecSimResolveCode HNSWIndex::resolveParams(VecSimRawParam *rparams, int paramNum
}

/**
 * @brief Compute the distance between the indexed vector identified by `label` and a query blob.
 *
 * The blob is forwarded to the underlying HNSW structure unchanged. Per the public
 * VecSimIndex_GetDistanceFrom contract, the caller is responsible for supplying a blob that
 * matches the index's type and dimension and that is already normalized when the index metric
 * is cosine (VecSim_Normalize can be used for that). Normalizing here as well would require a
 * per-call copy of the query (previously done with a non-standard variable-length array).
 *
 * @param label the label of the vector stored in the index.
 * @param vector_data binary representation of the query vector, pre-normalized if needed.
 * @return whatever getDistanceByLabelFromPoint yields for `label` and `vector_data`.
 */
double HNSWIndex::getDistanceFrom(size_t label, const void *vector_data) {
    return this->hnsw->getDistanceByLabelFromPoint(label, vector_data);
}

Expand Down
6 changes: 6 additions & 0 deletions src/VecSim/vec_sim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ extern "C" double VecSimIndex_GetDistanceFrom(VecSimIndex *index, size_t id, con
return index->getDistanceFrom(id, blob);
}

/**
 * Normalize a vector blob in place.
 *
 * Only VecSimType_FLOAT32 blobs are handled. Any other type trips the assertion in debug builds;
 * in builds where assertions are compiled out (NDEBUG) the blob is left untouched rather than
 * being reinterpreted as floats and overwritten.
 *
 * blob - binary representation of the vector; overwritten with the normalized values.
 * dim  - vector dimension (number of elements in the blob).
 * type - element type of the blob.
 */
extern "C" void VecSim_Normalize(void *blob, size_t dim, VecSimType type) {
    // TODO: need more generic
    assert(type == VecSimType_FLOAT32);
    if (type != VecSimType_FLOAT32) {
        // The assert above vanishes under NDEBUG; never treat a non-float32 buffer as float32.
        return;
    }
    float_vector_normalize(static_cast<float *>(blob), dim);
}

// C entry point: report the index size by delegating to the index object's indexSize().
extern "C" size_t VecSimIndex_IndexSize(VecSimIndex *index) {
    const size_t size = index->indexSize();
    return size;
}

extern "C" VecSimResolveCode VecSimIndex_ResolveParams(VecSimIndex *index, VecSimRawParam *rparams,
Expand Down
15 changes: 13 additions & 2 deletions src/VecSim/vec_sim.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,28 @@ int VecSimIndex_AddVector(VecSimIndex *index, const void *blob, size_t id);
int VecSimIndex_DeleteVector(VecSimIndex *index, size_t id);

/**
 * @brief Calculate the distance between a vector stored in an index and a given vector. This
 * function assumes that the given vector fits the index - its type and dimension are the same as
 * the index's, and if the index's distance metric is cosine, the vector is already normalized.
 * @param index the index in which the first vector is located, and that defines the distance
 * metric.
 * @param id the id of the vector in the index.
 * @param blob binary representation of the second vector. Blob size should match the index data
 * type and dimension, and the blob should be pre-normalized if needed.
 * @return The distance (according to the index's distance metric) between `blob` and the vector
 * with id `id`.
 */
double VecSimIndex_GetDistanceFrom(VecSimIndex *index, size_t id, const void *blob);

/**
 * @brief Normalize the vector blob in place.
 * @param blob binary representation of a vector; it is overwritten with the normalized values.
 * Blob size should match the specified type and dimension.
 * @param dim vector dimension.
 * @param type vector type. The implementation currently asserts that this is VecSimType_FLOAT32.
 */
void VecSim_Normalize(void *blob, size_t dim, VecSimType type);

/**
* @brief Return the number of vectors in the index.
* @param index the index whose size is requested.
Expand Down
2 changes: 1 addition & 1 deletion src/VecSim/vec_sim_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ struct VecSimIndex : public VecsimBaseObject {
* metric.
* @param id the id of the vector in the index.
* @param blob binary representation of the second vector. Blob size should match the index data
* type and dimension, and the blob should be pre-normalized if needed.
* @return The distance (according to the index's distance metric) between `blob` and the vector
* with id `id`.
*/
Expand Down
8 changes: 6 additions & 2 deletions tests/unit/test_bruteforce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,10 @@ TEST_F(BruteForceTest, brute_get_distance) {
}

void *query = v1;
void *norm = v2; // {e, e}
VecSim_Normalize(norm, dim, VecSimType_FLOAT32); // now {1/sqrt(2), 1/sqrt(2)}
ASSERT_FLOAT_EQ(((float *)norm)[0], 1.0f / sqrt(2.0f));
ASSERT_FLOAT_EQ(((float *)norm)[1], 1.0f / sqrt(2.0f));
double dist;

// VecSimMetric_L2
Expand All @@ -990,12 +994,12 @@ TEST_F(BruteForceTest, brute_get_distance) {
// VecSimMetric_Cosine
distances = {5.9604644775390625e-08, 5.9604644775390625e-08, 0.0025991201400756836, 1};
for (size_t i = 0; i < n; i++) {
dist = VecSimIndex_GetDistanceFrom(index[VecSimMetric_Cosine], i + 1, query);
dist = VecSimIndex_GetDistanceFrom(index[VecSimMetric_Cosine], i + 1, norm);
ASSERT_DOUBLE_EQ(dist, distances[i]);
}

// Bad values
dist = VecSimIndex_GetDistanceFrom(index[VecSimMetric_Cosine], 0, query);
dist = VecSimIndex_GetDistanceFrom(index[VecSimMetric_Cosine], 0, norm);
ASSERT_TRUE(std::isnan(dist));
dist = VecSimIndex_GetDistanceFrom(index[VecSimMetric_L2], 46, query);
ASSERT_TRUE(std::isnan(dist));
Expand Down
8 changes: 6 additions & 2 deletions tests/unit/test_hnswlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,10 @@ TEST_F(HNSWLibTest, hnsw_get_distance) {
}

void *query = v1;
void *norm = v2; // {e, e}
VecSim_Normalize(norm, dim, VecSimType_FLOAT32); // now {1/sqrt(2), 1/sqrt(2)}
ASSERT_FLOAT_EQ(((float *)norm)[0], 1.0f / sqrt(2.0f));
ASSERT_FLOAT_EQ(((float *)norm)[1], 1.0f / sqrt(2.0f));
double dist;

// VecSimMetric_L2
Expand All @@ -1159,12 +1163,12 @@ TEST_F(HNSWLibTest, hnsw_get_distance) {
// VecSimMetric_Cosine
distances = {5.9604644775390625e-08, 5.9604644775390625e-08, 0.0025991201400756836, 1};
for (size_t i = 0; i < n; i++) {
dist = VecSimIndex_GetDistanceFrom(index[VecSimMetric_Cosine], i + 1, query);
dist = VecSimIndex_GetDistanceFrom(index[VecSimMetric_Cosine], i + 1, norm);
ASSERT_DOUBLE_EQ(dist, distances[i]);
}

// Bad values
dist = VecSimIndex_GetDistanceFrom(index[VecSimMetric_Cosine], 0, query);
dist = VecSimIndex_GetDistanceFrom(index[VecSimMetric_Cosine], 0, norm);
ASSERT_TRUE(std::isnan(dist));
dist = VecSimIndex_GetDistanceFrom(index[VecSimMetric_L2], 46, query);
ASSERT_TRUE(std::isnan(dist));
Expand Down