Skip to content

Commit 26e599c

Browse files
committed
mhnsw: make the search less greedy
Introduced a generosity factor that makes the search less greedy. It dramatically improves recall at the cost of making the search a bit slower (for the same recall one can use half the M and a smaller ef). Had to add a Queue::safe_push() method that removes one of the furthest elements (not necessarily the furthest) from the queue when it is full, to keep it from overflowing.
1 parent 885eb19 commit 26e599c

File tree

2 files changed

+15
-5
lines changed

2 files changed

+15
-5
lines changed

sql/sql_queue.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ class Queue
4444
Element *top() const { return (Element*)queue_top(&m_queue); }
4545

4646
void push(const Element *element) { queue_insert(&m_queue, (uchar*)element); }
47+
void safe_push(const Element *element)
48+
{
49+
if (is_full()) m_queue.elements--; // remove one of the furthest elements
50+
queue_insert(&m_queue, (uchar*)element);
51+
}
4752
Element *pop() { return (Element *)queue_remove_top(&m_queue); }
4853
void clear() { queue_remove_all(&m_queue); }
4954
void propagate_top() { queue_replace_top(&m_queue); }

sql/vector_mhnsw.cc

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ ulonglong mhnsw_cache_size;
2727

2828
// Algorithm parameters
2929
static constexpr float alpha = 1.1f;
30+
static constexpr float generosity = 1.1f;
3031
static constexpr uint ef_construction= 10;
3132

3233
enum Graph_table_fields {
@@ -928,7 +929,8 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
928929
best.push(v);
929930
}
930931

931-
float furthest_best= FLT_MAX;
932+
float furthest_best= best.is_empty() ? FLT_MAX
933+
: best.top()->distance_to_target * generosity;
932934
while (candidates.elements())
933935
{
934936
const Visited &cur= *candidates.pop();
@@ -958,15 +960,18 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
958960
if (skip_deleted && v->node->deleted)
959961
continue;
960962
best.push(v);
961-
furthest_best= best.top()->distance_to_target;
963+
furthest_best= best.top()->distance_to_target * generosity;
962964
}
963965
else if (v->distance_to_target < furthest_best)
964966
{
965-
candidates.push(v);
967+
candidates.safe_push(v);
966968
if (skip_deleted && v->node->deleted)
967969
continue;
968-
best.replace_top(v);
969-
furthest_best= best.top()->distance_to_target;
970+
if (v->distance_to_target < best.top()->distance_to_target)
971+
{
972+
best.replace_top(v);
973+
furthest_best= best.top()->distance_to_target * generosity;
974+
}
970975
}
971976
}
972977
}

0 commit comments

Comments
 (0)