26
26
27
27
// Algorithm parameters
28
28
static constexpr float alpha = 1 .1f ;
29
- static constexpr float generosity = 1 .1f ;
30
29
static constexpr uint ef_construction= 10 ;
31
30
32
31
static ulonglong mhnsw_cache_size;
@@ -334,6 +333,7 @@ class MHNSW_Context : public Sql_alloc
334
333
size_t vec_len= 0 ;
335
334
size_t byte_len= 0 ;
336
335
Atomic_relaxed<double > ef_power{0.6 }; // for the bloom filter size heuristic
336
+ Atomic_relaxed<float > diameter{0 }; // for the generosity heuristic
337
337
FVectorNode *start= 0 ;
338
338
const uint tref_len;
339
339
const uint gref_len;
@@ -957,6 +957,17 @@ static int update_second_degree_neighbors(MHNSW_Context *ctx, TABLE *graph,
957
957
return 0 ;
958
958
}
959
959
960
+
961
+ static inline float generous_furthest (const Queue<Visited> &q, float maxd, float g)
962
+ {
963
+ float d0=maxd*g/2 ;
964
+ float d= q.top ()->distance_to_target ;
965
+ float k= 5 ;
966
+ float x= (d-d0)/d0;
967
+ float sigmoid= k*x/std::sqrt (1 +(k*k-1 )*x*x); // or any other sigmoid
968
+ return d*(1 + (g - 1 )/2 * (1 - sigmoid));
969
+ }
970
+
960
971
static int search_layer (MHNSW_Context *ctx, TABLE *graph, const FVector *target,
961
972
Neighborhood *start_nodes, uint result_size,
962
973
size_t layer, Neighborhood *result, bool construction)
@@ -968,6 +979,7 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
968
979
Queue<Visited> candidates, best;
969
980
bool skip_deleted;
970
981
uint ef= result_size;
982
+ float generosity= 1 .1f + ctx->M /500 .0f ;
971
983
972
984
if (construction)
973
985
{
@@ -991,17 +1003,19 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
991
1003
best.init (ef, true , Visited::cmp);
992
1004
993
1005
DBUG_ASSERT (start_nodes->num <= result_size);
1006
+ float max_distance= ctx->diameter ;
994
1007
for (size_t i=0 ; i < start_nodes->num ; i++)
995
1008
{
996
1009
Visited *v= visited.create (start_nodes->links [i]);
1010
+ max_distance= std::max (max_distance, v->distance_to_target );
997
1011
candidates.push (v);
998
1012
if (skip_deleted && v->node ->deleted )
999
1013
continue ;
1000
1014
best.push (v);
1001
1015
}
1002
1016
1003
1017
float furthest_best= best.is_empty () ? FLT_MAX
1004
- : best. top ()-> distance_to_target * generosity;
1018
+ : generous_furthest (best, max_distance, generosity) ;
1005
1019
while (candidates.elements ())
1006
1020
{
1007
1021
const Visited &cur= *candidates.pop ();
@@ -1027,11 +1041,12 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
1027
1041
Visited *v= visited.create (links[i]);
1028
1042
if (!best.is_full ())
1029
1043
{
1044
+ max_distance= std::max (max_distance, v->distance_to_target );
1030
1045
candidates.push (v);
1031
1046
if (skip_deleted && v->node ->deleted )
1032
1047
continue ;
1033
1048
best.push (v);
1034
- furthest_best= best. top ()-> distance_to_target * generosity;
1049
+ furthest_best= generous_furthest (best, max_distance, generosity) ;
1035
1050
}
1036
1051
else if (v->distance_to_target < furthest_best)
1037
1052
{
@@ -1041,12 +1056,13 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
1041
1056
if (v->distance_to_target < best.top ()->distance_to_target )
1042
1057
{
1043
1058
best.replace_top (v);
1044
- furthest_best= best. top ()-> distance_to_target * generosity;
1059
+ furthest_best= generous_furthest (best, max_distance, generosity) ;
1045
1060
}
1046
1061
}
1047
1062
}
1048
1063
}
1049
1064
}
1065
+ set_if_bigger (ctx->diameter , max_distance); // not atomic, but it's ok
1050
1066
if (ef > 1 && visited.count *2 > est_size)
1051
1067
{
1052
1068
double ef_power= std::log (visited.count *2 /est_heuristic) / std::log (ef);
0 commit comments