Skip to content

Commit e11592a

Browse files
committed
MDEV-35450 VEC_DISTANCE() function to autouse the available index type
1 parent 528249a commit e11592a

File tree

6 files changed

+210
-2
lines changed

6 files changed

+210
-2
lines changed

mysql-test/main/vector_funcs.result

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,3 +155,103 @@ select vec_totext(`null`) from (values (null),(0x00000000)) x;
155155
vec_totext(`null`)
156156
NULL
157157
[0]
158+
# End of 11.7 tests
159+
#
160+
# MDEV-35450 VEC_DISTANCE() function to autouse the available index type
161+
#
162+
create table t1 (a int primary key, b vector(5) not null, vector index (b) distance=euclidean);
163+
insert t1 values (0,vec_fromtext('[1,2,3,4,5]')), (1,vec_fromtext('[2,2,3,4,5]')),
164+
(2,vec_fromtext('[1,3,3,4,5]')), (3,vec_fromtext('[1,2,4,4,5]')),
165+
(4,vec_fromtext('[1,2,4,5,5]'));
166+
create table t2 (c int primary key, d vector(5) not null, vector index (d) distance=cosine);
167+
insert t2 select * from t1;
168+
create table t3 (e int primary key, f vector(5) not null);
169+
insert t3 select * from t1;
170+
select a,c,vec_distance(b,d),vec_distance_euclidean(b,d) from t1, t2 where a=(c+11)*13%5;
171+
a c vec_distance(b,d) vec_distance_euclidean(b,d)
172+
3 0 1 1
173+
1 1 0 0
174+
4 2 1.73205 1.73205
175+
2 3 1.41421 1.41421
176+
0 4 1.41421 1.41421
177+
select a,c,vec_distance(d,b),vec_distance_cosine(b,d) from t1, t2 where a=(c+11)*13%5;
178+
a c vec_distance(d,b) vec_distance_cosine(b,d)
179+
3 0 0.00676 0.00676
180+
1 1 0 0
181+
4 2 0.01943 0.01943
182+
2 3 0.01626 0.01626
183+
0 4 0.00784 0.00784
184+
select a,e,vec_distance(b,f),vec_distance_euclidean(b,f) from t1, t3 where a=(e+11)*13%5;
185+
a e vec_distance(b,f) vec_distance_euclidean(b,f)
186+
3 0 1 1
187+
1 1 0 0
188+
4 2 1.73205 1.73205
189+
2 3 1.41421 1.41421
190+
0 4 1.41421 1.41421
191+
select e,c,vec_distance(f,d),vec_distance_cosine(d,f) from t2, t3 where e=(c+11)*13%5;
192+
e c vec_distance(f,d) vec_distance_cosine(d,f)
193+
3 0 0.00676 0.00676
194+
1 1 0 0
195+
4 2 0.01943 0.01943
196+
2 3 0.01626 0.01626
197+
0 4 0.00784 0.00784
198+
select a,vec_distance(b,vec_fromtext('[5,4,3,2,1]')),vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]')) from t1;
199+
a vec_distance(b,vec_fromtext('[5,4,3,2,1]')) vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]'))
200+
0 6.32455 6.32455
201+
1 5.74456 5.74456
202+
2 6.08276 6.08276
203+
3 6.40312 6.40312
204+
4 6.78232 6.78232
205+
select c,vec_distance(d,vec_fromtext('[5,4,3,2,1]')),vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]')) from t2;
206+
c vec_distance(d,vec_fromtext('[5,4,3,2,1]')) vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]'))
207+
0 0.36363 0.36363
208+
1 0.29178 0.29178
209+
2 0.32109 0.32109
210+
3 0.34926 0.34926
211+
4 0.35989 0.35989
212+
select e,vec_distance(f,vec_fromtext('[5,4,3,2,1]')) from t3;
213+
ERROR HY000: Cannot determine distance type for VEC_DISTANCE, index is not found
214+
drop table t1, t2, t3;
215+
#
216+
# Item_func_vec_distance::do_get_copy()
217+
#
218+
create table t1 (a vector(1) not null, vector(a));
219+
create algorithm=temptable view v1 as select * from t1;
220+
select * from v1 where vec_distance(a,0x30303030) > 0;
221+
a
222+
drop view v1;
223+
drop table t1;
224+
#
225+
# MDEV-35724 VEC_DISTANCE does not work in HAVING clause
226+
#
227+
create table t (v vector(1) not null, vector(v));
228+
insert t values (0x31313131),(0x32323232);
229+
select v from t having vec_distance(v,0x30303030) > 0;
230+
v
231+
1111
232+
2222
233+
drop table t;
234+
#
235+
# MDEV-35752 VEC_DISTANCE does not work in triggers
236+
#
237+
create table t (id int primary key default 1, v vector(1) not null default 0x30303030, vector(v), d float);
238+
create trigger tr before insert on t for each row set new.d = vec_distance(new.v,0x30303030);
239+
insert t (v) values (0x31313131);
240+
select vec_distance(default(v), 0x31313131) from t;
241+
vec_distance(default(v), 0x31313131)
242+
0.00000
243+
insert t (v) values (0x32323232) on duplicate key update d=vec_distance(values(v), 0x31313131);
244+
drop table t;
245+
#
246+
# MDEV-35778 Server crashes in Item_func_vec_distance::fix_length_and_dec upon using VEC_DISTANCE with temptable view
247+
#
248+
create table t (x vector(1) not null, vector(x));
249+
insert into t values (0x31313131),(0x32323232);
250+
create algorithm=temptable view v as select * from t;
251+
select * from v order by vec_distance(0x30303030, x);
252+
x
253+
1111
254+
2222
255+
drop view v;
256+
drop table t;
257+
# End of 11.8 tests

mysql-test/main/vector_funcs.test

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,75 @@ select vec_fromtext(0x00000000);
7171
--echo # MDEV-35220 Assertion `!item->null_value' failed upon VEC_TOTEXT call
7272
--echo #
7373
select vec_totext(`null`) from (values (null),(0x00000000)) x;
74+
75+
--echo # End of 11.7 tests
76+
77+
--echo #
78+
--echo # MDEV-35450 VEC_DISTANCE() function to autouse the available index type
79+
--echo #
80+
# Setup: t1 has a vector index with distance=euclidean, t2 one with
# distance=cosine, t3 has no vector index. All three hold the same five rows.
81+
create table t1 (a int primary key, b vector(5) not null, vector index (b) distance=euclidean);
82+
insert t1 values (0,vec_fromtext('[1,2,3,4,5]')), (1,vec_fromtext('[2,2,3,4,5]')),
83+
(2,vec_fromtext('[1,3,3,4,5]')), (3,vec_fromtext('[1,2,4,4,5]')),
84+
(4,vec_fromtext('[1,2,4,5,5]'));
85+
create table t2 (c int primary key, d vector(5) not null, vector index (d) distance=cosine);
86+
insert t2 select * from t1;
87+
create table t3 (e int primary key, f vector(5) not null);
88+
insert t3 select * from t1;
89+
# Every query below selects vec_distance() next to the explicit
# vec_distance_euclidean()/vec_distance_cosine() call it is expected to equal.
# --replace_regex cuts the printed numbers to 5 digits after the decimal point.
# The join condition x=(y+11)*13%5 maps y=0..4 to x=3,1,4,2,0, so each row is
# paired with exactly one row of the other table.
#
# Both arguments are indexed columns (b: euclidean, d: cosine); with b first
# the result is compared against the euclidean distance.
90+
--replace_regex /(\.\d{5})\d+/\1/
91+
select a,c,vec_distance(b,d),vec_distance_euclidean(b,d) from t1, t2 where a=(c+11)*13%5;
# Same columns with the argument order swapped (d first): compared against
# the cosine distance.
92+
--replace_regex /(\.\d{5})\d+/\1/
93+
select a,c,vec_distance(d,b),vec_distance_cosine(b,d) from t1, t2 where a=(c+11)*13%5;
# Only the first argument (b) has a vector index.
94+
--replace_regex /(\.\d{5})\d+/\1/
95+
select a,e,vec_distance(b,f),vec_distance_euclidean(b,f) from t1, t3 where a=(e+11)*13%5;
# Only the second argument (d) has a vector index.
96+
--replace_regex /(\.\d{5})\d+/\1/
97+
select e,c,vec_distance(f,d),vec_distance_cosine(d,f) from t2, t3 where e=(c+11)*13%5;
# Indexed column against a constant vector, euclidean index.
98+
--replace_regex /(\.\d{5})\d+/\1/
99+
select a,vec_distance(b,vec_fromtext('[5,4,3,2,1]')),vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]')) from t1;
# Indexed column against a constant vector, cosine index.
100+
--replace_regex /(\.\d{5})\d+/\1/
101+
select c,vec_distance(d,vec_fromtext('[5,4,3,2,1]')),vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]')) from t2;
# Neither argument has a vector index (f is unindexed, the other is a
# constant), so the statement must fail with ER_VEC_DISTANCE_TYPE.
102+
--error ER_VEC_DISTANCE_TYPE
103+
select e,vec_distance(f,vec_fromtext('[5,4,3,2,1]')) from t3;
104+
105+
drop table t1, t2, t3;
106+
107+
--echo #
108+
--echo # Item_func_vec_distance::do_get_copy()
109+
--echo #
# vec_distance() in the WHERE clause over a column of an algorithm=temptable
# view. No rows are inserted into t1, so the select is expected to return an
# empty result.
110+
create table t1 (a vector(1) not null, vector(a));
111+
create algorithm=temptable view v1 as select * from t1;
112+
select * from v1 where vec_distance(a,0x30303030) > 0;
113+
drop view v1;
114+
drop table t1;
115+
116+
--echo #
117+
--echo # MDEV-35724 VEC_DISTANCE does not work in HAVING clause
118+
--echo #
# vec_distance() on an indexed vector column used in HAVING (no GROUP BY).
119+
create table t (v vector(1) not null, vector(v));
120+
insert t values (0x31313131),(0x32323232);
121+
select v from t having vec_distance(v,0x30303030) > 0;
122+
drop table t;
123+
124+
--echo #
125+
--echo # MDEV-35752 VEC_DISTANCE does not work in triggers
126+
--echo #
# Exercises vec_distance() on new.v inside a BEFORE INSERT trigger, on
# default(v) in a select, and on values(v) in ON DUPLICATE KEY UPDATE.
# id defaults to 1 and neither insert supplies it, so the second insert
# collides with the first row on the primary key.
127+
create table t (id int primary key default 1, v vector(1) not null default 0x30303030, vector(v), d float);
128+
create trigger tr before insert on t for each row set new.d = vec_distance(new.v,0x30303030);
129+
insert t (v) values (0x31313131);
# Cut the printed distance to 5 digits after the decimal point.
130+
--replace_regex /(\.\d{5})\d+/\1/
131+
select vec_distance(default(v), 0x31313131) from t;
132+
insert t (v) values (0x32323232) on duplicate key update d=vec_distance(values(v), 0x31313131);
133+
drop table t;
134+
135+
--echo #
136+
--echo # MDEV-35778 Server crashes in Item_func_vec_distance::fix_length_and_dec upon using VEC_DISTANCE with temptable view
137+
--echo #
# ORDER BY vec_distance() where the constant is the first argument and the
# second argument is a column of an algorithm=temptable view.
138+
create table t (x vector(1) not null, vector(x));
139+
insert into t values (0x31313131),(0x32323232);
140+
create algorithm=temptable view v as select * from t;
141+
select * from v order by vec_distance(0x30303030, x);
142+
drop view v;
143+
drop table t;
144+
145+
--echo # End of 11.8 tests

sql/item_create.cc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6287,6 +6287,22 @@ class Create_func_vec_distance_cosine: public Create_func_arg2
62876287

62886288
Create_func_vec_distance_cosine Create_func_vec_distance_cosine::s_singleton;
62896289

6290+
// Builder for the two-argument SQL function VEC_DISTANCE(a, b).
// It creates an Item_func_vec_distance with kind AUTO, i.e. without a fixed
// metric; the AUTO case of Item_func_vec_distance::fix_length_and_dec() then
// picks the kind from a vector index on one of the arguments.
class Create_func_vec_distance: public Create_func_arg2
6291+
{
6292+
public:
6293+
// Constructs the item with placement new on thd->mem_root.
Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override
6294+
{ return new (thd->mem_root)
6295+
Item_func_vec_distance(thd, arg1, arg2, Item_func_vec_distance::AUTO); }
6296+
6297+
// The one instance of this builder declared here.
// NOTE(review): presumably what BUILDER(Create_func_vec_distance) in
// func_array refers to -- confirm against the BUILDER macro.
static Create_func_vec_distance s_singleton;
6298+
6299+
protected:
6300+
// Constructor and destructor are protected, so outside code cannot create
// or destroy builder objects directly.
Create_func_vec_distance() = default;
6301+
virtual ~Create_func_vec_distance() = default;
6302+
};
6303+
6304+
// Out-of-class definition of the static singleton member.
Create_func_vec_distance Create_func_vec_distance::s_singleton;
6305+
62906306
class Create_func_vec_totext: public Create_func_arg1
62916307
{
62926308
public:
@@ -6549,6 +6565,7 @@ const Native_func_registry func_array[] =
65496565
{ { STRING_WITH_LEN("UUID_SHORT") }, BUILDER(Create_func_uuid_short)},
65506566
{ { STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") }, BUILDER(Create_func_vec_distance_euclidean)},
65516567
{ { STRING_WITH_LEN("VEC_DISTANCE_COSINE") }, BUILDER(Create_func_vec_distance_cosine)},
6568+
{ { STRING_WITH_LEN("VEC_DISTANCE") }, BUILDER(Create_func_vec_distance)},
65526569
{ { STRING_WITH_LEN("VEC_FROMTEXT") }, BUILDER(Create_func_vec_fromtext)},
65536570
{ { STRING_WITH_LEN("VEC_TOTEXT") }, BUILDER(Create_func_vec_totext)},
65546571
{ { STRING_WITH_LEN("VERSION") }, BUILDER(Create_func_version)},

sql/item_vectorfunc.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,22 @@ bool Item_func_vec_distance::fix_length_and_dec(THD *thd)
5959
switch (kind) {
6060
case EUCLIDEAN: calc_distance= calc_distance_euclidean; break;
6161
case COSINE: calc_distance= calc_distance_cosine; break;
62+
case AUTO:
63+
for (uint i=0; i < 2; i++)
64+
if (auto *item= dynamic_cast<Item_field*>(args[i]->real_item()))
65+
{
66+
TABLE_SHARE *share= item->field->orig_table->s;
67+
Field *f= share->field[item->field->field_index];
68+
KEY *kinfo= share->key_info;
69+
for (uint j= share->keys; j < share->total_keys; j++)
70+
if (kinfo[j].algorithm == HA_KEY_ALG_VECTOR && f->key_start.is_set(j))
71+
{
72+
kind= mhnsw_uses_distance(f->table, kinfo + j);
73+
return fix_length_and_dec(thd);
74+
}
75+
}
76+
my_error(ER_VEC_DISTANCE_TYPE, MYF(0));
77+
return 1;
6278
}
6379
set_maybe_null(); // if wrong dimensions
6480
return Item_real_func::fix_length_and_dec(thd);

sql/item_vectorfunc.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,14 @@ class Item_func_vec_distance: public Item_real_func
3939
double (*calc_distance)(float *v1, float *v2, size_t v_len);
4040

4141
public:
42-
enum distance_kind { EUCLIDEAN, COSINE } kind;
42+
enum distance_kind { EUCLIDEAN, COSINE, AUTO } kind;
4343
Item_func_vec_distance(THD *thd, Item *a, Item *b, distance_kind kind);
4444
// Returns the SQL name of this function for the current `kind`.
// The table is indexed directly by `kind`, so its entries must stay in the
// same order as enum distance_kind { EUCLIDEAN, COSINE, AUTO }.
LEX_CSTRING func_name_cstring() const override
4545
{
4646
static LEX_CSTRING name[3]= {
4747
{ STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") },
48-
{ STRING_WITH_LEN("VEC_DISTANCE_COSINE") }
48+
{ STRING_WITH_LEN("VEC_DISTANCE_COSINE") },
49+
{ STRING_WITH_LEN("VEC_DISTANCE") }
4950
};
5051
return name[kind];
5152
}

sql/share/errmsg-utf8.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12291,3 +12291,5 @@ ER_VECTOR_BINARY_FORMAT_INVALID
1229112291
eng "Invalid binary vector format. Must use IEEE standard float representation in little-endian format. Use VEC_FromText() to generate it."
1229212292
ER_VECTOR_FORMAT_INVALID
1229312293
eng "Invalid vector format at offset: %d for '%-.100s'. Must be a valid JSON array of numbers."
12294+
ER_VEC_DISTANCE_TYPE
12295+
eng "Cannot determine distance type for VEC_DISTANCE, index is not found"

0 commit comments

Comments
 (0)