Skip to content

Commit e3661b9

Browse files
committed
Cherry-picked from MyRocks upstream: Issue #809: Wrong query result with bloom filters
In reverse-ordered column families, if one wants to start reading at the logical end of the index, they should Seek() to a key value that is not covered by the index. This may (and typically does) prevent use of a bloom filter. The calls to setup_scan_iterator() that are made for index and table scan didn't take this into account and passed eq_cond_len=INDEX_NUMBER_SIZE. Fixed them to compute and pass correct eq_cond_len. Also, removed an incorrect assert in ha_rocksdb::setup_iterator_bounds.
1 parent 39fbafb commit e3661b9

File tree

5 files changed

+149
-10
lines changed

5 files changed

+149
-10
lines changed

storage/rocksdb/ha_rocksdb.cc

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8526,7 +8526,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
85268526
DBUG_ASSERT(key != nullptr);
85278527

85288528
const Rdb_key_def &kd = *m_key_descr_arr[active_index];
8529-
kd.get_first_key(key, &key_size);
8529+
int key_start_matching_bytes = kd.get_first_key(key, &key_size);
85308530

85318531
rocksdb::Slice index_key((const char *)key, key_size);
85328532

@@ -8537,7 +8537,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
85378537
// Loop as long as we get a deadlock error AND we end up creating the
85388538
// snapshot here (i.e. it did not exist prior to this)
85398539
for (;;) {
8540-
setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE);
8540+
setup_scan_iterator(kd, &index_key, false, key_start_matching_bytes);
85418541
m_scan_it->Seek(index_key);
85428542
m_skip_scan_it_next_call = true;
85438543

@@ -9539,7 +9539,6 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
95399539
void ha_rocksdb::setup_iterator_bounds(const Rdb_key_def &kd,
95409540
const rocksdb::Slice &eq_cond) {
95419541
uint eq_cond_len = eq_cond.size();
9542-
DBUG_ASSERT(eq_cond_len >= Rdb_key_def::INDEX_NUMBER_SIZE);
95439542
memcpy(m_eq_cond_upper_bound, eq_cond.data(), eq_cond_len);
95449543
kd.successor(m_eq_cond_upper_bound, eq_cond_len);
95459544
memcpy(m_eq_cond_lower_bound, eq_cond.data(), eq_cond_len);
@@ -9635,12 +9634,12 @@ void ha_rocksdb::release_scan_iterator() {
96359634
void ha_rocksdb::setup_iterator_for_rnd_scan() {
96369635
uint key_size;
96379636

9638-
m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size);
9637+
int key_start_matching_bytes = m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size);
96399638

96409639
rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size);
96419640

96429641
setup_scan_iterator(*m_pk_descr, &table_key, false,
9643-
Rdb_key_def::INDEX_NUMBER_SIZE);
9642+
key_start_matching_bytes);
96449643
m_scan_it->Seek(table_key);
96459644
m_skip_scan_it_next_call = true;
96469645
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#
2+
# Issue #809: Wrong query result with bloom filters
3+
#
4+
create table t1 (
5+
id1 bigint not null,
6+
id2 bigint not null,
7+
id3 varchar(100) not null,
8+
id4 int not null,
9+
id5 int not null,
10+
value bigint,
11+
value2 varchar(100),
12+
primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1'
13+
) engine=ROCKSDB;
14+
create table t2(a int);
15+
insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
16+
create table t3(seq int);
17+
insert into t3
18+
select
19+
1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000
20+
from t2 A, t2 B, t2 C, t2 D;
21+
insert t1
22+
select
23+
(seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
24+
from t3;
25+
set global rocksdb_force_flush_memtable_now=1;
26+
# Full table scan
27+
explain
28+
select * from t1 limit 10;
29+
id select_type table type possible_keys key key_len ref rows Extra
30+
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000
31+
select * from t1 limit 10;
32+
id1 id2 id3 id4 id5 value value2
33+
1000 2000 2000 10000 10000 1000 aaabbbccc
34+
1000 2000 2000 9999 9999 1000 aaabbbccc
35+
1000 2000 2000 9998 9998 1000 aaabbbccc
36+
1000 2000 2000 9997 9997 1000 aaabbbccc
37+
1000 2000 2000 9996 9996 1000 aaabbbccc
38+
1000 1999 1999 9995 9995 1000 aaabbbccc
39+
1000 1999 1999 9994 9994 1000 aaabbbccc
40+
1000 1999 1999 9993 9993 1000 aaabbbccc
41+
1000 1999 1999 9992 9992 1000 aaabbbccc
42+
1000 1999 1999 9991 9991 1000 aaabbbccc
43+
# An index scan starting from the end of the table:
44+
explain
45+
select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
46+
id select_type table type possible_keys key key_len ref rows Extra
47+
1 SIMPLE t1 index NULL PRIMARY 122 NULL 1
48+
select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
49+
id1 id2 id3 id4 id5 value value2
50+
1000 2000 2000 10000 10000 1000 aaabbbccc
51+
create table t4 (
52+
pk int unsigned not null primary key,
53+
kp1 int unsigned not null,
54+
kp2 int unsigned not null,
55+
col1 int unsigned,
56+
key(kp1, kp2) comment 'rev:bf5_2'
57+
) engine=rocksdb;
58+
insert into t4 values (1, 0xFFFF, 0xFFF, 12345);
59+
# This must not fail an assert:
60+
select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc;
61+
pk kp1 kp2 col1
62+
drop table t1,t2,t3,t4;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;}};
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
2+
--echo #
3+
--echo # Issue #809: Wrong query result with bloom filters
4+
--echo #
5+
6+
create table t1 (
7+
id1 bigint not null,
8+
id2 bigint not null,
9+
id3 varchar(100) not null,
10+
id4 int not null,
11+
id5 int not null,
12+
value bigint,
13+
value2 varchar(100),
14+
primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1'
15+
) engine=ROCKSDB;
16+
17+
18+
create table t2(a int);
19+
insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
20+
21+
create table t3(seq int);
22+
insert into t3
23+
select
24+
1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000
25+
from t2 A, t2 B, t2 C, t2 D;
26+
27+
insert t1
28+
select
29+
(seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
30+
from t3;
31+
32+
set global rocksdb_force_flush_memtable_now=1;
33+
34+
--echo # Full table scan
35+
explain
36+
select * from t1 limit 10;
37+
select * from t1 limit 10;
38+
39+
--echo # An index scan starting from the end of the table:
40+
explain
41+
select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
42+
select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
43+
44+
# A testcase for an assertion that the fix is removing
45+
# The only requirement for the used column family is that it is reverse-ordered
46+
create table t4 (
47+
pk int unsigned not null primary key,
48+
kp1 int unsigned not null,
49+
kp2 int unsigned not null,
50+
col1 int unsigned,
51+
key(kp1, kp2) comment 'rev:bf5_2'
52+
) engine=rocksdb;
53+
54+
insert into t4 values (1, 0xFFFF, 0xFFF, 12345);
55+
56+
--echo # This must not fail an assert:
57+
select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc;
58+
59+
drop table t1,t2,t3,t4;
60+
61+

storage/rocksdb/rdb_datadic.h

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -238,12 +238,28 @@ class Rdb_key_def {
238238
*size = INDEX_NUMBER_SIZE;
239239
}
240240

241-
/* Get the first key that you need to position at to start iterating.
242-
Returns a "supremum" or "infimum" for this index based on collation order
241+
/*
242+
Get the first key that you need to position at to start iterating.
243+
244+
Stores into *key a "supremum" or "infimum" key value for the index.
245+
246+
@return Number of bytes in the key that are usable for bloom filter use.
243247
*/
244-
inline void get_first_key(uchar *const key, uint *const size) const {
245-
return m_is_reverse_cf ? get_supremum_key(key, size)
246-
: get_infimum_key(key, size);
248+
inline int get_first_key(uchar *const key, uint *const size) const {
249+
if (m_is_reverse_cf)
250+
get_supremum_key(key, size);
251+
else
252+
get_infimum_key(key, size);
253+
254+
/* Find out how many bytes of infimum are the same as m_index_number */
255+
uchar unmodified_key[INDEX_NUMBER_SIZE];
256+
rdb_netbuf_store_index(unmodified_key, m_index_number);
257+
int i;
258+
for (i = 0; i < INDEX_NUMBER_SIZE; i++) {
259+
if (key[i] != unmodified_key[i])
260+
break;
261+
}
262+
return i;
247263
}
248264

249265
/* Make a key that is right after the given key. */

0 commit comments

Comments
 (0)