Skip to content

Commit 6cecf61

Browse files
committed
MDEV-34417 Wrong result set with utf8mb4_danish_ci and BNLH join
There were erroneous calls to charpos() in key_hashnr() and key_buf_cmp(). These functions are never called with prefix segments, so the charpos() calls were wrong. Before the change, BNLH joins: (1) could return wrong result sets, as reported in MDEV-34417; (2) were extremely slow for multi-byte character sets, because the hash was calculated on string prefixes, which drastically increased the number of collisions. This patch fixes the wrong result set reported in MDEV-34417, as well as (partially) the performance problem reported in MDEV-34352.
1 parent 2f0e7f6 commit 6cecf61

File tree

3 files changed

+76
-25
lines changed

3 files changed

+76
-25
lines changed

mysql-test/main/ctype_uca.result

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15370,3 +15370,39 @@ DROP TABLE t1;
1537015370
#
1537115371
# End of MariaDB-10.2 tests
1537215372
#
15373+
#
15374+
# Start of 10.5 tests
15375+
#
15376+
#
15377+
# MDEV-34417 Wrong result set with utf8mb4_danish_ci and BNLH join
15378+
#
15379+
CREATE TABLE t1 (a VARCHAR(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_danish_ci);
15380+
INSERT INTO t1 VALUES ('aaaa'),('åå');
15381+
SELECT * FROM t1 WHERE a='aaaa';
15382+
a
15383+
aaaa
15384+
åå
15385+
SET join_cache_level=1;
15386+
SELECT * FROM t1 NATURAL JOIN t1 t2;
15387+
a
15388+
aaaa
15389+
åå
15390+
aaaa
15391+
åå
15392+
# Expect a BNHL join
15393+
SET join_cache_level=3;
15394+
EXPLAIN SELECT * FROM t1 NATURAL JOIN t1 t2;
15395+
id select_type table type possible_keys key key_len ref rows Extra
15396+
1 SIMPLE t1 ALL NULL NULL NULL NULL 2 Using where
15397+
1 SIMPLE t2 hash_ALL NULL #hash#$hj 2003 test.t1.a 2 Using where; Using join buffer (flat, BNLH join)
15398+
SELECT * FROM t1 NATURAL JOIN t1 t2;
15399+
a
15400+
aaaa
15401+
åå
15402+
aaaa
15403+
åå
15404+
DROP TABLE t1;
15405+
SET join_cache_level=DEFAULT;
15406+
#
15407+
# End of 10.5 tests
15408+
#

mysql-test/main/ctype_uca.test

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,3 +696,32 @@ DROP TABLE t1;
696696
--echo #
697697
--echo # End of MariaDB-10.2 tests
698698
--echo #
699+
700+
701+
--echo #
702+
--echo # Start of 10.5 tests
703+
--echo #
704+
705+
--echo #
706+
--echo # MDEV-34417 Wrong result set with utf8mb4_danish_ci and BNLH join
707+
--echo #
708+
709+
CREATE TABLE t1 (a VARCHAR(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_danish_ci);
710+
INSERT INTO t1 VALUES ('aaaa'),('åå');
711+
SELECT * FROM t1 WHERE a='aaaa';
712+
713+
SET join_cache_level=1;
714+
SELECT * FROM t1 NATURAL JOIN t1 t2;
715+
716+
--echo # Expect a BNHL join
717+
SET join_cache_level=3;
718+
EXPLAIN SELECT * FROM t1 NATURAL JOIN t1 t2;
719+
SELECT * FROM t1 NATURAL JOIN t1 t2;
720+
721+
DROP TABLE t1;
722+
SET join_cache_level=DEFAULT;
723+
724+
725+
--echo #
726+
--echo # End of 10.5 tests
727+
--echo #

sql/key.cc

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -754,13 +754,11 @@ ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key)
754754

755755
if (is_string)
756756
{
757-
if (cs->mbmaxlen > 1)
758-
{
759-
size_t char_length= cs->charpos(pos + pack_length,
760-
pos + pack_length + length,
761-
length / cs->mbmaxlen);
762-
set_if_smaller(length, char_length);
763-
}
757+
/*
758+
Prefix keys are not possible in BNLH joins.
759+
Use the whole string to calculate the hash.
760+
*/
761+
DBUG_ASSERT((key_part->key_part_flag & HA_PART_KEY_SEG) == 0);
764762
cs->hash_sort(pos+pack_length, length, &nr, &nr2);
765763
key+= pack_length;
766764
}
@@ -864,25 +862,13 @@ bool key_buf_cmp(KEY *key_info, uint used_key_parts,
864862
if (is_string)
865863
{
866864
/*
867-
Compare the strings taking into account length in characters
868-
and collation
865+
Prefix keys are not possible in BNLH joins.
866+
Compare whole strings.
869867
*/
870-
size_t byte_len1= length1, byte_len2= length2;
871-
if (cs->mbmaxlen > 1)
872-
{
873-
size_t char_length1= cs->charpos(pos1 + pack_length,
874-
pos1 + pack_length + length1,
875-
length1 / cs->mbmaxlen);
876-
size_t char_length2= cs->charpos(pos2 + pack_length,
877-
pos2 + pack_length + length2,
878-
length2 / cs->mbmaxlen);
879-
set_if_smaller(length1, char_length1);
880-
set_if_smaller(length2, char_length2);
881-
}
882-
if (length1 != length2 ||
883-
cs->strnncollsp(pos1 + pack_length, byte_len1,
884-
pos2 + pack_length, byte_len2))
885-
return TRUE;
868+
DBUG_ASSERT((key_part->key_part_flag & HA_PART_KEY_SEG) == 0);
869+
if (cs->strnncollsp(pos1 + pack_length, length1,
870+
pos2 + pack_length, length2))
871+
return true;
886872
key1+= pack_length; key2+= pack_length;
887873
}
888874
else

0 commit comments

Comments
 (0)