Skip to content

Commit

Permalink
MDEV-30034 UNIQUE USING HASH accepts duplicate entries for tricky col…
Browse files Browse the repository at this point in the history
…lations

- Adding a new argument "flag" to MY_COLLATION_HANDLER::strnncollsp_nchars()
  and a flag MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES.
  The flag defines if strnncollsp_nchars() should emulate trailing spaces
  which were possibly trimmed earlier (e.g. in InnoDB CHAR compression).
  This is important for NOPAD collations.

  For example, with this input:
   - str1= 'a '    (Latin letter a followed by one space)
   - str2= 'a  '   (Latin letter a followed by two spaces)
   - nchars= 3
  if the flag is given, strnncollsp_nchars() will virtually restore
  one trailing space to str1 up to nchars (3) characters and compare two
  strings as equal:
  - str1= 'a  '  (one extra trailing space emulated)
  - str2= 'a  '  (as is)

  If the flag is not given, strnncollsp_nchars() does not add trailing
  virtual spaces, so in case of a NOPAD collation, str1 will be compared
  as less than str2 because it is shorter.

- Field_string::cmp_prefix() now passes the new flag.
  Field_varstring::cmp_prefix() and Field_blob::cmp_prefix() do
  not pass the new flag.

- The branch in cmp_whole_field() in storage/innobase/rem/rem0cmp.cc
  (which handles the CHAR data type) now also passed the new flag.

- Fixing UCA collations to respect the new flag.
  Other collations are possibly also affected, however
  I had no success in making an SQL script demonstrating the problem.
  Other collations will be extended to respect this flags in a separate
  patch later.

- Changing the meaning of the last parameter of Field::cmp_prefix()
  from "number of bytes" (internal length)
  to "number of characters" (user visible length).

  The code calling cmp_prefix() from handler.cc was wrong.
  After this change, the call in handler.cc became correct.

  The code calling cmp_prefix() from key_rec_cmp() in key.cc
  was adjusted according to this change.

- Old strnncollsp_nchar() related tests in unittest/strings/strings-t.c
  now pass the new flag.
  A few new tests also were added, without the flag.
  • Loading branch information
abarkov committed Apr 4, 2023
1 parent 0cc1694 commit 8020b1b
Show file tree
Hide file tree
Showing 18 changed files with 659 additions and 237 deletions.
25 changes: 24 additions & 1 deletion include/m_ctype.h
Expand Up @@ -248,6 +248,28 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_STRXFRM_REVERSE_LEVEL6 0x00200000 /* if reverse order for level6 */
#define MY_STRXFRM_REVERSE_SHIFT 16

/* Flags to strnncollsp_nchars */
/*
MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES -
defines if inside strnncollsp_nchars()
short strings should be virtually extended to "nchars"
characters by emulating trimmed trailing spaces.
This flag is needed when comparing packed strings of the CHAR
data type, when trailing spaces are trimmed on storage (like in InnoDB),
however the actual values (after unpacking) will have those trailing
spaces.
If this flag is passed, strnncollsp_nchars() performs both
truncating longer strings and extending shorter strings
to exactly "nchars".
If this flag is not passed, strnncollsp_nchars() only truncates longer
strings to "nchars", but does not extend shorter strings to "nchars".
*/
#define MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES 1


/*
Collation IDs for MariaDB that should not conflict with MySQL.
We reserve 256..511, because MySQL will most likely use this range
Expand Down Expand Up @@ -383,7 +405,8 @@ struct my_collation_handler_st
int (*strnncollsp_nchars)(CHARSET_INFO *,
const uchar *str1, size_t len1,
const uchar *str2, size_t len2,
size_t nchars);
size_t nchars,
uint flags);
size_t (*strnxfrm)(CHARSET_INFO *,
uchar *dst, size_t dstlen, uint nweights,
const uchar *src, size_t srclen, uint flags);
Expand Down
85 changes: 85 additions & 0 deletions mysql-test/include/ctype_nopad_prefix_unique.inc
@@ -0,0 +1,85 @@
--echo #
--echo # MDEV-30034 UNIQUE USING HASH accepts duplicate entries for tricky collations
--echo #

# TEXT

if (`SELECT UPPER(@@storage_engine) != 'MEMORY'`)
{
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a TEXT COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;


EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a TEXT COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
}


# VARCHAR

EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;


EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;

# CHAR

# MyISAM is buggy on CHAR+BTREE+UNIQUE+PREFIX (see MDEV-30048), disable for now
# Other engines work fine

if (`SELECT UPPER(@@storage_engine) != 'MYISAM'`)
{
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
}

EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
148 changes: 148 additions & 0 deletions mysql-test/main/ctype_utf8_uca.result
Expand Up @@ -761,3 +761,151 @@ DROP TABLE case_folding;
#
# End of 10.3 tests
#
#
# Start of 10.4 tests
#
SET STORAGE_ENGINE=MyISAM;
SET NAMES utf8mb3 COLLATE utf8mb3_unicode_nopad_ci;
#
# MDEV-30034 UNIQUE USING HASH accepts duplicate entries for tricky collations
#
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a TEXT COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` text CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3))
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a TEXT COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` text CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(2000) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3))
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(2000) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(20) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
SET STORAGE_ENGINE=HEAP;
#
# MDEV-30034 UNIQUE USING HASH accepts duplicate entries for tricky collations
#
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(2000) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3))
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(2000) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(20) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3))
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(20) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
SET STORAGE_ENGINE=DEFAULT;
#
# End of 10.4 tests
#
18 changes: 18 additions & 0 deletions mysql-test/main/ctype_utf8_uca.test
Expand Up @@ -50,3 +50,21 @@ SET NAMES utf8mb3 COLLATE utf8mb3_thai_520_w2;
--echo #
--echo # End of 10.3 tests
--echo #


--echo #
--echo # Start of 10.4 tests
--echo #

SET STORAGE_ENGINE=MyISAM;
SET NAMES utf8mb3 COLLATE utf8mb3_unicode_nopad_ci;
--source include/ctype_nopad_prefix_unique.inc

SET STORAGE_ENGINE=HEAP;
--source include/ctype_nopad_prefix_unique.inc

SET STORAGE_ENGINE=DEFAULT;

--echo #
--echo # End of 10.4 tests
--echo #

0 comments on commit 8020b1b

Please sign in to comment.