Skip to content
Permalink
Browse files
MDEV-27653 long uniques don't work with unicode collations
  • Loading branch information
abarkov authored and Jan Lindström committed Jan 20, 2023
1 parent beb1e23 commit a27b8b2
Show file tree
Hide file tree
Showing 27 changed files with 668 additions and 66 deletions.
@@ -11379,3 +11379,178 @@ a
#
# End of 10.3 tests
#
#
# Start of 10.4 tests
#
#
# MDEV-27653 long uniques don't work with unicode collations
#
SET NAMES utf8mb3;
CREATE TABLE t1 (
a CHAR(30) COLLATE utf8mb3_general_ci,
UNIQUE KEY(a) USING HASH
);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(30) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('a');
INSERT INTO t1 VALUES ('ä');
ERROR 23000: Duplicate entry 'ä' for key 'a'
SELECT * FROM t1;
a
a
DROP TABLE t1;
CREATE TABLE t1 (
a CHAR(30) COLLATE utf8mb3_general_ci,
UNIQUE KEY(a(10)) USING HASH
);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(30) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(10)) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('a');
INSERT INTO t1 VALUES ('ä');
ERROR 23000: Duplicate entry 'ä' for key 'a'
SELECT * FROM t1;
a
a
DROP TABLE t1;
CREATE TABLE t1 (
a VARCHAR(30) COLLATE utf8mb3_general_ci,
UNIQUE KEY(a) USING HASH
);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(30) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('a');
INSERT INTO t1 VALUES ('ä');
ERROR 23000: Duplicate entry 'ä' for key 'a'
SELECT * FROM t1;
a
a
DROP TABLE t1;
CREATE TABLE t1 (
a VARCHAR(30) COLLATE utf8mb3_general_ci,
UNIQUE KEY(a(10)) USING HASH
);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(30) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(10)) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('a');
INSERT INTO t1 VALUES ('ä');
ERROR 23000: Duplicate entry 'ä' for key 'a'
SELECT * FROM t1;
a
a
DROP TABLE t1;
CREATE TABLE t1 (a TEXT COLLATE utf8mb3_general_ci UNIQUE);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` text CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('a');
INSERT INTO t1 VALUES ('ä');
ERROR 23000: Duplicate entry 'ä' for key 'a'
SELECT * FROM t1;
a
a
DROP TABLE t1;
CREATE TABLE t1 (
a LONGTEXT COLLATE utf8mb3_general_ci,
UNIQUE KEY(a(10)) USING HASH
);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` longtext CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(10)) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('a');
INSERT INTO t1 VALUES ('ä');
ERROR 23000: Duplicate entry 'ä' for key 'a'
SELECT * FROM t1;
a
a
DROP TABLE t1;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` text CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
SELECT a, OCTET_LENGTH(a) FROM t1 ORDER BY BINARY a;
a OCTET_LENGTH(a)
a 1
ä 2
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check error Upgrade required. Please do "REPAIR TABLE `t1`" or dump/reload to fix it!
INSERT INTO t1 VALUES ('A');
ERROR 23000: Duplicate entry 'A' for key 'a'
INSERT INTO t1 VALUES ('Ä');
ERROR 23000: Duplicate entry 'Ä' for key 'a'
INSERT INTO t1 VALUES ('Ấ');
SELECT a, OCTET_LENGTH(a) FROM t1 ORDER BY BINARY a;
a OCTET_LENGTH(a)
a 1
ä 2
Ấ 3
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check error Upgrade required. Please do "REPAIR TABLE `t1`" or dump/reload to fix it!
ALTER TABLE t1 FORCE;
ERROR 23000: Duplicate entry 'ä' for key 'a'
DELETE FROM t1 WHERE OCTET_LENGTH(a)>1;
ALTER TABLE t1 FORCE;
INSERT INTO t1 VALUES ('ä');
ERROR 23000: Duplicate entry 'ä' for key 'a'
DROP TABLE t1;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` text CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
SELECT a, OCTET_LENGTH(a) FROM t1 ORDER BY BINARY a;
a OCTET_LENGTH(a)
a 1
ä 2
ALTER IGNORE TABLE t1 FORCE;
SELECT a, OCTET_LENGTH(a) FROM t1 ORDER BY BINARY a;
a OCTET_LENGTH(a)
a 1
DROP TABLE t1;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` text CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
SELECT a, OCTET_LENGTH(a) FROM t1 ORDER BY BINARY a;
a OCTET_LENGTH(a)
a 1
ä 2
REPAIR TABLE t1;
Table Op Msg_type Msg_text
test.t1 repair Warning Number of rows changed from 2 to 1
test.t1 repair status OK
SELECT a, OCTET_LENGTH(a) FROM t1 ORDER BY BINARY a;
a OCTET_LENGTH(a)
a 1
DROP TABLE t1;
#
# End of 10.4 tests
#
@@ -2310,3 +2310,161 @@ VALUES (_latin1 0xDF) UNION VALUES(_utf8'a' COLLATE utf8_bin);
--echo #
--echo # End of 10.3 tests
--echo #


--echo #
--echo # Start of 10.4 tests
--echo #

--echo #
--echo # MDEV-27653 long uniques don't work with unicode collations
--echo #

# The string literals below contain non-ASCII characters, so the
# connection character set is switched to utf8mb3 first.
SET NAMES utf8mb3;

# CHAR
#
# Each scenario creates a table with a UNIQUE ... USING HASH key on a
# utf8mb3_general_ci column, inserts 'a', and then expects inserting 'ä'
# to be rejected with ER_DUP_ENTRY, so only one row remains.

# Case 1: the hash key covers the whole CHAR(30) column.
CREATE TABLE t1 (
a CHAR(30) COLLATE utf8mb3_general_ci,
UNIQUE KEY(a) USING HASH
);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('a');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ('ä');
SELECT * FROM t1;
DROP TABLE t1;

# Case 2: the hash key is declared on a 10-character prefix of the column.
CREATE TABLE t1 (
a CHAR(30) COLLATE utf8mb3_general_ci,
UNIQUE KEY(a(10)) USING HASH
);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('a');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ('ä');
SELECT * FROM t1;
DROP TABLE t1;


# VARCHAR
#
# Same two scenarios as for CHAR (whole-column key, then a 10-character
# prefix key), this time on a VARCHAR(30) column. In both, inserting 'ä'
# after 'a' must fail with ER_DUP_ENTRY.

# Case 1: the hash key covers the whole column.
CREATE TABLE t1 (
a VARCHAR(30) COLLATE utf8mb3_general_ci,
UNIQUE KEY(a) USING HASH
);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('a');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ('ä');
SELECT * FROM t1;
DROP TABLE t1;

# Case 2: the hash key is declared on a 10-character prefix.
CREATE TABLE t1 (
a VARCHAR(30) COLLATE utf8mb3_general_ci,
UNIQUE KEY(a(10)) USING HASH
);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('a');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ('ä');
SELECT * FROM t1;
DROP TABLE t1;


# TEXT
#
# Same check for blob-based string types. In both scenarios, inserting
# 'ä' after 'a' must fail with ER_DUP_ENTRY.

# Case 1: a plain UNIQUE on a TEXT column, with no explicit USING HASH.
# SHOW CREATE TABLE is run so the recorded result shows which key
# definition the server actually created.
CREATE TABLE t1 (a TEXT COLLATE utf8mb3_general_ci UNIQUE);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('a');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ('ä');
SELECT * FROM t1;
DROP TABLE t1;

# Case 2: an explicit hash key on a 10-character prefix of a LONGTEXT column.
CREATE TABLE t1 (
a LONGTEXT COLLATE utf8mb3_general_ci,
UNIQUE KEY(a(10)) USING HASH
);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('a');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ('ä');
SELECT * FROM t1;
DROP TABLE t1;


# Testing upgrade:
# Prior to MDEV-27653, the UNIQUE HASH function erroneously
# took into account string octet length.
# Old tables should still open and work, but with wrong results.

# Install a pre-built MyISAM table (.frm, .MYD, .MYI) as test.t1.
# NOTE(review): the "100422" in the fixture name presumably means the
# files were created by server version 10.4.22 - confirm.
# NOTE(review): $MYSQLD_DATADIR is not set in this part of the file;
# it is assumed to be defined earlier - confirm.
copy_file std_data/mysql_upgrade/mdev27653_100422_myisam_text.frm $MYSQLD_DATADIR/test/t1.frm;
copy_file std_data/mysql_upgrade/mdev27653_100422_myisam_text.MYD $MYSQLD_DATADIR/test/t1.MYD;
copy_file std_data/mysql_upgrade/mdev27653_100422_myisam_text.MYI $MYSQLD_DATADIR/test/t1.MYI;
SHOW CREATE TABLE t1;
# ORDER BY BINARY a keeps the row order deterministic in the recorded result.
SELECT a, OCTET_LENGTH(a) FROM t1 ORDER BY BINARY a;
# The recorded result for this CHECK TABLE is an "Upgrade required" error.
CHECK TABLE t1;

# There is already a one byte value 'a' in the table
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ('A');

# There is already a two-byte value 'ä' in the table
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ('Ä');

# There were no three-byte values in the table so far.
# The below value violates UNIQUE, but it gets inserted.
# This is wrong but expected for a pre-MDEV-27653 table.
INSERT INTO t1 VALUES ('Ấ');
SELECT a, OCTET_LENGTH(a) FROM t1 ORDER BY BINARY a;
CHECK TABLE t1;

# ALTER FORCE fails: it tries to rebuild the table
# with a correct UNIQUE HASH function, but there are duplicates!
--error ER_DUP_ENTRY
ALTER TABLE t1 FORCE;

# Let's remove all duplicate values, so only the one-byte 'a' stays.
# ALTER..FORCE should work after that.
DELETE FROM t1 WHERE OCTET_LENGTH(a)>1;
ALTER TABLE t1 FORCE;

# Make sure that 'a' and 'ä' cannot co-exist any more,
# because the table was recreated with a correct UNIQUE HASH function.
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ('ä');
DROP TABLE t1;

#
# Testing an old table with ALTER IGNORE.
# The table is expected to rebuild with a new hash function,
# duplicates go away.
#
# The same old-format fixture is installed again as test.t1. The SELECT
# before the ALTER records both rows ('a' and 'ä'); the SELECT after it
# is expected to return only 'a'.
copy_file std_data/mysql_upgrade/mdev27653_100422_myisam_text.frm $MYSQLD_DATADIR/test/t1.frm;
copy_file std_data/mysql_upgrade/mdev27653_100422_myisam_text.MYD $MYSQLD_DATADIR/test/t1.MYD;
copy_file std_data/mysql_upgrade/mdev27653_100422_myisam_text.MYI $MYSQLD_DATADIR/test/t1.MYI;
SHOW CREATE TABLE t1;
SELECT a, OCTET_LENGTH(a) FROM t1 ORDER BY BINARY a;
# No --error is expected here: with IGNORE the rebuild succeeds even though
# the old data contains values that are duplicates under the collation.
ALTER IGNORE TABLE t1 FORCE;
SELECT a, OCTET_LENGTH(a) FROM t1 ORDER BY BINARY a;
DROP TABLE t1;

#
# Testing an old table with REPAIR.
# The table is expected to rebuild with a new hash function,
# duplicates go away.
#
# The same old-format fixture is installed again as test.t1. The recorded
# REPAIR output contains the warning "Number of rows changed from 2 to 1",
# and the final SELECT is expected to return only 'a'.
copy_file std_data/mysql_upgrade/mdev27653_100422_myisam_text.frm $MYSQLD_DATADIR/test/t1.frm;
copy_file std_data/mysql_upgrade/mdev27653_100422_myisam_text.MYD $MYSQLD_DATADIR/test/t1.MYD;
copy_file std_data/mysql_upgrade/mdev27653_100422_myisam_text.MYI $MYSQLD_DATADIR/test/t1.MYI;
SHOW CREATE TABLE t1;
SELECT a, OCTET_LENGTH(a) FROM t1 ORDER BY BINARY a;
REPAIR TABLE t1;
SELECT a, OCTET_LENGTH(a) FROM t1 ORDER BY BINARY a;
DROP TABLE t1;

--echo #
--echo # End of 10.4 tests
--echo #
@@ -1320,5 +1320,27 @@ CASE WHEN a THEN DEFAULT(a) END
DROP TABLE t1;
SET timestamp=DEFAULT;
#
# MDEV-27653 long uniques don't work with unicode collations
#
CREATE TABLE t1 (a timestamp, UNIQUE KEY(a) USING HASH);
SET time_zone='+00:00';
INSERT INTO t1 VALUES ('2001-01-01 10:20:30');
SET time_zone='+01:00';
INSERT INTO t1 SELECT MAX(a) FROM t1;
ERROR 23000: Duplicate entry '2001-01-01 11:20:30' for key 'a'
SELECT * FROM t1;
a
2001-01-01 11:20:30
DROP TABLE t1;
CREATE TABLE t1 (a timestamp, UNIQUE KEY(a) USING HASH);
SET time_zone='+00:00';
INSERT INTO t1 VALUES ('2001-01-01 10:20:30');
SET time_zone='+01:00';
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check status OK
DROP TABLE t1;
SET time_zone=DEFAULT;
#
# End of 10.4 tests
#
@@ -878,6 +878,27 @@ DROP TABLE t1;
SET timestamp=DEFAULT;


--echo #
--echo # MDEV-27653 long uniques don't work with unicode collations
--echo #

# Scenario 1: a row is inserted while the session time zone is +00:00,
# then the time zone is switched to +01:00 and the stored value is
# re-inserted via INSERT ... SELECT MAX(a). The insert must fail with
# ER_DUP_ENTRY; the recorded result displays the single row as
# '2001-01-01 11:20:30' under the +01:00 time zone.
# NOTE(review): presumably this guards against the hash depending on
# the session time zone - confirm.
CREATE TABLE t1 (a timestamp, UNIQUE KEY(a) USING HASH);
SET time_zone='+00:00';
INSERT INTO t1 VALUES ('2001-01-01 10:20:30');
SET time_zone='+01:00';
--error ER_DUP_ENTRY
INSERT INTO t1 SELECT MAX(a) FROM t1;
SELECT * FROM t1;
DROP TABLE t1;

# Scenario 2: same setup, but instead of inserting, CHECK TABLE is run
# after the time zone switch. The recorded result expects status OK.
CREATE TABLE t1 (a timestamp, UNIQUE KEY(a) USING HASH);
SET time_zone='+00:00';
INSERT INTO t1 VALUES ('2001-01-01 10:20:30');
SET time_zone='+01:00';
CHECK TABLE t1;
DROP TABLE t1;
# Restore the session time zone so later tests are not affected.
SET time_zone=DEFAULT;

--echo #
--echo # End of 10.4 tests
--echo #
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit a27b8b2

Please sign in to comment.