-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MDEV-11233 CREATE FULLTEXT INDEX with a token longer than 127 bytes crashes server
This bug is the result of merging the Oracle MySQL follow-up fix BUG#22963169 MYSQL CRASHES ON CREATE FULLTEXT INDEX without merging the base bug fix: Bug#79475 Insert a token of 84 4-bytes chars into fts index causes server crash. Unlike the above mentioned fixes in MySQL, our fix will not change the storage format of fulltext indexes in InnoDB or XtraDB when a character encoding with mbmaxlen=2 or mbmaxlen=3 is used and the length of a word is between 128 and 84*mbmaxlen bytes. The Oracle fix would allocate 2 length bytes for these cases. Compatibility with other MySQL and MariaDB releases is ensured by persisting the used maximum length in the SYS_COLUMNS table in the InnoDB data dictionary. This fix also removes some unnecessary strcmp() calls when checking for the legacy default collation my_charset_latin1 (my_charset_latin1.name=="latin1_swedish_ci"). fts_create_one_index_table(): Store the actual length in bytes. This metadata will be written to the SYS_COLUMNS table. fts_zip_initialize(): Initialize only the first byte of the buffer. Actually the code should not even care about this first byte, because the length is set as 0. FTS_MAX_WORD_LEN: Define as HA_FT_MAXCHARLEN * 4 aka 336 bytes, not as 254 bytes. row_merge_create_fts_sort_index(): Set the actual maximum length of the column in bytes, similar to fts_create_one_index_table(). row_merge_fts_doc_tokenize(): Remove the redundant parameter word_dtype. Use the actual maximum length of the column. Calculate the extra_size in the same way as row_merge_buf_encode() does.
- Loading branch information
Showing
13 changed files
with
365 additions
and
108 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
SET NAMES utf8mb4; | ||
# | ||
# MDEV-11233 CREATE FULLTEXT INDEX with a token | ||
# longer than 127 bytes crashes server | ||
# | ||
CREATE TABLE t(t TEXT CHARACTER SET utf8mb3) ENGINE=InnoDB; | ||
INSERT INTO t SET t=REPEAT(CONCAT(REPEAT(_utf8mb3 0xE0B987, 4), REPEAT(_utf8mb3 0xE0B989, 5)), 5); | ||
INSERT INTO t SET t=REPEAT(_utf8 0xefbc90,84); | ||
INSERT INTO t SET t=REPEAT('befor',17); | ||
INSERT INTO t SET t='BeforeTheIndexCreation'; | ||
CREATE FULLTEXT INDEX ft ON t(t); | ||
Warnings: | ||
Warning 124 InnoDB rebuilding table to add column FTS_DOC_ID | ||
INSERT INTO t SET t='this was inserted after creating the index'; | ||
INSERT INTO t SET t=REPEAT(_utf8 0xefbc91,84); | ||
INSERT INTO t SET t=REPEAT('after',17); | ||
INSERT INTO t SET t=REPEAT(_utf8mb3 0xe794b2e9aaa8e69687, 15); | ||
# The data below is not 3-byte UTF-8, but 4-byte chars. | ||
INSERT INTO t SET t=REPEAT(_utf8mb4 0xf09f9695, 84); | ||
Warnings: | ||
Warning 1366 Incorrect string value: '\xF0\x9F\x96\x95\xF0\x9F...' for column 't' at row 1 | ||
INSERT INTO t SET t=REPEAT(_utf8mb4 0xf09f9696, 85); | ||
Warnings: | ||
Warning 1366 Incorrect string value: '\xF0\x9F\x96\x96\xF0\x9F...' for column 't' at row 1 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST | ||
(REPEAT(CONCAT(REPEAT(_utf8mb3 0xE0B987, 4), REPEAT(_utf8mb3 0xE0B989, 5)), 5)); | ||
COUNT(*) | ||
1 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST ('BeforeTheIndexCreation'); | ||
COUNT(*) | ||
1 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT('befor',17)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST ('after'); | ||
COUNT(*) | ||
1 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT('after',17)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 83)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 84)); | ||
COUNT(*) | ||
1 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 85)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 83)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 84)); | ||
COUNT(*) | ||
1 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 85)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9695, 83)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9695, 84)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9696, 84)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9696, 85)); | ||
COUNT(*) | ||
0 | ||
SELECT * FROM t; | ||
t | ||
็็็็้้้้้็็็็้้้้้็็็็้้้้้็็็็้้้้้็็็็้้้้้ | ||
000000000000000000000000000000000000000000000000000000000000000000000000000000000000 | ||
beforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbefor | ||
BeforeTheIndexCreation | ||
this was inserted after creating the index | ||
111111111111111111111111111111111111111111111111111111111111111111111111111111111111 | ||
afterafterafterafterafterafterafterafterafterafterafterafterafterafterafterafterafter | ||
甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文 | ||
???????????????????????????????????????????????????????????????????????????????????? | ||
????????????????????????????????????????????????????????????????????????????????????? | ||
SELECT len,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS where name='word' GROUP BY len; | ||
len COUNT(*) | ||
252 6 | ||
DROP TABLE t; | ||
CREATE TABLE t(t TEXT CHARACTER SET utf8mb4) ENGINE=InnoDB; | ||
INSERT INTO t SET t=REPEAT(_utf8mb3 0xe794b2e9aaa8e69687, 15); | ||
INSERT INTO t SET t=REPEAT(_utf8 0xefbc90,84); | ||
INSERT INTO t SET t=REPEAT('befor',17); | ||
INSERT INTO t SET t='BeforeTheIndexCreation'; | ||
CREATE FULLTEXT INDEX ft ON t(t); | ||
Warnings: | ||
Warning 124 InnoDB rebuilding table to add column FTS_DOC_ID | ||
INSERT INTO t SET t='this was inserted after creating the index'; | ||
INSERT INTO t SET t=REPEAT(_utf8 0xefbc91,84); | ||
INSERT INTO t SET t=REPEAT('after',17); | ||
INSERT INTO t SET t=REPEAT(concat(repeat(_utf8mb3 0xE0B987, 4), repeat(_utf8mb3 0xE0B989, 5)), 5); | ||
INSERT INTO t SET t=REPEAT(_utf8mb4 0xf09f9695, 84); | ||
# The token below exceeds the 84-character limit. | ||
INSERT INTO t SET t=REPEAT(_utf8mb4 0xf09f9696, 85); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb3 0xe794b2e9aaa8e69687, 15)); | ||
COUNT(*) | ||
1 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST ('BeforeTheIndexCreation'); | ||
COUNT(*) | ||
1 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT('befor',17)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST ('after'); | ||
COUNT(*) | ||
1 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT('after',17)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 83)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 84)); | ||
COUNT(*) | ||
1 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 85)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 83)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 84)); | ||
COUNT(*) | ||
1 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 85)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9695, 83)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9695, 84)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9696, 84)); | ||
COUNT(*) | ||
0 | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9696, 85)); | ||
COUNT(*) | ||
0 | ||
SELECT * FROM t; | ||
t | ||
甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文 | ||
000000000000000000000000000000000000000000000000000000000000000000000000000000000000 | ||
beforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbefor | ||
BeforeTheIndexCreation | ||
this was inserted after creating the index | ||
111111111111111111111111111111111111111111111111111111111111111111111111111111111111 | ||
afterafterafterafterafterafterafterafterafterafterafterafterafterafterafterafterafter | ||
็็็็้้้้้็็็็้้้้้็็็็้้้้้็็็็้้้้้็็็็้้้้้ | ||
🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕 | ||
🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖 | ||
SELECT len,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS where name='word' GROUP BY len; | ||
len COUNT(*) | ||
336 6 | ||
DROP TABLE t; | ||
CREATE TABLE t(t TEXT CHARACTER SET latin1, FULLTEXT INDEX(t)) | ||
ENGINE=InnoDB; | ||
SELECT len,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS where name='word' GROUP BY len; | ||
len COUNT(*) | ||
84 6 | ||
DROP TABLE t; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
--loose-innodb-sys-columns |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# Regression test for MDEV-11233: InnoDB fulltext indexing of tokens that are | ||
# longer than 127 bytes, for character sets of 3, 4 and 1 byte per character. | ||
--source include/have_innodb.inc | ||
# NOTE(review): presumably required so that the 4-byte _utf8mb4 literals used | ||
# below pass through the connection unchanged -- confirm. | ||
SET NAMES utf8mb4; | ||
|
||
--echo # | ||
--echo # MDEV-11233 CREATE FULLTEXT INDEX with a token | ||
--echo # longer than 127 bytes crashes server | ||
--echo # | ||
|
||
# Background: this bug was introduced by merging the Oracle MySQL follow-up | ||
# fix BUG#22963169 (MYSQL CRASHES ON CREATE FULLTEXT INDEX) without also | ||
# merging the base fix for Bug#79475 (Insert a token of 84 4-bytes chars | ||
# into fts index causes server crash). | ||
|
||
# Oracle did not publish tests for either of the above MySQL bugs. | ||
# The tests below were developed for MariaDB Server. | ||
# The maximum length of a fulltext-indexed word is 84 characters, so every | ||
# case below probes tokens of 83, 84 and 85 characters. | ||
|
||
# Case 1: utf8mb3 column (at most 3 bytes per character). | ||
# Rows inserted before CREATE FULLTEXT INDEX are indexed by the index | ||
# creation itself; rows inserted afterwards go through the normal insert path. | ||
CREATE TABLE t(t TEXT CHARACTER SET utf8mb3) ENGINE=InnoDB; | ||
# (4 + 5) * 5 = 45 three-byte characters = 135 bytes: longer than 127 bytes. | ||
INSERT INTO t SET t=REPEAT(CONCAT(REPEAT(_utf8mb3 0xE0B987, 4), REPEAT(_utf8mb3 0xE0B989, 5)), 5); | ||
# 84 three-byte characters = 252 bytes: exactly at the 84-character limit. | ||
INSERT INTO t SET t=REPEAT(_utf8 0xefbc90,84); | ||
INSERT INTO t SET t=REPEAT('befor',17); # 85 characters: too long, will not be indexed | ||
INSERT INTO t SET t='BeforeTheIndexCreation'; | ||
CREATE FULLTEXT INDEX ft ON t(t); | ||
INSERT INTO t SET t='this was inserted after creating the index'; | ||
# Same 84-character boundary token as above, but added after index creation. | ||
INSERT INTO t SET t=REPEAT(_utf8 0xefbc91,84); | ||
INSERT INTO t SET t=REPEAT('after',17); # 85 characters: too long, will not be indexed | ||
# 3 * 15 = 45 three-byte characters = 135 bytes, added after index creation. | ||
INSERT INTO t SET t=REPEAT(_utf8mb3 0xe794b2e9aaa8e69687, 15); | ||
# The recorded result expects warning 1366 (Incorrect string value) for both | ||
# statements below, and question marks stored in place of the 4-byte characters. | ||
--echo # The data below is not 3-byte UTF-8, but 4-byte chars. | ||
INSERT INTO t SET t=REPEAT(_utf8mb4 0xf09f9695, 84); | ||
INSERT INTO t SET t=REPEAT(_utf8mb4 0xf09f9696, 85); | ||
# Each query below counts the rows matching one token. Per the recorded | ||
# result, the 45-character token and the 84-character tokens match one row | ||
# each, while the 83- and 85-character variants, the 85-character ASCII | ||
# tokens and all _utf8mb4 tokens match nothing. | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST | ||
(REPEAT(CONCAT(REPEAT(_utf8mb3 0xE0B987, 4), REPEAT(_utf8mb3 0xE0B989, 5)), 5)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST ('BeforeTheIndexCreation'); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT('befor',17)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST ('after'); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT('after',17)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 83)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 84)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 85)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 83)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 84)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 85)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9695, 83)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9695, 84)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9696, 84)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9696, 85)); | ||
# Dump all rows; the recorded result lists them in insertion order. | ||
SELECT * FROM t; | ||
|
||
# The column length should be 252 bytes (84 characters * 3 bytes/character). | ||
# The recorded result expects six 'word' columns with this length. | ||
SELECT len,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS where name='word' GROUP BY len; | ||
DROP TABLE t; | ||
|
||
# Case 2: utf8mb4 column (at most 4 bytes per character). | ||
# Same layout as the utf8mb3 case: some rows exist before CREATE FULLTEXT | ||
# INDEX, the rest are inserted after it. | ||
CREATE TABLE t(t TEXT CHARACTER SET utf8mb4) ENGINE=InnoDB; | ||
# 3 * 15 = 45 three-byte characters = 135 bytes: longer than 127 bytes. | ||
INSERT INTO t SET t=REPEAT(_utf8mb3 0xe794b2e9aaa8e69687, 15); | ||
# 84 three-byte characters = 252 bytes: exactly at the 84-character limit. | ||
INSERT INTO t SET t=REPEAT(_utf8 0xefbc90,84); | ||
INSERT INTO t SET t=REPEAT('befor',17); # 85 characters: too long, will not be indexed | ||
INSERT INTO t SET t='BeforeTheIndexCreation'; | ||
CREATE FULLTEXT INDEX ft ON t(t); | ||
INSERT INTO t SET t='this was inserted after creating the index'; | ||
# Same 84-character boundary token as above, but added after index creation. | ||
INSERT INTO t SET t=REPEAT(_utf8 0xefbc91,84); | ||
INSERT INTO t SET t=REPEAT('after',17); # 85 characters: too long, will not be indexed | ||
# (4 + 5) * 5 = 45 three-byte characters = 135 bytes, added after index creation. | ||
INSERT INTO t SET t=REPEAT(concat(repeat(_utf8mb3 0xE0B987, 4), repeat(_utf8mb3 0xE0B989, 5)), 5); | ||
# 84 four-byte characters = 336 bytes; unlike in the utf8mb3 case, the | ||
# recorded result expects no warning here and the characters stored as-is. | ||
INSERT INTO t SET t=REPEAT(_utf8mb4 0xf09f9695, 84); | ||
--echo # The token below exceeds the 84-character limit. | ||
INSERT INTO t SET t=REPEAT(_utf8mb4 0xf09f9696, 85); | ||
# Each query below counts the rows matching one token. Per the recorded | ||
# result, the 45-character token and the 84-character 3-byte tokens match one | ||
# row each; the 83- and 85-character variants, the 85-character ASCII tokens | ||
# and all four _utf8mb4 tokens are expected to return 0. | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb3 0xe794b2e9aaa8e69687, 15)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST ('BeforeTheIndexCreation'); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT('befor',17)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST ('after'); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT('after',17)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 83)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 84)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 85)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 83)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 84)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 85)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9695, 83)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9695, 84)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9696, 84)); | ||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9696, 85)); | ||
# Dump all rows; the recorded result lists them in insertion order. | ||
SELECT * FROM t; | ||
|
||
# The column length should be 336 bytes (84 characters * 4 bytes/character). | ||
# The recorded result expects six 'word' columns with this length. | ||
SELECT len,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS where name='word' GROUP BY len; | ||
DROP TABLE t; | ||
|
||
# Case 3: latin1 column (1 byte per character), with the fulltext index | ||
# declared directly in CREATE TABLE. No rows are inserted; only the length | ||
# recorded in INNODB_SYS_COLUMNS is checked. | ||
CREATE TABLE t(t TEXT CHARACTER SET latin1, FULLTEXT INDEX(t)) | ||
ENGINE=InnoDB; | ||
|
||
# The column length should be 84 bytes (84 characters * 1 byte/character). | ||
# The recorded result expects six 'word' columns with this length. | ||
SELECT len,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS where name='word' GROUP BY len; | ||
DROP TABLE t; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.