diff --git a/include/m_ctype.h b/include/m_ctype.h index ab2f84656ef51..8159e9ce7eeb5 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -1709,12 +1709,6 @@ size_t my_strnxfrm_unicode_full_nopad_bin(CHARSET_INFO *, size_t my_strnxfrmlen_unicode_full_bin(CHARSET_INFO *, size_t); -int my_wildcmp_unicode(CHARSET_INFO *cs, - const char *str, const char *str_end, - const char *wildstr, const char *wildend, - int escape, int w_one, int w_many, - MY_CASEFOLD_INFO *weights); - extern my_bool my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t buflen); extern char *my_strchr(CHARSET_INFO *cs, const char *str, const char *end, diff --git a/mysql-test/main/ctype_ucs.result b/mysql-test/main/ctype_ucs.result index 99d8209d32e87..a6a0e628e119f 100644 --- a/mysql-test/main/ctype_ucs.result +++ b/mysql-test/main/ctype_ucs.result @@ -5773,7 +5773,7 @@ Warning 1292 Truncated incorrect INTEGER value: '1IJ3' # # MDEV-9711 NO PAD Collatons # -SET character_set_connection=ucs2; +SET collation_connection=ucs2_general_nopad_ci; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -6051,6 +6051,127 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +ucs2_general_nopad_ci +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET ucs2 COLLATE ucs2_general_nopad_ci DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 23 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a 
a 23 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +abc +abcd +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 +SET collation_connection=ucs2_nopad_bin; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -6328,6 +6449,124 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +ucs2_nopad_bin +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE 
TABLE `t1` ( + `a` varchar(10) CHARACTER SET ucs2 COLLATE ucs2_nopad_bin DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 23 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 23 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 
'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 # # MDEV-10585 EXECUTE IMMEDIATE statement # diff --git a/mysql-test/main/ctype_ucs.test b/mysql-test/main/ctype_ucs.test index 993f9be5c54d8..43bd50f639acb 100644 --- a/mysql-test/main/ctype_ucs.test +++ b/mysql-test/main/ctype_ucs.test @@ -1049,14 +1049,17 @@ SELECT CAST(CONVERT('1IJ3' USING ucs2) AS SIGNED); --echo # --echo # MDEV-9711 NO PAD Collatons --echo # -SET character_set_connection=ucs2; +SET collation_connection=ucs2_general_nopad_ci; let $coll='ucs2_general_nopad_ci'; let $coll_pad='ucs2_general_ci'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc +SET collation_connection=ucs2_nopad_bin; let $coll='ucs2_nopad_bin'; let $coll_pad='ucs2_bin'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc --echo # --echo # MDEV-10585 EXECUTE IMMEDIATE statement diff --git a/mysql-test/main/ctype_utf16.result b/mysql-test/main/ctype_utf16.result index 89d5283b63c67..834d05828ca01 100644 --- a/mysql-test/main/ctype_utf16.result +++ b/mysql-test/main/ctype_utf16.result @@ -2258,7 +2258,7 @@ Warning 1292 Truncated incorrect INTEGER value: '1IJ3' # # MDEV-9711 NO PAD Collatons # -SET character_set_connection=utf16; +SET collation_connection=utf16_general_nopad_ci; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -2536,6 +2536,127 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +utf16_general_nopad_ci +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf16 COLLATE utf16_general_nopad_ci DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * 
from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +abc +abcd +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 +SET collation_connection=utf16_nopad_bin; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -2813,6 +2934,124 
@@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +utf16_nopad_bin +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf16 COLLATE utf16_nopad_bin DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 
'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 # # End of 10.2 tests # diff --git a/mysql-test/main/ctype_utf16.test b/mysql-test/main/ctype_utf16.test index 529b737fcfb2c..9f81f23a5b2b7 100644 --- a/mysql-test/main/ctype_utf16.test +++ b/mysql-test/main/ctype_utf16.test @@ -927,14 +927,17 @@ SELECT CAST(CONVERT('1IJ3' USING utf16) AS SIGNED); --echo # --echo # MDEV-9711 NO PAD Collatons --echo # -SET character_set_connection=utf16; +SET collation_connection=utf16_general_nopad_ci; let $coll='utf16_general_nopad_ci'; let $coll_pad='utf16_general_ci'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc +SET collation_connection=utf16_nopad_bin; let $coll='utf16_nopad_bin'; let $coll_pad='utf16_bin'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc --echo # --echo # End of 10.2 tests diff --git a/mysql-test/main/ctype_utf16le.result b/mysql-test/main/ctype_utf16le.result index 9bc53a1041dcb..99e9a2ef4d371 100644 --- a/mysql-test/main/ctype_utf16le.result +++ b/mysql-test/main/ctype_utf16le.result @@ -2444,7 +2444,7 @@ Warning 1292 Truncated incorrect INTEGER value: '1IJ3' # # MDEV-9711 NO PAD Collatons # -SET character_set_connection=utf16le; +SET collation_connection=utf16le_general_nopad_ci; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -2722,6 +2722,127 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +utf16le_general_nopad_ci +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; 
+Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf16le COLLATE utf16le_general_nopad_ci DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +abc +abcd +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 
'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 +SET collation_connection=utf16le_nopad_bin; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -2999,6 +3120,124 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +utf16le_nopad_bin +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf16le COLLATE utf16le_nopad_bin DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 
'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 # # MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset # diff --git a/mysql-test/main/ctype_utf16le.test b/mysql-test/main/ctype_utf16le.test index 537a456f7dbb5..15fbda6b2eec5 100644 --- a/mysql-test/main/ctype_utf16le.test +++ b/mysql-test/main/ctype_utf16le.test @@ -806,14 +806,17 @@ SELECT CAST(CONVERT('1IJ3' USING utf16le) AS SIGNED); --echo # --echo # MDEV-9711 NO PAD Collatons --echo # -SET character_set_connection=utf16le; +SET collation_connection=utf16le_general_nopad_ci; let $coll='utf16le_general_nopad_ci'; let $coll_pad='utf16le_general_ci'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc +SET collation_connection=utf16le_nopad_bin; let $coll='utf16le_nopad_bin'; let $coll_pad='utf16le_bin'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc --echo # --echo # MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset diff --git a/mysql-test/main/ctype_utf32.result b/mysql-test/main/ctype_utf32.result index 7806b90b08146..47497cdd77133 100644 --- 
a/mysql-test/main/ctype_utf32.result +++ b/mysql-test/main/ctype_utf32.result @@ -2314,7 +2314,7 @@ Warning 1292 Truncated incorrect INTEGER value: '1IJ3' # # MDEV-9711 NO PAD Collatons # -SET character_set_connection=utf32; +SET collation_connection=utf32_general_nopad_ci; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -2592,6 +2592,127 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +utf32_general_nopad_ci +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf32 COLLATE utf32_general_nopad_ci DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +abc +abcd +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; 
+'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 +SET collation_connection=utf32_nopad_bin; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -2869,6 +2990,124 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +utf32_nopad_bin +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf32 COLLATE utf32_nopad_bin DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +select * from t1 
where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 # # MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed # 10.2 tests diff --git a/mysql-test/main/ctype_utf32.test b/mysql-test/main/ctype_utf32.test index bcbc3b14691ff..52071325199a7 100644 --- a/mysql-test/main/ctype_utf32.test +++ b/mysql-test/main/ctype_utf32.test @@ -1040,15 +1040,17 @@ SELECT CAST(CONVERT('1IJ3' USING utf32) AS SIGNED); --echo # --echo # MDEV-9711 NO PAD Collatons --echo # -SET character_set_connection=utf32; +SET 
collation_connection=utf32_general_nopad_ci; let $coll='utf32_general_nopad_ci'; let $coll_pad='utf32_general_ci'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc +SET collation_connection=utf32_nopad_bin; let $coll='utf32_nopad_bin'; let $coll_pad='utf32_bin'; --source include/ctype_pad_all_engines.inc - +--source include/ctype_like.inc --echo # --echo # MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index aa4d366208f9e..e3ea43d589015 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -32211,145 +32211,16 @@ static int my_uca_charcmp(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) return 0; } + /* -** Compare string against string with wildcard -** 0 if matched -** -1 if not matched with wildcard -** 1 if matched with wildcard + my_wildcmp_uca_impl() + A generic function for all Unicode character sets. + For UCA collations. 
*/ - -static -int my_wildcmp_uca_impl(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many, int recurse_level) -{ - int result= -1; /* Not found, using wildcards */ - my_wc_t s_wc, w_wc; - int scan; - my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - - if (my_string_stack_guard && my_string_stack_guard(recurse_level)) - return 1; - while (wildstr != wildend) - { - while (1) - { - my_bool escaped= 0; - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - - if (w_wc == (my_wc_t) w_many) - { - result= 1; /* Found an anchor char */ - break; - } - - wildstr+= scan; - if (w_wc == (my_wc_t) escape && wildstr < wildend) - { - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - wildstr+= scan; - escaped= 1; - } - - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) - return 1; - str+= scan; - - if (!escaped && w_wc == (my_wc_t) w_one) - { - result= 1; /* Found an anchor char */ - } - else - { - if (my_uca_charcmp(cs,s_wc,w_wc)) - return 1; /* No match */ - } - if (wildstr == wildend) - return (str != str_end); /* Match if both are at end */ - } - - if (w_wc == (my_wc_t) w_many) - { /* Found w_many */ - /* Remove any '%' and '_' from the wild search string */ - for ( ; wildstr != wildend ; ) - { - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - - if (w_wc == (my_wc_t) w_many) - { - wildstr+= scan; - continue; - } - - if (w_wc == (my_wc_t) w_one) - { - wildstr+= scan; - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) - return 1; - str+= scan; - continue; - } - break; /* Not a wild character */ - } - - if (wildstr == wildend) - return 0; /* Ok if w_many is last */ - - if (str == str_end) - return -1; - - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - 
wildstr+= scan; - - if (w_wc == (my_wc_t) escape) - { - if (wildstr < wildend) - { - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - wildstr+= scan; - } - } - - while (1) - { - /* Skip until the first character from wildstr is found */ - while (str != str_end) - { - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) - return 1; - - if (!my_uca_charcmp(cs,s_wc,w_wc)) - break; - str+= scan; - } - if (str == str_end) - return -1; - - str+= scan; - result= my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, - recurse_level + 1); - if (result <= 0) - return result; - } - } - } - return (str != str_end ? 1 : 0); -} +#define MY_FUNCTION_NAME(x) my_ ## x ## _uca_impl +#define MY_MB_WC(cs, pwc, s, e) ((cs)->cset->mb_wc)(cs, pwc, s, e) +#define MY_CHAR_EQ(cs, wc1, wc2) (my_uca_charcmp(cs, wc1, wc2)==0) +#include "ctype-wildcmp.inl" int my_wildcmp_uca(CHARSET_INFO *cs, diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index c8da32a87af93..c4c4444188b70 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -796,6 +796,65 @@ my_ll10tostr_mb2_or_mb4(CHARSET_INFO *cs, return (int) (dst -db); } + +static inline my_bool +my_char_eq_mb2_or_mb4_general_ci(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) +{ + DBUG_ASSERT((cs->state & MY_CS_BINSORT) == 0); + return my_casefold_char_eq_general_ci(cs->casefold, wc1, wc2); +} + + +static inline my_bool +my_char_eq_mb2_or_mb4_bin(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) +{ + DBUG_ASSERT((cs->state & MY_CS_BINSORT) != 0); + return wc1 == wc2; +} + + +/* + my_wildcmp_mb2_or_mb4_general_ci_impl() + A generic function for ucs2, utf16, utf32, for general_ci-style collations. 
+*/ +#define MY_FUNCTION_NAME(x) my_ ## x ## _mb2_or_mb4_general_ci_impl +#define MY_MB_WC(cs, pwc, s, e) ((cs)->cset->mb_wc)(cs, pwc, s, e) +#define MY_CHAR_EQ(cs, wc1, wc2) my_char_eq_mb2_or_mb4_general_ci(cs, wc1, wc2) +#include "ctype-wildcmp.inl" + + +static int +my_wildcmp_mb2_or_mb4_general_ci(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_mb2_or_mb4_general_ci_impl(cs, str, str_end, + wildstr, wildend, + escape, w_one, w_many, 1); +} + + +/* + my_wildcmp_mb2_or_mb4_bin_impl() + A generic function for ucs2, utf16, utf32, for _bin collations. +*/ +#define MY_FUNCTION_NAME(x) my_ ## x ## _mb2_or_mb4_bin_impl +#define MY_MB_WC(cs, pwc, s, e) ((cs)->cset->mb_wc)(cs, pwc, s, e) +#define MY_CHAR_EQ(cs, wc1, wc2) my_char_eq_mb2_or_mb4_bin(cs, wc1, wc2) +#include "ctype-wildcmp.inl" + + +static int +my_wildcmp_mb2_or_mb4_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_mb2_or_mb4_bin_impl(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, 1); +} + #endif /* HAVE_CHARSET_mb2_or_mb4 */ @@ -1407,29 +1466,6 @@ my_charpos_utf16(CHARSET_INFO *cs, } -static int -my_wildcmp_utf16_ci(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) -{ - MY_CASEFOLD_INFO *uni_plane= cs->casefold; - return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, uni_plane); -} - - -static int -my_wildcmp_utf16_bin(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) -{ - return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, NULL); -} - - static void my_hash_sort_utf16_nopad_bin(CHARSET_INFO *cs __attribute__((unused)), const uchar *pos, 
size_t len, @@ -1465,7 +1501,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler = my_strnxfrm_utf16_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_utf16_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16, @@ -1486,7 +1522,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler = my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, - my_wildcmp_utf16_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_bin, @@ -1507,7 +1543,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler = my_strnxfrm_nopad_utf16_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_utf16_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_nopad, @@ -1528,7 +1564,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler = my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, - my_wildcmp_utf16_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_nopad_bin, @@ -1816,7 +1852,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler = my_strnxfrm_utf16le_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_utf16_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16, @@ -1837,7 +1873,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler = my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, - my_wildcmp_utf16_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_bin, @@ -1858,7 +1894,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler = my_strnxfrm_nopad_utf16le_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_utf16_ci, + 
my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_nopad, @@ -1879,7 +1915,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler = my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, - my_wildcmp_utf16_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_nopad_bin, @@ -2554,29 +2590,6 @@ void my_fill_utf32(CHARSET_INFO *cs, } -static int -my_wildcmp_utf32_ci(CHARSET_INFO *cs, - const char *str, const char *str_end, - const char *wildstr, const char *wildend, - int escape, int w_one, int w_many) -{ - MY_CASEFOLD_INFO *uni_plane= cs->casefold; - return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, uni_plane); -} - - -static int -my_wildcmp_utf32_bin(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) -{ - return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, NULL); -} - - static size_t my_scan_utf32(CHARSET_INFO *cs, const char *str, const char *end, int sequence_type) @@ -2613,7 +2626,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler = my_strnxfrm_utf32_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_utf32_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf32, @@ -2634,7 +2647,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler = my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, - my_wildcmp_utf32_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf32, @@ -2655,7 +2668,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler = my_strnxfrm_nopad_utf32_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_utf32_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, 
my_instr_mb, my_hash_sort_utf32_nopad, @@ -2676,7 +2689,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler = my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, - my_wildcmp_utf32_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf32_nopad, @@ -3148,29 +3161,6 @@ my_well_formed_char_length_ucs2(CHARSET_INFO *cs __attribute__((unused)), } -static -int my_wildcmp_ucs2_ci(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) -{ - MY_CASEFOLD_INFO *uni_plane= cs->casefold; - return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, - escape,w_one,w_many,uni_plane); -} - - -static -int my_wildcmp_ucs2_bin(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) -{ - return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, - escape,w_one,w_many,NULL); -} - - static void my_hash_sort_ucs2_nopad_bin(CHARSET_INFO *cs __attribute__((unused)), const uchar *key, size_t len, @@ -3205,7 +3195,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = my_strnxfrm_ucs2_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_ucs2_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2, @@ -3226,7 +3216,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_mysql500_ci_handler = my_strnxfrm_ucs2_general_mysql500_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_ucs2_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2, @@ -3247,7 +3237,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = my_strnxfrm_ucs2_bin, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_ucs2_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2_bin, @@ -3268,7 +3258,7 
@@ static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler = my_strnxfrm_nopad_ucs2_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_ucs2_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2_nopad, @@ -3289,7 +3279,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler = my_strnxfrm_nopad_ucs2_bin, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_ucs2_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2_nopad_bin, diff --git a/strings/ctype-unidata.h b/strings/ctype-unidata.h index 0bcf96c09a007..bb741dc3eb5fe 100644 --- a/strings/ctype-unidata.h +++ b/strings/ctype-unidata.h @@ -132,6 +132,24 @@ my_toupper_unicode(MY_CASEFOLD_INFO *uni_plane, my_wc_t *wc) } +/* + Compare two characters for equality, according to the collation. + For simple Unicode AI CI collations, e.g. utf8mb4_general_ci. + + @return TRUE if the two characters are equal + @return FALSE otherwise +*/ +static inline my_bool +my_casefold_char_eq_general_ci(MY_CASEFOLD_INFO *casefold, + my_wc_t wc1, my_wc_t wc2) +{ + DBUG_ASSERT(casefold->simple_weight); + my_tosort_unicode(casefold, &wc1); + my_tosort_unicode(casefold, &wc2); + return wc1 == wc2; +} + + extern MY_CASEFOLD_INFO my_casefold_default; extern MY_CASEFOLD_INFO my_casefold_turkish; extern MY_CASEFOLD_INFO my_casefold_mysql500; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index d4a5c5be0d86b..2dc120a2d0dd5 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -133,170 +133,11 @@ my_casefold_multiply_utf8mbx(CHARSET_INFO *cs) } -/* -** Compare string against string with wildcard -** This function is used in UTF8 and UCS2 -** -** 0 if matched -** -1 if not matched with wildcard -** 1 if matched with wildcard -*/ - -static -int my_wildcmp_unicode_impl(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int 
w_many, - MY_CASEFOLD_INFO *weights, int recurse_level) -{ - int result= -1; /* Not found, using wildcards */ - my_wc_t s_wc, w_wc; - int scan; - my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - - if (my_string_stack_guard && my_string_stack_guard(recurse_level)) - return 1; - while (wildstr != wildend) - { - while (1) - { - my_bool escaped= 0; - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - - if (w_wc == (my_wc_t) w_many) - { - result= 1; /* Found an anchor char */ - break; - } - - wildstr+= scan; - if (w_wc == (my_wc_t) escape && wildstr < wildend) - { - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - wildstr+= scan; - escaped= 1; - } - - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) - return 1; - str+= scan; - - if (!escaped && w_wc == (my_wc_t) w_one) - { - result= 1; /* Found an anchor char */ - } - else - { - if (weights) - { - my_tosort_unicode(weights, &s_wc); - my_tosort_unicode(weights, &w_wc); - } - if (s_wc != w_wc) - return 1; /* No match */ - } - if (wildstr == wildend) - return (str != str_end); /* Match if both are at end */ - } - - if (w_wc == (my_wc_t) w_many) - { /* Found w_many */ - /* Remove any '%' and '_' from the wild search string */ - for ( ; wildstr != wildend ; ) - { - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - - if (w_wc == (my_wc_t) w_many) - { - wildstr+= scan; - continue; - } - - if (w_wc == (my_wc_t) w_one) - { - wildstr+= scan; - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) - return 1; - str+= scan; - continue; - } - break; /* Not a wild character */ - } - - if (wildstr == wildend) - return 0; /* Ok if w_many is last */ - - if (str == str_end) - return -1; - - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - wildstr+= scan; - - if (w_wc == (my_wc_t) escape) - { - if 
(wildstr < wildend) - { - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - wildstr+= scan; - } - } - - while (1) - { - /* Skip until the first character from wildstr is found */ - while (str != str_end) - { - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) - return 1; - if (weights) - { - my_tosort_unicode(weights, &s_wc); - my_tosort_unicode(weights, &w_wc); - } - - if (s_wc == w_wc) - break; - str+= scan; - } - if (str == str_end) - return -1; - - str+= scan; - result= my_wildcmp_unicode_impl(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, - weights, recurse_level + 1); - if (result <= 0) - return result; - } - } - } - return (str != str_end ? 1 : 0); -} - - -int -my_wildcmp_unicode(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many, - MY_CASEFOLD_INFO *weights) +static inline my_bool +my_char_eq_utf8mbx_general_ci(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) { - return my_wildcmp_unicode_impl(cs, str, str_end, - wildstr, wildend, - escape, w_one, w_many, weights, 1); + DBUG_ASSERT((cs->state & MY_CS_BINSORT) == 0); + return my_casefold_char_eq_general_ci(cs->casefold, wc1, wc2); } @@ -774,15 +615,25 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t) } +/* + my_wildcmp_utf8mb3_general_ci_impl() + An optimized function for utf8mb3. + For general_ci-style collations. 
+*/ +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_general_ci_impl +#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e) +#define MY_CHAR_EQ(cs, wc1, wc2) my_char_eq_utf8mbx_general_ci(cs, wc1, wc2) +#include "ctype-wildcmp.inl" + + static int my_wildcmp_utf8mb3(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, int escape, int w_one, int w_many) { - MY_CASEFOLD_INFO *uni_plane= cs->casefold; - return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, - escape,w_one,w_many,uni_plane); + return my_wildcmp_utf8mb3_general_ci_impl(cs,str,str_end,wildstr,wildend, + escape, w_one, w_many, 1); } @@ -3117,14 +2968,25 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t) } +/* + my_wildcmp_utf8mb4_general_ci_impl() + An optimized function for utf8mb4. + For general_ci-style collations. +*/ +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4_general_ci_impl +#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb4_quick(pwc, s, e) +#define MY_CHAR_EQ(cs, wc1, wc2) my_char_eq_utf8mbx_general_ci(cs, wc1, wc2) +#include "ctype-wildcmp.inl" + + static int my_wildcmp_utf8mb4(CHARSET_INFO *cs, const char *str, const char *strend, const char *wildstr, const char *wildend, int escape, int w_one, int w_many) { - return my_wildcmp_unicode(cs, str, strend, wildstr, wildend, - escape, w_one, w_many, cs->casefold); + return my_wildcmp_utf8mb4_general_ci_impl(cs, str, strend, wildstr, wildend, + escape, w_one, w_many, 1); } diff --git a/strings/ctype-wildcmp.inl b/strings/ctype-wildcmp.inl new file mode 100644 index 0000000000000..f7a5c02df7852 --- /dev/null +++ b/strings/ctype-wildcmp.inl @@ -0,0 +1,177 @@ +/* + Copyright (c) 2024, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + + +#ifndef MY_FUNCTION_NAME +#error MY_FUNCTION_NAME is not defined +#endif + +#ifndef MY_MB_WC +#error MY_MB_WC is not defined +#endif + +#ifndef MY_CHAR_EQ +#error MY_CHAR_EQ is not defined +#endif + +/* +** Compare string against string with wildcard +** +** 0 if matched +** -1 if not matched with wildcard +** 1 if not matched (also on an invalid character or stack guard failure) +*/ + +static int +MY_FUNCTION_NAME(wildcmp)(CHARSET_INFO *cs, + const char *str, const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many, + int recurse_level) +{ + int result= -1; /* Not found, using wildcards */ + my_wc_t s_wc, w_wc; + int scan; + + if (my_string_stack_guard && my_string_stack_guard(recurse_level)) + return 1; + while (wildstr != wildend) + { + while (1) + { + my_bool escaped= 0; + if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr, + (const uchar*) wildend)) <= 0) + return 1; + + if (w_wc == (my_wc_t) w_many) + { + result= 1; /* Found an anchor char */ + break; + } + + wildstr+= scan; + if (w_wc == (my_wc_t) escape && wildstr < wildend) + { + if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr, + (const uchar*) wildend)) <= 0) + return 1; + wildstr+= scan; + escaped= 1; + } + + if ((scan= MY_MB_WC(cs, &s_wc, (const uchar*) str, + (const uchar*) str_end)) <= 0) + return 1; + str+= scan; + + if (!escaped && w_wc == (my_wc_t) w_one) + { + result= 1; /* Found an anchor char */ + } + else + { + if (!MY_CHAR_EQ(cs, s_wc, w_wc)) + return 1; /* No match */ + } + if (wildstr == wildend) + return (str != str_end); /* Match 
if both are at end */ + } + + if (w_wc == (my_wc_t) w_many) + { /* Found w_many */ + /* Remove any '%' and '_' from the wild search string */ + for ( ; wildstr != wildend ; ) + { + if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr, + (const uchar*) wildend)) <= 0) + return 1; + + if (w_wc == (my_wc_t) w_many) + { + wildstr+= scan; + continue; + } + + if (w_wc == (my_wc_t) w_one) + { + wildstr+= scan; + if ((scan= MY_MB_WC(cs, &s_wc, (const uchar*) str, + (const uchar*) str_end)) <= 0) + return 1; + str+= scan; + continue; + } + break; /* Not a wild character */ + } + + if (wildstr == wildend) + return 0; /* Ok if w_many is last */ + + if (str == str_end) + return -1; + + if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr, + (const uchar*) wildend)) <= 0) + return 1; + wildstr+= scan; + + if (w_wc == (my_wc_t) escape) + { + if (wildstr < wildend) + { + if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr, + (const uchar*) wildend)) <= 0) + return 1; + wildstr+= scan; + } + } + + while (1) + { + /* Skip until the first character from wildstr is found */ + while (str != str_end) + { + if ((scan= MY_MB_WC(cs, &s_wc, (const uchar*) str, + (const uchar*) str_end)) <= 0) + return 1; + + if (MY_CHAR_EQ(cs, s_wc, w_wc)) + break; + str+= scan; + } + if (str == str_end) + return -1; + + str+= scan; + result= MY_FUNCTION_NAME(wildcmp)(cs, + str, str_end, + wildstr, wildend, + escape, w_one, w_many, + recurse_level + 1); + if (result <= 0) + return result; + } + } + } + return (str != str_end ? 1 : 0); +} + + +#undef MY_FUNCTION_NAME +#undef MY_MB_WC +#undef MY_CHAR_EQ