Skip to content

Commit fd247cc

Browse files
committed
MDEV-31340 Remove MY_COLLATION_HANDLER::strcasecmp()
This patch also fixes: MDEV-33050 Built-in schemas like oracle_schema are accent insensitive MDEV-33084 LASTVAL(t1) and LASTVAL(T1) do not work well with lower-case-table-names=0 MDEV-33085 Tables T1 and t1 do not work well with ENGINE=CSV and lower-case-table-names=0 MDEV-33086 SHOW OPEN TABLES IN DB1 -- is case insensitive with lower-case-table-names=0 MDEV-33088 Cannot create triggers in the database `MYSQL` MDEV-33103 LOCK TABLE t1 AS t2 -- alias is not case sensitive with lower-case-table-names=0 MDEV-33109 DROP DATABASE MYSQL -- does not drop SP with lower-case-table-names=0 MDEV-33110 HANDLER commands are case insensitive with lower-case-table-names=0 MDEV-33119 User is case insensitive in INFORMATION_SCHEMA.VIEWS MDEV-33120 System log table names are case insensitive with lower-case-table-names=0 - Removing the virtual function strcasecmp() from MY_COLLATION_HANDLER - Adding a wrapper function CHARSET_INFO::streq(), to compare two strings for equality. For now it calls strnncoll() internally. In the future it will turn into a virtual function. - Adding new accent sensitive case insensitive collations: - utf8mb4_general1400_as_ci - utf8mb3_general1400_as_ci They implement accent sensitive case insensitive comparison. The weight of a character is equal to the code point of its upper case variant. These collations use Unicode-14.0.0 casefolding data. The result of my_charset_utf8mb3_general1400_as_ci.strcoll() is very close to the former my_charset_utf8mb3_general_ci.strcasecmp(). There is only a difference in a couple dozen rare characters, because: - the switch from "tolower" to "toupper" comparison, to make utf8mb3_general1400_as_ci closer to utf8mb3_general_ci - the switch from Unicode-3.0.0 to Unicode-14.0.0 This difference should be tolerable. See the list of affected characters in the MDEV description. Note, utf8mb4_general1400_as_ci correctly handles non-BMP characters! Unlike utf8mb4_general_ci, it does not treat all non-BMP characters as equal.
- Adding classes representing names of the file based database objects: Lex_ident_db Lex_ident_table Lex_ident_trigger Their comparison collation depends on the underlying file system case sensitivity and on --lower-case-table-names and can be either my_charset_bin or my_charset_utf8mb3_general1400_as_ci. - Adding classes representing names of other database objects, whose names have case insensitive comparison style, using my_charset_utf8mb3_general1400_as_ci: Lex_ident_column Lex_ident_sys_var Lex_ident_user_var Lex_ident_sp_var Lex_ident_ps Lex_ident_i_s_table Lex_ident_window Lex_ident_func Lex_ident_partition Lex_ident_with_element Lex_ident_rpl_filter Lex_ident_master_info Lex_ident_host Lex_ident_locale Lex_ident_plugin Lex_ident_engine Lex_ident_server Lex_ident_savepoint Lex_ident_charset engine_option_value::Name - All the mentioned Lex_ident_xxx classes implement a method streq(): if (ident1.streq(ident2)) do_equal(); This method works as a wrapper for CHARSET_INFO::streq(). - Changing a lot of "LEX_CSTRING name" to "Lex_ident_xxx name" in class members and in function/method parameters. - Replacing all calls like system_charset_info->coll->strcasecmp(ident1, ident2) with ident1.streq(ident2) - Taking advantage of the C++11 user defined literal operator for LEX_CSTRING (see m_strings.h) and Lex_ident_xxx (see lex_ident.h) data types. Usage example: const Lex_ident_column primary_key_name= "PRIMARY"_Lex_ident_column; is now a shorter version of: const Lex_ident_column primary_key_name= Lex_ident_column({STRING_WITH_LEN("PRIMARY")});
1 parent 159b7ca commit fd247cc

File tree

204 files changed

+8969
-3200
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

204 files changed

+8969
-3200
lines changed

client/mysql.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,9 +207,9 @@ static void my_vidattr(chtype attrs)
207207
#endif
208208

209209
#ifdef FN_NO_CASE_SENSE
210-
#define cmp_database(cs,A,B) my_strcasecmp((cs), (A), (B))
210+
#define cmp_database(A,B) my_strcasecmp_latin1((A), (B))
211211
#else
212-
#define cmp_database(cs,A,B) strcmp((A),(B))
212+
#define cmp_database(A,B) strcmp((A),(B))
213213
#endif
214214

215215
#include "completion_hash.h"
@@ -4764,7 +4764,7 @@ com_use(String *buffer __attribute__((unused)), char *line)
47644764
*/
47654765
get_current_db();
47664766

4767-
if (!current_db || cmp_database(charset_info, current_db,tmp))
4767+
if (!current_db || cmp_database(current_db, tmp))
47684768
{
47694769
if (one_database)
47704770
{

client/mysqlcheck.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,12 @@ static char *fix_table_name(char *dest, char *src);
247247
int what_to_do = 0;
248248

249249

250+
static inline int cmp_database(const char *a, const char *b)
251+
{
252+
return my_strcasecmp_latin1(a, b);
253+
}
254+
255+
250256
static void usage(void)
251257
{
252258
DBUG_ENTER("usage");
@@ -869,10 +875,10 @@ static int use_db(char *database)
869875
DBUG_ENTER("use_db");
870876

871877
if (mysql_get_server_version(sock) >= FIRST_INFORMATION_SCHEMA_VERSION &&
872-
!my_strcasecmp(&my_charset_latin1, database, INFORMATION_SCHEMA_DB_NAME))
878+
!cmp_database(database, INFORMATION_SCHEMA_DB_NAME))
873879
DBUG_RETURN(1);
874880
if (mysql_get_server_version(sock) >= FIRST_PERFORMANCE_SCHEMA_VERSION &&
875-
!my_strcasecmp(&my_charset_latin1, database, PERFORMANCE_SCHEMA_DB_NAME))
881+
!cmp_database(database, PERFORMANCE_SCHEMA_DB_NAME))
876882
DBUG_RETURN(1);
877883
if (mysql_select_db(sock, database))
878884
{

client/mysqldump.cc

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,19 @@ static int dump_tablespaces_for_databases(char** databases);
658658
static int dump_tablespaces(char* ts_where);
659659
static void print_comment(FILE *, my_bool, const char *, ...);
660660

661+
662+
static inline int cmp_database(const char *a, const char *b)
663+
{
664+
return my_strcasecmp_latin1(a, b);
665+
}
666+
667+
668+
static inline int cmp_table(const char *a, const char *b)
669+
{
670+
return my_strcasecmp_latin1(a, b);
671+
}
672+
673+
661674
/*
662675
Print the supplied message if in verbose mode
663676
@@ -2993,10 +3006,10 @@ static uint dump_routines_for_db(char *db)
29933006
static inline my_bool general_log_or_slow_log_tables(const char *db,
29943007
const char *table)
29953008
{
2996-
return (!my_strcasecmp(charset_info, db, "mysql")) &&
2997-
(!my_strcasecmp(charset_info, table, "general_log") ||
2998-
!my_strcasecmp(charset_info, table, "slow_log") ||
2999-
!my_strcasecmp(charset_info, table, "transaction_registry"));
3009+
return (!cmp_database(db, "mysql")) &&
3010+
(!cmp_table(table, "general_log") ||
3011+
!cmp_table(table, "slow_log") ||
3012+
!cmp_table(table, "transaction_registry"));
30003013
}
30013014
/*
30023015
get_sequence_structure-- retrieves sequence structure, prints out corresponding
@@ -4165,8 +4178,8 @@ static void dump_table(const char *table, const char *db, const uchar *hash_key,
41654178
discarding SHOW CREATE EVENT statements generation. The mysql.event
41664179
table data should be skipped too.
41674180
*/
4168-
if (!opt_events && !my_strcasecmp(&my_charset_latin1, db, "mysql") &&
4169-
!my_strcasecmp(&my_charset_latin1, table, "event"))
4181+
if (!opt_events && !cmp_database(db, "mysql") &&
4182+
!cmp_table(table, "event"))
41704183
{
41714184
verbose_msg("-- Skipping data table mysql.event, --skip-events was used\n");
41724185
DBUG_VOID_RETURN;
@@ -5430,15 +5443,15 @@ static int dump_all_databases()
54305443
while ((row= mysql_fetch_row(tableres)))
54315444
{
54325445
if (mysql_get_server_version(mysql) >= FIRST_INFORMATION_SCHEMA_VERSION &&
5433-
!my_strcasecmp(&my_charset_latin1, row[0], INFORMATION_SCHEMA_DB_NAME))
5446+
!cmp_database(row[0], INFORMATION_SCHEMA_DB_NAME))
54345447
continue;
54355448

54365449
if (mysql_get_server_version(mysql) >= FIRST_PERFORMANCE_SCHEMA_VERSION &&
5437-
!my_strcasecmp(&my_charset_latin1, row[0], PERFORMANCE_SCHEMA_DB_NAME))
5450+
!cmp_database(row[0], PERFORMANCE_SCHEMA_DB_NAME))
54385451
continue;
54395452

54405453
if (mysql_get_server_version(mysql) >= FIRST_SYS_SCHEMA_VERSION &&
5441-
!my_strcasecmp(&my_charset_latin1, row[0], SYS_SCHEMA_DB_NAME))
5454+
!cmp_database(row[0], SYS_SCHEMA_DB_NAME))
54425455
continue;
54435456

54445457
if (include_database(row[0]))
@@ -5458,15 +5471,15 @@ static int dump_all_databases()
54585471
while ((row= mysql_fetch_row(tableres)))
54595472
{
54605473
if (mysql_get_server_version(mysql) >= FIRST_INFORMATION_SCHEMA_VERSION &&
5461-
!my_strcasecmp(&my_charset_latin1, row[0], INFORMATION_SCHEMA_DB_NAME))
5474+
!cmp_database(row[0], INFORMATION_SCHEMA_DB_NAME))
54625475
continue;
54635476

54645477
if (mysql_get_server_version(mysql) >= FIRST_PERFORMANCE_SCHEMA_VERSION &&
5465-
!my_strcasecmp(&my_charset_latin1, row[0], PERFORMANCE_SCHEMA_DB_NAME))
5478+
!cmp_database(row[0], PERFORMANCE_SCHEMA_DB_NAME))
54665479
continue;
54675480

54685481
if (mysql_get_server_version(mysql) >= FIRST_SYS_SCHEMA_VERSION &&
5469-
!my_strcasecmp(&my_charset_latin1, row[0], SYS_SCHEMA_DB_NAME))
5482+
!cmp_database(row[0], SYS_SCHEMA_DB_NAME))
54705483
continue;
54715484

54725485
if (include_database(row[0]))
@@ -5676,7 +5689,7 @@ static int dump_all_tables_in_db(char *database)
56765689
char hash_key[2*NAME_LEN+2]; /* "db.tablename" */
56775690
char *afterdot;
56785691
my_bool transaction_registry_table_exists= 0;
5679-
int using_mysql_db= !my_strcasecmp(charset_info, database, "mysql");
5692+
int using_mysql_db= !cmp_database(database, "mysql");
56805693
DBUG_ENTER("dump_all_tables_in_db");
56815694

56825695
afterdot= strmov(hash_key, database);
@@ -5787,7 +5800,7 @@ static int dump_all_tables_in_db(char *database)
57875800
after 'UNLOCK TABLES' query is executed on the session, get the table
57885801
structure from server and dump it in the file.
57895802
*/
5790-
if (using_mysql_db && !my_strcasecmp(charset_info, table, "transaction_registry"))
5803+
if (using_mysql_db && !cmp_table(table, "transaction_registry"))
57915804
transaction_registry_table_exists= 1;
57925805
}
57935806
}
@@ -6070,9 +6083,9 @@ static int dump_selected_tables(char *db, char **table_names, int tables)
60706083
/* Can't LOCK TABLES in I_S / P_S, so don't try. */
60716084
if (lock_tables &&
60726085
!(mysql_get_server_version(mysql) >= FIRST_INFORMATION_SCHEMA_VERSION &&
6073-
!my_strcasecmp(&my_charset_latin1, db, INFORMATION_SCHEMA_DB_NAME)) &&
6086+
!cmp_database(db, INFORMATION_SCHEMA_DB_NAME)) &&
60746087
!(mysql_get_server_version(mysql) >= FIRST_PERFORMANCE_SCHEMA_VERSION &&
6075-
!my_strcasecmp(&my_charset_latin1, db, PERFORMANCE_SCHEMA_DB_NAME)))
6088+
!cmp_database(db, PERFORMANCE_SCHEMA_DB_NAME)))
60766089
{
60776090
if (mysql_real_query(mysql, lock_tables_query.str,
60786091
(ulong)lock_tables_query.length-1))

client/mysqlshow.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ list_dbs(MYSQL *mysql,const char *wild)
413413
if (wild && mysql_num_rows(result) == 1)
414414
{
415415
row= mysql_fetch_row(result);
416-
if (!my_strcasecmp(&my_charset_latin1, row[0], wild))
416+
if (!my_strcasecmp_latin1(row[0], wild))
417417
{
418418
mysql_free_result(result);
419419
if (opt_status)

include/ft_global.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,6 @@ struct _ft_vft_ext
5656
#define FTS_ORDERED_RESULT (1LL << 1)
5757
#define FTS_DOCID_IN_RESULT (1LL << 2)
5858

59-
#define FTS_DOC_ID_COL_NAME "FTS_DOC_ID"
60-
6159
#ifndef FT_CORE
6260
struct st_ft_info
6361
{

include/m_ctype.h

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -547,8 +547,6 @@ struct my_collation_handler_st
547547
const char *wildstr,const char *wildend,
548548
int escape,int w_one, int w_many);
549549

550-
int (*strcasecmp)(CHARSET_INFO *, const char *, const char *);
551-
552550
uint (*instr)(CHARSET_INFO *,
553551
const char *b, size_t b_length,
554552
const char *s, size_t s_length,
@@ -804,6 +802,17 @@ struct charset_info_st
804802

805803
#ifdef __cplusplus
806804
/* Character set routines */
805+
806+
/* Make sure the comparison operand is valid. */
807+
static bool is_valid_string(const LEX_CSTRING &str)
808+
{
809+
/*
810+
LEX_CSTRING::str can be NULL, but only if LEX_CSTRING::length is 0.
811+
Does not have to be a 0-terminated string.
812+
*/
813+
return str.str != NULL || str.length == 0;
814+
}
815+
807816
bool use_mb() const
808817
{
809818
return mbmaxlen > 1;
@@ -1027,6 +1036,26 @@ struct charset_info_st
10271036
return state & MY_CS_COMPILED;
10281037
}
10291038

1039+
/*
1040+
Compare two strings for equality.
1041+
There may be a separate more optimized virtual function streq() in
1042+
MY_COLLATION_HANDLER eventually. For now it's a wrapper for strnncoll().
1043+
*/
1044+
my_bool streq(const LEX_CSTRING a, const LEX_CSTRING b) const
1045+
{
1046+
return 0 == strnncoll(a, b, FALSE);
1047+
}
1048+
1049+
int strnncoll(const LEX_CSTRING a, const LEX_CSTRING b,
1050+
my_bool b_is_prefix= FALSE) const
1051+
{
1052+
DBUG_ASSERT(is_valid_string(a));
1053+
DBUG_ASSERT(is_valid_string(b));
1054+
return (coll->strnncoll)(this,
1055+
(const uchar *) a.str, a.length,
1056+
(const uchar *) b.str, b.length, b_is_prefix);
1057+
}
1058+
10301059
int strnncoll(const uchar *a, size_t alen,
10311060
const uchar *b, size_t blen, my_bool b_is_prefix= FALSE) const
10321061
{
@@ -1392,6 +1421,10 @@ extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_latin1;
13921421
extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_latin1_nopad;
13931422
extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_filename;
13941423
extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_utf8mb3_general_ci;
1424+
extern MYSQL_PLUGIN_IMPORT struct charset_info_st
1425+
my_charset_utf8mb3_general1400_as_ci;
1426+
extern MYSQL_PLUGIN_IMPORT struct charset_info_st
1427+
my_charset_utf8mb4_general1400_as_ci;
13951428

13961429
extern struct charset_info_st my_charset_big5_bin;
13971430
extern struct charset_info_st my_charset_big5_chinese_ci;
@@ -1658,7 +1691,6 @@ extern size_t my_caseup_ujis(CHARSET_INFO *,
16581691
extern size_t my_casedn_ujis(CHARSET_INFO *,
16591692
const char *src, size_t srclen,
16601693
char *dst, size_t dstlen);
1661-
extern int my_strcasecmp_mb(CHARSET_INFO * cs,const char *, const char *);
16621694

16631695
int my_wildcmp_mb(CHARSET_INFO *,
16641696
const char *str,const char *str_end,
@@ -1677,9 +1709,6 @@ int my_wildcmp_mb_bin(CHARSET_INFO *cs,
16771709
const char *wildstr,const char *wildend,
16781710
int escape, int w_one, int w_many);
16791711

1680-
int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
1681-
const char *s, const char *t);
1682-
16831712
void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
16841713
const uchar *key, size_t len,ulong *nr1, ulong *nr2);
16851714

@@ -1838,7 +1867,6 @@ size_t my_convert_fix(CHARSET_INFO *dstcs, char *dst, size_t dst_length,
18381867
#define my_binary_compare(s) ((s)->state & MY_CS_BINSORT)
18391868
#define use_strnxfrm(s) ((s)->state & MY_CS_STRNXFRM)
18401869
#define my_strnncoll(s, a, b, c, d) ((s)->coll->strnncoll((s), (a), (b), (c), (d), 0))
1841-
#define my_strcasecmp(s, a, b) ((s)->coll->strcasecmp((s), (a), (b)))
18421870

18431871
/**
18441872
Detect if the leftmost character in a string is a valid multi-byte character
@@ -1886,6 +1914,14 @@ my_well_formed_length(CHARSET_INFO *cs, const char *b, const char *e,
18861914
}
18871915

18881916

1917+
static inline int
1918+
my_strcasecmp_latin1(const char *a, const char *b)
1919+
{
1920+
return my_strcasecmp_8bit(&my_charset_latin1, a, b);
1921+
}
1922+
1923+
1924+
18891925
/* XXX: still need to take care of this one */
18901926
#ifdef MY_CHARSET_TIS620
18911927
#error The TIS620 charset is broken at the moment. Tell tim to fix it.

include/m_string.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,15 @@ template<typename T> inline constexpr const char *_swl_check(T s)
220220

221221
typedef struct st_mysql_const_lex_string LEX_CSTRING;
222222

223+
#ifdef __cplusplus
224+
static inline constexpr
225+
LEX_CSTRING operator"" _LEX_CSTRING(const char *str, size_t length)
226+
{
227+
return LEX_CSTRING{str, length};
228+
}
229+
#endif /* __cplusplus */
230+
231+
223232
/* A variant with const and unsigned */
224233
struct st_mysql_const_unsigned_lex_string
225234
{

mysql-test/main/ctype_like_range.result

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4477,3 +4477,18 @@ DROP TABLE t1;
44774477
#
44784478
# End of 10.2 tests
44794479
#
4480+
#
4481+
# Start of 11.5 tests
4482+
#
4483+
#
4484+
# MDEV-33806 Server crashes when executing Admin SQL/DML after setting character_set_collations to utf8mb3_general1400_as_ci
4485+
#
4486+
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb3 COLLATE utf8mb3_general1400_as_ci);
4487+
INSERT INTO t1 VALUES ('111%');
4488+
SELECT a, HEX(LIKE_RANGE_MAX(a,40)) FROM t1 ORDER BY a;
4489+
a HEX(LIKE_RANGE_MAX(a,40))
4490+
111% 313131EFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBFEFBFBF20
4491+
DROP TABLE t1;
4492+
#
4493+
# End of 11.5 tests
4494+
#

mysql-test/main/ctype_like_range.test

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,21 @@ DROP TABLE t1;
197197
--echo #
198198
--echo # End of 10.2 tests
199199
--echo #
200+
201+
202+
--echo #
203+
--echo # Start of 11.5 tests
204+
--echo #
205+
206+
--echo #
207+
--echo # MDEV-33806 Server crashes when executing Admin SQL/DML after setting character_set_collations to utf8mb3_general1400_as_ci
208+
--echo #
209+
210+
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb3 COLLATE utf8mb3_general1400_as_ci);
211+
INSERT INTO t1 VALUES ('111%');
212+
SELECT a, HEX(LIKE_RANGE_MAX(a,40)) FROM t1 ORDER BY a;
213+
DROP TABLE t1;
214+
215+
--echo #
216+
--echo # End of 11.5 tests
217+
--echo #
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#
2+
# Start of 11.5 tests
3+
#
4+
#
5+
# MDEV-33806 Server crashes when executing Admin SQL/DML after setting character_set_collations to utf8mb3_general1400_as_ci
6+
#
7+
CREATE TABLE t1(a CHAR (32),KEY (a)) DEFAULT CHARSET=utf8mb3 COLLATE utf8mb3_general1400_as_ci;
8+
SELECT * FROM t1 WHERE a LIKE 'a%';
9+
a
10+
INSERT INTO t1 VALUES ('a');
11+
SELECT * FROM t1 WHERE a LIKE 'a%';
12+
a
13+
a
14+
FOR i IN 0..32
15+
DO
16+
INSERT INTO t1 VALUES (CONCAT('b', i));
17+
END FOR;
18+
$$
19+
SELECT * FROM t1 WHERE a LIKE 'a%';
20+
a
21+
a
22+
DROP TABLE t1;
23+
#
24+
# End of 11.5 tests
25+
#

0 commit comments

Comments
 (0)