Skip to content

Commit e092995

Browse files
author
Alexander Barkov
committed
MDEV-9665 Remove cs->cset->ismbchar()
Using the more powerful cs->cset->charlen() instead.
1 parent dc08cca commit e092995

20 files changed

+83
-188
lines changed

include/m_ctype.h

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,6 @@ struct my_charset_handler_st
400400
{
401401
my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *loader);
402402
/* Multibyte routines */
403-
uint (*ismbchar)(CHARSET_INFO *, const char *, const char *);
404403
uint (*mbcharlen)(CHARSET_INFO *, uint c);
405404
size_t (*numchars)(CHARSET_INFO *, const char *b, const char *e);
406405
size_t (*charpos)(CHARSET_INFO *, const char *b, const char *e,
@@ -972,8 +971,42 @@ size_t my_convert_fix(CHARSET_INFO *dstcs, char *dst, size_t dst_length,
972971
#define my_strcasecmp(s, a, b) ((s)->coll->strcasecmp((s), (a), (b)))
973972
#define my_charpos(cs, b, e, num) (cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num))
974973

975-
#define use_mb(s) ((s)->cset->ismbchar != NULL)
976-
#define my_ismbchar(s, a, b) ((s)->cset->ismbchar((s), (a), (b)))
974+
#define use_mb(s) ((s)->mbmaxlen > 1)
975+
/**
976+
Detect if the leftmost character in a string is a valid multi-byte character
977+
and return its length, or return 0 otherwise.
978+
@param cs - character set
979+
@param str - the beginning of the string
980+
@param end - the string end (the next byte after the string)
981+
@return >0, for a multi-byte character
982+
@rerurn 0, for a single byte character, broken sequence, empty string.
983+
*/
984+
static inline
985+
uint my_ismbchar(CHARSET_INFO *cs, const char *str, const char *end)
986+
{
987+
int char_length= (cs->cset->charlen)(cs, (const uchar *) str,
988+
(const uchar *) end);
989+
return char_length > 1 ? (uint) char_length : 0U;
990+
}
991+
992+
993+
/**
994+
Return length of the leftmost character in a string.
995+
@param cs - character set
996+
@param str - the beginning of the string
997+
@param end - the string end (the next byte after the string)
998+
@return <=0 on errors (EOL, wrong byte sequence)
999+
@return 1 on a single byte character
1000+
@return >1 on a multi-byte character
1001+
1002+
Note, inlike my_ismbchar(), 1 is returned for a single byte character.
1003+
*/
1004+
static inline
1005+
uint my_charlen(CHARSET_INFO *cs, const char *str, const char *end)
1006+
{
1007+
return (cs->cset->charlen)(cs, (const uchar *) str,
1008+
(const uchar *) end);
1009+
}
9771010
#ifdef USE_MB
9781011
#define my_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a)))
9791012
#else

sql/item.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5437,8 +5437,7 @@ String_copier_for_item::copy_with_warn(CHARSET_INFO *dstcs, String *dst,
54375437
if (const char *pos= cannot_convert_error_pos())
54385438
{
54395439
char buf[16];
5440-
int mblen= srccs->cset->charlen(srccs, (const uchar *) pos,
5441-
(const uchar *) src + src_length);
5440+
int mblen= my_charlen(srccs, pos, src + src_length);
54425441
DBUG_ASSERT(mblen > 0 && mblen * 2 + 1 <= (int) sizeof(buf));
54435442
octet2hex(buf, pos, mblen);
54445443
push_warning_printf(m_thd, Sql_condition::WARN_LEVEL_WARN,

sql/sql_lex.cc

Lines changed: 22 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1406,28 +1406,22 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
14061406
if (use_mb(cs))
14071407
{
14081408
result_state= IDENT_QUOTED;
1409-
if (my_mbcharlen(cs, lip->yyGetLast()) > 1)
1409+
int char_length= my_charlen(cs, lip->get_ptr() - 1,
1410+
lip->get_end_of_query());
1411+
if (char_length <= 0)
14101412
{
1411-
int l = my_ismbchar(cs,
1412-
lip->get_ptr() -1,
1413-
lip->get_end_of_query());
1414-
if (l == 0) {
1415-
state = MY_LEX_CHAR;
1416-
continue;
1417-
}
1418-
lip->skip_binary(l - 1);
1413+
state= MY_LEX_CHAR;
1414+
continue;
14191415
}
1416+
lip->skip_binary(char_length - 1);
1417+
14201418
while (ident_map[c=lip->yyGet()])
14211419
{
1422-
if (my_mbcharlen(cs, c) > 1)
1423-
{
1424-
int l;
1425-
if ((l = my_ismbchar(cs,
1426-
lip->get_ptr() -1,
1427-
lip->get_end_of_query())) == 0)
1428-
break;
1429-
lip->skip_binary(l-1);
1430-
}
1420+
char_length= my_charlen(cs, lip->get_ptr() - 1,
1421+
lip->get_end_of_query());
1422+
if (char_length <= 0)
1423+
break;
1424+
lip->skip_binary(char_length - 1);
14311425
}
14321426
}
14331427
else
@@ -1568,15 +1562,11 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
15681562
result_state= IDENT_QUOTED;
15691563
while (ident_map[c=lip->yyGet()])
15701564
{
1571-
if (my_mbcharlen(cs, c) > 1)
1572-
{
1573-
int l;
1574-
if ((l = my_ismbchar(cs,
1575-
lip->get_ptr() -1,
1576-
lip->get_end_of_query())) == 0)
1577-
break;
1578-
lip->skip_binary(l-1);
1579-
}
1565+
int char_length= my_charlen(cs, lip->get_ptr() - 1,
1566+
lip->get_end_of_query());
1567+
if (char_length <= 0)
1568+
break;
1569+
lip->skip_binary(char_length - 1);
15801570
}
15811571
}
15821572
else
@@ -1604,8 +1594,9 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
16041594
char quote_char= c; // Used char
16051595
while ((c=lip->yyGet()))
16061596
{
1607-
int var_length;
1608-
if ((var_length= my_mbcharlen(cs, c)) == 1)
1597+
int var_length= my_charlen(cs, lip->get_ptr() - 1,
1598+
lip->get_end_of_query());
1599+
if (var_length == 1)
16091600
{
16101601
if (c == quote_char)
16111602
{
@@ -1617,11 +1608,9 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
16171608
}
16181609
}
16191610
#ifdef USE_MB
1620-
else if (use_mb(cs))
1611+
else if (var_length > 1)
16211612
{
1622-
if ((var_length= my_ismbchar(cs, lip->get_ptr() - 1,
1623-
lip->get_end_of_query())))
1624-
lip->skip_binary(var_length-1);
1613+
lip->skip_binary(var_length - 1);
16251614
}
16261615
#endif
16271616
}

sql/sql_show.cc

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,14 +1431,13 @@ mysqld_list_fields(THD *thd, TABLE_LIST *table_list, const char *wild)
14311431

14321432
static const char *require_quotes(const char *name, uint name_length)
14331433
{
1434-
uint length;
14351434
bool pure_digit= TRUE;
14361435
const char *end= name + name_length;
14371436

14381437
for (; name < end ; name++)
14391438
{
14401439
uchar chr= (uchar) *name;
1441-
length= my_mbcharlen(system_charset_info, chr);
1440+
int length= my_charlen(system_charset_info, name, end);
14421441
if (length == 1 && !system_charset_info->ident_map[chr])
14431442
return name;
14441443
if (length == 1 && (chr < '0' || chr > '9'))
@@ -1496,24 +1495,25 @@ append_identifier(THD *thd, String *packet, const char *name, uint length)
14961495
if (packet->append(&quote_char, 1, quote_charset))
14971496
return true;
14981497

1499-
for (name_end= name+length ; name < name_end ; name+= length)
1498+
for (name_end= name+length ; name < name_end ; )
15001499
{
15011500
uchar chr= (uchar) *name;
1502-
length= my_mbcharlen(system_charset_info, chr);
1501+
int char_length= my_charlen(system_charset_info, name, name_end);
15031502
/*
1504-
my_mbcharlen can return 0 on a wrong multibyte
1503+
charlen can return 0 and negative numbers on a wrong multibyte
15051504
sequence. It is possible when upgrading from 4.0,
15061505
and identifier contains some accented characters.
15071506
The manual says it does not work. So we'll just
1508-
change length to 1 not to hang in the endless loop.
1507+
change char_length to 1 not to hang in the endless loop.
15091508
*/
1510-
if (!length)
1511-
length= 1;
1512-
if (length == 1 && chr == (uchar) quote_char &&
1509+
if (char_length <= 0)
1510+
char_length= 1;
1511+
if (char_length == 1 && chr == (uchar) quote_char &&
15131512
packet->append(&quote_char, 1, quote_charset))
15141513
return true;
1515-
if (packet->append(name, length, system_charset_info))
1514+
if (packet->append(name, char_length, system_charset_info))
15161515
return true;
1516+
name+= char_length;
15171517
}
15181518
return packet->append(&quote_char, 1, quote_charset);
15191519
}

sql/sql_table.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ static char* add_identifier(THD* thd, char *to_p, const char * end_p,
9090
{
9191
uint res;
9292
uint errors;
93-
const char *conv_name;
93+
const char *conv_name, *conv_name_end;
9494
char tmp_name[FN_REFLEN];
9595
char conv_string[FN_REFLEN];
9696
int quote;
@@ -111,11 +111,13 @@ static char* add_identifier(THD* thd, char *to_p, const char * end_p,
111111
{
112112
DBUG_PRINT("error", ("strconvert of '%s' failed with %u (errors: %u)", conv_name, res, errors));
113113
conv_name= name;
114+
conv_name_end= name + name_len;
114115
}
115116
else
116117
{
117118
DBUG_PRINT("info", ("conv '%s' -> '%s'", conv_name, conv_string));
118119
conv_name= conv_string;
120+
conv_name_end= conv_string + res;
119121
}
120122

121123
quote = thd ? get_quote_char_for_identifier(thd, conv_name, res - 1) : '"';
@@ -125,8 +127,8 @@ static char* add_identifier(THD* thd, char *to_p, const char * end_p,
125127
*(to_p++)= (char) quote;
126128
while (*conv_name && (end_p - to_p - 1) > 0)
127129
{
128-
uint length= my_mbcharlen(system_charset_info, *conv_name);
129-
if (!length)
130+
int length= my_charlen(system_charset_info, conv_name, conv_name_end);
131+
if (length <= 0)
130132
length= 1;
131133
if (length == 1 && *conv_name == (char) quote)
132134
{

strings/ctype-big5.c

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -862,12 +862,12 @@ my_strnxfrm_big5(CHARSET_INFO *cs,
862862

863863
for (; dst < de && src < se && nweights; nweights--)
864864
{
865-
if (cs->cset->ismbchar(cs, (const char*) src, (const char*) se))
865+
if (my_charlen(cs, src, se) > 1)
866866
{
867867
/*
868868
Note, it is safe not to check (src < se)
869-
in the code below, because ismbchar() would
870-
not return TRUE if src was too short
869+
in the code below, because my_charlen() would
870+
not return 2 if src was too short
871871
*/
872872
uint16 e= big5strokexfrm((uint16) big5code(*src, *(src + 1)));
873873
*dst++= big5head(e);
@@ -930,13 +930,6 @@ static int my_strxfrm_big5(uchar *dest, const uchar *src, int len)
930930
#endif
931931

932932

933-
static uint ismbchar_big5(CHARSET_INFO *cs __attribute__((unused)),
934-
const char* p, const char *e)
935-
{
936-
return (isbig5head(*(p)) && (e)-(p)>1 && isbig5tail(*((p)+1))? 2: 0);
937-
}
938-
939-
940933
static uint mbcharlen_big5(CHARSET_INFO *cs __attribute__((unused)), uint c)
941934
{
942935
return (isbig5head(c)? 2 : 1);
@@ -6818,7 +6811,6 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
68186811
static MY_CHARSET_HANDLER my_charset_big5_handler=
68196812
{
68206813
NULL, /* init */
6821-
ismbchar_big5,
68226814
mbcharlen_big5,
68236815
my_numchars_mb,
68246816
my_charpos_mb,

strings/ctype-bin.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,6 @@ static MY_COLLATION_HANDLER my_collation_binary_handler =
521521
static MY_CHARSET_HANDLER my_charset_handler=
522522
{
523523
NULL, /* init */
524-
NULL, /* ismbchar */
525524
my_mbcharlen_8bit, /* mbcharlen */
526525
my_numchars_8bit,
527526
my_charpos_8bit,

strings/ctype-cp932.c

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -191,12 +191,6 @@ static const uchar sort_order_cp932[]=
191191
#include "ctype-mb.ic"
192192

193193

194-
static uint ismbchar_cp932(CHARSET_INFO *cs __attribute__((unused)),
195-
const char* p, const char *e)
196-
{
197-
return (iscp932head((uchar) *p) && (e-p)>1 && iscp932tail((uchar)p[1]) ? 2: 0);
198-
}
199-
200194
static uint mbcharlen_cp932(CHARSET_INFO *cs __attribute__((unused)),uint c)
201195
{
202196
return (iscp932head((uchar) c) ? 2 : 1);
@@ -34693,7 +34687,6 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
3469334687
static MY_CHARSET_HANDLER my_charset_handler=
3469434688
{
3469534689
NULL, /* init */
34696-
ismbchar_cp932,
3469734690
mbcharlen_cp932,
3469834691
my_numchars_mb,
3469934692
my_charpos_mb,

strings/ctype-euc_kr.c

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -210,14 +210,6 @@ static const uchar sort_order_euc_kr[]=
210210
#include "ctype-mb.ic"
211211

212212

213-
static uint ismbchar_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
214-
const char* p, const char *e)
215-
{
216-
return ((*(uchar*)(p)<0x80)? 0:\
217-
iseuc_kr_head(*(p)) && (e)-(p)>1 && iseuc_kr_tail(*((p)+1))? 2:\
218-
0);
219-
}
220-
221213
static uint mbcharlen_euc_kr(CHARSET_INFO *cs __attribute__((unused)),uint c)
222214
{
223215
return (iseuc_kr_head(c) ? 2 : 1);
@@ -9987,7 +9979,6 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
99879979
static MY_CHARSET_HANDLER my_charset_handler=
99889980
{
99899981
NULL, /* init */
9990-
ismbchar_euc_kr,
99919982
mbcharlen_euc_kr,
99929983
my_numchars_mb,
99939984
my_charpos_mb,

strings/ctype-eucjpms.c

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -220,16 +220,6 @@ static const uchar sort_order_eucjpms[]=
220220
#include "strcoll.ic"
221221

222222

223-
static uint ismbchar_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
224-
const char* p, const char *e)
225-
{
226-
return ((*(uchar*)(p)<0x80)? 0:\
227-
iseucjpms(*(p)) && (e)-(p)>1 && iseucjpms(*((p)+1))? 2:\
228-
iseucjpms_ss2(*(p)) && (e)-(p)>1 && iskata(*((p)+1))? 2:\
229-
iseucjpms_ss3(*(p)) && (e)-(p)>2 && iseucjpms(*((p)+1)) && iseucjpms(*((p)+2))? 3:\
230-
0);
231-
}
232-
233223
static uint mbcharlen_eucjpms(CHARSET_INFO *cs __attribute__((unused)),uint c)
234224
{
235225
return (iseucjpms(c)? 2: iseucjpms_ss2(c)? 2: iseucjpms_ss3(c)? 3: 1);
@@ -67520,7 +67510,6 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler =
6752067510
static MY_CHARSET_HANDLER my_charset_handler=
6752167511
{
6752267512
NULL, /* init */
67523-
ismbchar_eucjpms,
6752467513
mbcharlen_eucjpms,
6752567514
my_numchars_mb,
6752667515
my_charpos_mb,

0 commit comments

Comments
 (0)