Skip to content

Commit

Permalink
MDEV-9665 Remove cs->cset->ismbchar()
Browse files Browse the repository at this point in the history
Using the more powerful cs->cset->charlen() instead.
  • Loading branch information
Alexander Barkov committed Mar 16, 2016
1 parent dc08cca commit e092995
Show file tree
Hide file tree
Showing 20 changed files with 83 additions and 188 deletions.
39 changes: 36 additions & 3 deletions include/m_ctype.h
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,6 @@ struct my_charset_handler_st
{
my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *loader);
/* Multibyte routines */
uint (*ismbchar)(CHARSET_INFO *, const char *, const char *);
uint (*mbcharlen)(CHARSET_INFO *, uint c);
size_t (*numchars)(CHARSET_INFO *, const char *b, const char *e);
size_t (*charpos)(CHARSET_INFO *, const char *b, const char *e,
Expand Down Expand Up @@ -972,8 +971,42 @@ size_t my_convert_fix(CHARSET_INFO *dstcs, char *dst, size_t dst_length,
#define my_strcasecmp(s, a, b) ((s)->coll->strcasecmp((s), (a), (b)))
#define my_charpos(cs, b, e, num) (cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num))

#define use_mb(s) ((s)->cset->ismbchar != NULL)
#define my_ismbchar(s, a, b) ((s)->cset->ismbchar((s), (a), (b)))
#define use_mb(s) ((s)->mbmaxlen > 1)
/**
  Tell whether a string starts with a valid multi-byte character.

  The check is delegated to the character set's charlen() handler; only
  results greater than one byte are reported as a multi-byte character.

  @param cs  - character set
  @param str - the beginning of the string
  @param end - the string end (the next byte after the string)
  @return >0, the byte length of the leading multi-byte character
  @return 0, when charlen() reports 1 or less (a single byte character,
             a broken sequence, an empty string)
*/
static inline
uint my_ismbchar(CHARSET_INFO *cs, const char *str, const char *end)
{
  const uchar *first= (const uchar *) str;
  const uchar *limit= (const uchar *) end;
  int len= (cs->cset->charlen)(cs, first, limit);

  if (len <= 1)
    return 0U;
  return (uint) len;
}


/**
  Return length of the leftmost character in a string.

  This is a thin wrapper around the character set's charlen() handler.

  @param cs  - character set
  @param str - the beginning of the string
  @param end - the string end (the next byte after the string)
  @return <=0 on errors (EOL, wrong byte sequence)
  @return 1 on a single byte character
  @return >1 on a multi-byte character

  Note, unlike my_ismbchar(), 1 is returned for a single byte character.

  The result is deliberately signed. Error results are zero or negative;
  passing them through an unsigned return type would turn them into huge
  positive values, so a caller testing "my_charlen(...) > 1" would treat
  a broken or truncated sequence as a valid multi-byte character.
*/
static inline
int my_charlen(CHARSET_INFO *cs, const char *str, const char *end)
{
  return (cs->cset->charlen)(cs, (const uchar *) str,
                                 (const uchar *) end);
}
#ifdef USE_MB
#define my_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a)))
#else
Expand Down
3 changes: 1 addition & 2 deletions sql/item.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5437,8 +5437,7 @@ String_copier_for_item::copy_with_warn(CHARSET_INFO *dstcs, String *dst,
if (const char *pos= cannot_convert_error_pos())
{
char buf[16];
int mblen= srccs->cset->charlen(srccs, (const uchar *) pos,
(const uchar *) src + src_length);
int mblen= my_charlen(srccs, pos, src + src_length);
DBUG_ASSERT(mblen > 0 && mblen * 2 + 1 <= (int) sizeof(buf));
octet2hex(buf, pos, mblen);
push_warning_printf(m_thd, Sql_condition::WARN_LEVEL_WARN,
Expand Down
55 changes: 22 additions & 33 deletions sql/sql_lex.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1406,28 +1406,22 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
if (use_mb(cs))
{
result_state= IDENT_QUOTED;
if (my_mbcharlen(cs, lip->yyGetLast()) > 1)
int char_length= my_charlen(cs, lip->get_ptr() - 1,
lip->get_end_of_query());
if (char_length <= 0)
{
int l = my_ismbchar(cs,
lip->get_ptr() -1,
lip->get_end_of_query());
if (l == 0) {
state = MY_LEX_CHAR;
continue;
}
lip->skip_binary(l - 1);
state= MY_LEX_CHAR;
continue;
}
lip->skip_binary(char_length - 1);

while (ident_map[c=lip->yyGet()])
{
if (my_mbcharlen(cs, c) > 1)
{
int l;
if ((l = my_ismbchar(cs,
lip->get_ptr() -1,
lip->get_end_of_query())) == 0)
break;
lip->skip_binary(l-1);
}
char_length= my_charlen(cs, lip->get_ptr() - 1,
lip->get_end_of_query());
if (char_length <= 0)
break;
lip->skip_binary(char_length - 1);
}
}
else
Expand Down Expand Up @@ -1568,15 +1562,11 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
result_state= IDENT_QUOTED;
while (ident_map[c=lip->yyGet()])
{
if (my_mbcharlen(cs, c) > 1)
{
int l;
if ((l = my_ismbchar(cs,
lip->get_ptr() -1,
lip->get_end_of_query())) == 0)
break;
lip->skip_binary(l-1);
}
int char_length= my_charlen(cs, lip->get_ptr() - 1,
lip->get_end_of_query());
if (char_length <= 0)
break;
lip->skip_binary(char_length - 1);
}
}
else
Expand Down Expand Up @@ -1604,8 +1594,9 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
char quote_char= c; // Used char
while ((c=lip->yyGet()))
{
int var_length;
if ((var_length= my_mbcharlen(cs, c)) == 1)
int var_length= my_charlen(cs, lip->get_ptr() - 1,
lip->get_end_of_query());
if (var_length == 1)
{
if (c == quote_char)
{
Expand All @@ -1617,11 +1608,9 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
}
}
#ifdef USE_MB
else if (use_mb(cs))
else if (var_length > 1)
{
if ((var_length= my_ismbchar(cs, lip->get_ptr() - 1,
lip->get_end_of_query())))
lip->skip_binary(var_length-1);
lip->skip_binary(var_length - 1);
}
#endif
}
Expand Down
20 changes: 10 additions & 10 deletions sql/sql_show.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1431,14 +1431,13 @@ mysqld_list_fields(THD *thd, TABLE_LIST *table_list, const char *wild)

static const char *require_quotes(const char *name, uint name_length)
{
uint length;
bool pure_digit= TRUE;
const char *end= name + name_length;

for (; name < end ; name++)
{
uchar chr= (uchar) *name;
length= my_mbcharlen(system_charset_info, chr);
int length= my_charlen(system_charset_info, name, end);
if (length == 1 && !system_charset_info->ident_map[chr])
return name;
if (length == 1 && (chr < '0' || chr > '9'))
Expand Down Expand Up @@ -1496,24 +1495,25 @@ append_identifier(THD *thd, String *packet, const char *name, uint length)
if (packet->append(&quote_char, 1, quote_charset))
return true;

for (name_end= name+length ; name < name_end ; name+= length)
for (name_end= name+length ; name < name_end ; )
{
uchar chr= (uchar) *name;
length= my_mbcharlen(system_charset_info, chr);
int char_length= my_charlen(system_charset_info, name, name_end);
/*
my_mbcharlen can return 0 on a wrong multibyte
charlen can return 0 and negative numbers on a wrong multibyte
sequence. It is possible when upgrading from 4.0,
and identifier contains some accented characters.
The manual says it does not work. So we'll just
change length to 1 not to hang in the endless loop.
change char_length to 1 not to hang in the endless loop.
*/
if (!length)
length= 1;
if (length == 1 && chr == (uchar) quote_char &&
if (char_length <= 0)
char_length= 1;
if (char_length == 1 && chr == (uchar) quote_char &&
packet->append(&quote_char, 1, quote_charset))
return true;
if (packet->append(name, length, system_charset_info))
if (packet->append(name, char_length, system_charset_info))
return true;
name+= char_length;
}
return packet->append(&quote_char, 1, quote_charset);
}
Expand Down
8 changes: 5 additions & 3 deletions sql/sql_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static char* add_identifier(THD* thd, char *to_p, const char * end_p,
{
uint res;
uint errors;
const char *conv_name;
const char *conv_name, *conv_name_end;
char tmp_name[FN_REFLEN];
char conv_string[FN_REFLEN];
int quote;
Expand All @@ -111,11 +111,13 @@ static char* add_identifier(THD* thd, char *to_p, const char * end_p,
{
DBUG_PRINT("error", ("strconvert of '%s' failed with %u (errors: %u)", conv_name, res, errors));
conv_name= name;
conv_name_end= name + name_len;
}
else
{
DBUG_PRINT("info", ("conv '%s' -> '%s'", conv_name, conv_string));
conv_name= conv_string;
conv_name_end= conv_string + res;
}

quote = thd ? get_quote_char_for_identifier(thd, conv_name, res - 1) : '"';
Expand All @@ -125,8 +127,8 @@ static char* add_identifier(THD* thd, char *to_p, const char * end_p,
*(to_p++)= (char) quote;
while (*conv_name && (end_p - to_p - 1) > 0)
{
uint length= my_mbcharlen(system_charset_info, *conv_name);
if (!length)
int length= my_charlen(system_charset_info, conv_name, conv_name_end);
if (length <= 0)
length= 1;
if (length == 1 && *conv_name == (char) quote)
{
Expand Down
14 changes: 3 additions & 11 deletions strings/ctype-big5.c
Original file line number Diff line number Diff line change
Expand Up @@ -862,12 +862,12 @@ my_strnxfrm_big5(CHARSET_INFO *cs,

for (; dst < de && src < se && nweights; nweights--)
{
if (cs->cset->ismbchar(cs, (const char*) src, (const char*) se))
if (my_charlen(cs, src, se) > 1)
{
/*
Note, it is safe not to check (src < se)
in the code below, because ismbchar() would
not return TRUE if src was too short
in the code below, because my_charlen() would
not return 2 if src was too short
*/
uint16 e= big5strokexfrm((uint16) big5code(*src, *(src + 1)));
*dst++= big5head(e);
Expand Down Expand Up @@ -930,13 +930,6 @@ static int my_strxfrm_big5(uchar *dest, const uchar *src, int len)
#endif


/*
  Report whether the bytes at p start a two-byte big5 character.

  Returns 2 when *p satisfies isbig5head(), more than one byte is
  available before e, and the following byte satisfies isbig5tail().
  Returns 0 in every other case. The cs argument is not used.
*/
static uint ismbchar_big5(CHARSET_INFO *cs __attribute__((unused)),
                          const char* p, const char *e)
{
  if (isbig5head(*p) && e - p > 1 && isbig5tail(*(p + 1)))
    return 2;
  return 0;
}


static uint mbcharlen_big5(CHARSET_INFO *cs __attribute__((unused)), uint c)
{
return (isbig5head(c)? 2 : 1);
Expand Down Expand Up @@ -6818,7 +6811,6 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
static MY_CHARSET_HANDLER my_charset_big5_handler=
{
NULL, /* init */
ismbchar_big5,
mbcharlen_big5,
my_numchars_mb,
my_charpos_mb,
Expand Down
1 change: 0 additions & 1 deletion strings/ctype-bin.c
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,6 @@ static MY_COLLATION_HANDLER my_collation_binary_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
NULL, /* ismbchar */
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
Expand Down
7 changes: 0 additions & 7 deletions strings/ctype-cp932.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,6 @@ static const uchar sort_order_cp932[]=
#include "ctype-mb.ic"


/*
  Report whether the bytes at p start a two-byte cp932 character.

  Returns 2 when the first byte satisfies iscp932head(), more than one
  byte is available before e, and the second byte satisfies
  iscp932tail(). Returns 0 in every other case. The cs argument is not
  used.
*/
static uint ismbchar_cp932(CHARSET_INFO *cs __attribute__((unused)),
                           const char* p, const char *e)
{
  if (iscp932head((uchar) *p) && (e - p) > 1 && iscp932tail((uchar) p[1]))
    return 2;
  return 0;
}

static uint mbcharlen_cp932(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
return (iscp932head((uchar) c) ? 2 : 1);
Expand Down Expand Up @@ -34693,7 +34687,6 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
ismbchar_cp932,
mbcharlen_cp932,
my_numchars_mb,
my_charpos_mb,
Expand Down
9 changes: 0 additions & 9 deletions strings/ctype-euc_kr.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,6 @@ static const uchar sort_order_euc_kr[]=
#include "ctype-mb.ic"


/*
  Report whether the bytes at p start a two-byte euc_kr character.

  A first byte below 0x80 is never a multi-byte lead, so 0 is returned
  immediately. Otherwise returns 2 when *p satisfies iseuc_kr_head(),
  more than one byte is available before e, and the following byte
  satisfies iseuc_kr_tail(). Returns 0 in every other case. The cs
  argument is not used.
*/
static uint ismbchar_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
                            const char* p, const char *e)
{
  if (*(const uchar *) p < 0x80)
    return 0;
  if (iseuc_kr_head(*p) && e - p > 1 && iseuc_kr_tail(*(p + 1)))
    return 2;
  return 0;
}

static uint mbcharlen_euc_kr(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
return (iseuc_kr_head(c) ? 2 : 1);
Expand Down Expand Up @@ -9987,7 +9979,6 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
ismbchar_euc_kr,
mbcharlen_euc_kr,
my_numchars_mb,
my_charpos_mb,
Expand Down
11 changes: 0 additions & 11 deletions strings/ctype-eucjpms.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,16 +220,6 @@ static const uchar sort_order_eucjpms[]=
#include "strcoll.ic"


/*
  Report whether the bytes at p start a multi-byte eucjpms character and,
  if so, how many bytes it occupies.

  The forms are tried in this order:
    - first byte below 0x80: not multi-byte, returns 0;
    - iseucjpms() byte followed by an iseucjpms() byte: returns 2;
    - iseucjpms_ss2() byte followed by an iskata() byte: returns 2;
    - iseucjpms_ss3() byte followed by two iseucjpms() bytes: returns 3.
  Each form is accepted only when enough bytes are available before e.
  Returns 0 when none of the forms match. The cs argument is not used.
*/
static uint ismbchar_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
                             const char* p, const char *e)
{
  if (*(const uchar *) p < 0x80)
    return 0;
  if (iseucjpms(*p) && e - p > 1 && iseucjpms(*(p + 1)))
    return 2;
  if (iseucjpms_ss2(*p) && e - p > 1 && iskata(*(p + 1)))
    return 2;
  if (iseucjpms_ss3(*p) && e - p > 2 &&
      iseucjpms(*(p + 1)) && iseucjpms(*(p + 2)))
    return 3;
  return 0;
}

static uint mbcharlen_eucjpms(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
return (iseucjpms(c)? 2: iseucjpms_ss2(c)? 2: iseucjpms_ss3(c)? 3: 1);
Expand Down Expand Up @@ -67520,7 +67510,6 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
ismbchar_eucjpms,
mbcharlen_eucjpms,
my_numchars_mb,
my_charpos_mb,
Expand Down
7 changes: 0 additions & 7 deletions strings/ctype-gb2312.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,6 @@ static const uchar sort_order_gb2312[]=
#include "ctype-mb.ic"


/*
  Report whether the bytes at p start a two-byte gb2312 character.

  Returns 2 when *p satisfies isgb2312head(), more than one byte is
  available before e, and the following byte satisfies isgb2312tail().
  Returns 0 in every other case. The cs argument is not used.
*/
static uint ismbchar_gb2312(CHARSET_INFO *cs __attribute__((unused)),
                            const char* p, const char *e)
{
  if (isgb2312head(*p) && e - p > 1 && isgb2312tail(*(p + 1)))
    return 2;
  return 0;
}

static uint mbcharlen_gb2312(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
return (isgb2312head(c)? 2 : 1);
Expand Down Expand Up @@ -6391,7 +6385,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
ismbchar_gb2312,
mbcharlen_gb2312,
my_numchars_mb,
my_charpos_mb,
Expand Down
Loading

0 comments on commit e092995

Please sign in to comment.